protoc-gen-druid-spec

command module

v0.0.0-...-3db8dec Latest Latest Go to latest Published: Sep 23, 2022 License: MIT Imports: 7 Imported by: 0

Details

Valid go.mod file

The Go module system was introduced in Go 1.11 and is the official dependency management solution for Go.
Redistributable license

Redistributable licenses place minimal restrictions on how software can be used, modified, and redistributed.
Tagged version

Modules with tagged versions give importers more predictable builds.
Stable version

When a project reaches major version v1 it is considered stable.
Learn more about best practices

Repository

github.com/nguyensinhtu/protoc-gen-druid-spec

README ¶

protoc-gen-druid-spec

Installation

go install github.com/nguyensinhtu/protoc-gen-druid-spec@latest

Usage

protoc --druid-spect_out=path/to/outdir foo.proto

protoc --druid-spec_out=path/to/out/dir foo.proto --proto_path=. --proto_path=<path_to_google_proto_folder>/src

Example

syntax = "proto3";

package foo;

import "druid_ingestion.proto";
import "druid_spec.proto";

import "google/protobuf/wrappers.proto";
import "google/protobuf/timestamp.proto";

message Bar {
  option (gen_druid_spec.druid_opts) = {
    data_source_name: "bar_proto3_table"
    segment_granularity: "day"
    query_granularity: "day"
  };

  string client_id = 1 [ (gen_druid_spec.spec) = {
    dimension : {
      multi_value_handling : "SORTED_SET"
      create_bitmap_index : {value: true}
    }
    metric : {
      metric_name : "client_id_sketch"
      size : 16384
      type : "thetaSketch"
    }
  } ];

  Baz baz = 2 
    [ (gen_druid_spec.spec).flatten = {prefix : "baz_" } ];;

  TimeSpec time_spec = 4[ (gen_druid_spec.spec).flatten = { prefix : "time_spec_" } ];
}

message Baz {
  int32 a = 1 [ (gen_druid_spec.spec).metric = {
    metric_name: "a_metric"
    size: 16384 
    type: "thetaSketch"
  }];
}

message TimeSpec {
  string date_key = 1[ (gen_druid_spec.spec).timestamp = {} ];
}

Output

{
 "spec": {
  "dataSchema": {
   "dataSource": "bar_proto3_table",
   "timestampSpec": {
    "column": "time_spec__date_key"
   },
   "dimensionsSpec": {
    "dimensions": [
     {
      "name": "client_id",
      "type": "string",
      "multiValueHandling": "SORTED_SET",
      "createBitmapIndex": true
     },
     {
      "name": "baz__a",
      "type": "long"
     }
    ],
    "dimensionExclusions": [
     "time_spec__date_key"
    ]
   },
   "metricsSpec": [
    {
     "name": "client_id_sketch",
     "type": "thetaSketch",
     "isInputThetaSketch": false,
     "fieldName": "client_id",
     "size": 16384
    },
    {
     "name": "a_metric",
     "type": "thetaSketch",
     "isInputThetaSketch": false,
     "fieldName": "baz__a",
     "size": 16384
    }
   ],
   "granularitySpec": {
    "type": "uniform",
    "segmentGranularity": "day",
    "queryGranularity": "day",
    "rollup": true,
    "intervals": []
   }
  },
  "ioConfig": {
   "inputFormat": {
    "type": "json",
    "flattenSpec": {
     "fields": [
      {
       "type": "jq",
       "name": "baz__a",
       "expr": ".baz.a"
      },
      {
       "type": "jq",
       "name": "time_spec__date_key",
       "expr": ".time_spec.date_key"
      }
     ],
     "useFieldDiscovery": false
    }
   }
  }
 }
}

Conceptually, after input data records are read, Druid applies ingestion spec components in a particular order: first flattenSpec (if any), then timestampSpec, then transformSpec, and finally dimensionsSpec and metricsSpec. Keep this in mind when writing your ingestion spec.

flattenSpec

Parent field separate with nested field by double underscore '__'

if you dont set prefix or empty prefix, default will be protobuf field name

message Bar {
   Foo foo = 4[ (gen_druid_spec.spec).flatten = {} ];
}

message Foo {
  int32 i1 = 1;
}

Output

{
  "fields": [
    {
      "type": "jq",
      "name": "foo__i1",
      "expr": ".foo.i1"
    }
  ],
  "useFieldDiscovery": false
}

if you want to remove parent name set ignore_name

message Bar {
   Foo foo = 4[ (gen_druid_spec.spec).flatten = {ignore_name: true} ];
}

message Foo {
  int32 i1 = 1;
}

Output

{
  "fields": [
    {
      "type": "jq",
      "name": "i1",
      "expr": ".foo.i1"
    }
  ],
  "useFieldDiscovery": false
}

if you want to use diffirent prefix name

message Bar {
   Foo foo = 4[ (gen_druid_spec.spec).flatten = {prefix: "my_custom_name"} ];
}

message Foo {
  int32 i1 = 1;
}

Output

{
  "fields": [
    {
      "type": "jq",
      "name": "my_custom_name__i1",
      "expr": ".foo.i1"
    }
  ],
  "useFieldDiscovery": false
}

if you want use diffirent name set output_name

message Bar {
   Foo foo = 4[ (gen_druid_spec.spec).flatten = {output_name: "rename_foo"} ];
}

message Foo {
  int32 i1 = 1;
}

Output

{
  "fields": [
    {
      "type": "jq",
      "name": "rename_foo__i1",
      "expr": ".rename.i1"
    }
  ],
  "useFieldDiscovery": false
}

timestampSpec

if field is set to be timestamp it will be ignore in dimensionExclusions
Can not apply flattenSpec, metricSpec, dimensionSpec with timestampSpec
Can not apply timestampSpec to record type
Can not apply multiple timestampSpec to one configured data source

Query filters

TODO - Support Selector filter, Logical expression filters

IO Config

We do not recommend config the IOConfig for specific schema, because we can re-use one schema for multiple input source , but if you want you still config it

message Bar {
  option (gen_druid_spec.druid_opts) = {
    data_source_name: "bar_proto3_table"
    
    io_config: {
      topic: "abc"
      bootstrap_servers: "PLAINTEXT://abc.com"
      use_earliest_offset: false
    }
    
    ingestion_type: "kafka"
  };
}

Output

{
 "spec": {
  "dataSchema": {
   "dataSource": "bar_proto3_table",
   "dimensionsSpec": {},
   "granularitySpec": {
    "type": "uniform",
    "segmentGranularity": "day",
    "queryGranularity": "none",
    "rollup": true,
    "intervals": []
   }
  },
  "ioConfig": {
   "consumerProperties": {
    "bootstrap.servers": "PLAINTEXT://abc.com"
   },
   "inputFormat": {
    "type": "json"
   },
   "topic": "abc",
   "useEarliestOffset": false
  }
 },
 "type": "kafka"
}

Documentation ¶

There is no documentation for this package.

Source Files ¶

View all Source files

main.go

Directories ¶

Path	Synopsis
pkg
converter
protos

?	: This menu
/	: Search site
f or F	: Jump to
y or Y	: Canonical URL