common

package v0.0.2

Published: Sep 5, 2019 License: Apache-2.0 Imports: 13 Imported by: 6

Documentation

Index

Constants

const (
	MatrixDataKey = "matrixData"
	HeadersKey    = "headers"
)
const (
	// OldHLLDataHeader is the old magic header for migration
	OldHLLDataHeader uint32 = 0xACED0101
	// HLLDataHeader is the magic header written into serialized format of hyperloglog query result.
	HLLDataHeader uint32 = 0xACED0102
	// EnumDelimiter is the delimiter to delimit enum cases.
	EnumDelimiter = "\u0000\n"
	// DenseDataLength is the length of hll dense data in bytes.
	DenseDataLength = 1 << 14 // 16kb
	// DenseThreshold is the threshold to convert sparse value to dense value.
	DenseThreshold = DenseDataLength / 4
)
const (

	// SecondsPerMinute is number of seconds per minute
	SecondsPerMinute = 60
	// SecondsPerHour is number of seconds per hour
	SecondsPerHour = SecondsPerMinute * 60
	// SecondsPerDay is number of seconds per day
	SecondsPerDay = SecondsPerHour * 24
	// SecondsPer4Day is number of seconds per 4 days
	SecondsPer4Day = SecondsPerDay * 4
	// DaysPerWeek is number of days per week
	DaysPerWeek = 7
	// WeekdayOffset compensates for 1970-01-01 being a Thursday
	WeekdayOffset = 4
	// SecondsPerWeek is number of seconds per week
	SecondsPerWeek = SecondsPerDay * DaysPerWeek
)
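A minimal sketch (not from this package) of how these constants compose, e.g. deriving a weekday index from a Unix timestamp; the Sunday=0 weekday convention and the import path are assumptions made for illustration:

package main

import (
	"fmt"

	common "github.com/uber/aresdb/query/common" // assumed import path
)

func main() {
	ts := int64(0) // 1970-01-01T00:00:00Z, a Thursday
	// WeekdayOffset shifts day 0 so the modulo yields 4 for Thursday,
	// matching a Sunday=0 weekday convention (an assumption here).
	weekday := (ts/common.SecondsPerDay + common.WeekdayOffset) % common.DaysPerWeek
	fmt.Println(weekday) // 4

	// Truncating to the start of the containing day is plain integer math.
	dayStart := ts / common.SecondsPerDay * common.SecondsPerDay
	fmt.Println(dayStart) // 0
}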
const (
	// NULLString is the string representing null dimension values
	NULLString = "NULL"
)

Variables

var BucketSizeToseconds = map[string]int{
	"m": SecondsPerMinute,
	"h": SecondsPerHour,
	"d": SecondsPerDay,
}

BucketSizeToseconds is the map from normalized bucket unit to number of seconds.

var DataTypeToExprType map[memCom.DataType]expr.Type

DataTypeToExprType maps data type from the column schema format to expression AST format.

Functions

func BuildVectorsFromHLLResult added in v0.0.2

func BuildVectorsFromHLLResult(result AQLQueryResult, dimDataTypes []memCom.DataType, enumDicts map[int]map[string]int, dimensionVectorIndex []int) (hllVector, dimVector, countVector []byte, err error)

BuildVectorsFromHLLResult traverses the input HLL query result and builds byte slices. The result must have HLL structs in its leaf nodes. This function is useful when converting an HLL query result to its binary format. dimDataTypes stores the type of each dimension, in the same order as in the query; dimensionVectorIndex stores the re-ordered dimension indexes, sorted by dimension data type width.

func CalculateEnumCasesBytes

func CalculateEnumCasesBytes(enumCases []string) uint32

CalculateEnumCasesBytes calculates how many bytes the enum case values will occupy, including 8-byte alignment.
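A hedged re-implementation of the arithmetic this describes, assuming the total is the delimited byte length of all cases rounded up to an 8-byte boundary (the real function's accounting may differ):

const enumDelimiter = "\u0000\n" // mirrors EnumDelimiter above

// calculateEnumCasesBytes is a sketch of the documented behavior,
// not the package's actual implementation.
func calculateEnumCasesBytes(enumCases []string) uint32 {
	var total uint32
	for _, c := range enumCases {
		total += uint32(len(c) + len(enumDelimiter)) // case + delimiter
	}
	return (total + 7) &^ 7 // round up to the next multiple of 8
}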

func CreateTimeFilterExpr added in v0.0.2

func CreateTimeFilterExpr(expression expr.Expr, from, to *AlignedTime) (fromExpr, toExpr expr.Expr)

CreateTimeFilterExpr creates the from and to time filter expressions.

func DimValResVectorSize added in v0.0.2

func DimValResVectorSize(resultSize int, numDimsPerDimWidth DimCountsPerDimWidth) int

DimValResVectorSize returns the size of the final dimension value vector on the host side.
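The sizing presumably follows from the dimension vector layout, values plus one validity byte per row for each dimension; a sketch under that assumption, with widths ordered from 16-byte down to 1-byte as DimCountsPerDimWidth documents below:

// dimValResVectorSize is a sketch, assuming each dimension occupies
// width bytes of value plus 1 null byte per result row.
func dimValResVectorSize(resultSize int, numDimsPerDimWidth [5]uint8) int {
	widths := [5]int{16, 8, 4, 2, 1} // bytes per dim value, widest first
	total := 0
	for i, n := range numDimsPerDimWidth {
		total += int(n) * (widths[i] + 1) * resultSize
	}
	return total
}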

func GetCurrentCalendarUnit added in v0.0.2

func GetCurrentCalendarUnit(base time.Time, unit string) (start, end time.Time, err error)

GetCurrentCalendarUnit returns the start and end of the calendar unit for base.

func GetDimensionDataBytes added in v0.0.2

func GetDimensionDataBytes(expression expr.Expr) int

GetDimensionDataBytes returns the number of bytes for the given expression.

func GetDimensionDataType added in v0.0.2

func GetDimensionDataType(expression expr.Expr) memCom.DataType

GetDimensionDataType returns the DataType for the given expression.

func GetDimensionStartOffsets

func GetDimensionStartOffsets(numDimsPerDimWidth DimCountsPerDimWidth, dimIndex int, length int) (valueOffset, nullOffset int)

GetDimensionStartOffsets calculates the value and null starting positions for the given dimension inside the dimension vector. dimIndex is the ordered index of the given dimension inside the dimension vector.

func ParseTimezone added in v0.0.2

func ParseTimezone(timezone string) (*time.Location, error)

ParseTimezone parses a timezone string into a *time.Location.

func ReadDimension

func ReadDimension(valueStart, nullStart unsafe.Pointer,
	index int, dataType memCom.DataType, enumReverseDict []string, meta *TimeDimensionMeta, cache map[TimeDimensionMeta]map[int64]string) *string

ReadDimension reads a dimension value given the index and corresponding data type of the node. The TimeDimensionMeta is used to remedy the timezone offset for time dimensions.
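A hedged usage sketch tying GetDimensionStartOffsets and ReadDimension together; dimVector, dataTypes, and enumReverseDicts are hypothetical inputs, and the nil meta/cache arguments assume a non-time dimension:

// Iterate one dimension column of a result vector (sketch).
valueOffset, nullOffset := GetDimensionStartOffsets(numDimsPerDimWidth, dimIndex, resultSize)
valueStart := unsafe.Pointer(&dimVector[valueOffset])
nullStart := unsafe.Pointer(&dimVector[nullOffset])
for row := 0; row < resultSize; row++ {
	// nil TimeDimensionMeta and cache: assumes a non-time dimension.
	s := ReadDimension(valueStart, nullStart, row, dataTypes[dimIndex], enumReverseDicts[dimIndex], nil, nil)
	if s == nil {
		continue // NULL value for this row
	}
	fmt.Println(*s)
}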

Types

type AQLQuery added in v0.0.2

type AQLQuery struct {
	// Name of the main table.
	Table string `json:"table"`

	// Shards of the query.
	// If empty, all shards of the table
	// owned by the host will be queried.
	Shards []int `json:"shards"`

	// Foreign tables to be joined.
	Joins []Join `json:"joins,omitempty"`

	// Dimensions to group by on.
	Dimensions []Dimension `json:"dimensions,omitempty"`

	// Measures/metrics to report.
	Measures []Measure `json:"measures"`

	// Row level filters to apply for all measures. The filters are ANDed together.
	Filters       []string    `json:"rowFilters,omitempty"`
	FiltersParsed []expr.Expr `json:"-"`

	// Syntax sugar for specifying a time based range filter.
	TimeFilter TimeFilter `json:"timeFilter,omitempty"`

	// Additional supporting dimensions, these dimensions will not be grouped by,
	// but they may be referenced in Dimensions, Measures, SupportingDimensions and SupportingMeasures.
	SupportingDimensions []Dimension `json:"supportingDimensions,omitempty"`
	// Additional supporting measures, these measures will not be reported,
	// but they may be referenced in Measures and SupportingMeasures.
	SupportingMeasures []Measure `json:"supportingMeasures,omitempty"`

	// Timezone to use when converting timestamp to calendar time, specified as:
	//   - -8:00
	//   - GMT
	//   - America/Los_Angeles
	//   - timezone(city_id)
	//   - region_timezone(city_id)
	//   - mega_region_timezone(city_id)
	//   - sub_region_timezone(city_id)
	//   - country_timezone(city_id)
	Timezone string `json:"timezone,omitempty"`

	// This overrides "now" (in seconds)
	Now int64 `json:"now,omitempty"`

	// Limit is the max number of rows to return; only used for non-aggregation queries.
	Limit int `json:"limit,omitempty"`

	Sorts []SortField `json:"sorts,omitempty" yaml:"sorts"`

	// SQLQuery
	SQLQuery string `json:"sql,omitempty"`
}

AQLQuery specifies the query on top of tables.
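For illustration, a minimal query against a hypothetical trips table, counting rows per day over the last week; the table name and expressions are made up, and the from/to literals assume the relative-time syntax described in the AQL wiki:

q := AQLQuery{
	Table: "trips", // hypothetical table
	Dimensions: []Dimension{
		{TimeBucketizer: "day"}, // empty Expr: the designated time column is used
	},
	Measures: []Measure{
		{Expr: "count(*)"},
	},
	TimeFilter: TimeFilter{
		From: "-7d", // assumed relative-time literal; see the AQL wiki
		To:   "now",
	},
	Timezone: "America/Los_Angeles",
}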

type AQLQueryResult added in v0.0.2

type AQLQueryResult map[string]interface{}

AQLQueryResult represents the final result of one AQL query.

It has 2 possible formats:

Time series result format, with one dimension on each layer:

  • there is always an outermost time dimension. It stores the start time of the bucket/duration (in seconds since Epoch).
  • after the time dimension, there can be zero or more layers of additional dimensions (all values are represented as strings); a special "NULL" string is used to represent NULL values.
  • there is always a single measure, and the measure type is either float64 or nil (not *float64).

Non-aggregate query result format:

  • there will be a "headers" key, whose value is a list of column names.
  • there will be a "matrixData" key, whose value is a 2D array of values (row formatted).

Callers should consistently use only one of the two formats.
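Concretely, the two shapes look like this, with values invented for illustration (the "headers" and "matrixData" keys are the HeadersKey and MatrixDataKey constants above):

// Time series format: nested maps keyed by dimension values,
// float64 (or nil) measures at the leaves.
ts := AQLQueryResult{
	"1577836800": map[string]interface{}{ // bucket start, seconds since Epoch
		"San Francisco": 12.0,
		"NULL":          nil, // NULL dimension value
	},
}

// Non-aggregate format: column headers plus row-formatted matrix data.
na := AQLQueryResult{
	"headers":    []string{"city", "fare"},
	"matrixData": [][]interface{}{{"San Francisco", "12.5"}},
}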

func ComputeHLLResult added in v0.0.2

func ComputeHLLResult(result AQLQueryResult) AQLQueryResult

ComputeHLLResult computes hll result

func NewTimeSeriesHLLResult

func NewTimeSeriesHLLResult(buffer []byte, magicHeader uint32, ignoreEnum bool) (AQLQueryResult, error)

NewTimeSeriesHLLResult creates a new AQLQueryResult and deserializes the buffer into the result.

func ParseHLLQueryResults

func ParseHLLQueryResults(data []byte, ignoreEnum bool) (queryResults []AQLQueryResult, queryErrors []error, err error)

ParseHLLQueryResults will parse the response body into a slice of query results and a slice of errors.

func (AQLQueryResult) Append added in v0.0.2

func (r AQLQueryResult) Append(dimValues []*string)

func (AQLQueryResult) Set added in v0.0.2

func (r AQLQueryResult) Set(dimValues []*string, measureValue *float64)

Set sets the measure value for the given dimension values.

func (AQLQueryResult) SetHLL added in v0.0.2

func (r AQLQueryResult) SetHLL(dimValues []*string, hll HLL)

SetHLL sets the HLL struct to be the leaf of the nested map.

func (AQLQueryResult) SetHeaders added in v0.0.2

func (r AQLQueryResult) SetHeaders(headers []string)

SetHeaders sets the headers field for the results.

type AQLRequest added in v0.0.2

type AQLRequest struct {
	Queries []AQLQuery `json:"queries"`
}

AQLRequest contains multiple AQLQueries.

type AQLResponse added in v0.0.2

type AQLResponse struct {
	Results      []AQLQueryResult `json:"results"`
	Errors       []error          `json:"errors,omitempty"`
	QueryContext []string         `json:"context,omitempty"`
}

AQLResponse contains results for multiple AQLQueries.

type AlignedTime added in v0.0.2

type AlignedTime struct {
	Time time.Time `json:"time"`
	// Values for unit: y, q, M, w, d, {12, 8, 6, 4, 3, 2}h, h, {30, 20, 15, 12, 10, 6, 5, 4, 3, 2}m, m
	Unit string `json:"unit"`
}

AlignedTime is time that is calendar aligned to the unit.

func ParseTimeFilter added in v0.0.2

func ParseTimeFilter(filter TimeFilter, loc *time.Location, now time.Time) (from, to *AlignedTime, err error)

ParseTimeFilter parses a time filter into calendar-aligned from and to times.

type DimCountsPerDimWidth

type DimCountsPerDimWidth [5]uint8

DimCountsPerDimWidth defines the dimension counts per dimension width: 16-byte, 8-byte, 4-byte, 2-byte, 1-byte.

type Dimension added in v0.0.2

type Dimension struct {
	// Alias/name of the dimension, to be referenced by other dimensions and measures.
	Alias string `json:"alias,omitempty"`
	// The SQL expression for computing the dimension.
	// Expr can be empty when TimeBucketizer is specified, which implies the
	// designated time column from the main table is used as the expression.
	Expr       string    `json:"sqlExpression"`
	ExprParsed expr.Expr `json:"-"`

	// Decides how to bucketize a timestamp Dimension before grouping by.
	// See https://github.com/uber/aresdb/wiki/aql#time_bucketizer
	TimeBucketizer string `json:"timeBucketizer,omitempty"`

	TimeUnit string `json:"timeUnit,omitempty"`

	// Bucketizes numeric dimensions for integers and floating point numbers.
	NumericBucketizer NumericBucketizerDef `json:"numericBucketizer,omitempty"`
}

Dimension specifies a row level dimension for grouping by.

func (Dimension) IsTimeDimension added in v0.0.2

func (d Dimension) IsTimeDimension() bool

type HLL

type HLL struct {
	SparseData       []HLLRegister // Unsorted registers.
	DenseData        []byte        // Rho by register index.
	NonZeroRegisters uint16
}

HLL stores only the dense data for now.

func (*HLL) Compute

func (hll *HLL) Compute() float64

Compute computes the result of the HLL.

func (*HLL) ConvertToDense

func (hll *HLL) ConvertToDense()

ConvertToDense converts the HLL to dense format.

func (*HLL) ConvertToSparse

func (hll *HLL) ConvertToSparse() bool

ConvertToSparse tries to convert the HLL to sparse format if it turns out to be cheaper.

func (*HLL) Decode

func (hll *HLL) Decode(data []byte)

Decode decodes the HLL from the cache. Interprets as dense or sparse format based on len(data).

func (*HLL) Encode

func (hll *HLL) Encode() []byte

Encode encodes the HLL for cache storage. Dense format will have a length of 1<<hllP; sparse format will have a smaller length.

func (*HLL) EncodeBinary added in v0.0.2

func (hll *HLL) EncodeBinary() []byte

EncodeBinary converts the HLL to binary format, aligned to 4 bytes for sparse HLL. It is used to build the response for application/hll queries from the HLL struct.

func (*HLL) Merge

func (hll *HLL) Merge(other HLL)

Merge merges (using max(rho)) the other HLL (sparse or dense) into this one (will be converted to dense).

func (*HLL) Set

func (hll *HLL) Set(index uint16, rho byte)

Set sets rho for the specified register index. Caller must ensure that each register is set no more than once.
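A hedged end-to-end sketch of the HLL methods documented above; register indexes and rho values are arbitrary:

var a, b HLL
a.Set(0, 5) // each register set at most once, per the contract above
a.Set(42, 3)
b.Set(42, 7)

a.Merge(b)              // per-register max(rho); a is converted to dense
estimate := a.Compute() // cardinality estimate
a.ConvertToSparse()     // shrink back if the sparse form is cheaper
buf := a.Encode()       // cache encoding; length distinguishes dense vs sparse

var decoded HLL
decoded.Decode(buf)
fmt.Println(estimate)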

type HLLData

type HLLData struct {
	NumDimsPerDimWidth             DimCountsPerDimWidth
	ResultSize                     uint32
	PaddedRawDimValuesVectorLength uint32
	PaddedHLLVectorLength          int64

	DimIndexes []int
	DataTypes  []memCom.DataType
	// map from dimension index => enum cases. It will
	// only include columns used in dimensions.
	EnumDicts map[int][]string
}

HLLData stores fields for serializing and deserializing a hyperloglog query result when the client sets the Accept header to application/hll. The serialized buffer of HLL data is in the following format:

 [uint32] magic_number [uint32] padding

-----------query result 0-------------------
 <header>
 [uint32] query result 0 size [uint8] error or result [3 bytes padding]
 [uint8] num_enum_columns [uint8] bytes per dim ... [padding for 8 bytes]
 [uint32] result_size [uint32] raw_dim_values_vector_length
 [uint8] dim_index_0... [uint8] dim_index_n [padding for 8 bytes]
 [uint32] data_type_0...[uint32] data_type_n [padding for 8 bytes]

 <enum cases 0>
 [uint32_t] number of bytes of enum cases [uint16] dim_index [2 bytes: padding]
 <enum values 0> delimited by "\u0000\n" [padding for 8 bytes]
 <end of header>
 <raw dim values vector>
 ...
 [padding for 8 byte alignment]

 <raw hll dense vector>
 ...
------------error 1----------
 [uint32] query result 1 size  [uint8] error or result [3 bytes padding]
...

func (*HLLData) CalculateSizes

func (data *HLLData) CalculateSizes() (uint32, int64)

CalculateSizes returns the header size and total size used by this HLL data.

type HLLDataWriter added in v0.0.2

type HLLDataWriter struct {
	HLLData
	Buffer []byte
}

HLLDataWriter is the struct to serialize the HLLData struct.

func (*HLLDataWriter) SerializeHeader added in v0.0.2

func (builder *HLLDataWriter) SerializeHeader() error

SerializeHeader serializes the HLL header:

-----------query result 0-------------------
 <header>
 [uint8] num_enum_columns [uint8] bytes per dim ... [padding for 8 bytes]
 [uint32] result_size [uint32] raw_dim_values_vector_length
 [uint8] dim_index_0... [uint8] dim_index_n [padding for 8 bytes]
 [uint32] data_type_0...[uint32] data_type_n [padding for 8 bytes]

 <enum cases 0>
 [uint32_t] number of bytes of enum cases [uint16] column_index [2 bytes: padding]
 <enum values 0> delimited by "\u0000\n" [padding for 8 bytes]

 <end of header>

type HLLQueryResults added in v0.0.2

type HLLQueryResults struct {
	// contains filtered or unexported fields
}

HLLQueryResults holds the buffer to store multiple hll query results or errors.

func NewHLLQueryResults added in v0.0.2

func NewHLLQueryResults() *HLLQueryResults

NewHLLQueryResults returns a new HLLQueryResults and writes the magic header and padding to the underlying buffer.

func (*HLLQueryResults) GetBytes added in v0.0.2

func (r *HLLQueryResults) GetBytes() []byte

GetBytes returns the underlying bytes.

func (*HLLQueryResults) WriteError added in v0.0.2

func (r *HLLQueryResults) WriteError(err error)

WriteError writes an error to the buffer.

func (*HLLQueryResults) WriteResult added in v0.0.2

func (r *HLLQueryResults) WriteResult(result []byte)

WriteResult writes a result to the buffer.
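A usage sketch of the writer API; resultBytes stands in for a serialized query result (for example, produced via HLLDataWriter), which is elided here:

r := NewHLLQueryResults() // writes the magic header and padding
r.WriteResult(resultBytes)                    // serialized result for query 0
r.WriteError(errors.New("query 1 timed out")) // error entry for query 1
payload := r.GetBytes() // complete application/hll response body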

type HLLRegister

type HLLRegister struct {
	Index uint16 `json:"index"`
	Rho   byte   `json:"rho"`
}

HLLRegister is the register used in the sparse representation.

type Join added in v0.0.2

type Join struct {
	// Name of the table to join against.
	Table string `json:"table"`

	// Alias for the table. Empty means the table name will be used as alias.
	Alias string `json:"alias"`

	// Condition expressions to be ANDed together for the join.
	Conditions       []string    `json:"conditions"`
	ConditionsParsed []expr.Expr `json:"-"`
}

Join specifies a secondary table to be explicitly joined in the query.

type Measure added in v0.0.2

type Measure struct {
	// Alias/name of the measure, to be referenced by other (derived) measures.
	Alias string `json:"alias,omitempty"`
	// The SQL expression for computing the measure.
	Expr       string    `json:"sqlExpression"`
	ExprParsed expr.Expr `json:"-"`

	// Row level filters to apply for this measure.
	// The filters are ANDed together.
	Filters       []string    `json:"rowFilters,omitempty"`
	FiltersParsed []expr.Expr `json:"-"`
}

Measure specifies a group level aggregation measure.

type NumericBucketizerDef added in v0.0.2

type NumericBucketizerDef struct {

	// Generates equal-width buckets. BucketWidth should be positive.
	// The generated buckets are:
	// ... [-2w, -w), [-w, 0), [0, w), [w, 2w) ...
	BucketWidth float64 `json:"bucketWidth,omitempty"`

	// Generates exponential/log buckets. LogBase should be positive.
	// The generated buckets are:
	// ... [pow(b, -2), pow(b, -1)), [pow(b, -1), 1), [1, pow(b, 1)), [pow(b, 1), pow(b, 2)) ...
	LogBase float64 `json:"logBase,omitempty"`

	// Generates a fixed number of buckets using the specified partitions.
	// The numbers should be in sorted order. The generated buckets are:
	// [-inf, p0), [p0, p1), [p1, p2), ... [pn-1, inf)
	ManualPartitions []float64 `json:"manualPartitions,omitempty"`
}

NumericBucketizerDef defines how numbers should be bucketized before being grouped by as a dimension. The returned dimension is a string in the format of `lower_bound`, representing `[lower_bound, upper_bound)`.
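For example, each field selects one bucketizer style (presumably only one should be set per definition):

// Equal-width buckets of 5: ... [-5, 0), [0, 5), [5, 10) ...
byWidth := NumericBucketizerDef{BucketWidth: 5}

// Log buckets with base 10: ... [0.1, 1), [1, 10), [10, 100) ...
byLog := NumericBucketizerDef{LogBase: 10}

// Manual partitions: [-inf, 0), [0, 30), [30, +inf)
manual := NumericBucketizerDef{ManualPartitions: []float64{0, 30}}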

type SortField added in v0.0.2

type SortField struct {
	// Name or alias of the field
	Name string `json:"name"`

	// Order of the column; either asc or desc
	Order string `json:"order"`
}

SortField represents a field to sort results by.

type TimeDimensionMeta

type TimeDimensionMeta struct {
	TimeBucketizer  string
	TimeUnit        string
	IsTimezoneTable bool
	TimeZone        *time.Location
	DSTSwitchTs     int64
	FromOffset      int
	ToOffset        int
}

TimeDimensionMeta is the aggregation of metadata needed to format time dimensions.

type TimeFilter added in v0.0.2

type TimeFilter struct {
	// A table time column in the format of column, or table_alias.column.
	// When empty, it defaults to the designated time column of the main table.
	Column string `json:"column"`

	// The time specified in from and to are both inclusive.
	// See https://github.com/uber/aresdb/wiki/aql#time_filter
	From string `json:"from"`
	To   string `json:"to"`
}

TimeFilter is syntax sugar for specifying a time range.

type TimeSeriesBucketizer

type TimeSeriesBucketizer struct {
	Size int
	Unit string
}

TimeSeriesBucketizer is the helper struct to express a parsed time bucketizer; see ParseRegularTimeBucketizer below.

func ParseRegularTimeBucketizer

func ParseRegularTimeBucketizer(timeBucketizerString string) (TimeSeriesBucketizer, error)

ParseRegularTimeBucketizer tries to convert a regular time bucketizer (anything below a month) input string to a (Size, Unit) pair, and reports an error if the input is invalid/unsupported. E.g. "3m" -> (3, "m"), "4 hours" -> (4, "h").
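Combined with BucketSizeToseconds above, this yields the bucket size in seconds; a short sketch:

b, err := ParseRegularTimeBucketizer("3m")
if err != nil {
	panic(err) // invalid or unsupported bucketizer string
}
// b.Size == 3, b.Unit == "m"
bucketSeconds := b.Size * BucketSizeToseconds[b.Unit] // 3 * 60 = 180
fmt.Println(bucketSeconds)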
