chunk

package
v0.1.6 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Aug 16, 2022 License: Apache-2.0, Apache-2.0 Imports: 15 Imported by: 0

Documentation

Index

Constants

View Source
const (
	InitialCapacity = 32
)

Variables

This section is empty.

Functions

func Compare

func Compare(row Row, colIdx int, ad *types.Datum) int

Compare compares the value with ad. We assume that the collation information of the column is the same as that of the datum.

func EstimateTypeWidth

func EstimateTypeWidth(colType *types.FieldType) int

EstimateTypeWidth estimates the average width of values of the type. This is used by the planner, which doesn't require absolutely correct results; it's OK (and expected) to guess if we don't know for sure.

This is mostly based on https://github.com/postgres/postgres/blob/REL_12_STABLE/src/backend/utils/cache/lsyscache.c#L2356

func GetFixedLen

func GetFixedLen(colType *types.FieldType) int

GetFixedLen gets the memory size of a fixed-length type. If colType is not fixed-length, it returns varElemLen, aka -1.

Types

type Chunk

type Chunk struct {
	// contains filtered or unexported fields
}

Chunk stores multiple rows of data in Apache Arrow format. See https://arrow.apache.org/docs/format/Columnar.html#physical-memory-layout Values are appended in compact format and can be directly accessed without decoding. When the chunk is done processing, we can reuse the allocated memory by resetting it.

func New

func New(fields []*types.FieldType, cap, maxChunkSize int) *Chunk

New creates a new chunk.

cap: the initial capacity of the chunk, in rows.
maxChunkSize: the upper limit on the number of rows.

func NewChunkWithCapacity

func NewChunkWithCapacity(fields []*types.FieldType, cap int) *Chunk

NewChunkWithCapacity creates a new chunk with field types and capacity.

func (*Chunk) Append

func (c *Chunk) Append(other *Chunk, begin, end int)

Append appends rows in [begin, end) in another Chunk to a Chunk.

func (*Chunk) AppendBytes

func (c *Chunk) AppendBytes(colIdx int, b []byte)

AppendBytes appends a bytes value to the chunk.

func (*Chunk) AppendDatum

func (c *Chunk) AppendDatum(colIdx int, d *types.Datum)

AppendDatum appends a datum into the chunk.

func (*Chunk) AppendDuration

func (c *Chunk) AppendDuration(colIdx int, dur types.Duration)

AppendDuration appends a Duration value to the chunk.

func (*Chunk) AppendEnum

func (c *Chunk) AppendEnum(colIdx int, enum types.Enum)

AppendEnum appends an Enum value to the chunk.

func (*Chunk) AppendFloat32

func (c *Chunk) AppendFloat32(colIdx int, f float32)

AppendFloat32 appends a float32 value to the chunk.

func (*Chunk) AppendFloat64

func (c *Chunk) AppendFloat64(colIdx int, f float64)

AppendFloat64 appends a float64 value to the chunk.

func (*Chunk) AppendInt64

func (c *Chunk) AppendInt64(colIdx int, i int64)

AppendInt64 appends an int64 value to the chunk.

func (*Chunk) AppendJSON

func (c *Chunk) AppendJSON(colIdx int, j json.BinaryJSON)

AppendJSON appends a JSON value to the chunk.

func (*Chunk) AppendMyDecimal

func (c *Chunk) AppendMyDecimal(colIdx int, dec *types.MyDecimal)

AppendMyDecimal appends a MyDecimal value to the chunk.

func (*Chunk) AppendNull

func (c *Chunk) AppendNull(colIdx int)

AppendNull appends a null value to the chunk.

func (*Chunk) AppendPartialRow

func (c *Chunk) AppendPartialRow(colOff int, row Row)

AppendPartialRow appends a row to the chunk.

func (*Chunk) AppendPartialRowByColIdxs

func (c *Chunk) AppendPartialRowByColIdxs(colOff int, row Row, colIdxs []int) (wide int)

AppendPartialRowByColIdxs appends a row by its colIdxs to the chunk. 1. every column is used if colIdxs is nil. 2. no columns are used if colIdxs is not nil but the size of colIdxs is 0.

func (*Chunk) AppendPartialRows

func (c *Chunk) AppendPartialRows(colOff int, rows []Row)

AppendPartialRows appends multiple rows to the chunk.

func (*Chunk) AppendRow

func (c *Chunk) AppendRow(row Row)

AppendRow appends a row to the chunk.

func (*Chunk) AppendRowByColIdxs

func (c *Chunk) AppendRowByColIdxs(row Row, colIdxs []int) (wide int)

AppendRowByColIdxs appends a row by its colIdxs to the chunk. 1. every column is used if colIdxs is nil. 2. no columns are used if colIdxs is not nil but the size of colIdxs is 0.

func (*Chunk) AppendRows

func (c *Chunk) AppendRows(rows []Row)

AppendRows appends multiple rows to the chunk.

func (*Chunk) AppendSet

func (c *Chunk) AppendSet(colIdx int, set types.Set)

AppendSet appends a Set value to the chunk.

func (*Chunk) AppendString

func (c *Chunk) AppendString(colIdx int, str string)

AppendString appends a string value to the chunk.

func (*Chunk) AppendTime

func (c *Chunk) AppendTime(colIdx int, t types.Time)

AppendTime appends a Time value to the chunk.

func (*Chunk) AppendUint64

func (c *Chunk) AppendUint64(colIdx int, u uint64)

AppendUint64 appends a uint64 value to the chunk.

func (*Chunk) Capacity

func (c *Chunk) Capacity() int

Capacity returns the capacity of the Chunk.

func (*Chunk) Column

func (c *Chunk) Column(colIdx int) *Column

Column returns the specific column.

func (*Chunk) CopyConstruct

func (c *Chunk) CopyConstruct() *Chunk

CopyConstruct creates a new chunk and copies this chunk's data into it.

func (*Chunk) CopyConstructSel

func (c *Chunk) CopyConstructSel() *Chunk

CopyConstructSel is just like CopyConstruct, but ignores the rows that were not selected.

func (*Chunk) GetRow

func (c *Chunk) GetRow(idx int) Row

GetRow gets the Row in the chunk with the row index.

func (*Chunk) GrowAndReset

func (c *Chunk) GrowAndReset(maxChunkSize int)

GrowAndReset resets the Chunk and doubles the capacity of the Chunk. The doubled capacity should not be larger than maxChunkSize. TODO: this method will be used in a following PR.

func (*Chunk) IsFull

func (c *Chunk) IsFull() bool

IsFull returns if this chunk is considered full.

func (*Chunk) LowerBound

func (c *Chunk) LowerBound(colIdx int, d *types.Datum) (index int, match bool)

LowerBound searches on the non-decreasing Column colIdx, returns the smallest index i such that the value at row i is not less than `d`.

func (*Chunk) MakeRef

func (c *Chunk) MakeRef(srcColIdx, dstColIdx int)

MakeRef makes Column in "dstColIdx" reference to Column in "srcColIdx".

func (*Chunk) MakeRefTo

func (c *Chunk) MakeRefTo(dstColIdx int, src *Chunk, srcColIdx int) error

MakeRefTo copies columns `src.columns[srcColIdx]` to `c.columns[dstColIdx]`.

func (*Chunk) MemoryUsage

func (c *Chunk) MemoryUsage() (sum int64)

MemoryUsage returns the total memory usage of a Chunk in bytes. We ignore the size of Column.length and Column.nullCount since they have little effect on the total memory usage.

func (*Chunk) NumCols

func (c *Chunk) NumCols() int

NumCols returns the number of columns in the chunk.

func (*Chunk) NumRows

func (c *Chunk) NumRows() int

NumRows returns the number of rows in the chunk.

func (*Chunk) Prune

func (c *Chunk) Prune(usedColIdxs []int) *Chunk

Prune creates a new Chunk according to `c` and prunes the columns whose index is not in `usedColIdxs`.

func (*Chunk) Reconstruct

func (c *Chunk) Reconstruct()

Reconstruct removes all filtered rows in this Chunk.

func (*Chunk) RequiredRows

func (c *Chunk) RequiredRows() int

RequiredRows returns how many rows are considered full.

func (*Chunk) Reset

func (c *Chunk) Reset()

Reset resets the chunk, so the memory it allocated can be reused. Make sure all the data in the chunk is not used anymore before you reuse this chunk.

func (*Chunk) Sel

func (c *Chunk) Sel() []int

Sel returns Sel of this Chunk.

func (*Chunk) SetCol

func (c *Chunk) SetCol(colIdx int, col *Column) *Column

SetCol sets the colIdx Column to col and returns the old Column.

func (*Chunk) SetNumVirtualRows

func (c *Chunk) SetNumVirtualRows(numVirtualRows int)

SetNumVirtualRows sets the virtual row number for a Chunk. It should only be used when there exists no Column in the Chunk.

func (*Chunk) SetRequiredRows

func (c *Chunk) SetRequiredRows(requiredRows, maxChunkSize int) *Chunk

SetRequiredRows sets the number of required rows.

func (*Chunk) SetSel

func (c *Chunk) SetSel(sel []int)

SetSel sets a Sel for this Chunk.

func (*Chunk) SwapColumn

func (c *Chunk) SwapColumn(colIdx int, other *Chunk, otherIdx int) error

SwapColumn swaps Column "c.columns[colIdx]" with Column "other.columns[otherIdx]". If there exist columns that refer to the Column to be swapped, we need to rebuild the references.

func (*Chunk) SwapColumns

func (c *Chunk) SwapColumns(other *Chunk)

SwapColumns swaps columns with another Chunk.

func (*Chunk) ToString

func (c *Chunk) ToString(ft []*types.FieldType) string

ToString returns all the values in a chunk.

func (*Chunk) TruncateTo

func (c *Chunk) TruncateTo(numRows int)

TruncateTo truncates rows from tail to head in a Chunk to "numRows" rows.

func (*Chunk) UpperBound

func (c *Chunk) UpperBound(colIdx int, d *types.Datum) int

UpperBound searches on the non-decreasing Column colIdx, returns the smallest index i such that the value at row i is larger than `d`.

type Codec

type Codec struct {
	// contains filtered or unexported fields
}

Codec is used to: 1. encode a Chunk to a byte slice. 2. decode a Chunk from a byte slice.

func NewCodec

func NewCodec(colTypes []*types.FieldType) *Codec

NewCodec creates a new Codec object for encoding or decoding a Chunk.

func (*Codec) Decode

func (c *Codec) Decode(buffer []byte) (*Chunk, []byte)

Decode decodes a Chunk from a byte slice and returns the remaining unused bytes.

func (*Codec) DecodeToChunk

func (c *Codec) DecodeToChunk(buffer []byte, chk *Chunk) (remained []byte)

DecodeToChunk decodes a Chunk from a byte slice and returns the remaining unused bytes.

func (*Codec) Encode

func (c *Codec) Encode(chk *Chunk) []byte

Encode encodes a Chunk to a byte slice.

type Column

type Column struct {
	// contains filtered or unexported fields
}

Column stores one column of data in Apache Arrow format. See https://arrow.apache.org/docs/format/Columnar.html#format-columnar

func NewColumn

func NewColumn(ft *types.FieldType, cap int) *Column

NewColumn creates a new column with the specific length and capacity.

func (*Column) AppendBytes

func (c *Column) AppendBytes(b []byte)

AppendBytes appends a byte slice into this Column.

func (*Column) AppendDuration

func (c *Column) AppendDuration(dur types.Duration)

AppendDuration appends a duration value into this Column.

func (*Column) AppendEnum

func (c *Column) AppendEnum(enum types.Enum)

AppendEnum appends an Enum value into this Column.

func (*Column) AppendFloat32

func (c *Column) AppendFloat32(f float32)

AppendFloat32 appends a float32 value into this Column.

func (*Column) AppendFloat64

func (c *Column) AppendFloat64(f float64)

AppendFloat64 appends a float64 value into this Column.

func (*Column) AppendInt64

func (c *Column) AppendInt64(i int64)

AppendInt64 appends an int64 value into this Column.

func (*Column) AppendJSON

func (c *Column) AppendJSON(j json.BinaryJSON)

AppendJSON appends a BinaryJSON value into this Column.

func (*Column) AppendMyDecimal

func (c *Column) AppendMyDecimal(dec *types.MyDecimal)

AppendMyDecimal appends a MyDecimal value into this Column.

func (*Column) AppendNull

func (c *Column) AppendNull()

AppendNull appends a null value into this Column.

func (*Column) AppendSet

func (c *Column) AppendSet(set types.Set)

AppendSet appends a Set value into this Column.

func (*Column) AppendString

func (c *Column) AppendString(str string)

AppendString appends a string value into this Column.

func (*Column) AppendTime

func (c *Column) AppendTime(t types.Time)

AppendTime appends a time value into this Column.

func (*Column) AppendUint64

func (c *Column) AppendUint64(u uint64)

AppendUint64 appends a uint64 value into this Column.

func (*Column) CopyConstruct

func (c *Column) CopyConstruct(dst *Column) *Column

CopyConstruct copies this Column to dst. If dst is nil, it creates a new Column and returns it.

func (*Column) CopyReconstruct

func (c *Column) CopyReconstruct(sel []int, dst *Column) *Column

CopyReconstruct copies this Column to dst and removes unselected rows. If dst is nil, it creates a new Column and returns it.

func (*Column) Decimals

func (c *Column) Decimals() []types.MyDecimal

Decimals returns a MyDecimal slice stored in this Column.

func (*Column) Float32s

func (c *Column) Float32s() []float32

Float32s returns a float32 slice stored in this Column.

func (*Column) Float64s

func (c *Column) Float64s() []float64

Float64s returns a float64 slice stored in this Column.

func (*Column) GetBytes

func (c *Column) GetBytes(rowID int) []byte

GetBytes returns the byte slice in the specific row.

func (*Column) GetDecimal

func (c *Column) GetDecimal(rowID int) *types.MyDecimal

GetDecimal returns the decimal in the specific row.

func (*Column) GetDuration

func (c *Column) GetDuration(rowID int, fillFsp int) types.Duration

GetDuration returns the Duration in the specific row.

func (*Column) GetEnum

func (c *Column) GetEnum(rowID int) types.Enum

GetEnum returns the Enum in the specific row.

func (*Column) GetFloat32

func (c *Column) GetFloat32(rowID int) float32

GetFloat32 returns the float32 in the specific row.

func (*Column) GetFloat64

func (c *Column) GetFloat64(rowID int) float64

GetFloat64 returns the float64 in the specific row.

func (*Column) GetInt64

func (c *Column) GetInt64(rowID int) int64

GetInt64 returns the int64 in the specific row.

func (*Column) GetJSON

func (c *Column) GetJSON(rowID int) json.BinaryJSON

GetJSON returns the JSON in the specific row.

func (*Column) GetRaw

func (c *Column) GetRaw(rowID int) []byte

GetRaw returns the underlying raw bytes in the specific row.

func (*Column) GetSet

func (c *Column) GetSet(rowID int) types.Set

GetSet returns the Set in the specific row.

func (*Column) GetString

func (c *Column) GetString(rowID int) string

GetString returns the string in the specific row.

func (*Column) GetTime

func (c *Column) GetTime(rowID int) types.Time

GetTime returns the Time in the specific row.

func (*Column) GetUint64

func (c *Column) GetUint64(rowID int) uint64

GetUint64 returns the uint64 in the specific row.

func (*Column) GoDurations

func (c *Column) GoDurations() []time.Duration

GoDurations returns a Golang time.Duration slice stored in this Column. Different from the Row.GetDuration method, the argument Fsp is ignored, so the user should handle it outside.

func (*Column) Int64s

func (c *Column) Int64s() []int64

Int64s returns an int64 slice stored in this Column.

func (*Column) IsNull

func (c *Column) IsNull(rowIdx int) bool

IsNull returns if this row is null.

func (*Column) MergeNulls

func (c *Column) MergeNulls(cols ...*Column)

MergeNulls merges these columns' null bitmaps. For a row, if any column of it is null, the result is null. It works like: if col1.IsNull || col2.IsNull || col3.IsNull. The caller should ensure that all these columns have the same length, and data stored in the result column is fixed-length type.

func (*Column) ReserveBytes

func (c *Column) ReserveBytes(n int)

ReserveBytes changes the column capacity to store n byte-slice elements and sets the length to zero.

func (*Column) ReserveEnum

func (c *Column) ReserveEnum(n int)

ReserveEnum changes the column capacity to store n enum elements and sets the length to zero.

func (*Column) ReserveJSON

func (c *Column) ReserveJSON(n int)

ReserveJSON changes the column capacity to store n JSON elements and sets the length to zero.

func (*Column) ReserveSet

func (c *Column) ReserveSet(n int)

ReserveSet changes the column capacity to store n set elements and sets the length to zero.

func (*Column) ReserveString

func (c *Column) ReserveString(n int)

ReserveString changes the column capacity to store n string elements and sets the length to zero.

func (*Column) Reset

func (c *Column) Reset(eType types.EvalType)

Reset resets this Column according to the EvalType. Different from reset, Reset will reset the elemBuf.

func (*Column) ResizeDecimal

func (c *Column) ResizeDecimal(n int, isNull bool)

ResizeDecimal resizes the column so that it contains n decimal elements.

func (*Column) ResizeFloat32

func (c *Column) ResizeFloat32(n int, isNull bool)

ResizeFloat32 resizes the column so that it contains n float32 elements.

func (*Column) ResizeFloat64

func (c *Column) ResizeFloat64(n int, isNull bool)

ResizeFloat64 resizes the column so that it contains n float64 elements.

func (*Column) ResizeGoDuration

func (c *Column) ResizeGoDuration(n int, isNull bool)

ResizeGoDuration resizes the column so that it contains n duration elements.

func (*Column) ResizeInt64

func (c *Column) ResizeInt64(n int, isNull bool)

ResizeInt64 resizes the column so that it contains n int64 elements.

func (*Column) ResizeTime

func (c *Column) ResizeTime(n int, isNull bool)

ResizeTime resizes the column so that it contains n Time elements.

func (*Column) ResizeUint64

func (c *Column) ResizeUint64(n int, isNull bool)

ResizeUint64 resizes the column so that it contains n uint64 elements.

func (*Column) SetNull

func (c *Column) SetNull(rowIdx int, isNull bool)

SetNull sets the rowIdx to null.

func (*Column) SetNulls

func (c *Column) SetNulls(begin, end int, isNull bool)

SetNulls sets rows in [begin, end) to null.

func (*Column) SetRaw

func (c *Column) SetRaw(rowID int, bs []byte)

SetRaw sets the raw bytes for the rowID-th element. NOTE: Two conditions must be satisfied before calling this function: 1. The column should be stored with variable-length elements. 2. The length of the new element should be exactly the same as the old one.

func (*Column) Times

func (c *Column) Times() []types.Time

Times returns a Time slice stored in this Column.

func (*Column) Uint64s

func (c *Column) Uint64s() []uint64

Uint64s returns a uint64 slice stored in this Column.

type CompareFunc

type CompareFunc = func(l Row, lCol int, r Row, rCol int) int

CompareFunc is a function to compare the two values in Row, the two columns must have the same type.

func GetCompareFunc

func GetCompareFunc(tp *types.FieldType) CompareFunc

GetCompareFunc gets a compare function for the field type.

type Iterator

type Iterator interface {
	// Begin resets the cursor of the iterator and returns the first Row.
	Begin() Row

	// Next returns the next Row.
	Next() Row

	// End returns the invalid end Row.
	End() Row

	// Len returns the length.
	Len() int

	// Current returns the current Row.
	Current() Row

	// ReachEnd reaches the end of iterator.
	ReachEnd()

	// Error returns a non-nil error if anything goes wrong during the iteration.
	Error() error
}

Iterator is used to iterate a number of rows.

for row := it.Begin(); row != it.End(); row = it.Next() {
    ...
}

type Iterator4Chunk

type Iterator4Chunk struct {
	// contains filtered or unexported fields
}

Iterator4Chunk is used to iterate rows inside a chunk.

func NewIterator4Chunk

func NewIterator4Chunk(chk *Chunk) *Iterator4Chunk

NewIterator4Chunk returns an iterator for Chunk.

func (*Iterator4Chunk) Begin

func (it *Iterator4Chunk) Begin() Row

Begin implements the Iterator interface.

func (*Iterator4Chunk) Current

func (it *Iterator4Chunk) Current() Row

Current implements the Iterator interface.

func (*Iterator4Chunk) End

func (it *Iterator4Chunk) End() Row

End implements the Iterator interface.

func (*Iterator4Chunk) Error

func (it *Iterator4Chunk) Error() error

Error returns a non-nil error if anything goes wrong during the iteration.

func (*Iterator4Chunk) GetChunk

func (it *Iterator4Chunk) GetChunk() *Chunk

GetChunk returns the chunk stored in the Iterator4Chunk.

func (*Iterator4Chunk) Len

func (it *Iterator4Chunk) Len() int

Len implements the Iterator interface.

func (*Iterator4Chunk) Next

func (it *Iterator4Chunk) Next() Row

Next implements the Iterator interface.

func (*Iterator4Chunk) ReachEnd

func (it *Iterator4Chunk) ReachEnd()

ReachEnd implements the Iterator interface.

type Row

type Row struct {
	// contains filtered or unexported fields
}

Row represents a row of data, can be used to access values.

func (Row) Chunk

func (r Row) Chunk() *Chunk

Chunk returns the Chunk which the row belongs to.

func (Row) CopyConstruct

func (r Row) CopyConstruct() Row

CopyConstruct creates a new row and copies this row's data into it.

func (Row) GetBytes

func (r Row) GetBytes(colIdx int) []byte

GetBytes returns the bytes value with the colIdx.

func (Row) GetDatum

func (r Row) GetDatum(colIdx int, tp *types.FieldType) types.Datum

GetDatum implements the chunk.Row interface.

func (Row) GetDatumRow

func (r Row) GetDatumRow(fields []*types.FieldType) []types.Datum

GetDatumRow converts chunk.Row to types.DatumRow. Keep in mind that GetDatumRow has a reference to r.c, which is a chunk; this function works only if the underlying chunk is valid or unchanged.

func (Row) GetDuration

func (r Row) GetDuration(colIdx int, fillFsp int) types.Duration

GetDuration returns the Duration value with the colIdx.

func (Row) GetEnum

func (r Row) GetEnum(colIdx int) types.Enum

GetEnum returns the Enum value with the colIdx.

func (Row) GetFloat32

func (r Row) GetFloat32(colIdx int) float32

GetFloat32 returns the float32 value with the colIdx.

func (Row) GetFloat64

func (r Row) GetFloat64(colIdx int) float64

GetFloat64 returns the float64 value with the colIdx.

func (Row) GetInt64

func (r Row) GetInt64(colIdx int) int64

GetInt64 returns the int64 value with the colIdx.

func (Row) GetJSON

func (r Row) GetJSON(colIdx int) json.BinaryJSON

GetJSON returns the JSON value with the colIdx.

func (Row) GetMyDecimal

func (r Row) GetMyDecimal(colIdx int) *types.MyDecimal

GetMyDecimal returns the MyDecimal value with the colIdx.

func (Row) GetRaw

func (r Row) GetRaw(colIdx int) []byte

GetRaw returns the underlying raw bytes with the colIdx.

func (Row) GetSet

func (r Row) GetSet(colIdx int) types.Set

GetSet returns the Set value with the colIdx.

func (Row) GetString

func (r Row) GetString(colIdx int) string

GetString returns the string value with the colIdx.

func (Row) GetTime

func (r Row) GetTime(colIdx int) types.Time

GetTime returns the Time value with the colIdx.

func (Row) GetUint64

func (r Row) GetUint64(colIdx int) uint64

GetUint64 returns the uint64 value with the colIdx.

func (Row) Idx

func (r Row) Idx() int

Idx returns the row index of Chunk.

func (Row) IsEmpty

func (r Row) IsEmpty() bool

IsEmpty returns true if the Row is empty.

func (Row) IsNull

func (r Row) IsNull(colIdx int) bool

IsNull returns if the datum in the chunk.Row is null.

func (Row) Len

func (r Row) Len() int

Len returns the number of values in the row.

func (Row) ToString

func (r Row) ToString(ft []*types.FieldType) string

ToString returns all the values in a row.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL