layout

package
Version: v1.6.2
Warning

This package is not in the latest version of its module.

Go to latest
Published: Dec 5, 2021 | License: Apache-2.0 | Imports: 14 | Imported by: 43

Documentation

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

func DecodeDictChunk

func DecodeDictChunk(chunk *Chunk)

Decode a dict chunk

func ReadDataPageValues

func ReadDataPageValues(bytesReader *bytes.Reader, encodingMethod parquet.Encoding, dataType parquet.Type, convertedType parquet.ConvertedType, cnt uint64, bitWidth uint64) ([]interface{}, error)

Read data page values

func ReadPageHeader

func ReadPageHeader(thriftReader *thrift.TBufferedTransport) (*parquet.PageHeader, error)

Read page header

Types

type Chunk

// Chunk stores the ColumnChunk in parquet file.
//
// Pages holds the pages of the chunk and ChunkHeader holds its
// parquet.ColumnChunk header.
type Chunk struct {
	Pages       []*Page
	ChunkHeader *parquet.ColumnChunk
}

Chunk stores the ColumnChunk in parquet file

func PagesToChunk

func PagesToChunk(pages []*Page) *Chunk

Convert several pages to one chunk

func PagesToDictChunk

func PagesToDictChunk(pages []*Page) *Chunk

Convert several pages to one chunk with dict page first

func ReadChunk

func ReadChunk(thriftReader *thrift.TBufferedTransport, schemaHandler *schema.SchemaHandler, chunkHeader *parquet.ColumnChunk) (*Chunk, error)

Read one chunk from parquet file (Deprecated)

type DictRecType

// DictRecType groups a value-to-int32 map (DictMap), a slice of values
// (DictSlice) and the parquet.Type they are associated with (Type).
//
// NOTE(review): presumably DictMap maps each distinct value to its index in
// DictSlice, forming a dictionary-encoding record — confirm against the
// implementation.
type DictRecType struct {
	DictMap   map[interface{}]int32
	DictSlice []interface{}
	Type      parquet.Type
}

func NewDictRec

func NewDictRec(pT parquet.Type) *DictRecType

type Page

// Page is used to store the page data.
type Page struct {
	// Header of the page
	Header *parquet.PageHeader
	// Table used to store the values
	DataTable *Table
	// Compressed data of the page, as written to the parquet file
	RawData []byte
	// Compression type: gzip/snappy/zstd/none
	CompressType parquet.CompressionCodec
	// Schema element
	Schema *parquet.SchemaElement
	// Path in the schema (including the root)
	Path []string
	// Maximum of the values
	MaxVal interface{}
	// Minimum of the values
	MinVal interface{}
	// Null count
	NullCount *int64
	// Tag info
	Info *common.Tag

	// NOTE(review): presumably the size limit of the page in bytes — confirm
	// against the code that sets it.
	PageSize int32
}

Page is used to store the page data

func DictRecToDictPage

func DictRecToDictPage(dictRec *DictRecType, pageSize int32, compressType parquet.CompressionCodec) (*Page, int64)

func NewDataPage

func NewDataPage() *Page

Create a new data page

func NewDictPage

func NewDictPage() *Page

Create a new dict page

func NewPage

func NewPage() *Page

Create a new page

func ReadPage

func ReadPage(thriftReader *thrift.TBufferedTransport, schemaHandler *schema.SchemaHandler, colMetaData *parquet.ColumnMetaData) (*Page, int64, int64, error)

Read page from parquet file

func ReadPage2

func ReadPage2(thriftReader *thrift.TBufferedTransport, schemaHandler *schema.SchemaHandler, colMetaData *parquet.ColumnMetaData) (*Page, int64, int64, error)

This is a test function

func ReadPageRawData

func ReadPageRawData(thriftReader *thrift.TBufferedTransport, schemaHandler *schema.SchemaHandler, colMetaData *parquet.ColumnMetaData) (*Page, error)

Read page RawData

func TableToDataPages

func TableToDataPages(table *Table, pageSize int32, compressType parquet.CompressionCodec) ([]*Page, int64)

Convert a table to data pages

func TableToDictDataPages

func TableToDictDataPages(dictRec *DictRecType, table *Table, pageSize int32, bitWidth int32, compressType parquet.CompressionCodec) ([]*Page, int64)

Convert a table to dict data pages

func (*Page) DataPageCompress

func (page *Page) DataPageCompress(compressType parquet.CompressionCodec) []byte

Compress the data page to parquet file

func (*Page) DataPageV2Compress

func (page *Page) DataPageV2Compress(compressType parquet.CompressionCodec) []byte

Compress data page v2 to parquet file

func (*Page) Decode

func (page *Page) Decode(dictPage *Page)

Decode the page using the given dict page

func (*Page) DictDataPageCompress

func (page *Page) DictDataPageCompress(compressType parquet.CompressionCodec, bitWidth int32, values []int32) []byte

Compress the dict data page to parquet file

func (*Page) DictPageCompress

func (page *Page) DictPageCompress(compressType parquet.CompressionCodec, pT parquet.Type) []byte

Compress the dict page to parquet file

func (*Page) EncodingValues

func (page *Page) EncodingValues(valuesBuf []interface{}) []byte

Encoding values

func (*Page) GetRLDLFromRawData

func (p *Page) GetRLDLFromRawData(schemaHandler *schema.SchemaHandler) (int64, int64, error)

Get RepetitionLevels and DefinitionLevels from RawData

func (*Page) GetValueFromRawData

func (p *Page) GetValueFromRawData(schemaHandler *schema.SchemaHandler) error

Get values from raw data

type RowGroup

// RowGroup stores the RowGroup in parquet file.
//
// Chunks holds the chunks of the row group and RowGroupHeader holds its
// parquet.RowGroup header.
type RowGroup struct {
	Chunks         []*Chunk
	RowGroupHeader *parquet.RowGroup
}

RowGroup stores the RowGroup in parquet file

func NewRowGroup

func NewRowGroup() *RowGroup

Create a RowGroup

func ReadRowGroup

func ReadRowGroup(rowGroupHeader *parquet.RowGroup, PFile source.ParquetFile, schemaHandler *schema.SchemaHandler, NP int64) (*RowGroup, error)

Read one RowGroup from parquet file (Deprecated)

func (*RowGroup) RowGroupToTableMap

func (rowGroup *RowGroup) RowGroupToTableMap() *map[string]*Table

Convert a RowGroup to table map

type Table

// Table is the core data structure used to store the values.
type Table struct {
	// Repetition type of the values: REQUIRED/OPTIONAL/REPEATED
	RepetitionType parquet.FieldRepetitionType
	// Schema element
	Schema *parquet.SchemaElement
	// Path of this column
	Path []string
	// Maximum of the definition levels
	MaxDefinitionLevel int32
	// Maximum of the repetition levels
	MaxRepetitionLevel int32

	// Parquet values
	Values []interface{}
	// Definition levels slice
	DefinitionLevels []int32
	// Repetition levels slice
	RepetitionLevels []int32

	// Tag info
	Info *common.Tag
}

Table is the core data structure used to store the values

func NewEmptyTable

func NewEmptyTable() *Table

func NewTableFromTable

func NewTableFromTable(src *Table) *Table

func (*Table) Merge

func (t *Table) Merge(tables ...*Table)

Merge several tables into one table (the first table)

func (*Table) Pop

func (t *Table) Pop(numRows int64) *Table

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL