layout

package
v0.0.0-...-6331073 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Aug 18, 2020 License: Apache-2.0 Imports: 11 Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

func ReadDataPageValues

func ReadDataPageValues(bytesReader *bytes.Reader, encodingMethod parquet.Encoding, dataType parquet.Type, cnt uint64, bitWidth uint64) ([]interface{}, error)

Read data page values

func ReadPageHeader

func ReadPageHeader(thriftReader *thrift.TBufferedTransport) (*parquet.PageHeader, error)

Read page header

Types

type Chunk

// Chunk stores the ColumnChunk in parquet file: the pages that belong to the
// column chunk together with its parquet.ColumnChunk header.
type Chunk struct {
	Pages       []*Page
	ChunkHeader *parquet.ColumnChunk
}

Chunk stores the ColumnChunk in parquet file

func PagesToChunk

func PagesToChunk(pages []*Page) *Chunk

Convert several pages to one chunk

func PagesToDictChunk

func PagesToDictChunk(pages []*Page) *Chunk

Convert several pages to one chunk with dict page first

type DictRecType

// DictRecType holds a dictionary record for values of one parquet type.
// NOTE(review): presumably DictMap maps each distinct value to its index in
// DictSlice, and DictSlice keeps the values in index order — confirm against
// the implementation.
type DictRecType struct {
	DictMap   map[interface{}]int32
	DictSlice []interface{}
	Type      parquet.Type
}

func NewDictRec

func NewDictRec(pT parquet.Type) *DictRecType

type Page

// Page is used to store the page data.
type Page struct {
	// Header of the page
	Header *parquet.PageHeader
	// Table that stores the values
	DataTable *Table
	// Compressed data of the page, which is written to the parquet file
	RawData []byte
	// Compression type: gzip/snappy/zstd/none
	CompressType parquet.CompressionCodec
	// Schema element
	Schema *parquet.SchemaElement
	// Path in the schema (including the root)
	Path []string
	// Maximum of the values
	MaxVal interface{}
	// Minimum of the values
	MinVal interface{}
	// contains filtered or unexported fields
}

Page is used to store the page data

func DictRecToDictPage

func DictRecToDictPage(dictRec *DictRecType, compressType parquet.CompressionCodec) (*Page, int64)

func NewDataPage

func NewDataPage() *Page

Create a new data page

func NewDictPage

func NewDictPage() *Page

Create a new dict page

func ReadPage

func ReadPage(thriftReader *thrift.TBufferedTransport, schemaHandler *schema.SchemaHandler, colMetaData *parquet.ColumnMetaData) (*Page, int64, int64, error)

Read page from parquet file

func ReadPageRawData

func ReadPageRawData(thriftReader *thrift.TBufferedTransport, schemaHandler *schema.SchemaHandler, colMetaData *parquet.ColumnMetaData) (*Page, error)

Read page RawData

func TableToDataPages

func TableToDataPages(table *Table, pageSize int32, compressType parquet.CompressionCodec) ([]*Page, int64)

Convert a table to data pages

func TableToDictDataPages

func TableToDictDataPages(dictRec *DictRecType, table *Table, pageSize int32, bitWidth int32, compressType parquet.CompressionCodec) ([]*Page, int64)

Convert a table to dict data pages

func (*Page) DataPageCompress

func (page *Page) DataPageCompress(compressType parquet.CompressionCodec) []byte

Compress the data page to parquet file

func (*Page) Decode

func (page *Page) Decode(dictPage *Page)

Decode dict page

func (*Page) DictDataPageCompress

func (page *Page) DictDataPageCompress(compressType parquet.CompressionCodec, bitWidth int32, values []int32) []byte

Compress the dict data page to parquet file

func (*Page) DictPageCompress

func (page *Page) DictPageCompress(compressType parquet.CompressionCodec, pT parquet.Type) []byte

Compress the dict page to parquet file

func (*Page) EncodingValues

func (page *Page) EncodingValues(valuesBuf []interface{}) []byte

Encode the values

func (*Page) GetRLDLFromRawData

func (self *Page) GetRLDLFromRawData(schemaHandler *schema.SchemaHandler) (int64, int64, error)

Get RepetitionLevels and DefinitionLevels from RawData

func (*Page) GetValueFromRawData

func (self *Page) GetValueFromRawData(schemaHandler *schema.SchemaHandler) error

Get values from raw data

func (*Page) UseDictionaryEncoding

func (page *Page) UseDictionaryEncoding() bool

type RowGroup

// RowGroup stores the RowGroup in parquet file: its chunks together with the
// parquet.RowGroup header.
type RowGroup struct {
	Chunks         []*Chunk
	RowGroupHeader *parquet.RowGroup
}

RowGroup stores the RowGroup in parquet file

func NewRowGroup

func NewRowGroup() *RowGroup

Create a RowGroup

type Table

// Table is the core data structure used to store the values.
type Table struct {
	// Repetition type of the values: REQUIRED/OPTIONAL/REPEATED
	RepetitionType parquet.FieldRepetitionType
	// Schema element
	Schema *parquet.SchemaElement
	// Path of this column
	Path []string
	// Maximum of the definition levels
	MaxDefinitionLevel int32
	// Maximum of the repetition levels
	MaxRepetitionLevel int32

	// Parquet values
	Values []interface{}
	// Definition levels slice
	DefinitionLevels []int32
	// Repetition levels slice
	RepetitionLevels []int32

	// Tag info
	Info *common.Tag
}

Table is the core data structure used to store the values

func NewEmptyTable

func NewEmptyTable() *Table

func NewTableFromTable

func NewTableFromTable(src *Table) *Table

func (*Table) Merge

func (self *Table) Merge(tables ...*Table)

Merge several tables into one table (the first table)

func (*Table) Pop

func (self *Table) Pop(numRows int64) *Table

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL