reader

package
v1.6.2 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Dec 5, 2021 License: Apache-2.0 Imports: 14 Imported by: 145

Documentation

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

This section is empty.

Types

type ColumnBufferType

// ColumnBufferType bundles the state used to read a single column: the
// underlying file, a thrift transport, the file footer and schema handler,
// the column's path, and the currently buffered page/table data.
//
// NOTE(review): field descriptions below are derived from the field names and
// types only; confirm against the method implementations.
type ColumnBufferType struct {
	// PFile is the parquet file this buffer reads from.
	// ThriftReader is a buffered thrift transport (presumably wrapping PFile
	// for decoding thrift-encoded headers — confirm).
	PFile        source.ParquetFile
	ThriftReader *thrift.TBufferedTransport

	// Footer is the parquet file metadata; SchemaHandler is the schema handler
	// used with it.
	Footer        *parquet.FileMetaData
	SchemaHandler *schema.SchemaHandler

	// PathStr is the column path string (presumably the path of the column in
	// the schema — confirm).
	// RowGroupIndex presumably tracks the row group currently being read.
	// ChunkHeader is the column chunk metadata (presumably for the chunk of
	// this column in the current row group — confirm).
	PathStr       string
	RowGroupIndex int64
	ChunkHeader   *parquet.ColumnChunk

	// ChunkReadValues presumably counts values already read from the current
	// chunk — TODO confirm.
	ChunkReadValues int64

	// DictPage presumably holds the dictionary page of the current chunk, if
	// any — TODO confirm.
	DictPage *layout.Page

	// DataTable holds buffered table data; DataTableNumRows presumably is the
	// number of rows currently held in DataTable — TODO confirm.
	DataTable        *layout.Table
	DataTableNumRows int64
}

func NewColumnBuffer

func NewColumnBuffer(pFile source.ParquetFile, footer *parquet.FileMetaData, schemaHandler *schema.SchemaHandler, pathStr string) (*ColumnBufferType, error)

func (*ColumnBufferType) NextRowGroup

func (cbt *ColumnBufferType) NextRowGroup() error

func (*ColumnBufferType) ReadPage

func (cbt *ColumnBufferType) ReadPage() error

func (*ColumnBufferType) ReadPageForSkip

func (cbt *ColumnBufferType) ReadPageForSkip() (*layout.Page, error)

func (*ColumnBufferType) ReadRows

func (cbt *ColumnBufferType) ReadRows(num int64) (*layout.Table, int64)

func (*ColumnBufferType) SkipRows

func (cbt *ColumnBufferType) SkipRows(num int64) int64

type ParquetReader

// ParquetReader reads a parquet file. It holds the schema handler, the file
// footer, the underlying file, and one ColumnBufferType per column.
type ParquetReader struct {
	SchemaHandler *schema.SchemaHandler
	NP            int64 // parallel number
	Footer        *parquet.FileMetaData
	PFile         source.ParquetFile

	// ColumnBuffers maps a string key to that column's buffer (presumably
	// keyed by column path string — confirm).
	ColumnBuffers map[string]*ColumnBufferType

	// A reader can only read objects of a single type.
	ObjType        reflect.Type
	ObjPartialType reflect.Type
}

func NewParquetColumnReader

func NewParquetColumnReader(pFile source.ParquetFile, np int64) (*ParquetReader, error)

NewParquetColumnReader creates a parquet column reader

func NewParquetReader

func NewParquetReader(pFile source.ParquetFile, obj interface{}, np int64) (*ParquetReader, error)

NewParquetReader creates a parquet reader. obj is an object with schema tags or a JSON schema string.

func (*ParquetReader) GetFooterSize

func (pr *ParquetReader) GetFooterSize() (uint32, error)

Get the footer size

func (*ParquetReader) GetNumRows

func (pr *ParquetReader) GetNumRows() int64

func (*ParquetReader) Read

func (pr *ParquetReader) Read(dstInterface interface{}) error

Read reads rows of the parquet file and unmarshals them all into dstInterface.

func (*ParquetReader) ReadByNumber added in v1.4.0

func (pr *ParquetReader) ReadByNumber(maxReadNumber int) ([]interface{}, error)

ReadByNumber reads up to maxReadNumber objects.

func (*ParquetReader) ReadColumnByIndex

func (pr *ParquetReader) ReadColumnByIndex(index int64, num int64) (values []interface{}, rls []int32, dls []int32, err error)

ReadColumnByIndex reads column by index. The index of first column is 0.

func (*ParquetReader) ReadColumnByPath

func (pr *ParquetReader) ReadColumnByPath(pathStr string, num int64) (values []interface{}, rls []int32, dls []int32, err error)

ReadColumnByPath reads column by path in schema.

func (*ParquetReader) ReadFooter

func (pr *ParquetReader) ReadFooter() error

Read footer from parquet file

func (*ParquetReader) ReadPartial added in v1.4.0

func (pr *ParquetReader) ReadPartial(dstInterface interface{}, prefixPath string) error

ReadPartial reads rows of the parquet file for the given prefixPath and unmarshals them all into dstInterface.

func (*ParquetReader) ReadPartialByNumber added in v1.4.0

func (pr *ParquetReader) ReadPartialByNumber(maxReadNumber int, prefixPath string) ([]interface{}, error)

ReadPartialByNumber reads up to maxReadNumber partial objects for the given prefixPath.

func (*ParquetReader) ReadStop

func (pr *ParquetReader) ReadStop()

Stop Read

func (*ParquetReader) RenameSchema

func (pr *ParquetReader) RenameSchema()

Rename schema name to inname

func (*ParquetReader) SetSchemaHandlerFromJSON

func (pr *ParquetReader) SetSchemaHandlerFromJSON(jsonSchema string) error

func (*ParquetReader) SkipRows

func (pr *ParquetReader) SkipRows(num int64) error

SkipRows skips num rows of the parquet file.

func (*ParquetReader) SkipRowsByIndex

func (pr *ParquetReader) SkipRowsByIndex(index int64, num int64)

func (*ParquetReader) SkipRowsByPath

func (pr *ParquetReader) SkipRowsByPath(pathStr string, num int64) error

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL