ParquetReader

package
v0.0.0-...-ead641c Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Jan 23, 2019 License: Apache-2.0 Imports: 13 Imported by: 2

Documentation

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

This section is empty.

Types

type ColumnBufferType

// ColumnBufferType groups the handles and counters this package keeps for a
// single column path: a parquet file handle, a Thrift buffered transport, the
// file footer metadata, the schema handler, and row-group/chunk/page state.
//
// NOTE(review): the implementation is not visible alongside this declaration;
// the per-field notes below are derived from field names and types only and
// should be confirmed against the method bodies.
type ColumnBufferType struct {
	// PFile has the same type as the pFile argument of NewColumnBuffer;
	// presumably the file this buffer reads from — confirm.
	PFile        ParquetFile.ParquetFile
	// ThriftReader is a Thrift buffered transport; presumably wraps PFile
	// for decoding Thrift-encoded headers — TODO confirm.
	ThriftReader *thrift.TBufferedTransport

	// Footer is the parquet file metadata (same type as NewColumnBuffer's
	// footer argument).
	Footer        *parquet.FileMetaData
	// SchemaHandler is the schema handler (same type as NewColumnBuffer's
	// schemaHandler argument).
	SchemaHandler *SchemaHandler.SchemaHandler

	// PathStr is a column path string (same type as NewColumnBuffer's
	// pathStr argument); the path format is not shown here.
	PathStr       string
	// RowGroupIndex is presumably the index of the row group being read;
	// whether it points at the current or the next group is not shown — confirm.
	RowGroupIndex int64
	// ChunkHeader is the column chunk metadata; presumably for the chunk
	// currently being read — confirm.
	ChunkHeader   *parquet.ColumnChunk

	// ChunkReadValues is presumably the count of values already consumed
	// from the current chunk — TODO confirm.
	ChunkReadValues int64

	// DictPage is presumably the dictionary page of the current chunk, if
	// any — confirm.
	DictPage *Layout.Page

	// DataTable is presumably the buffered, decoded data not yet handed out
	// by ReadRows/SkipRows — confirm.
	DataTable        *Layout.Table
	// DataTableNumRows is presumably the number of rows currently held in
	// DataTable — confirm.
	DataTableNumRows int64
}

func NewColumnBuffer

func NewColumnBuffer(pFile ParquetFile.ParquetFile, footer *parquet.FileMetaData, schemaHandler *SchemaHandler.SchemaHandler, pathStr string) (*ColumnBufferType, error)

func (*ColumnBufferType) NextRowGroup

func (self *ColumnBufferType) NextRowGroup() error

func (*ColumnBufferType) ReadPage

func (self *ColumnBufferType) ReadPage() error

func (*ColumnBufferType) ReadPageForSkip

func (self *ColumnBufferType) ReadPageForSkip() (*Layout.Page, error)

func (*ColumnBufferType) ReadRows

func (self *ColumnBufferType) ReadRows(num int64) (*Layout.Table, int64)

func (*ColumnBufferType) SkipRows

func (self *ColumnBufferType) SkipRows(num int64) int64

type ParquetReader

// ParquetReader is the reader type returned by NewParquetReader and
// NewParquetColumnReader. It carries the schema handler, the file footer
// metadata, the underlying parquet file handle, and a map of column buffers.
//
// NOTE(review): the implementation is not visible alongside this declaration;
// the per-field notes below are derived from field names and types only.
type ParquetReader struct {
	// SchemaHandler is the schema handler used by this reader.
	SchemaHandler *SchemaHandler.SchemaHandler
	// NP has the same type as the np argument of the constructors;
	// presumably the degree of parallelism used when reading — confirm.
	NP            int64 //parallel number
	// Footer is the parquet file metadata; presumably populated by
	// ReadFooter — confirm.
	Footer        *parquet.FileMetaData
	// PFile has the same type as the pFile argument of the constructors.
	PFile         ParquetFile.ParquetFile

	// ColumnBuffers maps a string key to a column buffer; presumably keyed
	// by column path (cf. ColumnBufferType.PathStr) — confirm.
	ColumnBuffers map[string]*ColumnBufferType
}

func NewParquetColumnReader

func NewParquetColumnReader(pFile ParquetFile.ParquetFile, np int64) (*ParquetReader, error)

NewParquetColumnReader creates a parquet column reader

func NewParquetReader

func NewParquetReader(pFile ParquetFile.ParquetFile, obj interface{}, np int64) (*ParquetReader, error)

NewParquetReader creates a parquet reader.

func (*ParquetReader) GetFooterSize

func (self *ParquetReader) GetFooterSize() (uint32, error)

GetFooterSize returns the footer size.

func (*ParquetReader) GetNumRows

func (self *ParquetReader) GetNumRows() int64

func (*ParquetReader) Read

func (self *ParquetReader) Read(dstInterface interface{}) error

Read reads rows of the parquet file.

func (*ParquetReader) ReadColumnByIndex

func (self *ParquetReader) ReadColumnByIndex(index int, num int) (values []interface{}, rls []int32, dls []int32)

ReadColumnByIndex reads column by index. The index of first column is 0.

func (*ParquetReader) ReadColumnByPath

func (self *ParquetReader) ReadColumnByPath(pathStr string, num int) (values []interface{}, rls []int32, dls []int32)

ReadColumnByPath reads column by path in schema.

func (*ParquetReader) ReadFooter

func (self *ParquetReader) ReadFooter() error

ReadFooter reads the footer from the parquet file.

func (*ParquetReader) ReadStop

func (self *ParquetReader) ReadStop()

ReadStop stops reading.

func (*ParquetReader) RenameSchema

func (self *ParquetReader) RenameSchema()

RenameSchema renames schema names to inname.

func (*ParquetReader) SetSchemaHandlerFromJSON

func (self *ParquetReader) SetSchemaHandlerFromJSON(jsonSchema string) error

func (*ParquetReader) SkipRows

func (self *ParquetReader) SkipRows(num int64) error

SkipRows skips rows of the parquet file.

func (*ParquetReader) SkipRowsByIndex

func (self *ParquetReader) SkipRowsByIndex(index int, num int)

func (*ParquetReader) SkipRowsByPath

func (self *ParquetReader) SkipRowsByPath(pathStr string, num int)

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL