Documentation ¶
Overview ¶
Package parquet is not intended to be used as a general library. The code generated by the 'parquetgen' command is what actually uses it for reading and writing parquet files.
Index ¶
- func BoolType(se *sch.SchemaElement)
- func ConvertedTypeDate(se *sch.SchemaElement)
- func ConvertedTypeEmpty(se *sch.SchemaElement)
- func ConvertedTypeTimestampMillis(se *sch.SchemaElement)
- func ConvertedTypeUTF8(se *sch.SchemaElement)
- func DateToString(date int32) string
- func Float32Type(se *sch.SchemaElement)
- func Float64Type(se *sch.SchemaElement)
- func GetBools(r io.Reader, n int, pageSizes []int) ([]bool, error)
- func Int32Type(se *sch.SchemaElement)
- func Int64Type(se *sch.SchemaElement)
- func LogicalTypeDate(se *sch.SchemaElement)
- func LogicalTypeString(se *sch.SchemaElement)
- func LogicalTypeTimestampMillis(se *sch.SchemaElement)
- func OptionalFieldGZIP(r *OptionalField)
- func OptionalFieldSnappy(r *OptionalField)
- func OptionalFieldUncompressed(o *OptionalField)
- func OptionalSchemaOption(opts ...SchemeOption) func(f *OptionalField)
- func PageHeader(r io.Reader) (*sch.PageHeader, error)
- func PageHeaders(footer *sch.FileMetaData, r io.ReadSeeker) ([]sch.PageHeader, error)
- func PageHeadersAtOffset(r io.ReadSeeker, o, n int64) ([]sch.PageHeader, error)
- func ReadMetaData(r io.ReadSeeker) (*sch.FileMetaData, error)
- func RepetitionOptional(se *sch.SchemaElement)
- func RepetitionRepeated(se *sch.SchemaElement)
- func RepetitionRequired(se *sch.SchemaElement)
- func RequiredFieldGZIP(r *RequiredField)
- func RequiredFieldSnappy(r *RequiredField)
- func RequiredFieldUncompressed(r *RequiredField)
- func SchemaOption(opts ...SchemeOption) func(f *RequiredField)
- func StringToDate(ts string) int32
- func StringToTime(ts string) *time.Time
- func StringType(se *sch.SchemaElement)
- func TimeToString(time time.Time) string
- func Uint32Type(se *sch.SchemaElement)
- func Uint64Type(se *sch.SchemaElement)
- type Field
- type MaxLevel
- type Metadata
- func (m *Metadata) Footer(w io.Writer) error
- func (m *Metadata) NextDoc()
- func (m *Metadata) Pages() (map[string][]Page, error)
- func (m *Metadata) ReadFooter(r io.ReadSeeker) error
- func (m *Metadata) RowGroups() []RowGroup
- func (m *Metadata) Rows() int64
- func (m *Metadata) StartRowGroup(fields ...Field)
- func (m *Metadata) WritePageHeader(w io.Writer, pth []string, dataLen, compressedLen, defCount, count int, ...) error
- type OptionalField
- func (f *OptionalField) DoRead(r io.ReadSeeker, pg Page) (io.Reader, []int, error)
- func (f *OptionalField) DoWrite(w io.Writer, meta *Metadata, vals []byte, count int, stats Stats) error
- func (f *OptionalField) Name() string
- func (f *OptionalField) Options() []SchemeOption
- func (f *OptionalField) Path() []string
- func (f *OptionalField) Values() int
- type Page
- type RepetitionType
- type RepetitionTypes
- type RequiredField
- func (f *RequiredField) DoRead(r io.ReadSeeker, pg Page) (io.Reader, []int, error)
- func (f *RequiredField) DoWrite(w io.Writer, meta *Metadata, vals []byte, count int, stats Stats) error
- func (f *RequiredField) Name() string
- func (f *RequiredField) Options() []SchemeOption
- func (f *RequiredField) Path() []string
- type RowGroup
- type SchemeOption
- type Stats
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
func BoolType ¶
func BoolType(se *sch.SchemaElement)
func ConvertedTypeDate ¶
func ConvertedTypeDate(se *sch.SchemaElement)
func ConvertedTypeEmpty ¶
func ConvertedTypeEmpty(se *sch.SchemaElement)
func ConvertedTypeTimestampMillis ¶
func ConvertedTypeTimestampMillis(se *sch.SchemaElement)
func ConvertedTypeUTF8 ¶
func ConvertedTypeUTF8(se *sch.SchemaElement)
func DateToString ¶
func Float32Type ¶
func Float32Type(se *sch.SchemaElement)
func Float64Type ¶
func Float64Type(se *sch.SchemaElement)
func Int32Type ¶
func Int32Type(se *sch.SchemaElement)
func Int64Type ¶
func Int64Type(se *sch.SchemaElement)
func LogicalTypeDate ¶
func LogicalTypeDate(se *sch.SchemaElement)
func LogicalTypeString ¶
func LogicalTypeString(se *sch.SchemaElement)
func LogicalTypeTimestampMillis ¶
func LogicalTypeTimestampMillis(se *sch.SchemaElement)
func OptionalFieldGZIP ¶
func OptionalFieldGZIP(r *OptionalField)
OptionalFieldGZIP sets the compression for a column to gzip. It is an optional arg to NewOptionalField.
func OptionalFieldSnappy ¶
func OptionalFieldSnappy(r *OptionalField)
OptionalFieldSnappy sets the compression for a column to snappy. It is an optional arg to NewOptionalField.
func OptionalFieldUncompressed ¶
func OptionalFieldUncompressed(o *OptionalField)
OptionalFieldUncompressed sets the compression to none. It is an optional arg to NewOptionalField.
func OptionalSchemaOption ¶
func OptionalSchemaOption(opts ...SchemeOption) func(f *OptionalField)
func PageHeader ¶
func PageHeader(r io.Reader) (*sch.PageHeader, error)
PageHeader reads the page header from a column page
func PageHeaders ¶
func PageHeaders(footer *sch.FileMetaData, r io.ReadSeeker) ([]sch.PageHeader, error)
PageHeaders reads all the page headers without reading the actual data. It is used by parquetgen to print the page headers.
func PageHeadersAtOffset ¶
func PageHeadersAtOffset(r io.ReadSeeker, o, n int64) ([]sch.PageHeader, error)
PageHeadersAtOffset seeks to the given offset, then reads the PageHeader without reading the data.
func ReadMetaData ¶
func ReadMetaData(r io.ReadSeeker) (*sch.FileMetaData, error)
ReadMetaData reads the FileMetaData from the end of a parquet file
func RepetitionOptional ¶
func RepetitionOptional(se *sch.SchemaElement)
RepetitionOptional sets the repetition type to optional
func RepetitionRepeated ¶
func RepetitionRepeated(se *sch.SchemaElement)
RepetitionRepeated sets the repetition type to repeated
func RepetitionRequired ¶
func RepetitionRequired(se *sch.SchemaElement)
RepetitionRequired sets the repetition type to required
func RequiredFieldGZIP ¶
func RequiredFieldGZIP(r *RequiredField)
RequiredFieldGZIP sets the compression for a column to gzip. It is an optional arg to NewRequiredField.
func RequiredFieldSnappy ¶
func RequiredFieldSnappy(r *RequiredField)
RequiredFieldSnappy sets the compression for a column to snappy. It is an optional arg to NewRequiredField.
func RequiredFieldUncompressed ¶
func RequiredFieldUncompressed(r *RequiredField)
RequiredFieldUncompressed sets the compression to none. It is an optional arg to NewRequiredField.
func SchemaOption ¶
func SchemaOption(opts ...SchemeOption) func(f *RequiredField)
func StringToDate ¶
func StringToTime ¶
func StringType ¶
func StringType(se *sch.SchemaElement)
func TimeToString ¶
func Uint32Type ¶
func Uint32Type(se *sch.SchemaElement)
func Uint64Type ¶
func Uint64Type(se *sch.SchemaElement)
Types ¶
type Field ¶
type Field struct { Name string Path []string Types []int Options []SchemeOption }
Field holds the type information for a parquet column
type Metadata ¶
type Metadata struct {
// contains filtered or unexported fields
}
Metadata keeps track of the information needed to write the FileMetaData at the end of the parquet file.
func (*Metadata) NextDoc ¶
func (m *Metadata) NextDoc()
NextDoc keeps track of how many documents have been added to this parquet file. The final value of m.docs is used for FileMetaData.NumRows.
func (*Metadata) ReadFooter ¶
func (m *Metadata) ReadFooter(r io.ReadSeeker) error
ReadFooter reads the parquet metadata
func (*Metadata) Rows ¶
Rows returns the total number of rows that are being written into a parquet file.
func (*Metadata) StartRowGroup ¶
StartRowGroup is called when starting a new row group
type OptionalField ¶
type OptionalField struct { Defs []uint8 Reps []uint8 MaxLevels MaxLevel RepetitionType SchemeOption Types []int // contains filtered or unexported fields }
OptionalField is any exported field in a struct that is a pointer.
func NewOptionalField ¶
func NewOptionalField(pth []string, types []int, opts ...func(*OptionalField)) OptionalField
NewOptionalField creates an optional field
func (*OptionalField) DoRead ¶
func (f *OptionalField) DoRead(r io.ReadSeeker, pg Page) (io.Reader, []int, error)
DoRead is called by all optional fields. It reads the definition levels and uses them to interpret the raw data.
func (*OptionalField) DoWrite ¶
func (f *OptionalField) DoWrite(w io.Writer, meta *Metadata, vals []byte, count int, stats Stats) error
DoWrite is called by all optional field types to write the definition levels and raw data to the io.Writer
func (*OptionalField) Name ¶
func (f *OptionalField) Name() string
Name returns the column name of this field
func (*OptionalField) Options ¶
func (f *OptionalField) Options() []SchemeOption
func (*OptionalField) Path ¶
func (f *OptionalField) Path() []string
Path returns the path of this field
func (*OptionalField) Values ¶
func (f *OptionalField) Values() int
Values reads the definition levels and uses them to return the values from the page data.
type Page ¶
type Page struct {
	// N is the number of values in the ColumnChunk
	N      int
	Size   int
	Offset int64
	Codec  sch.CompressionCodec
}
Page keeps track of metadata for each ColumnChunk
type RepetitionType ¶
type RepetitionType int
RepetitionType is an enum of the possible parquet repetition types
const ( Required RepetitionType = 0 Optional RepetitionType = 1 Repeated RepetitionType = 2 )
type RepetitionTypes ¶
type RepetitionTypes []RepetitionType
RepetitionTypes provides several functions used by parquetgen's go templates to generate code.
func (RepetitionTypes) MaxDef ¶
func (r RepetitionTypes) MaxDef() uint8
MaxDef returns the largest definition level
func (RepetitionTypes) MaxRep ¶
func (r RepetitionTypes) MaxRep() uint8
MaxRep returns the largest repetition level
func (RepetitionTypes) Optional ¶
func (r RepetitionTypes) Optional() bool
Optional figures out if there is an optional field
func (RepetitionTypes) Repeated ¶
func (r RepetitionTypes) Repeated() bool
Repeated figures out if there is a repeated field
func (RepetitionTypes) Required ¶
func (r RepetitionTypes) Required() bool
Required figures out if there are no optional or repeated fields
type RequiredField ¶
type RequiredField struct {
// contains filtered or unexported fields
}
RequiredField writes the raw data for required columns
func NewRequiredField ¶
func NewRequiredField(pth []string, opts ...func(*RequiredField)) RequiredField
NewRequiredField creates a required field.
func (*RequiredField) DoRead ¶
func (f *RequiredField) DoRead(r io.ReadSeeker, pg Page) (io.Reader, []int, error)
DoRead reads the actual raw data.
func (*RequiredField) DoWrite ¶
func (f *RequiredField) DoWrite(w io.Writer, meta *Metadata, vals []byte, count int, stats Stats) error
DoWrite writes the actual raw data.
func (*RequiredField) Name ¶
func (f *RequiredField) Name() string
Name returns the column name of this field
func (*RequiredField) Options ¶
func (f *RequiredField) Options() []SchemeOption
func (*RequiredField) Path ¶
func (f *RequiredField) Path() []string
Path returns the path of this field
type RowGroup ¶
type RowGroup struct {
	Rows int64
	// contains filtered or unexported fields
}
RowGroup wraps schema.RowGroup and adds accounting functions that are used to keep track of number of rows written, byte size, etc.
func (*RowGroup) Columns ¶
func (r *RowGroup) Columns() []*sch.ColumnChunk
Columns returns the Columns of the row group.
type SchemeOption ¶
type SchemeOption func(*sch.SchemaElement)
SchemeOption is used to set some of the metadata for each column