icegopher

package module
v0.0.0-...-920a96b Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Aug 21, 2023 License: Apache-2.0 Imports: 15 Imported by: 0

README

icegopher

A Go library for Apache Iceberg

This is still an experimental work in progress!

Many more comments still need to be added.

FYI

This code is gradually being merged into https://github.com/apache/iceberg-go, which will become the official Apache Iceberg Go library.

Documentation

Index

Constants

View Source
const (
	InitialPartitionSpecID = 0
)
View Source
const Version = "0.1.0"

Variables

View Source
var (
	ErrInvalidTypeString = errors.New("invalid type")
	ErrNotImplemented    = errors.New("not implemented")
	ErrInvalidArgument   = errors.New("invalid argument")
)
View Source
var (
	ErrInvalidSchema = errors.New("invalid schema")
)
View Source
var (
	ErrInvalidTransform = errors.New("invalid transform syntax")
)
View Source
var PositionalDeleteSchema = NewSchema(0,
	NestedField{ID: 2147483546, Type: PrimitiveTypes.String, Name: "file_path", Required: true},
	NestedField{ID: 2147483545, Type: PrimitiveTypes.Int32, Name: "pos", Required: true},
)
View Source
var PrimitiveTypes = struct {
	Bool        Type
	Int32       Type
	Int64       Type
	Float32     Type
	Float64     Type
	Date        Type
	Time        Type
	Timestamp   Type
	TimestampTz Type
	String      Type
	Binary      Type
	UUID        Type
}{
	Bool:        BooleanType{},
	Int32:       Int32Type{},
	Int64:       Int64Type{},
	Float32:     Float32Type{},
	Float64:     Float64Type{},
	Date:        DateType{},
	Time:        TimeType{},
	Timestamp:   TimestampType{},
	TimestampTz: TimestampTzType{},
	String:      StringType{},
	Binary:      BinaryType{},
	UUID:        UUIDType{},
}
View Source
var (
	UnpartitionedPartitionSpec = &PartitionSpec{id: 0}
)

Functions

func BuildPositionAccessors

func BuildPositionAccessors(schema *Schema) (map[int]*Accessor, error)

func IndexByID

func IndexByID(schema *Schema) (map[int]NestedField, error)

func IndexByName

func IndexByName(schema *Schema) (map[string]int, error)

func IndexNameByID

func IndexNameByID(schema *Schema) (map[int]string, error)

func Visit

func Visit[T any](sc *Schema, visitor SchemaVisitor[T]) (res T, err error)

Types

type Accessor

type Accessor struct {
	// contains filtered or unexported fields
}

func (*Accessor) Get

func (a *Accessor) Get(container Row) (any, error)

type AfterFieldVisitor

type AfterFieldVisitor interface {
	AfterField(field NestedField)
}

type AfterListElementVisitor

type AfterListElementVisitor interface {
	AfterListElement(elem NestedField)
}

type AfterMapKeyVisitor

type AfterMapKeyVisitor interface {
	AfterMapKey(key NestedField)
}

type AfterMapValueVisitor

type AfterMapValueVisitor interface {
	AfterMapValue(value NestedField)
}

type BeforeFieldVisitor

type BeforeFieldVisitor interface {
	BeforeField(field NestedField)
}

type BeforeListElementVisitor

type BeforeListElementVisitor interface {
	BeforeListElement(elem NestedField)
}

type BeforeMapKeyVisitor

type BeforeMapKeyVisitor interface {
	BeforeMapKey(key NestedField)
}

type BeforeMapValueVisitor

type BeforeMapValueVisitor interface {
	BeforeMapValue(value NestedField)
}

type BinaryType

type BinaryType struct{}

func (BinaryType) Equals

func (BinaryType) Equals(other Type) bool

func (BinaryType) String

func (BinaryType) String() string

func (BinaryType) Type

func (BinaryType) Type() string

type BooleanType

type BooleanType struct{}

func (BooleanType) Equals

func (BooleanType) Equals(other Type) bool

func (BooleanType) String

func (BooleanType) String() string

func (BooleanType) Type

func (BooleanType) Type() string

type BucketTransform

type BucketTransform struct {
	N int
}

func (BucketTransform) MarshalText

func (t BucketTransform) MarshalText() ([]byte, error)

func (BucketTransform) String

func (t BucketTransform) String() string

type DataFile

type DataFile interface {
	ContentType() ManifestEntryContent
	FilePath() string
	FileFormat() FileFormat
	Partition() map[string]any
	Count() int64
	FileSizeBytes() int64
	ColumnSizes() map[int]int64
	ValueCounts() map[int]int64
	NullValueCounts() map[int]int64
	NaNValueCounts() map[int]int64
	DistinctValueCounts() map[int]int64
	LowerBoundValues() map[int][]byte
	UpperBoundValues() map[int][]byte
	MetadataKey() []byte
	Splits() []int64
	EqualityFieldIDs() []int
	SortOrderID() *int
}

type Date

type Date int32

type DateType

type DateType struct{}

func (DateType) Equals

func (DateType) Equals(other Type) bool

func (DateType) String

func (DateType) String() string

func (DateType) Type

func (DateType) Type() string

type DayTransform

type DayTransform struct{}

func (DayTransform) MarshalText

func (t DayTransform) MarshalText() ([]byte, error)

func (DayTransform) String

func (DayTransform) String() string

type DecimalType

type DecimalType struct {
	// contains filtered or unexported fields
}

func DecimalTypeOf

func DecimalTypeOf(prec, scale int) DecimalType

func (DecimalType) Equals

func (d DecimalType) Equals(other Type) bool

func (DecimalType) Precision

func (d DecimalType) Precision() int

func (DecimalType) Scale

func (d DecimalType) Scale() int

func (DecimalType) String

func (d DecimalType) String() string

func (DecimalType) Type

func (d DecimalType) Type() string

type FileFormat

type FileFormat string
const (
	AvroFile    FileFormat = "AVRO"
	OrcFile     FileFormat = "ORC"
	ParquetFile FileFormat = "PARQUET"
)

type FixedType

type FixedType struct {
	// contains filtered or unexported fields
}

func FixedTypeOf

func FixedTypeOf(n int) FixedType

func (FixedType) Equals

func (f FixedType) Equals(other Type) bool

func (FixedType) Len

func (f FixedType) Len() int

func (FixedType) String

func (f FixedType) String() string

func (FixedType) Type

func (f FixedType) Type() string

type Float32Type

type Float32Type struct{}

func (Float32Type) Equals

func (Float32Type) Equals(other Type) bool

func (Float32Type) String

func (Float32Type) String() string

func (Float32Type) Type

func (Float32Type) Type() string

type Float64Type

type Float64Type struct{}

func (Float64Type) Equals

func (Float64Type) Equals(other Type) bool

func (Float64Type) String

func (Float64Type) String() string

func (Float64Type) Type

func (Float64Type) Type() string

type HourTransform

type HourTransform struct{}

func (HourTransform) MarshalText

func (t HourTransform) MarshalText() ([]byte, error)

func (HourTransform) String

func (HourTransform) String() string

type IdentityTransform

type IdentityTransform struct{}

func (IdentityTransform) MarshalText

func (t IdentityTransform) MarshalText() ([]byte, error)

func (IdentityTransform) String

func (IdentityTransform) String() string

type Int32Type

type Int32Type struct{}

func (Int32Type) Equals

func (Int32Type) Equals(other Type) bool

func (Int32Type) String

func (Int32Type) String() string

func (Int32Type) Type

func (Int32Type) Type() string

type Int64Type

type Int64Type struct{}

func (Int64Type) Equals

func (Int64Type) Equals(other Type) bool

func (Int64Type) String

func (Int64Type) String() string

func (Int64Type) Type

func (Int64Type) Type() string

type ListType

type ListType struct {
	ElementID       int  `json:"element-id"`
	Element         Type `json:"element"`
	ElementRequired bool `json:"element-required"`
}

func (*ListType) Children

func (l *ListType) Children() []NestedField

func (*ListType) ElementField

func (l *ListType) ElementField() NestedField

func (*ListType) Equals

func (l *ListType) Equals(other Type) bool

func (*ListType) String

func (l *ListType) String() string

func (*ListType) Type

func (*ListType) Type() string

func (*ListType) UnmarshalJSON

func (l *ListType) UnmarshalJSON(b []byte) error

type ManifestContent

type ManifestContent int32
const (
	ManifestContentData    ManifestContent = 0
	ManifestContentDeletes ManifestContent = 1
)

type ManifestEntry

type ManifestEntry interface {
	Status() ManifestEntryStatus
	SnapshotID() int64
	SequenceNum() int64
	FileSequenceNum() *int64
	DataFile() DataFile
	// contains filtered or unexported methods
}

type ManifestEntryContent

type ManifestEntryContent int8
const (
	EntryContentData       ManifestEntryContent = 0
	EntryContentPosDeletes ManifestEntryContent = 1
	EntryContentEqDeletes  ManifestEntryContent = 2
)

type ManifestEntryStatus

type ManifestEntryStatus int8
const (
	EntryStatusEXISTING ManifestEntryStatus = 0
	EntryStatusADDED    ManifestEntryStatus = 1
	EntryStatusDELETED  ManifestEntryStatus = 2
)

type ManifestFile

type ManifestFile interface {
	Version() int
	FilePath() string
	Length() int64
	PartitionID() int32
	ManifestContent() ManifestContent
	SnapshotID() int64
	AddedDataFiles() int32
	ExistingDataFiles() int32
	DeletedDataFiles() int32
	AddedRows() int64
	ExistingRows() int64
	DeletedRows() int64
	SequenceNum() int64
	MinSequenceNum() int64
	Metadata() []byte
	PartitionList() []fieldSummary

	HasAddedFiles() bool
	HasExistingFiles() bool
	FetchEntries(fs iceio.IO, discardDeleted bool) ([]ManifestEntry, error)
}

func ReadManifestList

func ReadManifestList(in io.Reader) ([]ManifestFile, error)

type MapType

type MapType struct {
	KeyID         int  `json:"key-id"`
	KeyType       Type `json:"key"`
	ValueID       int  `json:"value-id"`
	ValueType     Type `json:"value"`
	ValueRequired bool `json:"value-required"`
}

func (*MapType) Children

func (m *MapType) Children() []NestedField

func (*MapType) Equals

func (m *MapType) Equals(other Type) bool

func (*MapType) KeyField

func (m *MapType) KeyField() NestedField

func (*MapType) String

func (m *MapType) String() string

func (*MapType) Type

func (*MapType) Type() string

func (*MapType) UnmarshalJSON

func (m *MapType) UnmarshalJSON(b []byte) error

func (*MapType) ValueField

func (m *MapType) ValueField() NestedField

type MonthTransform

type MonthTransform struct{}

func (MonthTransform) MarshalText

func (t MonthTransform) MarshalText() ([]byte, error)

func (MonthTransform) String

func (MonthTransform) String() string

type NestedField

type NestedField struct {
	Type `json:"-"`

	ID             int    `json:"id"`
	Name           string `json:"name"`
	Required       bool   `json:"required"`
	Doc            string `json:"doc,omitempty"`
	InitialDefault any    `json:"initial-default,omitempty"`
	WriteDefault   any    `json:"write-default,omitempty"`
}

func (*NestedField) Equals

func (n *NestedField) Equals(other NestedField) bool

func (*NestedField) MarshalJSON

func (n *NestedField) MarshalJSON() ([]byte, error)

func (NestedField) String

func (n NestedField) String() string

func (*NestedField) UnmarshalJSON

func (n *NestedField) UnmarshalJSON(b []byte) error

type NestedType

type NestedType interface {
	Type
	Children() []NestedField
}

type PartitionField

type PartitionField struct {
	SourceID  int       `json:"source-id"`
	FieldID   int       `json:"field-id"`
	Name      string    `json:"name"`
	Transform Transform `json:"transform"`
}

func (*PartitionField) String

func (p *PartitionField) String() string

func (*PartitionField) UnmarshalJSON

func (p *PartitionField) UnmarshalJSON(b []byte) error

type PartitionSpec

type PartitionSpec struct {
	// contains filtered or unexported fields
}

func NewPartitionSpec

func NewPartitionSpec(fields ...PartitionField) PartitionSpec

func NewPartitionSpecID

func NewPartitionSpecID(id int, fields ...PartitionField) PartitionSpec

func (*PartitionSpec) CompatibleWith

func (ps *PartitionSpec) CompatibleWith(other *PartitionSpec) bool

func (*PartitionSpec) Equals

func (ps *PartitionSpec) Equals(other PartitionSpec) bool

func (*PartitionSpec) Field

func (ps *PartitionSpec) Field(i int) PartitionField

func (*PartitionSpec) FieldsBySourceID

func (ps *PartitionSpec) FieldsBySourceID(fieldID int) []PartitionField

func (*PartitionSpec) ID

func (ps *PartitionSpec) ID() int

func (*PartitionSpec) IsUnpartitioned

func (ps *PartitionSpec) IsUnpartitioned() bool

func (*PartitionSpec) LastAssignedFieldID

func (ps *PartitionSpec) LastAssignedFieldID() int

func (PartitionSpec) MarshalJSON

func (ps PartitionSpec) MarshalJSON() ([]byte, error)

func (*PartitionSpec) NumFields

func (ps *PartitionSpec) NumFields() int

func (*PartitionSpec) PartitionType

func (ps *PartitionSpec) PartitionType(schema *Schema) StructType

func (PartitionSpec) String

func (ps PartitionSpec) String() string

func (*PartitionSpec) UnmarshalJSON

func (ps *PartitionSpec) UnmarshalJSON(b []byte) error

type PrimitiveType

type PrimitiveType interface {
	Type
	// contains filtered or unexported methods
}

type Properties

type Properties map[string]string

type Row

type Row interface {
	Get(pos uint) (any, error)
	Set(pos uint, value any) error
}

type Schema

type Schema struct {
	ID                 int   `json:"schema-id"`
	IdentifierFieldIDs []int `json:"identifier-field-ids"`
	// contains filtered or unexported fields
}

func NewSchema

func NewSchema(id int, fields ...NestedField) *Schema

func NewSchemaWithIdentifiers

func NewSchemaWithIdentifiers(id int, identifierIDs []int, fields ...NestedField) *Schema

func PruneColumns

func PruneColumns(schema *Schema, selected map[int]struct{}, selectFullTypes bool) (*Schema, error)

func (*Schema) AccessorForField

func (s *Schema) AccessorForField(id int) (*Accessor, error)

func (*Schema) AsStruct

func (s *Schema) AsStruct() StructType

func (*Schema) Children

func (s *Schema) Children() []NestedField

func (*Schema) Equals

func (s *Schema) Equals(other *Schema) bool

func (*Schema) Field

func (s *Schema) Field(i int) NestedField

func (*Schema) FindColumnName

func (s *Schema) FindColumnName(fieldID int) (string, bool)

func (*Schema) FindFieldByID

func (s *Schema) FindFieldByID(id int) (NestedField, bool)

func (*Schema) FindFieldByName

func (s *Schema) FindFieldByName(name string) (NestedField, bool)

func (*Schema) FindFieldByNameCaseInsensitive

func (s *Schema) FindFieldByNameCaseInsensitive(name string) (NestedField, bool)

func (*Schema) FindTypeByID

func (s *Schema) FindTypeByID(id int) (Type, bool)

func (*Schema) FindTypeByName

func (s *Schema) FindTypeByName(name string) (Type, bool)

func (*Schema) FindTypeByNameCaseInsensitive

func (s *Schema) FindTypeByNameCaseInsensitive(name string) (Type, bool)

func (*Schema) HighestFieldID

func (s *Schema) HighestFieldID() int

func (*Schema) MarshalJSON

func (s *Schema) MarshalJSON() ([]byte, error)

func (*Schema) NumFields

func (s *Schema) NumFields() int

func (*Schema) Select

func (s *Schema) Select(caseSensitive bool, names ...string) (*Schema, error)

func (*Schema) Type

func (s *Schema) Type() string

func (*Schema) UnmarshalJSON

func (s *Schema) UnmarshalJSON(b []byte) error

type SchemaVisitor

type SchemaVisitor[T any] interface {
	Schema(schema *Schema, structResult T) T
	Struct(st StructType, fieldResults []T) T
	Field(field NestedField, fieldResult T) T
	List(list ListType, elemResult T) T
	Map(mapType MapType, keyResult, valueResult T) T
	Primitive(p PrimitiveType) T
}

type StringType

type StringType struct{}

func (StringType) Equals

func (StringType) Equals(other Type) bool

func (StringType) String

func (StringType) String() string

func (StringType) Type

func (StringType) Type() string

type StructType

type StructType struct {
	Fields []NestedField `json:"fields"`
}

func (*StructType) Children

func (s *StructType) Children() []NestedField

func (*StructType) Equals

func (s *StructType) Equals(other Type) bool

func (*StructType) MarshalJSON

func (s *StructType) MarshalJSON() ([]byte, error)

func (*StructType) String

func (s *StructType) String() string

func (*StructType) Type

func (*StructType) Type() string

type Time

type Time int64

type TimeType

type TimeType struct{}

func (TimeType) Equals

func (TimeType) Equals(other Type) bool

func (TimeType) String

func (TimeType) String() string

func (TimeType) Type

func (TimeType) Type() string

type Timestamp

type Timestamp int64

type TimestampType

type TimestampType struct{}

func (TimestampType) Equals

func (TimestampType) Equals(other Type) bool

func (TimestampType) String

func (TimestampType) String() string

func (TimestampType) Type

func (TimestampType) Type() string

type TimestampTzType

type TimestampTzType struct{}

func (TimestampTzType) Equals

func (TimestampTzType) Equals(other Type) bool

func (TimestampTzType) String

func (TimestampTzType) String() string

func (TimestampTzType) Type

func (TimestampTzType) Type() string

type Transform

type Transform interface {
	fmt.Stringer
	encoding.TextMarshaler
}

func ParseTransform

func ParseTransform(s string) (Transform, error)

type TruncateTransform

type TruncateTransform struct {
	W int
}

func (TruncateTransform) MarshalText

func (t TruncateTransform) MarshalText() ([]byte, error)

func (TruncateTransform) String

func (t TruncateTransform) String() string

type Type

type Type interface {
	fmt.Stringer
	Type() string
	Equals(Type) bool
}

type UUIDType

type UUIDType struct{}

func (UUIDType) Equals

func (UUIDType) Equals(other Type) bool

func (UUIDType) String

func (UUIDType) String() string

func (UUIDType) Type

func (UUIDType) Type() string

type VoidTransform

type VoidTransform struct{}

func (VoidTransform) MarshalText

func (t VoidTransform) MarshalText() ([]byte, error)

func (VoidTransform) String

func (VoidTransform) String() string

type YearTransform

type YearTransform struct{}

func (YearTransform) MarshalText

func (t YearTransform) MarshalText() ([]byte, error)

func (YearTransform) String

func (YearTransform) String() string

Directories

Path Synopsis
cmd

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL