iceberg

package module

v0.0.0-...-de72dcf Latest Latest Go to latest Published: Jul 26, 2023 License: MIT Imports: 18 Imported by: 0

Details

Valid go.mod file

The Go module system was introduced in Go 1.11 and is the official dependency management solution for Go.
Redistributable license

Redistributable licenses place minimal restrictions on how software can be used, modified, and redistributed.
Tagged version

Modules with tagged versions give importers more predictable builds.
Stable version

When a project reaches major version v1 it is considered stable.
Learn more about best practices

Repository

github.com/jacobmarble/iceberg

README ¶

iceberg

Documentation ¶

Index ¶

Constants
Variables
type AvroFileWriter
- func NewAvroFileWriter[T any, C Codec](schema avro.Schema, metadata map[string]string, objectsPerBlock int, ...) (*AvroFileWriter[T, C], error)
- func (w *AvroFileWriter[T, C]) Flush() error
- func (w *AvroFileWriter[T, C]) WriteVal(v T) error
type BinaryType
- func (*BinaryType) AsString() string
- func (*BinaryType) AvroSchema() (avro.Schema, error)
- func (t *BinaryType) MarshalJSON() ([]byte, error)
type BooleanType
- func (*BooleanType) AsString() string
- func (*BooleanType) AvroSchema() (avro.Schema, error)
- func (t *BooleanType) MarshalJSON() ([]byte, error)
type Codec
type CodecDeflate
type CodecNull
type CodecSnappy
type DataType
type DateType
- func (*DateType) AsString() string
- func (*DateType) AvroSchema() (avro.Schema, error)
- func (t *DateType) MarshalJSON() ([]byte, error)
type DecimalType
- func (t *DecimalType) AsString() string
- func (t *DecimalType) AvroSchema() (avro.Schema, error)
- func (t *DecimalType) MarshalJSON() ([]byte, error)
type DoubleType
- func (*DoubleType) AsString() string
- func (*DoubleType) AvroSchema() (avro.Schema, error)
- func (t *DoubleType) MarshalJSON() ([]byte, error)
type FixedType
- func (t *FixedType) AsString() string
- func (t *FixedType) AvroSchema() (avro.Schema, error)
- func (t *FixedType) MarshalJSON() ([]byte, error)
type FloatType
- func (*FloatType) AsString() string
- func (*FloatType) AvroSchema() (avro.Schema, error)
- func (t *FloatType) MarshalJSON() ([]byte, error)
type IntegerType
- func (*IntegerType) AsString() string
- func (*IntegerType) AvroSchema() (avro.Schema, error)
- func (t *IntegerType) MarshalJSON() ([]byte, error)
type ListType
- func (t *ListType) AsString() string
- func (t *ListType) AvroSchema() (avro.Schema, error)
- func (t *ListType) MarshalJSON() ([]byte, error)
- func (t *ListType) UnmarshalJSON(b []byte) error
type LongType
- func (*LongType) AsString() string
- func (*LongType) AvroSchema() (avro.Schema, error)
- func (t *LongType) MarshalJSON() ([]byte, error)
type ManifestEntryDataFileContent
type ManifestEntryDataFileFileFormat
type ManifestEntryDataFileV2
type ManifestEntryStatus
type ManifestEntryV2
type ManifestFileContent
- func (c ManifestFileContent) ManifestMetadataContent() ManifestMetadataContent
type ManifestFileFieldSummaryV2
type ManifestFileV2
type ManifestListV2
- func (m *ManifestListV2) Write(w io.Writer) error
type ManifestMetadataContent
- func (c ManifestMetadataContent) ManifestFileContent() ManifestFileContent
type ManifestV2
- func (m *ManifestV2) Write(w io.Writer) error
type MapType
- func (t *MapType) AsString() string
- func (t *MapType) AvroSchema() (avro.Schema, error)
- func (t *MapType) MarshalJSON() ([]byte, error)
- func (t *MapType) UnmarshalJSON(b []byte) error
type Namespace
- func NewNamespace(s string) Namespace
- func (n Namespace) Equal(other Namespace) bool
- func (n Namespace) QueryString() string
- func (n Namespace) String() string
type NullOrder
type PartitionFieldV1
type PartitionFieldV2
- func (f *PartitionFieldV2) AvroField() (*avro.Field, error)
- func (f *PartitionFieldV2) MarshalJSON() ([]byte, error)
- func (f *PartitionFieldV2) UnmarshalJSON(b []byte) error
type PartitionSpecV1
type PartitionSpecV2
- func (ps *PartitionSpecV2) AvroSchema(name string) (*avro.RecordSchema, error)
type PartitionTransform
type PartitionTransformBucket
- func NewPartitionTransformBucket(sourceType DataType, n int32) (*PartitionTransformBucket, error)
- func (pt *PartitionTransformBucket) Name() string
- func (pt *PartitionTransformBucket) ResultType() DataType
type PartitionTransformDay
- func NewPartitionTransformDay(sourceType DataType) (*PartitionTransformDay, error)
- func (pt *PartitionTransformDay) Name() string
- func (pt *PartitionTransformDay) ResultType() DataType
type PartitionTransformHour
- func NewPartitionTransformHour(sourceType DataType) (*PartitionTransformHour, error)
- func (pt *PartitionTransformHour) Name() string
- func (pt *PartitionTransformHour) ResultType() DataType
type PartitionTransformIdentity
- func NewPartitionTransformIdentity(sourceType DataType) *PartitionTransformIdentity
- func (pt *PartitionTransformIdentity) Name() string
- func (pt *PartitionTransformIdentity) ResultType() DataType
type PartitionTransformMonth
- func NewPartitionTransformMonth(sourceType DataType) (*PartitionTransformMonth, error)
- func (pt *PartitionTransformMonth) Name() string
- func (pt *PartitionTransformMonth) ResultType() DataType
type PartitionTransformTruncate
- func NewPartitionTransformTruncate(sourceType DataType, width int32) (*PartitionTransformTruncate, error)
- func (pt *PartitionTransformTruncate) Name() string
- func (pt *PartitionTransformTruncate) ResultType() DataType
type PartitionTransformVoid
- func NewPartitionTransformVoid() *PartitionTransformVoid
- func (pt *PartitionTransformVoid) Name() string
- func (pt *PartitionTransformVoid) ResultType() DataType
type PartitionTransformYear
- func NewPartitionTransformYear(sourceType DataType) (*PartitionTransformYear, error)
- func (pt *PartitionTransformYear) Name() string
- func (pt *PartitionTransformYear) ResultType() DataType
type PrimitiveType
type Properties
- func (p Properties) Equal(other Properties) bool
- func (p Properties) String() string
type SchemaV1
type SchemaV2
- func (s *SchemaV2) MarshalJSON() ([]byte, error)
type SnapshotV1
type SnapshotV2
type SortDirection
type SortField
- func (f *SortField) MarshalJSON() ([]byte, error)
- func (f *SortField) UnmarshalJSON(b []byte) error
type SortOrder
type StringType
- func (*StringType) AsString() string
- func (*StringType) AvroSchema() (avro.Schema, error)
- func (t *StringType) MarshalJSON() ([]byte, error)
type StructField
- func (f *StructField) ParquetNode() (parquetschema.Node, error)
- func (f *StructField) UnmarshalJSON(b []byte) error
type StructType
- func (t *StructType) AsString() string
- func (t *StructType) AvroSchema() (avro.Schema, error)
- func (t *StructType) MarshalJSON() ([]byte, error)
- func (t *StructType) UnmarshalJSON(b []byte) error
type Table
type TableIdentifier
- func NewTableIdentifier(namespace, name string) TableIdentifier
- func (ti *TableIdentifier) String() any
type TableMetadata
- func TableMetadataFromJSON(data []byte) (TableMetadata, error)
type TableMetadataV1
- func (tm *TableMetadataV1) FormatVersion() int32
- func (tm *TableMetadataV1) MarshalJSON() ([]byte, error)
type TableMetadataV2
- func (tm *TableMetadataV2) CurrentSchema() *SchemaV2
- func (tm *TableMetadataV2) FormatVersion() int32
- func (tm *TableMetadataV2) MarshalJSON() ([]byte, error)
- func (tm *TableMetadataV2) PartitionFieldById(fieldId int32) *PartitionFieldV2
- func (tm *TableMetadataV2) PartitionSpecById(specId int32) *PartitionSpecV2
- func (tm *TableMetadataV2) SchemaById(schemaId int32) *SchemaV2
- func (tm *TableMetadataV2) SchemaFieldById(fieldId int32) *StructField
type TimeType
- func (*TimeType) AsString() string
- func (*TimeType) AvroSchema() (avro.Schema, error)
- func (t *TimeType) MarshalJSON() ([]byte, error)
type TimestampType
- func (*TimestampType) AsString() string
- func (*TimestampType) AvroSchema() (avro.Schema, error)
- func (t *TimestampType) MarshalJSON() ([]byte, error)
type TimestampTzType
- func (*TimestampTzType) AsString() string
- func (*TimestampTzType) AvroSchema() (avro.Schema, error)
- func (t *TimestampTzType) MarshalJSON() ([]byte, error)
type UUIDType
- func (*UUIDType) AsString() string
- func (t *UUIDType) AvroSchema() (avro.Schema, error)
- func (t *UUIDType) MarshalJSON() ([]byte, error)

Constants ¶

View Source

// Untyped string constants naming summary operations.
// NOTE(review): presumably these are the values stored under an "operation"
// key of a snapshot Summary map (see SnapshotV2.Summary) — confirm against
// the code that builds summaries.
const (
	SummaryOperationAppend    = "append"
	SummaryOperationReplace   = "replace"
	SummaryOperationOverwrite = "overwrite"
	SummaryOperationDelete    = "delete"
)

View Source

// The SortDirection values: "asc" and "desc". Both are typed constants, so
// they can be assigned to SortField.Direction without a conversion.
const (
	SortDirectionAsc  SortDirection = "asc"
	SortDirectionDesc SortDirection = "desc"
)

View Source

// The NullOrder values: "nulls-first" and "nulls-last". Both are typed
// constants, so they can be assigned to SortField.NullOrder without a
// conversion.
const (
	NullOrderNullsFirst NullOrder = "nulls-first"
	NullOrderNullsLast  NullOrder = "nulls-last"
)

Variables ¶

View Source

// PrimitiveTypes groups the package's primitive data types in a single value.
// The parameterless types are exposed as ready-made instances; Decimal and
// Fixed are functions because each needs arguments (precision and scale, or
// length) to build its type.
var PrimitiveTypes = struct {
	Boolean     PrimitiveType
	Integer     PrimitiveType
	Long        PrimitiveType
	Float       PrimitiveType
	Double      PrimitiveType
	Date        PrimitiveType
	Time        PrimitiveType
	Timestamp   PrimitiveType
	TimestampTz PrimitiveType
	String      PrimitiveType
	UUID        PrimitiveType
	Binary      PrimitiveType
	Decimal     func(precision, scale int32) PrimitiveType
	Fixed       func(length int32) PrimitiveType
}{
	Boolean:     &BooleanType{},
	Integer:     &IntegerType{},
	Long:        &LongType{},
	Float:       &FloatType{},
	Double:      &DoubleType{},
	Date:        &DateType{},
	Time:        &TimeType{},
	Timestamp:   &TimestampType{},
	TimestampTz: &TimestampTzType{},
	String:      &StringType{},
	UUID:        &UUIDType{},
	Binary:      &BinaryType{},
	Decimal: func(p, s int32) PrimitiveType {
		decimal := DecimalType{Precision: p, Scale: s}
		return &decimal
	},
	Fixed: func(n int32) PrimitiveType {
		fixed := FixedType{Length: n}
		return &fixed
	},
}

Functions ¶

This section is empty.

Types ¶

type AvroFileWriter ¶

// AvroFileWriter is a generic writer parameterized by the value type T and a
// Codec C. Its state is unexported: create one with NewAvroFileWriter, add
// values with WriteVal, and call Flush.
// NOTE(review): presumably it writes an Avro object container file to the
// io.Writer passed to NewAvroFileWriter — confirm against the implementation.
type AvroFileWriter[T any, C Codec] struct {
	// contains filtered or unexported fields
}

func NewAvroFileWriter ¶

func NewAvroFileWriter[T any, C Codec](schema avro.Schema, metadata map[string]string, objectsPerBlock int, w io.Writer) (*AvroFileWriter[T, C], error)

func (*AvroFileWriter[T, C]) Flush ¶

func (w *AvroFileWriter[T, C]) Flush() error

func (*AvroFileWriter[T, C]) WriteVal ¶

func (w *AvroFileWriter[T, C]) WriteVal(v T) error

type BinaryType ¶

// BinaryType is the field-less type behind PrimitiveTypes.Binary.
type BinaryType struct{}

func (*BinaryType) AsString ¶

func (*BinaryType) AsString() string

func (*BinaryType) AvroSchema ¶

func (*BinaryType) AvroSchema() (avro.Schema, error)

func (*BinaryType) MarshalJSON ¶

func (t *BinaryType) MarshalJSON() ([]byte, error)

type BooleanType ¶

// BooleanType is the field-less type behind PrimitiveTypes.Boolean.
type BooleanType struct{}

func (*BooleanType) AsString ¶

func (*BooleanType) AsString() string

func (*BooleanType) AvroSchema ¶

func (*BooleanType) AvroSchema() (avro.Schema, error)

func (*BooleanType) MarshalJSON ¶

func (t *BooleanType) MarshalJSON() ([]byte, error)

type Codec ¶

// Codec is the constraint on AvroFileWriter's C type parameter. Its method
// set is not exported.
// NOTE(review): CodecDeflate, CodecNull and CodecSnappy are presumably its
// implementations — confirm.
type Codec interface {
	// contains filtered or unexported methods
}

type CodecDeflate ¶

// CodecDeflate is a field-less type.
// NOTE(review): presumably used as AvroFileWriter's Codec type argument to
// select deflate compression — confirm.
type CodecDeflate struct{}

type CodecNull ¶

// CodecNull is a field-less type.
// NOTE(review): presumably used as AvroFileWriter's Codec type argument to
// write blocks uncompressed — confirm.
type CodecNull struct{}

type CodecSnappy ¶

// CodecSnappy is a field-less type.
// NOTE(review): presumably used as AvroFileWriter's Codec type argument to
// select snappy compression — confirm.
type CodecSnappy struct{}

type DataType ¶

// DataType is the interface shared by every data type in this package.
type DataType interface {
	// AsString returns the type rendered as a string.
	// NOTE(review): presumably the type name used in schema JSON — confirm.
	AsString() string
	// AvroSchema returns the Avro schema for the type, or an error.
	AvroSchema() (avro.Schema, error)
}

DataType represents an Iceberg data type. See https://iceberg.apache.org/spec/#schemas-and-data-types for the specification and https://github.com/apache/iceberg/blob/apache-iceberg-1.3.0/core/src/main/java/org/apache/iceberg/types/Types.java for the reference Java implementation.

type DateType ¶

// DateType is the field-less type behind PrimitiveTypes.Date.
type DateType struct{}

func (*DateType) AsString ¶

func (*DateType) AsString() string

func (*DateType) AvroSchema ¶

func (*DateType) AvroSchema() (avro.Schema, error)

func (*DateType) MarshalJSON ¶

func (t *DateType) MarshalJSON() ([]byte, error)

type DecimalType ¶

// DecimalType is the type built by PrimitiveTypes.Decimal; it carries the
// precision and scale passed to that function.
type DecimalType struct {
	Precision int32
	Scale     int32
}

func (*DecimalType) AsString ¶

func (t *DecimalType) AsString() string

func (*DecimalType) AvroSchema ¶

func (t *DecimalType) AvroSchema() (avro.Schema, error)

func (*DecimalType) MarshalJSON ¶

func (t *DecimalType) MarshalJSON() ([]byte, error)

type DoubleType ¶

// DoubleType is the field-less type behind PrimitiveTypes.Double.
type DoubleType struct{}

func (*DoubleType) AsString ¶

func (*DoubleType) AsString() string

func (*DoubleType) AvroSchema ¶

func (*DoubleType) AvroSchema() (avro.Schema, error)

func (*DoubleType) MarshalJSON ¶

func (t *DoubleType) MarshalJSON() ([]byte, error)

type FixedType ¶

// FixedType is the type built by PrimitiveTypes.Fixed; it carries the length
// passed to that function.
type FixedType struct {
	Length int32
}

func (*FixedType) AsString ¶

func (t *FixedType) AsString() string

func (*FixedType) AvroSchema ¶

func (t *FixedType) AvroSchema() (avro.Schema, error)

func (*FixedType) MarshalJSON ¶

func (t *FixedType) MarshalJSON() ([]byte, error)

type FloatType ¶

// FloatType is the field-less type behind PrimitiveTypes.Float.
type FloatType struct{}

func (*FloatType) AsString ¶

func (*FloatType) AsString() string

func (*FloatType) AvroSchema ¶

func (*FloatType) AvroSchema() (avro.Schema, error)

func (*FloatType) MarshalJSON ¶

func (t *FloatType) MarshalJSON() ([]byte, error)

type IntegerType ¶

// IntegerType is the field-less type behind PrimitiveTypes.Integer.
type IntegerType struct{}

func (*IntegerType) AsString ¶

func (*IntegerType) AsString() string

func (*IntegerType) AvroSchema ¶

func (*IntegerType) AvroSchema() (avro.Schema, error)

func (*IntegerType) MarshalJSON ¶

func (t *IntegerType) MarshalJSON() ([]byte, error)

type ListType ¶

// ListType describes a list: Element is the DataType of its elements,
// ElementId is the int32 id attached to the element, and ElementRequired is
// a bool flag for the element. It has its own MarshalJSON and UnmarshalJSON.
// NOTE(review): presumably ElementRequired means elements may not be null —
// confirm.
type ListType struct {
	ElementId       int32
	Element         DataType
	ElementRequired bool
}

func (*ListType) AsString ¶

func (t *ListType) AsString() string

func (*ListType) AvroSchema ¶

func (t *ListType) AvroSchema() (avro.Schema, error)

func (*ListType) MarshalJSON ¶

func (t *ListType) MarshalJSON() ([]byte, error)

func (*ListType) UnmarshalJSON ¶

func (t *ListType) UnmarshalJSON(b []byte) error

type LongType ¶

// LongType is the field-less type behind PrimitiveTypes.Long.
type LongType struct{}

func (*LongType) AsString ¶

func (*LongType) AsString() string

func (*LongType) AvroSchema ¶

func (*LongType) AvroSchema() (avro.Schema, error)

func (*LongType) MarshalJSON ¶

func (t *LongType) MarshalJSON() ([]byte, error)

type ManifestEntryDataFileContent ¶

// ManifestEntryDataFileContent is the int32 code for the kind of content a
// data file stores; it is the type of ManifestEntryDataFileV2.Content.
type ManifestEntryDataFileContent int32

// Content codes: 0 for data, 1 for position deletes, 2 for equality deletes.
const (
	ManifestEntryContentData            ManifestEntryDataFileContent = 0
	ManifestEntryContentPositionDeletes ManifestEntryDataFileContent = 1
	ManifestEntryContentEqualityDeletes ManifestEntryDataFileContent = 2
)

type ManifestEntryDataFileFileFormat ¶

// ManifestEntryDataFileFileFormat is the string name of a file format; it is
// the type of ManifestEntryDataFileV2.FileFormat.
type ManifestEntryDataFileFileFormat string

// File format names. Note the values are upper case: "AVRO", "ORC", "PARQUET".
const (
	FileFormatAvro    ManifestEntryDataFileFileFormat = "AVRO"
	FileFormatOrc     ManifestEntryDataFileFileFormat = "ORC"
	FileFormatParquet ManifestEntryDataFileFileFormat = "PARQUET"
)

type ManifestEntryDataFileV2 ¶

// ManifestEntryDataFileV2 describes a single file tracked by a manifest entry
// (see ManifestEntryV2.DataFile). The avro tags give each field's Avro name.
// NOTE(review): the pointer-typed fields are presumably optional and encoded
// as Avro null when nil — confirm against the Avro schema used by the writer.
type ManifestEntryDataFileV2 struct {
	// Type of content stored by the data file: data, equality deletes, or position deletes
	// (see the ManifestEntryContent* constants)
	Content ManifestEntryDataFileContent `avro:"content"`
	// Full URI for the file with FS scheme
	FilePath string `avro:"file_path"`
	// String file format name; the FileFormat* constants spell these "AVRO", "ORC" and "PARQUET"
	FileFormat ManifestEntryDataFileFileFormat `avro:"file_format"`
	// Partition data tuple, schema based on the partition spec output using partition field ids for the struct field ids
	Partition map[string]any `avro:"partition"`
	// Number of records in this file
	RecordCount int64 `avro:"record_count"`
	// Total file size in bytes
	FileSizeInBytes int64 `avro:"file_size_in_bytes"`
	// Map from column id to the total size on disk of all regions that store the column. Does not include bytes necessary to read other columns, like footers.
	ColumnSizes *map[int32]int64 `avro:"column_sizes"`
	// Map from column id to number of values in the column (including null and NaN values)
	ValueCounts *map[int32]int64 `avro:"value_counts"`
	// Map from column id to number of null values in the column
	NullValueCounts *map[int32]int64 `avro:"null_value_counts"`
	// Map from column id to number of NaN values in the column
	NanValueCounts *map[int32]int64 `avro:"nan_value_counts"`
	// Map from column id to number of distinct values in the column; distinct counts must be derived using values in the file by counting or using sketches, but not using methods like merging existing distinct counts
	DistinctValueCounts *map[int32]int64 `avro:"distinct_counts"`
	// Map from column id to lower bound in the column serialized as binary. Each value must be less than or equal to all non-null, non-NaN values in the column for the file
	LowerBounds *map[int32]any `avro:"lower_bounds"`
	// Map from column id to upper bound in the column serialized as binary. Each value must be greater than or equal to all non-null, non-NaN values in the column for the file
	UpperBounds *map[int32]any `avro:"upper_bounds"`
	// Implementation-specific key metadata for encryption
	KeyMetadata *[]byte `avro:"key_metadata"`
	// Split offsets for the data file. For example, all row group offsets in a Parquet file. Must be sorted ascending
	SplitOffsets *[]int64 `avro:"split_offsets"`
	// Field ids used to determine row equality in equality delete files. Required when Content is
	// ManifestEntryContentEqualityDeletes (2) and should be null otherwise. Fields with ids listed in this column must be present in the delete file
	EqualityIds *[]int32 `avro:"equality_ids"`
	// ID representing sort order for this file
	SortOrderId *int32 `avro:"sort_order_id"`
}

type ManifestEntryStatus ¶

// ManifestEntryStatus is the int32 status code of a manifest entry; it is the
// type of ManifestEntryV2.Status.
type ManifestEntryStatus int32

// Status codes: 0 for existing, 1 for added, 2 for deleted.
const (
	ManifestEntryStatusExisting ManifestEntryStatus = 0
	ManifestEntryStatusAdded    ManifestEntryStatus = 1
	ManifestEntryStatusDeleted  ManifestEntryStatus = 2
)

type ManifestEntryV2 ¶

// ManifestEntryV2 is one element of ManifestV2.Entries: a status, three
// optional (pointer) ids/sequence numbers, and the nested data file record.
type ManifestEntryV2 struct {
	// Status is used to track additions and deletions (see the ManifestEntryStatus* constants).
	// Deletes are informational only and not used in scans.
	Status ManifestEntryStatus `avro:"status"`
	// SnapshotId is the snapshot id where the file was added, if status is "existing" or "added", or deleted, if status is "deleted".
	// Inherited when null.
	SnapshotId *int64 `avro:"snapshot_id"`
	// SequenceNumber represents the data sequence number, a relative age of the file content and should be used for planning which delete files apply to a data file.
	// Inherited when null and status is "added".
	SequenceNumber *int64 `avro:"sequence_number"`
	// FileSequenceNumber represents the sequence number of the snapshot that added the file. The file sequence number can't be used for pruning delete files as the data within the file may have an older data sequence number.
	// Inherited when null and status is "added".
	FileSequenceNumber *int64 `avro:"file_sequence_number"`
	// DataFile is nested inside the manifest entry so that it can be easily passed to job planning without the manifest entry fields
	DataFile ManifestEntryDataFileV2 `avro:"data_file"`
}

type ManifestFileContent ¶

// ManifestFileContent is the int32 code for the type of files a manifest
// tracks; it is the type of ManifestFileV2.Content. Its
// ManifestMetadataContent method converts to the string-typed counterpart.
type ManifestFileContent int32

// Content codes: 0 for data files, 1 for delete files.
const (
	ManifestFileContentData    ManifestFileContent = 0
	ManifestFileContentDeletes ManifestFileContent = 1
)

func (ManifestFileContent) ManifestMetadataContent ¶

func (c ManifestFileContent) ManifestMetadataContent() ManifestMetadataContent

type ManifestFileFieldSummaryV2 ¶

// ManifestFileFieldSummaryV2 summarizes one partition field of a manifest; a
// slice of these is stored in ManifestFileV2.Partitions.
type ManifestFileFieldSummaryV2 struct {
	// Whether the manifest contains at least one partition with a null value for the field
	ContainsNull bool `avro:"contains_null"`
	// Whether the manifest contains at least one partition with a NaN value for the field
	ContainsNan *bool `avro:"contains_nan"`
	// Lower bound for the non-null, non-NaN values in the partition field, or null if all values are null or NaN
	LowerBound *[]byte `avro:"lower_bound"`
	// Upper bound for the non-null, non-NaN values in the partition field, or null if all values are null or NaN
	UpperBound *[]byte `avro:"upper_bound"`
}

type ManifestFileV2 ¶

// ManifestFileV2 describes one manifest file as listed in a ManifestListV2
// (see ManifestListV2.Files). The avro tags give each field's Avro name.
type ManifestFileV2 struct {
	// Location of the manifest file
	ManifestPath string `avro:"manifest_path"`
	// Length of the manifest file in bytes
	ManifestLength int64 `avro:"manifest_length"`
	// ID of a partition spec used to write the manifest; must be listed in table metadata partition-specs
	PartitionSpecId int32 `avro:"partition_spec_id"`
	// The type of files tracked by the manifest, either data or delete files
	// (see the ManifestFileContent* constants)
	Content ManifestFileContent `avro:"content"`
	// The sequence number when the manifest was added to the table
	SequenceNumber int64 `avro:"sequence_number"`
	// The minimum data sequence number of all live data or delete files in the manifest
	MinSequenceNumber int64 `avro:"min_sequence_number"`
	// ID of the snapshot where the manifest file was added
	AddedSnapshotId int64 `avro:"added_snapshot_id"`
	// NOTE(review): the six count fields below are non-pointer integers, so this
	// struct cannot represent a null count. Earlier wording on each of them said
	// "when null this is assumed to be non-zero"; that presumably describes
	// nullable counts in another format version — confirm before treating a
	// zero value here as "unknown".
	//
	// Number of entries in the manifest that have status ADDED
	AddedDataFilesCount int32 `avro:"added_data_files_count"`
	// Number of entries in the manifest that have status EXISTING
	ExistingDataFilesCount int32 `avro:"existing_data_files_count"`
	// Number of entries in the manifest that have status DELETED
	DeletedDataFilesCount int32 `avro:"deleted_data_files_count"`
	// Number of rows in all files in the manifest that have status ADDED
	AddedRowsCount int64 `avro:"added_rows_count"`
	// Number of rows in all files in the manifest that have status EXISTING
	ExistingRowsCount int64 `avro:"existing_rows_count"`
	// Number of rows in all files in the manifest that have status DELETED
	DeletedRowsCount int64 `avro:"deleted_rows_count"`
	// A list of field summaries for each partition field in the spec. Each field in the list corresponds to a field in the manifest file's partition spec
	Partitions *[]ManifestFileFieldSummaryV2 `avro:"partitions"`
	// Implementation-specific key metadata for encryption
	KeyMetadata *[]byte `avro:"key_metadata"`
}

type ManifestListV2 ¶

// ManifestListV2 is a list of manifest file descriptions; its Write method
// takes an io.Writer.
type ManifestListV2 struct {
	// Files holds one ManifestFileV2 per manifest in the list.
	Files []ManifestFileV2
}

ManifestListV2 stores the list of manifests for a snapshot and includes summary metadata that can be used to avoid scanning all the manifests in a snapshot when planning a table scan. See https://iceberg.apache.org/spec/#manifest-lists

func (*ManifestListV2) Write ¶

func (m *ManifestListV2) Write(w io.Writer) error

type ManifestMetadataContent ¶

// ManifestMetadataContent is the string form of a manifest's content type; it
// is the type of ManifestV2.Content. Its ManifestFileContent method converts
// to the int32-typed counterpart.
type ManifestMetadataContent string

// Content names: "data" and "deletes".
const (
	ManifestMetadataContentData    ManifestMetadataContent = "data"
	ManifestMetadataContentDeletes ManifestMetadataContent = "deletes"
)

func (ManifestMetadataContent) ManifestFileContent ¶

func (c ManifestMetadataContent) ManifestFileContent() ManifestFileContent

type ManifestV2 ¶

type ManifestV2 struct {
	// Schema is the table schema at the time the manifest was written
	Schema *SchemaV2
	// PartitionSpec is the partition spec used to write the manifest
	PartitionSpec PartitionSpecV2
	// Content is the type of content files tracked by the manifest:
	// ManifestMetadataContentData ("data") or ManifestMetadataContentDeletes ("deletes")
	Content ManifestMetadataContent
	// Entries represents the data files in the manifest
	Entries []ManifestEntryV2
}

ManifestV2 represents an Iceberg manifest file. See https://iceberg.apache.org/spec/#manifests

func (*ManifestV2) Write ¶

func (m *ManifestV2) Write(w io.Writer) error

type MapType ¶

// MapType describes a map: Key and Value are the DataTypes of its keys and
// values, KeyId and ValueId are the int32 ids attached to each, and
// ValueRequired is a bool flag for the value. It has its own MarshalJSON and
// UnmarshalJSON.
// NOTE(review): presumably ValueRequired means values may not be null —
// confirm.
type MapType struct {
	KeyId         int32
	Key           DataType
	ValueId       int32
	Value         DataType
	ValueRequired bool
}

func (*MapType) AsString ¶

func (t *MapType) AsString() string

func (*MapType) AvroSchema ¶

func (t *MapType) AvroSchema() (avro.Schema, error)

func (*MapType) MarshalJSON ¶

func (t *MapType) MarshalJSON() ([]byte, error)

func (*MapType) UnmarshalJSON ¶

func (t *MapType) UnmarshalJSON(b []byte) error

type Namespace ¶

// Namespace is a slice of strings, built from a single string by NewNamespace.
// NOTE(review): presumably one element per namespace level — confirm how
// NewNamespace splits its input.
type Namespace []string

func NewNamespace ¶

func NewNamespace(s string) Namespace

func (Namespace) Equal ¶

func (n Namespace) Equal(other Namespace) bool

func (Namespace) QueryString ¶

func (n Namespace) QueryString() string

func (Namespace) String ¶

func (n Namespace) String() string

type NullOrder ¶

// NullOrder is the string type of SortField.NullOrder; its values are
// NullOrderNullsFirst and NullOrderNullsLast.
type NullOrder string

type PartitionFieldV1 ¶

// PartitionFieldV1 is one field of a PartitionSpecV1, encoded to JSON through
// its struct tags. Unlike PartitionFieldV2, FieldId is a pointer tagged
// omitempty, so it may be absent.
type PartitionFieldV1 struct {
	// SourceId matches SchemaV1.Fields.[*].Id
	SourceId int32 `json:"source-id"`
	// FieldId identifies this partition field uniquely within a partition spec
	FieldId *int32 `json:"field-id,omitempty"`
	// Name is the (human-readable?) name of this partition field
	Name string `json:"name"`
	// Transform describes how to map the source value to generate a partition field value
	Transform PartitionTransform `json:"transform"`
}

type PartitionFieldV2 ¶

// PartitionFieldV2 is one field of a PartitionSpecV2. It carries no JSON
// struct tags; the type declares its own MarshalJSON and UnmarshalJSON
// methods instead.
type PartitionFieldV2 struct {
	// SourceId matches SchemaV2.Fields.[*].Id
	SourceId int32
	// FieldId identifies this partition field uniquely among all partition specs in the table
	FieldId int32
	// Name is the (human-readable?) name of this partition field
	Name string
	// Transform describes how to map the source value to generate a partition field value
	Transform PartitionTransform
}

func (*PartitionFieldV2) AvroField ¶

func (f *PartitionFieldV2) AvroField() (*avro.Field, error)

func (*PartitionFieldV2) MarshalJSON ¶

func (f *PartitionFieldV2) MarshalJSON() ([]byte, error)

func (*PartitionFieldV2) UnmarshalJSON ¶

func (f *PartitionFieldV2) UnmarshalJSON(b []byte) error

type PartitionSpecV1 ¶

// PartitionSpecV1 is a partition spec: an id (JSON "spec-id") and its list of
// PartitionFieldV1 fields (JSON "fields").
type PartitionSpecV1 struct {
	Id     int32              `json:"spec-id"`
	Fields []PartitionFieldV1 `json:"fields"`
}

type PartitionSpecV2 ¶

// PartitionSpecV2 is a partition spec: an id (JSON "spec-id") and its list of
// PartitionFieldV2 fields (JSON "fields"). Its AvroSchema method returns an
// Avro record schema with the given name.
type PartitionSpecV2 struct {
	Id     int32              `json:"spec-id"`
	Fields []PartitionFieldV2 `json:"fields"`
}

func (*PartitionSpecV2) AvroSchema ¶

func (ps *PartitionSpecV2) AvroSchema(name string) (*avro.RecordSchema, error)

type PartitionTransform ¶

// PartitionTransform is the interface implemented by the PartitionTransform*
// types; it is used by partition fields and by SortField.
type PartitionTransform interface {
	// Name returns the transform's name as a string.
	// NOTE(review): presumably the spelling used in metadata JSON — confirm.
	Name() string
	// ResultType returns the DataType of the values the transform produces.
	ResultType() DataType
}

PartitionTransform is documented in the Iceberg specification: https://iceberg.apache.org/spec/#partition-transforms

type PartitionTransformBucket ¶

// PartitionTransformBucket is a PartitionTransform with unexported state;
// build it with NewPartitionTransformBucket, which takes the source DataType
// and an int32 n and may return an error.
type PartitionTransformBucket struct {
	// contains filtered or unexported fields
}

func NewPartitionTransformBucket ¶

func NewPartitionTransformBucket(sourceType DataType, n int32) (*PartitionTransformBucket, error)

func (*PartitionTransformBucket) Name ¶

func (pt *PartitionTransformBucket) Name() string

func (*PartitionTransformBucket) ResultType ¶

func (pt *PartitionTransformBucket) ResultType() DataType

type PartitionTransformDay ¶

// PartitionTransformDay is a field-less PartitionTransform; build it with
// NewPartitionTransformDay, which takes the source DataType and may return
// an error.
type PartitionTransformDay struct{}

func NewPartitionTransformDay ¶

func NewPartitionTransformDay(sourceType DataType) (*PartitionTransformDay, error)

func (*PartitionTransformDay) Name ¶

func (pt *PartitionTransformDay) Name() string

func (*PartitionTransformDay) ResultType ¶

func (pt *PartitionTransformDay) ResultType() DataType

type PartitionTransformHour ¶

// PartitionTransformHour is a field-less PartitionTransform; build it with
// NewPartitionTransformHour, which takes the source DataType and may return
// an error.
type PartitionTransformHour struct{}

func NewPartitionTransformHour ¶

func NewPartitionTransformHour(sourceType DataType) (*PartitionTransformHour, error)

func (*PartitionTransformHour) Name ¶

func (pt *PartitionTransformHour) Name() string

func (*PartitionTransformHour) ResultType ¶

func (pt *PartitionTransformHour) ResultType() DataType

type PartitionTransformIdentity ¶

// PartitionTransformIdentity is a PartitionTransform with unexported state;
// build it with NewPartitionTransformIdentity, which takes the source
// DataType and returns no error.
type PartitionTransformIdentity struct {
	// contains filtered or unexported fields
}

func NewPartitionTransformIdentity ¶

func NewPartitionTransformIdentity(sourceType DataType) *PartitionTransformIdentity

func (*PartitionTransformIdentity) Name ¶

func (pt *PartitionTransformIdentity) Name() string

func (*PartitionTransformIdentity) ResultType ¶

func (pt *PartitionTransformIdentity) ResultType() DataType

type PartitionTransformMonth ¶

// PartitionTransformMonth is a field-less PartitionTransform; build it with
// NewPartitionTransformMonth, which takes the source DataType and may return
// an error.
type PartitionTransformMonth struct{}

func NewPartitionTransformMonth ¶

func NewPartitionTransformMonth(sourceType DataType) (*PartitionTransformMonth, error)

func (*PartitionTransformMonth) Name ¶

func (pt *PartitionTransformMonth) Name() string

func (*PartitionTransformMonth) ResultType ¶

func (pt *PartitionTransformMonth) ResultType() DataType

type PartitionTransformTruncate ¶

// PartitionTransformTruncate is a PartitionTransform with unexported state;
// build it with NewPartitionTransformTruncate, which takes the source
// DataType and an int32 width and may return an error.
type PartitionTransformTruncate struct {
	// contains filtered or unexported fields
}

func NewPartitionTransformTruncate ¶

func NewPartitionTransformTruncate(sourceType DataType, width int32) (*PartitionTransformTruncate, error)

func (*PartitionTransformTruncate) Name ¶

func (pt *PartitionTransformTruncate) Name() string

func (*PartitionTransformTruncate) ResultType ¶

func (pt *PartitionTransformTruncate) ResultType() DataType

type PartitionTransformVoid ¶

// PartitionTransformVoid is a field-less PartitionTransform; build it with
// NewPartitionTransformVoid, which takes no arguments.
type PartitionTransformVoid struct{}

func NewPartitionTransformVoid ¶

func NewPartitionTransformVoid() *PartitionTransformVoid

func (*PartitionTransformVoid) Name ¶

func (pt *PartitionTransformVoid) Name() string

func (*PartitionTransformVoid) ResultType ¶

func (pt *PartitionTransformVoid) ResultType() DataType

type PartitionTransformYear ¶

// PartitionTransformYear is a field-less PartitionTransform; build it with
// NewPartitionTransformYear, which takes the source DataType and may return
// an error.
type PartitionTransformYear struct{}

func NewPartitionTransformYear ¶

func NewPartitionTransformYear(sourceType DataType) (*PartitionTransformYear, error)

func (*PartitionTransformYear) Name ¶

func (pt *PartitionTransformYear) Name() string

func (*PartitionTransformYear) ResultType ¶

func (pt *PartitionTransformYear) ResultType() DataType

type PrimitiveType ¶

// PrimitiveType embeds DataType and adds no methods of its own; it is the
// type of every entry in PrimitiveTypes.
type PrimitiveType interface {
	DataType
}

type Properties ¶

// Properties is a string-to-string map with Equal and String methods.
type Properties map[string]string

func (Properties) Equal ¶

func (p Properties) Equal(other Properties) bool

func (Properties) String ¶

func (p Properties) String() string

type SchemaV1 ¶

// SchemaV1 embeds StructType and adds two optional JSON fields: Id
// ("schema-id") and IdentifierFieldIds ("identifier-field-ids"). Both are
// pointers tagged omitempty, so either may be absent.
type SchemaV1 struct {
	StructType
	Id                 *int32   `json:"schema-id,omitempty"`
	IdentifierFieldIds *[]int32 `json:"identifier-field-ids,omitempty"`
}

type SchemaV2 ¶

// SchemaV2 embeds StructType and adds Id (JSON "schema-id", always present)
// and the optional IdentifierFieldIds ("identifier-field-ids", omitted when
// nil). The type also declares its own MarshalJSON method.
type SchemaV2 struct {
	StructType
	Id                 int32    `json:"schema-id"`
	IdentifierFieldIds *[]int32 `json:"identifier-field-ids,omitempty"`
}

SchemaV2 is an ordered list of named columns. https://iceberg.apache.org/spec/#schemas

func (*SchemaV2) MarshalJSON ¶

func (s *SchemaV2) MarshalJSON() ([]byte, error)

type SnapshotV1 ¶

// SnapshotV1 is the JSON shape of a V1 snapshot. Only Id ("snapshot-id") and
// TimestampMs ("timestamp-ms") are always encoded; every pointer field is
// tagged omitempty and is left out of the JSON when nil. It can carry a
// manifest list location (ManifestList) and/or an inline list of manifests
// (Manifests).
type SnapshotV1 struct {
	Id           int64              `json:"snapshot-id"`
	ParentId     *int64             `json:"parent-snapshot-id,omitempty"`
	TimestampMs  int64              `json:"timestamp-ms"`
	ManifestList *string            `json:"manifest-list,omitempty"`
	Manifests    *[]string          `json:"manifests,omitempty"`
	Summary      *map[string]string `json:"summary,omitempty"`
	SchemaId     *int32             `json:"schema-id,omitempty"`
}

type SnapshotV2 ¶

// SnapshotV2 is the JSON shape of a V2 snapshot. Compared with SnapshotV1 it
// adds SequenceNumber ("sequence-number"), makes ManifestList and Summary
// non-pointer fields that are always encoded, and has no inline Manifests
// list. ParentId and SchemaId remain optional (pointer, omitempty).
type SnapshotV2 struct {
	Id             int64             `json:"snapshot-id"`
	ParentId       *int64            `json:"parent-snapshot-id,omitempty"`
	SequenceNumber int64             `json:"sequence-number"`
	TimestampMs    int64             `json:"timestamp-ms"`
	ManifestList   string            `json:"manifest-list"`
	Summary        map[string]string `json:"summary"`
	SchemaId       *int32            `json:"schema-id,omitempty"`
}

SnapshotV2 represents the state of a table at some time and is used to access the complete set of data files in the table.

A snapshot consists of one or more file manifests, and the complete table contents are the union of all the data files in those manifests. See https://iceberg.apache.org/spec/#snapshots, https://github.com/apache/iceberg/blob/apache-iceberg-1.3.0/core/src/main/java/org/apache/iceberg/BaseSnapshot.java and https://github.com/apache/iceberg/blob/apache-iceberg-1.3.0/core/src/main/java/org/apache/iceberg/SnapshotParser.java

type SortDirection ¶

// SortDirection is the string type of SortField.Direction; its values are
// SortDirectionAsc and SortDirectionDesc.
type SortDirection string

type SortField ¶

// SortField is one field of a SortOrder. It carries no JSON struct tags; the
// type declares its own MarshalJSON and UnmarshalJSON methods instead.
type SortField struct {
	// SourceId matches SchemaV2.Fields.[*].Id
	SourceId int32
	// Transform describes how to map the source value to a sort value
	Transform PartitionTransform
	// Direction is SortDirectionAsc or SortDirectionDesc
	Direction SortDirection
	// NullOrder is NullOrderNullsFirst or NullOrderNullsLast
	NullOrder NullOrder
}

func (*SortField) MarshalJSON ¶

func (f *SortField) MarshalJSON() ([]byte, error)

func (*SortField) UnmarshalJSON ¶

func (f *SortField) UnmarshalJSON(b []byte) error

type SortOrder ¶

// SortOrder is a sort order: an id (JSON "order-id") and its list of
// SortField fields (JSON "fields").
type SortOrder struct {
	Id     int32       `json:"order-id"`
	Fields []SortField `json:"fields"`
}

type StringType ¶

// StringType is the field-less type behind PrimitiveTypes.String.
type StringType struct{}

func (*StringType) AsString ¶

func (*StringType) AsString() string

func (*StringType) AvroSchema ¶

func (*StringType) AvroSchema() (avro.Schema, error)

func (*StringType) MarshalJSON ¶

func (t *StringType) MarshalJSON() ([]byte, error)

type StructField ¶

type StructField struct {
	Id       int32    `json:"id"`
	Name     string   `json:"name"`
	Type     DataType `json:"type"`
	Required bool     `json:"required"`
	Doc      string   `json:"doc,omitempty"`
}

func (*StructField) ParquetNode ¶

func (f *StructField) ParquetNode() (parquetschema.Node, error)

ParquetNode converts the StructField to a Parquet Node. https://iceberg.apache.org/spec/#parquet

func (*StructField) UnmarshalJSON ¶

func (f *StructField) UnmarshalJSON(b []byte) error

type StructType ¶

type StructType struct {
	Fields []StructField
}

func (*StructType) AsString ¶

func (t *StructType) AsString() string

func (*StructType) AvroSchema ¶

func (t *StructType) AvroSchema() (avro.Schema, error)

func (*StructType) MarshalJSON ¶

func (t *StructType) MarshalJSON() ([]byte, error)

func (*StructType) UnmarshalJSON ¶

func (t *StructType) UnmarshalJSON(b []byte) error

type Table ¶

type Table struct {
	TableId  TableIdentifier
	Metadata TableMetadata
}

type TableIdentifier ¶

type TableIdentifier struct {
	Namespace Namespace `json:"namespace"`
	Name      string    `json:"name"`
}

func NewTableIdentifier ¶

func NewTableIdentifier(namespace, name string) TableIdentifier

func (*TableIdentifier) String ¶

func (ti *TableIdentifier) String() any

type TableMetadata ¶

type TableMetadata interface {
	FormatVersion() int32
	json.Marshaler
}

TableMetadata files track a table's schema, partitioning config, custom properties, and snapshots of the data.

func TableMetadataFromJSON ¶

func TableMetadataFromJSON(data []byte) (TableMetadata, error)

type TableMetadataV1 ¶

type TableMetadataV1 struct {
	TableUUID     *string `json:"table-uuid,omitempty"`
	Location      string  `json:"location"`
	LastUpdatedMs int64   `json:"last-updated-ms"`
	// LastColumnId is the highest assigned column ID in all Schemas in table
	LastColumnId           int32                     `json:"last-column-id"`
	Schema                 SchemaV1                  `json:"schema"`
	Schemas                *[]SchemaV1               `json:"schemas,omitempty"`
	CurrentSchemaId        *int32                    `json:"current-schema-id,omitempty"`
	PartitionSpec          []PartitionFieldV1        `json:"partition-spec"`
	PartitionSpecs         *[]PartitionSpecV1        `json:"partition-specs,omitempty"`
	DefaultPartitionSpecId *int32                    `json:"default-spec-id,omitempty"`
	LastPartitionId        *int32                    `json:"last-partition-id,omitempty"`
	Properties             *Properties               `json:"properties,omitempty"`
	CurrentSnapshotId      *int64                    `json:"current-snapshot-id,omitempty"`
	Snapshots              *[]SnapshotV1             `json:"snapshots,omitempty"`
	SnapshotLog            *[]map[string]interface{} `json:"snapshot-log,omitempty"`
	MetadataLog            *[]map[string]interface{} `json:"metadata-log,omitempty"`
	SortOrders             *[]SortOrder              `json:"sort-orders,omitempty"`
	DefaultSortOrderId     *int32                    `json:"default-sort-order-id,omitempty"`
	Statistics             *[]interface{}            `json:"statistics,omitempty"`
}

func (*TableMetadataV1) FormatVersion ¶

func (tm *TableMetadataV1) FormatVersion() int32

func (*TableMetadataV1) MarshalJSON ¶

func (tm *TableMetadataV1) MarshalJSON() ([]byte, error)

type TableMetadataV2 ¶

type TableMetadataV2 struct {
	TableUUID          string `json:"table-uuid"`
	Location           string `json:"location"`
	LastSequenceNumber int64  `json:"last-sequence-number"`
	LastUpdatedMs      int64  `json:"last-updated-ms"`
	// LastColumnId is the highest assigned column ID in all Schemas in table
	LastColumnId           int32             `json:"last-column-id"`
	Schemas                []SchemaV2        `json:"schemas"`
	CurrentSchemaId        int32             `json:"current-schema-id"`
	PartitionSpecs         []PartitionSpecV2 `json:"partition-specs"`
	DefaultPartitionSpecId int32             `json:"default-spec-id"`
	LastPartitionId        int32             `json:"last-partition-id"`
	Properties             *Properties       `json:"properties,omitempty"`
	CurrentSnapshotId      *int64            `json:"current-snapshot-id,omitempty"`
	Snapshots              *[]SnapshotV2     `json:"snapshots,omitempty"`
	SnapshotLog            *interface{}      `json:"snapshot-log,omitempty"` // TODO implement
	MetadataLog            *interface{}      `json:"metadata-log,omitempty"` // TODO implement
	SortOrders             []SortOrder       `json:"sort-orders"`
	DefaultSortOrderId     int32             `json:"default-sort-order-id"`
	Refs                   *interface{}      `json:"refs,omitempty"`       // TODO implement
	Statistics             *interface{}      `json:"statistics,omitempty"` // TODO implement
}

func (*TableMetadataV2) CurrentSchema ¶

func (tm *TableMetadataV2) CurrentSchema() *SchemaV2

func (*TableMetadataV2) FormatVersion ¶

func (tm *TableMetadataV2) FormatVersion() int32

func (*TableMetadataV2) MarshalJSON ¶

func (tm *TableMetadataV2) MarshalJSON() ([]byte, error)

func (*TableMetadataV2) PartitionFieldById ¶

func (tm *TableMetadataV2) PartitionFieldById(fieldId int32) *PartitionFieldV2

func (*TableMetadataV2) PartitionSpecById ¶

func (tm *TableMetadataV2) PartitionSpecById(specId int32) *PartitionSpecV2

func (*TableMetadataV2) SchemaById ¶

func (tm *TableMetadataV2) SchemaById(schemaId int32) *SchemaV2

func (*TableMetadataV2) SchemaFieldById ¶

func (tm *TableMetadataV2) SchemaFieldById(fieldId int32) *StructField

type TimeType ¶

type TimeType struct{}

func (*TimeType) AsString ¶

func (*TimeType) AsString() string

func (*TimeType) AvroSchema ¶

func (*TimeType) AvroSchema() (avro.Schema, error)

func (*TimeType) MarshalJSON ¶

func (t *TimeType) MarshalJSON() ([]byte, error)

type TimestampType ¶

type TimestampType struct{}

func (*TimestampType) AsString ¶

func (*TimestampType) AsString() string

func (*TimestampType) AvroSchema ¶

func (*TimestampType) AvroSchema() (avro.Schema, error)

func (*TimestampType) MarshalJSON ¶

func (t *TimestampType) MarshalJSON() ([]byte, error)

type TimestampTzType ¶

type TimestampTzType struct{}

func (*TimestampTzType) AsString ¶

func (*TimestampTzType) AsString() string

func (*TimestampTzType) AvroSchema ¶

func (*TimestampTzType) AvroSchema() (avro.Schema, error)

func (*TimestampTzType) MarshalJSON ¶

func (t *TimestampTzType) MarshalJSON() ([]byte, error)

type UUIDType ¶

type UUIDType struct{}

func (*UUIDType) AsString ¶

func (*UUIDType) AsString() string

func (*UUIDType) AvroSchema ¶

func (t *UUIDType) AvroSchema() (avro.Schema, error)

func (*UUIDType) MarshalJSON ¶

func (t *UUIDType) MarshalJSON() ([]byte, error)

Source Files ¶

View all Source files

Directories ¶

Path	Synopsis
catalog-rest

?	: This menu
/	: Search site
f or F	: Jump to
y or Y	: Canonical URL