iceberg

package module
v0.0.0-...-de72dcf Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Jul 26, 2023 License: MIT Imports: 18 Imported by: 0

README

iceberg

Documentation

Index

Constants

View Source
// SummaryOperation* are untyped string constants naming four operations.
// NOTE(review): presumably the values stored under an "operation" key of a
// snapshot's Summary map — confirm against the Iceberg spec.
const (
	SummaryOperationAppend    = "append"
	SummaryOperationReplace   = "replace"
	SummaryOperationOverwrite = "overwrite"
	SummaryOperationDelete    = "delete"
)
View Source
// Declared values of SortDirection, the type of SortField.Direction.
const (
	SortDirectionAsc  SortDirection = "asc"
	SortDirectionDesc SortDirection = "desc"
)
View Source
// Declared values of NullOrder, the type of SortField.NullOrder.
const (
	NullOrderNullsFirst NullOrder = "nulls-first"
	NullOrderNullsLast  NullOrder = "nulls-last"
)

Variables

View Source
var (
	// PrimitiveTypes offers one value per parameterless primitive type and a
	// constructor function for each parameterized one (Decimal and Fixed).
	// Every value and every constructor result is a pointer to the concrete
	// type, stored behind the PrimitiveType interface.
	PrimitiveTypes = struct {
		Boolean, Integer, Long PrimitiveType
		Float, Double          PrimitiveType
		Date, Time             PrimitiveType
		Timestamp, TimestampTz PrimitiveType
		String, UUID, Binary   PrimitiveType
		Decimal                func(precision, scale int32) PrimitiveType
		Fixed                  func(length int32) PrimitiveType
	}{
		Boolean:     &BooleanType{},
		Integer:     &IntegerType{},
		Long:        &LongType{},
		Float:       &FloatType{},
		Double:      &DoubleType{},
		Date:        &DateType{},
		Time:        &TimeType{},
		Timestamp:   &TimestampType{},
		TimestampTz: &TimestampTzType{},
		String:      &StringType{},
		UUID:        &UUIDType{},
		Binary:      &BinaryType{},
		Decimal: func(precision, scale int32) PrimitiveType {
			decimal := DecimalType{Precision: precision, Scale: scale}
			return &decimal
		},
		Fixed: func(length int32) PrimitiveType {
			fixed := FixedType{Length: length}
			return &fixed
		},
	}
)

Functions

This section is empty.

Types

type AvroFileWriter

type AvroFileWriter[T any, C Codec] struct {
	// contains filtered or unexported fields
}

func NewAvroFileWriter

func NewAvroFileWriter[T any, C Codec](schema avro.Schema, metadata map[string]string, objectsPerBlock int, w io.Writer) (*AvroFileWriter[T, C], error)

func (*AvroFileWriter[T, C]) Flush

func (w *AvroFileWriter[T, C]) Flush() error

func (*AvroFileWriter[T, C]) WriteVal

func (w *AvroFileWriter[T, C]) WriteVal(v T) error

type BinaryType

type BinaryType struct{}

func (*BinaryType) AsString

func (*BinaryType) AsString() string

func (*BinaryType) AvroSchema

func (*BinaryType) AvroSchema() (avro.Schema, error)

func (*BinaryType) MarshalJSON

func (t *BinaryType) MarshalJSON() ([]byte, error)

type BooleanType

type BooleanType struct{}

func (*BooleanType) AsString

func (*BooleanType) AsString() string

func (*BooleanType) AvroSchema

func (*BooleanType) AvroSchema() (avro.Schema, error)

func (*BooleanType) MarshalJSON

func (t *BooleanType) MarshalJSON() ([]byte, error)

type Codec

type Codec interface {
	// contains filtered or unexported methods
}

type CodecDeflate

type CodecDeflate struct{}

type CodecNull

type CodecNull struct{}

type CodecSnappy

type CodecSnappy struct{}

type DateType

type DateType struct{}

func (*DateType) AsString

func (*DateType) AsString() string

func (*DateType) AvroSchema

func (*DateType) AvroSchema() (avro.Schema, error)

func (*DateType) MarshalJSON

func (t *DateType) MarshalJSON() ([]byte, error)

type DecimalType

// DecimalType is a type parameterized by Precision and Scale.
// PrimitiveTypes.Decimal returns a pointer to one built from its arguments.
type DecimalType struct {
	Precision int32
	Scale     int32
}

func (*DecimalType) AsString

func (t *DecimalType) AsString() string

func (*DecimalType) AvroSchema

func (t *DecimalType) AvroSchema() (avro.Schema, error)

func (*DecimalType) MarshalJSON

func (t *DecimalType) MarshalJSON() ([]byte, error)

type DoubleType

type DoubleType struct{}

func (*DoubleType) AsString

func (*DoubleType) AsString() string

func (*DoubleType) AvroSchema

func (*DoubleType) AvroSchema() (avro.Schema, error)

func (*DoubleType) MarshalJSON

func (t *DoubleType) MarshalJSON() ([]byte, error)

type FixedType

// FixedType is a type parameterized by Length.
// PrimitiveTypes.Fixed returns a pointer to one built from its argument.
// NOTE(review): Length is presumably a byte count — confirm.
type FixedType struct {
	Length int32
}

func (*FixedType) AsString

func (t *FixedType) AsString() string

func (*FixedType) AvroSchema

func (t *FixedType) AvroSchema() (avro.Schema, error)

func (*FixedType) MarshalJSON

func (t *FixedType) MarshalJSON() ([]byte, error)

type FloatType

type FloatType struct{}

func (*FloatType) AsString

func (*FloatType) AsString() string

func (*FloatType) AvroSchema

func (*FloatType) AvroSchema() (avro.Schema, error)

func (*FloatType) MarshalJSON

func (t *FloatType) MarshalJSON() ([]byte, error)

type IntegerType

type IntegerType struct{}

func (*IntegerType) AsString

func (*IntegerType) AsString() string

func (*IntegerType) AvroSchema

func (*IntegerType) AvroSchema() (avro.Schema, error)

func (*IntegerType) MarshalJSON

func (t *IntegerType) MarshalJSON() ([]byte, error)

type ListType

// ListType describes a list through its element: the element's id, the
// element's DataType, and whether the element is required.
type ListType struct {
	ElementId       int32
	Element         DataType
	ElementRequired bool
}

func (*ListType) AsString

func (t *ListType) AsString() string

func (*ListType) AvroSchema

func (t *ListType) AvroSchema() (avro.Schema, error)

func (*ListType) MarshalJSON

func (t *ListType) MarshalJSON() ([]byte, error)

func (*ListType) UnmarshalJSON

func (t *ListType) UnmarshalJSON(b []byte) error

type LongType

type LongType struct{}

func (*LongType) AsString

func (*LongType) AsString() string

func (*LongType) AvroSchema

func (*LongType) AvroSchema() (avro.Schema, error)

func (*LongType) MarshalJSON

func (t *LongType) MarshalJSON() ([]byte, error)

type ManifestEntryDataFileContent

// ManifestEntryDataFileContent is the int32 code held by
// ManifestEntryDataFileV2.Content.
type ManifestEntryDataFileContent int32
// Declared content codes: data (0), position deletes (1), equality deletes (2).
const (
	ManifestEntryContentData            ManifestEntryDataFileContent = 0
	ManifestEntryContentPositionDeletes ManifestEntryDataFileContent = 1
	ManifestEntryContentEqualityDeletes ManifestEntryDataFileContent = 2
)

type ManifestEntryDataFileFileFormat

// ManifestEntryDataFileFileFormat is the string name held by
// ManifestEntryDataFileV2.FileFormat.
type ManifestEntryDataFileFileFormat string
// Declared format names; note that the values are upper-case.
const (
	FileFormatAvro    ManifestEntryDataFileFileFormat = "AVRO"
	FileFormatOrc     ManifestEntryDataFileFileFormat = "ORC"
	FileFormatParquet ManifestEntryDataFileFileFormat = "PARQUET"
)

type ManifestEntryDataFileV2

// ManifestEntryDataFileV2 is the record nested in ManifestEntryV2.DataFile.
// The avro struct tags give each field's Avro field name; pointer-typed fields
// are the ones that may be left nil.
type ManifestEntryDataFileV2 struct {
	// Type of content stored by the data file: data (0), position deletes (1), or equality deletes (2)
	Content ManifestEntryDataFileContent `avro:"content"`
	// Full URI for the file with FS scheme
	FilePath string `avro:"file_path"`
	// String file format name: "AVRO", "ORC" or "PARQUET" (the FileFormat* constants)
	FileFormat ManifestEntryDataFileFileFormat `avro:"file_format"`
	// Partition data tuple, schema based on the partition spec output using partition field ids for the struct field ids
	Partition map[string]any `avro:"partition"`
	// Number of records in this file
	RecordCount int64 `avro:"record_count"`
	// Total file size in bytes
	FileSizeInBytes int64 `avro:"file_size_in_bytes"`
	// Map from column id to the total size on disk of all regions that store the column. Does not include bytes necessary to read other columns, like footers.
	ColumnSizes *map[int32]int64 `avro:"column_sizes"`
	// Map from column id to number of values in the column (including null and NaN values)
	ValueCounts *map[int32]int64 `avro:"value_counts"`
	// Map from column id to number of null values in the column
	NullValueCounts *map[int32]int64 `avro:"null_value_counts"`
	// Map from column id to number of NaN values in the column
	NanValueCounts *map[int32]int64 `avro:"nan_value_counts"`
	// Map from column id to number of distinct values in the column; distinct counts must be derived using values in the file by counting or using sketches, but not using methods like merging existing distinct counts
	DistinctValueCounts *map[int32]int64 `avro:"distinct_counts"`
	// Map from column id to lower bound in the column serialized as binary. Each value must be less than or equal to all non-null, non-NaN values in the column for the file
	LowerBounds *map[int32]any `avro:"lower_bounds"`
	// Map from column id to upper bound in the column serialized as binary. Each value must be greater than or equal to all non-null, non-NaN values in the column for the file
	UpperBounds *map[int32]any `avro:"upper_bounds"`
	// Implementation-specific key metadata for encryption
	KeyMetadata *[]byte `avro:"key_metadata"`
	// Split offsets for the data file. For example, all row group offsets in a Parquet file. Must be sorted ascending
	SplitOffsets *[]int64 `avro:"split_offsets"`
	// Field ids used to determine row equality in equality delete files. Required when content=2 and should be null otherwise. Fields with ids listed in this column must be present in the delete file
	EqualityIds *[]int32 `avro:"equality_ids"`
	// ID representing sort order for this file
	SortOrderId *int32 `avro:"sort_order_id"`
}

type ManifestEntryStatus

// ManifestEntryStatus is the int32 code held by ManifestEntryV2.Status.
type ManifestEntryStatus int32
// Declared status codes: existing (0), added (1), deleted (2).
const (
	ManifestEntryStatusExisting ManifestEntryStatus = 0
	ManifestEntryStatusAdded    ManifestEntryStatus = 1
	ManifestEntryStatusDeleted  ManifestEntryStatus = 2
)

type ManifestEntryV2

// ManifestEntryV2 is the element type of ManifestV2.Entries: a status, three
// optional (pointer) id/sequence fields, and the nested data file record.
type ManifestEntryV2 struct {
	// Status is used to track additions and deletions. Deletes are informational only and not used in scans.
	Status ManifestEntryStatus `avro:"status"`
	// SnapshotId is the snapshot id where the file was added, if status is "existing" or "added", or deleted, if status is "deleted".
	// Inherited when null.
	SnapshotId *int64 `avro:"snapshot_id"`
	// SequenceNumber represents the data sequence number, a relative age of the file content and should be used for planning which delete files apply to a data file.
	// Inherited when null and status is "added".
	SequenceNumber *int64 `avro:"sequence_number"`
	// FileSequenceNumber represents the sequence number of the snapshot that added the file. The file sequence number can’t be used for pruning delete files as the data within the file may have an older data sequence number.
	// Inherited when null and status is "added".
	FileSequenceNumber *int64 `avro:"file_sequence_number"`
	// DataFile is nested inside the manifest entry so that it can be easily passed to job planning without the manifest entry fields
	DataFile ManifestEntryDataFileV2 `avro:"data_file"`
}

type ManifestFileContent

// ManifestFileContent is the int32 code held by ManifestFileV2.Content.
// Its ManifestMetadataContent method returns the string-typed
// ManifestMetadataContent.
// NOTE(review): presumably that method maps 0/1 to "data"/"deletes" — confirm.
type ManifestFileContent int32
// Declared content codes: data (0) and deletes (1).
const (
	ManifestFileContentData    ManifestFileContent = 0
	ManifestFileContentDeletes ManifestFileContent = 1
)

func (ManifestFileContent) ManifestMetadataContent

func (c ManifestFileContent) ManifestMetadataContent() ManifestMetadataContent

type ManifestFileFieldSummaryV2

// ManifestFileFieldSummaryV2 is the element type of ManifestFileV2.Partitions.
// Only ContainsNull is non-pointer; the other three fields may be nil.
type ManifestFileFieldSummaryV2 struct {
	// Whether the manifest contains at least one partition with a null value for the field
	ContainsNull bool `avro:"contains_null"`
	// Whether the manifest contains at least one partition with a NaN value for the field
	ContainsNan *bool `avro:"contains_nan"`
	// Lower bound for the non-null, non-NaN values in the partition field, or null if all values are null or NaN
	LowerBound *[]byte `avro:"lower_bound"`
	// Upper bound for the non-null, non-NaN values in the partition field, or null if all values are null or NaN
	UpperBound *[]byte `avro:"upper_bound"`
}

type ManifestFileV2

// ManifestFileV2 is the element type of ManifestListV2.Files; the avro struct
// tags give each field's Avro field name.
// NOTE(review): the six count fields are non-pointer integers, so the
// "when null" wording in their comments cannot apply to a value held in this
// struct — confirm whether the wording or the field types should change.
type ManifestFileV2 struct {
	// Location of the manifest file
	ManifestPath string `avro:"manifest_path"`
	// Length of the manifest file in bytes
	ManifestLength int64 `avro:"manifest_length"`
	// ID of a partition spec used to write the manifest; must be listed in table metadata partition-specs
	PartitionSpecId int32 `avro:"partition_spec_id"`
	// The type of files tracked by the manifest, either data or delete files
	Content ManifestFileContent `avro:"content"`
	// The sequence number when the manifest was added to the table
	SequenceNumber int64 `avro:"sequence_number"`
	// The minimum data sequence number of all live data or delete files in the manifest
	MinSequenceNumber int64 `avro:"min_sequence_number"`
	// ID of the snapshot where the manifest file was added
	AddedSnapshotId int64 `avro:"added_snapshot_id"`
	// Number of entries in the manifest that have status ADDED, when null this is assumed to be non-zero
	AddedDataFilesCount int32 `avro:"added_data_files_count"`
	// Number of entries in the manifest that have status EXISTING, when null this is assumed to be non-zero
	ExistingDataFilesCount int32 `avro:"existing_data_files_count"`
	// Number of entries in the manifest that have status DELETED, when null this is assumed to be non-zero
	DeletedDataFilesCount int32 `avro:"deleted_data_files_count"`
	// Number of rows in all files in the manifest that have status ADDED, when null this is assumed to be non-zero
	AddedRowsCount int64 `avro:"added_rows_count"`
	// Number of rows in all files in the manifest that have status EXISTING, when null this is assumed to be non-zero
	ExistingRowsCount int64 `avro:"existing_rows_count"`
	// Number of rows in all files in the manifest that have status DELETED, when null this is assumed to be non-zero
	DeletedRowsCount int64 `avro:"deleted_rows_count"`
	// A list of field summaries for each partition field in the spec. Each field in the list corresponds to a field in the manifest file’s partition spec
	Partitions *[]ManifestFileFieldSummaryV2 `avro:"partitions"`
	// Implementation-specific key metadata for encryption
	KeyMetadata *[]byte `avro:"key_metadata"`
}

type ManifestListV2

// ManifestListV2 wraps a slice of ManifestFileV2 entries.
// NOTE(review): its Write(io.Writer) method presumably serializes Files — confirm.
type ManifestListV2 struct {
	// Files holds one ManifestFileV2 per listed manifest.
	Files []ManifestFileV2
}

ManifestListV2 stores the list of manifests for a snapshot, and includes summary metadata that can be used to avoid scanning all the manifests in a snapshot when planning a table scan. See https://iceberg.apache.org/spec/#manifest-lists

func (*ManifestListV2) Write

func (m *ManifestListV2) Write(w io.Writer) error

type ManifestMetadataContent

// ManifestMetadataContent is the string form held by ManifestV2.Content.
// Its ManifestFileContent method returns the int32-typed ManifestFileContent.
// NOTE(review): presumably that method maps "data"/"deletes" to 0/1 — confirm.
type ManifestMetadataContent string
// Declared content names: "data" and "deletes".
const (
	ManifestMetadataContentData    ManifestMetadataContent = "data"
	ManifestMetadataContentDeletes ManifestMetadataContent = "deletes"
)

func (ManifestMetadataContent) ManifestFileContent

func (c ManifestMetadataContent) ManifestFileContent() ManifestFileContent

type ManifestV2

// ManifestV2 bundles a schema pointer, a partition spec, a content kind and
// the manifest's entries. None of its fields carries a struct tag.
type ManifestV2 struct {
	// Schema is the table schema at the time the manifest was written
	Schema *SchemaV2
	// PartitionSpec is the partition spec used to write the manifest
	PartitionSpec PartitionSpecV2
	// Type of content files tracked by the manifest: “data” or “deletes”
	Content ManifestMetadataContent
	// Entries represents the data files in the manifest
	Entries []ManifestEntryV2
}

ManifestV2 represents the Iceberg Manifest File. See https://iceberg.apache.org/spec/#manifests

func (*ManifestV2) Write

func (m *ManifestV2) Write(w io.Writer) error

type MapType

// MapType describes a map through its key (id and DataType) and its value
// (id, DataType, and whether the value is required).
type MapType struct {
	KeyId         int32
	Key           DataType
	ValueId       int32
	Value         DataType
	ValueRequired bool
}

func (*MapType) AsString

func (t *MapType) AsString() string

func (*MapType) AvroSchema

func (t *MapType) AvroSchema() (avro.Schema, error)

func (*MapType) MarshalJSON

func (t *MapType) MarshalJSON() ([]byte, error)

func (*MapType) UnmarshalJSON

func (t *MapType) UnmarshalJSON(b []byte) error

type Namespace

type Namespace []string

func NewNamespace

func NewNamespace(s string) Namespace

func (Namespace) Equal

func (n Namespace) Equal(other Namespace) bool

func (Namespace) QueryString

func (n Namespace) QueryString() string

func (Namespace) String

func (n Namespace) String() string

type NullOrder

type NullOrder string

type PartitionFieldV1

// PartitionFieldV1 is the element type of PartitionSpecV1.Fields and of
// TableMetadataV1.PartitionSpec. FieldId is a pointer tagged omitempty; the
// other fields are always present.
type PartitionFieldV1 struct {
	// SourceId matches SchemaV1.Fields.[*].Id
	SourceId int32 `json:"source-id"`
	// FieldId identifies this partition field uniquely within a partition spec
	FieldId *int32 `json:"field-id,omitempty"`
	// Name is the (human-readable?) name of this partition field
	Name string `json:"name"`
	// Transform describes how to map the source value to generate a partition field value
	Transform PartitionTransform `json:"transform"`
}

type PartitionFieldV2

// PartitionFieldV2 is the element type of PartitionSpecV2.Fields. Unlike
// PartitionFieldV1 it has a non-pointer FieldId and no json struct tags; the
// type declares MarshalJSON and UnmarshalJSON methods instead.
type PartitionFieldV2 struct {
	// SourceId matches SchemaV2.Fields.[*].Id
	SourceId int32
	// FieldId identifies this partition field uniquely among all partition specs in the table
	FieldId int32
	// Name is the (human-readable?) name of this partition field
	Name string
	// Transform describes how to map the source value to generate a partition field value
	Transform PartitionTransform
}

func (*PartitionFieldV2) AvroField

func (f *PartitionFieldV2) AvroField() (*avro.Field, error)

func (*PartitionFieldV2) MarshalJSON

func (f *PartitionFieldV2) MarshalJSON() ([]byte, error)

func (*PartitionFieldV2) UnmarshalJSON

func (f *PartitionFieldV2) UnmarshalJSON(b []byte) error

type PartitionSpecV1

// PartitionSpecV1 pairs a spec id (JSON "spec-id") with its PartitionFieldV1
// list (JSON "fields"). It is the element type of
// TableMetadataV1.PartitionSpecs.
type PartitionSpecV1 struct {
	Id     int32              `json:"spec-id"`
	Fields []PartitionFieldV1 `json:"fields"`
}

type PartitionSpecV2

// PartitionSpecV2 pairs a spec id (JSON "spec-id") with its PartitionFieldV2
// list (JSON "fields"). It is the element type of
// TableMetadataV2.PartitionSpecs and the type of ManifestV2.PartitionSpec.
type PartitionSpecV2 struct {
	Id     int32              `json:"spec-id"`
	Fields []PartitionFieldV2 `json:"fields"`
}

func (*PartitionSpecV2) AvroSchema

func (ps *PartitionSpecV2) AvroSchema(name string) (*avro.RecordSchema, error)

type PartitionTransform

// PartitionTransform is the interface stored in the Transform field of
// PartitionFieldV1, PartitionFieldV2 and SortField.
type PartitionTransform interface {
	// Name returns the transform's name as a string.
	Name() string
	// ResultType returns the DataType of the transform's result.
	ResultType() DataType
}

See https://iceberg.apache.org/spec/#partition-transforms

type PartitionTransformBucket

type PartitionTransformBucket struct {
	// contains filtered or unexported fields
}

func NewPartitionTransformBucket

func NewPartitionTransformBucket(sourceType DataType, n int32) (*PartitionTransformBucket, error)

func (*PartitionTransformBucket) Name

func (pt *PartitionTransformBucket) Name() string

func (*PartitionTransformBucket) ResultType

func (pt *PartitionTransformBucket) ResultType() DataType

type PartitionTransformDay

type PartitionTransformDay struct{}

func NewPartitionTransformDay

func NewPartitionTransformDay(sourceType DataType) (*PartitionTransformDay, error)

func (*PartitionTransformDay) Name

func (pt *PartitionTransformDay) Name() string

func (*PartitionTransformDay) ResultType

func (pt *PartitionTransformDay) ResultType() DataType

type PartitionTransformHour

type PartitionTransformHour struct{}

func NewPartitionTransformHour

func NewPartitionTransformHour(sourceType DataType) (*PartitionTransformHour, error)

func (*PartitionTransformHour) Name

func (pt *PartitionTransformHour) Name() string

func (*PartitionTransformHour) ResultType

func (pt *PartitionTransformHour) ResultType() DataType

type PartitionTransformIdentity

type PartitionTransformIdentity struct {
	// contains filtered or unexported fields
}

func NewPartitionTransformIdentity

func NewPartitionTransformIdentity(sourceType DataType) *PartitionTransformIdentity

func (*PartitionTransformIdentity) Name

func (*PartitionTransformIdentity) ResultType

func (pt *PartitionTransformIdentity) ResultType() DataType

type PartitionTransformMonth

type PartitionTransformMonth struct{}

func NewPartitionTransformMonth

func NewPartitionTransformMonth(sourceType DataType) (*PartitionTransformMonth, error)

func (*PartitionTransformMonth) Name

func (pt *PartitionTransformMonth) Name() string

func (*PartitionTransformMonth) ResultType

func (pt *PartitionTransformMonth) ResultType() DataType

type PartitionTransformTruncate

type PartitionTransformTruncate struct {
	// contains filtered or unexported fields
}

func NewPartitionTransformTruncate

func NewPartitionTransformTruncate(sourceType DataType, width int32) (*PartitionTransformTruncate, error)

func (*PartitionTransformTruncate) Name

func (*PartitionTransformTruncate) ResultType

func (pt *PartitionTransformTruncate) ResultType() DataType

type PartitionTransformVoid

type PartitionTransformVoid struct{}

func NewPartitionTransformVoid

func NewPartitionTransformVoid() *PartitionTransformVoid

func (*PartitionTransformVoid) Name

func (pt *PartitionTransformVoid) Name() string

func (*PartitionTransformVoid) ResultType

func (pt *PartitionTransformVoid) ResultType() DataType

type PartitionTransformYear

type PartitionTransformYear struct{}

func NewPartitionTransformYear

func NewPartitionTransformYear(sourceType DataType) (*PartitionTransformYear, error)

func (*PartitionTransformYear) Name

func (pt *PartitionTransformYear) Name() string

func (*PartitionTransformYear) ResultType

func (pt *PartitionTransformYear) ResultType() DataType

type PrimitiveType

// PrimitiveType embeds DataType and adds no methods of its own. It is the
// type of the PrimitiveTypes fields and of the values returned by
// PrimitiveTypes.Decimal and PrimitiveTypes.Fixed.
type PrimitiveType interface {
	DataType
}

type Properties

type Properties map[string]string

func (Properties) Equal

func (p Properties) Equal(other Properties) bool

func (Properties) String

func (p Properties) String() string

type SchemaV1

// SchemaV1 embeds StructType and adds an optional schema id and optional
// identifier field ids (both pointers tagged omitempty).
// NOTE(review): StructType declares MarshalJSON and UnmarshalJSON, and
// embedding promotes them to SchemaV1. encoding/json would then call those
// methods rather than use the json tags below, so "schema-id" and
// "identifier-field-ids" may be dropped. SchemaV2 declares its own
// MarshalJSON; no such method is listed for SchemaV1 — confirm.
type SchemaV1 struct {
	StructType
	Id                 *int32   `json:"schema-id,omitempty"`
	IdentifierFieldIds *[]int32 `json:"identifier-field-ids,omitempty"`
}

type SchemaV2

// SchemaV2 embeds StructType and adds a required schema id and optional
// identifier field ids. It declares its own MarshalJSON.
// NOTE(review): no UnmarshalJSON is listed for SchemaV2, so decoding would go
// through the UnmarshalJSON promoted from StructType — confirm that Id and
// IdentifierFieldIds are populated on decode.
type SchemaV2 struct {
	StructType
	Id                 int32    `json:"schema-id"`
	IdentifierFieldIds *[]int32 `json:"identifier-field-ids,omitempty"`
}

SchemaV2 is an ordered list of named columns. https://iceberg.apache.org/spec/#schemas

func (*SchemaV2) MarshalJSON

func (s *SchemaV2) MarshalJSON() ([]byte, error)

type SnapshotV1

// SnapshotV1 is the element type of TableMetadataV1.Snapshots. Every field
// except Id and TimestampMs is a pointer tagged omitempty.
type SnapshotV1 struct {
	Id           int64              `json:"snapshot-id"`
	ParentId     *int64             `json:"parent-snapshot-id,omitempty"`
	TimestampMs  int64              `json:"timestamp-ms"`
	ManifestList *string            `json:"manifest-list,omitempty"`
	Manifests    *[]string          `json:"manifests,omitempty"`
	Summary      *map[string]string `json:"summary,omitempty"`
	SchemaId     *int32             `json:"schema-id,omitempty"`
}

type SnapshotV2

// SnapshotV2 is the element type of TableMetadataV2.Snapshots. Compared with
// SnapshotV1 it adds SequenceNumber, makes ManifestList and Summary
// non-pointer, and has no Manifests field.
type SnapshotV2 struct {
	Id             int64             `json:"snapshot-id"`
	ParentId       *int64            `json:"parent-snapshot-id,omitempty"`
	SequenceNumber int64             `json:"sequence-number"`
	TimestampMs    int64             `json:"timestamp-ms"`
	ManifestList   string            `json:"manifest-list"`
	Summary        map[string]string `json:"summary"`
	SchemaId       *int32            `json:"schema-id,omitempty"`
}

SnapshotV2 represents the state of a table at some time and is used to access the complete set of data files in the table.

A snapshot consists of one or more file manifests, and the complete table contents are the union of all the data files in those manifests. See https://iceberg.apache.org/spec/#snapshots, https://github.com/apache/iceberg/blob/apache-iceberg-1.3.0/core/src/main/java/org/apache/iceberg/BaseSnapshot.java and https://github.com/apache/iceberg/blob/apache-iceberg-1.3.0/core/src/main/java/org/apache/iceberg/SnapshotParser.java

type SortDirection

type SortDirection string

type SortField

// SortField is the element type of SortOrder.Fields. It has no struct tags;
// the type declares MarshalJSON and UnmarshalJSON methods.
type SortField struct {
	// SourceId matches SchemaV2.Fields.[*].Id
	SourceId int32
	// Transform describes how to map the source value to a sort value
	Transform PartitionTransform
	// Direction is a SortDirection; the declared values are SortDirectionAsc and SortDirectionDesc
	Direction SortDirection
	// NullOrder is a NullOrder; the declared values are NullOrderNullsFirst and NullOrderNullsLast
	NullOrder NullOrder
}

func (*SortField) MarshalJSON

func (f *SortField) MarshalJSON() ([]byte, error)

func (*SortField) UnmarshalJSON

func (f *SortField) UnmarshalJSON(b []byte) error

type SortOrder

// SortOrder pairs an order id (JSON "order-id") with its SortField list
// (JSON "fields"). It is the element type of the SortOrders field of
// TableMetadataV1 and TableMetadataV2.
type SortOrder struct {
	Id     int32       `json:"order-id"`
	Fields []SortField `json:"fields"`
}

type StringType

type StringType struct{}

func (*StringType) AsString

func (*StringType) AsString() string

func (*StringType) AvroSchema

func (*StringType) AvroSchema() (avro.Schema, error)

func (*StringType) MarshalJSON

func (t *StringType) MarshalJSON() ([]byte, error)

type StructField

// StructField is the element type of StructType.Fields: an id, a name, a
// DataType, a required flag and an optional doc string (the only field tagged
// omitempty). The type also declares its own UnmarshalJSON.
type StructField struct {
	Id       int32    `json:"id"`
	Name     string   `json:"name"`
	Type     DataType `json:"type"`
	Required bool     `json:"required"`
	Doc      string   `json:"doc,omitempty"`
}

func (*StructField) ParquetNode

func (f *StructField) ParquetNode() (parquetschema.Node, error)

ParquetNode converts the StructField to a Parquet Node. https://iceberg.apache.org/spec/#parquet

func (*StructField) UnmarshalJSON

func (f *StructField) UnmarshalJSON(b []byte) error

type StructType

// StructType is a list of StructFields. SchemaV1 and SchemaV2 embed it, and
// it declares MarshalJSON and UnmarshalJSON methods.
type StructType struct {
	Fields []StructField
}

func (*StructType) AsString

func (t *StructType) AsString() string

func (*StructType) AvroSchema

func (t *StructType) AvroSchema() (avro.Schema, error)

func (*StructType) MarshalJSON

func (t *StructType) MarshalJSON() ([]byte, error)

func (*StructType) UnmarshalJSON

func (t *StructType) UnmarshalJSON(b []byte) error

type Table

// Table pairs a TableIdentifier with the table's TableMetadata.
type Table struct {
	TableId  TableIdentifier
	Metadata TableMetadata
}

type TableIdentifier

// TableIdentifier names a table by Namespace and Name.
// NOTE(review): the String method on *TableIdentifier is declared to return
// any rather than string, so the type does not satisfy fmt.Stringer — confirm
// whether that is intended.
type TableIdentifier struct {
	Namespace Namespace `json:"namespace"`
	Name      string    `json:"name"`
}

func NewTableIdentifier

func NewTableIdentifier(namespace, name string) TableIdentifier

func (*TableIdentifier) String

func (ti *TableIdentifier) String() any

type TableMetadata

// TableMetadata is the interface returned by TableMetadataFromJSON and stored
// in Table.Metadata. *TableMetadataV1 and *TableMetadataV2 both declare the
// two methods it requires.
type TableMetadata interface {
	// FormatVersion returns the format version as an int32.
	FormatVersion() int32
	json.Marshaler
}

TableMetadata files track a table's schema, partitioning config, custom properties, and snapshots of the data.

func TableMetadataFromJSON

func TableMetadataFromJSON(data []byte) (TableMetadata, error)

type TableMetadataV1

// TableMetadataV1 is the V1 table-metadata document; *TableMetadataV1
// declares FormatVersion and MarshalJSON. Most fields are pointers tagged
// omitempty. SnapshotLog and MetadataLog entries are kept as untyped maps and
// Statistics as untyped values.
type TableMetadataV1 struct {
	TableUUID     *string `json:"table-uuid,omitempty"`
	Location      string  `json:"location"`
	LastUpdatedMs int64   `json:"last-updated-ms"`
	// LastColumnId is the highest assigned column ID in all Schemas in table
	LastColumnId           int32              `json:"last-column-id"`
	Schema                 SchemaV1           `json:"schema"`
	Schemas                *[]SchemaV1        `json:"schemas,omitempty"`
	CurrentSchemaId        *int32             `json:"current-schema-id,omitempty"`
	PartitionSpec          []PartitionFieldV1 `json:"partition-spec"`
	PartitionSpecs         *[]PartitionSpecV1 `json:"partition-specs,omitempty"`
	DefaultPartitionSpecId *int32             `json:"default-spec-id,omitempty"`
	LastPartitionId        *int32             `json:"last-partition-id,omitempty"`
	Properties             *Properties        `json:"properties,omitempty"`
	CurrentSnapshotId      *int64             `json:"current-snapshot-id,omitempty"`
	Snapshots              *[]SnapshotV1      `json:"snapshots,omitempty"`
	SnapshotLog            *[]map[string]any  `json:"snapshot-log,omitempty"`
	MetadataLog            *[]map[string]any  `json:"metadata-log,omitempty"`
	SortOrders             *[]SortOrder       `json:"sort-orders,omitempty"`
	DefaultSortOrderId     *int32             `json:"default-sort-order-id,omitempty"`
	Statistics             *[]any             `json:"statistics,omitempty"`
}

func (*TableMetadataV1) FormatVersion

func (tm *TableMetadataV1) FormatVersion() int32

func (*TableMetadataV1) MarshalJSON

func (tm *TableMetadataV1) MarshalJSON() ([]byte, error)

type TableMetadataV2

// TableMetadataV2 is the V2 table-metadata document; *TableMetadataV2
// declares FormatVersion, MarshalJSON and several lookup-by-id methods.
// Compared with TableMetadataV1 it has non-pointer TableUUID, Schemas,
// PartitionSpecs and SortOrders, and adds LastSequenceNumber. SnapshotLog,
// MetadataLog, Refs and Statistics are untyped placeholders marked TODO.
type TableMetadataV2 struct {
	TableUUID          string `json:"table-uuid"`
	Location           string `json:"location"`
	LastSequenceNumber int64  `json:"last-sequence-number"`
	LastUpdatedMs      int64  `json:"last-updated-ms"`
	// LastColumnId is the highest assigned column ID in all Schemas in table
	LastColumnId           int32             `json:"last-column-id"`
	Schemas                []SchemaV2        `json:"schemas"`
	CurrentSchemaId        int32             `json:"current-schema-id"`
	PartitionSpecs         []PartitionSpecV2 `json:"partition-specs"`
	DefaultPartitionSpecId int32             `json:"default-spec-id"`
	LastPartitionId        int32             `json:"last-partition-id"`
	Properties             *Properties       `json:"properties,omitempty"`
	CurrentSnapshotId      *int64            `json:"current-snapshot-id,omitempty"`
	Snapshots              *[]SnapshotV2     `json:"snapshots,omitempty"`
	SnapshotLog            *any              `json:"snapshot-log,omitempty"` // TODO implement
	MetadataLog            *any              `json:"metadata-log,omitempty"` // TODO implement
	SortOrders             []SortOrder       `json:"sort-orders"`
	DefaultSortOrderId     int32             `json:"default-sort-order-id"`
	Refs                   *any              `json:"refs,omitempty"`       // TODO implement
	Statistics             *any              `json:"statistics,omitempty"` // TODO implement
}

func (*TableMetadataV2) CurrentSchema

func (tm *TableMetadataV2) CurrentSchema() *SchemaV2

func (*TableMetadataV2) FormatVersion

func (tm *TableMetadataV2) FormatVersion() int32

func (*TableMetadataV2) MarshalJSON

func (tm *TableMetadataV2) MarshalJSON() ([]byte, error)

func (*TableMetadataV2) PartitionFieldById

func (tm *TableMetadataV2) PartitionFieldById(fieldId int32) *PartitionFieldV2

func (*TableMetadataV2) PartitionSpecById

func (tm *TableMetadataV2) PartitionSpecById(specId int32) *PartitionSpecV2

func (*TableMetadataV2) SchemaById

func (tm *TableMetadataV2) SchemaById(schemaId int32) *SchemaV2

func (*TableMetadataV2) SchemaFieldById

func (tm *TableMetadataV2) SchemaFieldById(fieldId int32) *StructField

type TimeType

type TimeType struct{}

func (*TimeType) AsString

func (*TimeType) AsString() string

func (*TimeType) AvroSchema

func (*TimeType) AvroSchema() (avro.Schema, error)

func (*TimeType) MarshalJSON

func (t *TimeType) MarshalJSON() ([]byte, error)

type TimestampType

type TimestampType struct{}

func (*TimestampType) AsString

func (*TimestampType) AsString() string

func (*TimestampType) AvroSchema

func (*TimestampType) AvroSchema() (avro.Schema, error)

func (*TimestampType) MarshalJSON

func (t *TimestampType) MarshalJSON() ([]byte, error)

type TimestampTzType

type TimestampTzType struct{}

func (*TimestampTzType) AsString

func (*TimestampTzType) AsString() string

func (*TimestampTzType) AvroSchema

func (*TimestampTzType) AvroSchema() (avro.Schema, error)

func (*TimestampTzType) MarshalJSON

func (t *TimestampTzType) MarshalJSON() ([]byte, error)

type UUIDType

type UUIDType struct{}

func (*UUIDType) AsString

func (*UUIDType) AsString() string

func (*UUIDType) AvroSchema

func (t *UUIDType) AvroSchema() (avro.Schema, error)

func (*UUIDType) MarshalJSON

func (t *UUIDType) MarshalJSON() ([]byte, error)

Directories

Path Synopsis

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL