serialization

package
v0.0.0-...-0d40728 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Dec 12, 2021 License: Apache-2.0 Imports: 20 Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

func ReadMetadata

func ReadMetadata(schemaPath string) (*model.Metadata, error)

ReadMetadata reads the metadata in the specified path.

func ResultToInputCSV

func ResultToInputCSV(resultURI string) ([][]string, error)

ResultToInputCSV takes a result produced by a TA2 pipeline run and ensures that it is in a format suitable for storage as a D3M dataset.

func WriteData

func WriteData(uri string, data [][]string) error

WriteData writes data to storage using the specified URI.

func WriteDataset

func WriteDataset(folderPath string, dataset *RawDataset) error

WriteDataset determines which storage engine to use and then writes out the metadata and the data using it.

func WriteMetadata

func WriteMetadata(uri string, metadata *model.Metadata) error

WriteMetadata writes the metadata to disk.

Types

type CSV

type CSV struct {
}

CSV represents a dataset storage backed with csv data and json schema doc.

func NewCSV

func NewCSV() *CSV

NewCSV creates a new csv backed storage.

func (*CSV) ReadData

func (d *CSV) ReadData(uri string) ([][]string, error)

ReadData reads the data from a csv file.

func (*CSV) ReadDataset

func (d *CSV) ReadDataset(schemaFile string) (*RawDataset, error)

ReadDataset reads a raw dataset from the file system, loading the csv data into memory.

func (*CSV) ReadMetadata

func (d *CSV) ReadMetadata(uri string) (*model.Metadata, error)

ReadMetadata reads the dataset doc from disk.

func (*CSV) ReadRawVariables

func (d *CSV) ReadRawVariables(uri string) ([]string, error)

ReadRawVariables reads the csv header file to get a list of variables in the file.

func (*CSV) WriteData

func (d *CSV) WriteData(uri string, data [][]string) error

WriteData writes data to a csv file.

func (*CSV) WriteDataset

func (d *CSV) WriteDataset(uri string, data *RawDataset) error

WriteDataset writes the raw dataset to the file system, writing out the data to a csv file.

func (*CSV) WriteMetadata

func (d *CSV) WriteMetadata(uri string, meta *model.Metadata, extended bool, update bool) error

WriteMetadata writes the dataset doc to disk.

type Parquet

type Parquet struct {
}

Parquet represents a dataset storage backed with parquet data and json schema doc.

func NewParquet

func NewParquet() *Parquet

NewParquet creates a new parquet backed storage.

func (*Parquet) ReadData

func (d *Parquet) ReadData(uri string) ([][]string, error)

ReadData reads the data from a parquet file.

func (*Parquet) ReadDataset

func (d *Parquet) ReadDataset(schemaFile string) (*RawDataset, error)

ReadDataset reads a raw dataset from the file system, loading the parquet data into memory.

func (*Parquet) ReadMetadata

func (d *Parquet) ReadMetadata(uri string) (*model.Metadata, error)

ReadMetadata reads the dataset doc from disk.

func (*Parquet) ReadRawVariables

func (d *Parquet) ReadRawVariables(uri string) ([]string, error)

ReadRawVariables reads the metadata and extracts the field names.

func (*Parquet) WriteData

func (d *Parquet) WriteData(uri string, data [][]string) error

WriteData writes data to a parquet file.

func (*Parquet) WriteDataset

func (d *Parquet) WriteDataset(uri string, data *RawDataset) error

WriteDataset writes the raw dataset to the file system, writing out the data to a parquet file.

func (*Parquet) WriteMetadata

func (d *Parquet) WriteMetadata(uri string, meta *model.Metadata, extended bool, update bool) error

WriteMetadata writes the dataset doc to disk.

type RawDataset

type RawDataset struct {
	ID              string
	Name            string
	Metadata        *model.Metadata
	Data            [][]string
	DefinitiveTypes bool
}

RawDataset contains basic information about the structure of the dataset as well as the raw learning data.

func ReadDataset

func ReadDataset(schemaPath string) (*RawDataset, error)

ReadDataset reads the metadata to find the main data reference, then reads that.

func (*RawDataset) AddField

func (d *RawDataset) AddField(variable *model.Variable) error

AddField adds a field to the dataset, updating both the data and the metadata.

func (*RawDataset) FieldExists

func (d *RawDataset) FieldExists(variable *model.Variable) bool

FieldExists returns true if a field is already part of the metadata.

func (*RawDataset) FilterDataset

func (d *RawDataset) FilterDataset(filter map[string]bool)

FilterDataset updates the dataset to only keep the rows that have the specified column in the filter map set to true.

func (*RawDataset) GetVariableIndex

func (d *RawDataset) GetVariableIndex(variableHeaderName string) int

GetVariableIndex returns the index of the variable as found in the header or -1 if not found in the header.

func (*RawDataset) GetVariableIndices

func (d *RawDataset) GetVariableIndices(variableHeaderNames []string) (map[string]int, error)

GetVariableIndices returns the mapping of variable header name to header index. It will error if a field is not found in the header.

func (*RawDataset) GetVariableMetadata

func (d *RawDataset) GetVariableMetadata(variableHeaderName string) *model.Variable

GetVariableMetadata returns the variable metadata using the header name.

func (*RawDataset) SyncMetadata

func (d *RawDataset) SyncMetadata(metaToSync *model.Metadata)

SyncMetadata updates the key metadata properties to match a given metadata. This is often used to update the metadata for prediction or prefeaturization purposes.

func (*RawDataset) UpdateDataset

func (d *RawDataset) UpdateDataset(updates map[int]map[string]string)

UpdateDataset updates a dataset with the values specified in the updates dictionary. If the specified column value is not found in the dictionary, then it is left unchanged. Updates are specified by column index value.

type Storage

type Storage interface {
	ReadDataset(uri string) (*RawDataset, error)
	WriteDataset(uri string, data *RawDataset) error
	ReadData(uri string) ([][]string, error)
	WriteData(uri string, data [][]string) error
	ReadMetadata(uri string) (*model.Metadata, error)
	WriteMetadata(uri string, metadata *model.Metadata, extended bool, update bool) error
	ReadRawVariables(uri string) ([]string, error)
}

Storage defines the base functions needed to store datasets to a backing storage for interactions with an auto ml server.

func GetCSVStorage

func GetCSVStorage() Storage

GetCSVStorage returns the instantiated csv storage.

func GetParquetStorage

func GetParquetStorage() Storage

GetParquetStorage returns the instantiated parquet storage.

func GetStorage

func GetStorage(uri string) Storage

GetStorage returns the storage to use based on URI.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL