core

package

v0.2.0 Latest Latest Go to latest Published: Jan 9, 2019 License: Apache-2.0 Imports: 21 Imported by: 0

Details

Valid go.mod file

The Go module system was introduced in Go 1.11 and is the official dependency management solution for Go.
Redistributable license

Redistributable licenses place minimal restrictions on how software can be used, modified, and redistributed.
Tagged version

Modules with tagged versions give importers more predictable builds.
Stable version

When a project reaches major version v1 it is considered stable.
Learn more about best practices

Repository

github.com/giagiannis/data-profiler

Links

Open Source Insights

Documentation ¶

Overview ¶

Package core contains the necessary structs and functions used to execute the dataset analysis.

Index ¶

Constants
Variables
func DistanceToSimilarity(distance float64) float64
func Kendall(a, b []float64) float64
func MaxAbsoluteCountPercentageError(actual, predicted []float64, percentile int) float64
func MaxAbsolutePercentageError(actual, predicted []float64) float64
func Mean(a []float64) float64
func MeanAbsoluteError(actual, predicted []float64) float64
func MeanAbsolutePercentageError(actual, predicted []float64) float64
func MedianAbsolutePercentageError(actual, predicted []float64) float64
func Pearson(a, b []float64) float64
func Percentile(values []float64, percentile int) float64
func RSquared(actual, predicted []float64) float64
func Rank(a []float64) []int
func RootMeanSquaredError(actual, predicted []float64) float64
func RootMeanSquaredLogError(actual, predicted []float64) float64
func SerializeCoordinates(coords []DatasetCoordinates) []byte
func SimilarityToDistance(similarity float64) float64
func Spearman(a, b []float64) float64
func StdDev(a []float64) float64
type AbstractDatasetSimilarityEstimator
- func (a *AbstractDatasetSimilarityEstimator) Concurrency() int
- func (a *AbstractDatasetSimilarityEstimator) Datasets() []*Dataset
- func (a *AbstractDatasetSimilarityEstimator) Duration() float64
- func (a *AbstractDatasetSimilarityEstimator) PopulationPolicy() DatasetSimilarityPopulationPolicy
- func (a *AbstractDatasetSimilarityEstimator) SetPopulationPolicy(pol DatasetSimilarityPopulationPolicy)
- func (a *AbstractDatasetSimilarityEstimator) SimilarityMatrix() *DatasetSimilarityMatrix
type AbstractModeler
- func (a *AbstractModeler) AppxValues() []float64
- func (a *AbstractModeler) Datasets() []*Dataset
- func (a *AbstractModeler) ErrorMetrics() map[string]float64
- func (a *AbstractModeler) EvalTime() float64
- func (a *AbstractModeler) ExecTime() float64
- func (a *AbstractModeler) Samples() map[int]float64
type BhattacharyyaEstimator
- func (e *BhattacharyyaEstimator) Compute() error
- func (e *BhattacharyyaEstimator) Configure(conf map[string]string)
- func (e *BhattacharyyaEstimator) Deserialize(b []byte)
- func (e *BhattacharyyaEstimator) Options() map[string]string
- func (e *BhattacharyyaEstimator) Serialize() []byte
- func (e *BhattacharyyaEstimator) Similarity(a, b *Dataset) float64
type Clustering
- func NewClustering(similarities *DatasetSimilarityMatrix, datasets []*Dataset) *Clustering
- func (c *Clustering) Compute() error
- func (c *Clustering) Results() *Dendrogram
- func (c *Clustering) SetConcurrency(concurrency int)
type CompositeEstimator
- func (e *CompositeEstimator) Compute() error
- func (e *CompositeEstimator) Configure(conf map[string]string)
- func (e *CompositeEstimator) Deserialize(b []byte)
- func (e *CompositeEstimator) Options() map[string]string
- func (e *CompositeEstimator) Serialize() []byte
- func (e *CompositeEstimator) Similarity(a, b *Dataset) float64
type CorrelationEstimator
- func (e *CorrelationEstimator) Compute() error
- func (e *CorrelationEstimator) Configure(conf map[string]string)
- func (e *CorrelationEstimator) Deserialize(b []byte)
- func (e *CorrelationEstimator) Options() map[string]string
- func (e *CorrelationEstimator) Serialize() []byte
- func (e *CorrelationEstimator) Similarity(a, b *Dataset) float64
type CorrelationEstimatorNormalizationType
- func (s CorrelationEstimatorNormalizationType) String() string
type CorrelationEstimatorType
- func (s CorrelationEstimatorType) String() string
type DataPartitioner
- func DeserializePartitioner(b []byte) DataPartitioner
- func NewDataPartitioner(dpType DataPartitionerType, conf map[string]string) DataPartitioner
type DataPartitionerType
type Dataset
- func DiscoverDatasets(inputDir string) []*Dataset
- func NewDataset(path string) *Dataset
- func (d Dataset) Data() []DatasetTuple
- func (d Dataset) Header() []string
- func (d Dataset) ID() string
- func (d Dataset) Path() string
- func (d *Dataset) ReadFromFile() error
- func (d Dataset) String() string
type DatasetCoordinates
- func DeserializeCoordinates(buffer []byte) []DatasetCoordinates
type DatasetEvaluator
- func NewDatasetEvaluator(evalType DatasetEvaluatorType, params map[string]string) (DatasetEvaluator, error)
type DatasetEvaluatorType
type DatasetPartitioner
- func NewDatasetPartitioner(input, output string, splits int, partitionType PartitionerType) *DatasetPartitioner
- func (a *DatasetPartitioner) Delete()
- func (a *DatasetPartitioner) Partition()
type DatasetScores
- func AppxScores(modeler Modeler) *DatasetScores
- func NewDatasetScores() *DatasetScores
- func (s *DatasetScores) Deserialize(buf []byte) error
- func (s *DatasetScores) Serialize() ([]byte, error)
type DatasetSimilarityEstimator
- func DeserializeSimilarityEstimator(b []byte) DatasetSimilarityEstimator
- func NewDatasetSimilarityEstimator(estType DatasetSimilarityEstimatorType, datasets []*Dataset) DatasetSimilarityEstimator
type DatasetSimilarityEstimatorType
- func NewDatasetSimilarityEstimatorType(estimatorType string) *DatasetSimilarityEstimatorType
- func (t DatasetSimilarityEstimatorType) String() string
type DatasetSimilarityMatrix
- func NewDatasetSimilarities(capacity int) *DatasetSimilarityMatrix
- func (s *DatasetSimilarityMatrix) Capacity() int
- func (s *DatasetSimilarityMatrix) Deserialize(buff []byte) error
- func (s *DatasetSimilarityMatrix) FullyCalculatedNodes() int
- func (s *DatasetSimilarityMatrix) Get(idxA, idxB int) float64
- func (s *DatasetSimilarityMatrix) IndexDisabled(flag bool)
- func (s *DatasetSimilarityMatrix) LeastSimilar() (int, float64)
- func (s *DatasetSimilarityMatrix) Serialize() []byte
- func (s *DatasetSimilarityMatrix) Set(idxA, idxB int, value float64)
- func (s DatasetSimilarityMatrix) String() string
type DatasetSimilarityPopulationPolicy
- func (s *DatasetSimilarityPopulationPolicy) Deserialize(b []byte)
- func (s *DatasetSimilarityPopulationPolicy) Serialize() []byte
type DatasetSimilarityPopulationPolicyType
type DatasetTuple
- func DatasetsIntersection(a, b *Dataset) []DatasetTuple
- func DatasetsUnion(a, b *Dataset) []DatasetTuple
- func (t *DatasetTuple) Deserialize(data string)
- func (t DatasetTuple) Equals(o DatasetTuple) bool
- func (t *DatasetTuple) Serialize() string
- func (t DatasetTuple) String() string
type DatasetTuples
- func (slice DatasetTuples) Len() int
- func (slice DatasetTuples) Less(i, j int) bool
- func (slice DatasetTuples) Swap(i, j int)
type Dendrogram
- func NewDendrogram(datasets []*Dataset) *Dendrogram
- func (d *Dendrogram) GetClusters(level int) [][]*Dataset
- func (d *Dendrogram) Heights() (int, int)
- func (d *Dendrogram) String() string
type DendrogramNode
- func (n DendrogramNode) String() string
type FileBasedEvaluator
- func (e *FileBasedEvaluator) Evaluate(dataset string) (float64, error)
type JaccardEstimator
- func (e *JaccardEstimator) Compute() error
- func (e *JaccardEstimator) Configure(conf map[string]string)
- func (e *JaccardEstimator) Deserialize(b []byte)
- func (e *JaccardEstimator) Options() map[string]string
- func (e *JaccardEstimator) Serialize() []byte
- func (e *JaccardEstimator) Similarity(a, b *Dataset) float64
type KDTreePartitioner
- func (p *KDTreePartitioner) Configure(conf map[string]string)
- func (p *KDTreePartitioner) Construct(tuples []DatasetTuple) error
- func (p *KDTreePartitioner) Deserialize(b []byte)
- func (p *KDTreePartitioner) Options() map[string]string
- func (p *KDTreePartitioner) Partition(tuples []DatasetTuple) ([][]DatasetTuple, error)
- func (p *KDTreePartitioner) Serialize() []byte
type KMeansPartitioner
- func (p *KMeansPartitioner) Configure(conf map[string]string)
- func (p *KMeansPartitioner) Construct(tuples []DatasetTuple) error
- func (p *KMeansPartitioner) Deserialize(b []byte)
- func (p *KMeansPartitioner) Options() map[string]string
- func (p *KMeansPartitioner) Partition(tuples []DatasetTuple) ([][]DatasetTuple, error)
- func (p *KMeansPartitioner) Serialize() []byte
type KNNModeler
- func (m *KNNModeler) Configure(conf map[string]string) error
- func (k *KNNModeler) Run() error
type MDScaling
- func NewMDScaling(matrix *DatasetSimilarityMatrix, k int, script string) *MDScaling
- func (md *MDScaling) Compute() error
- func (md *MDScaling) Coordinates() []DatasetCoordinates
- func (md *MDScaling) Gof() float64
- func (md *MDScaling) Stress() float64
- func (md *MDScaling) Variances() ([]float64, error)
type Modeler
- func NewModeler(modelerType ModelerType, datasets []*Dataset, sr float64, ...) Modeler
type ModelerType
- func NewModelerType(t string) ModelerType
type OnlineDatasetEvaluator
- func (e *OnlineDatasetEvaluator) Evaluate(dataset string) (float64, error)
type OnlineIndexer
- func NewOnlineIndexer(estimator DatasetSimilarityEstimator, coordinates []DatasetCoordinates, ...) *OnlineIndexer
- func (o *OnlineIndexer) Calculate(dataset *Dataset) (DatasetCoordinates, float64, error)
- func (o *OnlineIndexer) DatasetsToCompare(datasets int)
type PartitionerType
type ScriptBasedModeler
- func (m *ScriptBasedModeler) Configure(conf map[string]string) error
- func (m *ScriptBasedModeler) Run() error
type ScriptPairSimilarityEstimator
- func (e *ScriptPairSimilarityEstimator) Compute() error
- func (e *ScriptPairSimilarityEstimator) Configure(conf map[string]string)
- func (e *ScriptPairSimilarityEstimator) Deserialize(b []byte)
- func (e *ScriptPairSimilarityEstimator) Options() map[string]string
- func (e *ScriptPairSimilarityEstimator) Serialize() []byte
- func (e *ScriptPairSimilarityEstimator) Similarity(a, b *Dataset) float64
type ScriptSimilarityEstimator
- func (e *ScriptSimilarityEstimator) Compute() error
- func (e *ScriptSimilarityEstimator) Configure(conf map[string]string)
- func (e *ScriptSimilarityEstimator) Deserialize(b []byte)
- func (e *ScriptSimilarityEstimator) Options() map[string]string
- func (e *ScriptSimilarityEstimator) Serialize() []byte
- func (e *ScriptSimilarityEstimator) Similarity(a, b *Dataset) float64
type ScriptSimilarityEstimatorType
type SizeEstimator
- func (e *SizeEstimator) Compute() error
- func (e *SizeEstimator) Configure(conf map[string]string)
- func (e *SizeEstimator) Deserialize(b []byte)
- func (e *SizeEstimator) Options() map[string]string
- func (e *SizeEstimator) Serialize() []byte
- func (e *SizeEstimator) Similarity(a, b *Dataset) float64

Constants ¶

View Source

const (
	// CorrelationSimilarityTypePearson represents the Pearson cor. coeff
	CorrelationSimilarityTypePearson = iota
	// CorrelationSimilarityTypeSpearman represents the Spearman cor. coeff
	CorrelationSimilarityTypeSpearman = iota + 1
	// CorrelationSimilarityTypeKendall represents the Kendall cor. coeff
	CorrelationSimilarityTypeKendall = iota + 2
)

View Source

const (
	// CorrelationSimilarityNormalizationAbs returns |r|, r being the cor. metric
	CorrelationSimilarityNormalizationAbs = iota
	// CorrelationSimilarityNormalizationScale returns r/2 + 0.5, r being the cor. metric
	CorrelationSimilarityNormalizationScale = iota + 1
	// CorrelationSimilarityNormalizationPos returns r, if r>=0 else 0
	CorrelationSimilarityNormalizationPos = iota + 2
)

View Source

const KMeansMaxIteration = 10000

Variables ¶

View Source

var DatasetSimilarityEstimatorAvailableTypes = []DatasetSimilarityEstimatorType{
	SimilarityTypeBhattacharyya,
	SimilarityTypeJaccard,
	SimilarityTypeCorrelation,
	SimilarityTypeComposite,
	SimilarityTypeScript,
	SimilarityTypeSize,
	SimilarityTypeScriptPair,
}

DatasetSimilarityEstimatorAvailableTypes lists the available similarity types

Functions ¶

func DistanceToSimilarity ¶

func DistanceToSimilarity(distance float64) float64

DistanceToSimilarity returns the similarity based on the distance

func Kendall ¶

func Kendall(a, b []float64) float64

Kendall returns yet another rank correlation coefficient between two variables. The two arrays must be of the same size, else 0 is returned.

func MaxAbsoluteCountPercentageError ¶ added in v0.2.0

func MaxAbsoluteCountPercentageError(actual, predicted []float64, percentile int) float64

MaxAbsoluteCountPercentageError returns the percentage of the dataset that has AbsolutePercentageError gte to percentile.

func MaxAbsolutePercentageError ¶ added in v0.2.0

func MaxAbsolutePercentageError(actual, predicted []float64) float64

MaxAbsolutePercentageError returns the Max error of the actual vs the predicted values as a percentage.

func Mean ¶

func Mean(a []float64) float64

Mean returns the mean value of a float array

func MeanAbsoluteError ¶

func MeanAbsoluteError(actual, predicted []float64) float64

MeanAbsoluteError returns the MAE of the actual vs the predicted values

func MeanAbsolutePercentageError ¶

func MeanAbsolutePercentageError(actual, predicted []float64) float64

MeanAbsolutePercentageError returns the MAPE of the actual vs the predicted values

func MedianAbsolutePercentageError ¶

func MedianAbsolutePercentageError(actual, predicted []float64) float64

MedianAbsolutePercentageError returns the MdAPE of the actual vs the predicted values

func Pearson ¶

func Pearson(a, b []float64) float64

Pearson returns the pearson correlation coefficient between two variables. The two arrays must be of the same size, else 0 is returned.

func Percentile ¶

func Percentile(values []float64, percentile int) float64

Percentile returns the i-th percentile of an array of values

func RSquared ¶

func RSquared(actual, predicted []float64) float64

RSquared returns the coeff. of determination of the actual vs the predicted values

func Rank ¶

func Rank(a []float64) []int

Rank returns an array containing the ranks of a slice a

func RootMeanSquaredError ¶

func RootMeanSquaredError(actual, predicted []float64) float64

RootMeanSquaredError returns the RMSE of the actual vs the predicted values

func RootMeanSquaredLogError ¶

func RootMeanSquaredLogError(actual, predicted []float64) float64

RootMeanSquaredLogError returns the RMSLE of the actual vs the predicted values

func SerializeCoordinates ¶

func SerializeCoordinates(coords []DatasetCoordinates) []byte

SerializeCoordinates returns a CSV serilization of a coordinates slice

func SimilarityToDistance ¶

func SimilarityToDistance(similarity float64) float64

SimilarityToDistance returns the distance based on the similarity

func Spearman ¶

func Spearman(a, b []float64) float64

Spearman returns the rank correlation coefficient between two variables. The two arrays must be of the same size, else 0 is returned.

func StdDev ¶

func StdDev(a []float64) float64

StdDev returns the standard deviation of a float slice

Types ¶

type AbstractDatasetSimilarityEstimator ¶

type AbstractDatasetSimilarityEstimator struct {
	// contains filtered or unexported fields
}

AbstractDatasetSimilarityEstimator is the base struct for the similarity estimator objects

func (*AbstractDatasetSimilarityEstimator) Concurrency ¶

func (a *AbstractDatasetSimilarityEstimator) Concurrency() int

Concurrency returns the max number of threads to be used for the computation

func (*AbstractDatasetSimilarityEstimator) Datasets ¶

func (a *AbstractDatasetSimilarityEstimator) Datasets() []*Dataset

Datasets returns the datasets of the estimator

func (*AbstractDatasetSimilarityEstimator) Duration ¶

func (a *AbstractDatasetSimilarityEstimator) Duration() float64

Duration returns the duration of the compution

func (*AbstractDatasetSimilarityEstimator) PopulationPolicy ¶

func (a *AbstractDatasetSimilarityEstimator) PopulationPolicy() DatasetSimilarityPopulationPolicy

PopulationPolicy gets the population policy to be used

func (*AbstractDatasetSimilarityEstimator) SetPopulationPolicy ¶

func (a *AbstractDatasetSimilarityEstimator) SetPopulationPolicy(pol DatasetSimilarityPopulationPolicy)

SetPopulationPolicy sets the population policy to be used

func (*AbstractDatasetSimilarityEstimator) SimilarityMatrix ¶

func (a *AbstractDatasetSimilarityEstimator) SimilarityMatrix() *DatasetSimilarityMatrix

SimilarityMatrix returns the similarity matrix of the estimator

type AbstractModeler ¶

type AbstractModeler struct {
	// contains filtered or unexported fields
}

AbstractModeler implements the common methods of the Modeler structs

func (*AbstractModeler) AppxValues ¶

func (a *AbstractModeler) AppxValues() []float64

AppxValues returns the values of all the datasets

func (*AbstractModeler) Datasets ¶

func (a *AbstractModeler) Datasets() []*Dataset

Datasets returns the datasets slice

func (*AbstractModeler) ErrorMetrics ¶

func (a *AbstractModeler) ErrorMetrics() map[string]float64

ErrorMetrics returns a list of error metrics for the specified model

func (*AbstractModeler) EvalTime ¶

func (a *AbstractModeler) EvalTime() float64

EvalTime returns the dataset evaluation time of the Modeler

func (*AbstractModeler) ExecTime ¶

func (a *AbstractModeler) ExecTime() float64

ExecTime returns the total exection time of the Modeler

func (*AbstractModeler) Samples ¶

func (a *AbstractModeler) Samples() map[int]float64

Samples return the indices of the chosen datasets

type BhattacharyyaEstimator ¶

type BhattacharyyaEstimator struct {
	AbstractDatasetSimilarityEstimator
	// contains filtered or unexported fields
}

BhattacharyyaEstimator is the similarity estimator that quantifies the similarity of the distribution between the datasets.

func (*BhattacharyyaEstimator) Compute ¶

func (e *BhattacharyyaEstimator) Compute() error

Compute method constructs the Similarity Matrix

func (*BhattacharyyaEstimator) Configure ¶

func (e *BhattacharyyaEstimator) Configure(conf map[string]string)

Configure sets a the configuration parameters of the estimator

func (*BhattacharyyaEstimator) Deserialize ¶

func (e *BhattacharyyaEstimator) Deserialize(b []byte)

Deserialize constructs a similarity object based on the byte stream

func (*BhattacharyyaEstimator) Options ¶

func (e *BhattacharyyaEstimator) Options() map[string]string

Options returns a list of parameters that can be set by the user

func (*BhattacharyyaEstimator) Serialize ¶

func (e *BhattacharyyaEstimator) Serialize() []byte

Serialize returns a byte array containing a serialized form of the estimator

func (*BhattacharyyaEstimator) Similarity ¶

func (e *BhattacharyyaEstimator) Similarity(a, b *Dataset) float64

Similarity returns the similarity between two datasets

type Clustering ¶

type Clustering struct {
	// contains filtered or unexported fields
}

Clustering struct is responsible to execute the necessary actions in order to cluster the datasets based on their availability

func NewClustering ¶

func NewClustering(similarities *DatasetSimilarityMatrix, datasets []*Dataset) *Clustering

NewClustering is the the constructor for creating a Clustering object, providing a DatasetSimilarities object

func (*Clustering) Compute ¶

func (c *Clustering) Compute() error

Compute executes the clustering

func (*Clustering) Results ¶

func (c *Clustering) Results() *Dendrogram

Results returns the results

func (*Clustering) SetConcurrency ¶

func (c *Clustering) SetConcurrency(concurrency int)

SetConcurrency sets the number of threads to be used

type CompositeEstimator ¶

type CompositeEstimator struct {
	AbstractDatasetSimilarityEstimator
	// contains filtered or unexported fields
}

CompositeEstimator returns a similarity function based on compositions of simpler similarity expressions. The user needs to provide a formula containing the expression of the similarity function, e.g.: 0.8 x BHATTACHARRYA + 0.2 CORRELATION Note that it is the user's responsibility to guarantee that the overall expression remains within the limits of the similarity expression [0,1].

func (*CompositeEstimator) Compute ¶

func (e *CompositeEstimator) Compute() error

Compute method constructs the Similarity Matrix

func (*CompositeEstimator) Configure ¶

func (e *CompositeEstimator) Configure(conf map[string]string)

Configure provides the configuration parameters needed by the Estimator

func (*CompositeEstimator) Deserialize ¶

func (e *CompositeEstimator) Deserialize(b []byte)

Deserialize constructs an Estimator object based on the byte array provided.

func (*CompositeEstimator) Options ¶

func (e *CompositeEstimator) Options() map[string]string

Options returns the applicable parameters needed by the Estimator.

func (*CompositeEstimator) Serialize ¶

func (e *CompositeEstimator) Serialize() []byte

Serialize returns an array of bytes representing the Estimator.

func (*CompositeEstimator) Similarity ¶

func (e *CompositeEstimator) Similarity(a, b *Dataset) float64

Similarity returns the similarity between two datasets

type CorrelationEstimator ¶

type CorrelationEstimator struct {
	AbstractDatasetSimilarityEstimator
	// contains filtered or unexported fields
}

CorrelationEstimator estimates the similarity between two datasets based on a correlation metric. This metric can only be used for datasets that consist of a single column and consist of the same number of tuples.

func (*CorrelationEstimator) Compute ¶

func (e *CorrelationEstimator) Compute() error

Compute method constructs the Similarity Matrix

func (*CorrelationEstimator) Configure ¶

func (e *CorrelationEstimator) Configure(conf map[string]string)

Configure provides a set of configuration options to the CorrelationEstimator struct.

func (*CorrelationEstimator) Deserialize ¶

func (e *CorrelationEstimator) Deserialize(b []byte)

Deserialize returns a byte array in order to deserialize the CorrelationEstimator

func (*CorrelationEstimator) Options ¶

func (e *CorrelationEstimator) Options() map[string]string

Options returns a list of options used internally by the CorrelationEstimator struct for its execution.

func (*CorrelationEstimator) Serialize ¶

func (e *CorrelationEstimator) Serialize() []byte

Serialize returns a byte array in order to serialize the CorrelationEstimator struct.

func (*CorrelationEstimator) Similarity ¶

func (e *CorrelationEstimator) Similarity(a, b *Dataset) float64

Similarity returns the similarity between two datasets. Since all the correlation coefficients are between [-1.0,1.0], the output of this function is scaled to [0.0,1.0] by returning (x/2.0 + 0.5), where x is one of Pearson, Spearman and Kendall coefficients.

type CorrelationEstimatorNormalizationType ¶

type CorrelationEstimatorNormalizationType uint8

CorrelationEstimatorNormalizationType represents the type of the normalization action. Since all correlation metrics can take any valuein [-1,1], this type reflects the policy with which [-1,1] will be mapped to a similarity metric in [0,1]

func (CorrelationEstimatorNormalizationType) String ¶

func (s CorrelationEstimatorNormalizationType) String() string

String returns a nstring representation of the CorrelationEstimatorNormalizationType

type CorrelationEstimatorType ¶

type CorrelationEstimatorType uint8

CorrelationEstimatorType represents the type of correlation to be used by the CorrelationEstimator

func (CorrelationEstimatorType) String ¶

func (s CorrelationEstimatorType) String() string

String returns a string representation of the CorrelationEstimatorType

type DataPartitioner ¶ added in v0.2.0

type DataPartitioner interface {

	// Construct estimates the partitioning of the provided tuples (offline)
	Construct([]DatasetTuple) error
	// Partition executes partitioning to new datasets
	Partition([]DatasetTuple) ([][]DatasetTuple, error)
	// Configure provides the necessary configuration option to DataPartitioner
	Configure(map[string]string)
	// Options returns a list of options the DataPartitioner accepts with a
	// description
	Options() map[string]string
	// Serialize converts a DataPartitioner object to a stream of bytes
	Serialize() []byte
	// Deserialize converts a stream of bytes to a DataPartitioner object
	Deserialize([]byte)
}

DataPartitioner is responsible to partition a dataset and, upon estimating, the basic partitioning scheme, dynamically partition new datasets.

func DeserializePartitioner ¶ added in v0.2.0

func DeserializePartitioner(b []byte) DataPartitioner

DeserializePartitioner returns instantiates a new partitioner from a serialized version

func NewDataPartitioner ¶ added in v0.2.0

func NewDataPartitioner(dpType DataPartitionerType, conf map[string]string) DataPartitioner

NewDataPartitioner is the factory method for the creation of a new DataPartitioner object

type DataPartitionerType ¶ added in v0.2.0

type DataPartitionerType uint8

DataPartitionerType represents the type of the DataPartitioner struct

const (
	// DataPartitionerKDTree utilizes a kd-tree for partitioning
	DataPartitionerKDTree DataPartitionerType = iota + 1
	// DataPartitionerKMeans utilizes kmeans for partitioning
	DataPartitionerKMeans DataPartitionerType = iota + 2
)

type Dataset ¶

type Dataset struct {
	// contains filtered or unexported fields
}

Dataset struct represents a dataset object.

func DiscoverDatasets ¶

func DiscoverDatasets(inputDir string) []*Dataset

DiscoverDatasets is used to return a slice of Datasets when a new splits directory is provided

func NewDataset ¶

func NewDataset(path string) *Dataset

NewDataset is the constructor for the Dataset struct. A random ID is assigned to a new dataset

func (Dataset) Data ¶

func (d Dataset) Data() []DatasetTuple

Data getter for dataset - only works if ReadFromFile was successful

func (d Dataset) Header() []string

Header getter for dataset - only works if ReadFromFile was successful

func (Dataset) ID ¶

func (d Dataset) ID() string

ID getter for dataset

func (Dataset) Path ¶

func (d Dataset) Path() string

Path getter for dataset

func (*Dataset) ReadFromFile ¶

func (d *Dataset) ReadFromFile() error

ReadFromFile is used to parse the Dataset into memory. If the data are previously read, the method is not re-executed.

func (Dataset) String ¶

func (d Dataset) String() string

String method for dataset object - returns the path of the dataset

type DatasetCoordinates ¶

type DatasetCoordinates []float64

DatasetCoordinates is a struct for representing the dataset coordinates

func DeserializeCoordinates ¶

func DeserializeCoordinates(buffer []byte) []DatasetCoordinates

DeserializeCoordinates instantiated a new DatasetCoordinates slice, based on a CSV serialization form

type DatasetEvaluator ¶

type DatasetEvaluator interface {
	Evaluate(string) (float64, error)
}

DatasetEvaluator reflects the interface of an evaluator object.

func NewDatasetEvaluator ¶

func NewDatasetEvaluator(evalType DatasetEvaluatorType,
	params map[string]string) (DatasetEvaluator, error)

NewDatasetEvaluator returns a new DatasetEvaluator object

type DatasetEvaluatorType ¶

type DatasetEvaluatorType uint8

DatasetEvaluatorType represents the type of the dataset evaluator

const (
	// OnlineEval dynamically parses the dataset values
	OnlineEval DatasetEvaluatorType = iota + 1
	// FileBasedEval returns the pre-computed values of an operator
	FileBasedEval
)

type DatasetPartitioner ¶

type DatasetPartitioner struct {
	// contains filtered or unexported fields
}

DatasetPartitioner accepts a single dataset and it is responsible to partition it.

func NewDatasetPartitioner ¶

func NewDatasetPartitioner(input, output string, splits int, partitionType PartitionerType) *DatasetPartitioner

NewDatasetPartitioner initializes a new DatasetPartitioner object

func (*DatasetPartitioner) Delete ¶

func (a *DatasetPartitioner) Delete()

Delete function deletes the output directory, containing the Dataset splits

func (*DatasetPartitioner) Partition ¶

func (a *DatasetPartitioner) Partition()

Partition function is used to execute the partitioning

type DatasetScores ¶

type DatasetScores struct {
	Scores map[string]float64
}

DatasetScores is used to store the scores of a set of datasets

func AppxScores ¶ added in v0.2.0

func AppxScores(modeler Modeler) *DatasetScores

Return `modeler.AppxValues()` as a `DatasetScores` struct

func NewDatasetScores ¶

func NewDatasetScores() *DatasetScores

NewDatasetScores initializes a new DatasetScores struct

func (*DatasetScores) Deserialize ¶

func (s *DatasetScores) Deserialize(buf []byte) error

Deserialize constructs a DatasetScores strucy based on a byte array

func (*DatasetScores) Serialize ¶

func (s *DatasetScores) Serialize() ([]byte, error)

Serialize returns a stream containing a DatasetScores object

type DatasetSimilarityEstimator ¶

type DatasetSimilarityEstimator interface {
	// computes the similarity matrix
	Compute() error
	// returns the datasets slice
	Datasets() []*Dataset
	// returns the similarity for 2 datasets
	Similarity(a, b *Dataset) float64
	// returns the similarity struct
	SimilarityMatrix() *DatasetSimilarityMatrix
	// provides configuration options
	Configure(map[string]string)
	// list of options for the estimator
	Options() map[string]string
	// sets the population policy for the estimator
	SetPopulationPolicy(DatasetSimilarityPopulationPolicy)
	// returns the population policy
	PopulationPolicy() DatasetSimilarityPopulationPolicy
	// returns a serialized esimator object
	Serialize() []byte
	// instantiates an estimator from a serialized object
	Deserialize([]byte)
	// returns the seconds needed to execute the computation
	Duration() float64
	// returns the max number of threads to be used
	Concurrency() int
	// contains filtered or unexported methods
}

DatasetSimilarityEstimator is the interface that each Similarity estimator obeys.

func DeserializeSimilarityEstimator ¶

func DeserializeSimilarityEstimator(b []byte) DatasetSimilarityEstimator

DeserializeSimilarityEstimator method is used to deserialize the Estimator according to its type

func NewDatasetSimilarityEstimator ¶

func NewDatasetSimilarityEstimator(
	estType DatasetSimilarityEstimatorType,
	datasets []*Dataset) DatasetSimilarityEstimator

NewDatasetSimilarityEstimator is a factory method for the DatasetSimilarityEstimator structs, used to initialize the estimator and return it to the user.

type DatasetSimilarityEstimatorType ¶

type DatasetSimilarityEstimatorType uint

DatasetSimilarityEstimatorType represents the type of the Similarity Estimator

const (
	// SimilarityTypeJaccard estimates the Jaccard coefficient
	SimilarityTypeJaccard DatasetSimilarityEstimatorType = iota
	// SimilarityTypeBhattacharyya estimates the Bhattacharyya coefficient
	SimilarityTypeBhattacharyya DatasetSimilarityEstimatorType = iota + 1
	// SimilarityTypeScript uses a script to transform the data
	SimilarityTypeScript DatasetSimilarityEstimatorType = iota + 2
	// SimilarityTypeComposite utilizes multiple estimators concurrently
	SimilarityTypeComposite DatasetSimilarityEstimatorType = iota + 4
	// SimilarityTypeCorrelation estimates correlation metrics
	SimilarityTypeCorrelation DatasetSimilarityEstimatorType = iota + 5
	// SimilarityTypeSize estimates size metric
	SimilarityTypeSize DatasetSimilarityEstimatorType = iota + 6
	// SimilarityTypeScriptPair estimates the similarity based on a script for each pair
	SimilarityTypeScriptPair DatasetSimilarityEstimatorType = iota + 7
)

func NewDatasetSimilarityEstimatorType ¶

func NewDatasetSimilarityEstimatorType(estimatorType string) *DatasetSimilarityEstimatorType

NewDatasetSimilarityEstimatorType transforms the similarity type from a string to a DatasetSimilarityEstimatorType object

func (DatasetSimilarityEstimatorType) String ¶

func (t DatasetSimilarityEstimatorType) String() string

type DatasetSimilarityMatrix ¶

type DatasetSimilarityMatrix struct {
	// contains filtered or unexported fields
}

DatasetSimilarityMatrix represent the struct that holds the results of a dataset similarity estimation. It also provides the necessary

func NewDatasetSimilarities ¶

func NewDatasetSimilarities(capacity int) *DatasetSimilarityMatrix

NewDatasetSimilarities is the constructor for the DatasetSimilarities struct, expecting the number of datasets that will be held by it. If capacity=0, this implies that the Similarity Matrix will be deserialzed.

func (*DatasetSimilarityMatrix) Capacity ¶

func (s *DatasetSimilarityMatrix) Capacity() int

Capacity returns the capacity of the Similarity Matrix

func (*DatasetSimilarityMatrix) Deserialize ¶

func (s *DatasetSimilarityMatrix) Deserialize(buff []byte) error

Deserialize instantiates an empty DatasetSimilarities object. In case of parse failure, an error is thrown

func (*DatasetSimilarityMatrix) FullyCalculatedNodes ¶

func (s *DatasetSimilarityMatrix) FullyCalculatedNodes() int

FullyCalculatedNodes returns the number of nodes the similarity of which has been calculated for all the nodes. This number can work as a measure of how close to the full similarity matrix the current object is.

func (*DatasetSimilarityMatrix) Get ¶

func (s *DatasetSimilarityMatrix) Get(idxA, idxB int) float64

Get returns the similarity between two dataset paths

func (*DatasetSimilarityMatrix) IndexDisabled ¶

func (s *DatasetSimilarityMatrix) IndexDisabled(flag bool)

IndexDisabled sets whether the closest dataset index should be disabled or not. The index is useless if the FULL Estimator strategy is being followed.

func (*DatasetSimilarityMatrix) LeastSimilar ¶

func (s *DatasetSimilarityMatrix) LeastSimilar() (int, float64)

LeastSimilar method returns the dataset that presents the lowest similarity among the examined datasets

func (*DatasetSimilarityMatrix) Serialize ¶

func (s *DatasetSimilarityMatrix) Serialize() []byte

Serialize method returns a byte slice that represents the similarity matrix

func (*DatasetSimilarityMatrix) Set ¶

func (s *DatasetSimilarityMatrix) Set(idxA, idxB int, value float64)

Set is a setter function for the similarity between two datasets

func (DatasetSimilarityMatrix) String ¶

func (s DatasetSimilarityMatrix) String() string

type DatasetSimilarityPopulationPolicy ¶

type DatasetSimilarityPopulationPolicy struct {
	PolicyType DatasetSimilarityPopulationPolicyType
	Parameters map[string]float64
}

DatasetSimilarityPopulationPolicy is the struct that hold the Population Policy of the Similarity Matrix along with the configuration parameters of it.

func (*DatasetSimilarityPopulationPolicy) Deserialize ¶

func (s *DatasetSimilarityPopulationPolicy) Deserialize(b []byte)

Deserialize is responsible to instantiate a Population Policy object based on its byte representation.

func (*DatasetSimilarityPopulationPolicy) Serialize ¶

func (s *DatasetSimilarityPopulationPolicy) Serialize() []byte

Serialize method returns a slice of bytes containing the serialized form of the Population Policy

type DatasetSimilarityPopulationPolicyType ¶

type DatasetSimilarityPopulationPolicyType uint

DatasetSimilarityPopulationPolicyType is the type that represents the Similarity Matrix population policy

const (
	// PopulationPolicyFull policy needs no params
	PopulationPolicyFull DatasetSimilarityPopulationPolicyType = iota
	// PopulationPolicyAprx must have defined one of two params:
	// count (how many points) or threshold (percentage in similarity gain)
	PopulationPolicyAprx DatasetSimilarityPopulationPolicyType = iota + 1
)

type DatasetTuple ¶

type DatasetTuple struct {
	Data []float64
}

DatasetTuple represents a data tuple from the dataset

func DatasetsIntersection ¶

func DatasetsIntersection(a, b *Dataset) []DatasetTuple

DatasetsIntersection function is used to calculate the intersection of two datasets and returns the tuples that belong to it.

func DatasetsUnion ¶

func DatasetsUnion(a, b *Dataset) []DatasetTuple

DatasetsUnion function is used to calculate the union of two datasets and returns the tuples that belong to it.

func (*DatasetTuple) Deserialize ¶

func (t *DatasetTuple) Deserialize(data string)

Deserialize is used to construct a tuple from a string representation

func (DatasetTuple) Equals ¶

func (t DatasetTuple) Equals(o DatasetTuple) bool

Equals function returns true if t is equal to o

func (*DatasetTuple) Serialize ¶

func (t *DatasetTuple) Serialize() string

Serialize transforms the tuple to a string representation

func (DatasetTuple) String ¶

func (t DatasetTuple) String() string

type DatasetTuples ¶

type DatasetTuples []DatasetTuple

DatasetTuples represents a slice of DatasetTuple objects

func (DatasetTuples) Len ¶

func (slice DatasetTuples) Len() int

func (DatasetTuples) Less ¶

func (slice DatasetTuples) Less(i, j int) bool

func (DatasetTuples) Swap ¶

func (slice DatasetTuples) Swap(i, j int)

type Dendrogram ¶

type Dendrogram struct {
	// contains filtered or unexported fields
}

Dendrogram represents the results of the ClusterApp objects

func NewDendrogram ¶

func NewDendrogram(datasets []*Dataset) *Dendrogram

NewDendrogram is the constructor for a Dendrogram struct

func (*Dendrogram) GetClusters ¶

func (d *Dendrogram) GetClusters(level int) [][]*Dataset

GetClusters function returns a slice containing the clusters of datasets for the specified dendrogram level

func (*Dendrogram) Heights ¶

func (d *Dendrogram) Heights() (int, int)

Heights function returns the tree heights (max, min)

func (*Dendrogram) String ¶

func (d *Dendrogram) String() string

type DendrogramNode ¶

type DendrogramNode struct {
	// contains filtered or unexported fields
}

DendrogramNode is the node of the Dendrogram

func (DendrogramNode) String ¶

func (n DendrogramNode) String() string

type FileBasedEvaluator ¶

type FileBasedEvaluator struct {
	// contains filtered or unexported fields
}

FileBasedEvaluator returns the scores of an operator based on a scores file.

func (*FileBasedEvaluator) Evaluate ¶

func (e *FileBasedEvaluator) Evaluate(dataset string) (float64, error)

Evaluate returns the score for a given dataset

type JaccardEstimator ¶

type JaccardEstimator struct {
	AbstractDatasetSimilarityEstimator
}

JaccardEstimator estimates the Jaccard coefficients between the different datasets. The Jaccard coefficient between two datasets is defined as the cardinality of the intersection divided by the cardinality of the union of the two datasets.

func (*JaccardEstimator) Compute ¶

func (e *JaccardEstimator) Compute() error

Compute method constructs the Similarity Matrix

func (*JaccardEstimator) Configure ¶

func (e *JaccardEstimator) Configure(conf map[string]string)

Configure sets the necessary parameters before the similarity execution

func (*JaccardEstimator) Deserialize ¶

func (e *JaccardEstimator) Deserialize(b []byte)

Deserialize instantiates the estimator based on a byte array

func (*JaccardEstimator) Options ¶

func (e *JaccardEstimator) Options() map[string]string

Options returns a list of applicable parameters

func (*JaccardEstimator) Serialize ¶

func (e *JaccardEstimator) Serialize() []byte

Serialize returns a byte array containing the estimator.

func (*JaccardEstimator) Similarity ¶

func (e *JaccardEstimator) Similarity(a, b *Dataset) float64

Similarity returns the similarity between two datasets

type KDTreePartitioner ¶ added in v0.2.0

type KDTreePartitioner struct {
	// contains filtered or unexported fields
}

KDTreePartitioner generates a kd-tree on the selected columns and applies the partitioning to new datasets

func (*KDTreePartitioner) Configure ¶ added in v0.2.0

func (p *KDTreePartitioner) Configure(conf map[string]string)

Configure provides the necessary configuration params

func (*KDTreePartitioner) Construct ¶ added in v0.2.0

func (p *KDTreePartitioner) Construct(tuples []DatasetTuple) error

func (*KDTreePartitioner) Deserialize ¶ added in v0.2.0

func (p *KDTreePartitioner) Deserialize(b []byte)

Deserialize parses a byte array and instantiates a new kdtree part. object

func (*KDTreePartitioner) Options ¶ added in v0.2.0

func (p *KDTreePartitioner) Options() map[string]string

Options returns a list of options

func (*KDTreePartitioner) Partition ¶ added in v0.2.0

func (p *KDTreePartitioner) Partition(tuples []DatasetTuple) ([][]DatasetTuple, error)

Partition applies the previously constructed kd-tree in order to partition the given dataset

func (*KDTreePartitioner) Serialize ¶ added in v0.2.0

func (p *KDTreePartitioner) Serialize() []byte

Serialize returns a byte array with the serialized object

type KMeansPartitioner ¶ added in v0.2.0

type KMeansPartitioner struct {
	// contains filtered or unexported fields
}

KMeansPartitioner applies the k-means clustering algorithm to a given dataset and using the calculated centroids, it partitions newly provided datasets according to their distance from them

func (*KMeansPartitioner) Configure ¶ added in v0.2.0

func (p *KMeansPartitioner) Configure(conf map[string]string)

Configure provides the necessary configuration options to the KMeansPartitioner struct

func (*KMeansPartitioner) Construct ¶ added in v0.2.0

func (p *KMeansPartitioner) Construct(tuples []DatasetTuple) error

Construct runs the k-means algorithm and estimates the centroids of the cluster (in order to be later used for partitioning.

func (*KMeansPartitioner) Deserialize ¶ added in v0.2.0

func (p *KMeansPartitioner) Deserialize(b []byte)

func (*KMeansPartitioner) Options ¶ added in v0.2.0

func (p *KMeansPartitioner) Options() map[string]string

Options returns the configuration options of the KMeansPartitioner

func (*KMeansPartitioner) Partition ¶ added in v0.2.0

func (p *KMeansPartitioner) Partition(tuples []DatasetTuple) (
	[][]DatasetTuple, error)

Partition receives a set of tuples as input and returns a number of clusters

func (*KMeansPartitioner) Serialize ¶ added in v0.2.0

func (p *KMeansPartitioner) Serialize() []byte

type KNNModeler ¶

type KNNModeler struct {
	AbstractModeler
	// contains filtered or unexported fields
}

KNNModeler utilizes a similarity matrix in order to approximate the training set

func (*KNNModeler) Configure ¶

func (m *KNNModeler) Configure(conf map[string]string) error

Configure is the method used to provide the essential paremeters for the conf of the modeler

func (*KNNModeler) Run ¶

func (k *KNNModeler) Run() error

Run executes the training part and obtains the model

type MDScaling ¶

type MDScaling struct {
	// contains filtered or unexported fields
}

MDScaling is responsible for the execution of a MultiDimensional Scaling algorithm in order to provide coefficients for each dataset, based on a a similarity matrix.

func NewMDScaling ¶

func NewMDScaling(matrix *DatasetSimilarityMatrix, k int, script string) *MDScaling

NewMDScaling is the default MDScaling constructor; it initializes a new MDScaling object, based on the provided DatasetSimilarities struct and the k factor that determines the number of target dimensions. If k<1, then auto estimation takes place.

func (*MDScaling) Compute ¶

func (md *MDScaling) Compute() error

Compute functions executes the Multidimensional Scaling computation.

func (*MDScaling) Coordinates ¶

func (md *MDScaling) Coordinates() []DatasetCoordinates

Coordinates getter returns the dataset coordinates in a nxk slice (n being the number of datasets).

func (*MDScaling) Gof ¶

func (md *MDScaling) Gof() float64

Gof getter returns the gof factor for the calculated solution

func (*MDScaling) Stress ¶

func (md *MDScaling) Stress() float64

Stress returns the stress after the execution of the Sammon mapping

func (*MDScaling) Variances ¶

func (md *MDScaling) Variances() ([]float64, error)

Variances function returns the variances of the principal coordinates

type Modeler ¶

type Modeler interface {
	//  Configure is responsible to provide the necessary configuration
	// options to the Modeler struct. Call it before Run.
	Configure(map[string]string) error
	// Run initiates the modeling process.
	Run() error

	// Datasets returns the datasets slice
	Datasets() []*Dataset
	// Samples returns the indices of the chosen datasets.
	Samples() map[int]float64
	// AppxValues returns a slice of the approximated values
	AppxValues() []float64

	// ErrorMetrics returns a list of error metrics for the specified modeler
	ErrorMetrics() map[string]float64

	// ExecTime returns the total execution time of the Modeler
	ExecTime() float64
	// EvalTime returns the evaluation time of the Modeler
	EvalTime() float64
}

Modeler is the interface for the objects that model the dataset space.

func NewModeler ¶

func NewModeler(
	modelerType ModelerType,
	datasets []*Dataset,
	sr float64,
	evaluator DatasetEvaluator) Modeler

NewModeler is the factory method for the modeler object

type ModelerType ¶

type ModelerType uint8

const (
	ScriptBasedModelerType ModelerType = iota
	KNNModelerType         ModelerType = iota + 1
)

func NewModelerType ¶

func NewModelerType(t string) ModelerType

type OnlineDatasetEvaluator ¶

type OnlineDatasetEvaluator struct {
	// contains filtered or unexported fields
}

OnlineDatasetEvaluator is responsible to execute the training script and fetch the model accuracy

func (*OnlineDatasetEvaluator) Evaluate ¶

func (e *OnlineDatasetEvaluator) Evaluate(dataset string) (float64, error)

Evaluate evaluates a new dataset, based on its path

type OnlineIndexer ¶

type OnlineIndexer struct {
	// contains filtered or unexported fields
}

OnlineIndexer is used to execute online indexing. The user can supply a map containing distances from original datasets and the indexer returns the coordinates of the specified dataset.

func NewOnlineIndexer ¶

func NewOnlineIndexer(estimator DatasetSimilarityEstimator,
	coordinates []DatasetCoordinates,
	script string) *OnlineIndexer

NewOnlineIndexer is a constructor function used to initialize an OnlineIndexer object.

func (*OnlineIndexer) Calculate ¶

func (o *OnlineIndexer) Calculate(dataset *Dataset) (DatasetCoordinates, float64, error)

Calculate method is responsible to calculate the coordinates of the specified dataset. In case that such a dataset cannot be represented by the specified coordinates system, an error is returned.

func (*OnlineIndexer) DatasetsToCompare ¶

func (o *OnlineIndexer) DatasetsToCompare(datasets int)

DatasetsToCompare is a setter method to determine the number of datasets that will be utilized for the assigment of coordinates

type PartitionerType ¶

type PartitionerType uint8

PartitionerType represents the type of the partitioning

const (
	// PartitionerUniform represents a uniform partitioner
	PartitionerUniform PartitionerType = iota + 1
)

type ScriptBasedModeler ¶

type ScriptBasedModeler struct {
	AbstractModeler
	// contains filtered or unexported fields
}

ScriptBasedModeler utilizes a script to train an ML model and obtain is values

func (*ScriptBasedModeler) Configure ¶

func (m *ScriptBasedModeler) Configure(conf map[string]string) error

Configure expects the necessary conf options for the specified struct. Specifically, the following parameters are necessary: - script: the path of the script to use

func (*ScriptBasedModeler) Run ¶

func (m *ScriptBasedModeler) Run() error

Run executes the modeling process and populates the samples, realValues and appxValues slices.

type ScriptPairSimilarityEstimator ¶

type ScriptPairSimilarityEstimator struct {
	AbstractDatasetSimilarityEstimator
	// contains filtered or unexported fields
}

ScriptPairSimilarityEstimator executes a script of the extraction of the similarity between each pair of datasets

func (*ScriptPairSimilarityEstimator) Compute ¶

func (e *ScriptPairSimilarityEstimator) Compute() error

Compute method constructs the Similarity Matrix

func (*ScriptPairSimilarityEstimator) Configure ¶

func (e *ScriptPairSimilarityEstimator) Configure(conf map[string]string)

Configure sets a number of configuration parameters to the struct. Use this method before the execution of the computation

func (*ScriptPairSimilarityEstimator) Deserialize ¶

func (e *ScriptPairSimilarityEstimator) Deserialize(b []byte)

Deserialize parses a byte array and forms a ScriptSimilarityEstimator object

func (*ScriptPairSimilarityEstimator) Options ¶

func (e *ScriptPairSimilarityEstimator) Options() map[string]string

Options returns a list of options that the user can set

func (*ScriptPairSimilarityEstimator) Serialize ¶

func (e *ScriptPairSimilarityEstimator) Serialize() []byte

Serialize returns a byte array that represents the struct is a serialized version

func (*ScriptPairSimilarityEstimator) Similarity ¶

func (e *ScriptPairSimilarityEstimator) Similarity(a, b *Dataset) float64

Similarity returns the similarity between the two datasets

type ScriptSimilarityEstimator ¶

type ScriptSimilarityEstimator struct {
	AbstractDatasetSimilarityEstimator
	// contains filtered or unexported fields
}

ScriptSimilarityEstimator utilizes a script to analyze the data based on some external algorithm and utilizes various norms to measure the differences between the analysis outputs.

func (*ScriptSimilarityEstimator) Compute ¶

func (e *ScriptSimilarityEstimator) Compute() error

Compute method constructs the Similarity Matrix

func (*ScriptSimilarityEstimator) Configure ¶

func (e *ScriptSimilarityEstimator) Configure(conf map[string]string)

Configure sets a number of configuration parameters to the struct. Use this method before the execution of the computation

func (*ScriptSimilarityEstimator) Deserialize ¶

func (e *ScriptSimilarityEstimator) Deserialize(b []byte)

Deserialize parses a byte array and forms a ScriptSimilarityEstimator object

func (*ScriptSimilarityEstimator) Options ¶

func (e *ScriptSimilarityEstimator) Options() map[string]string

Options returns a list of options that the user can set

func (*ScriptSimilarityEstimator) Serialize ¶

func (e *ScriptSimilarityEstimator) Serialize() []byte

Serialize returns a byte array that represents the struct is a serialized version

func (*ScriptSimilarityEstimator) Similarity ¶

func (e *ScriptSimilarityEstimator) Similarity(a, b *Dataset) float64

Similarity returns the similarity between the two datasets

type ScriptSimilarityEstimatorType ¶

type ScriptSimilarityEstimatorType uint8

ScriptSimilarityEstimatorType reflects the type of the ScriptSimilarityEstimator

type SizeEstimator ¶

type SizeEstimator struct {
	AbstractDatasetSimilarityEstimator
}

SizeEstimator estimates the Jaccard coefficients between the different datasets. The Jaccard coefficient between two datasets is defined as the cardinality of the intersection divided by the cardinality of the union of the two datasets.

func (*SizeEstimator) Compute ¶

func (e *SizeEstimator) Compute() error

Compute method constructs the Similarity Matrix

func (*SizeEstimator) Configure ¶

func (e *SizeEstimator) Configure(conf map[string]string)

Configure sets the necessary parameters before the similarity execution

func (*SizeEstimator) Deserialize ¶

func (e *SizeEstimator) Deserialize(b []byte)

Deserialize instantiates the estimator based on a byte array

func (*SizeEstimator) Options ¶

func (e *SizeEstimator) Options() map[string]string

Options returns a list of applicable parameters

func (*SizeEstimator) Serialize ¶

func (e *SizeEstimator) Serialize() []byte

Serialize returns a byte array containing the estimator.

func (*SizeEstimator) Similarity ¶

func (e *SizeEstimator) Similarity(a, b *Dataset) float64

Similarity returns the similarity between two datasets

Source Files ¶

View all Source files

?	: This menu
/	: Search site
f or F	: Jump to
y or Y	: Canonical URL

Documentation ¶

Overview ¶

Index ¶

Constants ¶

Variables ¶

Functions ¶

func DistanceToSimilarity ¶

func Kendall ¶

func MaxAbsoluteCountPercentageError ¶ added in v0.2.0

func MaxAbsolutePercentageError ¶ added in v0.2.0

func Mean ¶

func MeanAbsoluteError ¶

func MeanAbsolutePercentageError ¶

func MedianAbsolutePercentageError ¶

func Pearson ¶

func Percentile ¶

func RSquared ¶

func Rank ¶

func RootMeanSquaredError ¶

func RootMeanSquaredLogError ¶

func SerializeCoordinates ¶

func SimilarityToDistance ¶

func Spearman ¶

func StdDev ¶

Types ¶

type AbstractDatasetSimilarityEstimator ¶

func (*AbstractDatasetSimilarityEstimator) Concurrency ¶

func (*AbstractDatasetSimilarityEstimator) Datasets ¶

func (*AbstractDatasetSimilarityEstimator) Duration ¶

func (*AbstractDatasetSimilarityEstimator) PopulationPolicy ¶

func (*AbstractDatasetSimilarityEstimator) SetPopulationPolicy ¶

func (*AbstractDatasetSimilarityEstimator) SimilarityMatrix ¶

type AbstractModeler ¶

func (*AbstractModeler) AppxValues ¶

func (*AbstractModeler) Datasets ¶

func (*AbstractModeler) ErrorMetrics ¶

func (*AbstractModeler) EvalTime ¶

func (*AbstractModeler) ExecTime ¶

func (*AbstractModeler) Samples ¶

type BhattacharyyaEstimator ¶

func (*BhattacharyyaEstimator) Compute ¶

func (*BhattacharyyaEstimator) Configure ¶

func (*BhattacharyyaEstimator) Deserialize ¶

func (*BhattacharyyaEstimator) Options ¶

func (*BhattacharyyaEstimator) Serialize ¶

func (*BhattacharyyaEstimator) Similarity ¶

type Clustering ¶

func NewClustering ¶

func (*Clustering) Compute ¶

func (*Clustering) Results ¶

func (*Clustering) SetConcurrency ¶

type CompositeEstimator ¶

func (*CompositeEstimator) Compute ¶

func (*CompositeEstimator) Configure ¶

func (*CompositeEstimator) Deserialize ¶

func (*CompositeEstimator) Options ¶

func (*CompositeEstimator) Serialize ¶

func (*CompositeEstimator) Similarity ¶

type CorrelationEstimator ¶

func (*CorrelationEstimator) Compute ¶

func (*CorrelationEstimator) Configure ¶

func (*CorrelationEstimator) Deserialize ¶

func (*CorrelationEstimator) Options ¶

func (*CorrelationEstimator) Serialize ¶

func (*CorrelationEstimator) Similarity ¶

type CorrelationEstimatorNormalizationType ¶

func (CorrelationEstimatorNormalizationType) String ¶

type CorrelationEstimatorType ¶

func (CorrelationEstimatorType) String ¶

type DataPartitioner ¶ added in v0.2.0

func DeserializePartitioner ¶ added in v0.2.0

func NewDataPartitioner ¶ added in v0.2.0

type DataPartitionerType ¶ added in v0.2.0

type Dataset ¶

func DiscoverDatasets ¶

func NewDataset ¶

func (Dataset) Data ¶

func (Dataset) Header ¶

func (Dataset) ID ¶

func (Dataset) Path ¶