Documentation ¶
Overview ¶
Package core contains the necessary structs and functions used to execute the dataset analysis.
Index ¶
- Constants
- Variables
- func DistanceToSimilarity(distance float64) float64
- func Kendall(a, b []float64) float64
- func MaxAbsoluteCountPercentageError(actual, predicted []float64, percentile int) float64
- func MaxAbsolutePercentageError(actual, predicted []float64) float64
- func Mean(a []float64) float64
- func MeanAbsoluteError(actual, predicted []float64) float64
- func MeanAbsolutePercentageError(actual, predicted []float64) float64
- func MedianAbsolutePercentageError(actual, predicted []float64) float64
- func Pearson(a, b []float64) float64
- func Percentile(values []float64, percentile int) float64
- func RSquared(actual, predicted []float64) float64
- func Rank(a []float64) []int
- func RootMeanSquaredError(actual, predicted []float64) float64
- func RootMeanSquaredLogError(actual, predicted []float64) float64
- func SerializeCoordinates(coords []DatasetCoordinates) []byte
- func SimilarityToDistance(similarity float64) float64
- func Spearman(a, b []float64) float64
- func StdDev(a []float64) float64
- type AbstractDatasetSimilarityEstimator
- func (a *AbstractDatasetSimilarityEstimator) Concurrency() int
- func (a *AbstractDatasetSimilarityEstimator) Datasets() []*Dataset
- func (a *AbstractDatasetSimilarityEstimator) Duration() float64
- func (a *AbstractDatasetSimilarityEstimator) PopulationPolicy() DatasetSimilarityPopulationPolicy
- func (a *AbstractDatasetSimilarityEstimator) SetPopulationPolicy(pol DatasetSimilarityPopulationPolicy)
- func (a *AbstractDatasetSimilarityEstimator) SimilarityMatrix() *DatasetSimilarityMatrix
- type AbstractModeler
- type BhattacharyyaEstimator
- func (e *BhattacharyyaEstimator) Compute() error
- func (e *BhattacharyyaEstimator) Configure(conf map[string]string)
- func (e *BhattacharyyaEstimator) Deserialize(b []byte)
- func (e *BhattacharyyaEstimator) Options() map[string]string
- func (e *BhattacharyyaEstimator) Serialize() []byte
- func (e *BhattacharyyaEstimator) Similarity(a, b *Dataset) float64
- type Clustering
- type CompositeEstimator
- func (e *CompositeEstimator) Compute() error
- func (e *CompositeEstimator) Configure(conf map[string]string)
- func (e *CompositeEstimator) Deserialize(b []byte)
- func (e *CompositeEstimator) Options() map[string]string
- func (e *CompositeEstimator) Serialize() []byte
- func (e *CompositeEstimator) Similarity(a, b *Dataset) float64
- type CorrelationEstimator
- func (e *CorrelationEstimator) Compute() error
- func (e *CorrelationEstimator) Configure(conf map[string]string)
- func (e *CorrelationEstimator) Deserialize(b []byte)
- func (e *CorrelationEstimator) Options() map[string]string
- func (e *CorrelationEstimator) Serialize() []byte
- func (e *CorrelationEstimator) Similarity(a, b *Dataset) float64
- type CorrelationEstimatorNormalizationType
- type CorrelationEstimatorType
- type DataPartitioner
- type DataPartitionerType
- type Dataset
- type DatasetCoordinates
- type DatasetEvaluator
- type DatasetEvaluatorType
- type DatasetPartitioner
- type DatasetScores
- type DatasetSimilarityEstimator
- type DatasetSimilarityEstimatorType
- type DatasetSimilarityMatrix
- func (s *DatasetSimilarityMatrix) Capacity() int
- func (s *DatasetSimilarityMatrix) Deserialize(buff []byte) error
- func (s *DatasetSimilarityMatrix) FullyCalculatedNodes() int
- func (s *DatasetSimilarityMatrix) Get(idxA, idxB int) float64
- func (s *DatasetSimilarityMatrix) IndexDisabled(flag bool)
- func (s *DatasetSimilarityMatrix) LeastSimilar() (int, float64)
- func (s *DatasetSimilarityMatrix) Serialize() []byte
- func (s *DatasetSimilarityMatrix) Set(idxA, idxB int, value float64)
- func (s DatasetSimilarityMatrix) String() string
- type DatasetSimilarityPopulationPolicy
- type DatasetSimilarityPopulationPolicyType
- type DatasetTuple
- type DatasetTuples
- type Dendrogram
- type DendrogramNode
- type FileBasedEvaluator
- type JaccardEstimator
- func (e *JaccardEstimator) Compute() error
- func (e *JaccardEstimator) Configure(conf map[string]string)
- func (e *JaccardEstimator) Deserialize(b []byte)
- func (e *JaccardEstimator) Options() map[string]string
- func (e *JaccardEstimator) Serialize() []byte
- func (e *JaccardEstimator) Similarity(a, b *Dataset) float64
- type KDTreePartitioner
- func (p *KDTreePartitioner) Configure(conf map[string]string)
- func (p *KDTreePartitioner) Construct(tuples []DatasetTuple) error
- func (p *KDTreePartitioner) Deserialize(b []byte)
- func (p *KDTreePartitioner) Options() map[string]string
- func (p *KDTreePartitioner) Partition(tuples []DatasetTuple) ([][]DatasetTuple, error)
- func (p *KDTreePartitioner) Serialize() []byte
- type KMeansPartitioner
- func (p *KMeansPartitioner) Configure(conf map[string]string)
- func (p *KMeansPartitioner) Construct(tuples []DatasetTuple) error
- func (p *KMeansPartitioner) Deserialize(b []byte)
- func (p *KMeansPartitioner) Options() map[string]string
- func (p *KMeansPartitioner) Partition(tuples []DatasetTuple) ([][]DatasetTuple, error)
- func (p *KMeansPartitioner) Serialize() []byte
- type KNNModeler
- type MDScaling
- type Modeler
- type ModelerType
- type OnlineDatasetEvaluator
- type OnlineIndexer
- type PartitionerType
- type ScriptBasedModeler
- type ScriptPairSimilarityEstimator
- func (e *ScriptPairSimilarityEstimator) Compute() error
- func (e *ScriptPairSimilarityEstimator) Configure(conf map[string]string)
- func (e *ScriptPairSimilarityEstimator) Deserialize(b []byte)
- func (e *ScriptPairSimilarityEstimator) Options() map[string]string
- func (e *ScriptPairSimilarityEstimator) Serialize() []byte
- func (e *ScriptPairSimilarityEstimator) Similarity(a, b *Dataset) float64
- type ScriptSimilarityEstimator
- func (e *ScriptSimilarityEstimator) Compute() error
- func (e *ScriptSimilarityEstimator) Configure(conf map[string]string)
- func (e *ScriptSimilarityEstimator) Deserialize(b []byte)
- func (e *ScriptSimilarityEstimator) Options() map[string]string
- func (e *ScriptSimilarityEstimator) Serialize() []byte
- func (e *ScriptSimilarityEstimator) Similarity(a, b *Dataset) float64
- type ScriptSimilarityEstimatorType
- type SizeEstimator
Constants ¶
const ( // CorrelationSimilarityTypePearson represents the Pearson cor. coeff CorrelationSimilarityTypePearson = iota // CorrelationSimilarityTypeSpearman represents the Spearman cor. coeff CorrelationSimilarityTypeSpearman = iota + 1 // CorrelationSimilarityTypeKendall represents the Kendall cor. coeff CorrelationSimilarityTypeKendall = iota + 2 )
const ( // CorrelationSimilarityNormalizationAbs returns |r|, r being the cor. metric CorrelationSimilarityNormalizationAbs = iota // CorrelationSimilarityNormalizationScale returns r/2 + 0.5, r being the cor. metric CorrelationSimilarityNormalizationScale = iota + 1 // CorrelationSimilarityNormalizationPos returns r, if r>=0 else 0 CorrelationSimilarityNormalizationPos = iota + 2 )
const KMeansMaxIteration = 10000
Variables ¶
var DatasetSimilarityEstimatorAvailableTypes = []DatasetSimilarityEstimatorType{ SimilarityTypeBhattacharyya, SimilarityTypeJaccard, SimilarityTypeCorrelation, SimilarityTypeComposite, SimilarityTypeScript, SimilarityTypeSize, SimilarityTypeScriptPair, }
DatasetSimilarityEstimatorAvailableTypes lists the available similarity types
Functions ¶
func DistanceToSimilarity ¶
DistanceToSimilarity returns the similarity based on the distance
func Kendall ¶
Kendall returns yet another rank correlation coefficient between two variables. The two arrays must be of the same size, else 0 is returned.
func MaxAbsoluteCountPercentageError ¶ added in v0.2.0
MaxAbsoluteCountPercentageError returns the percentage of the dataset that has AbsolutePercentageError gte to percentile.
func MaxAbsolutePercentageError ¶ added in v0.2.0
MaxAbsolutePercentageError returns the Max error of the actual vs the predicted values as a percentage.
func MeanAbsoluteError ¶
MeanAbsoluteError returns the MAE of the actual vs the predicted values
func MeanAbsolutePercentageError ¶
MeanAbsolutePercentageError returns the MAPE of the actual vs the predicted values
func MedianAbsolutePercentageError ¶
MedianAbsolutePercentageError returns the MdAPE of the actual vs the predicted values
func Pearson ¶
Pearson returns the Pearson correlation coefficient between two variables. The two arrays must be of the same size, else 0 is returned.
func Percentile ¶
Percentile returns the i-th percentile of an array of values
func RootMeanSquaredError ¶
RootMeanSquaredError returns the RMSE of the actual vs the predicted values
func RootMeanSquaredLogError ¶
RootMeanSquaredLogError returns the RMSLE of the actual vs the predicted values
func SerializeCoordinates ¶
func SerializeCoordinates(coords []DatasetCoordinates) []byte
SerializeCoordinates returns a CSV serialization of a coordinates slice
func SimilarityToDistance ¶
SimilarityToDistance returns the distance based on the similarity
Types ¶
type AbstractDatasetSimilarityEstimator ¶
type AbstractDatasetSimilarityEstimator struct {
// contains filtered or unexported fields
}
AbstractDatasetSimilarityEstimator is the base struct for the similarity estimator objects
func (*AbstractDatasetSimilarityEstimator) Concurrency ¶
func (a *AbstractDatasetSimilarityEstimator) Concurrency() int
Concurrency returns the max number of threads to be used for the computation
func (*AbstractDatasetSimilarityEstimator) Datasets ¶
func (a *AbstractDatasetSimilarityEstimator) Datasets() []*Dataset
Datasets returns the datasets of the estimator
func (*AbstractDatasetSimilarityEstimator) Duration ¶
func (a *AbstractDatasetSimilarityEstimator) Duration() float64
Duration returns the duration of the computation
func (*AbstractDatasetSimilarityEstimator) PopulationPolicy ¶
func (a *AbstractDatasetSimilarityEstimator) PopulationPolicy() DatasetSimilarityPopulationPolicy
PopulationPolicy gets the population policy to be used
func (*AbstractDatasetSimilarityEstimator) SetPopulationPolicy ¶
func (a *AbstractDatasetSimilarityEstimator) SetPopulationPolicy(pol DatasetSimilarityPopulationPolicy)
SetPopulationPolicy sets the population policy to be used
func (*AbstractDatasetSimilarityEstimator) SimilarityMatrix ¶
func (a *AbstractDatasetSimilarityEstimator) SimilarityMatrix() *DatasetSimilarityMatrix
SimilarityMatrix returns the similarity matrix of the estimator
type AbstractModeler ¶
type AbstractModeler struct {
// contains filtered or unexported fields
}
AbstractModeler implements the common methods of the Modeler structs
func (*AbstractModeler) AppxValues ¶
func (a *AbstractModeler) AppxValues() []float64
AppxValues returns the values of all the datasets
func (*AbstractModeler) Datasets ¶
func (a *AbstractModeler) Datasets() []*Dataset
Datasets returns the datasets slice
func (*AbstractModeler) ErrorMetrics ¶
func (a *AbstractModeler) ErrorMetrics() map[string]float64
ErrorMetrics returns a list of error metrics for the specified model
func (*AbstractModeler) EvalTime ¶
func (a *AbstractModeler) EvalTime() float64
EvalTime returns the dataset evaluation time of the Modeler
func (*AbstractModeler) ExecTime ¶
func (a *AbstractModeler) ExecTime() float64
ExecTime returns the total execution time of the Modeler
func (*AbstractModeler) Samples ¶
func (a *AbstractModeler) Samples() map[int]float64
Samples returns the indices of the chosen datasets
type BhattacharyyaEstimator ¶
type BhattacharyyaEstimator struct { AbstractDatasetSimilarityEstimator // contains filtered or unexported fields }
BhattacharyyaEstimator is the similarity estimator that quantifies the similarity of the distribution between the datasets.
func (*BhattacharyyaEstimator) Compute ¶
func (e *BhattacharyyaEstimator) Compute() error
Compute method constructs the Similarity Matrix
func (*BhattacharyyaEstimator) Configure ¶
func (e *BhattacharyyaEstimator) Configure(conf map[string]string)
Configure sets the configuration parameters of the estimator
func (*BhattacharyyaEstimator) Deserialize ¶
func (e *BhattacharyyaEstimator) Deserialize(b []byte)
Deserialize constructs a similarity object based on the byte stream
func (*BhattacharyyaEstimator) Options ¶
func (e *BhattacharyyaEstimator) Options() map[string]string
Options returns a list of parameters that can be set by the user
func (*BhattacharyyaEstimator) Serialize ¶
func (e *BhattacharyyaEstimator) Serialize() []byte
Serialize returns a byte array containing a serialized form of the estimator
func (*BhattacharyyaEstimator) Similarity ¶
func (e *BhattacharyyaEstimator) Similarity(a, b *Dataset) float64
Similarity returns the similarity between two datasets
type Clustering ¶
type Clustering struct {
// contains filtered or unexported fields
}
Clustering struct is responsible to execute the necessary actions in order to cluster the datasets based on their availability
func NewClustering ¶
func NewClustering(similarities *DatasetSimilarityMatrix, datasets []*Dataset) *Clustering
NewClustering is the constructor for creating a Clustering object, providing a DatasetSimilarities object
func (*Clustering) SetConcurrency ¶
func (c *Clustering) SetConcurrency(concurrency int)
SetConcurrency sets the number of threads to be used
type CompositeEstimator ¶
type CompositeEstimator struct { AbstractDatasetSimilarityEstimator // contains filtered or unexported fields }
CompositeEstimator returns a similarity function based on compositions of simpler similarity expressions. The user needs to provide a formula containing the expression of the similarity function, e.g.: 0.8 x BHATTACHARYYA + 0.2 x CORRELATION Note that it is the user's responsibility to guarantee that the overall expression remains within the limits of the similarity expression [0,1].
func (*CompositeEstimator) Compute ¶
func (e *CompositeEstimator) Compute() error
Compute method constructs the Similarity Matrix
func (*CompositeEstimator) Configure ¶
func (e *CompositeEstimator) Configure(conf map[string]string)
Configure provides the configuration parameters needed by the Estimator
func (*CompositeEstimator) Deserialize ¶
func (e *CompositeEstimator) Deserialize(b []byte)
Deserialize constructs an Estimator object based on the byte array provided.
func (*CompositeEstimator) Options ¶
func (e *CompositeEstimator) Options() map[string]string
Options returns the applicable parameters needed by the Estimator.
func (*CompositeEstimator) Serialize ¶
func (e *CompositeEstimator) Serialize() []byte
Serialize returns an array of bytes representing the Estimator.
func (*CompositeEstimator) Similarity ¶
func (e *CompositeEstimator) Similarity(a, b *Dataset) float64
Similarity returns the similarity between two datasets
type CorrelationEstimator ¶
type CorrelationEstimator struct { AbstractDatasetSimilarityEstimator // contains filtered or unexported fields }
CorrelationEstimator estimates the similarity between two datasets based on a correlation metric. This metric can only be used for datasets that consist of a single column and consist of the same number of tuples.
func (*CorrelationEstimator) Compute ¶
func (e *CorrelationEstimator) Compute() error
Compute method constructs the Similarity Matrix
func (*CorrelationEstimator) Configure ¶
func (e *CorrelationEstimator) Configure(conf map[string]string)
Configure provides a set of configuration options to the CorrelationEstimator struct.
func (*CorrelationEstimator) Deserialize ¶
func (e *CorrelationEstimator) Deserialize(b []byte)
Deserialize returns a byte array in order to deserialize the CorrelationEstimator
func (*CorrelationEstimator) Options ¶
func (e *CorrelationEstimator) Options() map[string]string
Options returns a list of options used internally by the CorrelationEstimator struct for its execution.
func (*CorrelationEstimator) Serialize ¶
func (e *CorrelationEstimator) Serialize() []byte
Serialize returns a byte array in order to serialize the CorrelationEstimator struct.
func (*CorrelationEstimator) Similarity ¶
func (e *CorrelationEstimator) Similarity(a, b *Dataset) float64
Similarity returns the similarity between two datasets. Since all the correlation coefficients are between [-1.0,1.0], the output of this function is scaled to [0.0,1.0] by returning (x/2.0 + 0.5), where x is one of Pearson, Spearman and Kendall coefficients.
type CorrelationEstimatorNormalizationType ¶
type CorrelationEstimatorNormalizationType uint8
CorrelationEstimatorNormalizationType represents the type of the normalization action. Since all correlation metrics can take any value in [-1,1], this type reflects the policy with which [-1,1] will be mapped to a similarity metric in [0,1]
func (CorrelationEstimatorNormalizationType) String ¶
func (s CorrelationEstimatorNormalizationType) String() string
String returns a string representation of the CorrelationEstimatorNormalizationType
type CorrelationEstimatorType ¶
type CorrelationEstimatorType uint8
CorrelationEstimatorType represents the type of correlation to be used by the CorrelationEstimator
func (CorrelationEstimatorType) String ¶
func (s CorrelationEstimatorType) String() string
String returns a string representation of the CorrelationEstimatorType
type DataPartitioner ¶ added in v0.2.0
type DataPartitioner interface { // Construct estimates the partitioning of the provided tuples (offline) Construct([]DatasetTuple) error // Partition executes partitioning to new datasets Partition([]DatasetTuple) ([][]DatasetTuple, error) // Configure provides the necessary configuration option to DataPartitioner Configure(map[string]string) // Options returns a list of options the DataPartitioner accepts with a // description Options() map[string]string // Serialize converts a DataPartitioner object to a stream of bytes Serialize() []byte // Deserialize converts a stream of bytes to a DataPartitioner object Deserialize([]byte) }
DataPartitioner is responsible to partition a dataset and, upon estimating the basic partitioning scheme, dynamically partition new datasets.
func DeserializePartitioner ¶ added in v0.2.0
func DeserializePartitioner(b []byte) DataPartitioner
DeserializePartitioner instantiates a new partitioner from a serialized version
func NewDataPartitioner ¶ added in v0.2.0
func NewDataPartitioner(dpType DataPartitionerType, conf map[string]string) DataPartitioner
NewDataPartitioner is the factory method for the creation of a new DataPartitioner object
type DataPartitionerType ¶ added in v0.2.0
type DataPartitionerType uint8
DataPartitionerType represents the type of the DataPartitioner struct
const ( // DataPartitionerKDTree utilizes a kd-tree for partitioning DataPartitionerKDTree DataPartitionerType = iota + 1 // DataPartitionerKMeans utilizes kmeans for partitioning DataPartitionerKMeans DataPartitionerType = iota + 2 )
type Dataset ¶
type Dataset struct {
// contains filtered or unexported fields
}
Dataset struct represents a dataset object.
func DiscoverDatasets ¶
DiscoverDatasets is used to return a slice of Datasets when a new splits directory is provided
func NewDataset ¶
NewDataset is the constructor for the Dataset struct. A random ID is assigned to a new dataset
func (Dataset) Data ¶
func (d Dataset) Data() []DatasetTuple
Data getter for dataset - only works if ReadFromFile was successful
func (*Dataset) ReadFromFile ¶
ReadFromFile is used to parse the Dataset into memory. If the data are previously read, the method is not re-executed.
type DatasetCoordinates ¶
type DatasetCoordinates []float64
DatasetCoordinates is a struct for representing the dataset coordinates
func DeserializeCoordinates ¶
func DeserializeCoordinates(buffer []byte) []DatasetCoordinates
DeserializeCoordinates instantiates a new DatasetCoordinates slice, based on a CSV serialization form
type DatasetEvaluator ¶
DatasetEvaluator reflects the interface of an evaluator object.
func NewDatasetEvaluator ¶
func NewDatasetEvaluator(evalType DatasetEvaluatorType, params map[string]string) (DatasetEvaluator, error)
NewDatasetEvaluator returns a new DatasetEvaluator object
type DatasetEvaluatorType ¶
type DatasetEvaluatorType uint8
DatasetEvaluatorType represents the type of the dataset evaluator
const ( // OnlineEval dynamically parses the dataset values OnlineEval DatasetEvaluatorType = iota + 1 // FileBasedEval returns the pre-computed values of an operator FileBasedEval )
type DatasetPartitioner ¶
type DatasetPartitioner struct {
// contains filtered or unexported fields
}
DatasetPartitioner accepts a single dataset and it is responsible to partition it.
func NewDatasetPartitioner ¶
func NewDatasetPartitioner(input, output string, splits int, partitionType PartitionerType) *DatasetPartitioner
NewDatasetPartitioner initializes a new DatasetPartitioner object
func (*DatasetPartitioner) Delete ¶
func (a *DatasetPartitioner) Delete()
Delete function deletes the output directory, containing the Dataset splits
func (*DatasetPartitioner) Partition ¶
func (a *DatasetPartitioner) Partition()
Partition function is used to execute the partitioning
type DatasetScores ¶
DatasetScores is used to store the scores of a set of datasets
func AppxScores ¶ added in v0.2.0
func AppxScores(modeler Modeler) *DatasetScores
AppxScores returns `modeler.AppxValues()` as a `DatasetScores` struct
func NewDatasetScores ¶
func NewDatasetScores() *DatasetScores
NewDatasetScores initializes a new DatasetScores struct
func (*DatasetScores) Deserialize ¶
func (s *DatasetScores) Deserialize(buf []byte) error
Deserialize constructs a DatasetScores struct based on a byte array
func (*DatasetScores) Serialize ¶
func (s *DatasetScores) Serialize() ([]byte, error)
Serialize returns a stream containing a DatasetScores object
type DatasetSimilarityEstimator ¶
type DatasetSimilarityEstimator interface { // computes the similarity matrix Compute() error // returns the datasets slice Datasets() []*Dataset // returns the similarity for 2 datasets Similarity(a, b *Dataset) float64 // returns the similarity struct SimilarityMatrix() *DatasetSimilarityMatrix // provides configuration options Configure(map[string]string) // list of options for the estimator Options() map[string]string // sets the population policy for the estimator SetPopulationPolicy(DatasetSimilarityPopulationPolicy) // returns the population policy PopulationPolicy() DatasetSimilarityPopulationPolicy // returns a serialized estimator object Serialize() []byte // instantiates an estimator from a serialized object Deserialize([]byte) // returns the seconds needed to execute the computation Duration() float64 // returns the max number of threads to be used Concurrency() int // contains filtered or unexported methods }
DatasetSimilarityEstimator is the interface that each Similarity estimator obeys.
func DeserializeSimilarityEstimator ¶
func DeserializeSimilarityEstimator(b []byte) DatasetSimilarityEstimator
DeserializeSimilarityEstimator method is used to deserialize the Estimator according to its type
func NewDatasetSimilarityEstimator ¶
func NewDatasetSimilarityEstimator( estType DatasetSimilarityEstimatorType, datasets []*Dataset) DatasetSimilarityEstimator
NewDatasetSimilarityEstimator is a factory method for the DatasetSimilarityEstimator structs, used to initialize the estimator and return it to the user.
type DatasetSimilarityEstimatorType ¶
type DatasetSimilarityEstimatorType uint
DatasetSimilarityEstimatorType represents the type of the Similarity Estimator
const ( // SimilarityTypeJaccard estimates the Jaccard coefficient SimilarityTypeJaccard DatasetSimilarityEstimatorType = iota // SimilarityTypeBhattacharyya estimates the Bhattacharyya coefficient SimilarityTypeBhattacharyya DatasetSimilarityEstimatorType = iota + 1 // SimilarityTypeScript uses a script to transform the data SimilarityTypeScript DatasetSimilarityEstimatorType = iota + 2 // SimilarityTypeComposite utilizes multiple estimators concurrently SimilarityTypeComposite DatasetSimilarityEstimatorType = iota + 4 // SimilarityTypeCorrelation estimates correlation metrics SimilarityTypeCorrelation DatasetSimilarityEstimatorType = iota + 5 // SimilarityTypeSize estimates size metric SimilarityTypeSize DatasetSimilarityEstimatorType = iota + 6 // SimilarityTypeScriptPair estimates the similarity based on a script for each pair SimilarityTypeScriptPair DatasetSimilarityEstimatorType = iota + 7 )
func NewDatasetSimilarityEstimatorType ¶
func NewDatasetSimilarityEstimatorType(estimatorType string) *DatasetSimilarityEstimatorType
NewDatasetSimilarityEstimatorType transforms the similarity type from a string to a DatasetSimilarityEstimatorType object
func (DatasetSimilarityEstimatorType) String ¶
func (t DatasetSimilarityEstimatorType) String() string
type DatasetSimilarityMatrix ¶
type DatasetSimilarityMatrix struct {
// contains filtered or unexported fields
}
DatasetSimilarityMatrix represents the struct that holds the results of a dataset similarity estimation. It also provides the necessary methods to access and populate it.
func NewDatasetSimilarities ¶
func NewDatasetSimilarities(capacity int) *DatasetSimilarityMatrix
NewDatasetSimilarities is the constructor for the DatasetSimilarities struct, expecting the number of datasets that will be held by it. If capacity=0, this implies that the Similarity Matrix will be deserialized.
func (*DatasetSimilarityMatrix) Capacity ¶
func (s *DatasetSimilarityMatrix) Capacity() int
Capacity returns the capacity of the Similarity Matrix
func (*DatasetSimilarityMatrix) Deserialize ¶
func (s *DatasetSimilarityMatrix) Deserialize(buff []byte) error
Deserialize instantiates an empty DatasetSimilarities object. In case of parse failure, an error is thrown
func (*DatasetSimilarityMatrix) FullyCalculatedNodes ¶
func (s *DatasetSimilarityMatrix) FullyCalculatedNodes() int
FullyCalculatedNodes returns the number of nodes the similarity of which has been calculated for all the nodes. This number can work as a measure of how close to the full similarity matrix the current object is.
func (*DatasetSimilarityMatrix) Get ¶
func (s *DatasetSimilarityMatrix) Get(idxA, idxB int) float64
Get returns the similarity between two dataset paths
func (*DatasetSimilarityMatrix) IndexDisabled ¶
func (s *DatasetSimilarityMatrix) IndexDisabled(flag bool)
IndexDisabled sets whether the closest dataset index should be disabled or not. The index is useless if the FULL Estimator strategy is being followed.
func (*DatasetSimilarityMatrix) LeastSimilar ¶
func (s *DatasetSimilarityMatrix) LeastSimilar() (int, float64)
LeastSimilar method returns the dataset that presents the lowest similarity among the examined datasets
func (*DatasetSimilarityMatrix) Serialize ¶
func (s *DatasetSimilarityMatrix) Serialize() []byte
Serialize method returns a byte slice that represents the similarity matrix
func (*DatasetSimilarityMatrix) Set ¶
func (s *DatasetSimilarityMatrix) Set(idxA, idxB int, value float64)
Set is a setter function for the similarity between two datasets
func (DatasetSimilarityMatrix) String ¶
func (s DatasetSimilarityMatrix) String() string
type DatasetSimilarityPopulationPolicy ¶
type DatasetSimilarityPopulationPolicy struct { PolicyType DatasetSimilarityPopulationPolicyType Parameters map[string]float64 }
DatasetSimilarityPopulationPolicy is the struct that holds the Population Policy of the Similarity Matrix along with the configuration parameters of it.
func (*DatasetSimilarityPopulationPolicy) Deserialize ¶
func (s *DatasetSimilarityPopulationPolicy) Deserialize(b []byte)
Deserialize is responsible to instantiate a Population Policy object based on its byte representation.
func (*DatasetSimilarityPopulationPolicy) Serialize ¶
func (s *DatasetSimilarityPopulationPolicy) Serialize() []byte
Serialize method returns a slice of bytes containing the serialized form of the Population Policy
type DatasetSimilarityPopulationPolicyType ¶
type DatasetSimilarityPopulationPolicyType uint
DatasetSimilarityPopulationPolicyType is the type that represents the Similarity Matrix population policy
const ( // PopulationPolicyFull policy needs no params PopulationPolicyFull DatasetSimilarityPopulationPolicyType = iota // PopulationPolicyAprx must have defined one of two params: // count (how many points) or threshold (percentage in similarity gain) PopulationPolicyAprx DatasetSimilarityPopulationPolicyType = iota + 1 )
type DatasetTuple ¶
type DatasetTuple struct {
Data []float64
}
DatasetTuple represents a data tuple from the dataset
func DatasetsIntersection ¶
func DatasetsIntersection(a, b *Dataset) []DatasetTuple
DatasetsIntersection function is used to calculate the intersection of two datasets and returns the tuples that belong to it.
func DatasetsUnion ¶
func DatasetsUnion(a, b *Dataset) []DatasetTuple
DatasetsUnion function is used to calculate the union of two datasets and returns the tuples that belong to it.
func (*DatasetTuple) Deserialize ¶
func (t *DatasetTuple) Deserialize(data string)
Deserialize is used to construct a tuple from a string representation
func (DatasetTuple) Equals ¶
func (t DatasetTuple) Equals(o DatasetTuple) bool
Equals function returns true if t is equal to o
func (*DatasetTuple) Serialize ¶
func (t *DatasetTuple) Serialize() string
Serialize transforms the tuple to a string representation
func (DatasetTuple) String ¶
func (t DatasetTuple) String() string
type DatasetTuples ¶
type DatasetTuples []DatasetTuple
DatasetTuples represents a slice of DatasetTuple objects
func (DatasetTuples) Len ¶
func (slice DatasetTuples) Len() int
func (DatasetTuples) Less ¶
func (slice DatasetTuples) Less(i, j int) bool
func (DatasetTuples) Swap ¶
func (slice DatasetTuples) Swap(i, j int)
type Dendrogram ¶
type Dendrogram struct {
// contains filtered or unexported fields
}
Dendrogram represents the results of the ClusterApp objects
func NewDendrogram ¶
func NewDendrogram(datasets []*Dataset) *Dendrogram
NewDendrogram is the constructor for a Dendrogram struct
func (*Dendrogram) GetClusters ¶
func (d *Dendrogram) GetClusters(level int) [][]*Dataset
GetClusters function returns a slice containing the clusters of datasets for the specified dendrogram level
func (*Dendrogram) Heights ¶
func (d *Dendrogram) Heights() (int, int)
Heights function returns the tree heights (max, min)
func (*Dendrogram) String ¶
func (d *Dendrogram) String() string
type DendrogramNode ¶
type DendrogramNode struct {
// contains filtered or unexported fields
}
DendrogramNode is the node of the Dendrogram
func (DendrogramNode) String ¶
func (n DendrogramNode) String() string
type FileBasedEvaluator ¶
type FileBasedEvaluator struct {
// contains filtered or unexported fields
}
FileBasedEvaluator returns the scores of an operator based on a scores file.
type JaccardEstimator ¶
type JaccardEstimator struct {
AbstractDatasetSimilarityEstimator
}
JaccardEstimator estimates the Jaccard coefficients between the different datasets. The Jaccard coefficient between two datasets is defined as the cardinality of the intersection divided by the cardinality of the union of the two datasets.
func (*JaccardEstimator) Compute ¶
func (e *JaccardEstimator) Compute() error
Compute method constructs the Similarity Matrix
func (*JaccardEstimator) Configure ¶
func (e *JaccardEstimator) Configure(conf map[string]string)
Configure sets the necessary parameters before the similarity execution
func (*JaccardEstimator) Deserialize ¶
func (e *JaccardEstimator) Deserialize(b []byte)
Deserialize instantiates the estimator based on a byte array
func (*JaccardEstimator) Options ¶
func (e *JaccardEstimator) Options() map[string]string
Options returns a list of applicable parameters
func (*JaccardEstimator) Serialize ¶
func (e *JaccardEstimator) Serialize() []byte
Serialize returns a byte array containing the estimator.
func (*JaccardEstimator) Similarity ¶
func (e *JaccardEstimator) Similarity(a, b *Dataset) float64
Similarity returns the similarity between two datasets
type KDTreePartitioner ¶ added in v0.2.0
type KDTreePartitioner struct {
// contains filtered or unexported fields
}
KDTreePartitioner generates a kd-tree on the selected columns and applies the partitioning to new datasets
func (*KDTreePartitioner) Configure ¶ added in v0.2.0
func (p *KDTreePartitioner) Configure(conf map[string]string)
Configure provides the necessary configuration params
func (*KDTreePartitioner) Construct ¶ added in v0.2.0
func (p *KDTreePartitioner) Construct(tuples []DatasetTuple) error
func (*KDTreePartitioner) Deserialize ¶ added in v0.2.0
func (p *KDTreePartitioner) Deserialize(b []byte)
Deserialize parses a byte array and instantiates a new kdtree part. object
func (*KDTreePartitioner) Options ¶ added in v0.2.0
func (p *KDTreePartitioner) Options() map[string]string
Options returns a list of options
func (*KDTreePartitioner) Partition ¶ added in v0.2.0
func (p *KDTreePartitioner) Partition(tuples []DatasetTuple) ([][]DatasetTuple, error)
Partition applies the previously constructed kd-tree in order to partition the given dataset
func (*KDTreePartitioner) Serialize ¶ added in v0.2.0
func (p *KDTreePartitioner) Serialize() []byte
Serialize returns a byte array with the serialized object
type KMeansPartitioner ¶ added in v0.2.0
type KMeansPartitioner struct {
// contains filtered or unexported fields
}
KMeansPartitioner applies the k-means clustering algorithm to a given dataset and using the calculated centroids, it partitions newly provided datasets according to their distance from them
func (*KMeansPartitioner) Configure ¶ added in v0.2.0
func (p *KMeansPartitioner) Configure(conf map[string]string)
Configure provides the necessary configuration options to the KMeansPartitioner struct
func (*KMeansPartitioner) Construct ¶ added in v0.2.0
func (p *KMeansPartitioner) Construct(tuples []DatasetTuple) error
Construct runs the k-means algorithm and estimates the centroids of the clusters (in order to be later used for partitioning).
func (*KMeansPartitioner) Deserialize ¶ added in v0.2.0
func (p *KMeansPartitioner) Deserialize(b []byte)
func (*KMeansPartitioner) Options ¶ added in v0.2.0
func (p *KMeansPartitioner) Options() map[string]string
Options returns the configuration options of the KMeansPartitioner
func (*KMeansPartitioner) Partition ¶ added in v0.2.0
func (p *KMeansPartitioner) Partition(tuples []DatasetTuple) ([][]DatasetTuple, error)
Partition receives a set of tuples as input and returns a number of clusters
func (*KMeansPartitioner) Serialize ¶ added in v0.2.0
func (p *KMeansPartitioner) Serialize() []byte
type KNNModeler ¶
type KNNModeler struct { AbstractModeler // contains filtered or unexported fields }
KNNModeler utilizes a similarity matrix in order to approximate the training set
func (*KNNModeler) Configure ¶
func (m *KNNModeler) Configure(conf map[string]string) error
Configure is the method used to provide the essential parameters for the conf of the modeler
func (*KNNModeler) Run ¶
func (k *KNNModeler) Run() error
Run executes the training part and obtains the model
type MDScaling ¶
type MDScaling struct {
// contains filtered or unexported fields
}
MDScaling is responsible for the execution of a MultiDimensional Scaling algorithm in order to provide coefficients for each dataset, based on a similarity matrix.
func NewMDScaling ¶
func NewMDScaling(matrix *DatasetSimilarityMatrix, k int, script string) *MDScaling
NewMDScaling is the default MDScaling constructor; it initializes a new MDScaling object, based on the provided DatasetSimilarities struct and the k factor that determines the number of target dimensions. If k<1, then auto estimation takes place.
func (*MDScaling) Coordinates ¶
func (md *MDScaling) Coordinates() []DatasetCoordinates
Coordinates getter returns the dataset coordinates in a nxk slice (n being the number of datasets).
type Modeler ¶
type Modeler interface { // Configure is responsible to provide the necessary configuration // options to the Modeler struct. Call it before Run. Configure(map[string]string) error // Run initiates the modeling process. Run() error // Datasets returns the datasets slice Datasets() []*Dataset // Samples returns the indices of the chosen datasets. Samples() map[int]float64 // AppxValues returns a slice of the approximated values AppxValues() []float64 // ErrorMetrics returns a list of error metrics for the specified modeler ErrorMetrics() map[string]float64 // ExecTime returns the total execution time of the Modeler ExecTime() float64 // EvalTime returns the evaluation time of the Modeler EvalTime() float64 }
Modeler is the interface for the objects that model the dataset space.
func NewModeler ¶
func NewModeler( modelerType ModelerType, datasets []*Dataset, sr float64, evaluator DatasetEvaluator) Modeler
NewModeler is the factory method for the modeler object
type ModelerType ¶
type ModelerType uint8
const ( ScriptBasedModelerType ModelerType = iota KNNModelerType ModelerType = iota + 1 )
func NewModelerType ¶
func NewModelerType(t string) ModelerType
type OnlineDatasetEvaluator ¶
type OnlineDatasetEvaluator struct {
// contains filtered or unexported fields
}
OnlineDatasetEvaluator is responsible to execute the training script and fetch the model accuracy
type OnlineIndexer ¶
type OnlineIndexer struct {
// contains filtered or unexported fields
}
OnlineIndexer is used to execute online indexing. The user can supply a map containing distances from original datasets and the indexer returns the coordinates of the specified dataset.
func NewOnlineIndexer ¶
func NewOnlineIndexer(estimator DatasetSimilarityEstimator, coordinates []DatasetCoordinates, script string) *OnlineIndexer
NewOnlineIndexer is a constructor function used to initialize an OnlineIndexer object.
func (*OnlineIndexer) Calculate ¶
func (o *OnlineIndexer) Calculate(dataset *Dataset) (DatasetCoordinates, float64, error)
Calculate method is responsible to calculate the coordinates of the specified dataset. In case that such a dataset cannot be represented by the specified coordinates system, an error is returned.
func (*OnlineIndexer) DatasetsToCompare ¶
func (o *OnlineIndexer) DatasetsToCompare(datasets int)
DatasetsToCompare is a setter method to determine the number of datasets that will be utilized for the assignment of coordinates
type PartitionerType ¶
type PartitionerType uint8
PartitionerType represents the type of the partitioning
const ( // PartitionerUniform represents a uniform partitioner PartitionerUniform PartitionerType = iota + 1 )
type ScriptBasedModeler ¶
type ScriptBasedModeler struct { AbstractModeler // contains filtered or unexported fields }
ScriptBasedModeler utilizes a script to train an ML model and obtain its values
func (*ScriptBasedModeler) Configure ¶
func (m *ScriptBasedModeler) Configure(conf map[string]string) error
Configure expects the necessary conf options for the specified struct. Specifically, the following parameters are necessary: - script: the path of the script to use
func (*ScriptBasedModeler) Run ¶
func (m *ScriptBasedModeler) Run() error
Run executes the modeling process and populates the samples, realValues and appxValues slices.
type ScriptPairSimilarityEstimator ¶
type ScriptPairSimilarityEstimator struct { AbstractDatasetSimilarityEstimator // contains filtered or unexported fields }
ScriptPairSimilarityEstimator executes a script for the extraction of the similarity between each pair of datasets
func (*ScriptPairSimilarityEstimator) Compute ¶
func (e *ScriptPairSimilarityEstimator) Compute() error
Compute method constructs the Similarity Matrix
func (*ScriptPairSimilarityEstimator) Configure ¶
func (e *ScriptPairSimilarityEstimator) Configure(conf map[string]string)
Configure sets a number of configuration parameters to the struct. Use this method before the execution of the computation
func (*ScriptPairSimilarityEstimator) Deserialize ¶
func (e *ScriptPairSimilarityEstimator) Deserialize(b []byte)
Deserialize parses a byte array and forms a ScriptSimilarityEstimator object
func (*ScriptPairSimilarityEstimator) Options ¶
func (e *ScriptPairSimilarityEstimator) Options() map[string]string
Options returns a list of options that the user can set
func (*ScriptPairSimilarityEstimator) Serialize ¶
func (e *ScriptPairSimilarityEstimator) Serialize() []byte
Serialize returns a byte array that represents the struct in a serialized version
func (*ScriptPairSimilarityEstimator) Similarity ¶
func (e *ScriptPairSimilarityEstimator) Similarity(a, b *Dataset) float64
Similarity returns the similarity between the two datasets
type ScriptSimilarityEstimator ¶
type ScriptSimilarityEstimator struct { AbstractDatasetSimilarityEstimator // contains filtered or unexported fields }
ScriptSimilarityEstimator utilizes a script to analyze the data based on some external algorithm and utilizes various norms to measure the differences between the analysis outputs.
func (*ScriptSimilarityEstimator) Compute ¶
func (e *ScriptSimilarityEstimator) Compute() error
Compute method constructs the Similarity Matrix
func (*ScriptSimilarityEstimator) Configure ¶
func (e *ScriptSimilarityEstimator) Configure(conf map[string]string)
Configure sets a number of configuration parameters to the struct. Use this method before the execution of the computation
func (*ScriptSimilarityEstimator) Deserialize ¶
func (e *ScriptSimilarityEstimator) Deserialize(b []byte)
Deserialize parses a byte array and forms a ScriptSimilarityEstimator object
func (*ScriptSimilarityEstimator) Options ¶
func (e *ScriptSimilarityEstimator) Options() map[string]string
Options returns a list of options that the user can set
func (*ScriptSimilarityEstimator) Serialize ¶
func (e *ScriptSimilarityEstimator) Serialize() []byte
Serialize returns a byte array that represents the struct in a serialized version
func (*ScriptSimilarityEstimator) Similarity ¶
func (e *ScriptSimilarityEstimator) Similarity(a, b *Dataset) float64
Similarity returns the similarity between the two datasets
type ScriptSimilarityEstimatorType ¶
type ScriptSimilarityEstimatorType uint8
ScriptSimilarityEstimatorType reflects the type of the ScriptSimilarityEstimator
type SizeEstimator ¶
type SizeEstimator struct {
AbstractDatasetSimilarityEstimator
}
SizeEstimator estimates the similarity between the different datasets based on their sizes. (Note: the original text duplicated the JaccardEstimator description; the size-based semantics implied by the type name should be confirmed against the implementation.)
func (*SizeEstimator) Compute ¶
func (e *SizeEstimator) Compute() error
Compute method constructs the Similarity Matrix
func (*SizeEstimator) Configure ¶
func (e *SizeEstimator) Configure(conf map[string]string)
Configure sets the necessary parameters before the similarity execution
func (*SizeEstimator) Deserialize ¶
func (e *SizeEstimator) Deserialize(b []byte)
Deserialize instantiates the estimator based on a byte array
func (*SizeEstimator) Options ¶
func (e *SizeEstimator) Options() map[string]string
Options returns a list of applicable parameters
func (*SizeEstimator) Serialize ¶
func (e *SizeEstimator) Serialize() []byte
Serialize returns a byte array containing the estimator.
func (*SizeEstimator) Similarity ¶
func (e *SizeEstimator) Similarity(a, b *Dataset) float64
Similarity returns the similarity between two datasets
Source Files ¶
- clustering.go
- dataset.go
- dataseteval.go
- datasetoperations.go
- doc.go
- mdscaling.go
- modeling.go
- onlineindexer.go
- partitioner.go
- similarity.go
- similaritybhattacharyya.go
- similaritycomposite.go
- similaritycorrelation.go
- similarityjaccard.go
- similarityscript.go
- similarityscriptpair.go
- similaritysize.go
- utils.go