preprocessing

package
v0.0.0-...-0705f78 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Apr 30, 2018 License: MIT Imports: 7 Imported by: 0

Documentation

Index

Examples

Constants

This section is empty.

Variables

This section is empty.

Functions

func DenseMean

func DenseMean(Xmean *mat.Dense, X mat.Matrix) *mat.Dense

DenseMean stores the mean of the rows of X in Xmean, a 1 x nFeatures matrix.

func DenseNormalize

func DenseNormalize(X *mat.Dense, FitIntercept, Normalize bool) (XOffset, XScale *mat.Dense)

DenseNormalize normalizes matrix rows by removing the mean and dividing by the standard deviation.

func IncrementalMeanAndVar

func IncrementalMeanAndVar(X, lastMean, lastVariance *mat.Dense,
	lastSampleCount int) (updatedMean, updatedVariance *mat.Dense, updatedSampleCount int)

IncrementalMeanAndVar calculates a mean update and a Youngs and Cramer variance update. lastMean and lastVariance are the statistics computed at the last step by the function. Both must be initialized to 0.0. In case no scaling is required, lastVariance can be nil. The mean is always required and returned because it is necessary for the calculation of the variance. lastSampleCount is the number of samples encountered until now. From the paper "Algorithms for computing the sample variance: analysis and recommendations", by Chan, Golub, and LeVeque.

Parameters:

X : array-like, shape (nSamples, nFeatures)

Data to use for variance update

lastMean : array-like, shape (nFeatures,)

lastVariance : array-like, shape (nFeatures,)

lastSampleCount : int

Returns:

updatedMean : array, shape (nFeatures,)

updatedVariance : array, shape (nFeatures,)

If nil, only the mean is computed.

updatedSampleCount : int

References:

T. Chan, G. Golub, R. LeVeque. Algorithms for computing the sample variance: analysis and recommendations, The American Statistician, Vol. 37, No. 3, pp. 242-247

Also, see the sparse implementation of this in `utils.sparsefuncs.incrMeanVarianceAxis` and `utils.sparsefuncsFast.incrMeanVarianceAxis0`.

func InsertOnes

func InsertOnes(X *mat.Dense)

InsertOnes inserts a column of ones to fit the intercept.

Example
X := mat.NewDense(2, 5, []float64{2, 3, 4, 5, 6, 7, 8, 9, 10, 11})
InsertOnes(X)
fmt.Printf("X %v\n", X.RawRowView(0))
fmt.Printf("X %v\n", X.RawRowView(1))
Output:

X [1 2 3 4 5 6]
X [1 7 8 9 10 11]

Types

type InverseTransformer

type InverseTransformer interface {
	Transformer
	InverseTransform(X, Y *mat.Dense) (Xout, Yout *mat.Dense)
}

InverseTransformer is a transformer able to invert its transformation.

type MinMaxScaler

type MinMaxScaler struct {
	FeatureRange                            []float
	Scale, Min, DataMin, DataMax, DataRange *mat.Dense
	NSamplesSeen                            int
}

MinMaxScaler rescales data to lie within FeatureRange.

func NewMinMaxScaler

func NewMinMaxScaler(featureRange []float) *MinMaxScaler

NewMinMaxScaler creates a *MinMaxScaler with FeatureRange 0..1

func (*MinMaxScaler) Fit

func (scaler *MinMaxScaler) Fit(X, Y *mat.Dense) Transformer

Fit computes Scale and Min

func (*MinMaxScaler) FitTransform

func (scaler *MinMaxScaler) FitTransform(X, Y *mat.Dense) (Xout, Yout *mat.Dense)

FitTransform for MinMaxScaler

func (*MinMaxScaler) InverseTransform

func (scaler *MinMaxScaler) InverseTransform(X, Y *mat.Dense) (Xout, Yout *mat.Dense)

InverseTransform rescales data into its original bounds

func (*MinMaxScaler) PartialFit

func (scaler *MinMaxScaler) PartialFit(X, Y *mat.Dense) Transformer

PartialFit updates Scale and Min with partial data

func (*MinMaxScaler) Reset

func (scaler *MinMaxScaler) Reset() *MinMaxScaler

Reset resets scaler to its initial state

func (*MinMaxScaler) Transform

func (scaler *MinMaxScaler) Transform(X, Y *mat.Dense) (Xout, Yout *mat.Dense)

Transform applies scaling to X

type OneHotEncoder

type OneHotEncoder struct{ NumClasses, Min []int }

OneHotEncoder ...

func NewOneHotEncoder

func NewOneHotEncoder() *OneHotEncoder

NewOneHotEncoder creates a *OneHotEncoder

func (*OneHotEncoder) Fit

func (m *OneHotEncoder) Fit(X, Y *mat.Dense) Transformer

Fit ...

func (*OneHotEncoder) FitTransform

func (m *OneHotEncoder) FitTransform(X, Y *mat.Dense) (Xout, Yout *mat.Dense)

FitTransform for OneHotEncoder

func (*OneHotEncoder) InverseTransform

func (m *OneHotEncoder) InverseTransform(X, Y *mat.Dense) (Xout, Yout *mat.Dense)

InverseTransform computes Yout classes from the one-hot encoded format

func (*OneHotEncoder) Transform

func (m *OneHotEncoder) Transform(X, Y *mat.Dense) (Xout, Yout *mat.Dense)

Transform transforms Y labels to the one-hot encoded format

type PCA

type PCA struct {
	mat.SVD
	MinVarianceRatio                       float64
	NComponents                            int
	SingularValues, ExplainedVarianceRatio []float64
}

PCA is a thin singular value decomposition transformer

Example
X := mat.NewDense(6, 2, []float64{-1., -1., -2., -1., -3., -2., 1., 1., 2., 1., 3., 2.})
pca := NewPCA()
pca.Fit(X, nil)
Xp, _ := pca.Transform(X, nil)
fmt.Printf("explained  : %.3f\n", pca.ExplainedVarianceRatio)
fmt.Printf("Svalues    : %.3f\n", pca.SingularValues)
fmt.Printf("transformed: %.3f\n", Xp.RawMatrix().Data)
X2, _ := pca.InverseTransform(Xp, nil)
fmt.Printf("inversed   : %.3f\n", X2.RawMatrix().Data)
//expected:=[-1.383405778728807 0.293578697080941
// -2.221898016633681 -0.2513348437429921
// -3.605303795362488 0.04224385333794878
// 1.383405778728807 -0.293578697080941
// 2.221898016633681 0.2513348437429921
// 3.605303795362488 -0.04224385333794878]
Output:

explained  : [0.992 0.008]
Svalues    : [6.301 0.550]
transformed: [-1.383 0.294 -2.222 -0.251 -3.605 0.042 1.383 -0.294 2.222 0.251 3.605 -0.042]
inversed   : [-1.000 -1.000 -2.000 -1.000 -3.000 -2.000 1.000 1.000 2.000 1.000 3.000 2.000]

func NewPCA

func NewPCA() *PCA

NewPCA returns a *PCA

func (*PCA) Fit

func (m *PCA) Fit(X, Y *mat.Dense) Transformer

Fit computes the svd of X

func (*PCA) FitTransform

func (m *PCA) FitTransform(X, Y *mat.Dense) (Xout, Yout *mat.Dense)

FitTransform for PCA

func (*PCA) InverseTransform

func (m *PCA) InverseTransform(X, Y *mat.Dense) (Xout, Yout *mat.Dense)

InverseTransform puts X back into the original space

func (*PCA) Transform

func (m *PCA) Transform(X, Y *mat.Dense) (Xout, Yout *mat.Dense)

Transform transforms X

type PolynomialFeatures

type PolynomialFeatures struct {
	Degree                       int
	InteractionOnly, IncludeBias bool
	Powers                       [][]int
}

PolynomialFeatures struct

func NewPolynomialFeatures

func NewPolynomialFeatures(degree int) *PolynomialFeatures

NewPolynomialFeatures creates a *PolynomialFeatures

func (*PolynomialFeatures) Fit

func (scaler *PolynomialFeatures) Fit(X, Y *mat.Dense) Transformer

Fit precomputes Powers. Powers[i, j] is the exponent of the jth input in the ith output.

func (*PolynomialFeatures) FitTransform

func (scaler *PolynomialFeatures) FitTransform(X, Y *mat.Dense) (Xout, Yout *mat.Dense)

FitTransform for PolynomialFeatures

func (*PolynomialFeatures) InverseTransform

func (scaler *PolynomialFeatures) InverseTransform(X, Y *mat.Dense) (Xout, Yout *mat.Dense)

InverseTransform is the inverse transformation for PolynomialFeatures.

func (*PolynomialFeatures) Transform

func (scaler *PolynomialFeatures) Transform(X, Y *mat.Dense) (Xout, Yout *mat.Dense)

Transform returns data with polynomial features added

type QuantilePair

type QuantilePair struct {
	Left  float64
	Right float64
}

type RobustScaler

type RobustScaler struct {
	Center          bool
	Scale           bool
	Quantiles       *QuantilePair
	Median          *mat.Dense
	Tmp             *mat.Dense
	QuantileDivider *mat.Dense
}

RobustScaler scales data by centering it around the Median and reducing the influence of outliers using Quantiles. See Python sklearn's RobustScaler: http://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.RobustScaler.html.

func NewDefaultRobustScaler

func NewDefaultRobustScaler() *RobustScaler

NewDefaultRobustScaler supplies typical arguments (via python sklearn)

func NewRobustScaler

func NewRobustScaler(center bool, scale bool, quantiles *QuantilePair) *RobustScaler

NewRobustScaler creates a *RobustScaler

func (*RobustScaler) Fit

func (scaler *RobustScaler) Fit(X, Y *mat.Dense) Transformer

Fit computes Median and Quantiles

func (*RobustScaler) FitTransform

func (scaler *RobustScaler) FitTransform(X, Y *mat.Dense) (Xout, Yout *mat.Dense)

FitTransform for RobustScaler

func (*RobustScaler) InverseTransform

func (scaler *RobustScaler) InverseTransform(X, Y *mat.Dense) (Xout, Yout *mat.Dense)

InverseTransform unscales data

func (*RobustScaler) PartialFit

func (scaler *RobustScaler) PartialFit(X, Y *mat.Dense) Transformer

PartialFit computes Median and Quantiles

func (*RobustScaler) Reset

func (scaler *RobustScaler) Reset() *RobustScaler

Reset ...

func (*RobustScaler) Transform

func (scaler *RobustScaler) Transform(X, Y *mat.Dense) (Xout, Yout *mat.Dense)

Transform scales data

type Shuffler

type Shuffler struct{ Perm []int }

Shuffler shuffles rows of X and Y

Example
X, Y := mat.NewDense(2, 3, []float64{1, 2, 3, 4, 5, 6}), mat.NewDense(2, 3, []float64{7, 8, 9, 10, 11, 12})
m := NewShuffler()
m.Fit(X, Y)
copy(m.Perm, []int{1, 0})
m.Transform(X, Y)
fmt.Println("Transformed:")
fmt.Println(base.MatStr(X, Y))
m.InverseTransform(X, Y)
fmt.Println("InverseTransformed:")
fmt.Println(base.MatStr(X, Y))
Output:

func NewShuffler

func NewShuffler() *Shuffler

NewShuffler returns a *Shuffler

func (*Shuffler) Fit

func (m *Shuffler) Fit(X, Y *mat.Dense) Transformer

Fit for Shuffler

func (*Shuffler) FitTransform

func (m *Shuffler) FitTransform(X, Y *mat.Dense) (Xout, Yout *mat.Dense)

FitTransform for Shuffler

func (*Shuffler) InverseTransform

func (m *Shuffler) InverseTransform(X, Y *mat.Dense) (Xout, Yout *mat.Dense)

InverseTransform for Shuffler

func (*Shuffler) Transform

func (m *Shuffler) Transform(X, Y *mat.Dense) (Xout, Yout *mat.Dense)

Transform for Shuffler

type StandardScaler

type StandardScaler struct {
	Scale, Mean, Var *mat.Dense
	NSamplesSeen     int
}

StandardScaler scales data by removing Mean and dividing by stddev

func NewStandardScaler

func NewStandardScaler() *StandardScaler

NewStandardScaler creates a *StandardScaler

func (*StandardScaler) Fit

func (scaler *StandardScaler) Fit(X, Y *mat.Dense) Transformer

Fit computes Mean and Std

func (*StandardScaler) FitTransform

func (scaler *StandardScaler) FitTransform(X, Y *mat.Dense) (Xout, Yout *mat.Dense)

FitTransform for StandardScaler

func (*StandardScaler) InverseTransform

func (scaler *StandardScaler) InverseTransform(X, Y *mat.Dense) (Xout, Yout *mat.Dense)

InverseTransform unscales data

func (*StandardScaler) PartialFit

func (scaler *StandardScaler) PartialFit(X, Y *mat.Dense) Transformer

PartialFit computes Mean and Std

func (*StandardScaler) Reset

func (scaler *StandardScaler) Reset() *StandardScaler

Reset ...

func (*StandardScaler) Transform

func (scaler *StandardScaler) Transform(X, Y *mat.Dense) (Xout, Yout *mat.Dense)

Transform scales data

type Transformer

type Transformer = base.Transformer

Transformer is an interface for various preprocessors

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL