cluster

package
v0.0.0-...-9bff13d Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Dec 5, 2015 License: LGPL-3.0 Imports: 5 Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

func Chebyshev

func Chebyshev(a, b Vector) (d float64)

Chebyshev returns the Chebyshev distance metric between points a and b

func CoordinatesSetEqual

func CoordinatesSetEqual(X, Y Matrix) bool

CoordinatesSetEqual returns whether the a and b contain the same set of coordinates. Each row of a and b is a tuple of coordinates.

func Euclidean

func Euclidean(a, b Vector) (d float64)

Euclidean returns the Euclidean distance metric between points a and b

func EuclideanSq

func EuclideanSq(a, b Vector) (d float64)

EuclideanSq returns the Euclidean squared distance metric between points a and b

func InvPerm

func InvPerm(x []int)

InvPerm inverses the permutation x

func Manhattan

func Manhattan(a, b Vector) (d float64)

Manhattan returns the Manhattan distance metric beetween points a and b

func Minkowski

func Minkowski(a, b Vector, p float64) (d float64)

Minkowski returns the Minkowski distance metric between points a and b

func PermEqual

func PermEqual(x, y []int) bool

func Permute

func Permute(x, p []int) (y []int)

Permute returns the permutation of int slice x using index p.

Types

type ActiveSet

type ActiveSet struct {
	// contains filtered or unexported fields
}

array-based integer singly linked list can remove nodes but cannot add nodes

func NewActiveSet

func NewActiveSet(n int) (l ActiveSet)

func (*ActiveSet) Begin

func (l *ActiveSet) Begin() int

func (*ActiveSet) Contains

func (l *ActiveSet) Contains(i int) bool

func (*ActiveSet) End

func (l *ActiveSet) End() int

func (*ActiveSet) Get

func (l *ActiveSet) Get(j int) int

Get returns the jth active element in the set, with boundary wrapping.

func (*ActiveSet) Init

func (l *ActiveSet) Init()

func (*ActiveSet) Len

func (l *ActiveSet) Len() int

func (*ActiveSet) Next

func (l *ActiveSet) Next(curr int) int

func (*ActiveSet) Remove

func (l *ActiveSet) Remove(i int)

type Classes

type Classes struct {
	// classification index
	Index Partitions
	K     int
	Cost  float64
}

func FindClusters

func FindClusters(c Clusterer, k int, repeats int) (classes *Classes)

FIXME There should be multiple instances of Clusterer FindClusters runs the clustering algorithm for the specified number of repeats.

func (*Classes) Partitions

func (c *Classes) Partitions() [][]int

Partitions return an array of partition element arrays

func (*Classes) Sizes

func (c *Classes) Sizes() []int

type Clusterer

type Clusterer interface {
	// Cluster clusters data points into k clusters.
	Cluster(k int) *Classes
	Len() int
}

type Distances

type Distances struct {
	// contains filtered or unexported fields
}

func NewDistances

func NewDistances(X Matrix, metric MetricOp) (d *Distances)

func (*Distances) Get

func (d *Distances) Get(i, j int) float64

func (*Distances) Len

func (d *Distances) Len() int

func (*Distances) Subset

func (d *Distances) Subset(index []int) *Distances

type HClusters

type HClusters struct {
	// Data points [m x n]
	X Matrix
	// Distance metric
	Metric MetricOp
	// number of clusters
	K int
	// linkage method
	Method int
	// Distances between data points [m x m]
	D *Distances
	// Step-wise dendrogram
	Dendrogram Linkages
	// cluster center assignment index
	Index []int
	// cost
	Cost float64
	// contains filtered or unexported fields
}

func (*HClusters) CutTree

func (c *HClusters) CutTree(K int)

CutTree cuts the hierarchical cluster tree to generate K clusters.

func (*HClusters) CutTreeHeight

func (c *HClusters) CutTreeHeight(height float64)

CutTreeHeight cuts the hierarchical cluster tree to specified height.

type HClustersGeneric

type HClustersGeneric struct {
	HClusters
	// contains filtered or unexported fields
}

Generic hierarchical clustering using Mullner's algorithm

func NewHClustersGeneric

func NewHClustersGeneric(X Matrix, metric MetricOp, method int, d *Distances) *HClustersGeneric

func (*HClustersGeneric) Cluster

func (c *HClustersGeneric) Cluster(k int) (classes *Classes)

type HClustersSingle

type HClustersSingle struct {
	HClusters
	// contains filtered or unexported fields
}

Single linkage hierarchical clustering using Minimum Spanning Tree (MST) algorithm

func NewHClustersSingle

func NewHClustersSingle(X Matrix, metric MetricOp, d *Distances) *HClustersSingle

func (*HClustersSingle) Cluster

func (c *HClustersSingle) Cluster(k int) (classes *Classes)

func (*HClustersSingle) Hierarchize

func (c *HClustersSingle) Hierarchize() Linkages

type Heap

type Heap struct {
	// contains filtered or unexported fields
}

min binary heap complete binary tree partial order: every node a stores a value that is less than or equal to that of its children

func (*Heap) Init

func (h *Heap) Init()

func (*Heap) Len

func (h *Heap) Len() int

func (*Heap) Pop

func (h *Heap) Pop() (y int)

Pop removes and returns the minimum element complexity is O(log(n))

func (*Heap) Push

func (h *Heap) Push(x KeyValue)

func (*Heap) Remove

func (h *Heap) Remove(i int) (y int)

Remove removes the element at position i

func (*Heap) Search

func (h *Heap) Search(value int) (i int)

func (*Heap) Update

func (h *Heap) Update(i int, x KeyValue)

Update updates the value at the i-th position

type Hierarchizer

type Hierarchizer interface {
	// Hierarchize organizes data clusters in a dendrogram
	Hierarchize() Linkages
}

type Hopach

type Hopach struct {
	Base Hopacher
	// contains filtered or unexported fields
}

func NewHopach

func NewHopach(base Hopacher) *Hopach

func (*Hopach) Hierarchize

func (h *Hopach) Hierarchize() Linkages

type Hopacher

type Hopacher interface {
	Clusterer
	Subset(index []int) Hopacher
	// Heterogeneity of a given partitioning
	Heterogeneity(classes *Classes) float64
	// Sort elements in some order
	Sort()
}

type KMeans

type KMeans struct {
	// Matrix of data points
	X Matrix
	// Distance metric
	Metric MetricOp
	// number of clusters
	K int
	// Distances between data points [m x m]
	D *Distances
	// Matrix of centroids
	Centers Matrix
	// Total distance of members to each centroid
	Errors Vector
	// cluster center assignment index
	Clusters []int
	// cost
	Cost float64
	// Maximum number of iterations
	MaxIter int
	// ordered index of elements subset
	Index []int
}

func NewKMeans

func NewKMeans(X Matrix, metric MetricOp) *KMeans

func (*KMeans) Cluster

func (c *KMeans) Cluster(k int) (classes *Classes)

Cluster runs the k-means algorithm once with random initialization Returns the classification information

func (*KMeans) Len

func (c *KMeans) Len() int

func (*KMeans) Segregations

func (c *KMeans) Segregations(classes *Classes) (S Matrix)

func (*KMeans) Subset

func (c *KMeans) Subset(index []int) Splitter

type KMedians

type KMedians struct {
	KMeans
}

func NewKMedians

func NewKMedians(X Matrix, metric MetricOp) *KMedians

func (*KMedians) Cluster

func (c *KMedians) Cluster(k int) (classes *Classes)

Cluster runs the k-medians algorithm once with random initialization Returns the classification information N.B. Must explicitly override KMeans.Cluster s.t. KMedians.maximization is called instead of KMeans.maximization.

type KMedoids

type KMedoids struct {
	KMeans
}

func NewKMedoids

func NewKMedoids(X Matrix, metric MetricOp, distances *Distances) *KMedoids

func (*KMedoids) Cluster

func (c *KMedoids) Cluster(k int) (classes *Classes)

Cluster runs the k-medoids algorithm. Returns the classification information.

func (*KMedoids) Subset

func (c *KMedoids) Subset(index []int) Splitter

type KeyValue

type KeyValue struct {
	Key   float64
	Value int
}

type Linkage

type Linkage struct {
	First, Second int
	Distance      float64
}

linkage

type Linkages

type Linkages []Linkage

func (Linkages) Len

func (x Linkages) Len() int

func (Linkages) Less

func (x Linkages) Less(i, j int) bool

func (Linkages) Swap

func (x Linkages) Swap(i, j int)

type Matrix

type Matrix mlgo.Matrix

func Segregations

func Segregations(distances *Distances, classes *Classes) (S Matrix)

Segregations return a matrix of distances between data points and clusters

func SegregationsFromCenters

func SegregationsFromCenters(X, centers Matrix, metric MetricOp) (S Matrix)

SegregationsFromCenters return a matrix of distances between data points and cluster centers

type MetricOp

type MetricOp func(a, b Vector) float64

type MixModel

type MixModel struct {
	// Matrix of data points [m x n]
	X Matrix
	// number of clusters
	K int

	// Matrix of Gaussians [k x n]
	Means, Variances Matrix
	// Vector of mixing proportions [k]
	Mixings Vector
	// Negative likelihood to be minimized
	NLogLikelihood float64
	// Maximum number of iterations
	MaxIter int
	// contains filtered or unexported fields
}

func (*MixModel) Cluster

func (c *MixModel) Cluster(k int) (classes *Classes)

Cluster runs the algorithm once with random initialization Returns the classification information

type Partitions

type Partitions []int

func (Partitions) Equal

func (p Partitions) Equal(q Partitions) bool

Equal returns whether partitions p and q are equal.

func (Partitions) Len

func (p Partitions) Len() int

func (Partitions) Less

func (p Partitions) Less(i, j int) bool

func (Partitions) Reassign

func (p Partitions) Reassign()

Reassign reassigns partition labels so that partitions are assigned indices (1-index) in the order of appearance.

func (Partitions) Swap

func (p Partitions) Swap(i, j int)

type Segregator

type Segregator interface {
	Clusterer
	Segregations(classes *Classes) Matrix
}

type Split

type Split struct {
	K    int
	Cl   *Classes
	Cost float64
}

func SegregateByMeanSil

func SegregateByMeanSil(seg Segregator, K int) (s Split)

TODO Do not count the silhouette of singleton clusters in the average?

func SplitByMeanSplitSil

func SplitByMeanSplitSil(splitter Splitter, K, L int) (s Split)

K is the maximum number of clusters. L is the maximum number of children clusters for any cluster.

type Splitter

type Splitter interface {
	Segregator
	Subset(index []int) Splitter
}

type Subclusterer

type Subclusterer interface {
	Clusterer
	// Subcluster clusters a subset of data points specified by idx into k clusters.
	Subcluster(k int, idx []int) *Classes
}

type UnionFind

type UnionFind struct {
	// parent index array
	Parent []int
	// contains filtered or unexported fields
}

func NewUnionFind

func NewUnionFind(n int) *UnionFind

func (*UnionFind) Find

func (x *UnionFind) Find(i int) (r int)

Find finds the index of the disjoint set in which i belongs

func (*UnionFind) Same

func (x *UnionFind) Same(i, j int) bool

Same returns whether elements i and j belong to the same disjoint set.

func (*UnionFind) Union

func (x *UnionFind) Union(i, j int)

Union creates a new set join i and j together Does not use union-by-rank Same memory requirement as union-by-rank, since memory is used to expand the Parent array instead of maintaining a set size array

type Vector

type Vector mlgo.Vector

func Silhouettes

func Silhouettes(S Matrix, classes *Classes) (s Vector)

Silhouettes returns a vector of silhouettes for data points. If S is a matrix of average distances from each elements to other elements in each cluster, then the returned values are conventionally considered as silhouettes. If S is a matrix of distances from each element to each cluster center, then the returned values are can be considered as shadows. TODO special case: silhouette is not defined for two singleton clusters TODO faithful calculation of "shadow" as defined by Friedrich Leisch (average two nearest centroids for 'b')

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL