biogo.cluster: code.google.com/p/biogo.cluster/kmeans Index | Examples | Files

package kmeans

import "code.google.com/p/biogo.cluster/kmeans"

Package kmeans implements Lloyd's k-means clustering for ℝⁿ data.

Code:play 

package kmeans_test

import (
    "code.google.com/p/biogo.cluster/kmeans"
    "fmt"
    "strings"
)

// Feature is a named interval described by integer Start and End positions.
type Feature struct {
    ID         string
    Start, End int
}

// Len returns the length of the feature, calculated as End minus Start.
func (f *Feature) Len() int {
    length := f.End - f.Start
    return length
}

// Features is a collection of features in the form passed to kmeans.New
// as clustering input.
type Features []*Feature

// Len returns the number of features in the collection.
func (f Features) Len() int {
    return len(f)
}

// Values returns the i'th feature as the two-element vector {Start, End}.
func (f Features) Values(i int) []float64 {
    feat := f[i]
    return []float64{
        float64(feat.Start),
        float64(feat.End),
    }
}

// feats is the data set clustered by Example: eleven features with IDs "0"
// through "10". Each feature's Start and End are the two values presented to
// the clustering via Features.Values.
var feats = []*Feature{
    {ID: "0", Start: 1, End: 1700},
    {ID: "1", Start: 2, End: 1700},
    {ID: "2", Start: 3, End: 610},
    {ID: "3", Start: 2, End: 605},
    {ID: "4", Start: 1, End: 600},
    {ID: "5", Start: 2, End: 750},
    {ID: "6", Start: 650, End: 900},
    {ID: "7", Start: 700, End: 950},
    {ID: "8", Start: 1000, End: 1700},
    {ID: "9", Start: 950, End: 1712},
    {ID: "10", Start: 1000, End: 1650},
}

// ClusterFeatures clusters f on the basis of location (the Start and End of
// each feature), returning the first clustering found, trying k = 1, 2, ...
// len(f) in turn, in which every feature is within tolerance of its cluster
// center, where:
//  epsilon is allowable error, as a fraction of each feature's End - Start, and
//  effort is number of attempts to achieve error < epsilon for any k.
//
// An error is returned if the k-means object cannot be created, if a
// clustering run reports an error, or if no attempt for any k satisfies
// epsilon. The last case always applies when effort is less than one or f is
// empty, since no attempt is then made.
func ClusterFeatures(f []*Feature, epsilon float64, effort int) (*kmeans.Kmeans, error) {
    km, err := kmeans.New(Features(f))
    if err != nil {
        return nil, err
    }

    // cut[i] is the largest squared distance allowed between value i and the
    // center of the cluster it is assigned to: (epsilon * (End - Start))².
    values := km.Values()
    cut := make([]float64, len(values))
    for i, v := range values {
        v := v.V()
        l := epsilon * (v[1] - v[0])
        cut[i] = l * l
    }

    for k := 1; k <= len(f); k++ {
    ATTEMPT:
        for attempt := 0; attempt < effort; attempt++ {
            km.Seed(k)
            // Cluster is declared to return an error; surface it rather
            // than evaluating centers from a failed run.
            if err := km.Cluster(); err != nil {
                return nil, err
            }
            centers := km.Centers()
            for i, v := range values {
                cv := centers[v.Cluster()].V()
                vv := v.V()
                dx, dy := cv[0]-vv[0], cv[1]-vv[1]
                // Written as a negated "<" so that a NaN distance or cut
                // rejects the attempt, as it did originally.
                if !(dx*dx+dy*dy < cut[i]) {
                    continue ATTEMPT
                }
            }
            return km, nil
        }
    }

    // Reached when no attempt was made (effort < 1 or no features) or when
    // every attempt for every k left some feature outside its tolerance.
    return nil, fmt.Errorf("no clustering of %d features within epsilon %v found in %d attempts per k", len(f), epsilon, effort)
}

// Example clusters feats with ClusterFeatures and prints each cluster's
// members as a text diagram at one character per 20 positions, followed by
// the proportion of the total sum of squares not accounted for by the
// within-cluster sums of squares.
func Example() {
    km, err := ClusterFeatures(feats, 0.15, 5)
    if err != nil {
        // Print the failure so it shows up as an output mismatch with a
        // visible cause instead of silently producing no output.
        fmt.Println(err)
        return
    }
    for ci, c := range km.Centers() {
        fmt.Printf("Cluster %d:\n", ci)
        for _, i := range c.Members() {
            f := feats[i]
            fmt.Printf("%2s %s%s\n",
                f.ID,
                strings.Repeat(" ", f.Start/20),
                strings.Repeat("-", f.Len()/20),
            )
        }
        fmt.Println()
    }

    // Sum the per-cluster sums of squares to get the total within-cluster
    // sum of squares.
    var within float64
    for _, ss := range km.Within() {
        within += ss
    }
    fmt.Printf("betweenSS / totalSS = %.6f\n", 1-(within/km.Total()))

    // Output:
    // Cluster 0:
    //  0 ------------------------------------------------------------------------------------
    //  1 ------------------------------------------------------------------------------------
    //
    // Cluster 1:
    //  2 ------------------------------
    //  3 ------------------------------
    //  4 -----------------------------
    //  5 -------------------------------------
    //
    // Cluster 2:
    //  6                                 ------------
    //  7                                    ------------
    //
    // Cluster 3:
    //  8                                                   -----------------------------------
    //  9                                                --------------------------------------
    // 10                                                   --------------------------------
    //
    // betweenSS / totalSS = 0.995335
}

Index

Examples

Package Files

kmeans.go

type Kmeans

type Kmeans struct {
    // contains filtered or unexported fields
}

Kmeans implements clustering of ℝⁿ data according to the Lloyd k-means algorithm.

func New

func New(data cluster.Interface) (*Kmeans, error)

New creates a new k-means object populated with data from an Interface value, data.

func (*Kmeans) Centers

func (km *Kmeans) Centers() []cluster.Center

Centers returns the k centers determined by a previous call to Cluster.

func (*Kmeans) Cluster

func (km *Kmeans) Cluster() error

Cluster runs a clustering of the data using the k-means algorithm.

func (*Kmeans) Seed

func (km *Kmeans) Seed(k int)

Seed generates the initial means for the k-means algorithm according to the k-means++ algorithm.

func (*Kmeans) SetCenters

func (km *Kmeans) SetCenters(c []cluster.Center)

SetCenters sets the locations of the centers to c.

func (*Kmeans) Total

func (km *Kmeans) Total() float64

Total calculates the total sum of squares for the data relative to the data mean.

func (*Kmeans) Values

func (km *Kmeans) Values() []cluster.Value

Values returns a slice of the values in the Kmeans.

func (*Kmeans) Within

func (km *Kmeans) Within() []float64

Within calculates the sum of squares within each cluster. It returns nil if Cluster has not been called.

Package kmeans imports 3 packages (graph). Updated 2015-01-15. Refresh now. Tools for package owners.