model

package
v0.0.0-...-762ccde Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Mar 24, 2018 License: MIT Imports: 7 Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

func Register

func Register(modelType string, m ModelCtor)

New LDA samplers should register themselves using this function.

Types

type LDA

// LDA holds the state of a latent Dirichlet allocation model: the Dirichlet
// hyperparameters, a reference to the corpus, and the count tables.
type LDA struct {
	Alpha    float32 // document-topic mixture hyperparameter (Dirichlet prior behind Theta)
	Beta     float32 // topic-word mixture hyperparameter (Dirichlet prior behind Phi)
	// NOTE(review): presumably the number of topics, matching the topicNum
	// argument of the constructors — confirm.
	TopicNum uint32

	Data *corpus.Corpus // corpus reference, kept for convenience

	Wt  *sstable.Uint32Matrix      // word-topic count table
	Dt  *sstable.Uint32Matrix      // doc-topic count table
	Wts *sstable.Uint32Matrix      // word-topic-sum count table
	Dwt map[sstable.DocWord]uint32 // doc-word-topic map, keyed by sstable.DocWord
}

func (*LDA) Infer

func (this *LDA) Infer(dat *corpus.Corpus, iter int)

infer topics on new documents

func (*LDA) Init

func (this *LDA) Init()

func (*LDA) Likelihood

func (this *LDA) Likelihood() float64

compute the joint likelihood of corpus

func (*LDA) LoadWordTopic

func (this *LDA) LoadWordTopic(fn string) error

deserialize word-topic matrix

func (*LDA) Phi

func (this *LDA) Phi() *sstable.Float32Matrix

Compute the posterior point estimate of the word-topic mixture: beta (Dirichlet prior) + data -> phi.

func (*LDA) ResampleTopics

func (this *LDA) ResampleTopics(iter int)

func (*LDA) SavePhi

func (this *LDA) SavePhi(fn string) error

serialize word-topic distribution

func (*LDA) SaveTheta

func (this *LDA) SaveTheta(fn string) error

serialize document-topic distribution

func (*LDA) SaveWordTopic

func (this *LDA) SaveWordTopic(fn string) error

serialize word-topic matrix

func (*LDA) Theta

func (this *LDA) Theta() *sstable.Float32Matrix

Compute the posterior point estimate of the document-topic mixture: alpha (Dirichlet prior) + data -> theta.

func (*LDA) Train

func (this *LDA) Train(dat *corpus.Corpus, iter int)

type Model

// Model is the common interface that LDA samplers follow.
type Model interface {
	// Train trains the model on dat for iter iterations.
	Train(dat *corpus.Corpus, iter int)
	// Infer runs inference on new documents in dat for iter iterations.
	Infer(dat *corpus.Corpus, iter int)
	// Phi returns the word-topic distribution.
	Phi() *sstable.Float32Matrix
	// Theta returns the document-topic distribution.
	Theta() *sstable.Float32Matrix
	// SaveTheta serializes the posterior document-topic distribution to fn.
	SaveTheta(fn string) error
	// SavePhi serializes the posterior word-topic distribution to fn.
	SavePhi(fn string) error
	// SaveWordTopic serializes the word-topic count table to fn.
	SaveWordTopic(fn string) error
	// LoadWordTopic deserializes the word-topic count table from fn.
	LoadWordTopic(fn string) error
}

the common interface new LDA samplers should follow

func NewLDA

func NewLDA(topicNum uint32, alpha float32, beta float32) Model

NewLDA creates an LDA instance with a collapsed Gibbs sampler.

func NewSparseLDA

func NewSparseLDA(topicNum uint32, alpha float32, beta float32) Model

NewSparseLDA creates a sparse LDA instance with a time- and memory-efficient Gibbs sampler.

type ModelCtor

// ModelCtor is the constructor signature for a Model: it takes the topic
// count and the alpha and beta hyperparameters and returns a Model.
type ModelCtor func(topicNum uint32, alpha float32, beta float32) Model

func GetModel

func GetModel(modelType string) (ModelCtor, error)

type SparseLDA

// SparseLDA is an LDA variant that embeds *LDA and adds a sorted map.
type SparseLDA struct {
	// Embedded base model; its exported fields and methods are promoted.
	*LDA
	// NOTE(review): presumably a sorted word-topic map used by the sparse
	// sampler — confirm against the implementation.
	Wtm *sstable.SortedMap
}

func (*SparseLDA) Infer

func (this *SparseLDA) Infer(dat *corpus.Corpus, iter int)

infer topics on new documents

func (*SparseLDA) Likelihood

func (this *SparseLDA) Likelihood() float64

compute the joint likelihood of corpus

func (*SparseLDA) LoadWordTopic

func (this *SparseLDA) LoadWordTopic(fn string) error

deserialize word-topic matrix

func (*SparseLDA) Phi

func (this *SparseLDA) Phi() *sstable.Float32Matrix

Compute the posterior point estimate of the word-topic mixture: beta (Dirichlet prior) + data -> phi.

func (*SparseLDA) ResampleTopics

func (this *SparseLDA) ResampleTopics(iter int)

func (*SparseLDA) SavePhi

func (this *SparseLDA) SavePhi(fn string) error

serialize word-topic distribution

func (*SparseLDA) SaveWordTopic

func (this *SparseLDA) SaveWordTopic(fn string) error

serialize word-topic matrix

func (*SparseLDA) Train

func (this *SparseLDA) Train(dat *corpus.Corpus, iter int)

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL