doc2vec

package
v0.0.0-...-ce5e274 Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Apr 30, 2018 License: Apache-2.0 Imports: 16 Imported by: 0

Documentation

Index

Constants

View Source
// Package-level tuning constants.
//
// NOTE(review): the meanings below are inferred from the identifier names,
// which resemble the constants of Google's word2vec C implementation; confirm
// each one against the code that uses it.
//
//   - MAX_EXP: presumably the bound of the input range covered by a
//     precomputed sigmoid table (see GetSigmoidValue) — TODO confirm.
//   - EXP_TABLE_SIZE: presumably the number of entries in that table — TODO confirm.
//   - NEG_SAMPLING_TABLE_SIZE: presumably the length of
//     TDoc2VecImpl.NegSamplingTable — TODO confirm. The untyped constant 1e8 is
//     exactly representable as an int, so the typed declaration is valid Go.
//   - PROGRESS_BAR_THRESHOLD: presumably a count that controls progress
//     reporting — TODO confirm.
//   - THREAD_NUM: presumably the number of concurrent training workers — TODO confirm.
const (
	MAX_EXP                 float64 = 6.0
	EXP_TABLE_SIZE          int     = 1000
	NEG_SAMPLING_TABLE_SIZE int     = 1e8
	PROGRESS_BAR_THRESHOLD  int     = 100000
	THREAD_NUM              int     = 32
)

Variables

This section is empty.

Functions

func GetSigmoidValue

func GetSigmoidValue(f float64) float64

func QuickSort

func QuickSort(i, j int, vec []*SortItem)

QuickSort is an ascending-order quicksort.

Types

type IDoc2Vec

// IDoc2Vec is the method set of a doc2vec model: training, accessors,
// model persistence, and a group of query methods. TDoc2VecImpl declares
// methods with the same names.
//
// Most query methods declare no result values.
// NOTE(review): presumably they print their results (TDoc2VecImpl also has
// PrintTopKWords and PrintTopKDocs) — confirm against the implementation.
type IDoc2Vec interface {
	// Train takes a context and a common.IModelDataProvider and returns
	// nothing, so a training failure is not reported through this signature.
	Train(ctx context.Context, model common.IModelDataProvider)
	// GetCorpus returns a corpus.ICorpus; presumably the corpus the model was
	// built from — TODO confirm.
	GetCorpus() corpus.ICorpus
	// GetNeuralNet returns a neuralnet.INeuralNet; presumably the network
	// holding the trained vectors — TODO confirm.
	GetNeuralNet() neuralnet.INeuralNet
	// SaveModel takes a file name and returns an error.
	// NOTE(review): the on-disk format is not visible here; the msgp methods
	// on TDoc2VecImpl suggest MessagePack — confirm.
	SaveModel(fname string) (err error)
	// LoadModel takes a file name and returns an error; presumably the
	// counterpart of SaveModel — TODO confirm.
	LoadModel(fname string) (err error)
	// Word2Words takes a word; presumably reports the words nearest to it — TODO confirm.
	Word2Words(word string)
	// Word2Docs takes a word; presumably reports the documents nearest to it — TODO confirm.
	Word2Docs(word string)
	// Sen2Words takes free text and an iteration count; presumably fits a
	// vector for content in iters passes and reports nearby words — TODO confirm.
	Sen2Words(ctx context.Context, content string, iters int)
	// Sen2Docs takes the same arguments as Sen2Words; presumably reports
	// nearby documents instead — TODO confirm.
	Sen2Docs(ctx context.Context, content string, iters int)
	// Doc2Docs takes a document index; presumably reports the documents
	// nearest to that document — TODO confirm.
	Doc2Docs(docidx int)
	// Doc2Words takes a document index; presumably reports the words nearest
	// to that document — TODO confirm.
	Doc2Words(docidx int)
	// GetLikelihood4Doc returns a float64 named likelihood. The parameter
	// named context is a string, not a context.Context.
	GetLikelihood4Doc(context string) (likelihood float64)
	// GetLeaveOneOutKwds takes free text and an iteration count.
	// NOTE(review): the name suggests leave-one-out keyword extraction — confirm.
	GetLeaveOneOutKwds(ctx context.Context, content string, iters int)
	// DocSimCal takes two texts and returns a float64. The result is named dis
	// here but sim on TDoc2VecImpl.DocSimCal.
	// NOTE(review): confirm whether the value is a distance or a similarity.
	DocSimCal(content1 string, content2 string) (dis float64)
}

type SortItem

// SortItem pairs an int32 index with a float64 value. It is the element type
// sorted by QuickSort and stored in TSortItemSlice.
type SortItem struct {
	// Idx is an int32 index; presumably a word or document index — TODO confirm.
	Idx int32
	// Dis is the value associated with Idx; presumably a distance or
	// similarity score — TODO confirm which.
	Dis float64
}

func (*SortItem) DecodeMsg

func (z *SortItem) DecodeMsg(dc *msgp.Reader) (err error)

DecodeMsg implements msgp.Decodable

func (SortItem) EncodeMsg

func (z SortItem) EncodeMsg(en *msgp.Writer) (err error)

EncodeMsg implements msgp.Encodable

func (SortItem) MarshalMsg

func (z SortItem) MarshalMsg(b []byte) (o []byte, err error)

MarshalMsg implements msgp.Marshaler

func (SortItem) Msgsize

func (z SortItem) Msgsize() (s int)

Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message

func (*SortItem) UnmarshalMsg

func (z *SortItem) UnmarshalMsg(bts []byte) (o []byte, err error)

UnmarshalMsg implements msgp.Unmarshaler

type TDoc2VecImpl

// TDoc2VecImpl holds the exported configuration and state of a doc2vec model.
// It declares methods matching every method name in IDoc2Vec.
//
// NewDoc2Vec takes useCbow, useHS, useNEG, windowSize, dim and iters; the
// other exported fields are not constructor parameters.
// NOTE(review): presumably they are defaulted or filled in during training — confirm.
type TDoc2VecImpl struct {
	// Dim is presumably the dimensionality of the vectors — TODO confirm.
	Dim              int
	UseCbow          bool // true: Continuous Bag-of-Words model; false: skip-gram
	WindowSize       int  // window size of the CBOW model
	UseHS            bool
	UseNEG           bool // at least one of the two optimization methods UseHS / UseNEG must be selected; both may be enabled together. See Google's word2vec source code.
	Negative         int  // number of negative-sampling words
	// StartAlpha is presumably the initial learning rate — TODO confirm.
	StartAlpha       float64
	// Iters is presumably the number of training iterations — TODO confirm.
	Iters            int
	// TrainedWords is presumably a running count of words processed — TODO confirm.
	TrainedWords     int
	Corpus           corpus.ICorpus
	NN               neuralnet.INeuralNet
	// NegSamplingTable holds int32 values; presumably word indices drawn from
	// by GetNegativeSamplingWordIdx — TODO confirm.
	NegSamplingTable []int32
	// contains filtered or unexported fields
}

func NewDoc2Vec

func NewDoc2Vec(useCbow, useHS, useNEG bool, windowSize, dim, iters int) *TDoc2VecImpl

func (*TDoc2VecImpl) DecodeMsg

func (z *TDoc2VecImpl) DecodeMsg(dc *msgp.Reader) (err error)

DecodeMsg implements msgp.Decodable

func (*TDoc2VecImpl) Doc2Docs

func (p *TDoc2VecImpl) Doc2Docs(docidx int)

func (*TDoc2VecImpl) Doc2Words

func (p *TDoc2VecImpl) Doc2Words(docidx int)

func (*TDoc2VecImpl) DocSimCal

func (p *TDoc2VecImpl) DocSimCal(content1 string, content2 string) (sim float64)

func (*TDoc2VecImpl) EncodeMsg

func (z *TDoc2VecImpl) EncodeMsg(en *msgp.Writer) (err error)

EncodeMsg implements msgp.Encodable

func (*TDoc2VecImpl) FitDoc

func (p *TDoc2VecImpl) FitDoc(ctx context.Context, context string, iters int) (dsyn0 *neuralnet.TVector)

func (*TDoc2VecImpl) GetCorpus

func (p *TDoc2VecImpl) GetCorpus() corpus.ICorpus

func (*TDoc2VecImpl) GetLeaveOneOutKwds

func (p *TDoc2VecImpl) GetLeaveOneOutKwds(ctx context.Context, content string, iters int)

func (*TDoc2VecImpl) GetLikelihood4Doc

func (p *TDoc2VecImpl) GetLikelihood4Doc(context string) (likelihood float64)

func (*TDoc2VecImpl) GetNegativeSamplingWordIdx

func (p *TDoc2VecImpl) GetNegativeSamplingWordIdx() int32

func (*TDoc2VecImpl) GetNeuralNet

func (p *TDoc2VecImpl) GetNeuralNet() neuralnet.INeuralNet

func (*TDoc2VecImpl) LoadModel

func (p *TDoc2VecImpl) LoadModel(fname string) (err error)

func (*TDoc2VecImpl) MarshalMsg

func (z *TDoc2VecImpl) MarshalMsg(b []byte) (o []byte, err error)

MarshalMsg implements msgp.Marshaler

func (*TDoc2VecImpl) Msgsize

func (z *TDoc2VecImpl) Msgsize() (s int)

Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message

func (*TDoc2VecImpl) PrintTopKDocs

func (p *TDoc2VecImpl) PrintTopKDocs(slice TSortItemSlice)

func (*TDoc2VecImpl) PrintTopKWords

func (p *TDoc2VecImpl) PrintTopKWords(slice TSortItemSlice)

func (*TDoc2VecImpl) SaveModel

func (p *TDoc2VecImpl) SaveModel(fname string) (err error)

func (*TDoc2VecImpl) Sen2Docs

func (p *TDoc2VecImpl) Sen2Docs(ctx context.Context, content string, iters int)

func (*TDoc2VecImpl) Sen2Words

func (p *TDoc2VecImpl) Sen2Words(ctx context.Context, content string, iters int)

func (*TDoc2VecImpl) Train

func (p *TDoc2VecImpl) Train(ctx context.Context, model common.IModelDataProvider)

func (*TDoc2VecImpl) UnmarshalMsg

func (z *TDoc2VecImpl) UnmarshalMsg(bts []byte) (o []byte, err error)

UnmarshalMsg implements msgp.Unmarshaler

func (*TDoc2VecImpl) Word2Docs

func (p *TDoc2VecImpl) Word2Docs(word string)

func (*TDoc2VecImpl) Word2Words

func (p *TDoc2VecImpl) Word2Words(word string)

type TSortItemSlice

// TSortItemSlice is a slice of *SortItem. The Len, Less and Swap methods
// listed for this type have the signatures required by sort.Interface.
// NOTE(review): the ordering implemented by Less is not visible here — confirm.
type TSortItemSlice []*SortItem

func (*TSortItemSlice) DecodeMsg

func (z *TSortItemSlice) DecodeMsg(dc *msgp.Reader) (err error)

DecodeMsg implements msgp.Decodable

func (TSortItemSlice) EncodeMsg

func (z TSortItemSlice) EncodeMsg(en *msgp.Writer) (err error)

EncodeMsg implements msgp.Encodable

func (TSortItemSlice) Len

func (p TSortItemSlice) Len() int

func (TSortItemSlice) Less

func (p TSortItemSlice) Less(i, j int) bool

func (TSortItemSlice) MarshalMsg

func (z TSortItemSlice) MarshalMsg(b []byte) (o []byte, err error)

MarshalMsg implements msgp.Marshaler

func (TSortItemSlice) Msgsize

func (z TSortItemSlice) Msgsize() (s int)

Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message

func (TSortItemSlice) Swap

func (p TSortItemSlice) Swap(i, j int)

func (*TSortItemSlice) UnmarshalMsg

func (z *TSortItemSlice) UnmarshalMsg(bts []byte) (o []byte, err error)

UnmarshalMsg implements msgp.Unmarshaler

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL