xiaoxian

package module
v0.0.0-...-f62ef70 Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Jun 19, 2020 License: MIT Imports: 15 Imported by: 11

README

xiaoxian

Some NLP functions & wrappers for English & Chinese, offline or online.

Documentation

Index

Constants

View Source
const XiaoXianDefaultTid = "tc040626"
View Source
const XiaoXianHost = "topget.org:7482"

Variables

This section is empty.

Functions

func CalCosineSimilarityOfVectors

func CalCosineSimilarityOfVectors(f1, f2 []float32) float64

func CleanChineseSentence

func CleanChineseSentence(sentA string) string

func CleanEnglish

func CleanEnglish(sentA string) string

func DownloadPagePostOnlyBaiduNLP

func DownloadPagePostOnlyBaiduNLP(urlA string, tokenA string, ifCustomA bool, postDataA string, timeoutSecsA time.Duration) string

func EnsureValidEnglishOnly

func EnsureValidEnglishOnly(textA string, ifOtherEmptyA bool) string

func GetArticleDifficultyEnOL

func GetArticleDifficultyEnOL(strA string) (string, error)

func GetNamedEntityEnOL

func GetNamedEntityEnOL(strA string) ([]string, error)

func GetVectorCnBaiduOL

func GetVectorCnBaiduOL(textA string, tokenA string, clientIdA string, clientSecretA string) (rs string, err string, token string)

func NerCnBaiduOL

func NerCnBaiduOL(textA string, ifCustomA bool, tokenA string, clientIdA string, clientSecretA string) (rs string, err string, token string)

func ParseSentenceEnOL

func ParseSentenceEnOL(strA string) (string, error)

func SentimentCnBaiduOL

func SentimentCnBaiduOL(textA string, tokenA string, clientIdA string, clientSecretA string) (result map[string]float64, err string, token string)

func SplitArticleCn

func SplitArticleCn(strA string) []string

func SplitArticleEn

func SplitArticleEn(strA string) []string

SplitArticleEn splits English text into sentences (英语分句).

func SplitArticleEnOL

func SplitArticleEnOL(strA string) ([]string, error)

func TagEnOL

func TagEnOL(strA string) ([]string, error)

func TokenizeCnBaiduOL

func TokenizeCnBaiduOL(textA string, ifCustomA bool, tokenA string, clientIdA string, clientSecretA string) (rs string, err string, token string)

func TokenizeEn

func TokenizeEn(strA string) []string

func TokenizeEnOL

func TokenizeEnOL(strA string) ([]string, error)

Types

type D2VModel

type D2VModel struct {
	// contains filtered or unexported fields
}

func LoadD2VModel

func LoadD2VModel(fileNameA string) (resultR *D2VModel, errR error)

func LoadD2VModelWithDicts

func LoadD2VModelWithDicts(fileNameA string, dictA string, userDictA string) (resultR *D2VModel, errR error)

func NewD2VModel

func NewD2VModel(dimA int, roundA int) (resultR *D2VModel, errR error)

func NewD2VModelFromDicts

func NewD2VModelFromDicts(dimA int, roundA int, dictA string, userDictA string) (resultR *D2VModel, errR error)

func NewD2VModelFromSegmenter

func NewD2VModelFromSegmenter(dimA int, roundA int, segmenterA *posseg.Segmenter) (*D2VModel, error)

func TrainDoc2VecModel

func TrainDoc2VecModel(dirA string, patternA string, dataFileA string, modelFileNameA string, dimA int, roundA int) (*D2VModel, error)

func (*D2VModel) Dim

func (p *D2VModel) Dim() int

func (*D2VModel) GetDocVector

func (p *D2VModel) GetDocVector(strA string) ([]float32, error)

func (*D2VModel) GetDocVectorMust

func (p *D2VModel) GetDocVectorMust(strA string) []float32

func (*D2VModel) GetSimilarityOfDocs

func (p *D2VModel) GetSimilarityOfDocs(doc1 string, doc2 string) float64

func (*D2VModel) GetSimilarityOfDocsEx

func (p *D2VModel) GetSimilarityOfDocsEx(doc1 string, doc2 string) float64

func (*D2VModel) LoadModel

func (p *D2VModel) LoadModel(fileNameA string) error

func (*D2VModel) Model

func (p *D2VModel) Model() *doc2vec.IDoc2Vec

func (*D2VModel) PrepareTrainText

func (p *D2VModel) PrepareTrainText(textA string) (resultR string)

func (*D2VModel) Round

func (p *D2VModel) Round() int

func (*D2VModel) SaveModel

func (p *D2VModel) SaveModel(fileNameA string) error

func (*D2VModel) SetSegmenter

func (p *D2VModel) SetSegmenter(segmenterA *posseg.Segmenter) error

func (*D2VModel) SetSegmenterByDicts

func (p *D2VModel) SetSegmenterByDicts(dictA string, userDictA string) error

func (*D2VModel) TrainModel

func (p *D2VModel) TrainModel(dirA string, patternA string, dataFileA string, modelFileNameA string) error

func (*D2VModel) TrainModelFromString

func (p *D2VModel) TrainModelFromString(strA string) (result error)

type PosTaggerCn

type PosTaggerCn struct {
	// contains filtered or unexported fields
}

func NewPosTaggerCn

func NewPosTaggerCn(dictA string, userDictA string) (*PosTaggerCn, error)

func (*PosTaggerCn) LoadDict

func (p *PosTaggerCn) LoadDict(dictA string) error

func (*PosTaggerCn) LoadUserDict

func (p *PosTaggerCn) LoadUserDict(userDictA string) error

func (*PosTaggerCn) Tag

func (p *PosTaggerCn) Tag(strA string, autoA bool, sepA string) []string

type TokenizerCn

type TokenizerCn struct {
	// contains filtered or unexported fields
}

func NewTokenizerCn

func NewTokenizerCn(dictA string, userDictA string) (*TokenizerCn, error)

func (*TokenizerCn) LoadDict

func (p *TokenizerCn) LoadDict(dictA string) error

func (*TokenizerCn) LoadUserDict

func (p *TokenizerCn) LoadUserDict(userDictA string) error

func (*TokenizerCn) Tokenize

func (p *TokenizerCn) Tokenize(strA string, autoA bool) []string

type TreebankWordTokenizer

type TreebankWordTokenizer struct {
	// contains filtered or unexported fields
}

TreebankWordTokenizer is a modified version of the Treebank word tokenizer.

func NewTreebankWordTokenizer

func NewTreebankWordTokenizer() *TreebankWordTokenizer

func (*TreebankWordTokenizer) Tokenize

func (t *TreebankWordTokenizer) Tokenize(text string) []string

func (*TreebankWordTokenizer) TokenizeWithDetail

func (t *TreebankWordTokenizer) TokenizeWithDetail(text string) []string

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL