nlp

package
v0.0.0-...-32fa971 Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Nov 18, 2023 License: MIT Imports: 11 Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

func CleanChineseText

func CleanChineseText(text string) string

func ProcessLine

func ProcessLine(r io.Reader, max int, threshold int, cleaner CorpusCleaner, callback func(s string) error) error

func RemoveNonChinese

func RemoveNonChinese(text string) string

func RemoveUrl

func RemoveUrl(text string) string

Types

type ChineseCorpusCleaner

type ChineseCorpusCleaner struct{}

func (*ChineseCorpusCleaner) DoClean

func (self *ChineseCorpusCleaner) DoClean(text string) string

func (*ChineseCorpusCleaner) DoSplit

func (self *ChineseCorpusCleaner) DoSplit(text string) []string

func (*ChineseCorpusCleaner) GetSeparator

func (self *ChineseCorpusCleaner) GetSeparator() string

type CorpusCleaner

type CorpusCleaner interface {
	DoClean(text string) string
	GetSeparator() string
	DoSplit(text string) []string
}

type EnglishCorpusCleaner

type EnglishCorpusCleaner struct{}

func (*EnglishCorpusCleaner) DoClean

func (self *EnglishCorpusCleaner) DoClean(text string) string

TODO

func (*EnglishCorpusCleaner) DoSplit

func (self *EnglishCorpusCleaner) DoSplit(text string) []string

func (*EnglishCorpusCleaner) GetSeparator

func (self *EnglishCorpusCleaner) GetSeparator() string

type MixCorpusCleaner

type MixCorpusCleaner struct{}

func (*MixCorpusCleaner) DoClean

func (self *MixCorpusCleaner) DoClean(text string) string

TODO

func (*MixCorpusCleaner) DoSplit

func (self *MixCorpusCleaner) DoSplit(text string) []string

TODO

func (*MixCorpusCleaner) GetSeparator

func (self *MixCorpusCleaner) GetSeparator() string

TODO

type SegRequest

type SegRequest struct {
	Text string   `json:"text"`
	Pos  []string `json:"pos"`
	Top  int      `json:"top"`
	Min  int      `json:"min"`
}

type SegResponse

type SegResponse struct {
	Result WordFreqResult `json:"result"`
}

type Token

type Token struct {
	Word string
	Pos  string
}

type Tokenizer

type Tokenizer interface {
	DoSeg(ctx context.Context, s string, pos []string, top int, min int) (rs WordFreqResult, err error)
}

type WordFreqResult

type WordFreqResult [][]interface{}

func (WordFreqResult) Len

func (self WordFreqResult) Len() int

func (WordFreqResult) Less

func (self WordFreqResult) Less(i, j int) bool

func (WordFreqResult) Swap

func (self WordFreqResult) Swap(i, j int)

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL