Documentation ¶
Index ¶
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
This section is empty.
Types ¶
type EnglishContractions ¶
type EnglishContractions struct {
// contains filtered or unexported fields
}
func NewEnglishContractions ¶
func NewEnglishContractions() *EnglishContractions
type LangContractions ¶
type SplitTokenizer ¶
type SplitTokenizer struct {
// contains filtered or unexported fields
}
func NewSplitTokenizer ¶
func NewSplitTokenizer(delimiter string) *SplitTokenizer
func (*SplitTokenizer) Tokenize ¶
func (t *SplitTokenizer) Tokenize(str string) []*Token
type TBWordTokenizer ¶
type TBWordTokenizer struct {
LangContractions LangContractions
ExpandContrations bool
Normalize bool
// contains filtered or unexported fields
}
TBWordTokenizer mimics the Treebank word tokenizer without relying on a large number of regular expressions.
func NewTBWordTokenizer ¶
func NewTBWordTokenizer(normalize, checkContr bool, langContr LangContractions) *TBWordTokenizer
func (*TBWordTokenizer) Tokenize ¶
func (t *TBWordTokenizer) Tokenize(s string) []*Token
func (*TBWordTokenizer) TokenizeRune ¶
func (t *TBWordTokenizer) TokenizeRune(s []rune) []*Token
type Token ¶
Click to show internal directories.
Click to hide internal directories.