tokenizer

package
v1.2.0 Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Apr 28, 2023 License: MIT Imports: 15 Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

View Source
// Package-level error values.
var (
	// LanguageNotSupported is a sentinel error created with errors.New; its
	// message is "language not supported". Presumably it is what Tokenize
	// returns for a Language the package does not handle — TODO confirm
	// against the implementation, which is not shown here.
	//
	// NOTE(review): Go convention names sentinel errors ErrXxx; the name is
	// kept as-is because it is exported and renaming would break callers.
	LanguageNotSupported = errors.New("language not supported")
)

Functions

func IsSupportedLanguage

func IsSupportedLanguage(language Language) bool

func Tokenize

func Tokenize(params *TokenizeParams, config *Config) ([]string, error)

Types

type Config

// Config holds two boolean switches and is the type of Tokenize's config
// argument.
type Config struct {
	// EnableStemming presumably turns on stemming of the produced tokens —
	// TODO confirm against Tokenize's implementation.
	EnableStemming  bool
	// EnableStopWords presumably turns on stop-word handling; whether that
	// means removing stop words from the output is not visible here —
	// TODO confirm.
	EnableStopWords bool
}

type Language

// Language is a string-based type. It is the parameter type of
// IsSupportedLanguage and the type of TokenizeParams.Language.
type Language string
// Declared Language values. The two-letter strings look like ISO 639-1
// language codes — NOTE(review): confirm that this is the intended scheme.
const (
	ENGLISH   Language = "en"
	FRENCH    Language = "fr"
	HUNGARIAN Language = "hu"
	NORWEGIAN Language = "no"
	RUSSIAN   Language = "ru"
	SPANISH   Language = "es"
	SWEDISH   Language = "sv"
)

type Stem

// Stem is a function type that takes a string and a bool and returns a
// string. Presumably the string is a word and the result its stem; the
// meaning of the bool argument is not visible here — TODO confirm.
type Stem func(string, bool) string

type StopWords

// StopWords is a map from string to the empty struct, so only the presence
// of a key carries information (a string set). Presumably the keys are the
// stop words of one language — verify against the code that populates it.
type StopWords map[string]struct{}

type TokenizeParams

// TokenizeParams is the type of Tokenize's params argument.
type TokenizeParams struct {
	// Text is presumably the input text to tokenize — TODO confirm.
	Text            string
	// Language is the Language value associated with Text; presumably it
	// selects the language-specific processing — TODO confirm.
	Language        Language
	// AllowDuplicates presumably controls whether repeated tokens are kept
	// in the result; the behavior for either value is not visible here —
	// TODO confirm.
	AllowDuplicates bool
}

Directories

Path Synopsis

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL