govarnam

package
v1.9.1 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Apr 7, 2024 License: AGPL-3.0 Imports: 19 Imported by: 0

Documentation

Index

Constants

View Source
const CHIL_TAG = "chill"
View Source
const STRUCT_INT_DEFAULT_VALUE = -1

Go's struct int fields have a default value of 0. For the SearchSymbolTable use case this is a problem, because 0 can be a legitimate search value. Hence we use a constructor that sets a different default value. https://stackoverflow.com/q/37135193/1372424

View Source
const VARNAM_LEARNT_WORD_MIN_WEIGHT = 30

VARNAM_LEARNT_WORD_MIN_WEIGHT Minimum weight/confidence for learnt words.

View Source
const VARNAM_MATCH_ALL = 3
View Source
const VARNAM_MATCH_EXACT = 1

Pattern matching

View Source
const VARNAM_MATCH_POSSIBILITY = 2
View Source
const VARNAM_METADATA_SCHEME_AUTHOR = "scheme-author"
View Source
const VARNAM_METADATA_SCHEME_COMPILED_DATE = "scheme-compiled-date"
View Source
const VARNAM_METADATA_SCHEME_DISPLAY_NAME = "scheme-display-name"
View Source
const VARNAM_METADATA_SCHEME_IDENTIFIER = "scheme-id"
View Source
const VARNAM_METADATA_SCHEME_LANGUAGE_CODE = "lang-code"
View Source
const VARNAM_METADATA_SCHEME_STABLE = "scheme-stable"
View Source
const VARNAM_SCHEMA_SYMBOLS_VERSION = 20211101
View Source
const VARNAM_SYMBOL_ANUSVARA = 7
View Source
const VARNAM_SYMBOL_CONSONANT = 2
View Source
const VARNAM_SYMBOL_CONSONANT_VOWEL = 4
View Source
const VARNAM_SYMBOL_DEAD_CONSONANT = 3
View Source
const VARNAM_SYMBOL_FLAGS_MORE_MATCHES_FOR_PATTERN = (1 << 0)
View Source
const VARNAM_SYMBOL_FLAGS_MORE_MATCHES_FOR_VALUE = (1 << 1)
View Source
const VARNAM_SYMBOL_JOINER = 12
View Source
const VARNAM_SYMBOL_MAX = 30
VST creation

VARNAM_SYMBOL_MAX maximum length of VST column value

View Source
const VARNAM_SYMBOL_NON_JOINER = 11
View Source
const VARNAM_SYMBOL_NUMBER = 5
View Source
const VARNAM_SYMBOL_OTHER = 10
View Source
const VARNAM_SYMBOL_PERIOD = 13
View Source
const VARNAM_SYMBOL_SYMBOL = 6
View Source
const VARNAM_SYMBOL_VIRAMA = 9
View Source
const VARNAM_SYMBOL_VISARGA = 8
View Source
const VARNAM_SYMBOL_VOWEL = 1

Available type of symbol tokens

View Source
const VARNAM_TOKEN_ACCEPT_ALL = 0

Token acceptance rules

View Source
const VARNAM_TOKEN_ACCEPT_IF_ENDS_WITH = 3
View Source
const VARNAM_TOKEN_ACCEPT_IF_IN_BETWEEN = 2
View Source
const VARNAM_TOKEN_ACCEPT_IF_STARTS_WITH = 1
View Source
const VARNAM_TOKEN_BASIC_WEIGHT = 10

A symbol token's maximum possible weight value

View Source
const VARNAM_TOKEN_CHAR = 1 // Non-lang characters like A, B, 1, * etc.

Type of tokens

View Source
const VARNAM_TOKEN_SYMBOL = 2 // Lang characters
View Source
const ZWJ = "\u200d"
View Source
const ZWNJ = "\u200c"

General

Variables

View Source
var (
	BuildString   string
	VersionString string
)

Compile-time variables.

View Source
var LOG_TIME_TAKEN = os.Getenv("GOVARNAM_LOG_TIME_TAKEN") != ""
View Source
var VARNAM_LEARNINGS_DIR = os.Getenv("VARNAM_LEARNINGS_DIR")
View Source
var VARNAM_VST_DIR = os.Getenv("VARNAM_VST_DIR")

Functions

func FindVSTDir

func FindVSTDir() (string, error)

FindVSTDir Get the VST storing directory

func GetAllSchemePaths

func GetAllSchemePaths() ([]string, error)

GetAllSchemePaths get available IDs' location as a string array

func InitMigrate added in v1.9.0

func InitMigrate(db *sql.DB, fs fs.FS) (*migrate, error)

func SetLearningsDir added in v1.7.0

func SetLearningsDir(path string)

SetLearningsDir This overrides the environment variable

func SetVSTLookupDir added in v1.7.0

func SetVSTLookupDir(path string)

SetVSTLookupDir This overrides the environment variable

Types

type DictionaryResult

type DictionaryResult struct {
	// contains filtered or unexported fields
}

DictionaryResult result from dictionary search

type LangRules

type LangRules struct {
	Virama               string
	IndicDigits          bool
	PatternLongestLength int // Longest length of pattern in VST
	UnicodeBlock         unicode.RangeTable
}

LangRules language related config

type LearnStatus

type LearnStatus struct {
	TotalWords  int
	FailedWords int
}

LearnStatus output of bulk learn

type MoreDictionaryResult added in v1.8.0

type MoreDictionaryResult struct {
	// contains filtered or unexported fields
}

MoreDictionaryResult result from dictionary search

type PatternDictionarySuggestion

type PatternDictionarySuggestion struct {
	Sug    Suggestion
	Length int
}

PatternDictionarySuggestion longest match result

type SchemeDetails

type SchemeDetails struct {
	Identifier   string
	LangCode     string
	DisplayName  string
	Author       string
	CompiledDate string
	IsStable     bool
}

SchemeDetails of VST

func GetAllSchemeDetails

func GetAllSchemeDetails() ([]SchemeDetails, error)

GetAllSchemeDetails get information of all schemes available

type Suggestion

type Suggestion struct {
	Word      string
	Weight    int
	LearnedOn int
}

Suggestion suggestion

func SortSuggestions

func SortSuggestions(sugs []Suggestion) []Suggestion

SortSuggestions by weight and learned on time

type Symbol

type Symbol struct {
	Identifier      int
	Type            int
	MatchType       int
	Pattern         string
	Value1          string
	Value2          string
	Value3          string
	Tag             string
	Weight          int
	Priority        int
	AcceptCondition int
	Flags           int
}

Symbol result from VST

func NewSearchSymbol added in v1.7.2

func NewSearchSymbol() Symbol

NewSearchSymbol a constructor for making Symbol. We're doing this because the default int value in Go structs is 0. This won't work with searching because fields can have 0 value. https://stackoverflow.com/q/37135193/1372424

type Token

type Token struct {
	// contains filtered or unexported fields
}

Token info for making a suggestion

type TransliterationResult

type TransliterationResult struct {
	// Exactly found words in dictionary if there is any.
	// From both patterns and normal dict
	ExactWords []Suggestion

	// Exactly starting word matches in dictionary if there is any.
	// Not applicable for patterns dictionary.
	ExactMatches []Suggestion

	// Possible word suggestions from dictionary
	DictionarySuggestions []Suggestion

	// Possible words matching from patterns dictionary
	PatternDictionarySuggestions []Suggestion

	// All possible matches from tokenizer (VARNAM_MATCH_ALL)
	// Has a limit. The first few results will be VARNAM_MATCH_EXACT.
	// This will only be filled if there are no exact matches.
	// Related: See Config.TokenizerSuggestionsAlways
	TokenizerSuggestions []Suggestion

	// VARNAM_MATCH_EXACT results from tokenizer.
	// No limit, mostly gives 1 or less than 3 outputs
	GreedyTokenized []Suggestion
}

TransliterationResult result

type VSTMakerConfig added in v1.7.1

type VSTMakerConfig struct {
	// Not a config. State variable
	Buffering bool

	IgnoreDuplicateTokens bool
	UseDeadConsonants     bool
}

type Varnam

type Varnam struct {
	VSTPath  string
	DictPath string

	LangRules     LangRules
	SchemeDetails SchemeDetails
	Debug         bool

	PatternWordPartializers []func(*Suggestion)

	// Maximum suggestions to obtain from dictionary
	DictionarySuggestionsLimit int

	// Maximum suggestions to obtain from patterns dictionary
	PatternDictionarySuggestionsLimit int

	// Maximum suggestions to be made from tokenizer
	TokenizerSuggestionsLimit int

	// Always include tokenizer made suggestions.
	// Tokenizer results are not exactly the best, but it's alright
	TokenizerSuggestionsAlways bool

	// Whether only exact scheme match should be considered
	// for dictionary search and discard possibility matches
	DictionaryMatchExact bool

	VSTMakerConfig VSTMakerConfig
	// contains filtered or unexported fields
}

Varnam config

func Init

func Init(vstPath string, dictPath string) (*Varnam, error)

Init Initialize varnam. Dictionary will be created if it doesn't exist

func InitFromID

func InitFromID(schemeID string) (*Varnam, error)

InitFromID Init from ID. The scheme ID need not be a language code

func VMInit added in v1.7.1

func VMInit(vstPath string) (*Varnam, error)

VMInit init

func (*Varnam) Close

func (varnam *Varnam) Close() error

Close close db connections

func (*Varnam) Export

func (varnam *Varnam) Export(filePath string, wordsPerFile int) error

Export learnings as JSON to a file

func (*Varnam) GetRecentlyLearntWords added in v1.5.0

func (varnam *Varnam) GetRecentlyLearntWords(ctx context.Context, offset int, limit int) ([]Suggestion, error)

GetRecentlyLearntWords get recently learnt words

func (*Varnam) GetSuggestions added in v1.6.0

func (varnam *Varnam) GetSuggestions(ctx context.Context, word string) []Suggestion

GetSuggestions get word suggestions from dictionary

func (*Varnam) Import

func (varnam *Varnam) Import(filePath string) error

Import learnings from file

func (*Varnam) InitDict

func (varnam *Varnam) InitDict(dictPath string) error

InitDict open connection to dictionary

func (*Varnam) InitVST

func (varnam *Varnam) InitVST(vstPath string) error

InitVST initialize

func (*Varnam) Learn

func (varnam *Varnam) Learn(word string, weight int) error

Learn a word. If it already exists, increases its weight

func (*Varnam) LearnFromFile

func (varnam *Varnam) LearnFromFile(filePath string) (LearnStatus, error)

LearnFromFile Learn all words in a file

func (*Varnam) LearnMany

func (varnam *Varnam) LearnMany(words []WordInfo) (LearnStatus, error)

LearnMany words in bulk. Faster learning

func (*Varnam) ReIndexDictionary added in v1.9.0

func (varnam *Varnam) ReIndexDictionary() error

ReIndexDictionary re-indexes dictionary

func (*Varnam) RegisterPatternWordPartializer

func (varnam *Varnam) RegisterPatternWordPartializer(cb func(*Suggestion))

RegisterPatternWordPartializer A word partializer replaces a word's ending with a proper alternative so that the word can be tokenized further. Useful for Malayalam to replace the last chil letter with its root

func (*Varnam) ReverseTransliterate

func (varnam *Varnam) ReverseTransliterate(word string) ([]Suggestion, error)

ReverseTransliterate do a reverse transliteration

func (*Varnam) SearchSymbolTable

func (varnam *Varnam) SearchSymbolTable(ctx context.Context, searchCriteria Symbol) ([]Symbol, error)

SearchSymbolTable For searching symbol table

func (*Varnam) Train

func (varnam *Varnam) Train(pattern string, word string) error

Train a word with a particular pattern. Pattern => word

func (*Varnam) TrainFromFile

func (varnam *Varnam) TrainFromFile(filePath string) (LearnStatus, error)

TrainFromFile Train words with a particular pattern in bulk

func (*Varnam) Transliterate

func (varnam *Varnam) Transliterate(word string) []Suggestion

Transliterate transliterate with output array

func (*Varnam) TransliterateAdvanced added in v1.7.0

func (varnam *Varnam) TransliterateAdvanced(word string) TransliterationResult

TransliterateAdvanced transliterate with a detailed structure as result

func (*Varnam) TransliterateAdvancedWithContext added in v1.7.0

func (varnam *Varnam) TransliterateAdvancedWithContext(ctx context.Context, word string, resultChannel chan<- TransliterationResult)

TransliterateAdvancedWithContext transliterate with a detailed structure as result, but with Go context

func (*Varnam) TransliterateGreedyTokenized added in v1.6.0

func (varnam *Varnam) TransliterateGreedyTokenized(word string) []Suggestion

TransliterateGreedyTokenized transliterate word, only tokenizer results

func (*Varnam) TransliterateWithContext

func (varnam *Varnam) TransliterateWithContext(ctx context.Context, word string, resultChannel chan<- []Suggestion)

TransliterateWithContext Transliterate but with Go context

func (*Varnam) Unlearn

func (varnam *Varnam) Unlearn(word string) error

Unlearn a word, removing it from the words DB and from patterns if there are any

func (*Varnam) VMCreateToken added in v1.7.1

func (varnam *Varnam) VMCreateToken(pattern string, value1 string, value2 string, value3 string, tag string, symbolType int, matchType int, priority int, acceptCondition int, buffered bool) error

VMCreateToken Create Token

func (*Varnam) VMDeleteToken added in v1.8.1

func (varnam *Varnam) VMDeleteToken(searchCriteria Symbol) error

VMDeleteToken Removes a token from VST

func (*Varnam) VMFlushBuffer added in v1.7.1

func (varnam *Varnam) VMFlushBuffer() error

VMFlushBuffer flush

func (*Varnam) VMSetSchemeDetails added in v1.7.1

func (varnam *Varnam) VMSetSchemeDetails(sd SchemeDetails) error

VMSetSchemeDetails set scheme details

type WordInfo

type WordInfo struct {
	// contains filtered or unexported fields
}

WordInfo represents an item in the words table

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL