Documentation ¶
Index ¶
- Constants
- Variables
- func FindVSTDir() (string, error)
- func GetAllSchemePaths() ([]string, error)
- func InitMigrate(db *sql.DB, fs fs.FS) (*migrate, error)
- func SetLearningsDir(path string)
- func SetVSTLookupDir(path string)
- type DictionaryResult
- type LangRules
- type LearnStatus
- type MoreDictionaryResult
- type PatternDictionarySuggestion
- type SchemeDetails
- type Suggestion
- type Symbol
- type Token
- type TransliterationResult
- type VSTMakerConfig
- type Varnam
- func (varnam *Varnam) Close() error
- func (varnam *Varnam) Export(filePath string, wordsPerFile int) error
- func (varnam *Varnam) GetRecentlyLearntWords(ctx context.Context, offset int, limit int) ([]Suggestion, error)
- func (varnam *Varnam) GetSuggestions(ctx context.Context, word string) []Suggestion
- func (varnam *Varnam) Import(filePath string) error
- func (varnam *Varnam) InitDict(dictPath string) error
- func (varnam *Varnam) InitVST(vstPath string) error
- func (varnam *Varnam) Learn(word string, weight int) error
- func (varnam *Varnam) LearnFromFile(filePath string) (LearnStatus, error)
- func (varnam *Varnam) LearnMany(words []WordInfo) (LearnStatus, error)
- func (varnam *Varnam) ReIndexDictionary() error
- func (varnam *Varnam) RegisterPatternWordPartializer(cb func(*Suggestion))
- func (varnam *Varnam) ReverseTransliterate(word string) ([]Suggestion, error)
- func (varnam *Varnam) SearchSymbolTable(ctx context.Context, searchCriteria Symbol) ([]Symbol, error)
- func (varnam *Varnam) Train(pattern string, word string) error
- func (varnam *Varnam) TrainFromFile(filePath string) (LearnStatus, error)
- func (varnam *Varnam) Transliterate(word string) []Suggestion
- func (varnam *Varnam) TransliterateAdvanced(word string) TransliterationResult
- func (varnam *Varnam) TransliterateAdvancedWithContext(ctx context.Context, word string, resultChannel chan<- TransliterationResult)
- func (varnam *Varnam) TransliterateGreedyTokenized(word string) []Suggestion
- func (varnam *Varnam) TransliterateWithContext(ctx context.Context, word string, resultChannel chan<- []Suggestion)
- func (varnam *Varnam) Unlearn(word string) error
- func (varnam *Varnam) VMCreateToken(pattern string, value1 string, value2 string, value3 string, tag string, ...) error
- func (varnam *Varnam) VMDeleteToken(searchCriteria Symbol) error
- func (varnam *Varnam) VMFlushBuffer() error
- func (varnam *Varnam) VMSetSchemeDetails(sd SchemeDetails) error
- type WordInfo
Constants ¶
const CHIL_TAG = "chill"
const STRUCT_INT_DEFAULT_VALUE = -1
Go's struct int fields have a default value of 0. For the SearchSymbolTable use case this is a problem, because 0 can be a legitimate field value. Hence we use a constructor that sets this default value instead. https://stackoverflow.com/q/37135193/1372424
const VARNAM_LEARNT_WORD_MIN_WEIGHT = 30
VARNAM_LEARNT_WORD_MIN_WEIGHT Minimum weight/confidence for learnt words.
const VARNAM_MATCH_ALL = 3
const VARNAM_MATCH_EXACT = 1
Pattern matching
const VARNAM_MATCH_POSSIBILITY = 2
const VARNAM_METADATA_SCHEME_AUTHOR = "scheme-author"
const VARNAM_METADATA_SCHEME_COMPILED_DATE = "scheme-compiled-date"
const VARNAM_METADATA_SCHEME_DISPLAY_NAME = "scheme-display-name"
const VARNAM_METADATA_SCHEME_IDENTIFIER = "scheme-id"
const VARNAM_METADATA_SCHEME_LANGUAGE_CODE = "lang-code"
const VARNAM_METADATA_SCHEME_STABLE = "scheme-stable"
const VARNAM_SCHEMA_SYMBOLS_VERSION = 20211101
const VARNAM_SYMBOL_ANUSVARA = 7
const VARNAM_SYMBOL_CONSONANT = 2
const VARNAM_SYMBOL_CONSONANT_VOWEL = 4
const VARNAM_SYMBOL_DEAD_CONSONANT = 3
const VARNAM_SYMBOL_FLAGS_MORE_MATCHES_FOR_PATTERN = (1 << 0)
const VARNAM_SYMBOL_FLAGS_MORE_MATCHES_FOR_VALUE = (1 << 1)
const VARNAM_SYMBOL_JOINER = 12
const VARNAM_SYMBOL_MAX = 30
VST creation
VARNAM_SYMBOL_MAX maximum length of VST column value
const VARNAM_SYMBOL_NON_JOINER = 11
const VARNAM_SYMBOL_NUMBER = 5
const VARNAM_SYMBOL_OTHER = 10
const VARNAM_SYMBOL_PERIOD = 13
const VARNAM_SYMBOL_SYMBOL = 6
const VARNAM_SYMBOL_VIRAMA = 9
const VARNAM_SYMBOL_VISARGA = 8
const VARNAM_SYMBOL_VOWEL = 1
Available type of symbol tokens
const VARNAM_TOKEN_ACCEPT_ALL = 0
Token acceptance rules
const VARNAM_TOKEN_ACCEPT_IF_ENDS_WITH = 3
const VARNAM_TOKEN_ACCEPT_IF_IN_BETWEEN = 2
const VARNAM_TOKEN_ACCEPT_IF_STARTS_WITH = 1
const VARNAM_TOKEN_BASIC_WEIGHT = 10
A symbol token's maximum possible weight value
const VARNAM_TOKEN_CHAR = 1 // Non-lang characters like A, B, 1, * etc.
Type of tokens
const VARNAM_TOKEN_SYMBOL = 2 // Lang characters
const ZWJ = "\u200d"
const ZWNJ = "\u200c"
General
Variables ¶
var ( BuildString string VersionString string )
Compile-time variables.
var LOG_TIME_TAKEN = os.Getenv("GOVARNAM_LOG_TIME_TAKEN") != ""
var VARNAM_LEARNINGS_DIR = os.Getenv("VARNAM_LEARNINGS_DIR")
var VARNAM_VST_DIR = os.Getenv("VARNAM_VST_DIR")
Functions ¶
func GetAllSchemePaths ¶
GetAllSchemePaths gets the locations of all available scheme IDs as a string array
func SetLearningsDir ¶ added in v1.7.0
func SetLearningsDir(path string)
SetLearningsDir This overrides the environment variable
func SetVSTLookupDir ¶ added in v1.7.0
func SetVSTLookupDir(path string)
SetVSTLookupDir This overrides the environment variable
Types ¶
type DictionaryResult ¶
type DictionaryResult struct {
// contains filtered or unexported fields
}
DictionaryResult result from dictionary search
type LangRules ¶
type LangRules struct { Virama string IndicDigits bool PatternLongestLength int // Longest length of pattern in VST UnicodeBlock unicode.RangeTable }
LangRules language related config
type LearnStatus ¶
LearnStatus output of bulk learn
type MoreDictionaryResult ¶ added in v1.8.0
type MoreDictionaryResult struct {
// contains filtered or unexported fields
}
MoreDictionaryResult result from dictionary search
type PatternDictionarySuggestion ¶
type PatternDictionarySuggestion struct { Sug Suggestion Length int }
PatternDictionarySuggestion longest match result
type SchemeDetails ¶
type SchemeDetails struct { Identifier string LangCode string DisplayName string Author string CompiledDate string IsStable bool }
SchemeDetails of VST
func GetAllSchemeDetails ¶
func GetAllSchemeDetails() ([]SchemeDetails, error)
GetAllSchemeDetails get information of all schemes available
type Suggestion ¶
Suggestion suggestion
func SortSuggestions ¶
func SortSuggestions(sugs []Suggestion) []Suggestion
SortSuggestions by weight and learned on time
type Symbol ¶
type Symbol struct { Identifier int Type int MatchType int Pattern string Value1 string Value2 string Value3 string Tag string Weight int Priority int AcceptCondition int Flags int }
Symbol result from VST
func NewSearchSymbol ¶ added in v1.7.2
func NewSearchSymbol() Symbol
NewSearchSymbol a constructor for making Symbol. This is needed because the default int value in Go structs is 0, which won't work for searching because fields can legitimately have a 0 value. https://stackoverflow.com/q/37135193/1372424
type Token ¶
type Token struct {
// contains filtered or unexported fields
}
Token info for making a suggestion
type TransliterationResult ¶
type TransliterationResult struct { // Exactly found words in dictionary if there is any. // From both patterns and normal dict ExactWords []Suggestion // Exactly starting word matches in dictionary if there is any. // Not applicable for patterns dictionary. ExactMatches []Suggestion // Possible word suggestions from dictionary DictionarySuggestions []Suggestion // Possible words matching from patterns dictionary PatternDictionarySuggestions []Suggestion // All possible matches from tokenizer (VARNAM_MATCH_ALL) // Has a limit. The first few results will be VARNAM_MATCH_EXACT. // This will only be filled if there are no exact matches. // Related: See Config.TokenizerSuggestionsAlways TokenizerSuggestions []Suggestion // VARNAM_MATCH_EXACT results from tokenizer. // No limit, mostly gives 1 or less than 3 outputs GreedyTokenized []Suggestion }
TransliterationResult result
type VSTMakerConfig ¶ added in v1.7.1
type Varnam ¶
type Varnam struct { VSTPath string DictPath string LangRules LangRules SchemeDetails SchemeDetails Debug bool PatternWordPartializers []func(*Suggestion) // Maximum suggestions to obtain from dictionary DictionarySuggestionsLimit int // Maximum suggestions to obtain from patterns dictionary PatternDictionarySuggestionsLimit int // Maximum suggestions to be made from tokenizer TokenizerSuggestionsLimit int // Always include tokenizer made suggestions. // Tokenizer results are not exactly the best, but it's alright TokenizerSuggestionsAlways bool // Whether only exact scheme match should be considered // for dictionary search and discard possibility matches DictionaryMatchExact bool VSTMakerConfig VSTMakerConfig // contains filtered or unexported fields }
Varnam config
func InitFromID ¶
InitFromID Init from ID. A scheme ID is not necessarily a language code
func (*Varnam) GetRecentlyLearntWords ¶ added in v1.5.0
func (varnam *Varnam) GetRecentlyLearntWords(ctx context.Context, offset int, limit int) ([]Suggestion, error)
GetRecentlyLearntWords get recently learnt words
func (*Varnam) GetSuggestions ¶ added in v1.6.0
func (varnam *Varnam) GetSuggestions(ctx context.Context, word string) []Suggestion
GetSuggestions get word suggestions from dictionary
func (*Varnam) LearnFromFile ¶
func (varnam *Varnam) LearnFromFile(filePath string) (LearnStatus, error)
LearnFromFile Learn all words in a file
func (*Varnam) LearnMany ¶
func (varnam *Varnam) LearnMany(words []WordInfo) (LearnStatus, error)
LearnMany words in bulk. Faster learning
func (*Varnam) ReIndexDictionary ¶ added in v1.9.0
ReIndexDictionary re-indexes dictionary
func (*Varnam) RegisterPatternWordPartializer ¶
func (varnam *Varnam) RegisterPatternWordPartializer(cb func(*Suggestion))
RegisterPatternWordPartializer A word partializer replaces a word's ending with a proper alternative so that the word can be tokenized further. Useful for Malayalam to replace the last chil letter with its root
func (*Varnam) ReverseTransliterate ¶
func (varnam *Varnam) ReverseTransliterate(word string) ([]Suggestion, error)
ReverseTransliterate do a reverse transliteration
func (*Varnam) SearchSymbolTable ¶
func (varnam *Varnam) SearchSymbolTable(ctx context.Context, searchCriteria Symbol) ([]Symbol, error)
SearchSymbolTable For searching symbol table
func (*Varnam) TrainFromFile ¶
func (varnam *Varnam) TrainFromFile(filePath string) (LearnStatus, error)
TrainFromFile Train words with a particular pattern in bulk
func (*Varnam) Transliterate ¶
func (varnam *Varnam) Transliterate(word string) []Suggestion
Transliterate transliterate with output array
func (*Varnam) TransliterateAdvanced ¶ added in v1.7.0
func (varnam *Varnam) TransliterateAdvanced(word string) TransliterationResult
TransliterateAdvanced transliterate with a detailed structure as result
func (*Varnam) TransliterateAdvancedWithContext ¶ added in v1.7.0
func (varnam *Varnam) TransliterateAdvancedWithContext(ctx context.Context, word string, resultChannel chan<- TransliterationResult)
TransliterateAdvancedWithContext transliterate with a detailed structure as result, but with Go context
func (*Varnam) TransliterateGreedyTokenized ¶ added in v1.6.0
func (varnam *Varnam) TransliterateGreedyTokenized(word string) []Suggestion
TransliterateGreedyTokenized transliterate word, only tokenizer results
func (*Varnam) TransliterateWithContext ¶
func (varnam *Varnam) TransliterateWithContext(ctx context.Context, word string, resultChannel chan<- []Suggestion)
TransliterateWithContext Transliterate but with Go context
func (*Varnam) VMCreateToken ¶ added in v1.7.1
func (varnam *Varnam) VMCreateToken(pattern string, value1 string, value2 string, value3 string, tag string, symbolType int, matchType int, priority int, acceptCondition int, buffered bool) error
VMCreateToken Create Token
func (*Varnam) VMDeleteToken ¶ added in v1.8.1
VMDeleteToken Removes a token from VST
func (*Varnam) VMFlushBuffer ¶ added in v1.7.1
VMFlushBuffer flush
func (*Varnam) VMSetSchemeDetails ¶ added in v1.7.1
func (varnam *Varnam) VMSetSchemeDetails(sd SchemeDetails) error
VMSetSchemeDetails set scheme details