Documentation ¶
Index ¶
- Constants
- Variables
- func FindVSTDir() (string, error)
- func GetAllSchemePaths() ([]string, error)
- type DictionaryResult
- type LangRules
- type LearnStatus
- type PatternDictionarySuggestion
- type SchemeDetails
- type Suggestion
- type Symbol
- type Token
- type TransliterationResult
- type Varnam
- func (varnam *Varnam) Close() error
- func (varnam *Varnam) Export(filePath string) error
- func (varnam *Varnam) Import(filePath string) error
- func (varnam *Varnam) InitDict(dictPath string) error
- func (varnam *Varnam) InitVST(vstPath string) error
- func (varnam *Varnam) Learn(word string, weight int) error
- func (varnam *Varnam) LearnFromFile(filePath string) (LearnStatus, error)
- func (varnam *Varnam) LearnMany(words []WordInfo) (LearnStatus, error)
- func (varnam *Varnam) RegisterPatternWordPartializer(cb func(*Suggestion))
- func (varnam *Varnam) ReverseTransliterate(word string) ([]Suggestion, error)
- func (varnam *Varnam) SearchSymbolTable(ctx context.Context, searchCriteria Symbol) ([]Symbol, error)
- func (varnam *Varnam) Train(pattern string, word string) error
- func (varnam *Varnam) TrainFromFile(filePath string) error
- func (varnam *Varnam) Transliterate(word string) TransliterationResult
- func (varnam *Varnam) TransliterateGreedy(word string) TransliterationResult
- func (varnam *Varnam) TransliterateWithContext(ctx context.Context, word string, resultChannel chan<- TransliterationResult)
- func (varnam *Varnam) Unlearn(word string) error
- type WordInfo
Constants ¶
const CHIL_TAG = "chill"
const VARNAM_LEARNT_WORD_MIN_WEIGHT = 30
VARNAM_LEARNT_WORD_MIN_WEIGHT Minimum weight/confidence for learnt words.
const VARNAM_MATCH_ALL = 3
const VARNAM_MATCH_EXACT = 1
Pattern matching
const VARNAM_MATCH_POSSIBILITY = 2
const VARNAM_SYMBOL_ANUSVARA = 7
const VARNAM_SYMBOL_CONSONANT = 2
const VARNAM_SYMBOL_CONSONANT_VOWEL = 4
const VARNAM_SYMBOL_DEAD_CONSONANT = 3
const VARNAM_SYMBOL_JOINER = 12
const VARNAM_SYMBOL_NON_JOINER = 11
const VARNAM_SYMBOL_NUMBER = 5
const VARNAM_SYMBOL_OTHER = 10
const VARNAM_SYMBOL_PERIOD = 13
const VARNAM_SYMBOL_SYMBOL = 6
const VARNAM_SYMBOL_VIRAMA = 9
const VARNAM_SYMBOL_VISARGA = 8
const VARNAM_SYMBOL_VOWEL = 1
Available type of symbol tokens
const VARNAM_TOKEN_ACCEPT_ALL = 0
Token acceptance rules
const VARNAM_TOKEN_ACCEPT_IF_ENDS_WITH = 3
const VARNAM_TOKEN_ACCEPT_IF_IN_BETWEEN = 2
const VARNAM_TOKEN_ACCEPT_IF_STARTS_WITH = 1
const VARNAM_TOKEN_BASIC_WEIGHT = 10
A symbol token's maximum possible weight value
const VARNAM_TOKEN_CHAR = 1 // Non-lang characters like A, B, 1, * etc.
Type of tokens
const VARNAM_TOKEN_SYMBOL = 2 // Lang characters
const ZWJ = "\u200d"
const ZWNJ = "\u200c"
General
Variables ¶
var LOG_TIME_TAKEN = os.Getenv("GOVARNAM_LOG_TIME_TAKEN") != ""
var VARNAM_VST_DIR = [2]string{
"schemes",
"/usr/local/share/varnam/vstDEV"}
VARNAM_VST_DIR VST lookup directories, in order of priority
Functions ¶
func FindVSTDir ¶ added in v1.3.0
FindVSTDir Get the VST storing directory
func GetAllSchemePaths ¶ added in v1.3.0
GetAllSchemePaths get the locations of the available scheme IDs as a string array
Types ¶
type DictionaryResult ¶
type DictionaryResult struct {
// contains filtered or unexported fields
}
DictionaryResult result from dictionary search
type LangRules ¶
type LangRules struct { Virama string IndicDigits bool PatternLongestLength int // Longest length of pattern in VST }
LangRules language-related config
type LearnStatus ¶ added in v1.3.0
LearnStatus output of bulk learn
type PatternDictionarySuggestion ¶
type PatternDictionarySuggestion struct { Sug Suggestion Length int }
PatternDictionarySuggestion longest match result
type SchemeDetails ¶ added in v1.3.0
type SchemeDetails struct { Identifier string LangCode string DisplayName string Author string CompiledDate string IsStable bool }
SchemeDetails of VST
func GetAllSchemeDetails ¶ added in v1.3.0
func GetAllSchemeDetails() ([]SchemeDetails, error)
GetAllSchemeDetails get information of all schemes available
type Suggestion ¶
Suggestion suggestion
func SortSuggestions ¶ added in v1.1.0
func SortSuggestions(sugs []Suggestion) []Suggestion
SortSuggestions by weight and learned-on time
type Symbol ¶
type Symbol struct { Identifier int Type int MatchType int Pattern string Value1 string Value2 string Value3 string Tag string Weight int Priority int AcceptCondition int Flags int }
Symbol result from VST
type Token ¶
type Token struct {
// contains filtered or unexported fields
}
Token info for making a suggestion
type TransliterationResult ¶
type TransliterationResult struct { // Exact matches found in dictionary if any // From both patterns and normal dict ExactMatches []Suggestion // Possible words matching from dictionary DictionarySuggestions []Suggestion // Possible words matching from patterns dictionary PatternDictionarySuggestions []Suggestion // All possible matches from tokenizer (VARNAM_MATCH_ALL) // Has a limit. The first few results will be VARNAM_MATCH_EXACT. // This will only be filled if there are no exact matches. // Related: See Config.TokenizerSuggestionsAlways TokenizerSuggestions []Suggestion // VARNAM_MATCH_EXACT results from tokenizer. // No limit, mostly gives 1 or less than 3 outputs GreedyTokenized []Suggestion }
TransliterationResult result
type Varnam ¶
type Varnam struct { VSTPath string DictPath string LangRules LangRules SchemeDetails SchemeDetails Debug bool PatternWordPartializers []func(*Suggestion) // Maximum suggestions to obtain from dictionary DictionarySuggestionsLimit int // Maximum suggestions to obtain from patterns dictionary PatternDictionarySuggestionsLimit int // Maximum suggestions to be made from tokenizer TokenizerSuggestionsLimit int // Always include tokenizer made suggestions. // Tokenizer results are not exactly the best, but it's alright TokenizerSuggestionsAlways bool // contains filtered or unexported fields }
Varnam config
func InitFromID ¶
InitFromID Init from ID. The scheme ID need not be a language code
func (*Varnam) LearnFromFile ¶
func (varnam *Varnam) LearnFromFile(filePath string) (LearnStatus, error)
LearnFromFile Learn all words in a file
func (*Varnam) LearnMany ¶ added in v1.3.0
func (varnam *Varnam) LearnMany(words []WordInfo) (LearnStatus, error)
LearnMany words in bulk. Faster learning
func (*Varnam) RegisterPatternWordPartializer ¶ added in v1.3.0
func (varnam *Varnam) RegisterPatternWordPartializer(cb func(*Suggestion))
RegisterPatternWordPartializer A word partializer replaces a word's ending with a proper alternative so that the word can be tokenized further. Useful for Malayalam, to replace the last chil letter with its root
func (*Varnam) ReverseTransliterate ¶ added in v1.2.0
func (varnam *Varnam) ReverseTransliterate(word string) ([]Suggestion, error)
ReverseTransliterate do a reverse transliteration
func (*Varnam) SearchSymbolTable ¶ added in v1.3.0
func (varnam *Varnam) SearchSymbolTable(ctx context.Context, searchCriteria Symbol) ([]Symbol, error)
SearchSymbolTable For searching symbol table
func (*Varnam) TrainFromFile ¶
TrainFromFile Train words with a particular pattern in bulk
func (*Varnam) Transliterate ¶
func (varnam *Varnam) Transliterate(word string) TransliterationResult
Transliterate a word with all possibilities as results
func (*Varnam) TransliterateGreedy ¶
func (varnam *Varnam) TransliterateGreedy(word string) TransliterationResult
TransliterateGreedy transliterate word without all possible suggestions in result
func (*Varnam) TransliterateWithContext ¶
func (varnam *Varnam) TransliterateWithContext(ctx context.Context, word string, resultChannel chan<- TransliterationResult)
TransliterateWithContext Use Go context