govarnam

package
v1.3.0 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Aug 14, 2021 License: AGPL-3.0 Imports: 18 Imported by: 0

Documentation

Index

Constants

View Source
const CHIL_TAG = "chill"
View Source
const VARNAM_LEARNT_WORD_MIN_WEIGHT = 30

VARNAM_LEARNT_WORD_MIN_WEIGHT Minimum weight/confidence for learnt words.

View Source
const VARNAM_MATCH_ALL = 3
View Source
const VARNAM_MATCH_EXACT = 1

Pattern matching

View Source
const VARNAM_MATCH_POSSIBILITY = 2
View Source
const VARNAM_SYMBOL_ANUSVARA = 7
View Source
const VARNAM_SYMBOL_CONSONANT = 2
View Source
const VARNAM_SYMBOL_CONSONANT_VOWEL = 4
View Source
const VARNAM_SYMBOL_DEAD_CONSONANT = 3
View Source
const VARNAM_SYMBOL_JOINER = 12
View Source
const VARNAM_SYMBOL_NON_JOINER = 11
View Source
const VARNAM_SYMBOL_NUMBER = 5
View Source
const VARNAM_SYMBOL_OTHER = 10
View Source
const VARNAM_SYMBOL_PERIOD = 13
View Source
const VARNAM_SYMBOL_SYMBOL = 6
View Source
const VARNAM_SYMBOL_VIRAMA = 9
View Source
const VARNAM_SYMBOL_VISARGA = 8
View Source
const VARNAM_SYMBOL_VOWEL = 1

Available type of symbol tokens

View Source
const VARNAM_TOKEN_ACCEPT_ALL = 0

Token acceptance rules

View Source
const VARNAM_TOKEN_ACCEPT_IF_ENDS_WITH = 3
View Source
const VARNAM_TOKEN_ACCEPT_IF_IN_BETWEEN = 2
View Source
const VARNAM_TOKEN_ACCEPT_IF_STARTS_WITH = 1
View Source
const VARNAM_TOKEN_BASIC_WEIGHT = 10

A symbol token's maximum possible weight value

View Source
const VARNAM_TOKEN_CHAR = 1 // Non-lang characters like A, B, 1, * etc.

Type of tokens

View Source
const VARNAM_TOKEN_SYMBOL = 2 // Lang characters
View Source
const ZWJ = "\u200d"
View Source
const ZWNJ = "\u200c"

General

Variables

View Source
var LOG_TIME_TAKEN = os.Getenv("GOVARNAM_LOG_TIME_TAKEN") != ""
View Source
var VARNAM_VST_DIR = [2]string{

	"schemes",
	"/usr/local/share/varnam/vstDEV"}

VARNAM_VST_DIR VST lookup directories, in order of priority

Functions

func FindVSTDir added in v1.3.0

func FindVSTDir() (string, error)

FindVSTDir Get the VST storing directory

func GetAllSchemePaths added in v1.3.0

func GetAllSchemePaths() ([]string, error)

GetAllSchemePaths get the locations of all available scheme IDs as a string slice

Types

type DictionaryResult

type DictionaryResult struct {
	// contains filtered or unexported fields
}

DictionaryResult result from dictionary search

type LangRules

type LangRules struct {
	Virama               string
	IndicDigits          bool
	PatternLongestLength int // Longest length of pattern in VST
}

LangRules language related config

type LearnStatus added in v1.3.0

type LearnStatus struct {
	TotalWords  int
	FailedWords int
}

LearnStatus output of bulk learn

type PatternDictionarySuggestion

type PatternDictionarySuggestion struct {
	Sug    Suggestion
	Length int
}

PatternDictionarySuggestion longest match result

type SchemeDetails added in v1.3.0

type SchemeDetails struct {
	Identifier   string
	LangCode     string
	DisplayName  string
	Author       string
	CompiledDate string
	IsStable     bool
}

SchemeDetails of VST

func GetAllSchemeDetails added in v1.3.0

func GetAllSchemeDetails() ([]SchemeDetails, error)

GetAllSchemeDetails get information of all schemes available

type Suggestion

type Suggestion struct {
	Word      string
	Weight    int
	LearnedOn int
}

Suggestion a suggested word along with its weight and learned on time

func SortSuggestions added in v1.1.0

func SortSuggestions(sugs []Suggestion) []Suggestion

SortSuggestions by weight and learned on time

type Symbol

type Symbol struct {
	Identifier      int
	Type            int
	MatchType       int
	Pattern         string
	Value1          string
	Value2          string
	Value3          string
	Tag             string
	Weight          int
	Priority        int
	AcceptCondition int
	Flags           int
}

Symbol result from VST

type Token

type Token struct {
	// contains filtered or unexported fields
}

Token info for making a suggestion

type TransliterationResult

type TransliterationResult struct {
	// Exact matches found in dictionary if any
	// From both patterns and normal dict
	ExactMatches []Suggestion

	// Possible words matching from dictionary
	DictionarySuggestions []Suggestion

	// Possible words matching from patterns dictionary
	PatternDictionarySuggestions []Suggestion

	// All possible matches from tokenizer (VARNAM_MATCH_ALL)
	// Has a limit. The first few results will be VARNAM_MATCH_EXACT.
	// This will only be filled if there are no exact matches.
	// Related: See Config.TokenizerSuggestionsAlways
	TokenizerSuggestions []Suggestion

	// VARNAM_MATCH_EXACT results from tokenizer.
	// No limit, mostly gives 1 or less than 3 outputs
	GreedyTokenized []Suggestion
}

TransliterationResult result

type Varnam

type Varnam struct {
	VSTPath  string
	DictPath string

	LangRules     LangRules
	SchemeDetails SchemeDetails
	Debug         bool

	PatternWordPartializers []func(*Suggestion)

	// Maximum suggestions to obtain from dictionary
	DictionarySuggestionsLimit int

	// Maximum suggestions to obtain from patterns dictionary
	PatternDictionarySuggestionsLimit int

	// Maximum suggestions to be made from tokenizer
	TokenizerSuggestionsLimit int

	// Always include tokenizer made suggestions.
	// Tokenizer results are not exactly the best, but it's alright
	TokenizerSuggestionsAlways bool
	// contains filtered or unexported fields
}

Varnam config

func Init

func Init(vstPath string, dictPath string) (*Varnam, error)

Init Initialize varnam. Dictionary will be created if it doesn't exist

func InitFromID

func InitFromID(schemeID string) (*Varnam, error)

InitFromID Init from ID. A scheme ID need not necessarily be a language code

func (*Varnam) Close

func (varnam *Varnam) Close() error

Close close db connections

func (*Varnam) Export added in v1.1.0

func (varnam *Varnam) Export(filePath string) error

Export learnings as JSON to a file

func (*Varnam) Import added in v1.1.0

func (varnam *Varnam) Import(filePath string) error

Import learnings from file

func (*Varnam) InitDict added in v1.2.0

func (varnam *Varnam) InitDict(dictPath string) error

InitDict open connection to dictionary

func (*Varnam) InitVST added in v1.2.0

func (varnam *Varnam) InitVST(vstPath string) error

InitVST initialize

func (*Varnam) Learn

func (varnam *Varnam) Learn(word string, weight int) error

Learn a word. If it already exists, its weight is increased

func (*Varnam) LearnFromFile

func (varnam *Varnam) LearnFromFile(filePath string) (LearnStatus, error)

LearnFromFile Learn all words in a file

func (*Varnam) LearnMany added in v1.3.0

func (varnam *Varnam) LearnMany(words []WordInfo) (LearnStatus, error)

LearnMany words in bulk. Faster learning

func (*Varnam) RegisterPatternWordPartializer added in v1.3.0

func (varnam *Varnam) RegisterPatternWordPartializer(cb func(*Suggestion))

RegisterPatternWordPartializer A word partializer replaces the word ending with a proper alternative so that the word can be tokenized further. Useful for Malayalam to replace the last chil letter with its root

func (*Varnam) ReverseTransliterate added in v1.2.0

func (varnam *Varnam) ReverseTransliterate(word string) ([]Suggestion, error)

ReverseTransliterate do a reverse transliteration

func (*Varnam) SearchSymbolTable added in v1.3.0

func (varnam *Varnam) SearchSymbolTable(ctx context.Context, searchCriteria Symbol) ([]Symbol, error)

SearchSymbolTable For searching symbol table

func (*Varnam) Train

func (varnam *Varnam) Train(pattern string, word string) error

Train a word with a particular pattern. Pattern => word

func (*Varnam) TrainFromFile

func (varnam *Varnam) TrainFromFile(filePath string) error

TrainFromFile Train words with a particular pattern in bulk

func (*Varnam) Transliterate

func (varnam *Varnam) Transliterate(word string) TransliterationResult

Transliterate a word with all possibilities as results

func (*Varnam) TransliterateGreedy

func (varnam *Varnam) TransliterateGreedy(word string) TransliterationResult

TransliterateGreedy transliterate word without all possible suggestions in result

func (*Varnam) TransliterateWithContext

func (varnam *Varnam) TransliterateWithContext(ctx context.Context, word string, resultChannel chan<- TransliterationResult)

TransliterateWithContext Use Go context

func (*Varnam) Unlearn

func (varnam *Varnam) Unlearn(word string) error

Unlearn a word: remove it from the words DB, and from the patterns if present

type WordInfo

type WordInfo struct {
	// contains filtered or unexported fields
}

WordInfo represents an item in the words table

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL