bamboo

package module
v0.0.0-...-f0ca534 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: May 21, 2022 License: MIT Imports: 3 Imported by: 3

README

Text processing library for Vietnamese

License

The MIT License (MIT) Copyright (C) 2018 Luong Thanh Lam

Documentation

Overview

Package bamboo implements text processing for the Vietnamese language.

Index

Constants

View Source
// Option flags. Each of the three constants below occupies its own bit of a
// uint, so they can be combined with bitwise OR.
//
// NOTE(review): presumably these are the values passed as the flag argument
// of NewEngine, SetFlag and GetFlag — confirm against the engine code.
const (
	// EfreeToneMarking is bit 0 (value 1).
	EfreeToneMarking uint = 1 << iota
	// EstdToneStyle is bit 1 (value 2).
	EstdToneStyle
	// EautoCorrectEnabled is bit 2 (value 4).
	EautoCorrectEnabled
)

// EstdFlags is the union of all three option bits (value 7).
const EstdFlags = EfreeToneMarking | EstdToneStyle | EautoCorrectEnabled
View Source
// UNICODE is the string "Unicode".
//
// NOTE(review): presumably this is one of the charset names accepted by
// Encode and reported by GetCharsetNames — confirm against the charset table.
const UNICODE = "Unicode"

Variables

View Source
// InputMethodDefinitions holds the built-in input method tables, keyed by
// input method name. Each InputMethodDefinition maps a trigger key (written as
// a one-character string) to a textual description of what that key does.
//
// NOTE(review): the syntax of the description strings is not defined here.
// The names read as Vietnamese — "XoaDauThanh" (remove tone), "DauSac"
// (acute), "DauHuyen" (grave), "DauHoi" (hook), "DauNga" (tilde), "DauNang"
// (dot) — and forms such as "A_Â" look like "base letters, underscore,
// resulting letters". Presumably ParseRules / ParseTonelessRules interpret
// them; confirm against the parser before relying on this reading.
var InputMethodDefinitions = map[string]InputMethodDefinition{
	"Telex": {
		"z": "XoaDauThanh",
		"s": "DauSac",
		"f": "DauHuyen",
		"r": "DauHoi",
		"x": "DauNga",
		"j": "DauNang",
		"a": "A_Â",
		"e": "E_Ê",
		"o": "O_Ô",
		"w": "UOA_ƯƠĂ",
		"d": "D_Đ",
	},
	"VNI": {
		"0": "XoaDauThanh",
		"1": "DauSac",
		"2": "DauHuyen",
		"3": "DauHoi",
		"4": "DauNga",
		"5": "DauNang",
		"6": "AEO_ÂÊÔ",
		"7": "UO_ƯƠ",
		"8": "A_Ă",
		"9": "D_Đ",
	},
	"VIQR": {
		"0": "XoaDauThanh",
		"'": "DauSac",
		"`": "DauHuyen",
		"?": "DauHoi",
		"~": "DauNga",
		".": "DauNang",
		"^": "AEO_ÂÊÔ",
		"+": "UO_ƯƠ",
		"*": "UO_ƯƠ",
		"(": "A_Ă",
		"d": "D_Đ",
	},
	"Microsoft layout": {
		"8": "DauSac",
		"5": "DauHuyen",
		"6": "DauHoi",
		"7": "DauNga",
		"9": "DauNang",
		"1": "__ă",
		"!": "_Ă",
		"2": "__â",
		"@": "_Â",
		"3": "__ê",
		"#": "_Ê",
		"4": "__ô",
		"$": "_Ô",
		"0": "__đ",
		")": "_Đ",
		"[": "__ư",
		"{": "_Ư",
		"]": "__ơ",
		"}": "_Ơ",
	},
	"Telex 2": {
		"z": "XoaDauThanh",
		"s": "DauSac",
		"f": "DauHuyen",
		"r": "DauHoi",
		"x": "DauNga",
		"j": "DauNang",
		"a": "A_Â",
		"e": "E_Ê",
		"o": "O_Ô",
		"w": "UOA_ƯƠĂ__Ư",
		"d": "D_Đ",
		"]": "__ư",
		"[": "__ơ",
		"}": "_Ư",
		"{": "_Ơ",
	},
	"Telex + VNI": {
		"z": "XoaDauThanh",
		"s": "DauSac",
		"f": "DauHuyen",
		"r": "DauHoi",
		"x": "DauNga",
		"j": "DauNang",
		"a": "A_Â",
		"e": "E_Ê",
		"o": "O_Ô",
		"w": "UOA_ƯƠĂ",
		"d": "D_Đ",
		"0": "XoaDauThanh",
		"1": "DauSac",
		"2": "DauHuyen",
		"3": "DauHoi",
		"4": "DauNga",
		"5": "DauNang",
		"6": "AEO_ÂÊÔ",
		"7": "UO_ƯƠ",
		"8": "A_Ă",
		"9": "D_Đ",
	},
	"Telex + VNI + VIQR": {
		"z":  "XoaDauThanh",
		"s":  "DauSac",
		"f":  "DauHuyen",
		"r":  "DauHoi",
		"x":  "DauNga",
		"j":  "DauNang",
		"a":  "A_Â",
		"e":  "E_Ê",
		"o":  "O_Ô",
		"w":  "UOA_ƯƠĂ",
		"d":  "D_Đ",
		"0":  "XoaDauThanh",
		"1":  "DauSac",
		"2":  "DauHuyen",
		"3":  "DauHoi",
		"4":  "DauNga",
		"5":  "DauNang",
		"6":  "AEO_ÂÊÔ",
		"7":  "UO_ƯƠ",
		"8":  "A_Ă",
		"9":  "D_Đ",
		"'":  "DauSac",
		"`":  "DauHuyen",
		"?":  "DauHoi",
		"~":  "DauNga",
		".":  "DauNang",
		"^":  "AEO_ÂÊÔ",
		"+":  "UO_ƯƠ",
		"*":  "UO_ƯƠ",
		"(":  "A_Ă",
		"\\": "D_Đ",
	},
	"VNI Bàn phím tiếng Pháp": {
		"&":  "XoaDauThanh",
		"é":  "DauSac",
		"\"": "DauHuyen",
		"'":  "DauHoi",
		"(":  "DauNga",
		"-":  "DauNang",
		"è":  "AEO_ÂÊÔ",
		"_":  "UO_ƯƠ",
		"ç":  "A_Ă",
		"à":  "D_Đ",
	},
	"Telex W": {
		"z": "XoaDauThanh",
		"s": "DauSac",
		"f": "DauHuyen",
		"r": "DauHoi",
		"x": "DauNga",
		"j": "DauNang",
		"a": "A_Â",
		"e": "E_Ê",
		"o": "O_Ô",
		"w": "UOA_ƯƠĂ__Ư",
		"d": "D_Đ",
	},
}
View Source
// PunctuationMarks lists the ASCII characters this package treats as
// punctuation, including the space character. The order of the elements is
// part of the value and is kept as declared.
var PunctuationMarks = []rune{
	// Sentence punctuation, quotes and the space character.
	',', ';', ':', '.', '"', '\'', '!', '?', ' ',
	// Comparison and arithmetic symbols, and the backslash.
	'<', '>', '=', '+', '-', '*', '/', '\\',
	// Other ASCII symbols.
	'_', '~', '`', '@', '#', '$', '%', '^', '&',
	// Parentheses, braces and square brackets.
	'(', ')', '{', '}', '[', ']',
	// Vertical bar.
	'|',
}
View Source
// Vowels lists every lower-case Vietnamese vowel. The string is written as
// twelve groups of six, one group per base vowel (a, ă, â, e, ê, i, o, ô, ơ,
// u, ư, y). Within a group the plain vowel comes first, followed by its
// grave, acute, hook, tilde and dot-below forms — the same order as the
// ToneNone..ToneDot constants. The groups are joined at compile time, so the
// resulting slice is one contiguous sequence.
var Vowels = []rune("aàáảãạ" +
	"ăằắẳẵặ" +
	"âầấẩẫậ" +
	"eèéẻẽẹ" +
	"êềếểễệ" +
	"iìíỉĩị" +
	"oòóỏõọ" +
	"ôồốổỗộ" +
	"ơờớởỡợ" +
	"uùúủũụ" +
	"ưừứửữự" +
	"yỳýỷỹỵ")

Functions

func AddMarkToChar

func AddMarkToChar(chr rune, mark uint8) rune

func AddMarkToTonelessChar

func AddMarkToTonelessChar(chr rune, mark uint8) rune

func AddToneToChar

func AddToneToChar(chr rune, tone uint8) rune

func Encode

func Encode(charsetName string, input string) string

func FindMarkPosition

func FindMarkPosition(chr rune) int

func FindVowelPosition

func FindVowelPosition(chr rune) int

func Flatten

func Flatten(composition []*Transformation, mode Mode) string

func GetCharsetNames

func GetCharsetNames() []string

func GetInputMethodDefinitions

func GetInputMethodDefinitions() map[string]InputMethodDefinition

func HasAnyVietnameseRune

func HasAnyVietnameseRune(word string) bool

func HasAnyVietnameseVower

func HasAnyVietnameseVower(word string) bool

func IsAlpha

func IsAlpha(c rune) bool

func IsPunctuationMark

func IsPunctuationMark(key rune) bool

func IsSpace

func IsSpace(key rune) bool

func IsVietnameseRune

func IsVietnameseRune(lowerKey rune) bool

func IsVowel

func IsVowel(chr rune) bool

func IsWordBreakSymbol

func IsWordBreakSymbol(key rune) bool

Types

type BambooEngine

// BambooEngine is the package's concrete engine type. Its pointer type
// declares every method listed in the IEngine interface, plus GetFlag.
// All of its fields are unexported.
type BambooEngine struct {
	// contains filtered or unexported fields
}

func (*BambooEngine) CanProcessKey

func (e *BambooEngine) CanProcessKey(key rune) bool

func (*BambooEngine) GetFlag

func (e *BambooEngine) GetFlag(flag uint) uint

func (*BambooEngine) GetInputMethod

func (e *BambooEngine) GetInputMethod() InputMethod

func (*BambooEngine) GetProcessedString

func (e *BambooEngine) GetProcessedString(mode Mode) string

func (*BambooEngine) IsValid

func (e *BambooEngine) IsValid(inputIsFullComplete bool) bool

func (*BambooEngine) ProcessKey

func (e *BambooEngine) ProcessKey(key rune, mode Mode)

func (*BambooEngine) ProcessString

func (e *BambooEngine) ProcessString(str string, mode Mode)

func (*BambooEngine) RemoveLastChar

func (e *BambooEngine) RemoveLastChar(refreshLastToneTarget bool)

RemoveLastChar finds the last Appending transformation and all the transformations that add effects to it.

func (*BambooEngine) Reset

func (e *BambooEngine) Reset()

func (*BambooEngine) RestoreLastWord

func (e *BambooEngine) RestoreLastWord(toVietnamese bool)

func (*BambooEngine) SetFlag

func (e *BambooEngine) SetFlag(flag uint)

type EffectType

// EffectType identifies the kind of effect carried by a Rule (see
// Rule.EffectType).
type EffectType int

// The EffectType values are consecutive integers starting at zero.
const (
	// Appending is EffectType 0.
	Appending EffectType = iota
	// MarkTransformation is EffectType 1.
	MarkTransformation
	// ToneTransformation is EffectType 2.
	ToneTransformation
	// Replacing is EffectType 3.
	Replacing
)

type IEngine

// IEngine is the engine interface returned by NewEngine. *BambooEngine
// declares a method for every entry below; the parameter names mirror those
// declarations. GetFlag exists on *BambooEngine but is not part of this
// interface.
type IEngine interface {
	SetFlag(flag uint)
	GetInputMethod() InputMethod
	ProcessKey(key rune, mode Mode)
	ProcessString(str string, mode Mode)
	GetProcessedString(mode Mode) string
	IsValid(inputIsFullComplete bool) bool
	CanProcessKey(key rune) bool
	RemoveLastChar(refreshLastToneTarget bool)
	RestoreLastWord(toVietnamese bool)
	Reset()
}

func NewEngine

func NewEngine(inputMethod InputMethod, flag uint) IEngine

type InputMethod

// InputMethod is the structured form of an input method: a name, a list of
// rules, and several groups of keys. Values of this type are returned by
// ParseInputMethod and by the engine's GetInputMethod.
//
// NOTE(review): how the key groups below relate to Rules is not visible in
// this listing; the per-field notes are inferred from the names only.
type InputMethod struct {
	// Name is presumably the imName given to ParseInputMethod — confirm.
	Name          string
	// Rules holds the Rule values belonging to this input method.
	Rules         []Rule
	// SuperKeys, ToneKeys, AppendingKeys and Keys are sets of key runes;
	// their exact membership criteria should be checked in ParseInputMethod.
	SuperKeys     []rune
	ToneKeys      []rune
	AppendingKeys []rune
	Keys          []rune
}

func ParseInputMethod

func ParseInputMethod(imDef map[string]InputMethodDefinition, imName string) InputMethod

type InputMethodDefinition

// InputMethodDefinition is the raw, textual form of one input method: a map
// from a key (as a string) to a string describing that key's effect. It is
// the element type of InputMethodDefinitions and the input of
// ParseInputMethod.
type InputMethodDefinition map[string]string

type Mark

// Mark enumerates the marks a rule can apply; see the MarkNone..MarkRaw
// constants. It shares its underlying type (uint8) with Rule.Effect.
type Mark uint8

type alias

// The Mark values are consecutive integers starting at zero.
//
// NOTE(review): the names suggest diacritic shapes (hat, breve, horn, dash);
// which letters each applies to is not shown here — confirm in the rule
// parser.
const (
	// MarkNone is Mark 0.
	MarkNone Mark = iota
	// MarkHat is Mark 1.
	MarkHat
	// MarkBreve is Mark 2.
	MarkBreve
	// MarkHorn is Mark 3.
	MarkHorn
	// MarkDash is Mark 4.
	MarkDash
	// MarkRaw is Mark 5.
	MarkRaw
)

func FindMarkFromChar

func FindMarkFromChar(chr rune) (Mark, bool)

type Mode

// Mode is a bit set. It is the mode argument of ProcessKey, ProcessString,
// GetProcessedString and Flatten.
type Mode uint
// Each constant below occupies its own bit (1 << 0 through 1 << 7), so modes
// can be combined with bitwise OR.
//
// NOTE(review): the effect of each bit is not visible in this listing; the
// names are the only hint — confirm in the engine and in Flatten.
const (
	VietnameseMode Mode = 1 << iota // 1
	EnglishMode // 2
	ToneLess // 4
	MarkLess // 8
	LowerCase // 16
	FullText // 32
	PunctuationMode // 64
	InReverseOrder // 128
)

type Rule

// Rule describes what one key does. Rules are produced by ParseRules,
// ParseTonelessRules and ParseToneLessRule, and are stored in
// InputMethod.Rules and Transformation.Rule.
type Rule struct {
	// Key is the key rune this rule belongs to.
	Key           rune
	// Effect stores either a Tone or a Mark as a raw uint8; the typed
	// accessors are GetTone/SetTone and GetMark/SetMark.
	Effect        uint8 // (Tone, Mark)
	// EffectType is one of Appending, MarkTransformation,
	// ToneTransformation or Replacing.
	EffectType    EffectType
	// NOTE(review): EffectOn and Result are presumably the character the
	// rule applies to and the character it produces — confirm in ParseRules.
	EffectOn      rune
	Result        rune
	// AppendedRules holds further rules attached to this one.
	AppendedRules []Rule
}

func ParseRules

func ParseRules(key rune, line string) []Rule

func ParseToneLessRule

func ParseToneLessRule(key, effectiveOn, result rune, effect Mark) []Rule

func ParseTonelessRules

func ParseTonelessRules(key rune, line string) []Rule

func (*Rule) GetMark

func (r *Rule) GetMark() Mark

func (*Rule) GetTone

func (r *Rule) GetTone() Tone

func (*Rule) SetMark

func (r *Rule) SetMark(mark Mark)

func (*Rule) SetTone

func (r *Rule) SetTone(tone Tone)

type Tone

// Tone enumerates the tones a rule can apply; see Rule.GetTone and
// Rule.SetTone. It shares its underlying type (uint8) with Rule.Effect.
type Tone uint8

// The Tone values are consecutive integers starting at zero.
const (
	// ToneNone is Tone 0.
	ToneNone Tone = iota
	// ToneGrave is Tone 1.
	ToneGrave
	// ToneAcute is Tone 2.
	ToneAcute
	// ToneHook is Tone 3.
	ToneHook
	// ToneTilde is Tone 4.
	ToneTilde
	// ToneDot is Tone 5.
	ToneDot
)

func FindToneFromChar

func FindToneFromChar(chr rune) Tone

type Transformation

// Transformation is one element of a composition (see Flatten, which takes a
// []*Transformation).
type Transformation struct {
	// Rule is the rule this transformation carries.
	Rule        Rule
	// Target points to another Transformation.
	// NOTE(review): presumably the transformation this one adds its effect
	// to, and nil for an appending one — confirm in the engine.
	Target      *Transformation
	// NOTE(review): IsUpperCase presumably records that the key was typed in
	// upper case — confirm.
	IsUpperCase bool
}

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL