bamboo

package
v0.7.9 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Aug 13, 2021 License: GPL-3.0, MIT Imports: 3 Imported by: 0

README

Text processing library for Vietnamese

License

The MIT License (MIT) Copyright (C) 2018 Luong Thanh Lam

Documentation

Overview

Package bamboo implements text processing for Vietnamese

Index

Constants

View Source
// Engine option flags. Each of the first three constants occupies its own bit,
// so they can be combined with bitwise OR.
// NOTE(review): presumably these are the values passed as the uint flag
// argument of NewEngine and SetFlag — confirm against the implementation.
const (
	// EfreeToneMarking is bit 0 (value 1).
	EfreeToneMarking uint = 1 << 0
	// EstdToneStyle is bit 1 (value 2).
	EstdToneStyle uint = 1 << 1
	// EautoCorrectEnabled is bit 2 (value 4).
	EautoCorrectEnabled uint = 1 << 2
	// EstdFlags is the union of the three flags above (value 7).
	EstdFlags = EfreeToneMarking | EstdToneStyle | EautoCorrectEnabled
)
View Source
// UNICODE is the untyped string constant "Unicode".
// NOTE(review): presumably a charset name accepted by Encode — confirm against the charset table.
const UNICODE = "Unicode"

Variables

View Source
// InputMethodDefinitions maps an input-method name (for example "Telex",
// "VNI" or "VIQR") to its definition: a table from a typed key to a rule
// string.
//
// Rule strings that appear in this table:
//   - "XoaDauThanh", "DauSac", "DauHuyen", "DauHoi", "DauNga" and "DauNang"
//     are Vietnamese names: remove tone mark, acute, grave, hook, tilde and
//     dot below, respectively.
//   - Strings such as "A_Â" or "UOA_ƯƠĂ" list letters before the underscore
//     and their marked counterparts after it.
//
// NOTE(review): the exact grammar of the rule strings (including the "__ư"
// and "_Ư" forms) is defined by the rule parser, which is not shown here —
// confirm there before adding new entries.
var InputMethodDefinitions = map[string]InputMethodDefinition{
	"Telex": {
		"z": "XoaDauThanh",
		"s": "DauSac",
		"f": "DauHuyen",
		"r": "DauHoi",
		"x": "DauNga",
		"j": "DauNang",
		"a": "A_Â",
		"e": "E_Ê",
		"o": "O_Ô",
		"w": "UOA_ƯƠĂ",
		"d": "D_Đ",
	},
	"VNI": {
		"0": "XoaDauThanh",
		"1": "DauSac",
		"2": "DauHuyen",
		"3": "DauHoi",
		"4": "DauNga",
		"5": "DauNang",
		"6": "AEO_ÂÊÔ",
		"7": "UO_ƯƠ",
		"8": "A_Ă",
		"9": "D_Đ",
	},
	"VIQR": {
		"0":  "XoaDauThanh",
		"'":  "DauSac",
		"`":  "DauHuyen",
		"?":  "DauHoi",
		"~":  "DauNga",
		".":  "DauNang",
		"^":  "AEO_ÂÊÔ",
		"+":  "UO_ƯƠ",
		"*":  "UO_ƯƠ",
		"(":  "A_Ă",
		"\\": "D_Đ",
	},
	"Microsoft layout": {
		"8": "DauSac",
		"5": "DauHuyen",
		"6": "DauHoi",
		"7": "DauNga",
		"9": "DauNang",
		"1": "__ă",
		"!": "_Ă",
		"2": "__â",
		"@": "_Â",
		"3": "__ê",
		"#": "_Ê",
		"4": "__ô",
		"$": "_Ô",
		"0": "__đ",
		")": "_Đ",
		"[": "__ư",
		"{": "_Ư",
		"]": "__ơ",
		"}": "_Ơ",
	},
	"Telex 2": {
		"z": "XoaDauThanh",
		"s": "DauSac",
		"f": "DauHuyen",
		"r": "DauHoi",
		"x": "DauNga",
		"j": "DauNang",
		"a": "A_Â",
		"e": "E_Ê",
		"o": "O_Ô",
		"w": "UOA_ƯƠĂ__Ư",
		"d": "D_Đ",
		"]": "__ư",
		"[": "__ơ",
		"}": "_Ư",
		"{": "_Ơ",
	},
	"Telex + VNI": {
		"z": "XoaDauThanh",
		"s": "DauSac",
		"f": "DauHuyen",
		"r": "DauHoi",
		"x": "DauNga",
		"j": "DauNang",
		"a": "A_Â",
		"e": "E_Ê",
		"o": "O_Ô",
		"w": "UOA_ƯƠĂ",
		"d": "D_Đ",
		"0": "XoaDauThanh",
		"1": "DauSac",
		"2": "DauHuyen",
		"3": "DauHoi",
		"4": "DauNga",
		"5": "DauNang",
		"6": "AEO_ÂÊÔ",
		"7": "UO_ƯƠ",
		"8": "A_Ă",
		"9": "D_Đ",
	},
	"Telex + VNI + VIQR": {
		"z":  "XoaDauThanh",
		"s":  "DauSac",
		"f":  "DauHuyen",
		"r":  "DauHoi",
		"x":  "DauNga",
		"j":  "DauNang",
		"a":  "A_Â",
		"e":  "E_Ê",
		"o":  "O_Ô",
		"w":  "UOA_ƯƠĂ",
		"d":  "D_Đ",
		"0":  "XoaDauThanh",
		"1":  "DauSac",
		"2":  "DauHuyen",
		"3":  "DauHoi",
		"4":  "DauNga",
		"5":  "DauNang",
		"6":  "AEO_ÂÊÔ",
		"7":  "UO_ƯƠ",
		"8":  "A_Ă",
		"9":  "D_Đ",
		"'":  "DauSac",
		"`":  "DauHuyen",
		"?":  "DauHoi",
		"~":  "DauNga",
		".":  "DauNang",
		"^":  "AEO_ÂÊÔ",
		"+":  "UO_ƯƠ",
		"*":  "UO_ƯƠ",
		"(":  "A_Ă",
		"\\": "D_Đ",
	},
	"VNI Bàn phím tiếng Pháp": {
		"&":  "XoaDauThanh",
		"é":  "DauSac",
		"\"": "DauHuyen",
		"'":  "DauHoi",
		"(":  "DauNga",
		"-":  "DauNang",
		"è":  "AEO_ÂÊÔ",
		"_":  "UO_ƯƠ",
		"ç":  "A_Ă",
		"à":  "D_Đ",
	},
	"Telex W": {
		"z": "XoaDauThanh",
		"s": "DauSac",
		"f": "DauHuyen",
		"r": "DauHoi",
		"x": "DauNga",
		"j": "DauNang",
		"a": "A_Â",
		"e": "E_Ê",
		"o": "O_Ô",
		"w": "UOA_ƯƠĂ__Ư",
		"d": "D_Đ",
	},
}
View Source
// PunctuationMarks lists the 33 runes this package treats as punctuation,
// including the space character. The order of the runes is significant only
// in that it matches the original declaration.
// NOTE(review): presumably the set consulted by IsPunctuationMark — confirm.
var PunctuationMarks = []rune(",;:.\"'!? " +
	"<>=+-*/\\" +
	"_~`@#$%^&(){}[]" +
	"|")
View Source
// Vowels holds the 72 lower-case Vietnamese vowel runes: twelve base vowels,
// each followed by its five toned forms. Within every group of six the order
// is plain, grave, acute, hook, tilde, dot below, which is the same order as
// the Tone constants (ToneNone through ToneDot).
var Vowels = []rune("aàáảãạ" +
	"ăằắẳẵặ" +
	"âầấẩẫậ" +
	"eèéẻẽẹ" +
	"êềếểễệ" +
	"iìíỉĩị" +
	"oòóỏõọ" +
	"ôồốổỗộ" +
	"ơờớởỡợ" +
	"uùúủũụ" +
	"ưừứửữự" +
	"yỳýỷỹỵ")

Functions

func AddMarkToChar

func AddMarkToChar(chr rune, mark uint8) rune

func AddMarkToTonelessChar added in v0.5.9

func AddMarkToTonelessChar(chr rune, mark uint8) rune

func AddToneToChar

func AddToneToChar(chr rune, tone uint8) rune

func Encode

func Encode(charsetName string, input string) string

func FindMarkPosition

func FindMarkPosition(chr rune) int

func FindVowelPosition

func FindVowelPosition(chr rune) int

func Flatten

func Flatten(composition []*Transformation, mode Mode) string

func GetCharsetNames

func GetCharsetNames() []string

func GetInputMethodDefinitions added in v0.5.9

func GetInputMethodDefinitions() map[string]InputMethodDefinition

func HasAnyVietnameseRune added in v0.5.5

func HasAnyVietnameseRune(word string) bool

func IsAlpha added in v0.2.9

func IsAlpha(c rune) bool

func IsPunctuationMark added in v0.5.9

func IsPunctuationMark(key rune) bool

func IsSpace added in v0.7.1

func IsSpace(key rune) bool

func IsVietnameseRune added in v0.5.9

func IsVietnameseRune(lowerKey rune) bool

func IsVowel

func IsVowel(chr rune) bool

func IsWordBreakSymbol added in v0.3.4

func IsWordBreakSymbol(key rune) bool

Types

type BambooEngine

// BambooEngine is the concrete engine type of this package; all of its fields
// are unexported.
// NOTE(review): *BambooEngine declares every method listed in IEngine, so it is
// presumably the implementation returned by NewEngine — confirm.
type BambooEngine struct {
	// contains filtered or unexported fields
}

func (*BambooEngine) CanProcessKey added in v0.3.4

func (e *BambooEngine) CanProcessKey(key rune) bool

func (*BambooEngine) GetFlag

func (e *BambooEngine) GetFlag(flag uint) uint

func (*BambooEngine) GetInputMethod

func (e *BambooEngine) GetInputMethod() InputMethod

func (*BambooEngine) GetProcessedString

func (e *BambooEngine) GetProcessedString(mode Mode) string

func (*BambooEngine) IsValid added in v0.5.9

func (e *BambooEngine) IsValid(inputIsFullComplete bool) bool

func (*BambooEngine) ProcessKey added in v0.3.4

func (e *BambooEngine) ProcessKey(key rune, mode Mode)

func (*BambooEngine) ProcessString

func (e *BambooEngine) ProcessString(str string, mode Mode)

func (*BambooEngine) RemoveLastChar

func (e *BambooEngine) RemoveLastChar(refreshLastToneTarget bool)

Find the last APPENDING transformation and all the transformations that add effects to it.

func (*BambooEngine) Reset

func (e *BambooEngine) Reset()

func (*BambooEngine) RestoreLastWord added in v0.4.8

func (e *BambooEngine) RestoreLastWord(toVietnamese bool)

func (*BambooEngine) SetFlag

func (e *BambooEngine) SetFlag(flag uint)

type EffectType

// EffectType is the type of the Rule.EffectType field; it distinguishes the
// kinds of effect a rule can have.
type EffectType int

// Effect kinds, numbered consecutively from zero.
const (
	// Appending is 0.
	Appending EffectType = iota
	// MarkTransformation is 1.
	MarkTransformation
	// ToneTransformation is 2.
	ToneTransformation
	// Replacing is 3.
	Replacing
)

type IEngine

// IEngine is the engine interface returned by NewEngine. Every method listed
// here is also declared on *BambooEngine; the parameter names below mirror
// those declarations.
type IEngine interface {
	SetFlag(flag uint)
	GetInputMethod() InputMethod
	ProcessKey(key rune, mode Mode)
	ProcessString(str string, mode Mode)
	GetProcessedString(mode Mode) string
	IsValid(inputIsFullComplete bool) bool
	CanProcessKey(key rune) bool
	RemoveLastChar(refreshLastToneTarget bool)
	RestoreLastWord(toVietnamese bool)
	Reset()
}

func NewEngine

func NewEngine(inputMethod InputMethod, flag uint) IEngine

type InputMethod

// InputMethod is the structured form of an input method, as returned by
// ParseInputMethod and by BambooEngine.GetInputMethod.
// NOTE(review): the per-field notes below are inferred from names and
// signatures only; the code that fills these fields is not shown — confirm.
type InputMethod struct {
	// Name is presumably the imName given to ParseInputMethod — confirm.
	Name          string
	// Rules holds the Rule values of this input method.
	Rules         []Rule
	// SuperKeys: meaning not visible from this listing — confirm in the parser.
	SuperKeys     []rune
	// ToneKeys: presumably the keys whose rules apply a tone — confirm.
	ToneKeys      []rune
	// AppendingKeys: presumably the keys whose rules have the Appending effect type — confirm.
	AppendingKeys []rune
	// Keys: presumably every key the input method reacts to — confirm.
	Keys          []rune
}

func ParseInputMethod added in v0.3.4

func ParseInputMethod(imDef map[string]InputMethodDefinition, imName string) InputMethod

type InputMethodDefinition added in v0.3.4

// InputMethodDefinition is one input method's table from a typed key to a rule
// string; it is the value type of the InputMethodDefinitions map.
type InputMethodDefinition map[string]string

type Mark

// Mark identifies a letter mark; its values are the Mark* constants
// (MarkNone, MarkHat, MarkBreve, MarkHorn, MarkDash, MarkRaw).
type Mark uint8

Mark is a defined type with underlying type uint8 (not a Go type alias).

// Mark values, numbered consecutively from zero.
const (
	// MarkNone is 0.
	MarkNone Mark = iota
	// MarkHat is 1.
	MarkHat
	// MarkBreve is 2.
	MarkBreve
	// MarkHorn is 3.
	MarkHorn
	// MarkDash is 4.
	MarkDash
	// MarkRaw is 5.
	MarkRaw
)

func FindMarkFromChar

func FindMarkFromChar(chr rune) (Mark, bool)

type Mode

// Mode is a set of bit flags; it is the mode argument of ProcessKey,
// ProcessString, GetProcessedString and Flatten.
type Mode uint

// Mode flags. Each constant occupies a distinct bit, so values can be combined
// with bitwise OR.
const (
	VietnameseMode  Mode = 1 << 0 // 1
	EnglishMode     Mode = 1 << 1 // 2
	ToneLess        Mode = 1 << 2 // 4
	MarkLess        Mode = 1 << 3 // 8
	LowerCase       Mode = 1 << 4 // 16
	FullText        Mode = 1 << 5 // 32
	PunctuationMode Mode = 1 << 6 // 64
	InReverseOrder  Mode = 1 << 7 // 128
)

type Rule

// Rule describes what one key does. ParseRules, ParseTonelessRules and
// ParseToneLessRule return slices of Rule, and InputMethod.Rules stores them.
// NOTE(review): the per-field notes below are inferred from names and the
// accessor signatures (GetMark/GetTone/SetMark/SetTone) — confirm.
type Rule struct {
	// Key is presumably the typed key that triggers the rule — confirm.
	Key           rune
	Effect        uint8 // (Tone, Mark)
	// EffectType selects one of the EffectType constants.
	EffectType    EffectType
	// EffectOn is presumably the rune the rule applies to — confirm.
	EffectOn      rune
	// Result is presumably the rune produced by the rule — confirm.
	Result        rune
	// AppendedRules holds further rules attached to this one; how they are
	// applied is not visible from this listing.
	AppendedRules []Rule
}

func ParseRules

func ParseRules(key rune, line string) []Rule

func ParseToneLessRule added in v0.4.9

func ParseToneLessRule(key, effectiveOn, result rune, effect Mark) []Rule

func ParseTonelessRules

func ParseTonelessRules(key rune, line string) []Rule

func (*Rule) GetMark

func (r *Rule) GetMark() Mark

func (*Rule) GetTone

func (r *Rule) GetTone() Tone

func (*Rule) SetMark

func (r *Rule) SetMark(mark Mark)

func (*Rule) SetTone

func (r *Rule) SetTone(tone Tone)

type Tone

// Tone identifies a tone; FindToneFromChar returns a Tone, and Rule.GetTone and
// Rule.SetTone read and write one.
type Tone uint8

// Tone values, numbered consecutively from zero.
const (
	// ToneNone is 0.
	ToneNone Tone = iota
	// ToneGrave is 1.
	ToneGrave
	// ToneAcute is 2.
	ToneAcute
	// ToneHook is 3.
	ToneHook
	// ToneTilde is 4.
	ToneTilde
	// ToneDot is 5.
	ToneDot
)

func FindToneFromChar

func FindToneFromChar(chr rune) Tone

type Transformation

// Transformation pairs a Rule with an optional link to another Transformation
// and an upper-case flag. Flatten consumes a slice of *Transformation.
type Transformation struct {
	// Rule is the rule carried by this transformation.
	Rule        Rule
	// Target points at another Transformation.
	// NOTE(review): presumably the transformation this one adds an effect to — confirm.
	Target      *Transformation
	// IsUpperCase: presumably records that the input was upper case — confirm.
	IsUpperCase bool
}

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL