ucdparser

package
v0.0.0-...-61d31b1 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Jun 7, 2018 License: Apache-2.0 Imports: 11 Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

View Source
// DefaultAdditionalValues lists extra values per property. The outer key is
// a property name; every inner slice describes one value as
// {short name, long name, other known names...}. The order of the values
// within a property does not matter.
var DefaultAdditionalValues = map[string][][]string{
	"Jamo_Short_Name": {
		{"", "None", "<none>"},
	},
}

Layout: map[<property name, in any form>][<index of the additional value; the order does not matter>][<0: short name, 1: long name, 2 and up: other known names>]

View Source
// DefaultDeprecatedProperies names the properties that are listed as
// deprecated by default.
//
// NOTE(review): the identifier is misspelled ("Properies"). It is exported,
// so the spelling is kept for compatibility with existing callers.
var DefaultDeprecatedProperies = []string{
	"FC_NFKC_Closure",
	"Expands_On_NFD", "Expands_On_NFC",
	"Expands_On_NFKD", "Expands_On_NFKC",
}
View Source
// DefaultParseDetails is the built-in table of ParseDetails entries.
//
// Entries are positional struct literals, so each one reads, in order:
// File, PropertyColumn, PropertyName, ValueColumn, ValueName, RangeColumn,
// Missing. Paths under the "auxiliary" and "extracted" subdirectories are
// joined with os.PathSeparator.
//
// NOTE(review): a column of -1 appears to mean "not read from the file; use
// the fixed PropertyName/ValueName instead" — confirm against the parser.
// NOTE(review): ParseDetails documents Missing as "only for
// PropertyName!=\"\"", so the Missing values on entries with an empty
// PropertyName are presumably ignored — confirm.
var DefaultParseDetails = []ParseDetails{
	// Files in the "auxiliary" subdirectory.
	{"auxiliary" + string(os.PathSeparator) + "GraphemeBreakProperty.txt", -1, "Grapheme_Cluster_Break", 1, "", 0, "Other"},
	{"auxiliary" + string(os.PathSeparator) + "SentenceBreakProperty.txt", -1, "Sentence_Break", 1, "", 0, "Other"},
	{"auxiliary" + string(os.PathSeparator) + "WordBreakProperty.txt", -1, "Word_Break", 1, "", 0, "Other"},

	// Files in the "extracted" subdirectory.
	{"extracted" + string(os.PathSeparator) + "DerivedBidiClass.txt", -1, "Bidi_Class", 1, "", 0, "Left_To_Right"},
	{"extracted" + string(os.PathSeparator) + "DerivedBinaryProperties.txt", 1, "", -1, "Y", 0, "N"},
	{"extracted" + string(os.PathSeparator) + "DerivedCombiningClass.txt", -1, "Canonical_Combining_Class", 1, "", 0, "Not_Reordered"},
	{"extracted" + string(os.PathSeparator) + "DerivedDecompositionType.txt", -1, "Decomposition_Type", 1, "", 0, "None"},
	{"extracted" + string(os.PathSeparator) + "DerivedEastAsianWidth.txt", -1, "East_Asian_Width", 1, "", 0, "Neutral"},
	{"extracted" + string(os.PathSeparator) + "DerivedGeneralCategory.txt", -1, "General_Category", 1, "", 0, "Unassigned"},
	{"extracted" + string(os.PathSeparator) + "DerivedJoiningGroup.txt", -1, "Joining_Group", 1, "", 0, "No_Joining_Group"},
	{"extracted" + string(os.PathSeparator) + "DerivedJoiningType.txt", -1, "Joining_Type", 1, "", 0, "Non_Joining"},
	{"extracted" + string(os.PathSeparator) + "DerivedLineBreak.txt", -1, "Line_Break", 1, "", 0, "Unknown"},
	{"extracted" + string(os.PathSeparator) + "DerivedNumericType.txt", -1, "Numeric_Type", 1, "", 0, "None"},

	// Files at the top level. DerivedNormalizationProps.txt is listed once
	// per property taken from it.
	{"BidiBrackets.txt", -1, "Bidi_Paired_Bracket_Type", 2, "", 0, "n"},
	{"CompositionExclusions.txt", -1, "Composition_Exclusion", -1, "Y", 0, "N"},
	{"HangulSyllableType.txt", -1, "Hangul_Syllable_Type", 1, "", 0, "Not_Applicable"},
	{"Scripts.txt", -1, "Script", 1, "", 0, "Unknown"},
	{"Blocks.txt", -1, "Block", 1, "", 0, "No_Block"},
	{"DerivedAge.txt", -1, "Age", 1, "", 0, "Unassigned"},
	{"IndicPositionalCategory.txt", -1, "Indic_Positional_Category", 1, "", 0, "NA"},
	{"IndicSyllabicCategory.txt", -1, "Indic_Syllabic_Category", 1, "", 0, "Other"},
	{"Jamo.txt", -1, "Jamo_Short_Name", 1, "", 0, "none"},
	{"DerivedCoreProperties.txt", 1, "", -1, "Y", 0, "Y"},
	{"PropList.txt", 1, "", -1, "Y", 0, "Y"},
	{"VerticalOrientation.txt", -1, "Vertical_Orientation", 1, "", 0, "R"},
	{"DerivedNormalizationProps.txt", 1, "NFC_QC", 2, "", 1, "Yes"},
	{"DerivedNormalizationProps.txt", 1, "NFD_QC", 2, "", 1, "Yes"},
	{"DerivedNormalizationProps.txt", 1, "NFKC_QC", 2, "", 1, "Yes"},
	{"DerivedNormalizationProps.txt", 1, "NFKD_QC", 2, "", 1, "Yes"},
	{"DerivedNormalizationProps.txt", 1, "Full_Composition_Exclusion", -1, "Y", 1, "No"},
	{"DerivedNormalizationProps.txt", 1, "Changes_When_NFKC_Casefolded", -1, "Y", 1, "No"},
}
View Source
// DefaultPseudoValues maps a property name to its pseudo values. Each pseudo
// value is given as the list of real value names whose union it stands for,
// as listed in PropertyValueAliases.txt (the lines containing "|").
var DefaultPseudoValues = map[string]map[string][]string{
	"General_Category": {
		// Other
		"C": {"Cc", "Cf", "Cn", "Co", "Cs"},
		// Letter
		"L": {"Ll", "Lm", "Lo", "Lt", "Lu"},
		// Cased_Letter
		"LC": {"Ll", "Lt", "Lu"},
		// Mark
		"M": {"Mc", "Me", "Mn"},
		// Number
		"N": {"Nd", "Nl", "No"},
		// Punctuation
		"P": {"Pc", "Pd", "Pe", "Pf", "Pi", "Po", "Ps"},
		// Symbol
		"S": {"Sc", "Sk", "Sm", "So"},
		// Separator
		"Z": {"Zl", "Zp", "Zs"},
	},
}

map[<Property name in any form>][<Pseudo value name in any form>] = slice of the value names whose union forms the pseudo value. Taken from PropertyValueAliases.txt (look for "|").

View Source
// DefaultReallyEmptyValues lists, per property name, the value names that
// are known to be empty, so that they are not reported in the console log.
var DefaultReallyEmptyValues = map[string][]string{
	"Script": {
		"Katakana_Or_Hiragana",
	},
	"Canonical_Combining_Class": {
		"CCC133",
		"Attached_Below_Left",
	},
}

Values that really are empty. List them here to keep them out of the console log. map[<Property name in any form>] = slice of value names

Functions

func EmojiVer

func EmojiVer(fileName string) string

func UCDVer

func UCDVer(fileName string) string

Types

type ParseDetails

// ParseDetails describes how one data file is read for one property (or for
// the properties named inside that file).
//
// DefaultParseDetails fills this struct with positional literals, so the
// order of the fields below must not change.
type ParseDetails struct {
	File           string // data file path, e.g. "Scripts.txt" or a path under "extracted"
	PropertyColumn int    // NOTE(review): looks like the column holding the property name; -1 in entries that fix PropertyName — confirm
	PropertyName   string // fixed property name; "" in the default entries that rely on PropertyColumn alone
	ValueColumn    int    // NOTE(review): looks like the column holding the value; -1 in entries that fix ValueName — confirm
	ValueName      string // fixed value name ("Y" in the defaults); "" in the default entries that rely on ValueColumn
	RangeColumn    int    // NOTE(review): presumably the column holding the code point range — confirm
	Missing        string // only for PropertyName!=""
}

type Parser

// Parser holds the settings and the resulting Properties of a parse run.
//
// AdditionalValues, PseudoValues, DeprecatedProperties, ReallyEmptyValues and
// ParseDetails have the same types as the corresponding Default* package
// variables.
// NOTE(review): presumably NewParser seeds them from those defaults and
// Parse fills Properties — confirm against the implementation.
type Parser struct {
	VersionFunc          func(fileName string) string // same signature as UCDVer and EmojiVer
	Version              string
	Properties           Properties
	AdditionalValues     map[string][][]string
	PseudoValues         map[string]map[string][]string
	DeprecatedProperties []string
	ReallyEmptyValues    map[string][]string
	ParseDetails         []ParseDetails
	// contains filtered or unexported fields
}

func NewParser

func NewParser(srcDir string) *Parser

func (*Parser) Parse

func (parser *Parser) Parse()

type Properties

// Properties is a list of Property entries. PropIndexByName and
// MustPropIndexByName look up an entry's index by name.
type Properties []Property

func ParseStructureUCD

func ParseStructureUCD(srcDir string) Properties

func (*Properties) CleanEmpty

func (p *Properties) CleanEmpty()

func (Properties) MustPropIndexByName

func (p Properties) MustPropIndexByName(str string) int

func (Properties) PropIndexByName

func (p Properties) PropIndexByName(str string) int

Returns -1 if the property is not found.

type Property

// Property is one entry of Properties. It carries a short name, a long name,
// any further names it is known by (KnownAs) and its list of Values.
// NOTE(review): Kind presumably holds the property's category and File the
// data file it is read from — confirm against ParseStructureUCD.
type Property struct {
	Kind      string
	ShortName string
	LongName  string
	KnownAs   []string
	Values    []Value
	File      string
	Missing   int // Value index or -1 if not applicable (i.e. file describes all values)
}

func (Property) MustValueIndexByName

func (p Property) MustValueIndexByName(str string) int

type Value

// Value is one value of a Property: its short name, long name, any further
// names it is known by (KnownAs) and a unicode.RangeTable.
// NOTE(review): Ranges presumably holds the code points that carry this
// value — confirm against the parser.
type Value struct {
	ShortName string
	LongName  string
	Num       uint // for "ccc" only	// TODO uint8?
	KnownAs   []string
	Ranges    *unicode.RangeTable
}

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL