whatlanggo: github.com/abadojack/whatlanggo Index | Files

package whatlanggo

import "github.com/abadojack/whatlanggo"

Package whatlanggo detects natural languages and scripts (writing systems). Languages are represented by a predefined list of constants, while scripts are represented by *unicode.RangeTable.

Index

Package Files

constants.go detect.go doc.go info.go lang.go options.go script.go trigrams.go unicode.go utils.go

Constants

const ReliableConfidenceThreshold = 0.8

ReliableConfidenceThreshold is the confidence rating that has to be exceeded for the language detection to be considered reliable.

Variables

var Langs = map[Lang]string{
    Afr: "Afrikaans",
    Aka: "Akan",
    Amh: "Amharic",
    Arb: "Arabic",
    Azj: "Azerbaijani",
    Bel: "Belarusian",
    Ben: "Bengali",
    Bho: "Bhojpuri",
    Bul: "Bulgarian",
    Ceb: "Cebuano",
    Ces: "Czech",
    Cmn: "Mandarin",
    Dan: "Danish",
    Deu: "German",
    Ell: "Greek",
    Eng: "English",
    Epo: "Esperanto",
    Est: "Estonian",
    Fin: "Finnish",
    Fra: "French",
    Guj: "Gujarati",
    Hat: "Haitian Creole",
    Hau: "Hausa",
    Heb: "Hebrew",
    Hin: "Hindi",
    Hrv: "Croatian",
    Hun: "Hungarian",
    Ibo: "Igbo",
    Ilo: "Ilocano",
    Ind: "Indonesian",
    Ita: "Italian",
    Jav: "Javanese",
    Jpn: "Japanese",
    Kan: "Kannada",
    Kat: "Georgian",
    Khm: "Khmer",
    Kin: "Kinyarwanda",
    Kor: "Korean",
    Kur: "Kurdish",
    Lav: "Latvian",
    Lit: "Lithuanian",
    Mai: "Maithili",
    Mal: "Malayalam",
    Mar: "Marathi",
    Mkd: "Macedonian",
    Mlg: "Malagasy",
    Mya: "Burmese",
    Nep: "Nepali",
    Nld: "Dutch",
    Nno: "Nynorsk",
    Nob: "Bokmal",
    Nya: "Chewa",
    Ori: "Oriya",
    Orm: "Oromo",
    Pan: "Punjabi",
    Pes: "Persian",
    Pol: "Polish",
    Por: "Portuguese",
    Ron: "Romanian",
    Run: "Rundi",
    Rus: "Russian",
    Sin: "Sinhalese",
    Skr: "Saraiki",
    Slv: "Slovene",
    Sna: "Shona",
    Som: "Somali",
    Spa: "Spanish",
    Srp: "Serbian",
    Swe: "Swedish",
    Tam: "Tamil",
    Tel: "Telugu",
    Tgl: "Tagalog",
    Tha: "Thai",
    Tir: "Tigrinya",
    Tuk: "Turkmen",
    Tur: "Turkish",
    Uig: "Uyghur",
    Ukr: "Ukrainian",
    Urd: "Urdu",
    Uzb: "Uzbek",
    Vie: "Vietnamese",
    Ydd: "Yiddish",
    Yor: "Yoruba",
    Zul: "Zulu",
}

Langs represents a map of Lang to language name.

var Scripts = map[*unicode.RangeTable]string{
    unicode.Arabic:     "Arabic",
    unicode.Bengali:    "Bengali",
    unicode.Cyrillic:   "Cyrillic",
    unicode.Ethiopic:   "Ethiopic",
    unicode.Devanagari: "Devanagari",
    unicode.Han:        "Han",
    unicode.Georgian:   "Georgian",
    unicode.Greek:      "Greek",
    unicode.Gujarati:   "Gujarati",
    unicode.Gurmukhi:   "Gurmukhi",
    unicode.Hangul:     "Hangul",
    unicode.Hebrew:     "Hebrew",
    unicode.Hiragana:   "Hiragana",
    unicode.Kannada:    "Kannada",
    unicode.Katakana:   "Katakana",
    unicode.Khmer:      "Khmer",
    unicode.Latin:      "Latin",
    unicode.Malayalam:  "Malayalam",
    unicode.Myanmar:    "Myanmar",
    unicode.Oriya:      "Oriya",
    unicode.Sinhala:    "Sinhala",
    unicode.Tamil:      "Tamil",
    unicode.Telugu:     "Telugu",
    unicode.Thai:       "Thai",
}

Scripts maps each supported Unicode script table to the name of the script.

func DetectScript Uses

func DetectScript(text string) *unicode.RangeTable

DetectScript returns only the script of the given text.

func LangToString Uses

func LangToString(lang Lang) string

LangToString converts a Lang into its ISO 639-3 code as a string. Deprecated: LangToString is deprecated and exists for historical compatibility. Please use `Lang.Iso6393()` instead.

func LangToStringShort Uses

func LangToStringShort(lang Lang) string

LangToStringShort converts a Lang into its ISO 639-1 code as a string. It returns an empty string when there is no ISO 639-1 code. Deprecated: LangToStringShort is deprecated and exists for historical compatibility. Please use `Lang.Iso6391()` instead.

type Info Uses

type Info struct {
    Lang       Lang
    Script     *unicode.RangeTable
    Confidence float64
}

Info represents a full outcome of language detection.

func Detect Uses

func Detect(text string) Info

Detect detects the language and script of the given text.

func DetectWithOptions Uses

func DetectWithOptions(text string, options Options) Info

DetectWithOptions detects the language and script of the given text with the provided options.

func (*Info) IsReliable Uses

func (info *Info) IsReliable() bool

IsReliable returns true if Confidence is greater than ReliableConfidenceThreshold.

type Lang Uses

type Lang int

Lang represents a language following the ISO 639-3 standard.

const (
    Afr Lang = iota
    Aka
    Amh
    Arb
    Azj
    Bel
    Ben
    Bho
    Bul
    Ceb
    Ces
    Cmn
    Dan
    Deu
    Ell
    Eng
    Epo
    Est
    Fin
    Fra
    Guj
    Hat
    Hau
    Heb
    Hin
    Hrv
    Hun
    Ibo
    Ilo
    Ind
    Ita
    Jav
    Jpn
    Kan
    Kat
    Khm
    Kin
    Kor
    Kur
    Lav
    Lit
    Mai
    Mal
    Mar
    Mkd
    Mlg
    Mya
    Nep
    Nld
    Nno
    Nob
    Nya
    Ori
    Orm
    Pan
    Pes
    Pol
    Por
    Ron
    Run
    Rus
    Sin
    Skr
    Slv
    Sna
    Som
    Spa
    Srp
    Swe
    Tam
    Tel
    Tgl
    Tha
    Tir
    Tuk
    Tur
    Uig
    Ukr
    Urd
    Uzb
    Vie
    Ydd
    Yor
    Zul
)

The supported languages, each named after its ISO 639-3 code (Aka, Amh, ...).

func CodeToLang Uses

func CodeToLang(code string) Lang

CodeToLang gets enum by ISO 639-3 code as a string.

func DetectLang Uses

func DetectLang(text string) Lang

DetectLang detects only the language of the given text.

func DetectLangWithOptions Uses

func DetectLangWithOptions(text string, options Options) Lang

DetectLangWithOptions detects only the language of the given text with the provided options.

func (Lang) Iso6391 Uses

func (lang Lang) Iso6391() string

Iso6391 returns ISO 639-1 code of Lang as a string.

func (Lang) Iso6393 Uses

func (lang Lang) Iso6393() string

Iso6393 returns ISO 639-3 code of Lang as a string.

func (Lang) String Uses

func (lang Lang) String() string

String returns the human-readable name of the language as a string.

type Options Uses

type Options struct {
    Whitelist map[Lang]bool
    Blacklist map[Lang]bool
}

Options represents options that can be set when detecting a language and/or script, such as blacklisting languages to skip checking.

Package whatlanggo imports 3 packages (graph) and is imported by 6 packages. Updated 2019-03-06. Refresh now. Tools for package owners.