charset

package
v0.5.0 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Dec 23, 2022 License: Apache-2.0, Apache-2.0 Imports: 16 Imported by: 0

Documentation

Index

Constants

View Source
// Charset and collation names, declared as untyped string constants.
const (
	// CharsetBin is used for marking binary charset.
	CharsetBin = "binary"
	// CollationBin is the default collation for CharsetBin.
	CollationBin = "binary"
	// CharsetUTF8 is the default charset for string types.
	CharsetUTF8 = "utf8"
	// CollationUTF8 is the default collation for CharsetUTF8.
	CollationUTF8 = "utf8_bin"
	// CharsetUTF8MB4 represents 4 bytes utf8, which works the same way as utf8 in Go.
	CharsetUTF8MB4 = "utf8mb4"
	// CollationUTF8MB4 is the default collation for CharsetUTF8MB4.
	CollationUTF8MB4 = "utf8mb4_bin"
	// CharsetASCII is a subset of UTF8.
	CharsetASCII = "ascii"
	// CollationASCII is the default collation for CharsetASCII.
	CollationASCII = "ascii_bin"
	// CharsetLatin1 is a single byte charset.
	CharsetLatin1 = "latin1"
	// CollationLatin1 is the default collation for CharsetLatin1.
	CollationLatin1 = "latin1_bin"

	// CollationGBKBin is the name of the gbk_bin collation.
	CollationGBKBin = "gbk_bin"

	// Names of further charsets, one constant per charset.
	// CharsetBinary has the same value ("binary") as CharsetBin above.
	// NOTE(review): this declaration only fixes the names; whether each
	// charset is actually supported is not visible here — confirm elsewhere.
	CharsetARMSCII8 = "armscii8"
	CharsetBig5     = "big5"
	CharsetBinary   = "binary"
	CharsetCP1250   = "cp1250"
	CharsetCP1251   = "cp1251"
	CharsetCP1256   = "cp1256"
	CharsetCP1257   = "cp1257"
	CharsetCP850    = "cp850"
	CharsetCP852    = "cp852"
	CharsetCP866    = "cp866"
	CharsetCP932    = "cp932"
	CharsetDEC8     = "dec8"
	CharsetEUCJPMS  = "eucjpms"
	CharsetEUCKR    = "euckr"
	CharsetGB18030  = "gb18030"
	CharsetGB2312   = "gb2312"
	CharsetGBK      = "gbk"
	CharsetGEOSTD8  = "geostd8"
	CharsetGreek    = "greek"
	CharsetHebrew   = "hebrew"
	CharsetHP8      = "hp8"
	CharsetKEYBCS2  = "keybcs2"
	CharsetKOI8R    = "koi8r"
	CharsetKOI8U    = "koi8u"
	CharsetLatin2   = "latin2"
	CharsetLatin5   = "latin5"
	CharsetLatin7   = "latin7"
	CharsetMacCE    = "macce"
	CharsetMacRoman = "macroman"
	CharsetSJIS     = "sjis"
	CharsetSWE7     = "swe7"
	CharsetTIS620   = "tis620"
	CharsetUCS2     = "ucs2"
	CharsetUJIS     = "ujis"
	CharsetUTF16    = "utf16"
	CharsetUTF16LE  = "utf16le"
	CharsetUTF32    = "utf32"
)

Variables

View Source
// ErrUnknownCollation and ErrCollationCharsetMismatch are package-level error
// values created with terror.ClassDDL.NewStd from the identically named
// mysql.ErrUnknownCollation and mysql.ErrCollationCharsetMismatch.
// NOTE(review): presumably returned by the collation lookup/validation
// functions of this package — confirm against their implementations.
var (
	ErrUnknownCollation         = terror.ClassDDL.NewStd(mysql.ErrUnknownCollation)
	ErrCollationCharsetMismatch = terror.ClassDDL.NewStd(mysql.ErrCollationCharsetMismatch)
)

Functions

func AddCharset

func AddCharset(c *Charset)

AddCharset adds a new charset. Use only when adding a custom charset to the parser.

func AddCollation

func AddCollation(c *Collation)

AddCollation adds a new collation. Use only when adding a custom collation to the parser.

func FindNextCharacterLength

func FindNextCharacterLength(label string) func([]byte) int

FindNextCharacterLength is used in lexer.peek() to determine the next character length.

func GetCharsetInfoByID

func GetCharsetInfoByID(coID int) (string, string, error)

GetCharsetInfoByID returns charset and collation for id as cs_number.

func GetDefaultCharsetAndCollate

func GetDefaultCharsetAndCollate() (string, string)

GetDefaultCharsetAndCollate returns the default charset and collation.

func GetDefaultCollation

func GetDefaultCollation(charset string) (string, error)

GetDefaultCollation returns the default collation for charset.

func GetDefaultCollationLegacy

func GetDefaultCollationLegacy(charset string) (string, error)

GetDefaultCollationLegacy is compatible with the charset support in old version parser.

func Lookup

func Lookup(label string) (e encoding.Encoding, name string)

Lookup returns the encoding with the specified label, and its canonical name. It returns nil and the empty string if label is not one of the standard encodings for HTML. Matching is case-insensitive and ignores leading and trailing whitespace.

func RemoveCharset

func RemoveCharset(c string)

RemoveCharset removes a charset. Use only when adding a custom charset to the parser.

func ValidCharsetAndCollation

func ValidCharsetAndCollation(cs string, co string) bool

ValidCharsetAndCollation checks the charset and the collation validity and returns a boolean.

Types

type Charset

// Charset describes a single charset.
//
// Fields:
//   - Name: the charset name.
//   - DefaultCollation: a collation name; presumably the collation used when
//     none is specified for this charset — TODO confirm.
//   - Collations: map of string keys to *Collation values.
//     NOTE(review): keys are presumably collation names — confirm.
//   - Desc: a description string.
//   - Maxlen: an int. NOTE(review): presumably the maximum number of bytes
//     per character — confirm.
type Charset struct {
	Name             string
	DefaultCollation string
	Collations       map[string]*Collation
	Desc             string
	Maxlen           int
}

Charset is a charset. Now we only support MySQL.

func GetCharsetInfo

func GetCharsetInfo(cs string) (*Charset, error)

GetCharsetInfo returns the Charset whose name is cs, or an error.

func GetSupportedCharsets

func GetSupportedCharsets() []*Charset

GetSupportedCharsets gets descriptions for all charsets supported so far.

type Collation

// Collation describes a single collation.
//
// Fields:
//   - ID: integer identifier of the collation.
//   - CharsetName: a charset name; presumably the charset this collation
//     belongs to — TODO confirm.
//   - Name: the collation name.
//   - IsDefault: a bool. NOTE(review): presumably true when this is the
//     default collation of CharsetName — confirm.
type Collation struct {
	ID          int
	CharsetName string
	Name        string
	IsDefault   bool
}

Collation is a collation. Now we only support MySQL.

func GetCollationByID

func GetCollationByID(id int) (*Collation, error)

GetCollationByID returns the collation with the given id.

func GetCollationByName

func GetCollationByName(name string) (*Collation, error)

func GetCollations

func GetCollations() []*Collation

GetCollations returns a list for all collations.

func GetSupportedCollations

func GetSupportedCollations() []*Collation

GetSupportedCollations gets information for all collations supported so far.

type Encoding

type Encoding struct {
	// contains filtered or unexported fields
}

Encoding provides an interface to encode/decode a string with a specific encoding.

func NewEncoding

func NewEncoding(label string) *Encoding

NewEncoding creates a new Encoding.

func (*Encoding) Decode

func (e *Encoding) Decode(dest, src []byte) ([]byte, error)

Decode converts bytes from a specific charset to the utf-8 charset.

func (*Encoding) Enabled

func (e *Encoding) Enabled() bool

Enabled indicates whether the non-utf8 encoding is used.

func (*Encoding) Encode

func (e *Encoding) Encode(dest, src []byte) ([]byte, error)

Encode converts bytes from the utf-8 charset to a specific charset.

func (*Encoding) Name

func (e *Encoding) Name() string

Name returns the name of the current encoding.

func (*Encoding) UpdateEncoding

func (e *Encoding) UpdateEncoding(label EncodingLabel)

UpdateEncoding updates to a new Encoding.

type EncodingLabel

// EncodingLabel is a string type for encoding labels. Format and Formatted
// return values of this type, and (*Encoding).UpdateEncoding accepts one.
type EncodingLabel string

func Format

func Format(label string) EncodingLabel

Format trims the label and changes it to lowercase.

func Formatted

func Formatted(label string) EncodingLabel

Formatted is used when the label is already trimmed and lowercase.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL