utils

package
v1.4.6 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Jul 27, 2023 License: MIT Imports: 7 Imported by: 0

Documentation

Index

Constants

View Source
// Datatype name strings carrying the "Triton" prefix.
// NOTE(review): presumably these match the datatype names used by Triton
// Inference Server tensors — confirm against the callers.
const (
	TritonBytesType string = "BYTES"
	TritonINT32Type string = "INT32"
	TritonINT64Type string = "INT64"
	TritonFP16Type  string = "FP16"
	TritonFP32Type  string = "FP32"
)

// Go slice type names spelled out as strings.
// NOTE(review): presumably the accepted values of BinaryToSlice's returnType
// argument — confirm against the implementation.
const (
	SliceByteType    string = "[]byte"
	SliceFloat32Type string = "[]float32"
	SliceFloat64Type string = "[]float64"
	SliceIntType     string = "[]int"
	SliceInt64Type   string = "[]int64"
)

Variables

View Source
var (
	// ErrEmptyVocab is the error for an empty vocab.
	ErrEmptyVocab = errors.New("empty vocab")
	// ErrEmptyCallbackFunc is the error for a nil callback function.
	ErrEmptyCallbackFunc = errors.New("callback function is nil")
	// ErrEmptyHTTPRequestBody is the error for a nil HTTP request body.
	ErrEmptyHTTPRequestBody = errors.New("http request body is nil")
	// ErrEmptyGRPCRequestBody is the error for a nil gRPC request body.
	ErrEmptyGRPCRequestBody = errors.New("grpc request body is nil")

	// ASCIIWhiteSpace is a lookup table indexed by byte value; the entry is
	// true for space, tab, line feed and carriage return.
	ASCIIWhiteSpace = [256]bool{' ': true, '\t': true, '\n': true, '\r': true}

	// ASCIIPunctuation is the range table of ASCII punctuation characters:
	// '!'..'/', ':'..'@', '['..'`' and '{'..'~'.
	ASCIIPunctuation = &unicode.RangeTable{
		R16: []unicode.Range16{
			{Lo: 0x0021, Hi: 0x002f, Stride: 1},
			{Lo: 0x003a, Hi: 0x0040, Stride: 1},
			{Lo: 0x005b, Hi: 0x0060, Stride: 1},
			{Lo: 0x007b, Hi: 0x007e, Stride: 1},
		},
		// All four R16 ranges end at or below unicode.MaxLatin1.
		LatinOffset: 4,
	}

	// BertChineseChar is the range table of code points treated as Chinese
	// characters (see IsChinese).
	//
	// unicode.RangeTable requires R16 and R32 to be sorted by Lo and
	// non-overlapping. The lookup gives up as soon as it meets a range whose
	// Lo is above the rune, so an out-of-order entry is silently never matched.
	BertChineseChar = &unicode.RangeTable{
		R16: []unicode.Range16{
			{Lo: 0x3400, Hi: 0x4dbf, Stride: 1},
			{Lo: 0x4e00, Hi: 0x9fff, Stride: 1},
			{Lo: 0xf900, Hi: 0xfaff, Stride: 1},
		},
		R32: []unicode.Range32{
			{Lo: 0x20000, Hi: 0x2a6df, Stride: 1},
			{Lo: 0x2a700, Hi: 0x2b73f, Stride: 1},
			{Lo: 0x2b740, Hi: 0x2b81f, Stride: 1},
			{Lo: 0x2b820, Hi: 0x2ceaf, Stride: 1},
			{Lo: 0x2f800, Hi: 0x2fa1f, Stride: 1},
		},
	}
)

Functions

func BinaryFilter added in v1.4.0

func BinaryFilter(arr []byte) []byte

BinaryFilter filters spaces out of a []byte.

func BinaryToSlice added in v1.4.0

func BinaryToSlice(body []uint8, bytesLen int, returnType string) []interface{}

BinaryToSlice converts a []byte to a slice.

func CalTimeGapWithNS added in v1.3.6

func CalTimeGapWithNS(begin int64) int64

CalTimeGapWithNS gets the timestamp gap in nanoseconds.

func Clean

func Clean(text string) string

Clean function will clear some characters.

func CleanAndPadChineseWithWhiteSpace

func CleanAndPadChineseWithWhiteSpace(text string) []string

CleanAndPadChineseWithWhiteSpace combines three functions: clean, padChinese, and tokenizeWhitespaceV1.

func GetNanoTimeFromSys added in v1.3.6

func GetNanoTimeFromSys() int64

GetNanoTimeFromSys gets the timestamp in nanoseconds.

func IsChinese

func IsChinese(c rune) bool

IsChinese validates that rune c is in the CJK range according to BERT spec.

func IsChineseOrNumber added in v1.4.4

func IsChineseOrNumber(c rune) bool

IsChineseOrNumber validates that rune c is in the CJK range according to BERT spec or Number.

func IsControl

func IsControl(c rune) bool

IsControl checks whether rune c is a BERT control character.

func IsPunctuation

func IsPunctuation(c rune) bool

IsPunctuation checks whether rune c is a BERT punctuation character.

func IsWhiteSpaceOrChinese

func IsWhiteSpaceOrChinese(c rune) bool

IsWhiteSpaceOrChinese validates that rune c is whitespace or is Chinese.

func IsWhiteSpaceOrChineseOrNumber added in v1.4.4

func IsWhiteSpaceOrChineseOrNumber(c rune) bool

IsWhiteSpaceOrChineseOrNumber validates that rune c is whitespace or is Chinese or is Number.

func IsWhitespace

func IsWhitespace(c rune) bool

IsWhitespace checks whether rune c is a BERT whitespace character.

func PadChinese

func PadChinese(text string) string

PadChinese adds space padding around all CJK characters. This implementation matches BasicTokenizer._tokenize_chinese_chars.

func SliceToInterfaceSlice added in v1.4.0

func SliceToInterfaceSlice[T any](arr []T) []interface{}

SliceToInterfaceSlice converts any slice to []interface{}.

func SliceTransposeFor2D

func SliceTransposeFor2D[T comparable](slice [][]T) [][]T

SliceTransposeFor2D transposes a 2-D slice, e.g. NxM to MxN.

func SliceTransposeFor3D

func SliceTransposeFor3D[T comparable](slice [][][]T) [][][]T

SliceTransposeFor3D transposes a 3-D slice. Like NxM to MxN.

func SplitPunctuation

func SplitPunctuation(text string) (toks []string)

SplitPunctuation splits text into tokens at punctuation.

func StringSliceTruncate

func StringSliceTruncate(sequence [][]string, maxLen int) [][]string

StringSliceTruncate truncates using the heuristic of trimming the longest sequence until sequenceLen is satisfied.

func StripAccentsAndLower

func StripAccentsAndLower(text string) string

StripAccentsAndLower strips accents and converts the text to lower case.

Types

type JSONMarshal added in v1.4.6

// JSONMarshal is the signature of a function that returns the JSON encoding of v.
type JSONMarshal func(v any) ([]byte, error)

JSONMarshal returns the JSON encoding of v.

type JSONUnmarshal added in v1.4.6

// JSONUnmarshal is the signature of a function that parses JSON-encoded data
// and stores the result in the value pointed to by v.
type JSONUnmarshal func(data []byte, v any) error

JSONUnmarshal parses the JSON-encoded data and stores the result in the value pointed to by v. If v is nil or not a pointer, Unmarshal returns an InvalidUnmarshalError.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL