Documentation ¶
Index ¶
- Constants
- Variables
- func BinaryFilter(arr []byte) []byte
- func BinaryToSlice(body []uint8, bytesLen int, returnType string) []interface{}
- func CalTimeGapWithNS(begin int64) int64
- func Clean(text string) string
- func CleanAndPadChineseWithWhiteSpace(text string) []string
- func GetNanoTimeFromSys() int64
- func IsChinese(c rune) bool
- func IsChineseOrNumber(c rune) bool
- func IsControl(c rune) bool
- func IsPunctuation(c rune) bool
- func IsWhiteSpaceOrChinese(c rune) bool
- func IsWhiteSpaceOrChineseOrNumber(c rune) bool
- func IsWhitespace(c rune) bool
- func PadChinese(text string) string
- func SliceToInterfaceSlice[T any](arr []T) []interface{}
- func SliceTransposeFor2D[T comparable](slice [][]T) [][]T
- func SliceTransposeFor3D[T comparable](slice [][][]T) [][][]T
- func SplitPunctuation(text string) (toks []string)
- func StringSliceTruncate(sequence [][]string, maxLen int) [][]string
- func StripAccentsAndLower(text string) string
- type JSONMarshal
- type JSONUnmarshal
Constants ¶
const ( TritonBytesType string = "BYTES" TritonINT32Type string = "INT32" TritonINT64Type string = "INT64" TritonFP16Type string = "FP16" TritonFP32Type string = "FP32" SliceByteType string = "[]byte" SliceFloat32Type string = "[]float32" SliceFloat64Type string = "[]float64" SliceIntType string = "[]int" SliceInt64Type string = "[]int64" )
Variables ¶
var ( ErrEmptyVocab = errors.New("empty vocab") // empty vocab error. ErrEmptyCallbackFunc = errors.New("callback function is nil") // empty callback function. ErrEmptyHTTPRequestBody = errors.New("http request body is nil") // empty http request body. ErrEmptyGRPCRequestBody = errors.New("grpc request body is nil") // empty grpc request body. // ASCIIWhiteSpace ascii white space array. ASCIIWhiteSpace = [256]bool{' ': true, '\t': true, '\n': true, '\r': true} // ASCIIPunctuation Ascii punctuation characters range. ASCIIPunctuation = &unicode.RangeTable{ R16: []unicode.Range16{ {0x0021, 0x002f, 1}, {0x003a, 0x0040, 1}, {0x005b, 0x0060, 1}, {0x007b, 0x007e, 1}, }, LatinOffset: 4, } // BertChineseChar maybe is the BERT Chinese Char. BertChineseChar = &unicode.RangeTable{ R16: []unicode.Range16{ {0x4e00, 0x9fff, 1}, {0x3400, 0x4dbf, 1}, {0xf900, 0xfaff, 1}, }, R32: []unicode.Range32{ {Lo: 0x20000, Hi: 0x2a6df, Stride: 1}, {Lo: 0x2a700, Hi: 0x2b73f, Stride: 1}, {Lo: 0x2b740, Hi: 0x2b81f, Stride: 1}, {Lo: 0x2b820, Hi: 0x2ceaf, Stride: 1}, {Lo: 0x2f800, Hi: 0x2fa1f, Stride: 1}, }, } )
Functions ¶
func BinaryFilter ¶ added in v1.4.0
BinaryFilter filters spaces out of a []byte.
func BinaryToSlice ¶ added in v1.4.0
BinaryToSlice converts a []byte to a slice.
func CalTimeGapWithNS ¶ added in v1.3.6
CalTimeGapWithNS gets the nanosecond timestamp gap.
func CleanAndPadChineseWithWhiteSpace ¶
CleanAndPadChineseWithWhiteSpace combines three functions: clean, padChinese, and tokenizeWhitespaceV1.
func GetNanoTimeFromSys ¶ added in v1.3.6
func GetNanoTimeFromSys() int64
GetNanoTimeFromSys gets the nanosecond timestamp.
func IsChineseOrNumber ¶ added in v1.4.4
IsChineseOrNumber validates that rune c is in the CJK range according to the BERT spec or is a number.
func IsPunctuation ¶
IsPunctuation checks whether rune c is a BERT punctuation character.
func IsWhiteSpaceOrChinese ¶
IsWhiteSpaceOrChinese validates that rune c is whitespace or is Chinese.
func IsWhiteSpaceOrChineseOrNumber ¶ added in v1.4.4
IsWhiteSpaceOrChineseOrNumber validates that rune c is whitespace or is Chinese or is Number.
func IsWhitespace ¶
IsWhitespace checks whether rune c is a BERT whitespace character.
func PadChinese ¶
PadChinese adds space padding around all CJK chars. This implementation matches BasicTokenizer._tokenize_chinese_chars.
func SliceToInterfaceSlice ¶ added in v1.4.0
func SliceToInterfaceSlice[T any](arr []T) []interface{}
SliceToInterfaceSlice converts any slice to []interface{}.
func SliceTransposeFor2D ¶
func SliceTransposeFor2D[T comparable](slice [][]T) [][]T
SliceTransposeFor2D transposes a 2-D slice, e.g. NxM to MxN.
func SliceTransposeFor3D ¶
func SliceTransposeFor3D[T comparable](slice [][][]T) [][][]T
SliceTransposeFor3D transposes a 3-D slice, e.g. NxM to MxN.
func SplitPunctuation ¶
SplitPunctuation splits the text on punctuation.
func StringSliceTruncate ¶
StringSliceTruncate truncates using the heuristic of trimming the longest sequence until sequenceLen is satisfied.
func StripAccentsAndLower ¶
StripAccentsAndLower strips accents and lowercases the text.
Types ¶
type JSONMarshal ¶ added in v1.4.6
JSONMarshal returns the JSON encoding of v.
type JSONUnmarshal ¶ added in v1.4.6
JSONUnmarshal parses the JSON-encoded data and stores the result in the value pointed to by v. If v is nil or not a pointer, Unmarshal returns an InvalidUnmarshalError.