convert

package
v1.0.1 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Jan 10, 2024 License: LGPL-3.0 Imports: 23 Imported by: 0

Documentation

Overview

Package convert extends the interface for the character encodings that transform text to and from Unicode UTF-8.

Index

Examples

Constants

View Source
// The common ASCII and Unicode control decimal values.
// The values are assigned sequentially with iota, from NUL (0) to SP (32).
const (
	// NUL is the null control code (0).
	NUL = iota
	// SOH is the start of heading control code (1).
	SOH
	// STX is the start of text control code (2).
	STX
	// ETX is the end of text control code (3).
	ETX
	// EOT is the end of transmission control code (4).
	EOT
	// ENQ is the enquiry control code (5).
	ENQ
	// ACK is the acknowledge control code (6).
	ACK
	// BEL is the bell or alert control code (7).
	BEL
	// BS is the backspace control code (8).
	BS
	// HT is the horizontal tabulation control code (9).
	HT
	// LF is the line feed control code (10).
	LF
	// VT is the vertical tabulation control code (11).
	VT
	// FF is the form feed control code (12).
	FF
	// CR is the carriage return control code (13).
	CR
	// SO is the shift out control code (14).
	SO
	// SI is the shift in control code (15).
	SI
	// DLE is the data link escape control code (16).
	DLE
	// DC1 is the device control one code (17).
	DC1
	// DC2 is the device control two code (18).
	DC2
	// DC3 is the device control three code (19).
	DC3
	// DC4 is the device control four code (20).
	DC4
	// NAK is the negative acknowledge control code (21).
	NAK
	// SYN is the synchronous idle control code (22).
	SYN
	// ETB is the end of transmission block control code (23).
	ETB
	// CAN is the cancel control code (24).
	CAN
	// EM is the end of medium control code (25).
	EM
	// SUB is the substitute control code (26).
	SUB
	// ESC is the escape control code (27).
	ESC
	// FS is the file separator control code (28).
	FS
	// GS is the group separator control code (29).
	GS
	// RS is the record separator control code (30).
	RS
	// US is the unit separator control code (31).
	US
	// SP is the space character (32).
	SP
)

The common ASCII and Unicode control decimal values.

View Source
// Special ASCII and Unicode character codes and symbols.
// Values up to 255 are single-byte character codes, larger values are
// Unicode code points written in decimal.
const (
	// LeftSquareBracket is the [ character.
	LeftSquareBracket = 91
	// VerticalBar is the | character.
	VerticalBar = 124
	// DEL is the delete control code.
	DEL = 127
	// Dash is a hyphen -.
	Dash = 150
	// Nbsp is a non-breaking space with the value 160.
	Nbsp = 160
	// InvertedExclamation is the ¡ character.
	InvertedExclamation = 161
	// Cent is the ¢ character.
	Cent = 162
	// BrokenBar is the ¦ character.
	BrokenBar = 166
	// Negation is the ¬ character.
	Negation = 172
	// PlusMinus is the ± character.
	PlusMinus = 177
	// LightVertical is the single-byte light vertical │.
	LightVertical = 179
	// SquareRoot is the single-byte square root √.
	SquareRoot = 251
	// NBSP is a non-breaking space with the value 255.
	// NOTE(review): differs from Nbsp (160), presumably for a different code page; confirm.
	NBSP = 255
	// Delta is the Δ character (U+0394).
	Delta = 916
	// LeftwardsArrow is the ← character (U+2190).
	LeftwardsArrow = 8592
	// SquareRootU is the Unicode square root √ (U+221A).
	SquareRootU = 8730
	// House is the ⌂ character (U+2302).
	House = 8962
	// IntegralExtension is the ⎮ character (U+23AE).
	IntegralExtension = 9134
	// SymbolNUL is the ␀ control picture (U+2400).
	SymbolNUL = 9216
	// SymbolESC is the ␛ control picture (U+241B).
	SymbolESC = 9243
	// SymbolDEL is the ␡ control picture (U+2421).
	SymbolDEL = 9249
	// LightVerticalU is the Unicode box drawing light vertical │ (U+2502).
	LightVerticalU = 9474
	// CheckMark is the ✓ character (U+2713).
	CheckMark = 10003
	// Replacement is the replacement character � (U+FFFD).
	Replacement = 65533
	// OpenBox is the open box ␣ character (U+2423).
	OpenBox = 9251
)

Special ASCII and Unicode character codes and symbols.

View Source
const (
	// Row8 is the first cell after the end of a 7-bit row, decimal 128.
	Row8 = 128 // the first cell after the end of a 7-bit row.

)

Variables

View Source
// Sentinel errors of the convert package.
// Each comment restates the error message; the call sites are not shown here.
var (
	// ErrANSI reports that ANSI controls must be chained to c.swap.
	ErrANSI   = errors.New("ansi controls must be chained to c.swap")
	// ErrBytes reports that an empty byte slice cannot be transformed.
	ErrBytes  = errors.New("cannot transform an empty byte slice")
	// ErrEncode reports that no input encoding was provided.
	ErrEncode = errors.New("no input encoding provided")
	// ErrName reports an unknown or unsupported code page name or alias.
	ErrName   = errors.New("unknown or unsupported code page name or alias")
	// ErrOutput reports that there is nothing to output.
	ErrOutput = errors.New("nothing to output")
	// ErrWidth reports that the number of columns cannot be found using the line break.
	ErrWidth  = errors.New("cannot find the number columns from using line break")
	// ErrWrap reports that the wrap width must be chained to c.swap.
	ErrWrap   = errors.New("wrap width must be chained to c.swap")
)

Functions

func EncodeAlias

func EncodeAlias(name string) string

EncodeAlias returns a valid IANA index encoding name from a shortened name or alias.

Example
package main

import (
	"fmt"

	"github.com/bengarrett/retrotxtgo/convert"
)

// main prints the encoding name that EncodeAlias returns for each alias.
func main() {
	for _, alias := range []string{"437", "koi8u"} {
		fmt.Println(convert.EncodeAlias(alias))
	}
}
Output:

IBM437
KOI8-U

func EncodeUTF32

func EncodeUTF32(name string) encoding.Encoding

EncodeUTF32 initializes common UTF-32 encodings.

Example
package main

import (
	"fmt"

	"github.com/bengarrett/retrotxtgo/convert"
)

// main prints the encoding that EncodeUTF32 initializes for the name "utf-32".
func main() {
	fmt.Println(convert.EncodeUTF32("utf-32"))
}
Output:

UTF-32LE (Use BOM)

func Encoder

func Encoder(name string) (encoding.Encoding, error)

Encoder returns the named character set encoder.

Example
package main

import (
	"fmt"

	"github.com/bengarrett/retrotxtgo/convert"
)

// main prints the encoder that Encoder returns for each character set name.
func main() {
	for _, name := range []string{"cp437", "1252"} {
		// The returned error is ignored here.
		enc, _ := convert.Encoder(name)
		fmt.Println(enc)
	}
}
Output:

IBM Code Page 437
Windows 1252

func EqualLB

func EqualLB(r, nl [2]rune) bool

EqualLB reports whether r matches the single or multi-byte, line break character runes.

func Humanize

func Humanize(name string) string

Humanize the encoding by using a shorter, less formal name.

Example
package main

import (
	"fmt"

	"github.com/bengarrett/retrotxtgo/convert"
)

// main prints the humanized form of each encoding name.
func main() {
	for _, name := range []string{"cp437", "cp1252"} {
		fmt.Println(convert.Humanize(name))
	}
}
Output:

IBM437
Windows-1252

func Picture

func Picture(b byte) rune

Picture converts a byte value to a Unicode Control Picture rune.

Example
package main

import (
	"fmt"

	"github.com/bengarrett/retrotxtgo/convert"
)

// main prints the code point and glyph that Picture returns for two byte values.
func main() {
	// 138 is the line feed example and 155 is the escape example.
	for _, b := range []byte{138, 155} {
		pic := convert.Picture(b)
		fmt.Printf("%U %s\n", pic, string(pic))
	}
}
Output:

U+240A ␊
U+241B ␛

func Shorten

func Shorten(name string) string

Shorten the name to a custom name, a common name or an alias.

Example
package main

import (
	"fmt"

	"github.com/bengarrett/retrotxtgo/convert"
)

// main prints the shortened form of each encoding name.
func main() {
	for _, name := range []string{"cp437", "IBM-437"} {
		fmt.Println(convert.Shorten(name))
	}
}
Output:

437
437

func Swap

func Swap(code rune) rune
Example
package main

import (
	"fmt"

	"github.com/bengarrett/retrotxtgo/convert"
)

// main prints the character that Swap returns for the DEL and SquareRoot codes.
func main() {
	for _, code := range []rune{convert.DEL, convert.SquareRoot} {
		swapped := convert.Swap(code)
		fmt.Println(string(swapped))
	}
}
Output:

Δ
✓

Types

type Convert

// Convert 8-bit code page text encodings or Unicode byte array text to UTF-8 runes.
type Convert struct {
	Args  Flag // Args are the cmd supplied flag arguments.
	// Input groups the source text with the settings that describe it.
	Input struct {
		Encoding  encoding.Encoding // Encoding is the encoding of the input text.
		Input     []byte            // Input is the input text as bytes.
		Ignore    []rune            // Ignore lists the runes to ignore.
		LineBreak [2]rune           // LineBreak holds the line break controls used by the text.
		UseBreaks bool              // UseBreaks uses the line break controls as new lines.
		Table     bool              // Table flags this text as a code page table.
	}
	Output []rune // Output holds the transformed UTF-8 runes.
}

Convert 8-bit code page text encodings or Unicode byte array text to UTF-8 runes.

func (*Convert) ANSI

func (c *Convert) ANSI(b ...byte) ([]rune, error)

ANSI transforms legacy encoded ANSI into modern UTF-8 text. It displays ASCII control codes as characters. It obeys the DOS end of file marker.

func (*Convert) ANSIControls

func (c *Convert) ANSIControls() *Convert

ANSIControls replaces all ←[ and ␛[ character matches with functional ANSI escape controls.

func (*Convert) Chars

func (c *Convert) Chars(b ...byte) ([]rune, error)

Chars transforms legacy encoded characters and text control codes into UTF-8 characters. It displays both ASCII and ANSI control codes as characters. It ignores the DOS end of file marker.

func (*Convert) Dump

func (c *Convert) Dump(b ...byte) ([]rune, error)

Dump transforms legacy encoded text or ANSI into modern UTF-8 text. It obeys common ASCII control codes. It ignores the DOS end of file marker.

func (*Convert) FixJISTable

func (c *Convert) FixJISTable()

FixJISTable blanks invalid ShiftJIS characters while printing 8-bit tables.

func (*Convert) LineBreak

func (c *Convert) LineBreak()

LineBreak will try to guess the line break representation as a 2 byte value. A guess of Unix will return [10, 0], Windows [13, 10], otherwise a [0, 0] value is returned.

func (*Convert) RunesControls

func (c *Convert) RunesControls()

RunesControls switches out C0 and C1 ASCII controls with Unicode Control Picture representations.

func (*Convert) RunesControlsEBCDIC

func (c *Convert) RunesControlsEBCDIC()

RunesControlsEBCDIC switches out EBCDIC controls with Unicode Control Picture representations.

func (*Convert) RunesDOS

func (c *Convert) RunesDOS()

RunesDOS switches out C0, C1 and other controls with PC/MS-DOS picture glyphs.

func (*Convert) RunesEBCDIC

func (c *Convert) RunesEBCDIC()

RunesEBCDIC switches out EBCDIC IBM mainframe controls with Unicode picture representations. Where no appropriate picture exists, a space placeholder is used.

func (*Convert) RunesKOI8

func (c *Convert) RunesKOI8()

RunesKOI8 blanks out unused C0, C1 and other control spaces for Russian sets.

func (*Convert) RunesLatin

func (c *Convert) RunesLatin()

RunesLatin blanks out unused C0, C1 and other control spaces for ISO Latin sets.

func (*Convert) RunesMacintosh

func (c *Convert) RunesMacintosh()

RunesMacintosh replaces specific Mac OS Roman characters with Unicode picture representations.

func (*Convert) RunesShiftJIS

func (c *Convert) RunesShiftJIS()

RunesShiftJIS tweaks some Unicode picture representations for Shift-JIS.

func (*Convert) RunesUTF8

func (c *Convert) RunesUTF8()

RunesUTF8 tweaks some Unicode picture representations for UTF-8 Basic Latin.

func (*Convert) RunesWindows

func (c *Convert) RunesWindows()

RunesWindows tweaks some Unicode picture representations for Windows-125x sets.

func (*Convert) RunesXRows

func (c *Convert) RunesXRows()

RunesXRows blanks out rows 8x and 9x with spaces.

func (*Convert) SkipCode

func (c *Convert) SkipCode() *Convert

SkipCode marks control characters to be ignored. It needs to be applied before Convert.transform().

func (Convert) SkipIgnore

func (c Convert) SkipIgnore(i int) bool

SkipIgnore reports whether the rune should be skipped.

func (*Convert) Swap

func (c *Convert) Swap() (*Convert, error)

Swap transforms character map and control codes into UTF-8 Unicode runes.

func (*Convert) Swaps

func (c *Convert) Swaps() (*Convert, error)

func (*Convert) Text

func (c *Convert) Text(b ...byte) ([]rune, error)

Text transforms legacy encoded text or ANSI into modern UTF-8 text. It obeys common ASCII control codes. It obeys the DOS end of file marker.

func (*Convert) Transform

func (c *Convert) Transform() error

Transform byte data from named character map encoded text into UTF-8.

type Flag

// Flag are the user supplied values.
type Flag struct {
	Controls  []string // Controls are the control codes to always use.
	SwapChars []string // SwapChars are the characters to swap out with common alternatives.
	MaxWidth  int      // MaxWidth is the maximum text width per-line.
}

Flag are the user supplied values.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL