textutil

package module
v0.1.0 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Aug 22, 2017 License: BSD-3-Clause Imports: 2 Imported by: 0

Documentation

Overview

Package textutil provides tools for creating Transformers as used in golang.org/x/text.

Index

Examples

Constants

This section is empty.

Variables

This section is empty.

Functions

This section is empty.

Types

type Rewriter

// A Rewriter rewrites UTF-8 bytes.
type Rewriter interface {
	// Rewrite rewrites an indivisible segment of input. If any error is
	// encountered, all reads and writes made within the same call to Rewrite
	// will be discarded. Otherwise, the runes written to the output replace
	// the runes read from the input.
	//
	// Rewrite must be called with a State representing non-empty input.
	Rewrite(c State)

	// Reset implements the Reset method of transform.Transformer.
	Reset()
}

A Rewriter rewrites UTF-8 bytes.

Example
package main

import (
	"fmt"
	"unicode"
	"unicode/utf8"

	"github.com/mpvl/textutil"
	"golang.org/x/text/transform"
)

// main builds three Transformers (whitespace cleaning, escaping and
// unescaping), runs each on a sample string, and finally chains all three.
// The trailing comment on each Println shows the expected output line.
func main() {
	// A stateful Rewriter is wrapped with NewTransformer.
	spaceCleaner := textutil.NewTransformer(&cleanSpaces{})
	cleaned := spaceCleaner.String("  Hello   world! \t Hello   world!   ")
	fmt.Println(cleaned) // Hello world! Hello world!

	// Plain functions are wrapped with NewTransformerFromFunc.
	escaper := textutil.NewTransformerFromFunc(escape)
	escaped := escaper.String("Héllø wørl∂!")
	fmt.Println(escaped) // H\u00E9ll\u00F8 w\u00F8rl\u2202!

	unescaper := textutil.NewTransformerFromFunc(unescape)
	fmt.Println(unescaper.String(escaped)) // Héllø wørl∂!

	// Transformers compose with transform.Chain like any other Transformer.
	chain := transform.Chain(escaper, spaceCleaner, unescaper)
	result, _, _ := transform.String(chain, "\t\t\tHéllø   \t   wørl∂!    ")
	fmt.Println(result) // Héllø wørl∂!
}

// cleanSpaces is a Rewriter that squeezes every run of whitespace into one
// space and drops whitespace entirely at the start and end of the input. It
// processes a single rune per call to Rewrite.
type cleanSpaces struct {
	// notFirst is set once a non-space rune has been written, so that
	// leading whitespace never produces a space.
	notFirst bool
	// foundSpace records that whitespace was read since the last rune was
	// written.
	foundSpace bool
}

// Rewrite consumes one rune. Whitespace is only remembered, not written. A
// non-space rune is written, preceded by a single space when whitespace was
// seen since the previous written rune and this is not the first one.
func (t *cleanSpaces) Rewrite(s textutil.State) {
	r, _ := s.ReadRune()
	if unicode.IsSpace(r) {
		t.foundSpace = true
		return
	}
	if t.foundSpace && t.notFirst && !s.WriteRune(' ') {
		// The separating space could not be written; leave the state
		// untouched.
		return
	}
	t.foundSpace = false
	t.notFirst = true
	s.WriteRune(r)
}

// Reset returns the Rewriter to its zero state.
func (t *cleanSpaces) Reset() {
	var zero cleanSpaces
	*t = zero
}

// escape rewrites input by escaping all non-ASCII runes and the escape
// character itself. It reads one rune per call and writes:
//
//   - `\UXXXXXXXX` (8 upper-case hex digits) for runes above U+FFFF,
//   - `\uXXXX` (4 upper-case hex digits) for other non-ASCII runes,
//   - `\\` for a backslash,
//   - the rune unchanged otherwise.
func escape(s textutil.State) {
	switch r, _ := s.ReadRune(); {
	case r > 0xffff:
		// Only runes beyond the Basic Multilingual Plane need the 8-digit
		// form; U+FFFF itself still fits in four hex digits.
		fmt.Fprintf(s, `\U%08X`, r)
	case r >= utf8.RuneSelf:
		fmt.Fprintf(s, `\u%04X`, r)
	case r == '\\':
		s.WriteString(`\\`)
	default:
		s.WriteRune(r)
	}
}

// unescape reverses escape. A rune other than a backslash is copied through.
// After a backslash it accepts a second backslash, `u` followed by four
// upper-case hex digits, or `U` followed by eight; anything else produces
// utf8.RuneError.
func unescape(s textutil.State) {
	first, _ := s.ReadRune()
	if first != '\\' {
		s.WriteRune(first)
		return
	}

	// Decide how many hex digits follow the escape prefix.
	var digits int
	kind, _ := s.ReadRune()
	switch kind {
	case '\\':
		s.WriteRune('\\')
		return
	case 'u':
		digits = 4
	case 'U':
		digits = 8
	default:
		s.WriteRune(utf8.RuneError)
		return
	}

	var decoded rune
	for remaining := digits; remaining > 0; remaining-- {
		var nibble rune
		d, _ := s.ReadRune()
		switch {
		case d >= '0' && d <= '9':
			nibble = d - '0'
		case d >= 'A' && d <= 'F':
			nibble = d - 'A' + 10
		default:
			// Not a hex digit: unread it and emit the replacement rune
			// for the malformed escape.
			s.UnreadRune()
			s.WriteRune(utf8.RuneError)
			return
		}
		decoded = decoded<<4 | nibble
	}
	s.WriteRune(decoded)
}
Output:

Hello world! Hello world!
H\u00E9ll\u00F8 w\u00F8rl\u2202!
Héllø wørl∂!
Héllø wørl∂!

type State

// State tracks the transformation of a minimal chunk of input. Reads and
// writes on a State will either be committed in full or not at all.
type State interface {
	// ReadRune returns the next rune from the source and the number of bytes
	// consumed. It returns (RuneError, 1) for invalid UTF-8 bytes. If the
	// source buffer is empty, it will return (RuneError, 0).
	ReadRune() (r rune, size int)

	// UnreadRune unreads the most recently read rune and makes it available for
	// a next call to Rewrite. Only one call to UnreadRune is allowed per
	// Rewrite.
	UnreadRune()

	// WriteBytes writes the given byte slice to the destination and reports
	// whether the write was successful.
	WriteBytes(b []byte) bool

	// WriteString writes the given string to the destination and reports
	// whether the write was successful.
	WriteString(s string) bool

	// WriteRune writes the given rune to the destination and reports whether
	// the write was successful.
	WriteRune(r rune) bool

	// Write implements io.Writer. The user is advised to use WriteBytes when
	// conformance to io.Writer is not needed.
	Write(b []byte) (n int, err error)

	// SetError reports invalid source bytes.
	SetError(err error)
}

State tracks the transformation of a minimal chunk of input. Reads and writes on a State will either be committed in full or not at all.

type Transformer

// A Transformer wraps a transform.SpanningTransformer, providing convenience
// methods for most of the functionality in the transform package.
type Transformer struct {
	transform.SpanningTransformer
}

A Transformer wraps a transform.SpanningTransformer, providing convenience methods for most of the functionality in the transform package.

func NewTransformer

func NewTransformer(r Rewriter) Transformer

NewTransformer returns a Transformer that uses the given Rewriter to transform input by repeatedly calling Rewrite until all input has been processed or an error is encountered.

func NewTransformerFromFunc

func NewTransformerFromFunc(rewrite func(State)) Transformer

NewTransformerFromFunc calls NewTransformer with a stateless Rewriter created from rewrite, which must follow the same guidelines as the Rewrite method of a Rewriter.

func (Transformer) Bytes

func (t Transformer) Bytes(b []byte) []byte

Bytes returns a new byte slice with the result of converting b using t. It calls Reset on t. It returns nil if any error was found.

func (Transformer) Reset

func (t Transformer) Reset()

Reset calls the Reset method of the underlying Transformer.

func (Transformer) Span

func (t Transformer) Span(b []byte, atEOF bool) (n int, err error)

Span calls the Span method of the underlying Transformer.

func (Transformer) String

func (t Transformer) String(s string) string

String applies t to s and returns the result. This method wraps transform.String. It returns the empty string if any error occurred.

func (Transformer) Transform

func (t Transformer) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error)

Transform calls the Transform method of the underlying Transformer.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL