textscan

package

v0.6.15 Latest Latest Go to latest Published: Jan 4, 2024 License: MIT Imports: 10 Imported by: 3

Details

Valid go.mod file

The Go module system was introduced in Go 1.11 and is the official dependency management solution for Go.
Redistributable license

Redistributable licenses place minimal restrictions on how software can be used, modified, and redistributed.
Tagged version

Modules with tagged versions give importers more predictable builds.
Stable version

When a project reaches major version v1 it is considered stable.
Learn more about best practices

Repository

github.com/gookit/goutil

Links

Open Source Insights

README ¶

TextScan

Package textscan implements a text scanner for quickly parsing text contents. It can be used to parse formats such as INI and Properties.

Install

go get github.com/gookit/goutil/strutil/textscan

Examples

package main

import (
	"fmt"

	"github.com/gookit/goutil/dump"
	"github.com/gookit/goutil/strutil/textscan"
	"github.com/gookit/goutil/testutil/assert"
)

func main() {
	ts := textscan.TextScanner{}
	ts.AddMatchers(
		&textscan.CommentsMatcher{},
		&textscan.KeyValueMatcher{},
	)

	ts.SetInput(`
# comments 1
name = inhere

// comments 2
age = 28

/*
multi line
comments 3
*/
desc = '''
a multi
line string
'''
`)

	data := make(map[string]string)
	err := ts.Each(func(t textscan.Token) {
		fmt.Println("====> Token kind:", t.Kind())
		fmt.Println(t.String())

		if t.Kind() == textscan.TokValue {
			v := t.(*textscan.ValueToken)
			data[v.Key()] = v.Value()
		}
	})

	dump.P(data, err)
}

Output:

====> Token kind: Comments
# comments 1
====> Token kind: Value
key: name
value: "inhere"
comments: 
====> Token kind: Comments
// comments 2
====> Token kind: Value
key: age
value: "28"
comments: 
====> Token kind: Comments
/*
multi line
comments 3
*/
====> Token kind: Value
key: desc
value: "\n\na multi\nline string\n"
comments: 

==== Collected data:
map[string]string { #len=3
  "desc": string("
a multi
line string
"), #len=22
  "name": string("inhere"), #len=6
  "age": string("28"), #len=2
},

Projects using `textscan`

textscan is used in these projects:

Documentation ¶

Overview ¶

Package textscan implements a parser that quickly scans and analyzes text content. It can be used to parse INI, Properties and other formats.

Index ¶

Constants
Variables
func AddKind(k Kind, name string)
func CommentsDetect(str string, inlineChars []byte) (ok, more bool, err error)
func CommentsDetectEnd(line string) bool
func HasKind(k Kind) bool
func IsKindToken(k Kind, tok Token) bool
func KindString(k Kind) string
type BaseToken
- func (t *BaseToken) IsValid() bool
- func (t *BaseToken) Kind() Kind
- func (t *BaseToken) String() string
- func (t *BaseToken) Value() string
type CommentToken
- func NewCommentToken(val string) *CommentToken
- func (t *CommentToken) HasMore() bool
- func (t *CommentToken) MergeSame(tok Token) error
- func (t *CommentToken) ScanMore(ts *TextScanner) error
- func (t *CommentToken) String() string
- func (t *CommentToken) Value() string
type CommentsMatcher
- func (m *CommentsMatcher) Match(text string, prev Token) (Token, error)
- func (m *CommentsMatcher) MatchEnd(text string) bool
type ErrScan
- func (e ErrScan) Error() string
type HandleFn
type KeyValueMatcher
- func (m *KeyValueMatcher) DetectEnd(mark, text string) (ok bool, val string)
- func (m *KeyValueMatcher) Match(text string, prev Token) (Token, error)
type Kind
- func (k Kind) String() string
type LiteToken
type Matcher
type Parser
- func NewParser(fn HandleFn) *Parser
- func (p *Parser) AddMatchers(ms ...Matcher)
- func (p *Parser) Parse(bs []byte) error
- func (p *Parser) ParseFrom(r io.Reader) error
- func (p *Parser) ParseText(text string) error
type StringToken
- func NewEmptyToken() *StringToken
- func NewStringToken(k Kind, val string) *StringToken
- func (t *StringToken) HasMore() bool
- func (t *StringToken) MergeSame(_ Token) error
- func (t *StringToken) ScanMore(_ *TextScanner) error
type TextScanner
- func NewScanner(in any) *TextScanner
- func (s *TextScanner) AddKind(k Kind, name string)
- func (s *TextScanner) AddMatchers(ms ...Matcher)
- func (s *TextScanner) Each(fn func(t Token)) error
- func (s *TextScanner) Err() error
- func (s *TextScanner) Line() int
- func (s *TextScanner) PrevToken() Token
- func (s *TextScanner) Scan() bool
- func (s *TextScanner) ScanNext() (ok bool, text string)
- func (s *TextScanner) SetInput(in any)
- func (s *TextScanner) SetNext(text string)
- func (s *TextScanner) SetSplit(fn bufio.SplitFunc)
- func (s *TextScanner) Token() Token
type Token
type ValueToken
- func (t *ValueToken) Comment() string
- func (t *ValueToken) HasComment() bool
- func (t *ValueToken) HasMore() bool
- func (t *ValueToken) Key() string
- func (t *ValueToken) Mark() string
- func (t *ValueToken) MergeSame(_ Token) error
- func (t *ValueToken) ScanMore(ts *TextScanner) error
- func (t *ValueToken) String() string
- func (t *ValueToken) Value() string
- func (t *ValueToken) Values() []string

Examples ¶

NewScanner

Constants ¶

View Source

// Marker strings for multi line values (MultiLineValMark*) and for the end
// of a multi line comment (MultiLineCmtEnd).
const (
	MultiLineValMarkS = "'''" // three single quotes
	MultiLineValMarkD = `"""` // three double quotes
	MultiLineValMarkH = "<<<" // heredoc at start. <<<TXT ... TXT
	MultiLineValMarkQ = "\\"  // at end. eg: properties contents
	MultiLineCmtEnd   = "*/"  // closing mark of a multi line comment
)

Defines the constants for special marker strings.

Variables ¶

View Source

var ErrCommentsNotEnd = errors.New("not end of multi-line comments")

ErrCommentsNotEnd error

View Source

var ErrMLineValueNotEnd = errors.New("not end of multi line value")

ErrMLineValueNotEnd error

Functions ¶

func AddKind ¶

func AddKind(k Kind, name string)

AddKind add global kind to kinds

func CommentsDetect ¶

func CommentsDetect(str string, inlineChars []byte) (ok, more bool, err error)

CommentsDetect check.

- inlineChars: #

default match:

inline #, //
multi line: /*

func CommentsDetectEnd ¶

func CommentsDetectEnd(line string) bool

CommentsDetectEnd multi line comments end

func HasKind ¶

func HasKind(k Kind) bool

HasKind check

func IsKindToken ¶

func IsKindToken(k Kind, tok Token) bool

IsKindToken check

func KindString ¶

func KindString(k Kind) string

KindString name

Types ¶

type BaseToken ¶

type BaseToken struct {
	// contains filtered or unexported fields
}

BaseToken struct

func (*BaseToken) IsValid ¶

func (t *BaseToken) IsValid() bool

IsValid token

func (*BaseToken) Kind ¶

func (t *BaseToken) Kind() Kind

Kind type

func (*BaseToken) String ¶

func (t *BaseToken) String() string

String of token

func (*BaseToken) Value ¶

func (t *BaseToken) Value() string

Value of token

type CommentToken ¶

type CommentToken struct {
	BaseToken
	// contains filtered or unexported fields
}

CommentToken struct

func NewCommentToken ¶

func NewCommentToken(val string) *CommentToken

NewCommentToken instance.

func (*CommentToken) HasMore ¶

func (t *CommentToken) HasMore() bool

HasMore is multi line values

func (*CommentToken) MergeSame ¶

func (t *CommentToken) MergeSame(tok Token) error

MergeSame comments token

func (*CommentToken) ScanMore ¶

func (t *CommentToken) ScanMore(ts *TextScanner) error

ScanMore scan multi line values

func (*CommentToken) String ¶

func (t *CommentToken) String() string

String for token

func (*CommentToken) Value ¶

func (t *CommentToken) Value() string

Value of token

type CommentsMatcher ¶

// CommentsMatcher holds the options used for matching comment lines.
type CommentsMatcher struct {
	// InlineChars are the chars used to match inline comments. Default is: #
	InlineChars []byte
	// MatchFn is a custom match func for a comments line.
	// - mark 	useful on multi line comments
	//
	// NOTE(review): the signature has no "mark" parameter, so the line above
	// looks stale. Presumably "ok" reports a match and "more" reports that a
	// multi line comment continues — confirm against the implementation.
	MatchFn func(text string) (ok, more bool, err error)
	// DetectEnd is a custom func for detecting the end of multi line comments.
	DetectEnd func(text string) bool
}

CommentsMatcher matches comment lines. It automatically merges with the previous comments token.

func (*CommentsMatcher) Match ¶

func (m *CommentsMatcher) Match(text string, prev Token) (Token, error)

Match comments token

func (*CommentsMatcher) MatchEnd ¶

func (m *CommentsMatcher) MatchEnd(text string) bool

MatchEnd for multi line comments

type ErrScan ¶

// ErrScan describes an error raised while scanning or parsing contents.
type ErrScan struct {
	Msg  string // error message
	Line int    // line number where the error occurred, starting at 1
	Text string // text contents on error
}

ErrScan error on scan or parse contents

func (ErrScan) Error ¶

func (e ErrScan) Error() string

Error string

type HandleFn ¶

type HandleFn func(t Token)

HandleFn for token

type KeyValueMatcher ¶

// KeyValueMatcher holds the options used for matching key-value lines.
type KeyValueMatcher struct {
	// Separator is the string that splits key and value. Default is "="
	Separator string
	// MergeComments collects the previous comments token into the value token.
	// When set to true, remember to skip TokComments tokens on each s.Scan()
	MergeComments bool
	// InlineComment enables parsing and splitting of an inline comment
	InlineComment bool
	// DisableMultiLine disables parsing of multi line values
	DisableMultiLine bool
	// KeyCheckFn is a func that checks whether a key string is valid
	KeyCheckFn func(key string) error
}

KeyValueMatcher matches key-value tokens. It supports parsing `KEY=VALUE` lines of text.

func (*KeyValueMatcher) DetectEnd ¶

func (m *KeyValueMatcher) DetectEnd(mark, text string) (ok bool, val string)

DetectEnd for multi line value

func (*KeyValueMatcher) Match ¶

func (m *KeyValueMatcher) Match(text string, prev Token) (Token, error)

Match text line.

type Kind ¶

type Kind uint8

Kind type

// Builtin token kinds.
const (
	// TokInvalid is the zero value of Kind.
	TokInvalid Kind = iota
	// TokKey is the kind for key tokens.
	TokKey
	// TokValue is the kind for value tokens.
	TokValue
	// TokComments is the kind for comments tokens.
	TokComments
)

builtin defined kinds

func (Kind) String ¶

func (k Kind) String() string

String name for kind

type LiteToken ¶

// LiteToken is the minimal token interface: kind, value and validity.
type LiteToken interface {
	// Kind of the token
	Kind() Kind
	// Value of the token
	Value() string
	// IsValid reports whether the token is valid
	IsValid() bool
}

LiteToken interface

type Matcher ¶

// Matcher matches a text line and builds a token from it.
type Matcher interface {
	// Match text line by kind, if success returns a new Token.
	// prev is the previous token passed in by the caller.
	Match(line string, prev Token) (tok Token, err error)
}

Matcher interface

type Parser ¶

type Parser struct {

	// Func for handle tokens
	Func HandleFn
	// contains filtered or unexported fields
}

Parser struct

func NewParser ¶

func NewParser(fn HandleFn) *Parser

NewParser instance

func (*Parser) AddMatchers ¶

func (p *Parser) AddMatchers(ms ...Matcher)

AddMatchers register token matchers

func (*Parser) Parse ¶

func (p *Parser) Parse(bs []byte) error

Parse input bytes

func (*Parser) ParseFrom ¶

func (p *Parser) ParseFrom(r io.Reader) error

ParseFrom input reader

func (*Parser) ParseText ¶

func (p *Parser) ParseText(text string) error

ParseText input string

type StringToken ¶

// StringToken is a token type that embeds BaseToken and adds no fields.
type StringToken struct {
	BaseToken
}

StringToken struct

func NewEmptyToken ¶

func NewEmptyToken() *StringToken

NewEmptyToken instance. Can be used when you want to skip parsing some contents.

func NewStringToken ¶

func NewStringToken(k Kind, val string) *StringToken

NewStringToken instance.

func (*StringToken) HasMore ¶

func (t *StringToken) HasMore() bool

HasMore is multi line values

func (*StringToken) MergeSame ¶

func (t *StringToken) MergeSame(_ Token) error

MergeSame implements

func (*StringToken) ScanMore ¶

func (t *StringToken) ScanMore(_ *TextScanner) error

ScanMore implements

type TextScanner ¶

type TextScanner struct {
	// contains filtered or unexported fields
}

TextScanner struct.

func NewScanner ¶

func NewScanner(in any) *TextScanner

NewScanner instance

Example ¶

package main

import (
	"fmt"

	"github.com/gookit/goutil/strutil/textscan"
)

// main scans a source text token by token and prints every valid value
// token, then reports the scan error, if any.
func main() {
	scanner := textscan.NewScanner(`source code`)

	// register the token matchers; custom matchers can be added the same way
	commentsM := &textscan.CommentsMatcher{InlineChars: []byte{'#'}}
	keyValueM := &textscan.KeyValueMatcher{MergeComments: true}
	scanner.AddMatchers(commentsM, keyValueM)

	// scan and parse
	for scanner.Scan() {
		token := scanner.Token()

		// custom handling: only valid value tokens are printed
		if token.IsValid() && token.Kind() == textscan.TokValue {
			fmt.Println(token.(*textscan.ValueToken))
		}
	}

	if scanner.Err() == nil {
		return
	}
	fmt.Println("ERROR:", scanner.Err())
}

Output:

func (*TextScanner) AddKind ¶

func (s *TextScanner) AddKind(k Kind, name string)

AddKind register new kind

func (*TextScanner) AddMatchers ¶

func (s *TextScanner) AddMatchers(ms ...Matcher)

AddMatchers register token matchers

func (*TextScanner) Each ¶

func (s *TextScanner) Each(fn func(t Token)) error

Each calls the given func for every scanned token.

func (*TextScanner) Err ¶

func (s *TextScanner) Err() error

Err get

func (*TextScanner) Line ¶

func (s *TextScanner) Line() int

Line on current

func (*TextScanner) PrevToken ¶

func (s *TextScanner) PrevToken() Token

PrevToken get of previous scan.

func (*TextScanner) Scan ¶

func (s *TextScanner) Scan() bool

Scan scans the source input and parses it. Use Token() to get the currently parsed token.

Usage:

ts := textscan.NewScanner(`source ...`)
for ts.Scan() {
	tok := ts.Token()
	// do something...
}
fmt.Println(ts.Err())

func (*TextScanner) ScanNext ¶

func (s *TextScanner) ScanNext() (ok bool, text string)

ScanNext advance and fetch next line text

func (*TextScanner) SetInput ¶

func (s *TextScanner) SetInput(in any)

SetInput for scan and parse

func (*TextScanner) SetNext ¶

func (s *TextScanner) SetNext(text string)

SetNext text for scan and parse

func (*TextScanner) SetSplit ¶

func (s *TextScanner) SetSplit(fn bufio.SplitFunc)

SetSplit set split func on scan

func (*TextScanner) Token ¶

func (s *TextScanner) Token() Token

Token get of current scan.

type Token ¶

// Token is the full token interface: LiteToken plus string output and
// multi line handling.
type Token interface {
	LiteToken
	// String of the token
	String() string
	// HasMore is multi line values
	HasMore() bool
	// ScanMore scan multi line values
	ScanMore(ts *TextScanner) error
	// MergeSame merges the given token.
	// NOTE(review): presumably tok must be of the same kind as the receiver — confirm.
	MergeSame(tok Token) error
}

Token parser

type ValueToken ¶

type ValueToken struct {
	BaseToken
	// contains filtered or unexported fields
}

ValueToken contains key and value contents

func (*ValueToken) Comment ¶

func (t *ValueToken) Comment() string

Comment lines string

func (*ValueToken) HasComment ¶

func (t *ValueToken) HasComment() bool

HasComment for the value

func (*ValueToken) HasMore ¶

func (t *ValueToken) HasMore() bool

HasMore is multi line values

func (*ValueToken) Key ¶

func (t *ValueToken) Key() string

Key name

func (*ValueToken) Mark ¶ added in v0.5.15

func (t *ValueToken) Mark() string

Mark for multi line values

func (*ValueToken) MergeSame ¶

func (t *ValueToken) MergeSame(_ Token) error

MergeSame comments token

func (*ValueToken) ScanMore ¶

func (t *ValueToken) ScanMore(ts *TextScanner) error

ScanMore scan multi line values

func (*ValueToken) String ¶

func (t *ValueToken) String() string

String of token

func (*ValueToken) Value ¶

func (t *ValueToken) Value() string

Value text string.

func (*ValueToken) Values ¶ added in v0.5.15

func (t *ValueToken) Values() []string

Values for multi line values

Source Files ¶

View all Source files

?	: This menu
/	: Search site
f or F	: Jump to
y or Y	: Canonical URL

README ¶

TextScan

Install

Examples

Projects using textscan

Documentation ¶

Overview ¶

Index ¶

Examples ¶

Constants ¶

Variables ¶

Functions ¶

func AddKind ¶

func CommentsDetect ¶

func CommentsDetectEnd ¶

func HasKind ¶

func IsKindToken ¶

func KindString ¶

Types ¶

type BaseToken ¶

func (*BaseToken) IsValid ¶

func (*BaseToken) Kind ¶

func (*BaseToken) String ¶

func (*BaseToken) Value ¶

type CommentToken ¶

func NewCommentToken ¶

func (*CommentToken) HasMore ¶

func (*CommentToken) MergeSame ¶

func (*CommentToken) ScanMore ¶

func (*CommentToken) String ¶

func (*CommentToken) Value ¶

type CommentsMatcher ¶

func (*CommentsMatcher) Match ¶

func (*CommentsMatcher) MatchEnd ¶

type ErrScan ¶

func (ErrScan) Error ¶

type HandleFn ¶

type KeyValueMatcher ¶

func (*KeyValueMatcher) DetectEnd ¶

func (*KeyValueMatcher) Match ¶

type Kind ¶

func (Kind) String ¶

type LiteToken ¶

type Matcher ¶

type Parser ¶

func NewParser ¶

func (*Parser) AddMatchers ¶

func (*Parser) Parse ¶

func (*Parser) ParseFrom ¶

func (*Parser) ParseText ¶

type StringToken ¶

func NewEmptyToken ¶

func NewStringToken ¶

func (*StringToken) HasMore ¶

func (*StringToken) MergeSame ¶

func (*StringToken) ScanMore ¶

type TextScanner ¶

func NewScanner ¶

func (*TextScanner) AddKind ¶

func (*TextScanner) AddMatchers ¶

func (*TextScanner) Each ¶

func (*TextScanner) Err ¶

func (*TextScanner) Line ¶

func (*TextScanner) PrevToken ¶

func (*TextScanner) Scan ¶

func (*TextScanner) ScanNext ¶

func (*TextScanner) SetInput ¶

func (*TextScanner) SetNext ¶

func (*TextScanner) SetSplit ¶

func (*TextScanner) Token ¶

type Token ¶

type ValueToken ¶

func (*ValueToken) Comment ¶

func (*ValueToken) HasComment ¶

func (*ValueToken) HasMore ¶

func (*ValueToken) Key ¶

func (*ValueToken) Mark ¶ added in v0.5.15

func (*ValueToken) MergeSame ¶

func (*ValueToken) ScanMore ¶

func (*ValueToken) String ¶

Projects using `textscan`