textscan

package
v0.6.15 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Jan 4, 2024 License: MIT Imports: 10 Imported by: 3

README

TextScan

Package textscan implements a text scanner for quickly parsing text contents. It can be used to parse formats such as INI and Properties contents.

Install

go get github.com/gookit/goutil/strutil/textscan

Examples

package main

import (
	"fmt"

	"github.com/gookit/goutil/dump"
	"github.com/gookit/goutil/strutil/textscan"
	"github.com/gookit/goutil/testutil/assert"
)

func main() {
	ts := textscan.TextScanner{}
	ts.AddMatchers(
		&textscan.CommentsMatcher{},
		&textscan.KeyValueMatcher{},
	)

	ts.SetInput(`
# comments 1
name = inhere

// comments 2
age = 28

/*
multi line
comments 3
*/
desc = '''
a multi
line string
'''
`)

	data := make(map[string]string)
	err := ts.Each(func(t textscan.Token) {
		fmt.Println("====> Token kind:", t.Kind())
		fmt.Println(t.String())

		if t.Kind() == textscan.TokValue {
			v := t.(*textscan.ValueToken)
			data[v.Key()] = v.Value()
		}
	})

	dump.P(data, err)
}

Output:

====> Token kind: Comments
# comments 1
====> Token kind: Value
key: name
value: "inhere"
comments: 
====> Token kind: Comments
// comments 2
====> Token kind: Value
key: age
value: "28"
comments: 
====> Token kind: Comments
/*
multi line
comments 3
*/
====> Token kind: Value
key: desc
value: "\n\na multi\nline string\n"
comments: 

==== Collected data:
map[string]string { #len=3
  "desc": string("
a multi
line string
"), #len=22
  "name": string("inhere"), #len=6
  "age": string("28"), #len=2
},

Projects using textscan

textscan is used in these projects:

Documentation

Overview

Package textscan implements a parser that quickly scans and analyzes text content. It can be used to parse INI, Properties and other formats.

Index

Examples

Constants

View Source
// Special marker strings for multi-line values and multi-line comments.
const (
	MultiLineValMarkS = "'''" // three single quotes
	MultiLineValMarkD = `"""` // three double quotes
	MultiLineValMarkH = "<<<" // heredoc marker at the start of the value. <<<TXT ... TXT
	MultiLineValMarkQ = "\\"  // a backslash at the end of the line. e.g. properties contents
	MultiLineCmtEnd   = "*/"  // end marker of a multi-line comment
)

Constants defining the special marker strings for multi-line values and comments.

Variables

View Source
var ErrCommentsNotEnd = errors.New("not end of multi-line comments")

ErrCommentsNotEnd error

View Source
var ErrMLineValueNotEnd = errors.New("not end of multi line value")

ErrMLineValueNotEnd error

Functions

func AddKind

func AddKind(k Kind, name string)

AddKind add global kind to kinds

func CommentsDetect

func CommentsDetect(str string, inlineChars []byte) (ok, more bool, err error)

CommentsDetect checks whether str is a comments line.

- inlineChars: #

default match:

  • inline #, //
  • multi line: /*

func CommentsDetectEnd

func CommentsDetectEnd(line string) bool

CommentsDetectEnd reports whether line is the end of a multi-line comment.

func HasKind

func HasKind(k Kind) bool

HasKind check

func IsKindToken

func IsKindToken(k Kind, tok Token) bool

IsKindToken check

func KindString

func KindString(k Kind) string

KindString name

Types

type BaseToken

type BaseToken struct {
	// contains filtered or unexported fields
}

BaseToken struct

func (*BaseToken) IsValid

func (t *BaseToken) IsValid() bool

IsValid token

func (*BaseToken) Kind

func (t *BaseToken) Kind() Kind

Kind type

func (*BaseToken) String

func (t *BaseToken) String() string

String of token

func (*BaseToken) Value

func (t *BaseToken) Value() string

Value of token

type CommentToken

type CommentToken struct {
	BaseToken
	// contains filtered or unexported fields
}

CommentToken struct

func NewCommentToken

func NewCommentToken(val string) *CommentToken

NewCommentToken instance.

func (*CommentToken) HasMore

func (t *CommentToken) HasMore() bool

HasMore is multi line values

func (*CommentToken) MergeSame

func (t *CommentToken) MergeSame(tok Token) error

MergeSame comments token

func (*CommentToken) ScanMore

func (t *CommentToken) ScanMore(ts *TextScanner) error

ScanMore scan multi line values

func (*CommentToken) String

func (t *CommentToken) String() string

String for token

func (*CommentToken) Value

func (t *CommentToken) Value() string

Value of token

type CommentsMatcher

// CommentsMatcher matches comments lines.
type CommentsMatcher struct {
	// InlineChars lists the characters used to match inline comments. Default is: #
	InlineChars []byte
	// MatchFn is a func for matching a comments line.
	// NOTE(review): the earlier comment mentioned a "mark" argument that is
	// useful on multi-line comments, but this signature only takes text — confirm.
	MatchFn func(text string) (ok, more bool, err error)
	// DetectEnd is a func for detecting the end of multi-line comments.
	DetectEnd func(text string) bool
}

CommentsMatcher matches comments lines. It automatically merges with the previous comments token.

func (*CommentsMatcher) Match

func (m *CommentsMatcher) Match(text string, prev Token) (Token, error)

Match comments token

func (*CommentsMatcher) MatchEnd

func (m *CommentsMatcher) MatchEnd(text string) bool

MatchEnd for multi line comments

type ErrScan

// ErrScan is the error reported when scanning or parsing contents fails.
type ErrScan struct {
	Msg  string // error message
	Line int    // line number of the error, starting from 1
	Text string // text contents at the point of the error
}

ErrScan error on scan or parse contents

func (ErrScan) Error

func (e ErrScan) Error() string

Error string

type HandleFn

type HandleFn func(t Token)

HandleFn for token

type KeyValueMatcher

// KeyValueMatcher matches key-value tokens.
// It supports parsing `KEY=VALUE` line text contents.
type KeyValueMatcher struct {
	// Separator is the string used to split key and value. Default is "=".
	Separator string
	// MergeComments collects the previous comments token into the value token.
	// If set to true, remember to skip TokComments tokens on each s.Scan().
	MergeComments bool
	// InlineComment enables parsing and splitting of an inline comment.
	InlineComment bool
	// DisableMultiLine disables multi-line value parsing.
	DisableMultiLine bool
	// KeyCheckFn is a func that checks whether a key string is valid.
	KeyCheckFn func(key string) error
}

KeyValueMatcher matches key-value tokens. It supports parsing `KEY=VALUE` line text contents.

func (*KeyValueMatcher) DetectEnd

func (m *KeyValueMatcher) DetectEnd(mark, text string) (ok bool, val string)

DetectEnd for multi line value

func (*KeyValueMatcher) Match

func (m *KeyValueMatcher) Match(text string, prev Token) (Token, error)

Match text line.

type Kind

type Kind uint8

Kind type

// Builtin defined token kinds.
const (
	// TokInvalid is the zero value of Kind.
	TokInvalid Kind = iota
	TokKey
	TokValue
	TokComments
)

builtin defined kinds

func (Kind) String

func (k Kind) String() string

String name for kind

type LiteToken

// LiteToken is the basic token interface: kind, value and validity.
type LiteToken interface {
	// Kind returns the kind of the token.
	Kind() Kind
	// Value returns the value of the token as a string.
	Value() string
	// IsValid reports whether the token is valid.
	IsValid() bool
}

LiteToken interface

type Matcher

// Matcher matches a line of text and produces a Token.
type Matcher interface {
	// Match matches a text line by kind; on success it returns a new Token.
	// NOTE(review): prev is presumably the token of the previous scan — confirm.
	Match(line string, prev Token) (tok Token, err error)
}

Matcher interface

type Parser

type Parser struct {

	// Func for handle tokens
	Func HandleFn
	// contains filtered or unexported fields
}

Parser struct

func NewParser

func NewParser(fn HandleFn) *Parser

NewParser instance

func (*Parser) AddMatchers

func (p *Parser) AddMatchers(ms ...Matcher)

AddMatchers register token matchers

func (*Parser) Parse

func (p *Parser) Parse(bs []byte) error

Parse input bytes

func (*Parser) ParseFrom

func (p *Parser) ParseFrom(r io.Reader) error

ParseFrom input reader

func (*Parser) ParseText

func (p *Parser) ParseText(text string) error

ParseText input string

type StringToken

type StringToken struct {
	BaseToken
}

StringToken struct

func NewEmptyToken

func NewEmptyToken() *StringToken

NewEmptyToken creates an empty token instance. It can be used when you want to skip parsing some contents.

func NewStringToken

func NewStringToken(k Kind, val string) *StringToken

NewStringToken instance.

func (*StringToken) HasMore

func (t *StringToken) HasMore() bool

HasMore is multi line values

func (*StringToken) MergeSame

func (t *StringToken) MergeSame(_ Token) error

MergeSame implements

func (*StringToken) ScanMore

func (t *StringToken) ScanMore(_ *TextScanner) error

ScanMore implements

type TextScanner

type TextScanner struct {
	// contains filtered or unexported fields
}

TextScanner struct.

func NewScanner

func NewScanner(in any) *TextScanner

NewScanner instance

Example
package main

import (
	"fmt"

	"github.com/gookit/goutil/strutil/textscan"
)

func main() {
	ts := textscan.NewScanner(`source code`)
	// add token matcher, can add your custom matcher
	ts.AddMatchers(
		&textscan.CommentsMatcher{
			InlineChars: []byte{'#'},
		},
		&textscan.KeyValueMatcher{
			MergeComments: true,
		},
	)

	// scan and parsing
	for ts.Scan() {
		tok := ts.Token()

		if !tok.IsValid() {
			continue
		}

		// Custom handle the parsed token
		if tok.Kind() == textscan.TokValue {
			vt := tok.(*textscan.ValueToken)
			fmt.Println(vt)
		}
	}

	if ts.Err() != nil {
		fmt.Println("ERROR:", ts.Err())
	}
}
Output:

func (*TextScanner) AddKind

func (s *TextScanner) AddKind(k Kind, name string)

AddKind register new kind

func (*TextScanner) AddMatchers

func (s *TextScanner) AddMatchers(ms ...Matcher)

AddMatchers register token matchers

func (*TextScanner) Each

func (s *TextScanner) Each(fn func(t Token)) error

Each calls the given func for every token.

func (*TextScanner) Err

func (s *TextScanner) Err() error

Err get

func (*TextScanner) Line

func (s *TextScanner) Line() int

Line on current

func (*TextScanner) PrevToken

func (s *TextScanner) PrevToken() Token

PrevToken gets the token of the previous scan.

func (*TextScanner) Scan

func (s *TextScanner) Scan() bool

Scan scans the source input and parses it. Use Token() to get the currently parsed token.

Usage:

ts := textscan.NewScanner(`source ...`)
for ts.Scan() {
	tok := ts.Token()
	// do something...
}
fmt.Println(ts.Err())

func (*TextScanner) ScanNext

func (s *TextScanner) ScanNext() (ok bool, text string)

ScanNext advance and fetch next line text

func (*TextScanner) SetInput

func (s *TextScanner) SetInput(in any)

SetInput for scan and parse

func (*TextScanner) SetNext

func (s *TextScanner) SetNext(text string)

SetNext text for scan and parse

func (*TextScanner) SetSplit

func (s *TextScanner) SetSplit(fn bufio.SplitFunc)

SetSplit set split func on scan

func (*TextScanner) Token

func (s *TextScanner) Token() Token

Token gets the token of the current scan.

type Token

// Token is the token interface returned by matchers and by the scanner.
// It extends LiteToken with multi-line and merge support.
type Token interface {
	LiteToken
	// String returns the string form of the token.
	String() string
	// HasMore reports whether the token has multi-line values.
	HasMore() bool
	// ScanMore scans the multi-line values using ts.
	ScanMore(ts *TextScanner) error
	// MergeSame merges tok into this token.
	// NOTE(review): presumably tok must be of the same kind — confirm against the implementations.
	MergeSame(tok Token) error
}

Token parser

type ValueToken

type ValueToken struct {
	BaseToken
	// contains filtered or unexported fields
}

ValueToken contains key and value contents

func (*ValueToken) Comment

func (t *ValueToken) Comment() string

Comment lines string

func (*ValueToken) HasComment

func (t *ValueToken) HasComment() bool

HasComment for the value

func (*ValueToken) HasMore

func (t *ValueToken) HasMore() bool

HasMore is multi line values

func (*ValueToken) Key

func (t *ValueToken) Key() string

Key name

func (*ValueToken) Mark added in v0.5.15

func (t *ValueToken) Mark() string

Mark for multi line values

func (*ValueToken) MergeSame

func (t *ValueToken) MergeSame(_ Token) error

MergeSame comments token

func (*ValueToken) ScanMore

func (t *ValueToken) ScanMore(ts *TextScanner) error

ScanMore scan multi line values

func (*ValueToken) String

func (t *ValueToken) String() string

String of token

func (*ValueToken) Value

func (t *ValueToken) Value() string

Value text string.

func (*ValueToken) Values added in v0.5.15

func (t *ValueToken) Values() []string

Values for multi line values

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL