Documentation ¶
Overview ¶
Package parsekit implements a simple, reusable parser for simple grammars.
Example ¶
package main

import (
	"fmt"
	"net/netip"
	"strconv"
	"time"
	"unicode/utf8"

	"github.com/TroutSoftware/parsekit"
)

func main() {
	// Build a parser over the sample DHCP lease file, using the custom
	// lexer scantk and recovering from errors at each "lease" keyword.
	p := parsekit.Init[Lease](
		parsekit.ReadFiles("testdata/example_dhcp1"),
		parsekit.WithLexer(scantk),
		parsekit.SynchronizeAt("lease"),
	)
	ParseLease(p)
	lease, err := p.Finish()
	if err != nil {
		fmt.Printf("cannot parse lease file: %s", err)
		return
	}
	fmt.Println(lease)
}

// Lease is the value populated while parsing a DHCP lease block.
type Lease struct {
	Interface    string
	FixedAddress netip.Addr
	Expire       time.Time
}

// ParseLease parses a single `lease { ... }` block, filling p.Value.
// Unknown options are skipped up to the closing ';'.
func ParseLease(p *parsekit.Parser[Lease]) {
	// On error, Errf panics and Synchronize resumes at the next "lease".
	defer p.Synchronize()

	var err error
	p.Expect(IdentToken, "lease")
	p.Expect('{', "opening bracket")
	for p.More() {
		if p.Match('}') {
			return
		}
		p.Expect(IdentToken, "option")
		opt := p.Lit
		switch opt {
		case "interface":
			p.Expect(StringToken, "interface")
			// p.Lit is still quoted at this point; Unquote strips the quotes.
			p.Value.Interface, err = strconv.Unquote(p.Lit)
			if err != nil {
				p.Errf("invalid interface name %q: %s", p.Lit, err)
			}
			p.Expect(';', ";")
		case "fixed-address":
			p.Expect(IPToken, "IP address")
			p.Value.FixedAddress, err = netip.ParseAddr(p.Lit)
			if err != nil {
				p.Errf("invalid IP address %q: %s", p.Lit, err)
			}
			p.Expect(';', ";")
		case "expire":
			// Lease files write e.g. "expire 4 2023/11/03 11:27:26;":
			// a weekday number followed by the date-time proper.
			p.Expect(NumberToken, "number")
			p.Expect(DateTimeToken, "date and time of expiration")
			p.Value.Expire, err = time.Parse("2006/01/02 15:04:05", p.Lit)
			if err != nil {
				p.Errf("invalid time of expiration %q: %s", p.Lit, err)
			}
			p.Expect(';', ";")
		default:
			// Unknown option: discard tokens up to the terminating ';'.
			for !p.Match(';') {
				p.Skip()
			}
		}
	}
}

// Multi-character token kinds, represented as negative runes by convention
// (single-character tokens use their own code point).
const (
	NumberToken rune = -1 - iota
	IPToken
	DateTimeToken
	IdentToken
	StringToken
	InvalidType
)

// scantk is the Lexer for the lease grammar: tk is the first rune of the
// next token; it returns the token kind and the token's length.
func scantk(sc *parsekit.Scanner, tk rune) (rune, int) {
	switch {
	case tk == 0:
		return 0, 0
	case tk == '{', tk == '}', tk == ';':
		return tk, 1
	case tk == '"':
		return StringToken, sc.LexString()
	case '0' <= tk && tk <= '9':
		return scanumeral(sc, tk)
	default:
		return IdentToken, sc.LexIdent()
	}
}

// unrolled state machine
// transitions generated by calling transgen.awk
// TODO make this go:generatable
func scanumeral(sc *parsekit.Scanner, lead rune) (rune, int) {
	const (
		numeral uint8 = iota
		date
		time
		ip
		final
	)
	// Per-state transition table: flat (input byte, next state) pairs,
	// in the format expected by Scanner.ScanWithTable.
	var transitions = [final][]byte{
		numeral: {' ', final, ';', final, '.', ip, '/', date, '0', numeral, '1', numeral, '2', numeral, '3', numeral, '4', numeral, '5', numeral, '6', numeral, '7', numeral, '8', numeral, '9', numeral},
		ip:      {':', ip, ' ', final, ';', final, '.', ip, '0', ip, '1', ip, '2', ip, '3', ip, '4', ip, '5', ip, '6', ip, '7', ip, '8', ip, '9', ip},
		time:    {':', time, ';', final, '0', time, '1', time, '2', time, '3', time, '4', time, '5', time, '6', time, '7', time, '8', time, '9', time},
		date:    {' ', time, '/', date, '0', date, '1', date, '2', date, '3', date, '4', date, '5', date, '6', date, '7', date, '8', date, '9', date},
	}
	st, n := sc.ScanWithTable(transitions[:])
	switch st {
	default:
		return InvalidType, n
	case numeral:
		return NumberToken, n
	case time:
		return DateTimeToken, n
	case ip:
		return IPToken, n + utf8.RuneLen(lead) - 1 // drop space or trailing ;
	}
}
Output: {eth0 10.67.21.85 2023-11-03 11:27:26 +0000 UTC}
Index ¶
- Constants
- type Lexer
- type Parser
- func (p *Parser[T]) Errf(format string, args ...any)
- func (p *Parser[T]) Expect(tk rune, msg string)
- func (p *Parser[T]) Finish() (T, error)
- func (p *Parser[T]) Match(tk ...rune) bool
- func (p *Parser[T]) More() bool
- func (p *Parser[T]) Skip()
- func (p *Parser[T]) Synchronize()
- func (p *Parser[T]) Unread(n int)
- type ParserOptions
- type Position
- type Scanner
Examples ¶
Constants ¶
const CatchAll = 0
CatchAll is a well-known transition that gets applied if no other transition matches.
const ErrLit = "<error>"
ErrLit is the literal value set after a failed call to Parser.Expect
Variables ¶
This section is empty.
Functions ¶
This section is empty.
Types ¶
type Lexer ¶
Lexer is a function to create tokens from a scanner. lead is the first Unicode code point of the current token. By convention, single-character tokens are represented by their own value (e.g. '{' -> U+007B), while multiple-character tokens are represented by negative runes (cf the package example).
type Parser ¶
type Parser[T any] struct { Lit string // token literal Value T // contains filtered or unexported fields }
Parser implements a recursive descent parser. It provides facilities for error reporting, peeking, …
func Init ¶
func Init[T any](opts ...ParserOptions) *Parser[T]
Init creates a new parser. At least two options must be provided: (1) a reader, and (2) a lexer function. Further options (e.g. SynchronizeAt) can be passed to customize the parser's behavior.
func (*Parser[T]) Errf ¶
Errf triggers a panic mode with the given formatted error. The position is correctly attached to the error.
func (*Parser[T]) Expect ¶
Expect advances the parser to the next input, making sure it matches the token tk.
func (*Parser[T]) Finish ¶
Finish returns the value and error of the parsing. This makes it convenient to use at the bottom of a function:
func ReadConfigFiles() (MyStruct, error) { p := Init(ReadFiles(xxx), Lexer(yyy)) parseConfig(p) return p.Finish() }
func (*Parser[T]) Match ¶
Match returns true if tk is found at the current parsing point. It does not consume any input on failure, so can be used in a test.
func (*Parser[T]) Synchronize ¶
func (p *Parser[T]) Synchronize()
Synchronize handles error recovery in the parsing process: when an error occurs, the parser panics all the way to the Parser.Synchronize function. All tokens are thrown away until the first of the synchronization literals (set with SynchronizeAt) is found.
Run this in a top-level `defer` statement at the level of the synchronisation elements.
type ParserOptions ¶
type ParserOptions func(*emb)
ParserOptions specialize the behavior of the parser.
func ReadFiles ¶
func ReadFiles(docs ...string) ParserOptions
ReadFiles is an option to specify which files are to be parsed
func ReadFrom ¶
func ReadFrom(in io.Reader) ParserOptions
ReadFrom is an option to specify to read from an existing reader (e.g. stdin)
func SynchronizeAt ¶
func SynchronizeAt(lits ...string) ParserOptions
SynchronizeAt sets the synchronisation literals for error recovery. See Parser.Synchronize for full documentation.
func Verbose ¶
func Verbose() ParserOptions
func WithLexer ¶
func WithLexer(lx Lexer) ParserOptions
WithLexer options sets the lexer used by the parser
type Position ¶
type Position struct { Filename string // filename, if any Offset int // byte offset, starting at 0 Line int // line number, starting at 1 Column int // column number, starting at 1 (character count per line) }
Position is a value that represents a source position. A position is valid if Line > 0.
type Scanner ¶
type Scanner struct {
// contains filtered or unexported fields
}
Scanner reads tokens from a stream of multiple files. It efficiently tracks position information.
func ScanFiles ¶
ScanFiles creates a scanner over the files with the given names. Files are scanned in the order they are given in, and no token can span two files.
func ScanReader ¶
func ScanReader(in io.ReadCloser) *Scanner
func (*Scanner) LexIdent ¶
LexIdent returns the number of characters in the next identifier value. Identifiers are recognized as ASCII letters (a-zA-Z), underscores, or dashes.
func (*Scanner) LexString ¶
LexString returns the number of characters in the next string value. Strings are delimited by double quotes, single quotes, or backticks, and support \" escaping. This lexer assumes the first character of the string (the initial ") has not yet been consumed.
func (*Scanner) ScanWithTable ¶
ScanWithTable uses the state transition table to read the next characters. A transition table consists, for each state, of (token, next_state) pairs. Transition tables can be constructed from a readable textual definition using the transgen.awk script.