prattle

package module
v1.0.0 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Oct 27, 2023 License: ISC Imports: 5 Imported by: 0

README

Prattle

GoDoc Go Report Card Coverage Status

Overview

Package prattle implements a general purpose, unicode-aware lexical scanner and top down operator precedence parser suitable for parsing LL(1) grammars. The scanner and parser can be used independently from each other if desired.

Install

go get -u github.com/askeladdk/prattle

Quickstart

Use Scanner to produce a sequence of Tokens by scanning a source text using a ScanFunc. Use the Expect*, Skip and Advance methods to scan tokens.

// Prepare the scanner.
scanner := prattle.Scanner{
	// Scan scans the next token and returns its kind.
	Scan: func(s *prattle.Scanner) (kind int) {
		// Skip any whitespace.
		s.ExpectAny(unicode.IsSpace)
		s.Skip()

		// Scan the next token.
		switch {
		case s.Done(): // Stop when the entire input has been consumed.
			return 0
		case s.Expect('+'): // Scan the addition operator.
			return 1
		case s.ExpectOne(unicode.IsDigit): // Scan a number consisting of one or more digits.
			s.ExpectAny(unicode.IsDigit)
			return 2
		}

		// Invalid token.
		s.Advance()
		return -1
	},
}

Use Parser and Driver to associate tokens produced by Scanner with ParseFuncs. Define the Driver first.

// Define the parsing Driver.
type driver struct {
	stack []int
}

func (d *driver) push(i int) {
	d.stack = append(d.stack, i)
}

func (d *driver) pop() (i int) {
	n := len(d.stack)
	i, d.stack = d.stack[n-1], d.stack[:n-1]
	return
}

func (d *driver) number(p *prattle.Parser, t prattle.Token) error {
	n, _ := strconv.Atoi(t.Text)
	d.push(n)
	return nil
}

func (d *driver) add(p *prattle.Parser, t prattle.Token) error {
	// Parse the right hand operator.
	_ = p.Parse(d.Precedence(t.Kind))

	right := d.pop()
	left := d.pop()
	sum := left + right
	fmt.Printf("%d + %d = %d\n", left, right, sum)
	d.push(sum)
	return nil
}

func (d *driver) Prefix(kind int) prattle.ParseFunc {
	return d.number
}

func (d *driver) Infix(kind int) prattle.ParseFunc {
	return d.add
}

func (d *driver) Precedence(kind int) int {
	return kind
}

func (d *driver) ParseError(t prattle.Token) error {
	return fmt.Errorf("%s", t)
}

// Prepare the parser.
parser := prattle.Parser{
	Driver: &driver{},
}

Finally, Init the scanner and parser, and parse an expression.

source := "1 + 23 + 456 + 7890"
scanner.InitWithString(source)
parser.Init(&scanner).Parse(0)

// Output:
// 1 + 23 = 24
// 24 + 456 = 480
// 480 + 7890 = 8370

See the calculator for a complete example featuring prefix, postfix, infix, left, right and non-associative operators. Also read the documentation on pkg.go.dev.

License

Package prattle is released under the terms of the ISC license.

Documentation

Overview

Package prattle implements a general purpose, unicode-aware lexical scanner and top down operator precedence parser suitable for parsing LL(1) grammars. The scanner and parser can be used independently from each other if desired.

Example (Interpreter)

This example demonstrates parsing a simple programming language that consists of a sequence of statements.

package main

import (
	"fmt"
	"strconv"
	"unicode"

	"github.com/askeladdk/prattle"
)

const (
	ksemicolon = 1 + iota
	kassign
	kplus
	kident
	knumber
)

func testScan(s *prattle.Scanner) int {
	s.ExpectAny(unicode.IsSpace)
	s.Skip()

	switch {
	case s.Done():
		return 0
	case s.ExpectOne(unicode.IsLetter):
		s.ExpectAny(unicode.IsLetter)
		return kident
	case s.ExpectOne(unicode.IsDigit):
		s.ExpectAny(unicode.IsDigit)
		return knumber
	case s.Expect('='):
		return kassign
	case s.Expect('+'):
		return kplus
	case s.Expect(';'):
		return ksemicolon
	}

	s.Advance()
	return -1
}

type testDriver struct {
	stack  []prattle.Token
	idents map[string]int
}

func (d *testDriver) pop() (v prattle.Token) {
	if n := len(d.stack); n > 0 {
		v, d.stack = d.stack[n-1], d.stack[:n-1]
	}
	return v
}

func (d *testDriver) push(v prattle.Token) {
	d.stack = append(d.stack, v)
}

func (d *testDriver) Precedence(kind int) int {
	return kind
}

func (d *testDriver) tonumber(t prattle.Token) int {
	if t.Kind == kident {
		return d.idents[t.Text]
	}
	num, _ := strconv.Atoi(t.Text)
	return num
}

func (d *testDriver) plus(p *prattle.Parser, t prattle.Token) error {
	if err := p.Parse(d.Precedence(t.Kind)); err != nil {
		return err
	}
	right := d.tonumber(d.pop())
	left := d.tonumber(d.pop())
	d.push(prattle.Token{
		Kind: knumber,
		Text: strconv.Itoa(left + right),
	})
	return nil
}

func (d *testDriver) assign(p *prattle.Parser, t prattle.Token) error {
	if err := p.Parse(d.Precedence(kassign)); err != nil {
		return err
	}

	right := d.pop()
	left := d.pop()
	d.idents[left.Text] = d.tonumber(right)
	return nil
}

func (d *testDriver) primitive(p *prattle.Parser, t prattle.Token) error {
	d.push(t)
	return nil
}

func (d *testDriver) Prefix(kind int) prattle.ParseFunc {
	switch kind {
	default:
		return nil
	case kident, knumber:
		return d.primitive
	}
}

func (d *testDriver) Infix(kind int) prattle.ParseFunc {
	switch kind {
	default:
		return nil
	case kplus:
		return d.plus
	case kassign:
		return d.assign
	}
}

func (d *testDriver) ParseError(t prattle.Token) error {
	return fmt.Errorf("%s: unexpected '%s'", t.Position, t.Text)
}

// This example demonstrates parsing a simple programming language that consists of a sequence of statements.
func main() {
	c := testDriver{
		idents: make(map[string]int),
	}

	source := "a = 1;\nb = 2;\nc = a+b+b+a;\n"

	s := prattle.Scanner{Scan: testScan}
	p := prattle.Parser{Driver: &c}
	p.Init(s.InitWithString(source))

	// Parse expressions separated by semicolons.
	for p.Peek().Kind != 0 {
		if err := p.Parse(ksemicolon); err != nil {
			fmt.Println(err)
			return
		} else if !p.Expect(ksemicolon) {
			fmt.Println("expected semicolon")
			return
		}
	}

	fmt.Printf("c = %d\n", c.idents["c"])

}
Output:

c = 6
Example (Readme)

This example is described in the readme.

package main

import (
	"fmt"
	"strconv"
	"unicode"

	"github.com/askeladdk/prattle"
)

type driver struct {
	stack []int
}

func (d *driver) push(i int) {
	d.stack = append(d.stack, i)
}

func (d *driver) pop() (i int) {
	n := len(d.stack)
	i, d.stack = d.stack[n-1], d.stack[:n-1]
	return
}

func (d *driver) number(p *prattle.Parser, t prattle.Token) error {
	n, _ := strconv.Atoi(t.Text)
	d.push(n)
	return nil
}

func (d *driver) add(p *prattle.Parser, t prattle.Token) error {
	// First parse the right hand operator.
	if err := p.Parse(d.Precedence(t.Kind)); err != nil {
		return err
	}

	right := d.pop()
	left := d.pop()
	acc := left + right
	fmt.Printf("%d + %d = %d\n", left, right, acc)
	d.push(acc)
	return nil
}

func (d *driver) Prefix(k int) prattle.ParseFunc {
	if k == 2 {
		return d.number
	}
	return nil
}

func (d *driver) Infix(k int) prattle.ParseFunc {
	if k == 1 {
		return d.add
	}
	return nil
}

func (d *driver) Precedence(k int) int {
	return k
}

func (d *driver) ParseError(t prattle.Token) error {
	return fmt.Errorf("%s", t)
}

// This example is described in the readme.
func main() {
	scanner := prattle.Scanner{
		Scan: func(s *prattle.Scanner) int {
			// Skip whitespaces.
			s.ExpectAny(unicode.IsSpace)
			s.Skip()
			switch {
			case s.Done(): // Stop if the entire input has been consumed.
				return 0
			case s.Expect('+'):
				return 1
			case s.ExpectOne(unicode.IsDigit): // Read a number consisting of one or more digits.
				s.ExpectAny(unicode.IsDigit)
				return 2
			}

			s.Advance()
			return -1
		},
	}

	parser := prattle.Parser{
		Driver: &driver{},
	}

	source := "1 + 23 + 456 + 7890"
	scanner.InitWithString(source)
	if err := parser.Init(&scanner).Parse(0); err != nil {
		fmt.Println(err)
	}

}
Output:

1 + 23 = 24
24 + 456 = 480
480 + 7890 = 8370
Example (Sentence)

This example demonstrates tokenising a sentence into words and punctuation.

package main

import (
	"fmt"
	"unicode"

	"github.com/askeladdk/prattle"
)

func main() {
	scan := func(s *prattle.Scanner) int {
		// Skip any whitespace.
		s.ExpectAny(unicode.IsSpace)
		s.Skip()

		// Return an appropriate token kind.
		switch {
		case s.Done(): // EOF
			return 0
		case s.ExpectOne(unicode.IsLetter): // Word
			s.ExpectAny(unicode.IsLetter)
			return 1
		case s.ExpectOne(unicode.IsPunct): // Punctuation
			return 2
		}

		// Unrecognized character.
		s.Advance()
		return -1
	}

	sentence := "I love it when a plan comes together!"

	s := (&prattle.Scanner{
		Scan: scan,
	}).InitWithString(sentence)

	for tok, ok := s.Next(); ok; tok, ok = s.Next() {
		fmt.Printf("[%d] %s\n", tok.Kind, tok.Text)
	}

}
Output:

[1] I
[1] love
[1] it
[1] when
[1] a
[1] plan
[1] comes
[1] together
[2] !

Index

Examples

Constants

This section is empty.

Variables

View Source
var NonAssoc error = errors.New("non-associative operator")

NonAssoc is returned by infix ParseFuncs to indicate that an operator is non-associative.

Functions

This section is empty.

Types

type AcceptFunc

type AcceptFunc func(rune) bool

AcceptFunc accepts a rune.

func OneOf

func OneOf(chars string) AcceptFunc

OneOf returns an AcceptFunc that reports whether a rune appears in chars.

type Driver added in v0.0.3

type Driver interface {
	// Infix associates an infix ParseFunc with a token.
	// Returning nil is a parse error.
	Infix(kind int) ParseFunc

	// Prefix associates a prefix ParseFunc with a token.
	// Returning nil is a parse error.
	Prefix(kind int) ParseFunc

	// Precedence associates an operator precedence with a token.
	// The higher the precedence, the tighter the token binds.
	Precedence(kind int) (precedence int)

	// ParseError is called by the Parser when it encounters a token that it cannot parse.
	ParseError(Token) error
}

Driver drives the parsing algorithm by associating tokens to parser functions. It is expected to hold the parse state and results like the syntax tree.

type Iterator added in v0.0.6

type Iterator interface {
	// Next returns the next token in the stream
	// and yields true until the stream ends.
	Next() (token Token, ok bool)
}

Iterator represents a stream of tokens.

type ParseFunc

type ParseFunc func(*Parser, Token) error

ParseFunc parses an expression.

type Parser

type Parser struct {
	// Driver drives the Parser.
	Driver
	// contains filtered or unexported fields
}

Parser implements the Pratt parsing algorithm, also known as the top down operator precedence (TDOP) algorithm. This is a recursive descent algorithm that handles operator precedence in a simple and flexible manner.

Parser consumes tokens from an Iterator and uses a Driver to determine precedence and executing parsing functions.

func (*Parser) Advance

func (p *Parser) Advance()

Advance reads the next token from the Iterator.

func (*Parser) Expect

func (p *Parser) Expect(kind int) bool

Expect advances to the next token if the current token kind matches.

func (*Parser) Init

func (p *Parser) Init(iter Iterator) *Parser

Init initializes the Parser with an Iterator and returns it.

func (*Parser) Parse added in v0.0.7

func (p *Parser) Parse(least int) error

Parse parses using the TDOP algorithm until it encounters a token with an equal or lower precedence than least. It may be called in a mutual recursive manner by the parsing functions provided by the Driver.

func (*Parser) Peek

func (p *Parser) Peek() Token

Peek returns the last read token.

type Position

type Position struct {
	// Filename is the filename of the input, if any.
	Filename string

	// Offset is the byte offset, starting at 0.
	Offset int

	// Line is the line number, starting at 1.
	Line int

	// Column is the column number, starting at 1 (character count per line).
	Column int
}

Position represents the position of a Token in the input string.

func (Position) IsValid

func (p Position) IsValid() bool

IsValid reports whether a Position is valid. A Position is valid if its Line field is greater than zero.

func (Position) String

func (p Position) String() string

String implements fmt.Stringer.

type ScanFunc

type ScanFunc func(*Scanner) (kind int)

ScanFunc scans the next token and returns its kind. Zero is reserved to signal end-of-input.

type Scanner

type Scanner struct {
	// Position of the last read token.
	// The Filename field will never be modified by Scanner.
	Position

	// Scan scans tokens.
	Scan ScanFunc
	// contains filtered or unexported fields
}

Scanner produces a stream of tokens from a string or an io.RuneReader.

func (*Scanner) Advance

func (s *Scanner) Advance()

Advance advances the cursor by one rune.

func (*Scanner) Done

func (s *Scanner) Done() bool

Done reports whether the entire input has been consumed.

func (*Scanner) Expect

func (s *Scanner) Expect(r rune) bool

Expect advances the cursor if the current rune matches.

func (*Scanner) ExpectAny

func (s *Scanner) ExpectAny(accept AcceptFunc)

ExpectAny advances the cursor zero or more times.

func (*Scanner) ExpectOne

func (s *Scanner) ExpectOne(accept AcceptFunc) bool

ExpectOne advances the cursor if the current rune is accepted.

func (*Scanner) InitWithReader added in v0.0.4

func (s *Scanner) InitWithReader(r io.RuneReader) *Scanner

Init initializes a Scanner with an input reader and returns it. When initialized this way, the Scanner tokenizes unbounded streams but must allocate.

func (*Scanner) InitWithString added in v0.0.4

func (s *Scanner) InitWithString(source string) *Scanner

Init initializes a Scanner with an input string and returns it. When initialized this way, the Scanner tokenizes without allocations.

func (*Scanner) Next

func (s *Scanner) Next() (Token, bool)

Next implements Iterator.

func (*Scanner) Peek

func (s *Scanner) Peek() rune

Peek returns the current rune.

func (*Scanner) Skip

func (s *Scanner) Skip()

Skip swallows the next token.

func (*Scanner) Text added in v0.0.3

func (s *Scanner) Text() string

Text returns the token string that has been scanned so far.

type Token

type Token struct {
	Position

	// Kind identifies the kind of token.
	Kind int

	// Text is the token value.
	Text string
}

Token is a fragment of tokenised text.

func (Token) String

func (t Token) String() string

String implements fmt.Stringer.

Directories

Path Synopsis
_examples

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL