parser

package module
v0.0.0-...-faed923 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Nov 4, 2018 License: BSD-3-Clause Imports: 2 Imported by: 8

README

go_parser

Parser API in Go

ABOUT

The 'go_parser' package is an API to help you create hand-written lexers and parsers.

The package was inspired by Rob Pike's video Lexical Scanning In Go and golang's 'template' package.

PARSER INTERFACE

Below is the interface for the main Parser type:

// Parser helps you process lexer tokens
type Parser interface {

	// PeekTokenType allows you to look ahead at tokens without consuming them
	PeekTokenType(int) lexer.TokenType

	// PeekToken allows you to look ahead at tokens without consuming them
	PeekToken(int) *lexer.Token

	// NextToken consumes and returns the next token
	NextToken() *lexer.Token

	// SkipToken consumes the next token without returning it
	SkipToken()

	// SkipTokens consumes the next n tokens without returning them
	SkipTokens(int)

	// BackupToken un-consumes the last token
	BackupToken()

	// BackupTokens un-consumes the last n tokens
	BackupTokens(int)

	// ClearTokens clears all consumed tokens
	ClearTokens()

	// Emit emits an object, consuming matched tokens
	Emit(interface{})

	// EOF reports whether the parser has reached the end of input
	EOF() bool

	// Next retrieves the next emitted item
	Next() interface{}

	// Marker returns a marker that you can use to reset the parser state later
	Marker() *Marker

	// Reset resets the parser state to the specified marker
	Reset(*Marker)
}

EXAMPLE

Below is a sample calculator program that uses the parser (and lexer) API:

//
//	calc.go implements a simple calculator using the iNamik lexer and parser api.
//
//	Input is read from STDIN
//
//	The input expression is matched against the following pattern:
//
//	input_exp:
//	( id '=' )? general_exp
//	general_exp:
//		operand ( operator operand )?
//	operand:
//		number | id | '(' general_exp ')'
//	operator:
//		'+' | '-' | '*' | '/'
//	number:
//		digit+ ( '.' digit+ )?
//	digit:
//		['0'..'9']
//	id:
//		alpha ( alpha | digit )*
//	alpha:
//		['a'..'z'] | ['A'..'Z']
//
//	Precedence is as expected, with '*' and '/' having higher precedence
//	than '+' and '-', as follows:
//
//	1 + 2 * 3 - 4 / 5  ==  1 + (2 * 3) - (4 / 5)
//

package main

import (
	"bufio"
	"bytes"
	"fmt"
	"os"
	"strconv"
	"strings"
)
import (
	"github.com/iNamik/go_lexer"
	"github.com/iNamik/go_lexer/rangeutil"
	"github.com/iNamik/go_parser"
)

// We define our lexer tokens starting from the pre-defined EOF token.
// Note: T_NIL is the second ConstSpec in the block, so iota is 1 there,
// making T_NIL == TokenTypeEOF+1; the implicit repetition then yields
// T_ID == TokenTypeEOF+2, T_NUMBER == TokenTypeEOF+3, and so on.
const (
	T_EOF lexer.TokenType = lexer.TokenTypeEOF
	T_NIL                 = lexer.TokenTypeEOF + iota
	T_ID
	T_NUMBER
	T_PLUS
	T_MINUS
	T_MULTIPLY
	T_DIVIDE
	T_EQUALS
	T_OPEN_PAREN
	T_CLOSE_PAREN
)

// vars stores the values of variables assigned via 'id = expression'.
var vars = map[string]float64{}

// singleChars lists the characters that form one-character tokens;
// the token type for singleChars[i] is singleTokens[i].
var singleChars = []byte{'+', '-', '*', '/', '=', '(', ')'}

// singleTokens is kept index-aligned with singleChars (see above).
var singleTokens = []lexer.TokenType{T_PLUS, T_MINUS, T_MULTIPLY, T_DIVIDE, T_EQUALS, T_OPEN_PAREN, T_CLOSE_PAREN}

// Multi-character tokens: byte classes used by the lexer match functions.
var bytesWhitespace = []byte{' ', '\t'}

var bytesDigits = rangeutil.RangeToBytes("0-9")

var bytesAlpha = rangeutil.RangeToBytes("a-zA-Z")

var bytesAlphaNum = rangeutil.RangeToBytes("0-9a-zA-Z")

// main reads lines from STDIN, lexing and parsing each non-empty line
// as a calculator expression and printing every value the parser emits.
func main() {
	// Buffered reader over STDIN so we can pull whole lines
	stdin := bufio.NewReader(os.Stdin)

	for {
		line, _, err := stdin.ReadLine()
		if err != nil {
			// EOF (or a read failure) ends the session
			break
		}

		if len(line) == 0 {
			// Nothing to evaluate on a blank line
			continue
		}

		// Turn the input text into tokens...
		l := lexer.New(lex, strings.NewReader(string(line)), len(line), 2)

		// ...and feed those tokens into a parser that emits expression values
		p := parser.New(parse, l, 2)

		// Drain the parser; a nil item signals it is finished
		for {
			item := p.Next()
			if item == nil {
				break
			}
			fmt.Printf("%v\n", item)
		}
	}
}

// lex is the starting (and only) StateFn for lexing the input into tokens.
// It emits exactly one token (or skips one span of input) per invocation
// and returns itself to continue, or nil once EOF has been emitted.
func lex(l lexer.Lexer) lexer.StateFn {

	// End of input: emit EOF and stop the state machine.
	if l.MatchEOF() {
		l.EmitEOF()
		return nil
	}

	// Operators, '=' and parens are all single characters; the token type
	// for singleChars[idx] lives at singleTokens[idx].
	if idx := bytes.IndexRune(singleChars, l.PeekRune(0)); idx >= 0 {
		l.NextRune()
		l.EmitToken(singleTokens[idx])
		return lex
	}

	switch {

	// Whitespace separates tokens but is otherwise discarded.
	case l.MatchOneOrMoreBytes(bytesWhitespace):
		l.IgnoreToken()

	// Number: digit+ ( '.' digit+ )?
	case l.MatchOneOrMoreBytes(bytesDigits):
		if l.PeekRune(0) == '.' {
			l.NextRune() // consume '.'
			if !l.MatchOneOrMoreBytes(bytesDigits) {
				// A '.' with no fraction digits is malformed
				printError(l.Column(), "Illegal number format - Missing digits after '.'")
				l.IgnoreToken()
				break
			}
		}
		l.EmitTokenWithBytes(T_NUMBER)

	// Identifier: alpha ( alpha | digit )*
	case l.MatchOneBytes(bytesAlpha) && l.MatchZeroOrMoreBytes(bytesAlphaNum):
		l.EmitTokenWithBytes(T_ID)

	// Anything else: report it and consume it so lexing can continue.
	default:
		l.NextRune()
		printError(l.Column(), "Unknown Character")
		l.IgnoreToken()
	}

	// Keep lexing
	return lex
}

// parse is the parser's single StateFn. It evaluates one line of input:
// either an assignment ( id '=' general_exp ) or a bare general
// expression whose value is emitted. Returns nil - one pass only.
func parse(p parser.Parser) parser.StateFn {

	if p.PeekTokenType(0) != T_EOF {
		isAssignment := p.PeekTokenType(0) == T_ID && p.PeekTokenType(1) == T_EQUALS

		if isAssignment {
			// Assignment: remember the id, discard '=', store the value
			idTok := p.NextToken()

			p.SkipToken() // skip '='

			if val, ok := pGeneralExpression(p); ok {
				t := p.NextToken()
				if t.Type() == T_EOF {
					vars[string(idTok.Bytes())] = val
				} else {
					printError(t.Column(), "Expecting operator")
				}
			}
		} else {
			// Bare expression: evaluate it and emit its value
			if val, ok := pGeneralExpression(p); ok {
				t := p.NextToken()
				if t.Type() == T_EOF {
					p.Emit(val)
				} else {
					printError(t.Column(), "Expecting operator")
				}
			}
		}
	}

	p.ClearTokens()
	p.Emit(nil) // nil tells the consumer we are done - one pass only

	return nil
}

// pGeneralExpression is the entry point for parsing a general expression.
// It simply delegates to pAdditiveExpression (the lowest-precedence
// level), existing only to keep the grammar's naming explicit.
func pGeneralExpression(p parser.Parser) (float64, bool) {
	return pAdditiveExpression(p)
}

// pAdditiveExpression parses [ expression ( ( '+' | '-' ) expression )? ].
// It returns the computed value and whether parsing succeeded.
//
// Bug fix: the original used ':=' for the recursive call, shadowing the
// named return 'ok'. A failure in the right-hand operand was therefore
// silently swallowed and the caller saw success with a partial value.
// Assigning with '=' (via a pre-declared 'r') propagates the failure.
func pAdditiveExpression(p parser.Parser) (f float64, ok bool) {

	f, ok = pMultiplicitiveExpression(p)

	if ok {
		t := p.NextToken()
		switch t.Type() {

		// Add (+)
		case T_PLUS:
			var r float64
			r, ok = pAdditiveExpression(p)
			if ok {
				f += r
			}

		// Subtract (-)
		case T_MINUS:
			var r float64
			r, ok = pAdditiveExpression(p)
			if ok {
				f -= r
			}

		// Unknown - Send it back upstream
		default:
			p.BackupToken()
			ok = true
		}
	}

	return
}

// pMultiplicitiveExpression parses [ expression ( ( '*' | '/' ) expression )? ].
// It returns the computed value and whether parsing succeeded.
//
// Bug fix: as in pAdditiveExpression, the original ':=' shadowed the
// named return 'ok', so a failed right-hand operand was reported as
// success. Assigning with '=' propagates the failure to the caller.
func pMultiplicitiveExpression(p parser.Parser) (f float64, ok bool) {
	f, ok = pOperand(p)

	if ok {
		t := p.NextToken()
		switch t.Type() {

		// Multiply (*)
		case T_MULTIPLY:
			var r float64
			r, ok = pMultiplicitiveExpression(p)
			if ok {
				f *= r
			}

		// Divide (/)
		case T_DIVIDE:
			var r float64
			r, ok = pMultiplicitiveExpression(p)
			if ok {
				f /= r
			}

		// Unknown - Send it back upstream
		default:
			p.BackupToken()
			ok = true
		}
	}

	return
}

// pOperand parses [ id | number | '(' general_exp ')' ].
// On failure it prints a diagnostic, rewinds the parser to where it
// started, and returns (0, false).
func pOperand(p parser.Parser) (f float64, ok bool) {
	mark := p.Marker()
	t := p.NextToken()

	switch t.Type() {

	// Variable reference - must have been assigned previously
	case T_ID:
		name := string(t.Bytes())
		f, ok = vars[name]
		if !ok {
			printError(t.Column(), fmt.Sprint("id '", name, "' not defined"))
			f = 0.0
		}

	// Numeric literal
	case T_NUMBER:
		var err error
		f, err = strconv.ParseFloat(string(t.Bytes()), 64)
		ok = err == nil
		if !ok {
			printError(t.Column(), fmt.Sprint("Error reading number: ", err.Error()))
			f = 0.0
		}

	// '(' Expression ')' - must be closed by a matching paren
	case T_OPEN_PAREN:
		f, ok = pGeneralExpression(p)
		if ok && p.NextToken().Type() != T_CLOSE_PAREN {
			printError(t.Column(), "Unbalanced Paren")
			ok = false
			f = 0.0
		}

	// Ran out of input where an operand was required
	case T_EOF:
		printError(t.Column(), "Unexpected EOF - Expecting operand")
		ok = false
		f = 0.0

	// Anything else is not a valid operand
	default:
		printError(t.Column(), "Expecting operand")
		ok = false
		f = 0.0
	}

	// Rewind consumed tokens on failure so the caller can recover
	if !ok {
		p.Reset(mark)
	}

	return
}

// printError prints an error message beneath the input line, with a
// caret ('^') pointing at the specified 1-based column.
func printError(col int, msg string) {
	// Guard against a non-positive column: strings.Repeat panics on a
	// negative count, and lexer column numbering is assumed 1-based
	// here - TODO confirm against the lexer package.
	if col < 1 {
		col = 1
	}
	fmt.Print(strings.Repeat(" ", col-1), "^ ", msg, "\n")
}

INSTALL

The package is built using the Go tool. Assuming you have correctly set the $GOPATH variable, you can run the following command:

go get github.com/iNamik/go_parser

DEPENDENCIES

go_parser depends on the iNamik go_container queue package and the iNamik go_lexer package:

AUTHORS

  • David Farrell

Documentation

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

This section is empty.

Types

type Marker

type Marker struct {
	// contains filtered or unexported fields
}

Marker stores the state of the parser to allow rewinding

type Parser

type Parser interface {

	// Line returns the line number of the next token (aka PeekTokenType(0).Line() )
	Line() int

	// Column returns the column number of the next token (aka PeekTokenType(0).Column() )
	Column() int

	// PeekTokenType allows you to look ahead at tokens without consuming them
	PeekTokenType(int) lexer.TokenType

	// PeekToken allows you to look ahead at tokens without consuming them
	PeekToken(int) *lexer.Token

	// NextToken consumes and returns the next token
	NextToken() *lexer.Token

	// SkipToken consumes the next token without returning it
	SkipToken()

	// SkipTokens consumes the next n tokens without returning them
	SkipTokens(int)

	// BackupToken un-consumes the last token
	BackupToken()

	// BackupTokens un-consumes the last n tokens
	BackupTokens(int)

	// ClearTokens clears all consumed tokens
	ClearTokens()

	// Emit emits an object, consuming matched tokens
	Emit(interface{})

	// EOF reports whether the parser has reached the end of input
	EOF() bool

	// Next retrieves the next emitted item
	Next() interface{}

	// Marker returns a marker that you can use to reset the parser state later
	Marker() *Marker

	// Reset resets the parser state to the specified marker
	Reset(*Marker)
}

Parser helps you process lexer tokens

func New

func New(startState StateFn, lex lexer.Lexer, channelCap int) Parser

New returns a new Parser object

type StateFn

type StateFn func(Parser) StateFn

StateFn represents the state of the parser as a function that returns the next state.

Directories

Path Synopsis
examples

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL