parser

package
v1.1.1 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Jun 21, 2020 License: MIT Imports: 10 Imported by: 0

Documentation

Overview

Package parser contains the Rumble parser.

Lexer

Lex() is a lexer function to convert a given source file into a list of tokens.

Based on a talk by Rob Pike: Lexical Scanning in Go

https://www.youtube.com/watch?v=HxaD_trXwRE

Parser

Parse() is a parser which produces a parse tree from a given set of lexer tokens.

Based on an article by Douglas Crockford: Top Down Operator Precedence

http://javascript.crockford.com/tdop/tdop.html

which is based on the ideas of Vaughan Pratt and his paper: Top Down Operator Precedence

http://portal.acm.org/citation.cfm?id=512931 https://tdop.github.io/

Index

Constants

View Source
// Available parser AST node types. All of them are untyped string constants.
const (
	// End of input
	NodeEOF = "EOF"

	NodeVALUE = "value" // Simple value

	NodeSTATEMENTS = "statements" // List of statements
	NodeLIST       = "list"       // List value
	NodeMAP        = "map"        // Map value
	NodeGUARD      = "guard"      // Guard expressions for conditional statements

	NodeMAPENTRY = "entry" // Map entry value

	// Boolean operators
	NodeOR  = "or"
	NodeAND = "and"
	NodeNOT = "not"

	// Word operators; the values match the corresponding KeywordMap keys
	NodeLIKE       = "like"
	NodeIN         = "in"
	NodeBEGINSWITH = "beginswith"
	NodeENDSWITH   = "endswith"
	NodeNOTIN      = "notin"

	// Comparison operators; the values are the operator symbols themselves
	NodeGEQ = ">="
	NodeLEQ = "<="
	NodeNEQ = "!="
	NodeEQ  = "=="
	NodeGT  = ">"
	NodeLT  = "<"

	// Literal constants true, false and null
	NodeTRUE  = "true"
	NodeFALSE = "false"
	NodeNULL  = "null"

	// Arithmetic operators
	NodePLUS   = "plus"
	NodeMINUS  = "minus"
	NodeTIMES  = "times"
	NodeDIV    = "div"
	NodeMODINT = "modint"
	NodeDIVINT = "divint"

	// Assignment
	NodeASSIGN = ":="

	// Function call
	NodeFUNCCALL = "funccall"

	// NOTE(review): presumably produced for the "$" symbol (TokenACCESS in
	// SymbolMap) - confirm against the parser.
	NodeACCESS = "access"

	// Node types whose values match the sink, kindmatch, scopematch,
	// statematch, priority and suppresses keywords in KeywordMap
	NodeSINK       = "sink"
	NodeKINDMATCH  = "kindmatch"
	NodeSCOPEMATCH = "scopematch"
	NodeSTATEMATCH = "statematch"
	NodePRIORITY   = "priority"
	NodeSUPPRESSES = "suppresses"

	// Conditional and loop nodes
	NodeCOND = "cond"
	NodeLOOP = "loop"

	// Break and continue nodes
	NodeBREAK    = "break"
	NodeCONTINUE = "continue"
)

Available parser AST node types

View Source
// RuneEOF is a special rune which represents the end of the input.
const RuneEOF = -1

RuneEOF is a special rune which represents the end of the input

Variables

View Source
// Parser related error types. These are sentinel error values created with
// errors.New.
//
// NOTE(review): presumably these are the values stored in Error.Type, whose
// field comment says it is to be used for equal checks - confirm.
var (
	ErrUnexpectedEnd            = errors.New("Unexpected end")
	ErrLexicalError             = errors.New("Lexical error")
	ErrUnknownToken             = errors.New("Unknown term")
	ErrImpossibleNullDenotation = errors.New("Term cannot start an expression")
	ErrImpossibleLeftDenotation = errors.New("Term can only start an expression")
	ErrUnexpectedToken          = errors.New("Unexpected term")
)

Parser related error types

View Source
// KeywordMap is a map of keywords - these require spaces between them.
// It maps each keyword string to its lexer token ID.
var KeywordMap = map[string]LexTokenID{

	// Keywords mirrored by the NodeSINK ... NodeSUPPRESSES node types
	"sink":       TokenSINK,
	"kindmatch":  TokenKINDMATCH,
	"scopematch": TokenSCOPEMATCH,
	"statematch": TokenSTATEMATCH,
	"priority":   TokenPRIORITY,
	"suppresses": TokenSUPPRESSES,

	// Boolean operators
	"and": TokenAND,
	"or":  TokenOR,
	"not": TokenNOT,

	// Word operators
	"like":       TokenLIKE,
	"in":         TokenIN,
	"beginswith": TokenBEGINSWITH,
	"endswith":   TokenENDSWITH,
	"notin":      TokenNOTIN,

	// Literal constants
	"false": TokenFALSE,
	"true":  TokenTRUE,
	"null":  TokenNULL,

	// Conditional statement keywords
	"if":   TokenIF,
	"elif": TokenELIF,
	"else": TokenELSE,

	// Loop statement keywords
	"for":      TokenFOR,
	"break":    TokenBREAK,
	"continue": TokenCONTINUE,
}

KeywordMap is a map of keywords - these require spaces between them

View Source
// SymbolMap is a map of special symbols which will always be unique - these
// will separate unquoted strings. It maps each symbol string to its lexer
// token ID.
//
// NOTE(review): several keys are prefixes of longer keys (">" and ">=",
// "<" and "<=", "/" and "//", ":" and ":="); presumably the lexer tries the
// longer symbol first - confirm against the lexer implementation.
var SymbolMap = map[string]LexTokenID{

	// Comparison operators
	">=": TokenGEQ,
	"<=": TokenLEQ,
	"!=": TokenNEQ,
	"==": TokenEQ,
	">":  TokenGT,
	"<":  TokenLT,

	// Parentheses, brackets and braces
	"(": TokenLPAREN,
	")": TokenRPAREN,
	"[": TokenLBRACK,
	"]": TokenRBRACK,
	"{": TokenLBRACE,
	"}": TokenRBRACE,

	// Separators
	",": TokenCOMMA,
	":": TokenCOLON,
	";": TokenSEMICOLON,

	// Arithmetic operators
	"+":  TokenPLUS,
	"-":  TokenMINUS,
	"*":  TokenTIMES,
	"/":  TokenDIV,
	"//": TokenDIVINT,
	"%":  TokenMODINT,

	// Assignment
	":=": TokenASSIGN,

	"@": TokenAT,

	"$": TokenACCESS,
}

SymbolMap is a map of special symbols which will always be unique - these will separate unquoted strings

Functions

func Lex

func Lex(name string, input string) chan LexToken

Lex lexes a given input. Returns a channel which contains tokens.

func PrettyPrint

func PrettyPrint(ast *ASTNode) (string, error)

PrettyPrint produces pretty-printed Rumble source code from a given AST.

Types

type ASTNode

// ASTNode models a node in the AST.
type ASTNode struct {
	Name     string     // Name of the node
	Token    *LexToken  // Lexer token of this ASTNode
	Children []*ASTNode // Child nodes
	Runtime  Runtime    // Runtime component for this ASTNode
	// contains filtered or unexported fields
}

ASTNode models a node in the AST

func ASTFromPlain

func ASTFromPlain(plainAST map[string]interface{}) (*ASTNode, error)

ASTFromPlain creates an AST from a plain AST. A plain AST is a nested map structure like this:

{
	name     : <name of node>
	value    : <value of node>
	children : [ <child nodes> ]
}

func Parse

func Parse(name string, input string) (*ASTNode, error)

Parse parses a given input string and returns an AST.

func ParseWithRuntime

func ParseWithRuntime(name string, input string, rp RuntimeProvider) (*ASTNode, error)

ParseWithRuntime parses a given input string and returns an AST decorated with runtime components.

func (*ASTNode) Plain

func (n *ASTNode) Plain() map[string]interface{}

Plain returns this ASTNode and all its children as a plain AST. A plain AST only contains map objects, lists and primitive types which can be serialized with JSON.

func (*ASTNode) String

func (n *ASTNode) String() string

String returns a string representation of this AST node.

type Error

// Error models a parser related error.
type Error struct {
	Source string // Name of the source which was given to the parser
	Type   error  // Error type (to be used for equal checks)
	Detail string // Details of this error
	Line   int    // Line of the error
	Pos    int    // Position of the error
}

Error models a parser related error

func (*Error) Error

func (pe *Error) Error() string

Error returns a human-readable string representation of this error.

type LexToken

// LexToken represents a token which is returned by the lexer.
type LexToken struct {
	ID      LexTokenID // Token kind
	Pos     int        // Starting position (in bytes)
	Val     string     // Token value
	Literal bool       // Flag if the value is a literal value (quoted constant)
	Lline   int        // Line in the input this token appears
	Lpos    int        // Position in the input line this token appears
}

LexToken represents a token which is returned by the lexer.

func LexToList

func LexToList(name string, input string) []LexToken

LexToList lexes a given input. Returns a list of tokens.

func (LexToken) PosString

func (t LexToken) PosString() string

PosString returns the position of this token in the original input as a string.

func (LexToken) String

func (t LexToken) String() string

String returns a string representation of a token.

type LexTokenID

// LexTokenID represents a unique lexer token ID.
type LexTokenID int

LexTokenID represents a unique lexer token ID

// Available lexer token types.
//
// The numeric values are assigned by iota starting at TokenError = 0, so the
// order of this list is significant: inserting, removing or reordering an
// entry changes the value of every token that follows it.
const (
	TokenError LexTokenID = iota // Lexing error token with a message as val
	TokenEOF                     // End-of-file token

	TokenVALUE // Simple value

	TokenSTATEMENTS // List of statements
	TokenLIST       // List value
	TokenMAP        // Map value
	TokenGUARD      // Guard expressions for conditional statements

	TokenGeneral // General token used for plain ASTs

	TOKENodeSYMBOLS // Used to separate symbols from other tokens in this list

	// Comparison operators
	TokenGEQ
	TokenLEQ
	TokenNEQ
	TokenEQ
	TokenGT
	TokenLT

	// Parentheses, brackets and braces
	TokenLPAREN
	TokenRPAREN
	TokenLBRACK
	TokenRBRACK
	TokenLBRACE
	TokenRBRACE

	// Separators
	TokenCOMMA
	TokenCOLON
	TokenSEMICOLON

	// Arithmetic operators
	TokenPLUS
	TokenMINUS
	TokenTIMES
	TokenDIV
	TokenDIVINT
	TokenMODINT

	// Assignment
	TokenASSIGN

	TokenAT

	TokenACCESS

	TOKENodeKEYWORDS // Used to separate keywords from other tokens in this list

	// Tokens for the sink, kindmatch, scopematch, statematch, priority and
	// suppresses keywords
	TokenSINK
	TokenKINDMATCH
	TokenSCOPEMATCH
	TokenSTATEMATCH
	TokenPRIORITY
	TokenSUPPRESSES

	// Boolean operators
	TokenAND
	TokenOR
	TokenNOT

	// Word operators
	TokenLIKE
	TokenIN
	TokenBEGINSWITH
	TokenENDSWITH
	TokenNOTIN

	// Literal constants
	TokenFALSE
	TokenTRUE
	TokenNULL

	// Conditional statement keywords
	TokenIF
	TokenELIF
	TokenELSE

	// Loop statement keywords
	TokenFOR
	TokenBREAK
	TokenCONTINUE
)

Available lexer token types

type Runtime

// Runtime provides the runtime for an ASTNode.
type Runtime interface {

	/*
	   Validate validates this runtime component and all its child components.
	*/
	Validate() error

	/*
		Eval evaluates this runtime component. It gets passed the current variable
		scope and the instance state.

		The instance state is created per execution instance. For example the
		@range function can store here its current state. Code execution per
		instance is always sequential thus locking is not required.
	*/
	Eval(VarsScope, map[string]interface{}) (interface{}, error)
}

Runtime provides the runtime for an ASTNode.

type RuntimeProvider

// RuntimeProvider provides runtime components for a parse tree.
type RuntimeProvider interface {

	/*
	   Runtime returns a runtime component for a given ASTNode.
	*/
	Runtime(node *ASTNode) Runtime
}

RuntimeProvider provides runtime components for a parse tree.

type VarsScope

// VarsScope is used to store variable data and keep track of scoping.
type VarsScope interface {

	/*
	   NewChild creates a new child variable scope.
	*/
	NewChild(name string) VarsScope

	/*
	   SetValue sets a new value for a variable.
	*/
	SetValue(varName string, varValue interface{}) error

	/*
	   GetValue gets the current value of a variable.

	   NOTE(review): the bool result presumably reports whether the variable
	   exists - confirm against an implementation.
	*/
	GetValue(varName string) (interface{}, bool, error)

	/*
	   String returns a string representation of this variable scope.
	*/
	String() string
}

VarsScope is used to store variable data and keep track of scoping.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL