lex

package
v0.0.9

Published: Mar 20, 2024 License: BSD-3-Clause Imports: 21 Imported by: 0

Documentation

Overview

Package lex provides all the lexing functions that transform text into lexical tokens, using token types defined in the pi/token package. It also has the basic file source and position / region management functionality.
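
For example, a minimal sketch of loading source into a File for lexing, using only the functions documented below -- the import paths and the fi.Go value for fi.Known are assumptions here:

	package main

	import (
		"fmt"

		"cogentcore.org/core/fi"
		"cogentcore.org/core/pi/lex"
	)

	func main() {
		var fl lex.File
		// InitFromString returns false if the string is empty.
		if fl.InitFromString("x := 1 + 2\n", "example.go", fi.Go) {
			fmt.Println(fl.NLines()) // number of source lines
		}
	}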

Index

Constants

This section is empty.

Variables

View Source
var PosErr = Pos{-1, -1}

PosErr represents an error text position (-1 for both line and char) used as a return value for cases where error positions are possible

View Source
var PosZero = Pos{}

PosZero is the uninitialized zero text position (which is still a valid position)

View Source
var RegZero = Reg{}

RegZero is the zero region

View Source
var RuleType = gti.AddType(&gti.Type{Name: "cogentcore.org/core/pi/lex.Rule", IDName: "rule", Doc: "lex.Rule operates on the text input to produce the lexical tokens.\n\nLexing is done line-by-line -- you must push and pop states to\ncoordinate across multiple lines, e.g., for multi-line comments.\n\nThere is full access to entire line and you can decide based on future\n(offset) characters.\n\nIn general it is best to keep lexing as simple as possible and\nleave the more complex things for the parsing step.", Embeds: []gti.Field{{Name: "Node"}}, Fields: []gti.Field{{Name: "Off", Doc: "disable this rule -- useful for testing and exploration"}, {Name: "Desc", Doc: "description / comments about this rule"}, {Name: "Token", Doc: "the token value that this rule generates -- use None for non-terminals"}, {Name: "Match", Doc: "the lexical match that we look for to engage this rule"}, {Name: "Pos", Doc: "position where match can occur"}, {Name: "String", Doc: "if action is LexMatch, this is the string we match"}, {Name: "Offset", Doc: "offset into the input to look for a match: 0 = current char, 1 = next one, etc"}, {Name: "SizeAdj", Doc: "adjusts the size of the region (plus or minus) that is processed for the Next action -- allows broader and narrower matching relative to tagging"}, {Name: "Acts", Doc: "the action(s) to perform, in order, if there is a match -- these are performed prior to iterating over child nodes"}, {Name: "Until", Doc: "string(s) for ReadUntil action -- will read until any of these strings are found -- separate different options with | -- if you need to read until a literal | just put two || in a row and that will show up as a blank, which is interpreted as a literal |"}, {Name: "PushState", Doc: "the state to push if our action is PushState -- note that State matching is on String, not this value"}, {Name: "NameMap", Doc: "create an optimization map for this rule, which must be a parent with children that all match against a Name string -- this reads the Name and directly activates the associated rule with that String, without having to iterate through them -- use this for keywords etc -- produces a SIGNIFICANT speedup for long lists of keywords."}, {Name: "MatchLen", Doc: "length of source that matched -- if Next is called, this is what will be skipped to"}, {Name: "NmMap", Doc: "NameMap lookup map -- created during Compile"}}, Instance: &Rule{}})

RuleType is the gti.Type for Rule

View Source
var Trace = false

Functions

func BracePair

func BracePair(r rune) (match rune, right bool)

BracePair returns the matching brace-like punctuation for given rune, which must be a left or right brace {}, bracket [] or paren (). Also returns true if it is *right*
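
For example, a small sketch of the documented behavior:

	match, right := lex.BracePair('}')
	fmt.Printf("%c %v\n", match, right) // prints: { true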

func BracketIndentLine

func BracketIndentLine(src [][]rune, tags []Line, ln int, tabSz int) (pInd, delInd, pLn int, ichr indent.Char)

BracketIndentLine returns the indentation level for the given line based on the previous line's indentation level, and any delta change based on brackets starting or ending the previous or current line. The indent level is in increments of tabSz for spaces, and in tabs for tabs. Operates on rune source with markup lex tags per line.

func DigitVal

func DigitVal(ch rune) int

func FirstNonSpaceRune

func FirstNonSpaceRune(src []rune) int

FirstNonSpaceRune returns the index of first non-space rune, -1 if not found

func FirstWord

func FirstWord(str string) string

FirstWord returns the first contiguous sequence of purely unicode.IsLetter runes within given string -- skips over any leading non-letters until a letter is found. This does not include numbers -- use FirstWordDigits for that
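
For example, leading non-letters are skipped:

	fmt.Println(lex.FirstWord("123 hello world")) // prints: hello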

func FirstWordApostrophe

func FirstWordApostrophe(str string) string

FirstWordApostrophe returns the first contiguous sequence of purely unicode.IsLetter runes that can also contain an apostrophe *within* the word, but not at the end

func FirstWordDigits

func FirstWordDigits(str string) string

FirstWordDigits returns the first contiguous sequence of purely IsLetterOrDigit runes within given string -- skips over any leading non-letters until a *letter* (not digit) is found.

func HasUpperCase

func HasUpperCase(str string) bool

HasUpperCase returns true if string has an upper-case letter

func InnerBracketScope

func InnerBracketScope(str string, brl, brr string) string

InnerBracketScope returns the inner-scope for given bracket type if it is imbalanced -- it is important to do completion based just on that inner scope if that is where the user is at.

func IsDigit

func IsDigit(ch rune) bool

func IsLetter

func IsLetter(ch rune) bool

func IsLetterOrDigit

func IsLetterOrDigit(ch rune) bool

func IsWhiteSpace

func IsWhiteSpace(ch rune) bool

func LastField

func LastField(str string) string

LastField returns the last white-space separated string

func LastNonSpaceRune

func LastNonSpaceRune(src []rune) int

LastNonSpaceRune returns the index of last non-space rune, -1 if not found

func LastScopedString

func LastScopedString(str string) string

LastScopedString returns the last white-space separated, and bracket enclosed string from given string.

func LineIndent

func LineIndent(src []rune, tabSz int) (ind int, ichr indent.Char)

LineIndent returns the number of tabs or spaces at start of given rune-line, based on target tab-size (only relevant for spaces). If line starts with tabs, then those are counted, else spaces -- combinations of tabs and spaces won't produce sensible results.
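
A sketch of the documented behavior, assuming indent.Tab is the tab value of indent.Char:

	ind, ichr := lex.LineIndent([]rune("\t\tx := 1"), 4)
	// ind == 2 (two leading tabs); ichr == indent.Tab
	fmt.Println(ind, ichr)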

func LineStartEndBracket

func LineStartEndBracket(src []rune, tags Line) (start, end bool)

LineStartEndBracket checks if line starts with a closing bracket or ends with an opening bracket. This is used for auto-indent for example. Bracket is Paren, Bracket, or Brace.

func MarkupPathsAsLinks

func MarkupPathsAsLinks(flds []string, maxFlds int) (orig, link []byte)

MarkupPathsAsLinks checks for strings that look like file paths / urls and returns the original fields as a byte slice along with a marked-up version of that with html link markups for the files (as <a href="file:///..."). Input is field-parsed already, and maxFlds is the maximum number of fields to look for file paths in (e.g., 2 is a reasonable default, to avoid getting other false-alarm info later in the text). This is mainly used for marking up output from commands, for example.

func MatchCase

func MatchCase(src, trg string) string

MatchCase uses the source string case (upper / lower) to set corresponding case in target string, returning that string.
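
For example, a sketch of the documented behavior, where the source's case pattern carries over to the target:

	fmt.Println(lex.MatchCase("Hello", "world")) // prints: World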

func OpenFileBytes

func OpenFileBytes(fname string) ([]byte, error)

OpenFileBytes returns bytes in given file, and logs any errors as well

func PrevLineIndent

func PrevLineIndent(src [][]rune, tags []Line, ln int, tabSz int) (ind, pln int, ichr indent.Char)

PrevLineIndent returns indentation level of previous line from given line that has indentation -- skips blank lines. Returns indent level and previous line number, and indent char. indent level is in increments of tabSz for spaces, and tabs for tabs. Operates on rune source with markup lex tags per line.

func PrintError

func PrintError(w io.Writer, err error)

PrintError is a utility function that prints a list of errors to w, one error per line, if the err parameter is an ErrorList. Otherwise it prints the err string.

func RunesFromBytes

func RunesFromBytes(b []byte) [][]rune

RunesFromBytes returns the lines of runes from a basic byte array

func RunesFromString

func RunesFromString(str string) [][]rune

RunesFromString returns the lines of runes from a string (more efficient than converting to bytes)
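
For example:

	lines := lex.RunesFromString("a := 1\nb := 2")
	fmt.Println(len(lines), string(lines[0])) // prints: 2 a := 1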

func TrimLeftToAlpha

func TrimLeftToAlpha(nm string) string

TrimLeftToAlpha returns string without any leading non-alpha runes

Types

type Actions

type Actions int32 //enums:enum

Actions are lexing actions to perform

const (
	// Next means advance input position to the next character(s) after the matched characters
	Next Actions = iota

	// Name means read in an entire name, which is letters, _ and digits after first letter
	// position will be advanced to just after
	Name

	// Number means read in an entire number -- the token type will automatically be
	// set to the actual type of number that was read in, and position advanced to just after
	Number

	// Quoted means read in an entire string enclosed in the quote delimiter
	// that is present at the current position, with proper skipping of escaped
	// characters. Position advanced to just after
	Quoted

	// QuotedRaw means read in an entire string enclosed in the quote delimiter
	// that is present at the start position, with proper skipping of escaped
	// characters. Position advanced to just after.
	// The raw version supports multi-line strings and includes the CR etc at the
	// end of lines (e.g., back-tick in various languages)
	QuotedRaw

	// EOL means read till the end of the line (e.g., for single-line comments)
	EOL

	// ReadUntil reads until string(s) in the Until field are found,
	// or until the EOL if none are found
	ReadUntil

	// PushState means push the given state value onto the state stack
	PushState

	// PopState means pop given state value off the state stack
	PopState

	// SetGuestLex means install the Name (must be a prior action) as the guest
	// lexer -- it will take over lexing until PopGuestLex is called
	SetGuestLex

	// PopGuestLex removes the current guest lexer and returns to the original
	// language lexer
	PopGuestLex
)

The lexical acts

const ActionsN Actions = 11

ActionsN is the highest valid value for type Actions, plus one.

func ActionsValues

func ActionsValues() []Actions

ActionsValues returns all possible values for the type Actions.

func (Actions) Desc

func (i Actions) Desc() string

Desc returns the description of the Actions value.

func (Actions) Int64

func (i Actions) Int64() int64

Int64 returns the Actions value as an int64.

func (Actions) MarshalText

func (i Actions) MarshalText() ([]byte, error)

MarshalText implements the encoding.TextMarshaler interface.

func (*Actions) SetInt64

func (i *Actions) SetInt64(in int64)

SetInt64 sets the Actions value from an int64.

func (*Actions) SetString

func (i *Actions) SetString(s string) error

SetString sets the Actions value from its string representation, and returns an error if the string is invalid.

func (Actions) String

func (i Actions) String() string

String returns the string representation of this Actions value.

func (*Actions) UnmarshalText

func (i *Actions) UnmarshalText(text []byte) error

UnmarshalText implements the encoding.TextUnmarshaler interface.

func (Actions) Values

func (i Actions) Values() []enums.Enum

Values returns all possible values for the type Actions.

type EosPos

type EosPos []int

EosPos is a line of EOS token positions, always sorted low-to-high

func (EosPos) FindGt

func (ep EosPos) FindGt(ch int) int

FindGt returns any pos value greater than given token pos, -1 if none

func (EosPos) FindGtEq

func (ep EosPos) FindGtEq(ch int) int

FindGtEq returns any pos value greater than or equal to given token pos, -1 if none

type Error

type Error struct {

	// position where the error occurred in the source
	Pos Pos

	// full filename with path
	Filename string

	// brief error message
	Msg string

	// line of source where error was
	Src string

	// lexer or parser rule that emitted the error
	Rule ki.Ki
}

In an ErrorList, an error is represented by an *Error. The position Pos, if valid, points to the beginning of the offending token, and the error condition is described by Msg.

func (Error) Error

func (e Error) Error() string

Error implements the error interface -- gives the minimal version of error string

func (Error) Report

func (e Error) Report(basepath string, showSrc, showRule bool) string

Report provides customizable output options for viewing errors:
  - basepath, if non-empty, shows the filename relative to that path.
  - showSrc shows the source line on a second line, truncated to 30 chars around the error.
  - showRule prints the rule name.

type ErrorList

type ErrorList []*Error

ErrorList is a list of *Errors. The zero value for an ErrorList is an empty ErrorList ready to use.

func (*ErrorList) Add

func (p *ErrorList) Add(pos Pos, fname, msg string, srcln string, rule ki.Ki) *Error

Add adds an Error with given position and error message to an ErrorList.

func (ErrorList) Err

func (p ErrorList) Err() error

Err returns an error equivalent to this error list. If the list is empty, Err returns nil.
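
A small sketch of accumulating and reporting errors with the documented methods (os is assumed to be imported; a nil rule is passed for brevity):

	var errs lex.ErrorList
	errs.Add(lex.Pos{Ln: 3, Ch: 5}, "main.go", "unexpected character", "x := @", nil)
	if err := errs.Err(); err != nil {
		lex.PrintError(os.Stderr, err) // one error per line
	}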

func (ErrorList) Error

func (p ErrorList) Error() string

An ErrorList implements the error interface.

func (ErrorList) Len

func (p ErrorList) Len() int

ErrorList implements the sort Interface.

func (ErrorList) Less

func (p ErrorList) Less(i, j int) bool

func (*ErrorList) RemoveMultiples

func (p *ErrorList) RemoveMultiples()

RemoveMultiples sorts an ErrorList and removes all but the first error per line.

func (ErrorList) Report

func (p ErrorList) Report(maxN int, basepath string, showSrc, showRule bool) string

Report returns all (or up to maxN if > 0) errors in the list in one string, with customizable output options for viewing errors:
  - basepath, if non-empty, shows the filename relative to that path.
  - showSrc shows the source line on a second line, truncated to 30 chars around the error.
  - showRule prints the rule name.

func (*ErrorList) Reset

func (p *ErrorList) Reset()

Reset resets an ErrorList to no errors.

func (ErrorList) Sort

func (p ErrorList) Sort()

Sort sorts an ErrorList. *Error entries are sorted by position, other errors are sorted by error message, and before any *Error entry.

func (ErrorList) Swap

func (p ErrorList) Swap(i, j int)

type File

type File struct {

	// the current file being lex'd
	Filename string

	// the known file type, if known (typically only known files are processed)
	Sup fi.Known

	// base path for reporting file names -- this must be set externally e.g., by gide for the project root path
	BasePath string

	// lex'd version of the lines -- allocated to size of Lines
	Lexs []Line

	// comment tokens are stored separately here, so parser doesn't need to worry about them, but they are available for highlighting and other uses
	Comments []Line

	// stack present at the end of each line -- needed for contextualizing line-at-time lexing while editing
	LastStacks []Stack

	// token positions per line for the EOS (end of statement) tokens -- very important for scoping top-down parsing
	EosPos []EosPos

	// contents of the file as lines of runes
	Lines [][]rune
}

File contains the contents of the file being parsed -- all kept in memory, and represented by Line as runes, so that positions in the file are directly convertible to indexes in Lines structure

func (*File) AllocLines

func (fl *File) AllocLines()

AllocLines allocates the data per line: lex outputs and stack. We reset state so stale state is not hanging around.

func (*File) EnsureFinalEos

func (fl *File) EnsureFinalEos(ln int)

EnsureFinalEos makes sure that the given line ends with an EOS (if it has tokens). Used for line-at-time parsing just to make sure it matches even if you haven't gotten to the end etc.

func (*File) InitFromLine

func (fl *File) InitFromLine(sfl *File, ln int) bool

InitFromLine initializes from one line of source file

func (*File) InitFromString

func (fl *File) InitFromString(str string, fname string, sup fi.Known) bool

InitFromString initializes from given string. Returns false if string is empty

func (*File) InsertEos

func (fl *File) InsertEos(cp Pos) Pos

InsertEos inserts an EOS just after the given token position (e.g., cp = last token in line)

func (*File) IsLexPosValid

func (fl *File) IsLexPosValid(pos Pos) bool

IsLexPosValid returns true if given lexical token position is valid

func (*File) LexAt

func (fl *File) LexAt(cp Pos) *Lex

LexAt returns Lex item at given position, with no checking

func (*File) LexAtSafe

func (fl *File) LexAtSafe(cp Pos) Lex

LexAtSafe returns the Lex item at given position, or last lex item if beyond end

func (*File) LexLine

func (fl *File) LexLine(ln int) Line

LexLine returns the lexing output for given line, combining comments and all other tokens and allocating new memory using clone

func (*File) LexTagSrc

func (fl *File) LexTagSrc() string

LexTagSrc returns the lex'd tagged source for entire source

func (*File) LexTagSrcLn

func (fl *File) LexTagSrcLn(ln int) string

LexTagSrcLn returns the lex'd tagged source line for given line

func (*File) LinesDeleted

func (fl *File) LinesDeleted(stln, edln int)

LinesDeleted deletes lines -- called e.g., by giv.TextBuf to sync the markup with ongoing edits

func (*File) LinesInserted

func (fl *File) LinesInserted(stln, nlns int)

LinesInserted inserts new lines -- called e.g., by giv.TextBuf to sync the markup with ongoing edits

func (*File) NLines

func (fl *File) NLines() int

NLines returns the number of lines in source

func (*File) NTokens

func (fl *File) NTokens(ln int) int

NTokens returns number of lex tokens for given line

func (*File) NextEos

func (fl *File) NextEos(stpos Pos, depth int) (Pos, bool)

NextEos finds the next EOS position at given depth, false if none

func (*File) NextEosAnyDepth

func (fl *File) NextEosAnyDepth(stpos Pos) (Pos, bool)

NextEosAnyDepth finds the next EOS at any depth

func (*File) NextTokenPos

func (fl *File) NextTokenPos(pos Pos) (Pos, bool)

NextTokenPos returns the next token position, false if at end of tokens

func (*File) OpenFile

func (fl *File) OpenFile(fname string) error

OpenFile sets source to be parsed from given filename

func (*File) PrevDepth

func (fl *File) PrevDepth(ln int) int

PrevDepth returns the depth of the token immediately prior to given line

func (*File) PrevStack

func (fl *File) PrevStack(ln int) Stack

PrevStack returns the stack from the previous line

func (*File) PrevTokenPos

func (fl *File) PrevTokenPos(pos Pos) (Pos, bool)

PrevTokenPos returns the previous token position, false if at the beginning of tokens

func (*File) RegSrc

func (fl *File) RegSrc(reg Reg) string

RegSrc returns the source (as a string) for given region

func (*File) ReplaceEos

func (fl *File) ReplaceEos(cp Pos)

ReplaceEos replaces given token with an EOS

func (*File) SetBytes

func (fl *File) SetBytes(txt []byte)

SetBytes sets source to be parsed from given bytes

func (*File) SetLine

func (fl *File) SetLine(ln int, lexs, comments Line, stack Stack)

SetLine sets the line data from the lexer -- does a clone to keep the copy

func (*File) SetLineSrc

func (fl *File) SetLineSrc(ln int, txt []rune) bool

SetLineSrc sets source runes from given line of runes. Returns false if out of range.

func (*File) SetSrc

func (fl *File) SetSrc(src [][]rune, fname, basepath string, sup fi.Known)

SetSrc sets the source to given content, and alloc Lexs -- if basepath is empty then it is set to the path for the filename

func (*File) SrcLine

func (fl *File) SrcLine(ln int) string

SrcLine returns given line of source, as a string, or "" if out of range

func (*File) Token

func (fl *File) Token(pos Pos) token.KeyToken

Token gets lex token at given Pos (Ch = token index)

func (*File) TokenMapReg

func (fl *File) TokenMapReg(reg Reg) TokenMap

TokenMapReg creates a TokenMap of tokens in the region, including their Cat and SubCat levels -- errs on the side of inclusiveness -- used for optimizing token matching

func (*File) TokenRegSrc

func (fl *File) TokenRegSrc(reg Reg) string

TokenRegSrc returns the source code associated with the given token region

func (*File) TokenSrc

func (fl *File) TokenSrc(pos Pos) []rune

TokenSrc gets source runes for given token position

func (*File) TokenSrcPos

func (fl *File) TokenSrcPos(pos Pos) Reg

TokenSrcPos returns source reg associated with lex token at given token position

func (*File) TokenSrcReg

func (fl *File) TokenSrcReg(reg Reg) Reg

TokenSrcReg translates a region of tokens into a region of source

func (*File) ValidTokenPos

func (fl *File) ValidTokenPos(pos Pos) (Pos, bool)

ValidTokenPos returns the next valid token position starting at given point, false if at end of tokens
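
Together, ValidTokenPos, NextTokenPos, and LexAt support iterating over all tokens in a lex'd file -- a sketch, where fl is assumed to be a *File that has already been lex'd:

	pos, ok := fl.ValidTokenPos(lex.PosZero)
	for ok {
		lx := fl.LexAt(pos) // no range checking; pos comes from the iteration
		fmt.Println(pos, lx.String())
		pos, ok = fl.NextTokenPos(pos)
	}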

type LangLexer

type LangLexer interface {
	// LexerByName returns the top-level lex.Rule for given language (case invariant lookup)
	LexerByName(lang string) *Rule
}

LangLexer looks up the lexer for a given language -- it is implemented in the parent pi package, so the interface is needed here

var TheLangLexer LangLexer

TheLangLexer is the instance of LangLexer interface used to lookup lexers for languages -- is set in pi/langs.go

type Lex

type Lex struct {

	// token, includes cache of keyword for keyword types, and also has nesting depth: starting at 0 at start of file and going up for every increment in bracket / paren / start tag and down for every decrement. Is computed once and used extensively in parsing.
	Tok token.KeyToken

	// start rune index within original source line for this token
	St int

	// end rune index within original source line for this token (exclusive -- ends one before this)
	Ed int

	// time when region was set -- used for updating locations in the text based on time stamp (using efficient non-pointer time)
	Time nptime.Time
}

Lex represents a single lexical element, with a token, and start and end rune positions within a line of a file. Critically it also contains the nesting depth computed from all the parens, brackets, braces. Todo: also support XML < > </ > tag depth.

func LastLexIgnoreComment

func LastLexIgnoreComment(tags Line) (*Lex, int)

LastLexIgnoreComment returns the last lex of the tags, ignoring any final comment at the end

func NewLex

func NewLex(tok token.KeyToken, st, ed int) Lex

func ObjPathAt

func ObjPathAt(line Line, lx *Lex) *Lex

ObjPathAt returns the starting Lex, before the given lex, that includes sequences of PunctSepPeriod and NameTag, which are used for object paths (e.g., field.field.field)

func (*Lex) ContainsPos

func (lx *Lex) ContainsPos(pos int) bool

ContainsPos returns true if the Lex element contains given character position

func (*Lex) Now

func (lx *Lex) Now()

Now sets the time stamp to now

func (*Lex) OverlapsReg

func (lx *Lex) OverlapsReg(or Lex) bool

OverlapsReg returns true if the two regions overlap

func (*Lex) Region

func (lx *Lex) Region(ln int) Reg

Region returns the region for this lexical element, at given line

func (*Lex) Src

func (lx *Lex) Src(src []rune) []rune

Src returns the rune source for given lex item (does no validity checking)

func (*Lex) String

func (lx *Lex) String() string

String satisfies the fmt.Stringer interface

type Lexer

type Lexer interface {
	ki.Ki

	// Compile performs any one-time compilation steps on the rule
	Compile(ls *State) bool

	// Validate checks for any errors in the rules and issues warnings,
	// returns true if valid (no err) and false if invalid (errs)
	Validate(ls *State) bool

	// Lex tries to apply rule to given input state, returns true if matched, false if not
	Lex(ls *State) *Rule

	// AsLexRule returns object as a lex.Rule
	AsLexRule() *Rule
}

Lexer is the interface type for lexers -- likely not necessary, except that it is essential for defining the BaseIface for the GUI when making new nodes

type Line

type Line []Lex

Line is one line of Lex'd text

func MergeLines

func MergeLines(t1, t2 Line) Line

MergeLines merges the two lines of lex regions into a combined list properly ordered by sequence of tags within the line.

func RuneFields

func RuneFields(src []rune) Line

RuneFields returns a Line of Lex's defining the non-white-space "fields" in the given rune string
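
For example, combined with Strings (documented below):

	src := []rune("two words here")
	line := lex.RuneFields(src)
	fmt.Println(line.Strings(src)) // prints: [two words here]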

func (*Line) Add

func (ll *Line) Add(lx Lex)

Add adds one element to the lex line (just append)

func (*Line) AddLex

func (ll *Line) AddLex(tok token.KeyToken, st, ed int) *Lex

AddLex adds one element to the lex line with the given params, returning a pointer to the new lex

func (*Line) AddSort

func (ll *Line) AddSort(lx Lex)

AddSort adds a new lex element in sorted order to list, sorted by start position, and if at the same start position, then sorted *decreasing* by end position -- this allows outer tags to be processed before inner tags which fits a stack-based tag markup logic.

func (*Line) AtPos

func (ll *Line) AtPos(pos int) (*Lex, int)

AtPos returns the Lex in place for given position, and index, or nil, -1 if none

func (*Line) Clone

func (ll *Line) Clone() Line

Clone returns a new copy of the line

func (*Line) DeleteIdx

func (ll *Line) DeleteIdx(idx int)

DeleteIdx deletes at given index

func (*Line) DeleteToken

func (ll *Line) DeleteToken(tok token.Tokens)

DeleteToken deletes a specific token type from list

func (*Line) Insert

func (ll *Line) Insert(idx int, lx Lex)

Insert inserts one element to the lex line at given point

func (*Line) NonCodeWords

func (ll *Line) NonCodeWords(src []rune) Line

NonCodeWords returns a Line of white-space separated word tokens in given tagged source that ignores token.IsCode token regions -- i.e., the "regular" words present in the source line -- this is useful for things like spell checking or manual parsing.

func (*Line) RuneStrings

func (ll *Line) RuneStrings(rstr []rune) []string

RuneStrings returns an array of strings for the Lex regions defined in the Line, for the given rune source string

func (*Line) Sort

func (ll *Line) Sort()

Sort sorts the lex elements by starting pos, and ending pos *decreasing* if a tie

func (*Line) String

func (ll *Line) String() string

String satisfies the fmt.Stringer interface

func (*Line) Strings

func (ll *Line) Strings(src []rune) []string

Strings returns a slice of strings, one for each of the Lex items in the Line, extracted from the given rune src. Returns nil if the Line is empty.

func (*Line) TagSrc

func (ll *Line) TagSrc(src []rune) string

TagSrc returns the token-tagged source

type MatchPos

type MatchPos int32 //enums:enum

MatchPos are special positions for a match to occur

const (
	// AnyPos matches at any position
	AnyPos MatchPos = iota

	// StartOfLine matches at start of line
	StartOfLine

	// EndOfLine matches at end of line
	EndOfLine

	// MiddleOfLine matches not at the start or end
	MiddleOfLine

	// StartOfWord matches at start of word
	StartOfWord

	// EndOfWord matches at end of word
	EndOfWord

	// MiddleOfWord matches not at the start or end
	MiddleOfWord
)

Matching position rules

const MatchPosN MatchPos = 7

MatchPosN is the highest valid value for type MatchPos, plus one.

func MatchPosValues

func MatchPosValues() []MatchPos

MatchPosValues returns all possible values for the type MatchPos.

func (MatchPos) Desc

func (i MatchPos) Desc() string

Desc returns the description of the MatchPos value.

func (MatchPos) Int64

func (i MatchPos) Int64() int64

Int64 returns the MatchPos value as an int64.

func (MatchPos) MarshalText

func (i MatchPos) MarshalText() ([]byte, error)

MarshalText implements the encoding.TextMarshaler interface.

func (*MatchPos) SetInt64

func (i *MatchPos) SetInt64(in int64)

SetInt64 sets the MatchPos value from an int64.

func (*MatchPos) SetString

func (i *MatchPos) SetString(s string) error

SetString sets the MatchPos value from its string representation, and returns an error if the string is invalid.

func (MatchPos) String

func (i MatchPos) String() string

String returns the string representation of this MatchPos value.

func (*MatchPos) UnmarshalText

func (i *MatchPos) UnmarshalText(text []byte) error

UnmarshalText implements the encoding.TextUnmarshaler interface.

func (MatchPos) Values

func (i MatchPos) Values() []enums.Enum

Values returns all possible values for the type MatchPos.

type Matches

type Matches int32 //enums:enum

Matches are what kind of lexing matches to make

const (
	// String means match a specific string as given in the rule
	// Note: this only looks for the string with no constraints on
	// what happens after this string -- use StrName to match entire names
	String Matches = iota

	// StrName means match a specific string that is a complete alpha-numeric
	// string (including underbar _) with some other char at the end.
	// Must use this for all keyword matches to ensure that it isn't just
	// the start of a longer name
	StrName

	// Match any letter, including underscore
	Letter

	// Match digit 0-9
	Digit

	// Match any white space (space, tab) -- input is already broken into lines
	WhiteSpace

	// CurState means match the current state value set by a PushState action,
	// using the String value in the rule. All CurState cases must generally be
	// first in the list of rules so they can preempt other rules when the
	// state is active
	CurState

	// AnyRune means match any rune -- use this as the last condition where other terminators
	// come first!
	AnyRune
)

Matching rules

const MatchesN Matches = 7

MatchesN is the highest valid value for type Matches, plus one.

func MatchesValues

func MatchesValues() []Matches

MatchesValues returns all possible values for the type Matches.

func (Matches) Desc

func (i Matches) Desc() string

Desc returns the description of the Matches value.

func (Matches) Int64

func (i Matches) Int64() int64

Int64 returns the Matches value as an int64.

func (Matches) MarshalText

func (i Matches) MarshalText() ([]byte, error)

MarshalText implements the encoding.TextMarshaler interface.

func (*Matches) SetInt64

func (i *Matches) SetInt64(in int64)

SetInt64 sets the Matches value from an int64.

func (*Matches) SetString

func (i *Matches) SetString(s string) error

SetString sets the Matches value from its string representation, and returns an error if the string is invalid.

func (Matches) String

func (i Matches) String() string

String returns the string representation of this Matches value.

func (*Matches) UnmarshalText

func (i *Matches) UnmarshalText(text []byte) error

UnmarshalText implements the encoding.TextUnmarshaler interface.

func (Matches) Values

func (i Matches) Values() []enums.Enum

Values returns all possible values for the type Matches.

type PassTwo

type PassTwo struct {

	// should we perform EOS detection on this type of file?
	DoEos bool

	// use end-of-line as a default EOS, if nesting depth is same as start of line (python) -- see also EolToks
	Eol bool

	// replace all semicolons with EOS to keep it consistent (C, Go..)
	Semi bool

	// use backslash as a line continuer (python)
	Backslash bool

	// if a right-brace } is detected anywhere in the line, insert an EOS *before* RBrace AND after it (needed for Go) -- do not include RBrace in EolToks in this case
	RBraceEos bool

	// specific tokens to recognize at the end of a line that trigger an EOS (Go)
	EolToks token.KeyTokenList
}

PassTwo performs a second pass (or passes) through the lexicalized version of the source, computing the nesting depth for every token once and for all -- this is essential for properly matching tokens and also for colorization in syntax highlighting. Optionally, a subsequent pass finds end-of-statement (EOS) tokens, which are essential for parsing to first break the source down into statement-sized chunks. A separate list of EOS token positions is maintained for very fast access.
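
A hedged sketch of a Go-like configuration, per the field docs above:

	pt := lex.PassTwo{
		DoEos:     true, // perform EOS detection
		Semi:      true, // replace semicolons with EOS
		RBraceEos: true, // insert EOS before and after right-braces
	}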

func (*PassTwo) EosDetect

func (pt *PassTwo) EosDetect(ts *TwoState)

EosDetect performs EOS detection.

func (*PassTwo) EosDetectPos

func (pt *PassTwo) EosDetectPos(ts *TwoState, pos Pos, nln int)

EosDetectPos performs EOS detection at the given starting position, for the given number of lines.

func (*PassTwo) Error

func (pt *PassTwo) Error(ts *TwoState, msg string)

Error adds a PassTwo error at the given position.

func (*PassTwo) HasErrs

func (pt *PassTwo) HasErrs(ts *TwoState) bool

HasErrs reports if there are errors in eosing process

func (*PassTwo) MismatchError

func (pt *PassTwo) MismatchError(ts *TwoState, tok token.Tokens)

MismatchError reports a mismatch for given type of parentheses / bracket

func (*PassTwo) NestDepth

func (pt *PassTwo) NestDepth(ts *TwoState)

NestDepth performs the nesting depth computation.

func (*PassTwo) NestDepthLine

func (pt *PassTwo) NestDepthLine(line Line, initDepth int)

NestDepthLine performs the nesting depth computation on only one line, starting at the given initial depth -- updates the given line.

func (*PassTwo) PopNest

func (pt *PassTwo) PopNest(ts *TwoState, tok token.Tokens)

PopNest attempts to pop given token off of nesting stack, generating error if it mismatches

func (*PassTwo) PushNest

func (pt *PassTwo) PushNest(ts *TwoState, tok token.Tokens)

PushNest pushes a nesting left paren / bracket onto stack

type Pos

type Pos struct {
	Ln int
	Ch int
}

Pos is a position within the source file -- it is always recorded in 0, 0 offset positions, but is converted into 1,1 offset for public consumption. Ch positions are always in runes, not bytes. Also used for lex token indexes.

func BraceMatch

func BraceMatch(src [][]rune, tags []Line, r rune, st Pos, maxLns int) (en Pos, found bool)

BraceMatch finds the brace, bracket, or paren that is the partner of the one passed to function, within maxLns lines of start. Operates on rune source with markup lex tags per line (tags exclude comments).

func (*Pos) FromString

func (ps *Pos) FromString(link string) bool

FromString decodes text position from a string representation of form: [#]LxxCxx -- used in e.g., URL links -- returns true if successful
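
For example, a sketch (whether the decoded value is stored in the 0,0 or 1,1 offset form follows the Pos doc above):

	var p lex.Pos
	if p.FromString("L10C5") {
		fmt.Println(p)
	}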

func (*Pos) IsLess

func (ps *Pos) IsLess(cmp Pos) bool

IsLess returns true if receiver position is less than given comparison

func (Pos) String

func (ps Pos) String() string

String satisfies the fmt.Stringer interface

type Reg

type Reg struct {

	// starting position of region
	St Pos

	// ending position of region
	Ed Pos
}

Reg is a contiguous region within the source file

func (Reg) Contains

func (tr Reg) Contains(ps Pos) bool

Contains returns true if region contains position

func (Reg) IsNil

func (tr Reg) IsNil() bool

IsNil returns true if the region is empty, i.e., the start is at or after the end
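
For example:

	reg := lex.Reg{St: lex.Pos{Ln: 1, Ch: 0}, Ed: lex.Pos{Ln: 3, Ch: 10}}
	fmt.Println(reg.Contains(lex.Pos{Ln: 2, Ch: 4})) // prints: true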

type Rule

type Rule struct {
	ki.Node

	// disable this rule -- useful for testing and exploration
	Off bool

	// description / comments about this rule
	Desc string

	// the token value that this rule generates -- use None for non-terminals
	Token token.Tokens

	// the lexical match that we look for to engage this rule
	Match Matches

	// position where match can occur
	Pos MatchPos

	// if action is LexMatch, this is the string we match
	String string

	// offset into the input to look for a match: 0 = current char, 1 = next one, etc
	Offset int

	// adjusts the size of the region (plus or minus) that is processed for the Next action -- allows broader and narrower matching relative to tagging
	SizeAdj int

	// the action(s) to perform, in order, if there is a match -- these are performed prior to iterating over child nodes
	Acts []Actions

	// string(s) for ReadUntil action -- will read until any of these strings are found -- separate different options with | -- if you need to read until a literal | just put two || in a row and that will show up as a blank, which is interpreted as a literal |
	Until string

	// the state to push if our action is PushState -- note that State matching is on String, not this value
	PushState string

	// create an optimization map for this rule, which must be a parent with children that all match against a Name string -- this reads the Name and directly activates the associated rule with that String, without having to iterate through them -- use this for keywords etc -- produces a SIGNIFICANT speedup for long lists of keywords.
	NameMap bool

	// length of source that matched -- if Next is called, this is what will be skipped to
	MatchLen int `view:"-" json:"-" xml:"-"`

	// NameMap lookup map -- created during Compile
	NmMap map[string]*Rule `edit:"-" json:"-" xml:"-"`
}

lex.Rule operates on the text input to produce the lexical tokens.

Lexing is done line-by-line -- you must push and pop states to coordinate across multiple lines, e.g., for multi-line comments.

There is full access to entire line and you can decide based on future (offset) characters.

In general it is best to keep lexing as simple as possible and leave the more complex things for the parsing step.

func NewRule

func NewRule(par ki.Ki, name ...string) *Rule

NewRule adds a new Rule with the given name to the given parent: lex.Rule operates on the text input to produce the lexical tokens.

Lexing is done line-by-line -- you must push and pop states to coordinate across multiple lines, e.g., for multi-line comments.

There is full access to entire line and you can decide based on future (offset) characters.

In general it is best to keep lexing as simple as possible and leave the more complex things for the parsing step.
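
A hedged sketch of building a keyword rule with the documented setters, where parent is an existing ki.Ki node and token.Keyword is assumed to be a valid token.Tokens value:

	kw := lex.NewRule(parent, "func")
	kw.SetMatch(lex.StrName). // StrName is required for keyword matches (see Matches)
		SetString("func").
		SetToken(token.Keyword).
		SetActs(lex.Next)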

func (*Rule) AsLexRule

func (lr *Rule) AsLexRule() *Rule

func (*Rule) BaseIface

func (lr *Rule) BaseIface() reflect.Type

func (*Rule) Compile

func (lr *Rule) Compile(ls *State) bool

Compile performs any one-time compilation steps on the rule returns false if there are any problems.

func (*Rule) CompileAll

func (lr *Rule) CompileAll(ls *State) bool

CompileAll is called on the top-level Rule to compile all nodes. returns true if everything is ok

func (*Rule) CompileNameMap

func (lr *Rule) CompileNameMap(ls *State) bool

CompileNameMap compiles name map -- returns false if there are problems.

func (*Rule) ComputeMatchLen

func (lr *Rule) ComputeMatchLen(ls *State)

ComputeMatchLen computes MatchLen based on match type

func (*Rule) DoAct

func (lr *Rule) DoAct(ls *State, act Actions, tok *token.KeyToken)

DoAct performs given action

func (*Rule) Find

func (lr *Rule) Find(find string) []*Rule

Find looks for rules in the tree that contain given string in String or Name fields

func (*Rule) IsMatch

func (lr *Rule) IsMatch(ls *State) bool

IsMatch tests if the rule matches for current input state, returns true if so, false if not

func (*Rule) IsMatchPos

func (lr *Rule) IsMatchPos(ls *State) bool

IsMatchPos tests if the rule matches position

func (*Rule) KiType

func (t *Rule) KiType() *gti.Type

KiType returns the *gti.Type of Rule

func (*Rule) Lex

func (lr *Rule) Lex(ls *State) *Rule

Lex tries to apply rule to given input state, returns lowest-level rule that matched, nil if none

func (*Rule) LexStart

func (lr *Rule) LexStart(ls *State) *Rule

LexStart is called on the top-level lex node to start lexing process for one step

func (*Rule) New

func (t *Rule) New() ki.Ki

New returns a new *Rule value

func (*Rule) SetActs

func (t *Rule) SetActs(v ...Actions) *Rule

SetActs sets the [Rule.Acts]: the action(s) to perform, in order, if there is a match -- these are performed prior to iterating over child nodes

func (*Rule) SetDesc

func (t *Rule) SetDesc(v string) *Rule

SetDesc sets the [Rule.Desc]: description / comments about this rule

func (*Rule) SetMatch

func (t *Rule) SetMatch(v Matches) *Rule

SetMatch sets the [Rule.Match]: the lexical match that we look for to engage this rule

func (*Rule) SetMatchLen

func (t *Rule) SetMatchLen(v int) *Rule

SetMatchLen sets the [Rule.MatchLen]: length of source that matched -- if Next is called, this is what will be skipped to

func (*Rule) SetNameMap

func (t *Rule) SetNameMap(v bool) *Rule

SetNameMap sets the [Rule.NameMap]: create an optimization map for this rule, which must be a parent with children that all match against a Name string -- this reads the Name and directly activates the associated rule with that String, without having to iterate through them -- use this for keywords etc -- produces a SIGNIFICANT speedup for long lists of keywords.

func (*Rule) SetNmMap

func (t *Rule) SetNmMap(v map[string]*Rule) *Rule

SetNmMap sets the [Rule.NmMap]: NameMap lookup map -- created during Compile

func (*Rule) SetOff

func (t *Rule) SetOff(v bool) *Rule

SetOff sets the [Rule.Off]: disable this rule -- useful for testing and exploration

func (*Rule) SetOffset

func (t *Rule) SetOffset(v int) *Rule

SetOffset sets the [Rule.Offset]: offset into the input to look for a match: 0 = current char, 1 = next one, etc

func (*Rule) SetPos

func (t *Rule) SetPos(v MatchPos) *Rule

SetPos sets the [Rule.Pos]: position where match can occur

func (*Rule) SetPushState

func (t *Rule) SetPushState(v string) *Rule

SetPushState sets the [Rule.PushState]: the state to push if our action is PushState -- note that State matching is on String, not this value

func (*Rule) SetSizeAdj

func (t *Rule) SetSizeAdj(v int) *Rule

SetSizeAdj sets the [Rule.SizeAdj]: adjusts the size of the region (plus or minus) that is processed for the Next action -- allows broader and narrower matching relative to tagging

func (*Rule) SetString

func (t *Rule) SetString(v string) *Rule

SetString sets the [Rule.String]: if action is LexMatch, this is the string we match

func (*Rule) SetToken

func (t *Rule) SetToken(v token.Tokens) *Rule

SetToken sets the [Rule.Token]: the token value that this rule generates -- use None for non-terminals

func (*Rule) SetUntil

func (t *Rule) SetUntil(v string) *Rule

SetUntil sets the [Rule.Until]: string(s) for ReadUntil action -- will read until any of these strings are found -- separate different options with | -- if you need to read until a literal | just put two || in a row and that will show up as a blank, which is interpreted as a literal |
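
A sketch of the separator rules described above, where r is an existing *Rule:

	r.SetUntil("*/")   // read until the end of a block comment
	r.SetUntil("a|b")  // read until either "a" or "b"
	r.SetUntil("a||b") // options "a", "" (a literal |), and "b"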

func (*Rule) TargetLen

func (lr *Rule) TargetLen(ls *State) int

TargetLen returns the length of the target including offset

func (*Rule) Validate

func (lr *Rule) Validate(ls *State) bool

Validate checks for any errors in the rules and issues warnings, returns true if valid (no err) and false if invalid (errs)

func (*Rule) WriteGrammar

func (lr *Rule) WriteGrammar(writer io.Writer, depth int)

WriteGrammar outputs the lexer rules as a formatted grammar in a BNF-like format. It is called recursively.

type Stack

type Stack []string

Stack is the stack for states

func (*Stack) Clone

func (ss *Stack) Clone() Stack

Clone returns a copy of the stack

func (*Stack) Pop

func (ss *Stack) Pop() string

Pop takes state off the stack and returns it

func (*Stack) Push

func (ss *Stack) Push(state string)

Push appends state to stack

func (*Stack) Reset

func (ss *Stack) Reset()

Reset resets the stack to empty.

func (*Stack) Top

func (ss *Stack) Top() string

Top returns the state at the top of the stack
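
A small sketch of the stack operations:

	var ss lex.Stack
	ss.Push("comment")
	fmt.Println(ss.Top()) // prints: comment (still on the stack)
	fmt.Println(ss.Pop()) // prints: comment (now removed)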

type State

type State struct {

	// the current file being lex'd
	Filename string

	// if true, record whitespace tokens -- else ignore
	KeepWS bool

	// the current line of source being processed
	Src []rune

	// the lex output for this line
	Lex Line

	// the comments output for this line -- kept separately
	Comments Line

	// the current rune char position within the line
	Pos int

	// the line within overall source that we're operating on (0 indexed)
	Ln int

	// the current rune read by NextRune
	Ch rune

	// state stack
	Stack Stack

	// the last name that was read
	LastName string

	// a guest lexer that can be installed for managing a different language type, e.g., quoted text in markdown files
	GuestLex *Rule

	// copy of stack at point when guest lexer was installed -- restore when popped
	SaveStack Stack

	// time stamp for lexing -- set at start of new lex process
	Time nptime.Time

	// any error messages accumulated during lexing specifically
	Errs ErrorList
}

lex.State is the state maintained for lexing
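
A hedged sketch of preparing a State to lex one line; lexer rules are then applied (e.g., via the top-level Rule.LexStart) until AtEol reports true:

	ls := &lex.State{}
	ls.Init()                    // initialize state at the start of lexing
	ls.SetLine([]rune("x := 1")) // set the line; resets Pos and the lex output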

func (*State) Add

func (ls *State) Add(tok token.KeyToken, st, ed int)

Add adds a lex token for given region -- merges with prior if same

func (*State) AtEol

func (ls *State) AtEol() bool

AtEol returns true if current position is at end of line

func (*State) CurRune

func (ls *State) CurRune() bool

CurRune reads the current rune into Ch and returns false if at end of line

func (*State) CurState

func (ls *State) CurState() string

CurState returns the current state

func (*State) Error

func (ls *State) Error(pos int, msg string, rule *Rule)

Error adds a lexing error at given position

func (*State) Init

func (ls *State) Init()

Init initializes the state at start of parsing

func (*State) LineString

func (ls *State) LineString() string

LineString returns the current lex output as tagged source

func (*State) MatchState

func (ls *State) MatchState(st string) bool

MatchState returns true if the current state matches the string

func (*State) Next

func (ls *State) Next(inc int) bool

Next moves to next position using given increment in source line -- returns false if at end

func (*State) NextRune

func (ls *State) NextRune() bool

NextRune reads the next rune into Ch and returns false if at end of line

func (*State) NextSrcLine

func (ls *State) NextSrcLine() string

NextSrcLine returns the next line of text

func (*State) PopState

func (ls *State) PopState() string

PopState pops state off of stack

func (*State) PushState

func (ls *State) PushState(st string)

PushState pushes state onto stack

func (*State) ReadEscape

func (ls *State) ReadEscape(quote rune) bool

ReadEscape parses an escape sequence where rune is the accepted escaped quote. In case of a syntax error, it stops at the offending character (without consuming it) and returns false. Otherwise it returns true.

func (*State) ReadName

func (ls *State) ReadName()

ReadName reads a standard alpha-numeric_ name -- saves in LastName

func (*State) ReadNameTmp

func (ls *State) ReadNameTmp(off int) string

ReadNameTmp reads a standard alpha-numeric_ name and returns it. Does not update the lexing position -- a "lookahead" name read

func (*State) ReadNumber

func (ls *State) ReadNumber() token.Tokens

ReadNumber reads a number of any sort, returning the type of the number

func (*State) ReadQuoted

func (ls *State) ReadQuoted()

func (*State) ReadUntil

func (ls *State) ReadUntil(until string)

ReadUntil reads until the given string(s) are found -- does depth tracking if looking for a bracket open / close kind of symbol. For multiple "until" string options, separate each with |; to match a literal |, put two || in a row, which produces an empty option that is interpreted as a literal |. Terminates at the end of the line without error.

func (*State) Rune

func (ls *State) Rune(off int) (rune, bool)

Rune gets the rune at given offset from current position, returns false if out of range

func (*State) ScanMantissa

func (ls *State) ScanMantissa(base int)

func (*State) SetLine

func (ls *State) SetLine(src []rune)

SetLine sets a new line for parsing and initializes the lex output and pos

func (*State) String

func (ls *State) String(off, sz int) (string, bool)

String gets the string at given offset and length from current position, returns false if out of range

type TokenMap

type TokenMap map[token.Tokens]struct{}

TokenMap is a token map, for optimizing token exclusion

func (TokenMap) Has

func (tm TokenMap) Has(tok token.Tokens) bool

Has returns true if given token is in the map

func (TokenMap) Set

func (tm TokenMap) Set(tok token.Tokens)

Set sets map for given token
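
For example, where token.Comment is assumed to be a valid token.Tokens value:

	tm := lex.TokenMap{}
	tm.Set(token.Comment)
	fmt.Println(tm.Has(token.Comment)) // prints: true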

type TwoState

type TwoState struct {

	// position in lex tokens we're on
	Pos Pos

	// file that we're operating on
	Src *File

	// stack of nesting tokens
	NestStack []token.Tokens

	// any error messages accumulated during lexing specifically
	Errs ErrorList
}

TwoState is the state maintained for the PassTwo process

func (*TwoState) Error

func (ts *TwoState) Error(msg string)

Error adds a PassTwo error at the current position.

func (*TwoState) Init

func (ts *TwoState) Init()

Init initializes state for a new pass -- called at start of NestDepth

func (*TwoState) NestStackStr

func (ts *TwoState) NestStackStr() string

NestStackStr returns the token stack as strings

func (*TwoState) NextLine

func (ts *TwoState) NextLine()

NextLine advances to next line

func (*TwoState) SetSrc

func (ts *TwoState) SetSrc(src *File)

SetSrc sets the source we're operating on
