simplexer

package module
v0.0.0-...-bce8e06 Latest
Published: Jan 10, 2018 License: MIT Imports: 5 Imported by: 1

README

simplexer


A simple lexical analyzer for Go.

example

simplest usage
package main

import (
	"fmt"
	"strings"

	"github.com/macrat/simplexer"
)

func Example() {
	input := "hello_world = \"hello world\"\nnumber = 1"
	lexer := simplexer.NewLexer(strings.NewReader(input))

	fmt.Println(input)
	fmt.Println("==========")

	for {
		token, err := lexer.Scan()
		if err != nil {
			panic(err.Error())
		}
		if token == nil {
			fmt.Println("==========")
			return
		}

		fmt.Printf("line %2d, column %2d: %s: %s\n",
			token.Position.Line,
			token.Position.Column,
			token.Type,
			token.Literal)
	}
}

The output is as follows.

hello_world = "hello world"
number = 1
==========
line  0, column  0: IDENT: hello_world
line  0, column 12: OTHER: =
line  0, column 14: STRING: "hello world"
line  1, column  0: IDENT: number
line  1, column  7: OTHER: =
line  1, column  9: NUMBER: 1
==========
more examples

Please see godoc.

Documentation

Overview

A simple lexical analyzer for Go.

Example
package main

import (
	"fmt"
	"strings"

	"github.com/macrat/simplexer"
)

func main() {
	input := "hello_world = \"hello world\"\nnumber = 1"
	lexer := simplexer.NewLexer(strings.NewReader(input))

	fmt.Println(input)
	fmt.Println("==========")

	for {
		token, err := lexer.Scan()
		if err != nil {
			panic(err.Error())
		}
		if token == nil {
			fmt.Println("==========")
			return
		}

		fmt.Printf("line %2d, column %2d: %s: %s\n",
			token.Position.Line,
			token.Position.Column,
			token.Type,
			token.Literal)
	}

}
Output:

hello_world = "hello world"
number = 1
==========
line  0, column  0: IDENT: hello_world
line  0, column 12: OTHER: =
line  0, column 14: STRING: "hello world"
line  1, column  0: IDENT: number
line  1, column  7: OTHER: =
line  1, column  9: NUMBER: 1
==========
Example (AddOriginalTokenType)
package main

import (
	"fmt"
	"strings"

	"github.com/macrat/simplexer"
)

func main() {
	const (
		SUBSTITUTION simplexer.TokenID = iota
		NEWLINE
	)

	input := "hello_world = \"hello world\"\nnumber = 1"
	lexer := simplexer.NewLexer(strings.NewReader(input))

	lexer.Whitespace = simplexer.NewPatternTokenType(-1, []string{"\t", " "})
	// lexer.Whitespace = simplexer.NewRegexpTokenType(-1, `[\t ]`)  // equivalent to the line above

	lexer.TokenTypes = append([]simplexer.TokenType{
		simplexer.NewPatternTokenType(SUBSTITUTION, []string{"="}),
		simplexer.NewRegexpTokenType(NEWLINE, `^[\n\r]+`),
	}, lexer.TokenTypes...)

	fmt.Println(input)
	fmt.Println("==========")

	for {
		token, err := lexer.Scan()
		if err != nil {
			panic(err.Error())
		}
		if token == nil {
			fmt.Println("==========")
			return
		}

		fmt.Printf("%s: %#v\n", token.Type, token.Literal)
	}

}
Output:

hello_world = "hello world"
number = 1
==========
IDENT: "hello_world"
UNKNOWN(0): "="
STRING: "\"hello world\""
UNKNOWN(1): "\n"
IDENT: "number"
UNKNOWN(0): "="
NUMBER: "1"
==========
Example (PositionInformation)
package main

import (
	"fmt"
	"strings"

	"github.com/macrat/simplexer"
)

func main() {
	input := "this is a\ntest string\n"
	lexer := simplexer.NewLexer(strings.NewReader(input))

	for {
		token, err := lexer.Scan()
		if err != nil {
			panic(err.Error())
		}
		if token == nil {
			break
		}

		fmt.Printf("%d: %s\n", token.Position.Line, lexer.GetLastLine())
		fmt.Printf(" | %s%s\n\n",
			strings.Repeat(" ", token.Position.Column),
			strings.Repeat("=", len(token.Literal)))
	}

}
Output:

0: this is a
 | ====

0: this is a
 |      ==

0: this is a
 |         =

1: test string
 | ====

1: test string
 |      ======


Constants

This section is empty.

Variables

View Source
var (
	DefaultWhitespace = NewPatternTokenType(-1, []string{" ", "\t", "\r", "\n"})

	DefaultTokenTypes = []TokenType{
		NewRegexpTokenType(IDENT, `[a-zA-Z_][a-zA-Z0-9_]*`),
		NewRegexpTokenType(NUMBER, `[0-9]+(?:\.[0-9]+)?`),
		NewRegexpTokenType(STRING, `\"([^"]*)\"`),
		NewRegexpTokenType(OTHER, `.`),
	}
)

Default values for properties of Lexer, defined as package variables.

Functions

This section is empty.

Types

type Lexer

type Lexer struct {
	Whitespace TokenType
	TokenTypes []TokenType
	// contains filtered or unexported fields
}

The lexical analyzer.

Whitespace is a TokenType for characters to skip, such as whitespace. The default value is simplexer.DefaultWhitespace. No characters are skipped if Whitespace is nil.

TokenTypes is an array of TokenType. Lexer checks the TokenTypes sequentially and returns the first matched token. The default is simplexer.DefaultTokenTypes.

Be careful: a TokenType appended after OTHER will never be used, because OTHER accepts any single character. Prepend custom types instead, as in the sketch below.
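A minimal sketch of safe ordering, following the same prepend pattern as Example (AddOriginalTokenType) above; the ARROW token ID is a hypothetical name invented for this illustration:

package main

import (
	"fmt"
	"strings"

	"github.com/macrat/simplexer"
)

// ARROW is a hypothetical token ID used only for this sketch.
const ARROW simplexer.TokenID = iota

func main() {
	lexer := simplexer.NewLexer(strings.NewReader("x -> y"))

	// Prepend the custom TokenType so it is checked before the
	// defaults; appended after OTHER it would never match, since
	// OTHER already accepts any single character.
	lexer.TokenTypes = append([]simplexer.TokenType{
		simplexer.NewPatternTokenType(ARROW, []string{"->"}),
	}, lexer.TokenTypes...)

	for {
		token, err := lexer.Scan()
		if err != nil {
			panic(err.Error())
		}
		if token == nil {
			break
		}
		fmt.Printf("%s: %s\n", token.Type, token.Literal)
	}
}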

func NewLexer

func NewLexer(reader io.Reader) *Lexer

Make a new Lexer.

func (*Lexer) GetLastLine

func (l *Lexer) GetLastLine() string

GetLastLine returns the line of the last scanned token.

func (*Lexer) Peek

func (l *Lexer) Peek() (*Token, error)

Peek returns the first token in the buffer without removing it.

Returns nil as *Token if the buffer is empty.

func (*Lexer) Scan

func (l *Lexer) Scan() (*Token, error)

Scan gets the first token in the buffer and removes it from the buffer.

This function uses Lexer.Peek, so please read the documentation of Peek as well.
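A minimal sketch of the relationship between the two calls, using only the documented Peek and Scan signatures: Peek leaves the token in place, so the following Scan returns the same token.

package main

import (
	"fmt"
	"strings"

	"github.com/macrat/simplexer"
)

func main() {
	lexer := simplexer.NewLexer(strings.NewReader("a b"))

	// Peek reports the next token without consuming it...
	peeked, err := lexer.Peek()
	if err != nil {
		panic(err.Error())
	}

	// ...so the next Scan should return the same token.
	scanned, err := lexer.Scan()
	if err != nil {
		panic(err.Error())
	}

	fmt.Println(peeked.Literal, scanned.Literal) // both print "a"
}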

type PatternTokenType

type PatternTokenType struct {
	ID       TokenID
	Patterns []string
}

PatternTokenType is a dictionary-style token type.

PatternTokenType holds a list of strings and finds tokens that exactly match one of them.

func NewPatternTokenType

func NewPatternTokenType(id TokenID, patterns []string) *PatternTokenType

Make a new PatternTokenType.

id is the TokenID of the new PatternTokenType.

patterns is an array of patterns to match.

Example
package main

import (
	"fmt"
	"strings"

	"github.com/macrat/simplexer"
)

func main() {
	const (
		HOGE simplexer.TokenID = iota
		OTHERS
	)

	lexer := simplexer.NewLexer(strings.NewReader("this is hoge and HOGE or Hoge"))

	lexer.TokenTypes = []simplexer.TokenType{
		simplexer.NewPatternTokenType(HOGE, []string{"hoge", "HOGE"}),
		simplexer.NewRegexpTokenType(OTHERS, `[^ ]+`),
	}

	for {
		token, _ := lexer.Scan()
		if token == nil {
			break
		}

		if token.Type.GetID() == HOGE {
			fmt.Printf("!!! %s !!!\n", token.Literal)
		}

		if token.Type.GetID() == OTHERS {
			fmt.Println(token.Literal)
		}
	}

}
Output:

this
is
!!! hoge !!!
and
!!! HOGE !!!
or
Hoge

func (*PatternTokenType) FindToken

func (ptt *PatternTokenType) FindToken(s string, p Position) *Token

FindToken returns a new Token if s starts with one of the patterns.

func (*PatternTokenType) GetID

func (ptt *PatternTokenType) GetID() TokenID

GetID returns the ID of this token type.

func (*PatternTokenType) String

func (ptt *PatternTokenType) String() string

Get a readable string of the TokenID.

type Position

type Position struct {
	Line   int
	Column int
}

Position in the file.

func (Position) After

func (p Position) After(x Position) bool

After checks whether p is after x.

func (Position) Before

func (p Position) Before(x Position) bool

Before checks whether p is before x.

func (Position) String

func (p Position) String() string

Convert to string.
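A small sketch of the comparison methods; the exact format produced by String is not documented here, so it is printed rather than asserted:

package main

import (
	"fmt"

	"github.com/macrat/simplexer"
)

func main() {
	p := simplexer.Position{Line: 0, Column: 5}
	x := simplexer.Position{Line: 1, Column: 0}

	// x is on a later line, so it comes after p in the file.
	fmt.Println(x.After(p))  // true
	fmt.Println(p.Before(x)) // true

	// String gives a readable form of the position.
	fmt.Println(p.String())
}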

type RegexpTokenType

type RegexpTokenType struct {
	ID TokenID
	Re *regexp.Regexp
}

RegexpTokenType is a TokenType implemented with a regular expression.

ID is the TokenID for this token type.

Re is the regular expression of the token. It has to start with "^".

func NewRegexpTokenType

func NewRegexpTokenType(id TokenID, re string) *RegexpTokenType

Make a new RegexpTokenType.

id is the TokenID of the new RegexpTokenType.

re is the regular expression of the token.

Example
package main

import (
	"fmt"
	"strings"

	"github.com/macrat/simplexer"
)

func main() {
	const (
		NUMBER simplexer.TokenID = iota
		OTHERS
	)

	lexer := simplexer.NewLexer(strings.NewReader("123this is test456"))

	lexer.TokenTypes = []simplexer.TokenType{
		simplexer.NewRegexpTokenType(NUMBER, `[0-9]+`),
		simplexer.NewRegexpTokenType(OTHERS, `[^0-9]+`),
	}

	for {
		token, _ := lexer.Scan()
		if token == nil {
			break
		}

		if token.Type.GetID() == NUMBER {
			fmt.Printf("%s is number\n", token.Literal)
		}

		if token.Type.GetID() == OTHERS {
			fmt.Printf("%s is not number\n", token.Literal)
		}
	}

}
Output:

123 is number
this is test is not number
456 is number

func (*RegexpTokenType) FindToken

func (rtt *RegexpTokenType) FindToken(s string, p Position) *Token

FindToken returns a new Token if s starts with a match of this regular expression.

func (*RegexpTokenType) GetID

func (rtt *RegexpTokenType) GetID() TokenID

GetID returns the ID of this token type.

func (*RegexpTokenType) String

func (rtt *RegexpTokenType) String() string

Get a readable string of the TokenID.

type Token

type Token struct {
	Type       TokenType
	Literal    string   // The matched string.
	Submatches []string // Submatches of the regular expression.
	Position   Position // Position of token.
}

The data of a found token.
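A minimal sketch of the Submatches field, using the default STRING pattern `\"([^"]*)\"` from DefaultTokenTypes, which has one capture group. Whether the full match appears alongside the captured group is not documented, so the slice is printed rather than indexed:

package main

import (
	"fmt"
	"strings"

	"github.com/macrat/simplexer"
)

func main() {
	lexer := simplexer.NewLexer(strings.NewReader(`"hello world"`))

	token, err := lexer.Scan()
	if err != nil {
		panic(err.Error())
	}

	// The default STRING pattern has one capture group, so the
	// unquoted content should appear among the Submatches.
	fmt.Printf("literal:    %q\n", token.Literal)
	fmt.Printf("submatches: %q\n", token.Submatches)
}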

type TokenID

type TokenID int

TokenID is an identifier for a TokenType.

const (
	OTHER TokenID = -(iota + 1)
	IDENT
	NUMBER
	STRING
)

Default token IDs.

func (TokenID) String

func (id TokenID) String() string

Convert to readable string.

Be careful: user-added token IDs will be converted to UNKNOWN.

type TokenType

type TokenType interface {
	GetID() TokenID
	FindToken(string, Position) *Token
}

TokenType is a rule for making a Token.

GetID returns the TokenID of this TokenType. A TokenID may be shared by multiple TokenTypes.

FindToken returns a new Token if the head of the first argument matches the pattern of this TokenType. The second argument is the position of the token in the buffer. In most implementations, the Position is passed into the resulting Token directly.
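As a concrete illustration, a hand-written implementation might look like the sketch below. CommentTokenType and COMMENT are hypothetical names invented for this sketch, not part of the package:

package main

import (
	"fmt"
	"strings"

	"github.com/macrat/simplexer"
)

// COMMENT is a hypothetical token ID used only for this sketch.
const COMMENT simplexer.TokenID = iota

// CommentTokenType matches a "#" line comment at the head of the input.
type CommentTokenType struct{}

func (c CommentTokenType) GetID() simplexer.TokenID {
	return COMMENT
}

// FindToken returns a Token only if s starts with "#", consuming
// everything up to (but not including) the next newline.
func (c CommentTokenType) FindToken(s string, p simplexer.Position) *simplexer.Token {
	if !strings.HasPrefix(s, "#") {
		return nil
	}
	literal := s
	if i := strings.IndexByte(s, '\n'); i >= 0 {
		literal = s[:i]
	}
	return &simplexer.Token{
		Type:     c,
		Literal:  literal,
		Position: p, // pass the given position through, as the interface suggests
	}
}

func main() {
	lexer := simplexer.NewLexer(strings.NewReader("# a comment\nx = 1"))
	lexer.TokenTypes = append([]simplexer.TokenType{CommentTokenType{}}, lexer.TokenTypes...)

	for {
		token, err := lexer.Scan()
		if err != nil {
			panic(err.Error())
		}
		if token == nil {
			break
		}
		fmt.Printf("%v: %q\n", token.Type, token.Literal)
	}
}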

type UnknownTokenError

type UnknownTokenError struct {
	Literal  string
	Position Position
}

The error that is returned when an unknown token is found.

func (UnknownTokenError) Error

func (se UnknownTokenError) Error() string

Get the error message as a string.
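A hedged sketch of recovering the offending literal and position when Scan fails. It assumes Scan returns UnknownTokenError by value (the value receiver on Error suggests so, but that is an assumption), and the restricted TokenTypes setup exists only to force the error:

package main

import (
	"fmt"
	"strings"

	"github.com/macrat/simplexer"
)

func main() {
	lexer := simplexer.NewLexer(strings.NewReader("abc ! def"))

	// Only IDENT is recognized, so the "!" can't match and Scan
	// should fail with an UnknownTokenError.
	lexer.TokenTypes = []simplexer.TokenType{
		simplexer.NewRegexpTokenType(simplexer.IDENT, `[a-z]+`),
	}

	for {
		token, err := lexer.Scan()
		if err != nil {
			// Assumes the error is returned by value; adjust the
			// assertion if the package wraps it or uses a pointer.
			if ute, ok := err.(simplexer.UnknownTokenError); ok {
				fmt.Printf("unknown token %q at %v\n", ute.Literal, ute.Position)
				return
			}
			panic(err.Error())
		}
		if token == nil {
			return
		}
		fmt.Println(token.Literal)
	}
}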
