lex

package
v0.0.0-...-261b5b0
Published: May 12, 2016 License: MIT Imports: 6 Imported by: 0

README

Lexing/Dialects

QLBridge implements a few different dialects: SQL, FilterQL, Expressions, and JSON (lexer constructors are sketched below):

  • SQL: a subset, non-complete implementation of SQL
  • FilterQL: a filtering language, essentially just the WHERE part of SQL, but more DSL-ish, with the syntax AND ( <expr>, <expr>, <expr> ) instead of <expr> AND <expr> AND <expr>
  • Expressions: simple boolean-logic expressions; see https://github.com/araddon/qlbridge/blob/master/vm/vm_test.go#L57 for examples
  • JSON: lexes JSON (rather than unmarshaling it)
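
A minimal sketch of picking a lexer per dialect, using the constructors documented below (the query strings are illustrative; the Expression dialects go through the generic NewLexer with an exported *Dialect var such as ExpressionDialect):

	import "github.com/araddon/qlbridge/lex"

	sqlLexer    := lex.NewSqlLexer(`SELECT name, age FROM user WHERE age > 21;`)
	filterLexer := lex.NewFilterQLLexer(`FILTER AND ( momentum > 20, propensity > 50 ) ALIAS myfilter`)
	jsonLexer   := lex.NewJsonLexer(`{"name":"bob","apples":["honeycrisp","fuji"]}`)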

Creating a custom Lexer/Parser, i.e. a Dialect

See the example in the dialects/example folder for a custom ql dialect; it creates a mythical SUBSCRIBETO query language:

package main

import (
	"fmt"
	"strings"

	"github.com/araddon/qlbridge/lex"
)

// Token specific to our PUBSUB dialect; pick a value above the
// token ids already used by the lex package (TokenMap goes up to 1051).
var TokenSubscribeTo lex.TokenType = 2000

// Custom lexer for our maybe hash function
func LexMaybe(l *lex.Lexer) lex.StateFn {

	l.SkipWhiteSpaces()

	keyWord := strings.ToLower(l.PeekWord())

	switch keyWord {
	case "maybe":
		l.ConsumeWord("maybe")
		l.Emit(lex.TokenIdentity)
		return lex.LexExpressionOrIdentity
	}
	return lex.LexExpressionOrIdentity
}

func main() {

	// We are going to inject new tokens into qlbridge
	lex.TokenNameMap[TokenSubscribeTo] = &lex.TokenInfo{Description: "subscribeto"}

	// Override the identity characters in qlbridge to allow a dash in identities
	lex.IDENTITY_CHARS = "_./-"

	lex.LoadTokenInfo()

	// We are going to create our own Dialect that uses a "SUBSCRIBETO" keyword
	pubsub := &lex.Clause{Token: TokenSubscribeTo, Clauses: []*lex.Clause{
		{Token: TokenSubscribeTo, Lexer: lex.LexColumns},
		{Token: lex.TokenFrom, Lexer: LexMaybe},
		{Token: lex.TokenWhere, Lexer: lex.LexColumns, Optional: true},
	}}
	ourDialect := &lex.Dialect{
		Name: "Subscribe To", Statements: []*lex.Clause{pubsub},
	}
	ourDialect.Init()

	l := lex.NewLexer(`
			SUBSCRIBETO
				count(x), Name
			FROM ourstream
			WHERE
				k = REPLACE(LOWER(Name),'cde','xxx');
		`, ourDialect)

	// Drain the tokens so we can see what the lexer produced
	for {
		tok := l.NextToken()
		if tok.T == lex.TokenEOF || tok.T == lex.TokenError {
			break
		}
		fmt.Println(tok)
	}
}

Documentation

Overview

Lexing for QLBridge

Index

Constants

This section is empty.

Variables

var (
	// FEATURE FLAGS
	SUPPORT_DURATION = true
	// Identity Quoting
	//  http://stackoverflow.com/questions/1992314/what-is-the-difference-between-single-and-double-quotes-in-sql
	// you might want to set this to not include single ticks
	//  http://dev.mysql.com/doc/refman/5.1/en/string-literals.html
	//IdentityQuoting = []byte{'[', '`', '"'} // mysql ansi-ish, no single quote identities, and allowing double-quote
	IdentityQuoting = []byte{'[', '`', '\''} // more ansi-ish, allow double quotes around identities
)
var (
	// Which Identity Characters are allowed?
	//    if we allow forward slashes then we can allow xpath esque notation
	// IDENTITY_CHARS = "_."
	IDENTITY_CHARS = "_./-"
	// A much more lax identity char set rule
	IDENTITY_LAX_CHARS = "_./- "
	// sql variables start with @@ ??
	IDENTITY_SQL_CHARS = "@_./-"

	// list of token-name
	TokenNameMap = map[TokenType]*TokenInfo{}/* 120 elements not displayed */

)
var FilterSelectStatement = []*Clause{
	{Token: TokenSelect, Lexer: LexSelectClause, Optional: false},
	{Token: TokenFrom, Lexer: LexTableReferences, Optional: false},
	{Token: TokenWhere, Lexer: LexConditionalClause, Optional: true},
	{Token: TokenFilter, Lexer: LexFilterClause, Optional: true},
	{Token: TokenLimit, Lexer: LexNumber, Optional: true},
	{Token: TokenWith, Lexer: LexJsonOrKeyValue, Optional: true},
	{Token: TokenAlias, Lexer: LexIdentifier, Optional: true},
	{Token: TokenEOF, Lexer: LexEndOfStatement, Optional: false},
}
var FilterStatement = []*Clause{
	{Token: TokenFilter, Lexer: LexFilterClause, Optional: true},
	{Token: TokenFrom, Lexer: LexTableReferences, Optional: true},
	{Token: TokenLimit, Lexer: LexNumber, Optional: true},
	{Token: TokenWith, Lexer: LexJsonOrKeyValue, Optional: true},
	{Token: TokenAlias, Lexer: LexIdentifier, Optional: true},
	{Token: TokenEOF, Lexer: LexEndOfStatement, Optional: false},
}
var LexDataTypeIdentity = LexDataType(TokenDataType)

var LexIdentifier = LexIdentifierOfType(TokenIdentity)

LexIdentifier scans and finds named things (tables, columns)
and specifies them as TokenIdentity; uses LexIdentifierOfType

TODO: dialect controls escaping/quoting techniques

[name]         select [first name] from usertable;
'name'         select 'user' from usertable;
first_name     select first_name from usertable;
usertable      select first_name AS fname from usertable;
_name          select _name AS name from stuff;
var LexTableIdentifier = LexIdentifierOfType(TokenTable)
var SqlAlter = []*Clause{
	{Token: TokenAlter, Lexer: LexEmpty},
	{Token: TokenTable, Lexer: LexIdentifier},
	{Token: TokenChange, Lexer: LexDdlColumn},
	{Token: TokenWith, Lexer: LexJson, Optional: true},
}
var SqlDelete = []*Clause{
	{Token: TokenDelete, Lexer: LexEmpty},
	{Token: TokenFrom, Lexer: LexIdentifierOfType(TokenTable)},
	{Token: TokenSet, Lexer: LexColumns, Optional: true},
	{Token: TokenWhere, Lexer: LexColumns, Optional: true},
	{Token: TokenLimit, Lexer: LexNumber, Optional: true},
	{Token: TokenWith, Lexer: LexJson, Optional: true},
}
var SqlDescribe = []*Clause{
	{Token: TokenDescribe, Lexer: LexColumns},
}
var SqlDescribeAlt = []*Clause{
	{Token: TokenDesc, Lexer: LexColumns},
}

alternate spelling of Describe

var SqlExplain = []*Clause{
	{Token: TokenExplain, Lexer: LexColumns},
}

Explain is an alias of Describe

var SqlInsert = []*Clause{
	{Token: TokenInsert, Lexer: LexUpsertClause, Name: "insert.entry"},
	{Token: TokenLeftParenthesis, Lexer: LexColumnNames, Optional: true},
	{Token: TokenSet, Lexer: LexTableColumns, Optional: true},
	{Token: TokenSelect, Optional: true, Clauses: insertSubQuery},
	{Token: TokenValues, Lexer: LexTableColumns, Optional: true},
	{Token: TokenWith, Lexer: LexJson, Optional: true},
}
var SqlPrepare = []*Clause{
	{Token: TokenPrepare, Lexer: LexPreparedStatement},
	{Token: TokenFrom, Lexer: LexTableReferences},
}
var SqlReplace = []*Clause{
	{Token: TokenReplace, Lexer: LexEmpty},
	{Token: TokenInto, Lexer: LexIdentifierOfType(TokenTable)},
	{Token: TokenSet, Lexer: LexTableColumns, Optional: true},
	{Token: TokenLeftParenthesis, Lexer: LexTableColumns, Optional: true},
	{Token: TokenWith, Lexer: LexJson, Optional: true},
}
var SqlSelect = []*Clause{
	{Token: TokenSelect, Lexer: LexSelectClause},
	{Token: TokenInto, Lexer: LexIdentifierOfType(TokenTable), Optional: true},
	{Token: TokenFrom, Lexer: LexTableReferenceFirst, Optional: true, Repeat: false, Clauses: fromSource, Name: "sqlSelect.From"},
	{KeywordMatcher: sourceMatch, Optional: true, Repeat: true, Clauses: moreSources, Name: "sqlSelect.sources"},
	{Token: TokenWhere, Lexer: LexConditionalClause, Optional: true, Clauses: whereQuery, Name: "sqlSelect.where"},
	{Token: TokenGroupBy, Lexer: LexColumns, Optional: true, Name: "sqlSelect.groupby"},
	{Token: TokenHaving, Lexer: LexConditionalClause, Optional: true, Name: "sqlSelect.having"},
	{Token: TokenOrderBy, Lexer: LexOrderByColumn, Optional: true, Name: "sqlSelect.orderby"},
	{Token: TokenLimit, Lexer: LexNumber, Optional: true},
	{Token: TokenOffset, Lexer: LexNumber, Optional: true},
	{Token: TokenWith, Lexer: LexJsonOrKeyValue, Optional: true},
	{Token: TokenAlias, Lexer: LexIdentifier, Optional: true},
	{Token: TokenEOF, Lexer: LexEndOfStatement, Optional: false},
}
var SqlSet = []*Clause{
	{Token: TokenSet, Lexer: LexColumns},
}
var SqlShow = []*Clause{
	{Token: TokenShow, Lexer: LexShowClause},
	{Token: TokenWhere, Lexer: LexConditionalClause, Optional: true},
}
var SqlUpdate = []*Clause{
	{Token: TokenUpdate, Lexer: LexIdentifierOfType(TokenTable)},
	{Token: TokenSet, Lexer: LexColumns},
	{Token: TokenWhere, Lexer: LexColumns, Optional: true},
	{Token: TokenLimit, Lexer: LexNumber, Optional: true},
	{Token: TokenWith, Lexer: LexJson, Optional: true},
}
var SqlUpsert = []*Clause{
	{Token: TokenUpsert, Lexer: LexUpsertClause, Name: "upsert.entry"},
	{Token: TokenSet, Lexer: LexTableColumns, Optional: true},
	{Token: TokenLeftParenthesis, Lexer: LexTableColumns, Optional: true},
	{Token: TokenWith, Lexer: LexJson, Optional: true},
}
var SqlUse = []*Clause{
	{Token: TokenUse, Lexer: LexIdentifier},
}

Functions

func IdentityRunesOnly

func IdentityRunesOnly(identity string) bool

func LoadTokenInfo

func LoadTokenInfo()

Types

type Clause

type Clause struct {
	Optional       bool      // Is this Clause/Keyword optional?
	Repeat         bool      // Repeatable clause?
	Token          TokenType // Token identifying start of clause, optional
	KeywordMatcher KeywordMatcher
	Lexer          StateFn   // Lex Function to lex clause, optional
	Clauses        []*Clause // Children Clauses
	Name           string
	// contains filtered or unexported fields
}

func (*Clause) MatchesKeyword

func (c *Clause) MatchesKeyword(peekWord string, l *Lexer) bool

func (*Clause) String

func (c *Clause) String() string

type Dialect

type Dialect struct {
	Name       string
	Statements []*Clause
	// contains filtered or unexported fields
}

Dialect is a language made up of multiple Statements:

SQL
CQL
INFLUXQL   etc

var ExpressionDialect *Dialect = &Dialect{
	Statements: []*Clause{
		&Clause{Token: TokenNil, Clauses: expressionStatement},
	},
}

ExpressionDialect is a single-expression dialect, useful for parsing a single function such as:

eq(tolower(item_name),"buy")

var FilterQLDialect *Dialect = &Dialect{
	Statements: []*Clause{
		&Clause{Token: TokenFilter, Clauses: FilterStatement},
		&Clause{Token: TokenSelect, Clauses: FilterSelectStatement},
	},
}

FilterQL is a WHERE-clause filtering language, slightly more DSL-ish than a SQL WHERE clause.

var JsonDialect *Dialect = &Dialect{
	Statements: []*Clause{
		&Clause{Token: TokenNil, Clauses: jsonDialectStatement},
	},
}

JsonDialect is a json lexer:

["hello","world"]
{"name":"bob","apples":["honeycrisp","fuji"]}

var LogicalExpressionDialect *Dialect = &Dialect{
	Statements: []*Clause{
		&Clause{Token: TokenNil, Clauses: logicalEpressions},
	},
}

LogicalExpressionDialect lexes logical expression statements of the following format:

5 > 4   => true
4 + 5   => 9
tolower(item) + 12 > 4
4 IN (4,5,6)

var SqlDialect *Dialect = &Dialect{
	Statements: []*Clause{
		&Clause{Token: TokenPrepare, Clauses: SqlPrepare},
		&Clause{Token: TokenSelect, Clauses: SqlSelect},
		&Clause{Token: TokenUpdate, Clauses: SqlUpdate},
		&Clause{Token: TokenUpsert, Clauses: SqlUpsert},
		&Clause{Token: TokenInsert, Clauses: SqlInsert},
		&Clause{Token: TokenDelete, Clauses: SqlDelete},
		&Clause{Token: TokenAlter, Clauses: SqlAlter},
		&Clause{Token: TokenDescribe, Clauses: SqlDescribe},
		&Clause{Token: TokenExplain, Clauses: SqlExplain},
		&Clause{Token: TokenDesc, Clauses: SqlDescribeAlt},
		&Clause{Token: TokenShow, Clauses: SqlShow},
		&Clause{Token: TokenSet, Clauses: SqlSet},
		&Clause{Token: TokenUse, Clauses: SqlUse},
	},
}

SqlDialect is a SQL-like dialect

SELECT
UPDATE
INSERT
UPSERT
DELETE

SHOW identity;
DESCRIBE identity;
PREPARE

ddl

  ALTER

TODO:
    CREATE
    VIEW

func (*Dialect) Init

func (m *Dialect) Init()

type KeywordMatcher

type KeywordMatcher func(c *Clause, peekWord string, l *Lexer) bool

A Clause may supply a keyword matcher instead of a keyword token
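
For instance, a minimal hypothetical matcher as used from outside the package (the name and the suffix rule are illustrative, not part of the package):

	// match any clause-starting word that ends in "to", e.g. SUBSCRIBETO
	var matchSubscribeTo lex.KeywordMatcher = func(c *lex.Clause, peekWord string, l *lex.Lexer) bool {
		return strings.HasSuffix(strings.ToLower(peekWord), "to")
	}

	// used in place of Token when building a Clause:
	//   {KeywordMatcher: matchSubscribeTo, Lexer: lex.LexColumns}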

type Lexer

type Lexer struct {
	// contains filtered or unexported fields
}

Lexer holds the state of the lexical scanning.

It holds a *Dialect*, which gives most of the rules specific to the language being lexed.

Based, many generations removed, on the lexer from the "text/template" package. See http://www.youtube.com/watch?v=HxaD_trXwRE

func NewFilterQLLexer

func NewFilterQLLexer(input string) *Lexer

Creates a new lexer for the input string using FilterQLDialect, a WHERE-clause filtering language.

func NewJsonLexer

func NewJsonLexer(input string) *Lexer

Creates a new json dialect lexer for the input string

func NewLexer

func NewLexer(input string, dialect *Dialect) *Lexer

Creates a new lexer for the input string

func NewSqlLexer

func NewSqlLexer(input string) *Lexer

Creates a new lexer for the input string using SqlDialect; this is a SQL(ish)-compatible parser.

func (*Lexer) ConsumeWord

func (l *Lexer) ConsumeWord(word string)

Moves the position forward, consuming the given word.

func (*Lexer) Emit

func (l *Lexer) Emit(t TokenType)

emit passes a token back to the client.

func (*Lexer) IsComment

func (l *Lexer) IsComment() bool

Is this a comment?

func (*Lexer) IsEnd

func (l *Lexer) IsEnd() bool

have we consumed all input

func (*Lexer) LexMatchSkip

func (l *Lexer) LexMatchSkip(tok TokenType, skip int, fn StateFn) StateFn

matches expected tokentype emitting the token on success and returning passed state function.

func (*Lexer) Next

func (l *Lexer) Next() (r rune)

next returns the next rune in the input

func (*Lexer) NextToken

func (l *Lexer) NextToken() Token

returns the next token from the input
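
For example, a minimal token-drain loop (the query string is illustrative):

	package main

	import (
		"fmt"

		"github.com/araddon/qlbridge/lex"
	)

	func main() {
		l := lex.NewSqlLexer(`SELECT name, age FROM user WHERE age > 21;`)
		for {
			tok := l.NextToken()
			if tok.T == lex.TokenEOF || tok.T == lex.TokenError {
				break
			}
			fmt.Printf("%-18v %q\n", tok.T, tok.V)
		}
	}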

func (*Lexer) Peek

func (l *Lexer) Peek() rune

peek returns but does not consume the next rune in the input.

func (*Lexer) PeekWord

func (l *Lexer) PeekWord() string

Grabs the next word (up to whitespace) without consuming it.

func (*Lexer) PeekWord2

func (l *Lexer) PeekWord2() string

Grabs the next word (up to whitespace) without consuming it.

func (*Lexer) PeekX

func (l *Lexer) PeekX(x int) string

grab the next x characters without consuming

func (*Lexer) Push

func (l *Lexer) Push(name string, state StateFn)

func (*Lexer) RawInput

func (l *Lexer) RawInput() string

func (*Lexer) Remainder

func (l *Lexer) Remainder() (string, bool)

SQL and other string expressions may contain more than one statement, such as:

  use schema_x;  show tables;

  set @my_var = "value"; select a,b from `users` where name = @my_var;
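
A hedged sketch of handling multi-statement input, assuming Remainder returns the not-yet-lexed tail of the input (and false if nothing remains):

	l := lex.NewSqlLexer("use schema_x;  show tables;")
	for tok := l.NextToken(); tok.T != lex.TokenEOS && tok.T != lex.TokenEOF; tok = l.NextToken() {
		// ... handle the tokens of the first statement
	}
	if rest, ok := l.Remainder(); ok {
		l = lex.NewSqlLexer(rest) // lex the second statement: show tables;
	}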

func (*Lexer) ReverseTrim

func (l *Lexer) ReverseTrim()

Skips white space characters at the end by trimming, so we can recognize the end more easily

func (*Lexer) SkipWhiteSpaces

func (l *Lexer) SkipWhiteSpaces()

Skips white space characters in the input.

func (*Lexer) SkipWhiteSpacesNewLine

func (l *Lexer) SkipWhiteSpacesNewLine() bool

Skips white space characters in the input; returns a bool indicating whether a newline was among them

type NamedStateFn

type NamedStateFn struct {
	Name    string
	StateFn StateFn
}

type StateFn

type StateFn func(*Lexer) StateFn

StateFn represents the state of the lexer as a function that returns the next state.
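
For instance, a minimal sketch of a custom state function written against the exported Lexer methods above (the "limit" keyword handling is illustrative; assumes the lex and strings imports):

	func LexLimitKeyword(l *lex.Lexer) lex.StateFn {
		l.SkipWhiteSpaces()
		if strings.ToLower(l.PeekWord()) == "limit" {
			l.ConsumeWord("limit")
			l.Emit(lex.TokenLimit)
			return lex.LexNumber // hand off to the number lexer
		}
		return nil // a nil StateFn ends this state chain
	}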

func LexColumnNames

func LexColumnNames(l *Lexer) StateFn

Handle list of column names on insert/update statements

<insert_into> <col_names> VALUES <col_value_list>

<col_names> := '(' <identity> [, <identity>]* ')'

func LexColumns

func LexColumns(l *Lexer) StateFn

Alias for LexExpression

func LexComment

func LexComment(l *Lexer) StateFn

LexComment looks for valid comments, which are any of the following, including in-line comment blocks:

/* hello */
//  hello
-- hello
# hello
SELECT name --name is the combined first-last name
       , age FROM `USER` ...

func LexConditionalClause

func LexConditionalClause(l *Lexer) StateFn

Handle logical Conditional Clause used for [WHERE, WITH, JOIN ON]: logically grouped with parens and/or separated by commas or logic (AND/OR/NOT)

SELECT ... WHERE <conditional_clause>

<conditional_clause> ::= <expr> [( AND <expr> | OR <expr> | '(' <expr> ')' )]

<expr> ::= <predicatekw> '('? <expr> [, <expr>] ')'? | <func> | <subselect>

SEE: <expr> = LexExpression

func LexDataType

func LexDataType(forToken TokenType) StateFn

LexDataType scans and finds datatypes

 [] are valid inside of data types, no escaping such as ',"

[]string       CREATE table( field []string )
map[string]int
int, string, etc

func LexDdlColumn

func LexDdlColumn(l *Lexer) StateFn

data definition language column

CHANGE col1_old col1_new varchar(10),
CHANGE col2_old col2_new TEXT
ADD col3 BIGINT AFTER col1_new
ADD col2 TEXT FIRST,

func LexDialectForStatement

func LexDialectForStatement(l *Lexer) StateFn

Find the first keyword in the current queryText, then find the appropriate statement in the dialect, e.g. [SELECT, ALTER, CREATE, INSERT] in SQL

func LexDuration

func LexDuration(l *Lexer) StateFn

LexDuration lexes time durations (float or integer magnitudes)

durations: 45m, 2w, 20y, 22d, 40ms, 100ms, -100ms

func LexEmpty

func LexEmpty(l *Lexer) StateFn

Doesn't actually lex anything, used for single token clauses

func LexEndOfStatement

func LexEndOfStatement(l *Lexer) StateFn

Look for end of statement defined by either a semicolon or end of file

func LexExpression

func LexExpression(l *Lexer) StateFn

<expr> Handle a single logical expression, which may be nested and may contain user-defined function names that are NOT validated by the lexer

<expr> ::= <predicatekw> '('? <expr> [, <expr>] ')'? | <func> | <subselect>

<func> ::= <identity>'(' <expr> ')'
<predicatekw> ::= [NOT] (IN | INTERSECTS | CONTAINS | RANGE | LIKE | EQUALS )

Examples:

(colx = y OR colb = b)
cola = 'a5'
cola != "a5", colb = "a6"
REPLACE(cola,"stuff") != "hello"
FirstName = REPLACE(LOWER(name," "))
cola IN (1,2,3)
cola LIKE "abc"
eq(name,"bob") AND age > 5
time > now() -1h
(4 + 5) > 10
reg_date BETWEEN x AND y

func LexExpressionOrIdentity

func LexExpressionOrIdentity(l *Lexer) StateFn

look for either an Expression or Identity

expressions:    Legal identity characters, terminated by (
identity:    legal identity characters

REPLACE(name,"stuff")
name

func LexExpressionParens

func LexExpressionParens(l *Lexer) StateFn

LexExpressionParens looks for an expression identified by parentheses; it may be nested

       |--expr----|
dostuff(name,"arg")    // the left parenthesis identifies it as Expression
eq(trim(name," "),"gmail.com")

func LexFilterClause

func LexFilterClause(l *Lexer) StateFn

Handle Filter QL Main Statement

FILTER := ( <filter_bool_expr> | <filter_expr> )

<filter_bool_expr> :=  ( AND | OR ) '(' ( <filter_bool_expr> | <filter_expr> ) [, ( <filter_bool_expr> | <filter_expr> ) ] ')'

<filter_expr> :=  <expr>

Examples:

FILTER

    AND (

       daysago(datefield) < 100
       , domain(url) == "google.com"
       , INCLUDE name_of_filter
       ,
       , OR (
           momentum > 20
          , propensity > 50
       )
    )
 ALIAS myfilter

FILTER x > 7

func LexIdentifierOfType

func LexIdentifierOfType(forToken TokenType) StateFn

LexIdentifierOfType scans and finds named things (tables, columns)

supports quoted, bracket, or raw identifiers

TODO: dialect controls escaping/quoting techniques

[name]         select [first name] from usertable;
'name'         select 'user' from usertable;
`user`         select first_name from `user`;
first_name     select first_name from usertable;
usertable      select first_name AS fname from usertable;
_name          select _name AS name from stuff;
@@varname      select @@varname;

func LexIdentityOrValue

func LexIdentityOrValue(l *Lexer) StateFn

look for either an Identity or Value

func LexInlineComment

func LexInlineComment(l *Lexer) StateFn

Comment beginning with //, # or --

func LexInlineCommentNoTag

func LexInlineCommentNoTag(l *Lexer) StateFn

Comment beginning with //, # or --, but do not emit the tag, just the comment text

func LexJoinEntry

func LexJoinEntry(l *Lexer) StateFn

Handle Source References ie [From table], [SubSelects], Joins

SELECT ...  FROM <sources>

<sources>      := <source> [, <join_clause> <source>]*
<source>       := ( <table_source> | <subselect> ) [AS <identifier>]
<table_source> := <identifier>
<join_clause>  := (INNER | LEFT | OUTER)? JOIN [ON <conditional_clause>]
<subselect>    := '(' <select_stmt> ')'

func LexJson

func LexJson(l *Lexer) StateFn

Lex Valid Json

Must start with { or [

func LexJsonArray

func LexJsonArray(l *Lexer) StateFn

Lex Valid Json Array

Must End with ]

func LexJsonIdentity

func LexJsonIdentity(l *Lexer) StateFn

lex a string value; strings must be quoted:

"stuff"    -> stuff
"items's with quote"

func LexJsonObject

func LexJsonObject(l *Lexer) StateFn

Lex Valid Json Object

Must End with }

func LexJsonOrKeyValue

func LexJsonOrKeyValue(l *Lexer) StateFn

Lex either Json or Key/Value pairs

Must start with { or [ for json
Start with identity for key/value pairs

func LexJsonValue

func LexJsonValue(l *Lexer) StateFn

LexJsonValue: Consume values, first consuming Colon

<jsonvalue> ::= ':' ( <value>, <array>, <jsonobject> ) [, ...]

func LexListOfArgs

func LexListOfArgs(l *Lexer) StateFn

list of arguments: a comma-separated list of args which may be a mixture of expressions, identities, and values

    REPLACE(LOWER(x),"xyz")
    REPLACE(x,"xyz")
    COUNT(*)
    sum( 4 * toint(age))
    IN (a,b,c)
    varchar(10)
    CAST(field AS int)

    (a,b,c,d)   -- For Insert statement, list of columns

func LexLogical

func LexLogical(l *Lexer) StateFn

LexLogical is a lex entry function for logical expression language (+-/> etc)

ie, the full logical boolean logic

func LexMatchClosure

func LexMatchClosure(tok TokenType, nextFn StateFn) StateFn

matches expected tokentype emitting the token on success and returning passed state function.

func LexMultilineComment

func LexMultilineComment(l *Lexer) StateFn

A multi-line comment of the format /* comment */; it does not actually have to be multi-line, just delimited by those markers

func LexNameValueArgs

func LexNameValueArgs(l *Lexer) StateFn

<name_value_args> Handle comma delimited list of name = value args

Examples:

colx = y OR colb = b
cola = 'a5'
cola != "a5", colb = "a6"

func LexNumber

func LexNumber(l *Lexer) StateFn

LexNumber floats, integers, hex, exponential, signed

1.23
100
-827
6.02e23
0X1A2B,  0x1a2b, 0x1A2B.2B

Floats must be in decimal and must either:

  • Have digits both before and after the decimal point (both can be a single 0), e.g. 0.5, -100.0, or
  • Have a lower-case e that represents scientific notation, e.g. -3e-3, 6.02e23.

Integers can be:

  • decimal (e.g. -827)
  • hexadecimal (must begin with 0x and must use capital A-F, e.g. 0x1A2B)

func LexNumberOrDuration

func LexNumberOrDuration(l *Lexer) StateFn

LexNumberOrDuration floats, integers, hex, exponential, signed

1.23
100
-827
6.02e23
0X1A2B,  0x1a2b, 0x1A2B.2B

durations: 45m, 2w, 20y, 22d, 40ms, 100ms, -100ms

Floats must be in decimal and must either:

  • Have digits both before and after the decimal point (both can be a single 0), e.g. 0.5, -100.0, or
  • Have a lower-case e that represents scientific notation, e.g. -3e-3, 6.02e23.

Integers can be:

  • decimal (e.g. -827)
  • hexadecimal (must begin with 0x and must use capital A-F, e.g. 0x1A2B)

func LexOrderByColumn

func LexOrderByColumn(l *Lexer) StateFn

Handle columnar identities with a keyword suffix (ASC, DESC)

[ORDER BY] ( <identity> | <expr> ) [(ASC | DESC)]

func LexPreparedStatement

func LexPreparedStatement(l *Lexer) StateFn

Handle prepared statements

<PREPARE_STMT> := PREPARE <identity> FROM <string_value>

func LexRegex

func LexRegex(l *Lexer) StateFn

lex a regex: first character must be a /

/^stats\./i
/.*/
/^stats.*/

func LexRightParen

func LexRightParen(l *Lexer) StateFn

lex the right side paren of something

func LexSelectClause

func LexSelectClause(l *Lexer) StateFn

Handle start of select statements, specifically looking for

@@variables, *, or else we drop into <select_list>

 <SELECT> :==
     (DISTINCT|ALL)? ( <sql_variable> | * | <select_list> ) [FROM <source_clause>]

 <sql_variable> = @@stuff

func LexSelectList

func LexSelectList(l *Lexer) StateFn

Handle repeating Select List for columns

   SELECT <select_list>

   <select_list> := <select_col> [, <select_col>]*

   <select_col> :== ( <identifier> | <expression> | '*' ) [AS <identifier>] [IF <expression>] [<comment>]

Note: our Columns support a non-standard IF guard on a per-column basis

func LexShowClause

func LexShowClause(l *Lexer) StateFn

Handle show statement

SHOW [FULL] <multi_word_identifier> <identity> <like_or_where>

func LexStatement

func LexStatement(l *Lexer) StateFn

LexStatement is the main entry point to lex grammars primarily associated with QL-type languages, in which keywords separate clauses and have order [SELECT .. FROM name WHERE ..]; the reserved keywords serve as markers to stop lexing and move to the next clause lexer

func LexSubQuery

func LexSubQuery(l *Lexer) StateFn

Handle recursive subqueries

func LexTableColumns

func LexTableColumns(l *Lexer) StateFn

Handle repeating Insert/Upsert/Update statements

<insert_into> <col_names> VALUES <col_value_list>
<set> <upsert_cols> VALUES <col_value_list>

<upsert_cols> := <upsert_col> [, <upsert_col>]*
<upsert_col> := <identity> = <expr>

<col_names> := <identity> [, <identity>]*
<col_value_list> := <col_value_row> [, <col_value_row>] *

<col_value_row> := '(' <expr> [, <expr>]* ')'

func LexTableReferenceFirst

func LexTableReferenceFirst(l *Lexer) StateFn

Handle Source References ie [From table], [SubSelects], Joins

SELECT ...  FROM <sources>

<sources>      := <source> [, <join_clause> <source>]*
<source>       := ( <table_source> | <subselect> ) [AS <identifier>]
<table_source> := <identifier>
<join_clause>  := (INNER | LEFT | OUTER)? JOIN [ON <conditional_clause>]
<subselect>    := '(' <select_stmt> ')'

func LexTableReferences

func LexTableReferences(l *Lexer) StateFn

Handle Source References ie [From table], [SubSelects], Joins

SELECT ...  FROM <sources>

<sources>      := <source> [, <join_clause> <source>]*
<source>       := ( <table_source> | <subselect> ) [AS <identifier>]
<table_source> := <identifier>
<join_clause>  := (INNER | LEFT | OUTER)? JOIN [ON <conditional_clause>]
<subselect>    := '(' <select_stmt> ')'

func LexUpsertClause

func LexUpsertClause(l *Lexer) StateFn

Handle start of Insert or Upsert statements

func LexValue

func LexValue(l *Lexer) StateFn

lex a value: string, integer, float

- literal strings must be quoted
- numerics with no period are integers
- numerics with period are floats

"stuff"    -> [string] = stuff
'stuff'    -> [string] = stuff
"items's with quote" -> [string] = items's with quote
1.23  -> [float] = 1.23
100   -> [integer] = 100
["hello","world"]  -> [array] {"hello","world"}

type Token

type Token struct {
	T     TokenType // type
	V     string    // value
	Quote byte      // quote mark:    " ` [ '
}

token represents a text string returned from the lexer.

func (Token) String

func (t Token) String() string

convert to human readable string

type TokenInfo

type TokenInfo struct {
	T  TokenType
	Kw string

	HasSpaces   bool
	Description string
	// contains filtered or unexported fields
}

type TokenType

type TokenType uint16

TokenType identifies the type of lexical tokens.

const (

	// Basic grammar items
	TokenNil      TokenType = 0 // not used
	TokenEOF      TokenType = 1 // EOF
	TokenEOS      TokenType = 2 // ;
	TokenEofOrEos TokenType = 3 // End of file, OR ;
	TokenError    TokenType = 4 // error occurred; value is text of error
	TokenRaw      TokenType = 5 // raw unlexed text string
	TokenNewLine  TokenType = 6 // NewLine  = \n

	// Comments
	TokenComment           TokenType = 10 // Comment value string
	TokenCommentML         TokenType = 11 // Comment MultiValue
	TokenCommentStart      TokenType = 12 // /*
	TokenCommentEnd        TokenType = 13 // */
	TokenCommentSlashes    TokenType = 14 // Single Line comment:   // hello
	TokenCommentSingleLine TokenType = 15 // Single Line comment:   -- hello
	TokenCommentHash       TokenType = 16 // Single Line comment:  # hello

	// Misc
	TokenComma        TokenType = 20 // ,
	TokenStar         TokenType = 21 // *
	TokenColon        TokenType = 22 // :
	TokenLeftBracket  TokenType = 23 // [
	TokenRightBracket TokenType = 24 // ]
	TokenLeftBrace    TokenType = 25 // {
	TokenRightBrace   TokenType = 26 // }

	//  operand related tokens
	TokenMinus            TokenType = 60 // -
	TokenPlus             TokenType = 61 // +
	TokenPlusPlus         TokenType = 62 // ++
	TokenPlusEquals       TokenType = 63 // +=
	TokenDivide           TokenType = 64 // /
	TokenMultiply         TokenType = 65 // *
	TokenModulus          TokenType = 66 // %
	TokenEqual            TokenType = 67 // =
	TokenEqualEqual       TokenType = 68 // ==
	TokenNE               TokenType = 69 // !=
	TokenGE               TokenType = 70 // >=
	TokenLE               TokenType = 71 // <=
	TokenGT               TokenType = 72 // >
	TokenLT               TokenType = 73 // <
	TokenIf               TokenType = 74 // IF
	TokenOr               TokenType = 75 // ||
	TokenAnd              TokenType = 76 // &&
	TokenBetween          TokenType = 77 // between
	TokenLogicOr          TokenType = 78 // OR
	TokenLogicAnd         TokenType = 79 // AND
	TokenIN               TokenType = 80 // IN
	TokenLike             TokenType = 81 // LIKE
	TokenNegate           TokenType = 82 // NOT
	TokenLeftParenthesis  TokenType = 83 // (
	TokenRightParenthesis TokenType = 84 // )
	TokenTrue             TokenType = 85 // True
	TokenFalse            TokenType = 86 // False
	TokenIs               TokenType = 87 // IS
	TokenNull             TokenType = 88 // NULL
	TokenContains         TokenType = 89 // CONTAINS
	TokenIntersects       TokenType = 90 // INTERSECTS

	// ql top-level keywords, these first keywords determine parser
	TokenPrepare   TokenType = 200
	TokenInsert    TokenType = 201
	TokenUpdate    TokenType = 202
	TokenDelete    TokenType = 203
	TokenSelect    TokenType = 204
	TokenUpsert    TokenType = 205
	TokenAlter     TokenType = 206
	TokenCreate    TokenType = 207
	TokenSubscribe TokenType = 208
	TokenFilter    TokenType = 209
	TokenShow      TokenType = 210
	TokenDescribe  TokenType = 211 // We can also use TokenDesc
	TokenExplain   TokenType = 212 // another alias for describe
	TokenReplace   TokenType = 213 // Insert/Replace are interchangeable on insert statements

	// Other QL keywords; these are clause-level keywords that mark separation between clauses
	TokenTable    TokenType = 301 // table
	TokenFrom     TokenType = 302 // from
	TokenWhere    TokenType = 303 // where
	TokenHaving   TokenType = 304 // having
	TokenGroupBy  TokenType = 305 // group by
	TokenBy       TokenType = 306 // by
	TokenAlias    TokenType = 307 // alias
	TokenWith     TokenType = 308 // with
	TokenValues   TokenType = 309 // values
	TokenInto     TokenType = 310 // into
	TokenLimit    TokenType = 311 // limit
	TokenOrderBy  TokenType = 312 // order by
	TokenInner    TokenType = 313 // inner , ie of join
	TokenCross    TokenType = 314 // cross
	TokenOuter    TokenType = 315 // outer
	TokenLeft     TokenType = 316 // left
	TokenRight    TokenType = 317 // right
	TokenJoin     TokenType = 318 // Join
	TokenOn       TokenType = 319 // on
	TokenDistinct TokenType = 320 // DISTINCT
	TokenAll      TokenType = 321 // all
	TokenInclude  TokenType = 322 // INCLUDE
	TokenExists   TokenType = 323 // EXISTS
	TokenOffset   TokenType = 324 // OFFSET
	TokenFull     TokenType = 325 // FULL
	TokenGlobal   TokenType = 326 // GLOBAL
	TokenSession  TokenType = 327 // SESSION
	TokenTables   TokenType = 328 // TABLES

	// ddl
	TokenChange       TokenType = 400 // change
	TokenAdd          TokenType = 401 // add
	TokenFirst        TokenType = 402 // first
	TokenAfter        TokenType = 403 // after
	TokenCharacterSet TokenType = 404 // character set

	// Other QL keywords
	TokenSet  TokenType = 500 // set
	TokenAs   TokenType = 501 // as
	TokenAsc  TokenType = 502 // ascending
	TokenDesc TokenType = 503 // descending
	TokenUse  TokenType = 504 // use

	// User defined function/expression
	TokenUdfExpr TokenType = 550

	// Value Types
	TokenIdentity             TokenType = 600 // identity, either column, table name etc
	TokenValue                TokenType = 601 // 'some string' string or continuous sequence of chars delimited by WHITE SPACE | ' | , | ( | )
	TokenValueWithSingleQuote TokenType = 602 // '' becomes ' inside the string, parser will need to replace the string
	TokenRegex                TokenType = 603 // regex
	TokenDuration             TokenType = 604 // 14d , 22w, 3y, 45ms, 45us, 24hr, 2h, 45m, 30s

	// Scalar literal data-types
	TokenDataType TokenType = 1000 // A generic Identifier of DataTypes
	TokenBool     TokenType = 1001
	TokenFloat    TokenType = 1002
	TokenInteger  TokenType = 1003
	TokenString   TokenType = 1004
	TokenVarChar  TokenType = 1005
	TokenBigInt   TokenType = 1006
	TokenText     TokenType = 1007
	TokenJson     TokenType = 1008

	// Composite Data Types
	TokenList TokenType = 1050
	TokenMap  TokenType = 1051
)

// List of datatype aliases from MySql; implement them as tokens, or leave as Identity during
// DDL create/alter statements?
//     BOOL                  TINYINT
//     BOOLEAN               TINYINT
//     CHARACTER VARYING(M)  VARCHAR(M)
//     FIXED                 DECIMAL
//     FLOAT4                FLOAT
//     FLOAT8                DOUBLE
//     INT1                  TINYINT
//     INT2                  SMALLINT
//     INT3                  MEDIUMINT
//     INT4                  INT
//     INT8                  BIGINT
//     LONG VARBINARY        MEDIUMBLOB
//     LONG VARCHAR          MEDIUMTEXT
//     LONG                  MEDIUMTEXT
//     MIDDLEINT             MEDIUMINT
//     NUMERIC               DECIMAL

func (TokenType) MatchString

func (typ TokenType) MatchString() string

which keyword should we look for: either the full keyword, or, in the case of spaces such as "group by", just the first word ("group")

func (TokenType) MultiWord

func (typ TokenType) MultiWord() bool

is this a word such as "Group by" with multiple words?

func (TokenType) String

func (typ TokenType) String() string

convert to human readable string
