sqllexer

package module
v0.0.11 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Mar 4, 2024 License: MIT Imports: 3 Imported by: 1

README

go-sqllexer

This repository contains a hand written SQL Lexer that tokenizes SQL queries with a focus on obfuscating and normalization. The lexer is written in Go with no external dependencies. Note This is NOT a SQL parser, it only tokenizes SQL queries.

Features

  • 🚀 Fast and lightweight tokenization (not regex based)
  • 🔒 Obfuscates sensitive data (e.g. numbers, strings, specific literals like dollar quoted strings in Postgres, etc.)
  • 📖 Even works with truncated queries
  • 🌐 UTF-8 support
  • 🔧 Normalizes obfuscated queries

Installation

go get github.com/DataDog/go-sqllexer

Usage

Tokenize
import "github.com/DataDog/go-sqllexer"

func main() {
    query := "SELECT * FROM users WHERE id = 1"
    lexer := sqllexer.New(query)
    tokens := lexer.ScanAll()
    for _, token := range tokens {
        fmt.Println(token)
    }
}
Obfuscate
import (
    "fmt"
    "github.com/DataDog/go-sqllexer"
)

func main() {
    query := "SELECT * FROM users WHERE id = 1"
    obfuscator := sqllexer.NewObfuscator()
    obfuscated := obfuscator.Obfuscate(query)
    // "SELECT * FROM users WHERE id = ?"
    fmt.Println(obfuscated)
}
Normalize
import (
    "fmt"
    "github.com/DataDog/go-sqllexer"
)

func main() {
    query := "SELECT * FROM users WHERE id in (?, ?)"
    normalizer := sqllexer.NewNormalizer(
        WithCollectComments(true),
        WithCollectCommands(true),
        WithCollectTables(true),
        WithKeepSQLAlias(false),
    )
    normalized, statementMetadata, err := normalizer.Normalize(query)
    // "SELECT * FROM users WHERE id in (?)"
    fmt.Println(normalized)
}

Testing

go test -v ./...

Benchmarks

go test -bench=. -benchmem ./...

License

MIT License

Documentation

Index

Examples

Constants

View Source
const (
	StringPlaceholder = "?"
	NumberPlaceholder = "?"
)

Variables

This section is empty.

Functions

func WithCollectCommands

func WithCollectCommands(collectCommands bool) normalizerOption

func WithCollectComments

func WithCollectComments(collectComments bool) normalizerOption

func WithCollectProcedures added in v0.0.7

func WithCollectProcedures(collectProcedure bool) normalizerOption

func WithCollectTables

func WithCollectTables(collectTables bool) normalizerOption

func WithDBMS

func WithDBMS(dbms DBMSType) lexerOption

func WithDollarQuotedFunc

func WithDollarQuotedFunc(dollarQuotedFunc bool) obfuscatorOption

func WithKeepIdentifierQuotation added in v0.0.9

func WithKeepIdentifierQuotation(keepIdentifierQuotation bool) normalizerOption

func WithKeepSQLAlias

func WithKeepSQLAlias(keepSQLAlias bool) normalizerOption

func WithKeepTrailingSemicolon added in v0.0.9

func WithKeepTrailingSemicolon(keepTrailingSemicolon bool) normalizerOption

func WithRemoveSpaceBetweenParentheses added in v0.0.8

func WithRemoveSpaceBetweenParentheses(removeSpaceBetweenParentheses bool) normalizerOption

func WithReplaceBoolean added in v0.0.3

func WithReplaceBoolean(replaceBoolean bool) obfuscatorOption

func WithReplaceDigits

func WithReplaceDigits(replaceDigits bool) obfuscatorOption

func WithReplaceNull added in v0.0.3

func WithReplaceNull(replaceNull bool) obfuscatorOption

func WithReplacePositionalParameter added in v0.0.3

func WithReplacePositionalParameter(replacePositionalParameter bool) obfuscatorOption

func WithUppercaseKeywords added in v0.0.2

func WithUppercaseKeywords(uppercaseKeywords bool) normalizerOption

Types

type DBMSType added in v0.0.2

type DBMSType string
const (
	// DBMSSQLServer is a MS SQL
	DBMSSQLServer DBMSType = "mssql"
	// DBMSPostgres is a PostgreSQL Server
	DBMSPostgres DBMSType = "postgresql"
	// DBMSMySQL is a MySQL Server
	DBMSMySQL DBMSType = "mysql"
	// DBMSOracle is a Oracle Server
	DBMSOracle DBMSType = "oracle"
	// DBMSSnowflake is a Snowflake Server
	DBMSSnowflake DBMSType = "snowflake"
)

type Lexer

type Lexer struct {
	// contains filtered or unexported fields
}

SQL Lexer inspired from Rob Pike's talk on Lexical Scanning in Go

Example
query := "SELECT * FROM users WHERE id = 1"
lexer := New(query)
tokens := lexer.ScanAll()
fmt.Println(tokens)
Output:

[{6 SELECT} {2  } {9 *} {2  } {6 FROM} {2  } {6 users} {2  } {6 WHERE} {2  } {6 id} {2  } {8 =} {2  } {5 1}]

func New

func New(input string, opts ...lexerOption) *Lexer

func (*Lexer) Scan

func (s *Lexer) Scan() Token

Scan scans the next token and returns it.

func (*Lexer) ScanAll

func (s *Lexer) ScanAll() []Token

ScanAll scans the entire input string and returns a slice of tokens.

func (*Lexer) ScanAllTokens

func (s *Lexer) ScanAllTokens() <-chan Token

ScanAllTokens scans the entire input string and returns a channel of tokens. Use this if you want to process the tokens as they are scanned.

type LexerConfig added in v0.0.2

type LexerConfig struct {
	DBMS DBMSType `json:"dbms,omitempty"`
}

type Normalizer

type Normalizer struct {
	// contains filtered or unexported fields
}
Example
normalizer := NewNormalizer(
	WithCollectComments(true),
	WithCollectCommands(true),
	WithCollectTables(true),
	WithCollectProcedures(true),
	WithKeepSQLAlias(false),
)

normalizedSQL, statementMetadata, _ := normalizer.Normalize(
	`
		/* this is a comment */
		SELECT * FROM users WHERE id in (?, ?)
		`,
)

fmt.Println(normalizedSQL)
fmt.Println(statementMetadata)
Output:

SELECT * FROM users WHERE id in ( ? )
&{34 [users] [/* this is a comment */] [SELECT] []}

func NewNormalizer

func NewNormalizer(opts ...normalizerOption) *Normalizer

func (*Normalizer) Normalize

func (n *Normalizer) Normalize(input string, lexerOpts ...lexerOption) (normalizedSQL string, statementMetadata *StatementMetadata, err error)

Normalize takes an input SQL string and returns a normalized SQL string, a StatementMetadata struct, and an error. The normalizer collapses input SQL into compact format, groups obfuscated values into single placeholder, and collects metadata such as table names, comments, and commands.

type Obfuscator

type Obfuscator struct {
	// contains filtered or unexported fields
}
Example
obfuscator := NewObfuscator()
obfuscated := obfuscator.Obfuscate("SELECT * FROM users WHERE id = 1")
fmt.Println(obfuscated)
Output:

SELECT * FROM users WHERE id = ?

func NewObfuscator

func NewObfuscator(opts ...obfuscatorOption) *Obfuscator

func (*Obfuscator) Obfuscate

func (o *Obfuscator) Obfuscate(input string, lexerOpts ...lexerOption) string

Obfuscate takes an input SQL string and returns an obfuscated SQL string. The obfuscator replaces all literal values with a single placeholder

func (*Obfuscator) ObfuscateTokenValue added in v0.0.3

func (o *Obfuscator) ObfuscateTokenValue(token Token, lexerOpts ...lexerOption) string

type StatementMetadata

type StatementMetadata struct {
	Size       int      `json:"size"`
	Tables     []string `json:"tables"`
	Comments   []string `json:"comments"`
	Commands   []string `json:"commands"`
	Procedures []string `json:"procedures"`
}

func ObfuscateAndNormalize added in v0.0.3

func ObfuscateAndNormalize(input string, obfuscator *Obfuscator, normalizer *Normalizer, lexerOpts ...lexerOption) (normalizedSQL string, statementMetadata *StatementMetadata, err error)

ObfuscateAndNormalize takes an input SQL string and returns an normalized SQL string with metadata This function is a convenience function that combines the Obfuscator and Normalizer in one pass

type Token

type Token struct {
	Type  TokenType
	Value string
}

Token represents a SQL token with its type and value.

type TokenType

type TokenType int
const (
	ERROR TokenType = iota
	EOF
	WS                     // whitespace
	STRING                 // string literal
	INCOMPLETE_STRING      // incomplete string literal so that we can obfuscate it, e.g. 'abc
	NUMBER                 // number literal
	IDENT                  // identifier
	QUOTED_IDENT           // quoted identifier
	OPERATOR               // operator
	WILDCARD               // wildcard *
	COMMENT                // comment
	MULTILINE_COMMENT      // multiline comment
	PUNCTUATION            // punctuation
	DOLLAR_QUOTED_FUNCTION // dollar quoted function
	DOLLAR_QUOTED_STRING   // dollar quoted string
	POSITIONAL_PARAMETER   // numbered parameter
	BIND_PARAMETER         // bind parameter
	FUNCTION               // function
	SYSTEM_VARIABLE        // system variable
	UNKNOWN                // unknown token
)

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL