interp

package
v1.26.0 Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Feb 28, 2024 License: MIT Imports: 25 Imported by: 18

Documentation

Overview

Package interp is the GoAWK interpreter.

For basic usage, use the Exec function. For more complicated use cases and configuration options, first use the parser package to parse the AWK source, and then use ExecProgram to execute it with a specific configuration.

If you need to re-run the same parsed program repeatedly on different inputs or with different variables, use New to instantiate an Interpreter and then call the Interpreter.Execute method as many times as you need.

Example
package main

import (
	"fmt"
	"strings"

	"github.com/benhoyt/goawk/interp"
)

// main runs a one-line AWK program that prints the first field of every
// record for which $0 is truthy, so the blank input line produces no output.
func main() {
	const program = "$0 { print $1 }"
	records := strings.NewReader("foo bar\n\nbaz buz")
	if err := interp.Exec(program, " ", records, nil); err != nil {
		fmt.Println(err)
	}
}
Output:

foo
baz
Example (Csv)
package main

import (
	"fmt"
	"strings"

	"github.com/benhoyt/goawk/interp"
	"github.com/benhoyt/goawk/parser"
)

// main totals the "amount" column of a small CSV document. Lines starting
// with '#' are treated as comments and the first row is parsed as the header,
// which is what makes the @"amount" field-by-name syntax available.
func main() {
	const awkSrc = `{ total += @"amount" } END { print total }`
	const csvData = `# comment
name,amount
Bob,17.50
Jill,20
"Boba Fett",100.00
`

	program, parseErr := parser.ParseProgram([]byte(awkSrc), nil)
	if parseErr != nil {
		fmt.Println(parseErr)
		return
	}

	cfg := &interp.Config{
		Stdin:     strings.NewReader(csvData),
		InputMode: interp.CSVMode,
		CSVInput:  interp.CSVInputConfig{Comment: '#', Header: true},
	}
	if _, runErr := interp.ExecProgram(program, cfg); runErr != nil {
		fmt.Println(runErr)
	}
}
Output:

137.5
Example (Fieldsep)
package main

import (
	"fmt"
	"strings"

	"github.com/benhoyt/goawk/interp"
)

// main prints the first two fields of each record, splitting on commas.
func main() {
	// The second argument to Exec makes ',' the field separator.
	rows := strings.NewReader("1,2\n3,4")
	if err := interp.Exec("{ print $1, $2 }", ",", rows, nil); err != nil {
		fmt.Println(err)
	}
}
Output:

1 2
3 4
Example (Funcs)
package main

import (
	"fmt"
	"strings"

	"github.com/benhoyt/goawk/interp"
	"github.com/benhoyt/goawk/parser"
)

// main exposes two Go functions to an AWK program, a variadic "sum" and
// strings.Repeat under the name "repeat", and calls them from a BEGIN block.
func main() {
	// add totals its arguments; with no arguments the result is 0.
	add := func(nums ...float64) float64 {
		var total float64
		for i := range nums {
			total += nums[i]
		}
		return total
	}
	goFuncs := map[string]interface{}{
		"sum":    add,
		"repeat": strings.Repeat,
	}

	const awkSrc = `BEGIN { print sum(), sum(1), sum(2, 3, 4), repeat("xyz", 3) }`
	parseCfg := &parser.ParserConfig{Funcs: goFuncs}
	program, err := parser.ParseProgram([]byte(awkSrc), parseCfg)
	if err != nil {
		fmt.Println(err)
		return
	}

	// The interpreter is given the same function map that the parser saw.
	runCfg := &interp.Config{Funcs: parseCfg.Funcs}
	if _, err := interp.ExecProgram(program, runCfg); err != nil {
		fmt.Println(err)
	}
}
Output:

0 1 9 xyzxyzxyz
Example (New)
package main

import (
	"fmt"
	"strings"

	"github.com/benhoyt/goawk/interp"
	"github.com/benhoyt/goawk/parser"
)

// main parses one AWK program and executes it three times on different
// inputs with a reusable Interpreter, resetting variables only between the
// first and second runs.
func main() {
	// The same program is executed on every input below.
	const awkSrc = `{ print $1, x, $3; x++ }`

	program, err := parser.ParseProgram([]byte(awkSrc), nil)
	if err != nil {
		fmt.Println(err)
		return
	}
	vm, err := interp.New(program)
	if err != nil {
		fmt.Println(err)
		return
	}

	// execute runs the program once over text. Environ is a non-nil empty
	// slice so that os.Environ is not called on each run.
	execute := func(text string) error {
		_, runErr := vm.Execute(&interp.Config{
			Stdin:   strings.NewReader(text),
			Environ: []string{},
		})
		return runErr
	}

	// First run.
	if err := execute("one two three"); err != nil {
		fmt.Println(err)
		return
	}

	// Clear the variables, then run again on a different input (which could
	// come from a completely different data source).
	vm.ResetVars()
	if err := execute("a b c\nd e f\n"); err != nil {
		fmt.Println(err)
		return
	}

	// Third run keeps the variables left over from the second run.
	if err := execute("x y z"); err != nil {
		fmt.Println(err)
	}
}
Output:

one  three
a  c
d 1 f
x 2 z
Example (Program)
package main

import (
	"fmt"
	"strings"

	"github.com/benhoyt/goawk/interp"
	"github.com/benhoyt/goawk/parser"
)

// main prints each record's number and its lowercased text, joined by ':'.
func main() {
	const awkSrc = "{ print NR, tolower($0) }"

	program, err := parser.ParseProgram([]byte(awkSrc), nil)
	if err != nil {
		fmt.Println(err)
		return
	}

	cfg := &interp.Config{
		Stdin: strings.NewReader("A\naB\nAbC"),
		Vars:  []string{"OFS", ":"}, // name-value pair: set OFS to ":"
	}
	if _, err := interp.ExecProgram(program, cfg); err != nil {
		fmt.Println(err)
	}
}
Output:

1:a
2:ab
3:abc

Index

Examples

Constants

This section is empty.

Variables

This section is empty.

Functions

func Exec

func Exec(source, fieldSep string, input io.Reader, output io.Writer) error

Exec provides a simple way to parse and execute an AWK program with the given field separator. Exec reads input from the given reader (nil means use os.Stdin) and writes output to the given writer (nil means use a buffered version of os.Stdout).

func ExecProgram

func ExecProgram(program *parser.Program, config *Config) (int, error)

ExecProgram executes the parsed program using the given interpreter config, returning the exit status code of the program. The returned error is nil on successful execution of the program, even if the program returns a non-zero status code.

As of GoAWK version v1.16.0, a nil config is valid and will use the defaults (zero values). However, it may be simpler to use Exec in that case.

Types

type CSVInputConfig added in v1.17.0

// CSVInputConfig holds additional configuration for when InputMode is
// CSVMode or TSVMode.
type CSVInputConfig struct {
	// Separator is the input field separator character. If this is zero, it
	// defaults to ',' when InputMode is CSVMode and '\t' when InputMode is
	// TSVMode.
	Separator rune

	// Comment, if nonzero, specifies that lines beginning with this
	// character (and no leading whitespace) should be ignored as comments.
	Comment rune

	// Header, if true, parses the first row in each input file as a header
	// row (that is, a list of field names), and enables the @"field" syntax
	// to get a field by name as well as the FIELDS special array.
	Header bool
}

CSVInputConfig holds additional configuration for when InputMode is CSVMode or TSVMode.

type CSVOutputConfig added in v1.17.0

// CSVOutputConfig holds additional configuration for when OutputMode is
// CSVMode or TSVMode.
type CSVOutputConfig struct {
	// Separator is the output field separator character. If this is zero,
	// it defaults to ',' when OutputMode is CSVMode and '\t' when OutputMode
	// is TSVMode.
	Separator rune
}

CSVOutputConfig holds additional configuration for when OutputMode is CSVMode or TSVMode.

type Config

// Config defines the interpreter configuration for ExecProgram. Every field
// is optional; the per-field comments describe the documented defaults.
type Config struct {
	// Standard input reader (defaults to os.Stdin)
	Stdin io.Reader

	// Writer for normal output (defaults to a buffered version of os.Stdout).
	// If you need to write to stdout but want control over the buffer size or
	// allocation, wrap os.Stdout yourself and set Output to that.
	Output io.Writer

	// Writer for non-fatal error messages (defaults to os.Stderr)
	Error io.Writer

	// The name of the executable (accessible via ARGV[0])
	Argv0 string

	// Input arguments (usually filenames): empty slice means read
	// only from Stdin, and a filename of "-" means read from Stdin
	// instead of a real file.
	//
	// Arguments of the form "var=value" are treated as variable
	// assignments.
	Args []string

	// Set to true to disable "var=value" assignments in Args.
	NoArgVars bool

	// List of name-value pairs for variables to set before executing
	// the program (useful for setting FS and other built-in
	// variables, for example []string{"FS", ",", "OFS", ","}).
	Vars []string

	// Map of named Go functions to allow calling from AWK. You need
	// to pass this same map to the parser.ParseProgram config.
	//
	// Functions can have any number of parameters, and variadic
	// functions are supported. Functions can have no return values,
	// one return value, or two return values (result, error). In the
	// two-value case, if the function returns a non-nil error,
	// program execution will stop and ExecProgram will return that
	// error.
	//
	// Apart from the error return value, the types supported are
	// bool, integer and floating point types (excluding complex),
	// and string types (string or []byte).
	//
	// It's not an error to call a Go function from AWK with fewer
	// arguments than it has parameters in Go. In this case, the zero
	// value will be used for any additional parameters. However, it
	// is a parse error to call a non-variadic function from AWK with
	// more arguments than it has parameters in Go.
	//
	// Functions defined with the "function" keyword in AWK code
	// take precedence over functions in Funcs.
	Funcs map[string]interface{}

	// Set one or more of these to true to prevent unsafe behaviours,
	// useful when executing untrusted scripts:
	//
	// * NoExec prevents system calls via system() or pipe operator
	// * NoFileWrites prevents writing to files via '>' or '>>'
	// * NoFileReads prevents reading from files via getline or the
	//   filenames in Args
	NoExec       bool
	NoFileWrites bool
	NoFileReads  bool

	// Exec args used to run system shell. Typically, this will
	// be {"/bin/sh", "-c"}
	ShellCommand []string

	// List of name-value pairs to be assigned to the ENVIRON special
	// array, for example []string{"USER", "bob", "HOME", "/home/bob"}.
	// If nil (the default), values from os.Environ() are used.
	//
	// If the script doesn't need environment variables, set Environ to a
	// non-nil empty slice, []string{}.
	Environ []string

	// Mode for parsing input fields and records: default is to use normal FS
	// and RS behaviour. If set to CSVMode or TSVMode, FS and RS are ignored,
	// and input records are parsed as comma-separated values or tab-separated
	// values, respectively. Parsing is done as per RFC 4180 and the
	// "encoding/csv" package, but FieldsPerRecord is not supported,
	// LazyQuotes is always on, and TrimLeadingSpace is always off.
	//
	// You can also enable CSV or TSV input mode by setting INPUTMODE to "csv"
	// or "tsv" in Vars or in the BEGIN block (those override this setting).
	//
	// For further documentation about GoAWK's CSV support, see the full docs
	// in "../docs/csv.md".
	InputMode IOMode

	// Additional options if InputMode is CSVMode or TSVMode. The zero value
	// is valid, specifying a separator of ',' in CSVMode and '\t' in TSVMode.
	//
	// You can also specify these options by setting INPUTMODE in the BEGIN
	// block, for example, to use '|' as the field separator, '#' as the
	// comment character, and enable header row parsing:
	//
	//     BEGIN { INPUTMODE="csv separator=| comment=# header" }
	CSVInput CSVInputConfig

	// Mode for print output: default is to use normal OFS and ORS
	// behaviour. If set to CSVMode or TSVMode, the "print" statement with one
	// or more arguments outputs fields using CSV or TSV formatting,
	// respectively. Output is written as per RFC 4180 and the "encoding/csv"
	// package.
	//
	// You can also enable CSV or TSV output mode by setting OUTPUTMODE to
	// "csv" or "tsv" in Vars or in the BEGIN block (those override this
	// setting).
	OutputMode IOMode

	// Additional options if OutputMode is CSVMode or TSVMode. The zero value
	// is valid, specifying a separator of ',' in CSVMode and '\t' in TSVMode.
	//
	// You can also specify these options by setting OUTPUTMODE in the BEGIN
	// block, for example, to use '|' as the output field separator:
	//
	//     BEGIN { OUTPUTMODE="csv separator=|" }
	CSVOutput CSVOutputConfig
}

Config defines the interpreter configuration for ExecProgram.

type Error

type Error struct {
	// contains filtered or unexported fields
}

Error (actually *Error) is returned by the Exec, ExecProgram, and Execute functions on interpreter error, for example when FS is set to an invalid regex.

func (*Error) Error

func (e *Error) Error() string

type IOMode added in v1.17.0

// IOMode specifies the input parsing or print output mode.
type IOMode int

IOMode specifies the input parsing or print output mode.

// The IOMode values: the default AWK separator behaviour, plus CSV and TSV
// modes for input parsing or print output.
const (
	// DefaultMode uses normal AWK field and record separators: FS and RS for
	// input, OFS and ORS for print output.
	DefaultMode IOMode = 0

	// CSVMode uses comma-separated value mode for input or output.
	CSVMode IOMode = 1

	// TSVMode uses tab-separated value mode for input or output.
	TSVMode IOMode = 2
)

type Interpreter added in v1.16.0

type Interpreter struct {
	// contains filtered or unexported fields
}

Interpreter is an interpreter for a specific program, allowing you to efficiently execute the same program over and over with different inputs. Use New to create an Interpreter.

Most programs won't need reusable execution, and should use the simpler Exec or ExecProgram functions instead.

func New added in v1.16.0

func New(program *parser.Program) (*Interpreter, error)

New creates a reusable interpreter for the given program.

Most programs won't need reusable execution, and should use the simpler Exec or ExecProgram functions instead.

func (*Interpreter) Array added in v1.21.0

func (p *Interpreter) Array(name string) map[string]interface{}

Array returns a map representing the items in the named AWK array. AWK numbers are included as type float64, strings (including "numeric strings") are included as type string. If the named array does not exist, Array returns nil.

func (*Interpreter) Execute added in v1.16.0

func (p *Interpreter) Execute(config *Config) (int, error)

Execute runs this program with the given execution configuration (input, output, and variables) and returns the exit status code of the program. A nil config is valid and will use the defaults (zero values).

Internal memory allocations are reused, so calling Execute on the same Interpreter instance is significantly more efficient than calling ExecProgram multiple times.

I/O state is reset between each run, but variables and the random number generator seed are not; use ResetVars and ResetRand to reset those.

It's best to set config.Environ to a non-nil slice, otherwise Execute will call the relatively inefficient os.Environ each time. Set config.Environ to []string{} if the script doesn't need environment variables, or call os.Environ once and set config.Environ to that value each execution.

Note that config.Funcs must be the same value provided to parser.ParseProgram, and must not change between calls to Execute.

func (*Interpreter) ExecuteContext added in v1.16.0

func (p *Interpreter) ExecuteContext(ctx context.Context, config *Config) (int, error)

ExecuteContext is like Execute, but takes a context to allow the caller to set an execution timeout or cancel the execution. For efficiency, the context is only tested every 1000 virtual machine instructions.

Context handling is not preemptive: currently long-running operations like system() won't be interrupted.

func (*Interpreter) ResetRand added in v1.16.0

func (p *Interpreter) ResetRand()

ResetRand resets this interpreter's random number generator seed, so that rand() produces the same sequence it would have after calling New. This is a relatively CPU-intensive operation.

func (*Interpreter) ResetVars added in v1.16.0

func (p *Interpreter) ResetVars()

ResetVars resets this interpreter's variables, setting scalar variables to null, clearing arrays, and resetting special variables such as FS and RS to their defaults.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL