flatjsonl

package
v0.8.1 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Mar 4, 2024 License: MIT Imports: 37 Imported by: 0

Documentation

Overview

Package flatjsonl implements a CLI tool to scan JSONL files and transform them to flat tables.

Index

Constants

View Source
const (
	TypeString = Type("string")
	TypeInt    = Type("int")
	TypeFloat  = Type("float")
	TypeBool   = Type("bool")
	TypeNull   = Type("null")
	TypeAbsent = Type("")
)

Type enumeration.

View Source
const NopFile = "<nop>"

NopFile indicates a no-op file.

Variables

This section is empty.

Functions

func Format

func Format(v interface{}) string

Format turns value into a string.

func KeyFromPath

func KeyFromPath(path []string) string

KeyFromPath joins path elements into a dot-separated scalar key.

func Main added in v0.5.15

func Main()

Main is the entry point for flatjsonl CLI tool.

Types

type CSVWriter

type CSVWriter struct {
	// contains filtered or unexported fields
}

CSVWriter writes rows to CSV file.

func NewCSVWriter

func NewCSVWriter(fn string) (*CSVWriter, error)

NewCSVWriter creates an instance of CSVWriter.

func (*CSVWriter) Close

func (c *CSVWriter) Close() error

Close flushes rows and closes file.

func (CSVWriter) Metrics added in v0.5.11

func (c CSVWriter) Metrics() []progress.Metric

Metrics returns available metrics.

func (*CSVWriter) ReceiveRow

func (c *CSVWriter) ReceiveRow(seq int64, values []Value) error

ReceiveRow receives rows.

func (*CSVWriter) SetupKeys added in v0.4.0

func (c *CSVWriter) SetupKeys(keys []flKey) (err error)

SetupKeys writes CSV headers.

type Config

type Config struct {
	MatchLinePrefix    string             `json:"matchLinePrefix" yaml:"matchLinePrefix"`
	IncludeKeys        []string           `json:"includeKeys" yaml:"includeKeys"`
	IncludeKeysRegex   []string           `json:"includeKeysRegex" yaml:"includeKeysRegex"`
	ExcludeKeys        []string           `json:"excludeKeys" yaml:"excludeKeys" description:"List of keys remove from columns."`
	ReplaceKeys        map[string]string  `json:"replaceKeys" yaml:"replaceKeys"`
	ReplaceKeysRegex   map[string]string  `json:"replaceKeysRegex" yaml:"replaceKeysRegex"`
	ParseTime          map[string]string  `json:"parseTime" yaml:"parseTime" description:"Map of key to time format, RAW format means no processing of original value."`
	OutputTimeFormat   string             `` /* 140-byte string literal not displayed */
	OutputTimezone     string             `json:"outputTZ" yaml:"outputTZ" example:"UTC"`
	ConcatDelimiter    *string            `` /* 152-byte string literal not displayed */
	Transpose          map[string]string  `json:"transpose" yaml:"transpose" description:"Map of key prefixes to transposed table names."`
	ExtractValuesRegex map[string]extract `` /* 133-byte string literal not displayed */
}

Config describes processing options.

type FastWalker added in v0.3.0

type FastWalker struct {
	// These callbacks are invoked during JSON traversal.
	// Common arguments:
	// * seq is a sequence number of parent line,
	// * flatPath is a dot-separated path to the current element,
	// * parserPool is a length of parent prefix in flatPath,
	// * path holds a list of segments, it is nil if WantPath is false.
	FnNumber func(seq int64, flatPath []byte, path []string, value float64, raw []byte)
	FnString func(seq int64, flatPath []byte, path []string, value []byte) extractor
	FnBool   func(seq int64, flatPath []byte, path []string, value bool)
	FnNull   func(seq int64, flatPath []byte, path []string)

	WantPath       bool
	ExtractStrings bool
	// contains filtered or unexported fields
}

FastWalker walks JSON with fastjson.

func (*FastWalker) GetKey added in v0.5.13

func (fv *FastWalker) GetKey(seq int64, flatPath []byte, path []string, v *fastjson.Value)

GetKey walks into a single key.

func (*FastWalker) WalkFastJSON added in v0.3.0

func (fv *FastWalker) WalkFastJSON(seq int64, flatPath []byte, path []string, v *fastjson.Value)

WalkFastJSON iterates fastjson.Value JSON structure.

type Flags

type Flags struct {
	Verbosity        int
	ProgressInterval time.Duration
	Input            string
	Output           string

	CSV string

	SQLite     string
	SQLMaxCols int
	SQLTable   string

	PGDump string

	Raw      string
	RawDelim string

	MaxLines     int
	OffsetLines  int
	MaxLinesKeys int
	FieldLimit   int
	KeyLimit     int
	BufSize      int

	Config            string
	GetKey            string
	ReplaceKeys       bool
	ExtractStrings    bool
	SkipZeroCols      bool
	AddSequence       bool
	MatchLinePrefix   string
	CaseSensitiveKeys bool

	ShowKeysFlat bool
	ShowKeysHier bool
	ShowKeysInfo bool

	Concurrency int
	MemLimit    int
}

Flags contains command-line flags.

func (*Flags) Inputs

func (f *Flags) Inputs() []Input

Inputs returns list of file names to read.

func (*Flags) Parse

func (f *Flags) Parse()

Parse parses and prepares command-line flags.

func (*Flags) PrepareOutput

func (f *Flags) PrepareOutput()

PrepareOutput parses output flag.

func (*Flags) Register

func (f *Flags) Register()

Register registers command-line flags.

type Input added in v0.5.0

type Input struct {
	FileName string
	Reader   interface {
		io.Reader
		Size() int64
		Reset()
		Compression() string
	}
}

Input can be either a file name or a reader.

type KeyHierarchy added in v0.4.0

type KeyHierarchy struct {
	Name string
	Sub  map[string]KeyHierarchy
}

KeyHierarchy collects structural relations.

func (*KeyHierarchy) Add added in v0.4.0

func (k *KeyHierarchy) Add(path []string)

Add registers path to KeyHierarchy.

func (KeyHierarchy) Hierarchy added in v0.4.0

func (k KeyHierarchy) Hierarchy() interface{}

Hierarchy exposes keys as tree hierarchy.

type LoopReader added in v0.5.5

type LoopReader struct {
	BytesLimit int
	// contains filtered or unexported fields
}

LoopReader repeats bytes buffer until the limit is hit.

func LoopReaderFromFile added in v0.5.5

func LoopReaderFromFile(fn string, bytesLimit int) (*LoopReader, error)

LoopReaderFromFile creates LoopReader from a file.

func (*LoopReader) Compression added in v0.5.5

func (l *LoopReader) Compression() string

Compression implements Input.

func (*LoopReader) Read added in v0.5.5

func (l *LoopReader) Read(p []byte) (n int, err error)

Read implements io.Reader.

func (*LoopReader) Reset added in v0.5.5

func (l *LoopReader) Reset()

Reset resets the counter.

func (*LoopReader) Size added in v0.5.5

func (l *LoopReader) Size() int64

Size implements Input.

type PGDumpWriter added in v0.5.6

type PGDumpWriter struct {
	// contains filtered or unexported fields
}

PGDumpWriter creates a PostgreSQL dump file.

func NewPGDumpWriter added in v0.5.6

func NewPGDumpWriter(fn string, tableName string, p *Processor) (*PGDumpWriter, error)

NewPGDumpWriter creates an instance of PGDumpWriter.

func (*PGDumpWriter) Close added in v0.5.6

func (c *PGDumpWriter) Close() error

Close flushes CSV and closes output file.

func (*PGDumpWriter) ReceiveRow added in v0.5.6

func (c *PGDumpWriter) ReceiveRow(seq int64, values []Value) error

ReceiveRow receives rows.

func (*PGDumpWriter) SetupKeys added in v0.5.6

func (c *PGDumpWriter) SetupKeys(keys []flKey) error

SetupKeys creates tables.

type Processor

type Processor struct {
	Log    func(args ...any)
	Stdout io.Writer
	// contains filtered or unexported fields
}

Processor reads JSONL files with Reader and passes flat rows to Writer.

func New added in v0.7.0

func New(f Flags) (*Processor, error)

New creates Processor from config.

func NewProcessor

func NewProcessor(f Flags, cfg Config, inputs ...Input) (*Processor, error)

NewProcessor creates an instance of Processor.

func (*Processor) PrepareKeys added in v0.5.0

func (p *Processor) PrepareKeys() error

PrepareKeys runs the first pass of reading, if necessary, to scan the keys.

func (*Processor) Process

func (p *Processor) Process() error

Process dispatches data from Reader to Writer.

func (*Processor) WriteOutput added in v0.5.0

func (p *Processor) WriteOutput() error

WriteOutput runs the second pass of reading to create the output.

type RawWriter added in v0.3.19

type RawWriter struct {
	// contains filtered or unexported fields
}

RawWriter writes rows to RAW file.

func NewRawWriter added in v0.3.19

func NewRawWriter(fn string, delimiter string) (*RawWriter, error)

NewRawWriter creates an instance of RawWriter.

func (*RawWriter) Close added in v0.3.19

func (c *RawWriter) Close() error

Close flushes rows and closes file.

func (RawWriter) Metrics added in v0.5.11

func (c RawWriter) Metrics() []progress.Metric

Metrics returns available metrics.

func (*RawWriter) ReceiveRow added in v0.3.19

func (c *RawWriter) ReceiveRow(seq int64, values []Value) error

ReceiveRow receives rows.

func (*RawWriter) SetupKeys added in v0.4.0

func (c *RawWriter) SetupKeys(keys []flKey) (err error)

SetupKeys initializes writer.

type Reader

type Reader struct {
	AddSequence bool
	MaxLines    int64
	OffsetLines int64
	OnError     func(err error)
	Progress    *progress.Progress
	Buf         []byte
	Concurrency int
	Processor   *Processor

	Sequence int64

	MatchPrefix    *regexp.Regexp
	ExtractStrings bool
	// contains filtered or unexported fields
}

Reader scans lines and decodes JSON in them.

func (*Reader) Read

func (rd *Reader) Read(sess *readSession) error

Read reads single file with JSON lines.

type SQLiteWriter

type SQLiteWriter struct {
	// contains filtered or unexported fields
}

SQLiteWriter inserts rows into SQLite database.

func NewSQLiteWriter

func NewSQLiteWriter(fn string, tableName string, p *Processor) (*SQLiteWriter, error)

NewSQLiteWriter creates an instance of SQLiteWriter.

func (*SQLiteWriter) Close

func (c *SQLiteWriter) Close() error

Close commits outstanding transaction and closes database instance.

func (*SQLiteWriter) ReceiveRow

func (c *SQLiteWriter) ReceiveRow(seq int64, values []Value) error

ReceiveRow receives rows.

func (*SQLiteWriter) SetupKeys added in v0.4.0

func (c *SQLiteWriter) SetupKeys(keys []flKey) error

SetupKeys creates tables.

type Type added in v0.2.0

type Type string

Type is a scalar type.

func (Type) Update added in v0.2.0

func (t Type) Update(u Type) Type

Update merges original type with updated.

type URL added in v0.7.0

type URL struct {
	Scheme   string     `json:"scheme,omitempty"`
	User     string     `json:"user,omitempty"`
	Pass     string     `json:"pass,omitempty"`
	Host     string     `json:"host,omitempty"`
	Port     string     `json:"port,omitempty"`
	Query    url.Values `json:"query,omitempty"`
	Path     []string   `json:"path,omitempty"`
	Fragment string     `json:"fragment,omitempty"`
}

URL is a JSON representation of URL.

type Value added in v0.3.0

type Value struct {
	Dst       string
	Type      Type
	String    string
	Number    float64
	RawNumber string
	Bool      bool
}

Value encapsulates value of an allowed Type.

func (Value) Format added in v0.3.0

func (v Value) Format() string

Format formats Value as string.

type WriteReceiver

type WriteReceiver interface {
	SetupKeys(keys []flKey) error
	ReceiveRow(seq int64, values []Value) error
	Close() error
}

WriteReceiver can receive a row for processing.

type Writer

type Writer struct {
	Progress *progress.Progress
	// contains filtered or unexported fields
}

Writer dispatches rows to multiple receivers.

func (*Writer) Add

func (w *Writer) Add(r WriteReceiver)

Add adds another row receiver.

func (*Writer) Close

func (w *Writer) Close() error

Close tries to close all receivers and returns combined error in case of failures.

func (*Writer) HasReceivers

func (w *Writer) HasReceivers() bool

HasReceivers is true if there are receivers.

func (*Writer) ReceiveRow

func (w *Writer) ReceiveRow(seq int64, values []Value) error

ReceiveRow passes row to all receivers.

func (*Writer) SetupKeys added in v0.4.0

func (w *Writer) SetupKeys(keys []flKey) error

SetupKeys configures writers.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL