sybil

package
v0.2.0 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Sep 10, 2017 License: BSD-2-Clause-Views Imports: 23 Imported by: 0

Documentation

Index

Constants

View Source
const (
	NO_OP       = iota
	OP_AVG      = iota
	OP_HIST     = iota
	OP_DISTINCT = iota
)
View Source
const (
	INT_VAL = iota
	STR_VAL = iota
	SET_VAL = iota
)

Variables

View Source
var BLOCKS_BEFORE_GC = 8
View Source
var BLOCKS_PER_CACHE_FILE = 64
View Source
var BLOCK_VERSION = int32(1)

BLOCK_VERSION is how we get hints about decoding blocks for backwards compatibility. At least, it will be in the future.

View Source
var BREAK_MAP = make(map[string]int, 0)
View Source
var CACHE_DIR = "cache"
View Source
var CARDINALITY_THRESHOLD = 4
View Source
var CHUNKS_BEFORE_GC = 16
View Source
var CHUNK_SIZE = 1024 * 8 * 8
View Source
var CHUNK_THRESHOLD = CHUNK_SIZE / 8
View Source
var COPY_RECORD_INTERNS = false
View Source
var DEBUG_OUTLIERS = false
View Source
var DEBUG_RECORD_CONSISTENCY = false
View Source
var DEBUG_TIMING = false
View Source
var DELETE_BLOCKS = make([]string, 0)
View Source
var DELETE_BLOCKS_AFTER_QUERY = true
View Source
var EMPTY = ""
View Source
var ENABLE_LUA = false
View Source
var ENV_FLAG = os.Getenv("DEBUG")

Extracted from and influenced by https://groups.google.com/forum/#!topic/golang-nuts/ct99dtK2Jo4. Use the environment variable DEBUG=1 to turn on debug output.

View Source
var FALSE = false
View Source
var FILE_DIGEST_THRESHOLD = 256
View Source
var FLAGS = FlagDefs{}

TODO: merge these two into one thing. The current problem is that FLAGS needs pointers.

View Source
var FORMATS = map[string]string{
	"ansic":       time.ANSIC,
	"unixdate":    time.UnixDate,
	"rubydate":    time.RubyDate,
	"rfc822":      time.RFC822,
	"rfc822z":     time.RFC822Z,
	"rfc850":      time.RFC850,
	"rfc1123":     time.RFC1123,
	"rfc1123z":    time.RFC1123Z,
	"rfc3339":     time.RFC3339,
	"rfc3339nano": time.RFC3339Nano,
	"kitchen":     time.Kitchen,
	"stamp":       time.Stamp,
	"stampmilli":  time.StampMilli,
	"stampmicro":  time.StampMicro,
	"stampnano":   time.StampNano,
}
View Source
var GROUP_BY_WIDTH = 8 // bytes
View Source
var GROUP_DELIMITER = "\t"
View Source
var GZIP_EXT = ".gz"
View Source
var HOLD_MATCHES = false
View Source
var INGEST_DIR = "ingest"
View Source
var INTERNAL_RESULT_LIMIT = 100000
View Source
var KB = int64(1024)
View Source
var LOADED_TABLES = make(map[string]*Table)
View Source
var LOCK_TRIES = 50
View Source
var LOCK_US = time.Millisecond * 3
View Source
var MAX_LOCK_BREAKS = 5
View Source
var MAX_ROW_STORE_TRIES = 20
View Source
var MIN_CUTOFF = 5 // need at least this many elements before we determine min/max
View Source
var MIN_FILES_TO_DIGEST = 0
View Source
var NO_MORE_BLOCKS = GROUP_DELIMITER
View Source
var NUM_BUCKETS = 1000
View Source
var OPTS = OptionDefs{}
View Source
var PROFILER_ENABLED bool
View Source
var READ_ROWS_ONLY = false
View Source
var REGEX_CACHE_SIZE = 100000
View Source
var ROW_STORE_BLOCK = "ROW_STORE"
View Source
var RUN_PROFILER = func() ProfilerStop {
	return NoProfile{}
}
View Source
var SINGLE_EVENT_DURATION = int64(30) // i think this means 30 seconds
View Source
var SIZE_DIGEST_THRESHOLD = int64(1024) * 2
View Source
var STD_CUTOFF = 1000.0 // if value is 1000 SDs away, we ignore it
View Source
var STOMACHE_DIR = "stomache"
View Source
var STOP_PROFILER = func() {
}
View Source
var TEMP_INGEST_DIR = ".ingest.temp"
View Source
var TEST_MODE = false
View Source
var TOP_STRING_COUNT = 20
View Source
var TRUE = true
View Source
var VERSION_STRING = "0.2.0"

Functions

func Debug added in v0.2.0

func Debug(args ...interface{})

func Error added in v0.2.0

func Error(args ...interface{})

func FilterAndAggRecords

func FilterAndAggRecords(querySpec *QuerySpec, recordsPtr *RecordList) int

func GetFileDecoder added in v0.2.0

func GetFileDecoder(filename string) *gob.Decoder

func GetTimeFormat

func GetTimeFormat(time_fmt string) string

func GetVersionInfo added in v0.2.0

func GetVersionInfo() map[string]interface{}

func LoadAndSessionize

func LoadAndSessionize(tables []*Table, querySpec *QuerySpec, sessionSpec *SessionSpec) int

func LoadRowBlockCB

func LoadRowBlockCB(digestname string, records RecordList)

func Print added in v0.2.0

func Print(args ...interface{})

func PrintResults

func PrintResults(querySpec *QuerySpec)

func PrintTables

func PrintTables()

func PrintVersionInfo added in v0.2.0

func PrintVersionInfo()

func RecoverLock

func RecoverLock(lock RecoverableLock) bool

func RenameAndMod added in v0.2.0

func RenameAndMod(src, dst string) error

TODO: We should really split this into two functions based on dir / file

func SearchBlocks

func SearchBlocks(querySpec *QuerySpec, block_list map[string]*TableBlock) map[string]*QuerySpec

OLD SEARCHING FUNCTIONS BELOW HERE

func SessionizeRecords

func SessionizeRecords(querySpec *QuerySpec, sessionSpec *SessionSpec, recordsptr *RecordList)

func SetDefaults

func SetDefaults()

func SetLuaScript added in v0.2.0

func SetLuaScript(filename string)

func SortResults

func SortResults(querySpec *QuerySpec)

func Warn added in v0.2.0

func Warn(args ...interface{})

Types

type ActiveSession

type ActiveSession struct {
	Records RecordList
	Stats   *SessionStats

	Path       []string
	PathKey    bytes.Buffer
	PathLength int
	PathStats  map[string]int
}

func (*ActiveSession) AddRecord

func (as *ActiveSession) AddRecord(r *Record)

func (*ActiveSession) CombineSession

func (as *ActiveSession) CombineSession(session *ActiveSession)

func (*ActiveSession) ExpireRecords

func (as *ActiveSession) ExpireRecords(timestamp int) []RecordList

func (*ActiveSession) IsExpired

func (as *ActiveSession) IsExpired() bool

type Activity

type Activity struct {
	Count int
}

type ActivityMap

type ActivityMap map[int]Activity

type AfterLoadQueryCB

type AfterLoadQueryCB struct {
	// contains filtered or unexported fields
}

func (*AfterLoadQueryCB) CB

func (cb *AfterLoadQueryCB) CB(digestname string, records RecordList)

type AfterRowBlockLoad

type AfterRowBlockLoad func(string, RecordList)

type Aggregation

type Aggregation struct {
	// contains filtered or unexported fields
}

type BlockLock

type BlockLock struct {
	Lock
}

func (*BlockLock) Recover

func (l *BlockLock) Recover() bool

type CacheLock added in v0.2.0

type CacheLock struct {
	Lock
}

func (*CacheLock) Recover added in v0.2.0

func (l *CacheLock) Recover() bool

type Calendar

type Calendar struct {
	Daily   ActivityMap
	Weekly  ActivityMap
	Monthly ActivityMap

	Min int64
	Max int64
}

Trying out a calendar with stats by day, week and month

func NewCalendar

func NewCalendar() *Calendar

func (*Calendar) AddActivity

func (c *Calendar) AddActivity(timestamp int)

func (*Calendar) CombineCalendar

func (c *Calendar) CombineCalendar(cc *Calendar)

type DigestLock

type DigestLock struct {
	Lock
}

func (*DigestLock) Recover

func (l *DigestLock) Recover() bool

type Filter

type Filter interface {
	Filter(*Record) bool
}

func BuildFilters

func BuildFilters(t *Table, loadSpec *LoadSpec, filterSpec FilterSpec) []Filter

type FilterSpec

type FilterSpec struct {
	Int string
	Str string
	Set string
}

These are the passed-in flags.

type FlagDefs

type FlagDefs struct {
	OP          *string
	PRINT       *bool
	EXPORT      *bool
	INT_FILTERS *string
	STR_FILTERS *string
	STR_REPLACE *string // regex replacement for strings
	SET_FILTERS *string

	SESSION_COL *string
	INTS        *string
	STRS        *string
	GROUPS      *string

	ADD_RECORDS *int

	TIME        *bool
	TIME_COL    *string
	TIME_BUCKET *int
	HIST_BUCKET *int

	FIELD_SEPARATOR    *string
	FILTER_SEPARATOR   *string
	PRINT_KEYS         *bool
	LOAD_AND_QUERY     *bool
	LOAD_THEN_QUERY    *bool
	READ_INGESTION_LOG *bool
	READ_ROWSTORE      *bool
	SKIP_COMPACT       *bool

	PROFILE     *bool
	PROFILE_MEM *bool

	RECYCLE_MEM *bool

	WEIGHT_COL *string

	LIMIT *int

	DEBUG *bool
	JSON  *bool
	GC    *bool

	DIR        *string
	SORT       *string
	TABLE      *string
	PRINT_INFO *bool
	SAMPLES    *bool

	LUA     *bool
	LUAFILE *string

	UPDATE_TABLE_INFO *bool
	SKIP_OUTLIERS     *bool

	// Join keys
	JOIN_TABLE *string
	JOIN_KEY   *string
	JOIN_GROUP *string

	// Sessionization stuff
	SESSION_CUTOFF *int
	RETENTION      *bool
	PATH_KEY       *string
	PATH_LENGTH    *int

	// STATS
	ANOVA_ICC *bool
}

type Grouping

type Grouping struct {
	// contains filtered or unexported fields
}

type Hist

type Hist struct {
	Max     int64
	Min     int64
	Samples int
	Count   int64
	Avg     float64
	// contains filtered or unexported fields
}

func (*Hist) Combine

func (h *Hist) Combine(next_hist *Hist)

func (*Hist) GetBuckets

func (h *Hist) GetBuckets() map[string]int64

func (*Hist) GetMeanVariance

func (h *Hist) GetMeanVariance() float64

func (*Hist) GetPercentiles

func (h *Hist) GetPercentiles() []int64

func (*Hist) GetStdDev

func (h *Hist) GetStdDev() float64

VARIANCE is defined as the squared error from the mean. STD DEV is defined as sqrt(VARIANCE).

func (*Hist) GetVariance

func (h *Hist) GetVariance() float64

func (*Hist) Print

func (h *Hist) Print()

func (*Hist) SetupBuckets

func (h *Hist) SetupBuckets(buckets int, min, max int64)

func (*Hist) Sum

func (h *Hist) Sum() int64

func (*Hist) TrackPercentiles

func (h *Hist) TrackPercentiles()

type InfoLock

type InfoLock struct {
	Lock
}

func (*InfoLock) Recover

func (l *InfoLock) Recover() bool

type IntArr

type IntArr []IntField

type IntField

type IntField int64

type IntFilter

type IntFilter struct {
	Field   string
	FieldId int16
	Op      string
	Value   int
	// contains filtered or unexported fields
}

func (IntFilter) Filter

func (filter IntFilter) Filter(r *Record) bool

type IntInfo

type IntInfo struct {
	Min   int64
	Max   int64
	Avg   float64
	M2    float64 // used for calculating std dev, expressed as M2 / (Count - 1)
	Count int
}

type IntInfoTable

type IntInfoTable map[int16]*IntInfo

type LoadSpec

type LoadSpec struct {
	LoadAllColumns bool
	// contains filtered or unexported fields
}

func NewLoadSpec

func NewLoadSpec() LoadSpec

func (*LoadSpec) Int

func (l *LoadSpec) Int(name string)

func (*LoadSpec) Set

func (l *LoadSpec) Set(name string)

func (*LoadSpec) Str

func (l *LoadSpec) Str(name string)

type Lock

type Lock struct {
	Name  string
	Table *Table
	// contains filtered or unexported fields
}

func (*Lock) ForceDeleteFile

func (l *Lock) ForceDeleteFile()

func (*Lock) ForceMakeFile

func (l *Lock) ForceMakeFile(pid int64)

func (*Lock) Grab

func (l *Lock) Grab() bool

func (*Lock) Recover

func (l *Lock) Recover() bool

func (*Lock) Release

func (l *Lock) Release() bool

type LuaKey added in v0.2.0

type LuaKey interface{}

type LuaTable added in v0.2.0

type LuaTable map[string]interface{}

type NoFilter

type NoFilter struct{}

FILTERS RETURN TRUE ON MATCH SUCCESS

func (NoFilter) Filter

func (f NoFilter) Filter(r *Record) bool

type NoProfile

type NoProfile struct{}

func (NoProfile) Start

func (p NoProfile) Start() ProfilerStart

func (NoProfile) Stop

func (p NoProfile) Stop()

type OptionDefs

type OptionDefs struct {
	SORT_COUNT              string
	SAMPLES                 bool
	STR_REPLACEMENTS        map[string]StrReplace
	WEIGHT_COL              bool
	WEIGHT_COL_ID           int16
	DELTA_ENCODE_INT_VALUES bool
	DELTA_ENCODE_RECORD_IDS bool
	WRITE_BLOCK_INFO        bool
	TIMESERIES              bool
	TIME_COL_ID             int16
	TIME_FORMAT             string
	GROUP_BY                []string
}

type ProfilerStart

type ProfilerStart interface {
	Stop()
}

type ProfilerStop

type ProfilerStop interface {
	Start() ProfilerStart
}

type QuerySpec

type QuerySpec struct {
	Filters      []Filter
	Groups       []Grouping
	Aggregations []Aggregation

	OrderBy    string
	Limit      int16
	TimeBucket int

	Cumulative  *Result
	Results     ResultMap
	TimeResults map[int]ResultMap
	Sorted      []*Result
	Matched     RecordList
	Sessions    SessionList

	BlockList map[string]TableBlock
	Table     *Table

	LuaResult LuaTable
	LuaState  *C.struct_lua_State
}

func CombineResults

func CombineResults(querySpec *QuerySpec, block_specs map[string]*QuerySpec) *QuerySpec

func CopyQuerySpec

func CopyQuerySpec(querySpec *QuerySpec) *QuerySpec

func (*QuerySpec) CalculateICC

func (querySpec *QuerySpec) CalculateICC() map[string]float64

To calculate SSW and SSB, we do the following. SSW = sum of squares within groups: take each group and calculate its variance, then add all those variances together. SSB = sum of squares between groups: take each group's average and calculate their variance against the overall average.

func (*QuerySpec) PrintResults

func (qs *QuerySpec) PrintResults()

func (*QuerySpec) Punctuate

func (querySpec *QuerySpec) Punctuate()

func (*QuerySpec) ResetResults

func (querySpec *QuerySpec) ResetResults()

type Record

type Record struct {
	Strs      []StrField
	Ints      []IntField
	SetMap    map[int16]SetField
	Populated []int8

	Timestamp int64
	Path      string
	// contains filtered or unexported fields
}

func (*Record) AddIntField

func (r *Record) AddIntField(name string, val int64)

func (*Record) AddSetField

func (r *Record) AddSetField(name string, val []string)

func (*Record) AddStrField

func (r *Record) AddStrField(name string, val string)

func (*Record) CopyRecord

func (r *Record) CopyRecord() *Record

func (*Record) GetIntVal

func (r *Record) GetIntVal(name string) (int, bool)

func (*Record) GetSetVal

func (r *Record) GetSetVal(name string) ([]string, bool)

func (*Record) GetStrVal

func (r *Record) GetStrVal(name string) (string, bool)

func (*Record) ResizeFields

func (r *Record) ResizeFields(length int16)

type RecordList

type RecordList []*Record

Before we save the new record list in a table, we tend to sort by time

func CombineMatches

func CombineMatches(block_specs map[string]*QuerySpec) RecordList

func (RecordList) Len

func (a RecordList) Len() int

func (RecordList) ResetRecords added in v0.2.0

func (rl RecordList) ResetRecords(tb *TableBlock)

Recycle allocated records between blocks. That means we need a wash and rinse cycle. We can re-use blocks if:

same loadSpec
table is the same
NumRecords are the same

to do so,

we clean out the different arrays inside a block
re-home the record list into the table block

func (RecordList) Swap

func (a RecordList) Swap(i, j int)

type RecoverableLock

type RecoverableLock interface {
	Grab() bool
	Release() bool
	Recover() bool
}

Every LockFile should have a recovery plan

type Result

type Result struct {
	Hists map[string]*Hist

	GroupByKey  string
	BinaryByKey string
	Count       int64
	Samples     int64
}

func NewResult

func NewResult() *Result

func (*Result) Combine

func (rs *Result) Combine(next_result *Result)

This does an in place combine of the next_result into this one...

type ResultJSON

type ResultJSON map[string]interface{}

type ResultMap

type ResultMap map[string]*Result

func (*ResultMap) Combine

func (master_result *ResultMap) Combine(results *ResultMap)

type RowSavedInt

type RowSavedInt struct {
	Name  int16
	Value int64
}

type RowSavedSet

type RowSavedSet struct {
	Name  int16
	Value []string
}

type RowSavedStr

type RowSavedStr struct {
	Name  int16
	Value string
}

type Sample

type Sample map[string]interface{}

type SaveBlockChunkCB

type SaveBlockChunkCB struct {
	// contains filtered or unexported fields
}

func (*SaveBlockChunkCB) CB

func (cb *SaveBlockChunkCB) CB(digestname string, records RecordList)

type SavedBlockCache added in v0.2.0

type SavedBlockCache map[string]*SavedColumnInfo

type SavedColumnInfo

type SavedColumnInfo struct {
	NumRecords int32

	StrInfoMap SavedStrInfo
	IntInfoMap SavedIntInfo
}

type SavedIntBucket

type SavedIntBucket struct {
	Value   int64
	Records []uint32
}

type SavedIntColumn

type SavedIntColumn struct {
	Name            string
	DeltaEncodedIDs bool
	ValueEncoded    bool
	BucketEncoded   bool
	Bins            []SavedIntBucket
	Values          []int64
	VERSION         int32
}

func NewSavedIntColumn

func NewSavedIntColumn() SavedIntColumn

type SavedIntInfo

type SavedIntInfo map[string]*IntInfo

type SavedRecord

type SavedRecord struct {
	Ints []RowSavedInt
	Strs []RowSavedStr
	Sets []RowSavedSet
}

type SavedRecords

type SavedRecords struct {
	RecordList []*SavedRecord
}

type SavedSetBucket

type SavedSetBucket struct {
	Value   int32
	Records []uint32
}

type SavedSetColumn

type SavedSetColumn struct {
	Name            string
	Bins            []SavedSetBucket
	Values          [][]int32
	StringTable     []string
	DeltaEncodedIDs bool
	BucketEncoded   bool
	VERSION         int32
}

func NewSavedSetColumn

func NewSavedSetColumn() SavedSetColumn

type SavedStrBucket

type SavedStrBucket struct {
	Value   int32
	Records []uint32
}

type SavedStrColumn

type SavedStrColumn struct {
	Name            string
	DeltaEncodedIDs bool
	BucketEncoded   bool
	Bins            []SavedStrBucket
	Values          []int32
	StringTable     []string
	VERSION         int32
}

func NewSavedStrColumn

func NewSavedStrColumn() SavedStrColumn

type SavedStrInfo

type SavedStrInfo map[string]*StrInfo

type SeparatedColumns

type SeparatedColumns struct {
	// contains filtered or unexported fields
}

type SessionList

type SessionList struct {
	List Sessions

	JoinTable *Table
	Results   map[string]*SessionStats

	PathCounts  map[string]int
	PathUniques map[string]int

	Expiration     int
	LastExpiration int
}

func (*SessionList) AddRecord

func (sl *SessionList) AddRecord(group_key string, r *Record)

func (*SessionList) ExpireRecords

func (sl *SessionList) ExpireRecords() int

func (*SessionList) NoMoreRecordsBefore

func (as *SessionList) NoMoreRecordsBefore(timestamp int)

type SessionSpec

type SessionSpec struct {
	ExpireAfter int // Seconds to expire a session after not seeing any new events

	Sessions SessionList
	Count    int
}

func NewSessionSpec

func NewSessionSpec() SessionSpec

func (*SessionSpec) CombineSessions

func (ss *SessionSpec) CombineSessions(sessionspec *SessionSpec)

func (*SessionSpec) ExpireRecords

func (ss *SessionSpec) ExpireRecords()

func (*SessionSpec) Finalize

func (ss *SessionSpec) Finalize()

func (*SessionSpec) PrintResults

func (ss *SessionSpec) PrintResults()

type SessionStats

type SessionStats struct {
	NumEvents       Hist
	NumBounces      Hist
	NumSessions     Hist
	SessionDuration Hist
	Retention       Hist
	Calendar        *Calendar

	SessionDelta Hist

	LastSessionEnd int64
}

func NewSessionStats

func NewSessionStats() *SessionStats

func (*SessionStats) CombineStats

func (ss *SessionStats) CombineStats(stats *SessionStats)

func (*SessionStats) PrintStats

func (ss *SessionStats) PrintStats(key string)

func (*SessionStats) SummarizeSession

func (ss *SessionStats) SummarizeSession(records RecordList)

type Sessions

type Sessions map[string]*ActiveSession

type SetArr

type SetArr []SetField

type SetField

type SetField []int32

type SetFilter

type SetFilter struct {
	Field   string
	FieldId int16
	Op      string
	Value   string
	// contains filtered or unexported fields
}

func (SetFilter) Filter

func (filter SetFilter) Filter(r *Record) bool

type SetMap

type SetMap map[int16]SetField

type SortBlocksByEndTime

type SortBlocksByEndTime []*TableBlock

func (SortBlocksByEndTime) Len

func (a SortBlocksByEndTime) Len() int

func (SortBlocksByEndTime) Less

func (a SortBlocksByEndTime) Less(i, j int) bool

func (SortBlocksByEndTime) Swap

func (a SortBlocksByEndTime) Swap(i, j int)

type SortBlocksByTime

type SortBlocksByTime []*TableBlock

func (SortBlocksByTime) Len

func (a SortBlocksByTime) Len() int

func (SortBlocksByTime) Less

func (a SortBlocksByTime) Less(i, j int) bool

func (SortBlocksByTime) Swap

func (a SortBlocksByTime) Swap(i, j int)

type SortRecordsByTime

type SortRecordsByTime struct {
	RecordList
}

func (SortRecordsByTime) Less

func (a SortRecordsByTime) Less(i, j int) bool

type SortResultsByCol

type SortResultsByCol struct {
	Results []*Result

	Col string
}

func (SortResultsByCol) Len

func (a SortResultsByCol) Len() int

func (SortResultsByCol) Less

func (a SortResultsByCol) Less(i, j int) bool

This sorts the records in descending order

func (SortResultsByCol) Swap

func (a SortResultsByCol) Swap(i, j int)

type SortStrsByCount

type SortStrsByCount []StrInfoCol

func (SortStrsByCount) Len

func (a SortStrsByCount) Len() int

func (SortStrsByCount) Less

func (a SortStrsByCount) Less(i, j int) bool

func (SortStrsByCount) Swap

func (a SortStrsByCount) Swap(i, j int)

type StrArr

type StrArr []StrField

type StrField

type StrField int32

type StrFilter

type StrFilter struct {
	Field   string
	FieldId int16
	Op      string
	Value   string
	Regex   *regexp.Regexp
	// contains filtered or unexported fields
}

func (StrFilter) Filter

func (filter StrFilter) Filter(r *Record) bool

type StrInfo

type StrInfo struct {
	TopStringCount map[int32]int
	Cardinality    int
}

StrInfo and IntInfo contain interesting tidbits about columns. They also get serialized to disk in the block's info.db.

type StrInfoCol

type StrInfoCol struct {
	Name  int32
	Value int
}

type StrInfoTable

type StrInfoTable map[int16]*StrInfo

type StrReplace

type StrReplace struct {
	// contains filtered or unexported fields
}

type Table

type Table struct {
	Name      string
	BlockList map[string]*TableBlock
	KeyTable  map[string]int16 // String Key Names
	KeyTypes  map[int16]int8

	// Need to keep track of the last block we've used, right?
	LastBlock TableBlock
	RowBlock  *TableBlock

	StrInfo StrInfoTable
	IntInfo IntInfoTable

	BlockInfoCache map[string]*SavedColumnInfo
	NewBlockInfos  []string
	// contains filtered or unexported fields
}

func GetTable

func GetTable(name string) *Table

This is a singleton constructor for Tables

func (*Table) Aggregation

func (t *Table) Aggregation(name string, op string) Aggregation

func (*Table) AppendRecordsToLog

func (t *Table) AppendRecordsToLog(records RecordList, blockname string)

func (*Table) BuildJoinMap

func (t *Table) BuildJoinMap()

func (*Table) ChunkAndSave

func (t *Table) ChunkAndSave()

func (*Table) CompactRecords added in v0.2.0

func (t *Table) CompactRecords()

TODO: figure out how often we actually do a collation check by storing last collation inside a file somewhere

func (*Table) DeduceTableInfoFromBlocks

func (t *Table) DeduceTableInfoFromBlocks()

Alright, so... I accidentally broke my info.db file. How can I go about loading the TableInfo based off the blocks? I think I go through each block and load the block, verifying the different column types.

func (*Table) DigestRecords

func (t *Table) DigestRecords()

Go through rowstore and save records out to column store

func (*Table) FillPartialBlock

func (t *Table) FillPartialBlock() bool

TODO: find any open blocks and then fill them...

func (*Table) FindPartialBlocks

func (t *Table) FindPartialBlocks() []*TableBlock

func (*Table) GetColumnType

func (t *Table) GetColumnType(v string) int8

func (*Table) GetRecordById

func (t *Table) GetRecordById(id string) *Record

func (*Table) GrabBlockLock

func (t *Table) GrabBlockLock(name string) bool

func (*Table) GrabCacheLock added in v0.2.0

func (t *Table) GrabCacheLock() bool

func (*Table) GrabDigestLock

func (t *Table) GrabDigestLock() bool

func (*Table) GrabInfoLock

func (t *Table) GrabInfoLock() bool

func (*Table) Grouping

func (t *Table) Grouping(name string) Grouping

func (*Table) HasFlagFile

func (t *Table) HasFlagFile() bool

func (*Table) IngestRecords

func (t *Table) IngestRecords(blockname string)

Go through newRecords list and save all the new records out to a row store

func (*Table) IntFilter

func (t *Table) IntFilter(name string, op string, value int) IntFilter

func (*Table) IsNotExist added in v0.2.0

func (t *Table) IsNotExist() bool

func (*Table) LoadAndQueryRecords

func (t *Table) LoadAndQueryRecords(loadSpec *LoadSpec, querySpec *QuerySpec) int

func (*Table) LoadBlockCache added in v0.2.0

func (t *Table) LoadBlockCache()

func (*Table) LoadBlockFromDir

func (t *Table) LoadBlockFromDir(dirname string, loadSpec *LoadSpec, load_records bool) *TableBlock

TODO: have this only pull the blocks into column format and not materialize the columns immediately

func (*Table) LoadBlockInfo added in v0.2.0

func (t *Table) LoadBlockInfo(dirname string) *SavedColumnInfo

func (*Table) LoadRecords

func (t *Table) LoadRecords(loadSpec *LoadSpec) int

func (*Table) LoadRecordsFromLog

func (t *Table) LoadRecordsFromLog(filename string) RecordList

func (*Table) LoadRowStoreRecords

func (t *Table) LoadRowStoreRecords(digest string, after_block_load_cb AfterRowBlockLoad)

func (*Table) LoadSavedRecordsFromLog

func (t *Table) LoadSavedRecordsFromLog(filename string) []*SavedRecord

func (*Table) LoadTableInfo

func (t *Table) LoadTableInfo() bool

func (*Table) LoadTableInfoFrom

func (t *Table) LoadTableInfoFrom(filename string) bool

func (*Table) MakeDir added in v0.2.0

func (t *Table) MakeDir()

func (*Table) MatchAndAggregate

func (t *Table) MatchAndAggregate(querySpec *QuerySpec)

func (*Table) MaybeCompactRecords

func (t *Table) MaybeCompactRecords()

We compact if: we have over X files; we have over X megabytes of data. Remember, there is no reason to actually read the data off disk until we decide to compact.

func (*Table) NewHist

func (t *Table) NewHist(info *IntInfo) *Hist

func (*Table) NewLoadSpec

func (t *Table) NewLoadSpec() LoadSpec

func (*Table) NewRecord

func (t *Table) NewRecord() *Record

func (*Table) PrintColInfo

func (t *Table) PrintColInfo()

func (*Table) PrintRecord

func (t *Table) PrintRecord(r *Record)

func (*Table) PrintRecords

func (t *Table) PrintRecords(records RecordList)

func (*Table) PrintSamples

func (t *Table) PrintSamples()

func (*Table) ReadBlockInfoFromDir

func (t *Table) ReadBlockInfoFromDir(dirname string) *SavedColumnInfo

TODO: have this only pull the blocks into column format and not materialize the columns immediately

func (*Table) ReleaseBlockLock

func (t *Table) ReleaseBlockLock(name string) bool

func (*Table) ReleaseCacheLock added in v0.2.0

func (t *Table) ReleaseCacheLock() bool

func (*Table) ReleaseDigestLock

func (t *Table) ReleaseDigestLock() bool

func (*Table) ReleaseInfoLock

func (t *Table) ReleaseInfoLock() bool

func (*Table) ReleaseRecords

func (t *Table) ReleaseRecords()

Remove our pointer to the blocklist so a GC is triggered and a bunch of new memory becomes available

func (*Table) ResetBlockCache added in v0.2.0

func (t *Table) ResetBlockCache()

func (*Table) RestoreUningestedFiles

func (t *Table) RestoreUningestedFiles()

func (*Table) SaveRecordsToBlock

func (t *Table) SaveRecordsToBlock(records RecordList, filename string) bool

func (*Table) SaveRecordsToColumns

func (t *Table) SaveRecordsToColumns() bool

func (*Table) SaveTableInfo

func (t *Table) SaveTableInfo(fname string)

func (*Table) SetFilter

func (t *Table) SetFilter(name string, op string, value string) SetFilter

func (*Table) ShouldCompactRowStore added in v0.2.0

func (t *Table) ShouldCompactRowStore(digest string) bool

func (*Table) ShouldLoadBlockFromDir

func (t *Table) ShouldLoadBlockFromDir(dirname string, querySpec *QuerySpec) bool

optimizing for integer pre-cached info

func (*Table) StrFilter

func (t *Table) StrFilter(name string, op string, value string) StrFilter

func (*Table) TrimTable

func (t *Table) TrimTable(trimSpec *TrimSpec) []*TableBlock

List all the blocks that should be trimmed to keep the table within its memory limits.

func (*Table) WriteBlockCache added in v0.2.0

func (t *Table) WriteBlockCache()

type TableBlock

type TableBlock struct {
	Name       string
	RecordList RecordList
	Info       *SavedColumnInfo
	Size       int64
	Matched    RecordList

	IntInfo IntInfoTable
	StrInfo StrInfoTable
	// contains filtered or unexported fields
}

Table Block should have a bunch of metadata next to it, too

func (*TableBlock) ExportBlockData added in v0.2.0

func (b *TableBlock) ExportBlockData()

func (*TableBlock) GetColumnInfo

func (tb *TableBlock) GetColumnInfo(name_id int16) *TableColumn

func (*TableBlock) RecycleSlab added in v0.2.0

func (tb *TableBlock) RecycleSlab(loadSpec *LoadSpec)

func (*TableBlock) SaveInfoToColumns

func (tb *TableBlock) SaveInfoToColumns(dirname string)

func (*TableBlock) SaveIntsToColumns

func (tb *TableBlock) SaveIntsToColumns(dirname string, same_ints map[int16]ValueMap)

func (*TableBlock) SaveSetsToColumns

func (tb *TableBlock) SaveSetsToColumns(dirname string, same_sets map[int16]ValueMap)

func (*TableBlock) SaveStrsToColumns

func (tb *TableBlock) SaveStrsToColumns(dirname string, same_strs map[int16]ValueMap)

func (*TableBlock) SaveToColumns

func (tb *TableBlock) SaveToColumns(filename string) bool

func (*TableBlock) SeparateRecordsIntoColumns

func (tb *TableBlock) SeparateRecordsIntoColumns() SeparatedColumns

type TableColumn

type TableColumn struct {
	Type        int8
	StringTable map[string]int32
	RCache      map[int]bool
	// contains filtered or unexported fields
}

type TrimSpec

type TrimSpec struct {
	MBLimit      int64 // size limit of DB in megabytes
	DeleteBefore int64 // delete records older than DeleteBefore in seconds
}

type ValueMap

type ValueMap map[int64][]uint32

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL