letarette

package
v0.2.2 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: May 5, 2022 License: Apache-2.0 Imports: 56 Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

View Source
var ErrStemmerSettingsMismatch = fmt.Errorf("config does not match index state")

ErrStemmerSettingsMismatch is returned when the config and the index state do not match.

Functions

func CheckIndex

func CheckIndex(dbo Database) error

CheckIndex runs an integrity check on the index

func CheckStemmerSettings

func CheckStemmerSettings(db Database, cfg Config) error

CheckStemmerSettings verifies that the index stemmer settings match the current config. If there are no index settings, they will be set from the provided config.

func CompressIndex added in v0.2.0

func CompressIndex(ctx context.Context, dbo Database) error

CompressIndex compresses the txt column

func ForceIndexStemmerState

func ForceIndexStemmerState(state snowball.Settings, dbo Database) error

ForceIndexStemmerState resets the stemmer state stored in the database to the provided state.

func GetSpellfixLag

func GetSpellfixLag(ctx context.Context, dbo Database, minCount int) (int, error)

GetSpellfixLag returns the number of words in the main index that are not yet in the spelling index.

func InitializeShard added in v0.2.0

func InitializeShard(ctx context.Context, conn *nats.Conn, db Database, cfg Config, monitor StatusMonitor) error

InitializeShard tries to locate healthier shards to clone from, to cut down start-up times and reduce load on the DocumentManager.

func LoadShardClone added in v0.2.0

func LoadShardClone(ctx context.Context, db Database, source io.Reader) error

LoadShardClone loads a clone from a reader source

func RebuildIndex

func RebuildIndex(dbo Database) error

RebuildIndex rebuilds the fts index from the docs table

func ResetMigration

func ResetMigration(cfg Config, version int) error

ResetMigration forces the migration version of a db. It is typically used to back out of a failed migration. Note: no migration steps are actually performed; it only sets the version and resets the dirty flag.

func SQL added in v0.2.0

func SQL(path string) (string, error)

SQL loads sql code from resources and strips away comments

func SetIndexPageSize

func SetIndexPageSize(dbo Database, pageSize int) error

SetIndexPageSize sets the max page size for future index allocations.

func SetSynonyms added in v0.2.0

func SetSynonyms(ctx context.Context, dbo Database, synonyms []Synonyms) error

SetSynonyms replaces the current list of synonyms in the index

func ShardIndexFromDocumentID added in v0.2.0

func ShardIndexFromDocumentID(docID protocol.DocumentID, shardGroupSize int) int

ShardIndexFromDocumentID calculates a shard index based on a hash of the document ID. The hash algorithm is chosen for even distribution in a shard group.

func UpdateSpellfix

func UpdateSpellfix(ctx context.Context, dbo Database, minCount int) error

UpdateSpellfix updates the spelling table with the top terms from the fts.

func Usage

func Usage(advanced bool)

Usage prints usage help to stdout

func VacuumIndex added in v0.2.0

func VacuumIndex(dbo Database) error

VacuumIndex runs vacuum on the database to reclaim space

Types

type BulkLoader added in v0.2.0

type BulkLoader struct {
	// contains filtered or unexported fields
}

BulkLoader performs transactional loading of documents into the index

func StartBulkLoad added in v0.2.0

func StartBulkLoad(dbo Database, space string) (*BulkLoader, error)

StartBulkLoad creates a new BulkLoader instance for the specified database and space.

func (*BulkLoader) Commit added in v0.2.0

func (bl *BulkLoader) Commit() error

Commit commits the bulk load transaction and performs vacuuming. The BulkLoader should not be used after committing.

func (*BulkLoader) Load added in v0.2.0

func (bl *BulkLoader) Load(doc protocol.Document) error

Load loads one document into the current loading transaction

func (*BulkLoader) LoadedBytes added in v0.2.0

func (bl *BulkLoader) LoadedBytes() uint32

LoadedBytes returns the number of bytes loaded by the current BulkLoader instance

func (*BulkLoader) Rollback added in v0.2.0

func (bl *BulkLoader) Rollback() error

Rollback rolls back the complete bulk load transaction. The BulkLoader should not be used after rolling back.

type Cache

type Cache struct {
	// contains filtered or unexported fields
}

Cache keeps search results for a set duration or until the cache max size is reached.

func NewCache

func NewCache(timeout time.Duration, maxSize uint64) *Cache

NewCache creates a cache with the given timeout and max size.

func (*Cache) Get

func (cache *Cache) Get(query string, spaces []string, limit uint16, offset uint16) (protocol.SearchResult, bool)

Get fetches cached search results

func (*Cache) Invalidate added in v0.2.0

func (cache *Cache) Invalidate(doc protocol.DocumentID)

Invalidate marks a document as updated

func (*Cache) Put

func (cache *Cache) Put(query string, spaces []string, limit uint16, offset uint16, res protocol.SearchResult)

Put stores search results in the cache

type Cloner added in v0.2.0

type Cloner struct {
	// contains filtered or unexported fields
}

The Cloner listens to clone requests over NATS and responds with a URL to a clone stream.

func StartCloner added in v0.2.0

func StartCloner(nc *nats.Conn, db Database, cfg Config) (*Cloner, error)

StartCloner returns a running cloning service, listening to NATS requests and providing shard clones over HTTPS.

func (*Cloner) Close added in v0.2.0

func (cs *Cloner) Close() error

Close stops the cloning service

func (*Cloner) ServeHTTP added in v0.2.0

func (cs *Cloner) ServeHTTP(w http.ResponseWriter, req *http.Request)

type Config

type Config struct {
	Nats struct {
		URLS     []string `default:"nats://localhost:4222"`
		SeedFile string
		RootCAs  []string
		Topic    string `default:"leta"`
	}
	DB struct {
		Path           string `default:"letarette.db"`
		CacheSizeMB    uint32 `default:"1024" desc:"advanced"` // default 1G DB cache
		MMapSizeMB     uint32 `default:"0" desc:"internal"`    // no DB mmap by default
		ToolConnection bool   `ignored:"true"`
	}
	Index struct {
		Spaces         []string `required:"true" default:"docs"`
		ListSize       uint16   `default:"500" desc:"advanced"`
		ReqSize        uint16   `default:"50" desc:"advanced"`
		MaxOutstanding uint16   `split_words:"true" default:"4" desc:"advanced"`
		Wait           struct {
			Cycle      time.Duration `default:"100ms" desc:"advanced"`
			EmptyCycle time.Duration `default:"5s" desc:"advanced"`
			Interest   time.Duration `default:"5s" desc:"advanced"`
			Document   time.Duration `default:"30s" desc:"advanced"`
			Refetch    time.Duration `default:"3s" desc:"advanced"`
		}
		Disable  bool `default:"false" desc:"advanced"`
		Compress bool `default:"false"`
	}
	Spelling struct {
		MinFrequency int `split_words:"true" default:"5" desc:"advanced"`
		MaxLag       int `split_words:"true" default:"100" desc:"advanced"`
	}
	Stemmer struct {
		Languages        []string `split_words:"true" required:"true" default:"english"`
		RemoveDiacritics bool     `split_words:"true" default:"true" desc:"advanced"`
		TokenCharacters  string   `desc:"advanced"`
		Separators       string   `desc:"advanced"`
		StopwordCutoff   float32  `split_words:"true" default:"1" desc:"advanced"`
	}
	Search struct {
		Timeout        time.Duration `default:"4s"`
		Cap            int           `default:"10000"`
		CacheTimeout   time.Duration `split_words:"true" default:"10m"`
		CacheMaxsizeMB uint64        `split_words:"true" default:"250"`
		Disable        bool          `default:"false" desc:"advanced"`
		Strategy       int           `default:"1" desc:"internal"`
	}
	Shard          string `default:"1/1"`
	ShardgroupSize uint16 `ignored:"true"`
	ShardIndex     uint16 `ignored:"true"`
	CloningPort    uint16 `default:"8192"`
	CloningHost    string
	Profile        struct {
		HTTP  int    `desc:"internal"`
		CPU   string `desc:"internal"`
		Mem   string `desc:"internal"`
		Block string `desc:"internal"`
		Mutex string `desc:"internal"`
	}
}

Config holds the main configuration

func LoadConfig

func LoadConfig() (cfg Config, err error)

LoadConfig loads configuration variables from the environment and returns a fully populated Config instance.

type Database

type Database interface {
	Close() error
	RawQuery(q string, args ...interface{}) ([]string, error)
	RawExec(q string, args ...interface{}) error
}

Database is a live connection to a SQLite database file, providing access methods for all db interactions.

func OpenDatabase

func OpenDatabase(cfg Config) (Database, error)

OpenDatabase connects to a new or existing database and migrates the database up to the latest version.

type IndexOptimizer

type IndexOptimizer struct {
	// contains filtered or unexported fields
}

IndexOptimizer is used to run step-wise index optimization. The instance must be closed by calling Close() to return the database connection to the pool.

func StartIndexOptimization

func StartIndexOptimization(dbo Database, pageIncrement int) (*IndexOptimizer, error)

StartIndexOptimization initiates a step-wise index optimization and returns an IndexOptimizer instance on success.

func (IndexOptimizer) Close

func (o IndexOptimizer) Close() error

Close returns the database connection to the pool.

func (IndexOptimizer) Step

func (o IndexOptimizer) Step() (bool, error)

Step runs one step of the optimizer. Returns true when optimization is complete. Stopping before done is OK.

type Indexer

type Indexer interface {
	Close()
}

Indexer continuously runs the indexing process, until Close is called.

func StartIndexer

func StartIndexer(nc *nats.Conn, db Database, cfg Config, cache *Cache) (Indexer, error)

StartIndexer creates and starts an indexer instance. This is really a singleton in that only one instance with the same database or config can be run at the same time.

type Interest

type Interest struct {
	DocID   protocol.DocumentID `db:"docID"`
	State   InterestState
	Updated int64 `db:"updatedNanos"`
}

Interest represents one row in the interest list

type InterestListState

type InterestListState struct {
	CreatedAt        int64               `db:"listCreatedAtNanos"`
	LastUpdated      int64               `db:"lastUpdatedAtNanos"`
	LastUpdatedDocID protocol.DocumentID `db:"lastUpdatedDocID"`
}

InterestListState keeps track of where the index process is

type InterestState

type InterestState int

InterestState represents the state of an interest

type MetricsCollector added in v0.2.0

type MetricsCollector interface {
	Close()
}

MetricsCollector listens and responds to metrics requests

func StartMetricsCollector added in v0.2.0

func StartMetricsCollector(nc *nats.Conn, db Database, cfg Config) (MetricsCollector, error)

StartMetricsCollector creates a new metrics collector, and starts responding to requests

type Phrase

type Phrase struct {
	Text     string
	Wildcard bool
	Exclude  bool
}

Phrase represents one parsed query phrase, with flags

func CanonicalizePhraseList

func CanonicalizePhraseList(phrases []Phrase) []Phrase

CanonicalizePhraseList converts all phrases in a phrase list to lower case, sorts the list, and eliminates duplicates.

func ParseQuery

func ParseQuery(query string) []Phrase

ParseQuery tokenizes a query string and returns a list of parsed phrases with exclusion and wildcard flags.

func ReducePhraseList

func ReducePhraseList(phrases []Phrase) []Phrase

ReducePhraseList removes one-character phrases from a list of phrases.

func (Phrase) String

func (p Phrase) String() string

type Profiler added in v0.2.0

type Profiler struct {
	// contains filtered or unexported fields
}

Profiler wraps native profiling tools

func StartProfiler added in v0.2.0

func StartProfiler(cfg Config) (*Profiler, error)

StartProfiler starts a profiler if one is set up in the config.

func (*Profiler) Close added in v0.2.0

func (p *Profiler) Close() error

Close finishes profiling

type Searcher

type Searcher interface {
	Close()
}

Searcher continuously runs the search process, until Close is called.

func StartSearcher

func StartSearcher(nc *nats.Conn, db Database, cfg Config, cache *Cache) (Searcher, error)

StartSearcher creates and starts a searcher instance.

type ShardCloner added in v0.2.0

type ShardCloner struct {
	// contains filtered or unexported fields
}

A ShardCloner creates a copy of all documents in the index that match a specific shard group. The result is a gzipped, gob-encoded file, ready to be loaded.

func StartShardClone added in v0.2.0

func StartShardClone(ctx context.Context, db *database, shardGroup string, dest io.Writer) (*ShardCloner, error)

StartShardClone starts the process of cloning all documents in the index for loading into a specified shard group.

func (*ShardCloner) Close added in v0.2.0

func (s *ShardCloner) Close() (int, error)

Close stops the cloning process and closes the output.

func (*ShardCloner) Step added in v0.2.0

func (s *ShardCloner) Step(ctx context.Context) (bool, error)

Step runs one step of the cloning process. Returns false when cloning is complete.

type ShardInfo added in v0.2.0

type ShardInfo struct {
	ShardgroupSize uint16
	ShardIndex     uint16
	DocCount       uint64
}

ShardInfo holds info about a healthy shard as a source for cloning.

type Stats

type Stats struct {
	Spaces []struct {
		Name  string
		State InterestListState
	}
	CommonTerms []struct {
		Term  string
		Count int
	}
	TotalTerms  int
	UniqueTerms int
	Docs        int
	Stemmer     snowball.Settings
}

Stats holds statistics gathered by GetIndexStats

func GetIndexStats

func GetIndexStats(dbo Database) (Stats, error)

GetIndexStats collects statistics about the index, partly by the use of the fts5vocab virtual table.

type StatusMonitor

type StatusMonitor interface {
	Close()
	GetHealthyShards() []ShardInfo
	ShardInitDone()
}

StatusMonitor communicates worker status with the cluster, and listens to status broadcasts from other workers.

It identifies the shard configurations in the cluster and their corresponding status. The cluster is considered healthy as long as one shard config is healthy.

func StartStatusMonitor

func StartStatusMonitor(nc *nats.Conn, db Database, cfg Config) (StatusMonitor, error)

StartStatusMonitor creates a new StatusMonitor, listening to status broadcasts and broadcasting our status.

type Synonyms added in v0.2.0

type Synonyms struct {
	Description string
	Words       []string
}

Synonyms is a named list of words that are considered synonyms

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL