suggest

package
v0.0.0-...-3b44145 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Jan 11, 2021 License: MIT Imports: 18 Imported by: 0

Documentation

Overview

Package suggest provides fuzzy search and autocomplete functionality

Example

This example demonstrates how to use this package.

package main

import (
	"fmt"
	"log"

	"github.com/suggest-go/suggest/pkg/dictionary"
	"github.com/suggest-go/suggest/pkg/metric"
	"github.com/suggest-go/suggest/pkg/suggest"
)

// main builds an in-memory "cars" index and prints the fuzzy-search
// suggestions found for the query "niss ma".
func main() {
	// The dictionary lives in memory here; any other implementation
	// (for example SqlDictionary or CDBDictionary) could be used instead.
	cars := dictionary.NewInMemoryDictionary([]string{
		"Nissan March",
		"Nissan Juke",
		"Nissan Maxima",
		"Nissan Murano",
		"Nissan Note",
		"Toyota Mark II",
		"Toyota Corolla",
		"Toyota Corona",
	})

	// Index configuration.
	description := suggest.IndexDescription{
		// name of the dictionary
		Name: "cars",
		// size of the nGram
		NGramSize: 3,
		// wrap symbols (front and rear)
		Wrap: [2]string{"$", "$"},
		// pad symbol that replaces forbidden chars
		Pad: "$",
		// alphabet of allowed chars (other chars will be replaced with the pad symbol)
		Alphabet: []string{"english", "$"},
	}

	// Builder for a search index that is created at runtime.
	ramBuilder, err := suggest.NewRAMBuilder(cars, description)
	if err != nil {
		log.Fatalf("Unexpected error: %v", err)
	}

	svc := suggest.NewService()

	// Register the new search index with the service under the configured name.
	err = svc.AddIndex(description.Name, cars, ramBuilder)
	if err != nil {
		log.Fatalf("Unexpected error: %v", err)
	}

	// Search configuration: query, topK elements, type of metric, min similarity.
	conf, err := suggest.NewSearchConfig("niss ma", 5, metric.CosineMetric(), 0.4)
	if err != nil {
		log.Fatalf("Unexpected error: %v", err)
	}

	items, err := svc.Suggest("cars", conf)
	if err != nil {
		log.Fatalf("Unexpected error: %v", err)
	}

	// Keep only the string value of every returned item.
	values := make([]string, len(items))
	for i := range items {
		values[i] = items[i].Value
	}

	fmt.Println(values)
}
Output:

[Nissan Maxima Nissan March]

Index

Examples

Constants

This section is empty.

Variables

This section is empty.

Functions

func Index

func Index(
	directory store.Directory,
	dict dictionary.Dictionary,
	config index.WriterConfig,
	tokenizer analysis.Tokenizer,
) error

Index builds a search index by using the given config and the dictionary and persists it to the directory

func NewAutocompleteTokenizer

func NewAutocompleteTokenizer(d IndexDescription) analysis.Tokenizer

NewAutocompleteTokenizer creates a tokenizer for autocomplete service

func NewSuggestTokenizer

func NewSuggestTokenizer(d IndexDescription) analysis.Tokenizer

NewSuggestTokenizer creates a tokenizer for suggester service

Types

type Autocomplete

// Autocomplete provides autocomplete functionality for candidates search.
type Autocomplete interface {
	// Autocomplete returns candidates where the query string is a substring of each candidate.
	// The factory argument creates the CollectorManager used for the search.
	// NOTE(review): Service.Autocomplete describes the match as a prefix match,
	// not a substring match — confirm which wording is accurate.
	Autocomplete(query string, factory CollectorManagerFactory) ([]Candidate, error)
}

Autocomplete provides autocomplete functionality for candidates search

func NewAutocomplete

func NewAutocomplete(
	indices index.InvertedIndexIndices,
	searcher index.Searcher,
	tokenizer analysis.Tokenizer,
) Autocomplete

NewAutocomplete creates a new instance of Autocomplete

type Builder

// Builder is the entity that is responsible for tuning and creating a NGramIndex.
type Builder interface {
	// Build configures and returns a new instance of NGramIndex,
	// or an error if the index could not be created.
	Build() (NGramIndex, error)
}

Builder is the entity that is responsible for tuning and creating a NGramIndex

func NewBuilder

func NewBuilder(directory store.Directory, description IndexDescription) (Builder, error)

NewBuilder works with already indexed data

func NewFSBuilder

func NewFSBuilder(description IndexDescription) (Builder, error)

NewFSBuilder works with already indexed data

func NewRAMBuilder

func NewRAMBuilder(dict dictionary.Dictionary, description IndexDescription) (Builder, error)

NewRAMBuilder creates a search index by using the given dictionary and the index description in a RAMDriver directory

type Candidate

// Candidate is an item of Collector: a posting-list position together with its score.
type Candidate struct {
	// Key is a position (docId) in the posting list.
	Key index.Position
	// Score is a float64 number that represents the score of the document.
	Score float64
}

Candidate is an item of Collector

func (Candidate) Less

func (c Candidate) Less(o Candidate) bool

Less tells if the given candidate is less than the provided one

type Collector

// Collector collects the doc stream that satisfies the search criteria.
// It extends merger.Collector with the ability to receive a Scorer.
type Collector interface {
	merger.Collector
	// SetScorer sets a scorer before collection starts
	SetScorer(scorer Scorer)
}

Collector collects the doc stream that satisfies the search criteria

type CollectorManager

// CollectorManager is responsible for creating collectors and reducing them
// into the result set.
type CollectorManager interface {
	// Create creates a new collector that will be used for a search segment
	Create() Collector
	// Collect returns back the given collectors.
	// NOTE(review): presumably this is the reduce step that folds the collectors'
	// results into the manager — confirm against the implementations.
	Collect(collectors ...Collector) error
	// GetCandidates returns currently collected candidates.
	GetCandidates() []Candidate
}

CollectorManager is responsible for creating collectors and reducing them into the result set

type CollectorManagerFactory

type CollectorManagerFactory func() CollectorManager

CollectorManagerFactory is a factory method for creating a new instance of CollectorManager.

type Driver

type Driver string

Driver represents storage type of an inverted index

// Supported values of Driver, the storage type of an inverted index.
const (
	// RAMDriver means that an inverted index is stored in RAM
	RAMDriver Driver = "RAM"
	// DiscDriver means that an inverted index is stored on FS and was indexed before
	DiscDriver Driver = "DISC"
)

type FirstKCollectorManager

type FirstKCollectorManager struct {
	// contains filtered or unexported fields
}

FirstKCollectorManager represents first k collector manager.

func NewFirstKCollectorManager

func NewFirstKCollectorManager(limit int, queue TopKQueue) *FirstKCollectorManager

NewFirstKCollectorManager creates a new instance of CollectorManager with firstK collectors

func (*FirstKCollectorManager) Collect

func (m *FirstKCollectorManager) Collect(collectors ...Collector) error

Collect returns back the given collectors.

func (*FirstKCollectorManager) Create

func (m *FirstKCollectorManager) Create() Collector

Create creates a new collector that will be used for a search segment

func (*FirstKCollectorManager) GetCandidates

func (m *FirstKCollectorManager) GetCandidates() []Candidate

GetCandidates returns currently collected candidates.

type FuzzyCollectorManager

type FuzzyCollectorManager struct {
	// contains filtered or unexported fields
}

FuzzyCollectorManager represents fuzzy collector manager.

func NewFuzzyCollectorManager

func NewFuzzyCollectorManager(queueFactory func() TopKQueue) *FuzzyCollectorManager

NewFuzzyCollectorManager creates a new instance of FuzzyCollectorManager.

func (*FuzzyCollectorManager) Collect

func (m *FuzzyCollectorManager) Collect(collectors ...Collector) error

Collect returns back the given collectors.

func (*FuzzyCollectorManager) Create

func (m *FuzzyCollectorManager) Create() Collector

Create creates a new collector that will be used for a search segment

func (*FuzzyCollectorManager) GetCandidates

func (m *FuzzyCollectorManager) GetCandidates() []Candidate

GetCandidates returns currently collected candidates.

func (*FuzzyCollectorManager) GetLowestScore

func (m *FuzzyCollectorManager) GetLowestScore() float64

GetLowestScore returns the lowest collected score.

type IndexDescription

// IndexDescription is the config for the NGramIndex structure.
// The JSON key of every field is given by its struct tag.
type IndexDescription struct {
	// Driver is the storage type of the inverted index (see RAMDriver and DiscDriver).
	Driver     Driver    `json:"driver"`
	// Name is the name of the dictionary.
	Name       string    `json:"name"`
	// NGramSize is the size of the nGram.
	NGramSize  int       `json:"nGramSize"`
	// SourcePath is the source path of the index description
	// (presumably the location of the dictionary data — confirm).
	SourcePath string    `json:"source"`
	// OutputPath is the output path
	// (presumably where the built index is stored — confirm).
	OutputPath string    `json:"output"`
	// Alphabet is the alphabet of allowed chars; other chars are replaced with the pad symbol.
	Alphabet   []string  `json:"alphabet"`
	// Pad is the symbol that replaces forbidden chars.
	Pad        string    `json:"pad"`
	// Wrap holds the wrap symbols (front and rear).
	Wrap       [2]string `json:"wrap"`
	// contains filtered or unexported fields
}

IndexDescription is the config for the NGramIndex structure

func ReadConfigs

func ReadConfigs(configPath string) ([]IndexDescription, error)

ReadConfigs reads and returns a list of IndexDescription from the given config path

func (*IndexDescription) GetDictionaryFile

func (d *IndexDescription) GetDictionaryFile() string

GetDictionaryFile returns a path to a dictionary file from the configuration

func (*IndexDescription) GetIndexPath

func (d *IndexDescription) GetIndexPath() string

GetIndexPath returns the output path of the built index

func (*IndexDescription) GetIndexTokenizer

func (d *IndexDescription) GetIndexTokenizer() analysis.Tokenizer

GetIndexTokenizer returns a tokenizer for indexing

func (*IndexDescription) GetSourcePath

func (d *IndexDescription) GetSourcePath() string

GetSourcePath returns a source path of the index description

func (*IndexDescription) GetWriterConfig

func (d *IndexDescription) GetWriterConfig() index.WriterConfig

GetWriterConfig creates and returns IndexWriter config from the given index description

type NGramIndex

// NGramIndex is the interface that provides the access to approximate string
// search and autocomplete. It is the union of Suggester and Autocomplete.
type NGramIndex interface {
	Suggester
	Autocomplete
}

NGramIndex is the interface that provides the access to approximate string search and autocomplete

func NewNGramIndex

func NewNGramIndex(suggester Suggester, autocomplete Autocomplete) NGramIndex

NewNGramIndex creates a new instance of NGramIndex

type ResultItem

// ResultItem represents an element of the top-k similar strings in the
// dictionary for the given query.
type ResultItem struct {
	// Score is a float64 value of a candidate
	Score float64
	// Value is a string value of candidate
	Value string
}

ResultItem represents element of top-k similar strings in dictionary for given query

type Scorer

// Scorer is responsible for scoring an index position.
type Scorer interface {
	// Score returns the score of the given candidate
	Score(position merger.MergeCandidate) float64
}

Scorer is responsible for scoring an index position

func NewMetricScorer

func NewMetricScorer(metric metric.Metric, sizeA, sizeB int) Scorer

NewMetricScorer creates a new scorer that uses metric as a score value

type SearchConfig

type SearchConfig struct {
	// contains filtered or unexported fields
}

SearchConfig is a config for NGramIndex Suggest method

func NewSearchConfig

func NewSearchConfig(query string, topK int, metric metric.Metric, similarity float64) (SearchConfig, error)

NewSearchConfig returns new instance of SearchConfig

type Service

// Service provides methods for autocomplete and topK approximate string search.
//
// The struct embeds sync.RWMutex, so the mutex methods are part of Service's
// exported method set and a Service value must not be copied; use the pointer
// returned by NewService.
// NOTE(review): presumably the mutex guards the set of registered indexes — confirm.
type Service struct {
	sync.RWMutex
	// contains filtered or unexported fields
}

Service provides methods for autocomplete and topK approximate string search

func NewService

func NewService() *Service

NewService creates an empty SuggestService

func (*Service) AddIndex

func (s *Service) AddIndex(name string, dict dictionary.Dictionary, builder Builder) error

AddIndex adds an index with the given name, dictionary and builder

func (*Service) AddIndexByDescription

func (s *Service) AddIndexByDescription(description IndexDescription) error

AddIndexByDescription adds a new search index with given description

func (*Service) AddOnDiscIndex

func (s *Service) AddOnDiscIndex(description IndexDescription) error

AddOnDiscIndex adds a new DISC search index with the given description

func (*Service) AddRunTimeIndex

func (s *Service) AddRunTimeIndex(description IndexDescription) error

AddRunTimeIndex adds a new RAM search index with the given description

func (*Service) Autocomplete

func (s *Service) Autocomplete(dictName string, query string, limit int) ([]ResultItem, error)

Autocomplete returns limit candidates where the query string is a prefix of each candidate

func (*Service) GetDictionaries

func (s *Service) GetDictionaries() []string

GetDictionaries returns the managed list of dictionaries

func (*Service) Suggest

func (s *Service) Suggest(dictName string, config SearchConfig) ([]ResultItem, error)

Suggest returns Top-k approximate strings for the given query in the dict

type Suggester

// Suggester is the interface that provides the access to approximate string search.
type Suggester interface {
	// Suggest returns top-k similar candidates for the query, scored with the given metric.
	// The factory argument creates the CollectorManager used for the search.
	// NOTE(review): similarity is presumably the minimum similarity a candidate
	// must reach (NewSearchConfig calls it "min similarity") — confirm.
	Suggest(query string, similarity float64, metric metric.Metric, factory CollectorManagerFactory) ([]Candidate, error)
}

Suggester is the interface that provides the access to approximate string search

func NewSuggester

func NewSuggester(
	indices index.InvertedIndexIndices,
	searcher index.Searcher,
	tokenizer analysis.Tokenizer,
) Suggester

NewSuggester returns a new Suggester instance

type TopKQueue

// TopKQueue is an accumulator that selects the "top k" elements added to it.
type TopKQueue interface {
	// Add adds an item with the given position and score to the collection if the item belongs to the `top k items`
	Add(candidate index.Position, score float64)
	// GetLowestScore returns the lowest score of the collected candidates. If collection is empty, 0 will be returned
	GetLowestScore() float64
	// CanTakeWithScore returns true if a candidate with the given score can be accepted
	CanTakeWithScore(score float64) bool
	// IsFull tells if the queue has collected `top k elements`
	IsFull() bool
	// GetCandidates returns `top k items`
	GetCandidates() []Candidate
	// Merge merges the given queue into the current one
	Merge(other TopKQueue)
	// Reset resets the queue with the provided topK
	Reset(topK int)
}

TopKQueue is an accumulator that selects the "top k" elements added to it

func NewTopKQueue

func NewTopKQueue(topK int) TopKQueue

NewTopKQueue returns instance of TopKQueue

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL