engine

package
v0.1.3 Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Sep 15, 2021 | License: MIT | Imports: 18 | Imported by: 0

Documentation

Index

Constants

View Source
const (
	// XmlStreamBufferSize evaluates to 1,048,576 (1024 * 1024 * 1).
	// NOTE(review): presumably a buffer size in bytes used while streaming
	// XML — confirm at the use site.
	XmlStreamBufferSize = 1024 * 1024 * 1 // 1MB
	// DocumentCapacity equals 524288 (2^19).
	// NOTE(review): presumably an initial capacity for the document
	// collection — confirm at the use site.
	DocumentCapacity    = 524288          // 2^19
	// PageSize equals 25.
	// NOTE(review): presumably the number of search results per page (see
	// GetNumberOfPages and SliceSearchResults) — confirm.
	PageSize            = 25
)

Variables

This section is empty.

Functions

func GetNumberOfPages

func GetNumberOfPages(total int, pageSize int) int

Types

type FilterInterface

// FilterInterface declares the token-filtering operations. Both methods take
// a slice of tokens and return a slice of tokens. *Filterer declares methods
// with these same signatures.
type FilterInterface interface {
	Lowercase(tokens []string) []string
	RemoveStopWords(tokens []string) []string
}

type Filterer

// Filterer carries the stop-word table used by its filtering methods
// (Lowercase and RemoveStopWords are declared on *Filterer).
type Filterer struct {
	// StopWords is keyed by word.
	// NOTE(review): the meaning of the int value is not visible here;
	// presumably the map is used as a set — confirm in NewFilterer.
	StopWords map[string]int
}

func NewFilterer

func NewFilterer() *Filterer

func (*Filterer) Lowercase

func (f *Filterer) Lowercase(tokens []string) []string

func (*Filterer) RemoveStopWords

func (f *Filterer) RemoveStopWords(tokens []string) []string

type Indexer

// Indexer bundles the stored documents, the per-string bitmap indexes, and
// the text-analysis components (Tokenizer, Filterer, Stemmer).
//
// Because it embeds a sync.Mutex by value, an Indexer must not be copied
// after first use; all of its methods are declared on *Indexer.
type Indexer struct {
	// Data holds documents keyed by a uint32.
	// NOTE(review): presumably the key is WikiXMLDoc.Index — confirm.
	Data       map[uint32]WikiXMLDoc
	// Indexes maps a string to a roaring bitmap.
	// NOTE(review): presumably token -> set of document indexes — confirm
	// in AddIndex.
	Indexes    map[string]*roaring.Bitmap
	Tokenizer  *Tokenizer
	Filterer   *Filterer
	Stemmer    *Stemmer
	// NOTE(review): presumably guards Data/Indexes during AddIndexesAsync —
	// confirm which fields it protects.
	Mutex      sync.Mutex
	// NOTE(review): Cores and Multiplier look like concurrency tuning
	// knobs; their exact use is not visible here — confirm.
	Cores      int
	Multiplier int
}

func NewIndexer

func NewIndexer() *Indexer

func (*Indexer) AddIndex

func (i *Indexer) AddIndex(tokens []string, index uint32)

func (*Indexer) AddIndexesAsync

func (i *Indexer) AddIndexesAsync(documents []WikiXMLDoc, wg *sync.WaitGroup)

func (*Indexer) Analyze

func (i *Indexer) Analyze(s string) []string

func (*Indexer) DownloadWikimediaDump

func (i *Indexer) DownloadWikimediaDump(path string, url string) error

func (*Indexer) IsFileExists

func (i *Indexer) IsFileExists(path string) bool

func (*Indexer) LoadDataDump

func (i *Indexer) LoadDataDump(path string) error

func (*Indexer) LoadIndexDump

func (i *Indexer) LoadIndexDump(path string) error

func (*Indexer) LoadWikimediaDump

func (i *Indexer) LoadWikimediaDump(path string, save bool, indexPath string, dataPath string) error

func (*Indexer) SaveDataDump

func (i *Indexer) SaveDataDump(path string) error

func (*Indexer) SaveIndexDump

func (i *Indexer) SaveIndexDump(path string) error

func (*Indexer) Search

func (i *Indexer) Search(s string, page uint32) SearchResults

func (*Indexer) UncompressWikimediaDump

func (i *Indexer) UncompressWikimediaDump(path string) error

type IndexerInterface

// IndexerInterface declares the method set for working with Wikimedia
// dumps, index and data dumps, text analysis, indexing, and search.
//
// NOTE(review): Search is declared here as Search(s string), while the
// concrete method is (*Indexer).Search(s string, page uint32). With that
// mismatch *Indexer does not satisfy this interface; the two signatures
// need to be reconciled.
type IndexerInterface interface {
	DownloadWikimediaDump(path string, url string) error
	UncompressWikimediaDump(path string) error
	LoadWikimediaDump(path string, save bool, indexPath string, dataPath string) error
	LoadIndexDump(path string) error
	LoadDataDump(path string) error
	SaveIndexDump(path string) error
	SaveDataDump(path string) error
	IsFileExists(path string) bool
	Analyze(s string) []string
	AddIndex(tokens []string, index uint32)
	AddIndexesAsync(documents []WikiXMLDoc, wg *sync.WaitGroup)
	Search(s string) SearchResults
}

type Processed

// Processed pairs a numeric duration with a unit label. It is embedded in
// SearchResults under the JSON key "processed".
type Processed struct {
	// Duration is serialized under the JSON key "time" (not "duration").
	Duration float64 `json:"time"`
	// Unit is serialized under the JSON key "unit".
	// NOTE(review): the set of unit strings is not visible here — confirm.
	Unit     string  `json:"unit"`
}

type SearchResult

// SearchResult is a single entry in SearchResults.Results. Its Url, Title,
// and Abstract fields share names with fields of WikiXMLDoc; Rank is an
// additional float64 score.
// NOTE(review): how Rank is computed and ordered is not visible here.
type SearchResult struct {
	Url      string  `json:"url"`
	Rank     float64 `json:"rank"`
	Title    string  `json:"title"`
	Abstract string  `json:"abstract"`
}

func SliceSearchResults

func SliceSearchResults(results []SearchResult, currentPage int) []SearchResult

type SearchResults

// SearchResults is the value returned by (*Indexer).Search. It carries
// timing information, result and page counts, and a slice of results, all
// serialized to JSON with snake_case keys.
// NOTE(review): presumably Results holds only the entries for CurrentPage
// while NumberOfResults counts all matches — confirm in Search.
type SearchResults struct {
	Processed       Processed      `json:"processed"`
	NumberOfResults int            `json:"number_of_results"`
	CurrentPage     int            `json:"current_page"`
	NumberOfPages   int            `json:"number_of_pages"`
	Results         []SearchResult `json:"results"`
}

type Stemmer

// Stemmer is a field-less type; its Stem method is declared on *Stemmer.
type Stemmer struct{}

func NewStemmer

func NewStemmer() *Stemmer

func (*Stemmer) Stem

func (s *Stemmer) Stem(tokens []string) []string

type StemmerInterface

// StemmerInterface declares a single operation that takes a slice of tokens
// and returns a slice of tokens. *Stemmer declares a method with this
// signature.
type StemmerInterface interface {
	Stem(tokens []string) []string
}

type Tokenizer

// Tokenizer is a field-less type; its Tokenize method is declared on
// *Tokenizer.
type Tokenizer struct{}

func NewTokenizer

func NewTokenizer() *Tokenizer

func (*Tokenizer) Tokenize

func (t *Tokenizer) Tokenize(s string) []string

type TokenizerInterface

// TokenizerInterface declares a single operation that takes a string and
// returns a slice of tokens. *Tokenizer declares a method with this
// signature.
type TokenizerInterface interface {
	Tokenize(s string) []string
}

type WikiXMLDoc

// WikiXMLDoc is one document record. Every field carries both an xml and a
// json struct tag with the same lower-case name, so the same type can be
// decoded from XML and encoded to or decoded from JSON.
// NOTE(review): presumably one entry of a Wikimedia abstract dump — confirm
// against LoadWikimediaDump.
type WikiXMLDoc struct {
	// Index is the uint32 identifier of the document.
	// NOTE(review): presumably matches the key in Indexer.Data — confirm.
	Index    uint32 `xml:"index" json:"index"`
	Title    string `xml:"title" json:"title"`
	Url      string `xml:"url" json:"url"`
	Abstract string `xml:"abstract" json:"abstract"`
}

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL