Documentation ¶
Index ¶
- Constants
- func Index(c config.Config)
- func Merge(srcPath, dstPath string)
- func MergeAll(c config.Config, files []string)
- func Remove(dir string, reg *regexp.Regexp) error
- func Spilt(c config.Config, filePrefix string) (files []string)
- func Walk(dir string, re *regexp.Regexp) ([]string, error)
- type DoubleBuffer
- func (b *DoubleBuffer) Add(doc index.Document)
- func (b *DoubleBuffer) Clear()
- func (b *DoubleBuffer) DoAdd()
- func (b *DoubleBuffer) DoFlush()
- func (b *DoubleBuffer) Flush()
- func (b *DoubleBuffer) ReadIndex() *index.HashMapIndex
- func (b *DoubleBuffer) Start() chan Message
- func (b *DoubleBuffer) Stop()
- func (b *DoubleBuffer) WithDataRange(timestamp int64) *DoubleBuffer
- type IndexArray
- func (b *IndexArray) Add(idx *index.BTreeIndex)
- func (b *IndexArray) Evict(dr index.DataRange) []*index.BTreeIndex
- func (b *IndexArray) Hit(dr index.DataRange) *index.BTreeIndex
- func (b *IndexArray) Indices() []*index.BTreeIndex
- func (b *IndexArray) Swap(old *index.BTreeIndex, new *index.BTreeIndex) bool
- func (b *IndexArray) WithFile(file string) *IndexArray
- type IndexType
- type Indexer
- type Message
- type MsgType
- type Searcher
- func (srh *Searcher) Add(doc index.Document)
- func (srh *Searcher) Clear()
- func (srh *Searcher) Count() int
- func (srh *Searcher) Del(doc index.Document)
- func (srh *Searcher) Drain(timestamp int)
- func (srh *Searcher) Filter(docs []index.Doc) []index.Doc
- func (srh *Searcher) InitParaphrase(file string)
- func (srh *Searcher) Load(file string, flag IndexType)
- func (srh *Searcher) Paraphrase(texts []string, n int) []string
- func (srh *Searcher) Retrieval(terms []string, ext []string, model index.SearchModel) []index.Doc
- func (srh *Searcher) Search(query string) []index.Doc
- func (srh *Searcher) SearchTips() []string
Constants ¶
View Source
const SpiltThresholdDocNum int = 50000
Variables ¶
This section is empty.
Functions ¶
Types ¶
type DoubleBuffer ¶
type DoubleBuffer struct {
	CurrentIdx uint32 // current write index
	Indices    []*index.HashMapIndex
	Queues     []chan index.Document
	// contains filtered or unexported fields
}
func NewDoubleBuffer ¶
func NewDoubleBuffer() *DoubleBuffer
func (*DoubleBuffer) Add ¶
func (b *DoubleBuffer) Add(doc index.Document)
func (*DoubleBuffer) Clear ¶
func (b *DoubleBuffer) Clear()
func (*DoubleBuffer) DoAdd ¶
func (b *DoubleBuffer) DoAdd()
func (*DoubleBuffer) Flush ¶
func (b *DoubleBuffer) Flush()
func (*DoubleBuffer) ReadIndex ¶
func (b *DoubleBuffer) ReadIndex() *index.HashMapIndex
func (*DoubleBuffer) Start ¶
func (b *DoubleBuffer) Start() chan Message
func (*DoubleBuffer) Stop ¶
func (b *DoubleBuffer) Stop()
func (*DoubleBuffer) WithDataRange ¶
func (b *DoubleBuffer) WithDataRange(timestamp int64) *DoubleBuffer
type IndexArray ¶
type IndexArray struct {
// contains filtered or unexported fields
}
func NewIndexArray ¶
func NewIndexArray() *IndexArray
func (*IndexArray) Add ¶
func (b *IndexArray) Add(idx *index.BTreeIndex)
func (*IndexArray) Evict ¶
func (b *IndexArray) Evict(dr index.DataRange) []*index.BTreeIndex
Evict evicts the indexes that fall within the range dr.
func (*IndexArray) Hit ¶
func (b *IndexArray) Hit(dr index.DataRange) *index.BTreeIndex
Hit looks up the index that contains dr.
func (*IndexArray) Indices ¶
func (b *IndexArray) Indices() []*index.BTreeIndex
func (*IndexArray) Swap ¶
func (b *IndexArray) Swap(old *index.BTreeIndex, new *index.BTreeIndex) bool
func (*IndexArray) WithFile ¶
func (b *IndexArray) WithFile(file string) *IndexArray
type Searcher ¶
type Searcher struct {
// contains filtered or unexported fields
}
func NewSearcher ¶
func (*Searcher) Drain ¶
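Drain flushes the incremental index to disk. A true in-place update strategy requires reserving enough free space at the end of each PostingList; otherwise a large number of PostingLists have to be moved, which is even less efficient. When disk space is sufficient, the re-merge strategy is used instead: it is simple to implement and does not affect concurrency, but it needs enough memory.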
func (*Searcher) InitParaphrase ¶
func (*Searcher) Search ¶
Search queries the index for the given text. TODO: retrieval/recall (multi-channel recall) -> coarse ranking (sort by CTR, predicted with LR) -> fine ranking (sort by CVR, predicted with a DNN) -> top-N (heap sort).
func (*Searcher) SearchTips ¶
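SearchTips TODO: support search suggestions. A Trie is suitable for English dictionaries; however, if the system holds a large number of strings that share almost no common prefixes, the corresponding trie consumes a great deal of memory (see "数据结构之trie树", i.e. "Data structures: the trie"). A Double Array Trie is suitable for Chinese dictionaries and has a small memory footprint.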
Click to show internal directories.
Click to hide internal directories.