index

package module
v1.1.6 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Feb 2, 2024 License: Apache-2.0 Imports: 6 Imported by: 141

README

Bleve Index API

PkgGoDev Tests Lint

Bleve supports a pluggable Index interface.

Placing these interfaces in their own, hopefully slowly evolving, module frees up Bleve and the underlying index to each introduce new major versions without interfering with one another.

With that in mind, we anticipate introducing only non-breaking changes to this module, and keeping the major version at 1.x for some time.

Documentation

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

func AnalysisWorker

func AnalysisWorker(q AnalysisQueue)

Types

type AnalysisQueue

type AnalysisQueue struct {
	// contains filtered or unexported fields
}

func NewAnalysisQueue

func NewAnalysisQueue(numWorkers int) *AnalysisQueue

func (*AnalysisQueue) Close

func (q *AnalysisQueue) Close()

func (*AnalysisQueue) Queue

func (q *AnalysisQueue) Queue(work AnalysisWork)

type AnalysisWork

type AnalysisWork func()

type Batch

type Batch struct {
	IndexOps    map[string]Document
	InternalOps map[string][]byte
	// contains filtered or unexported fields
}

func NewBatch

func NewBatch() *Batch

func (*Batch) Delete

func (b *Batch) Delete(id string)

func (*Batch) DeleteInternal

func (b *Batch) DeleteInternal(key []byte)

func (*Batch) Merge

func (b *Batch) Merge(o *Batch)

func (*Batch) PersistedCallback

func (b *Batch) PersistedCallback() BatchCallback

func (*Batch) Reset

func (b *Batch) Reset()

func (*Batch) SetInternal

func (b *Batch) SetInternal(key, val []byte)

func (*Batch) SetPersistedCallback

func (b *Batch) SetPersistedCallback(f BatchCallback)

func (*Batch) String

func (b *Batch) String() string

func (*Batch) TotalDocSize

func (b *Batch) TotalDocSize() int

func (*Batch) Update

func (b *Batch) Update(doc Document)

type BatchCallback

type BatchCallback func(error)

type BooleanField added in v0.0.6

type BooleanField interface {
	Boolean() (bool, error)
}

type CompositeField

type CompositeField interface {
	Field

	Compose(field string, length int, freq TokenFrequencies)
}

type CompositeFieldVisitor

type CompositeFieldVisitor func(field CompositeField)

type DateTimeField added in v0.0.6

type DateTimeField interface {
	DateTime() (time.Time, string, error)
}

type DictEntry

type DictEntry struct {
	Term  string
	Count uint64
}

type Directory added in v1.0.1

type Directory interface {
	GetWriter(filePath string) (io.WriteCloser, error)
}

type DocIDReader

type DocIDReader interface {
	// Next returns the next document internal identifier in the natural
	// index order, nil when the end of the sequence is reached.
	Next() (IndexInternalID, error)

	// Advance resets the iteration to the first internal identifier greater than
	// or equal to ID. If ID is smaller than the start of the range, the iteration
	// will start there instead. If ID is greater than or equal to the end of
	// the range, Next() call will return io.EOF.
	Advance(ID IndexInternalID) (IndexInternalID, error)

	Size() int

	Close() error
}

DocIDReader is the interface exposing enumeration of document identifiers. Close the reader to release associated resources.

type DocValueReader

type DocValueReader interface {
	VisitDocValues(id IndexInternalID, visitor DocValueVisitor) error

	BytesRead() uint64
}

type DocValueVisitor added in v0.0.7

type DocValueVisitor func(field string, term []byte)

type Document

type Document interface {
	ID() string
	Size() int

	VisitFields(visitor FieldVisitor)
	VisitComposite(visitor CompositeFieldVisitor)
	HasComposite() bool

	NumPlainTextBytes() uint64

	AddIDField()

	StoredFieldsBytes() uint64
}

type Field

type Field interface {
	Name() string
	Value() []byte
	ArrayPositions() []uint64

	EncodedFieldType() byte

	Analyze()

	Options() FieldIndexingOptions

	AnalyzedLength() int
	AnalyzedTokenFrequencies() TokenFrequencies

	NumPlainTextBytes() uint64
}

type FieldDict

type FieldDict interface {
	Next() (*DictEntry, error)
	Close() error

	BytesRead() uint64
}

type FieldDictContains

type FieldDictContains interface {
	Contains(key []byte) (bool, error)

	BytesRead() uint64
}

type FieldIndexingOptions added in v0.0.5

type FieldIndexingOptions int
const (
	IndexField FieldIndexingOptions = 1 << iota
	StoreField
	IncludeTermVectors
	DocValues
	SkipFreqNorm
)

func (FieldIndexingOptions) IncludeDocValues added in v0.0.5

func (o FieldIndexingOptions) IncludeDocValues() bool

func (FieldIndexingOptions) IncludeTermVectors added in v0.0.5

func (o FieldIndexingOptions) IncludeTermVectors() bool

func (FieldIndexingOptions) IsIndexed added in v0.0.5

func (o FieldIndexingOptions) IsIndexed() bool

func (FieldIndexingOptions) IsStored added in v0.0.5

func (o FieldIndexingOptions) IsStored() bool

func (FieldIndexingOptions) SkipFreqNorm added in v0.0.8

func (o FieldIndexingOptions) SkipFreqNorm() bool

func (FieldIndexingOptions) String added in v0.0.5

func (o FieldIndexingOptions) String() string

type FieldVisitor

type FieldVisitor func(Field)

type GeoJSON added in v1.0.2

type GeoJSON interface {
	// Returns the type of geoJSON shape.
	Type() string

	// Checks whether the given shape intersects with current shape.
	Intersects(GeoJSON) (bool, error)

	// Checks whether the given shape resides within the current shape.
	Contains(GeoJSON) (bool, error)

	// Value returns the byte value for the shape.
	Value() ([]byte, error)
}

GeoJSON is a generic interface for any GeoJSON shape, such as points, polygons, etc.

type GeoPointField added in v0.0.6

type GeoPointField interface {
	Lon() (float64, error)
	Lat() (float64, error)
}

type GeoShapeField added in v1.0.3

type GeoShapeField interface {
	GeoShape() (GeoJSON, error)
}

type Index

type Index interface {
	Open() error
	Close() error

	Update(doc Document) error
	Delete(id string) error
	Batch(batch *Batch) error

	SetInternal(key, val []byte) error
	DeleteInternal(key []byte) error

	// Reader returns a low-level accessor on the index data. Close it to
	// release associated resources.
	Reader() (IndexReader, error)

	StatsMap() map[string]interface{}
}

type IndexBuilder

type IndexBuilder interface {
	Index(doc Document) error
	Close() error
}

IndexBuilder is an interface supported by some index schemes to allow direct write-only index building.

type IndexInternalID

type IndexInternalID []byte

IndexInternalID is an opaque document identifier internal to the index implementation.

func (IndexInternalID) Compare

func (id IndexInternalID) Compare(other IndexInternalID) int

func (IndexInternalID) Equals

func (id IndexInternalID) Equals(other IndexInternalID) bool

type IndexReader

type IndexReader interface {
	TermFieldReader(ctx context.Context, term []byte, field string, includeFreq, includeNorm, includeTermVectors bool) (TermFieldReader, error)

	// DocIDReaderAll returns an iterator over all doc ids.
	// The caller must close returned instance to release associated resources.
	DocIDReaderAll() (DocIDReader, error)

	DocIDReaderOnly(ids []string) (DocIDReader, error)

	FieldDict(field string) (FieldDict, error)

	// FieldDictRange is currently defined to include the start and end terms
	FieldDictRange(field string, startTerm []byte, endTerm []byte) (FieldDict, error)
	FieldDictPrefix(field string, termPrefix []byte) (FieldDict, error)

	Document(id string) (Document, error)

	DocValueReader(fields []string) (DocValueReader, error)

	Fields() ([]string, error)

	GetInternal(key []byte) ([]byte, error)

	DocCount() (uint64, error)

	ExternalID(id IndexInternalID) (string, error)
	InternalID(id string) (IndexInternalID, error)

	Close() error
}

type IndexReaderContains

type IndexReaderContains interface {
	FieldDictContains(field string) (FieldDictContains, error)
}

type IndexReaderFuzzy

type IndexReaderFuzzy interface {
	FieldDictFuzzy(field string, term string, fuzziness int, prefix string) (FieldDict, error)
}

type IndexReaderRegexp

type IndexReaderRegexp interface {
	FieldDictRegexp(field string, regex string) (FieldDict, error)
}

type NumericField added in v0.0.6

type NumericField interface {
	Number() (float64, error)
}

type Optimizable

type Optimizable interface {
	Optimize(kind string, octx OptimizableContext) (OptimizableContext, error)
}

Optimizable represents an optional interface that is implementable by optimizable resources (e.g., TermFieldReaders, Searchers). These optimizable resources are provided the same OptimizableContext instance, so that they can coordinate via dynamic interface casting.

type OptimizableContext

type OptimizableContext interface {
	// Once all the optimizable resources have been provided the same
	// OptimizableContext instance, the optimization preparations are
	// finished or completed via the Finish() method.
	//
	// Depending on the optimization being performed, the Finish()
	// method might return a non-nil Optimized instance.  For example,
	// the Optimized instance might represent an optimized
	// TermFieldReader instance.
	Finish() (Optimized, error)
}

type Optimized

type Optimized interface{}

Optimized represents the result of an optimization -- see the Finish() method of OptimizableContext.

type SpatialAnalyzerPlugin added in v1.0.2

type SpatialAnalyzerPlugin interface {
	// Type returns the plugin type. eg: "s2".
	Type() string

	// GetIndexTokens returns the tokens to be indexed for the
	// given GeoJSON type data in the document.
	GetIndexTokens(GeoJSON) []string

	// GetQueryTokens returns the tokens to be queried for the
	// given GeoJSON type data in the document.
	GetQueryTokens(GeoJSON) []string
}

SpatialAnalyzerPlugin is an interface for custom spatial tokenizer implementations that support the generation of hierarchical spatial tokens for both indexing and querying of geoJSON data.

type SpatialIndexPlugin added in v1.0.2

type SpatialIndexPlugin interface {
	GetSpatialAnalyzerPlugin(typ string) (SpatialAnalyzerPlugin, error)
}

SpatialIndexPlugin is an optional interface for exposing support for any custom analyzer plugins that are capable of generating hierarchical spatial tokens for both indexing and query purposes from the geo location data.

type TermFieldDoc

type TermFieldDoc struct {
	Term    string
	ID      IndexInternalID
	Freq    uint64
	Norm    float64
	Vectors []*TermFieldVector
}

func (*TermFieldDoc) Reset

func (tfd *TermFieldDoc) Reset() *TermFieldDoc

Reset allows an already allocated TermFieldDoc to be reused

func (*TermFieldDoc) Size

func (tfd *TermFieldDoc) Size() int

type TermFieldReader

type TermFieldReader interface {
	// Next returns the next document containing the term in this field, or nil
	// when it reaches the end of the enumeration.  The preAlloced TermFieldDoc
	// is optional, and when non-nil, will be used instead of allocating memory.
	Next(preAlloced *TermFieldDoc) (*TermFieldDoc, error)

	// Advance resets the enumeration at specified document or its immediate
	// follower.
	Advance(ID IndexInternalID, preAlloced *TermFieldDoc) (*TermFieldDoc, error)

	// Count returns the number of documents containing the term in this field.
	Count() uint64
	Close() error

	Size() int
}

TermFieldReader is the interface exposing the enumeration of documents containing a given term in a given field. Documents are returned in byte lexicographic order over their identifiers.

type TermFieldVector

type TermFieldVector struct {
	Field          string
	ArrayPositions []uint64
	Pos            uint64
	Start          uint64
	End            uint64
}

func (*TermFieldVector) Size

func (tfv *TermFieldVector) Size() int

type TextField added in v0.0.6

type TextField interface {
	Text() string
}

type TokenFreq

type TokenFreq struct {
	Term      []byte
	Locations []*TokenLocation
	// contains filtered or unexported fields
}

TokenFreq represents all the occurrences of a term in all fields of a document.

func (*TokenFreq) Frequency

func (tf *TokenFreq) Frequency() int

func (*TokenFreq) SetFrequency

func (tf *TokenFreq) SetFrequency(frequency int)

func (*TokenFreq) Size

func (tf *TokenFreq) Size() int

type TokenFrequencies

type TokenFrequencies map[string]*TokenFreq

TokenFrequencies maps document terms to their combined frequencies from all fields.

func (TokenFrequencies) MergeAll

func (tfs TokenFrequencies) MergeAll(remoteField string, other TokenFrequencies)

func (TokenFrequencies) Size

func (tfs TokenFrequencies) Size() int

type TokenLocation

type TokenLocation struct {
	Field          string
	ArrayPositions []uint64
	Start          int
	End            int
	Position       int
}

TokenLocation represents one occurrence of a term at a particular location in a field. Start, End and Position have the same meaning as in analysis.Token. Field and ArrayPositions identify the field value in the source document. See document.Field for details.

func (*TokenLocation) Size

func (tl *TokenLocation) Size() int

type TokenizableSpatialField added in v1.0.2

type TokenizableSpatialField interface {
	// SetSpatialAnalyzerPlugin lets index implementations
	// initialise the relevant spatial analyzer plugin for the field
	// to override spatial token generation during the analysis phase.
	SetSpatialAnalyzerPlugin(SpatialAnalyzerPlugin)
}

TokenizableSpatialField is an optional interface for fields that support pluggable custom hierarchical spatial token generation.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL