bleve: github.com/blevesearch/bleve/index/scorch/segment/zap Index | Files

package zap

import "github.com/blevesearch/bleve/index/scorch/segment/zap"

Index

Package Files

build.go contentcoder.go count.go dict.go docvalues.go enumerator.go intcoder.go merge.go new.go posting.go read.go segment.go write.go

Constants

const DocNum1HitFinished = math.MaxUint64
const FSTValEncoding1Hit = uint64(0x8000000000000000)
const FSTValEncodingGeneral = uint64(0x0000000000000000)
const FSTValEncodingMask = uint64(0xc000000000000000)
const FooterSize = 4 + 4 + 4 + 8 + 8 + 8 + 8

FooterSize is the size of the footer record in bytes: crc + ver + chunk + field offset + stored offset + num docs + docValueOffset.

const Type string = "zap"
const Version uint32 = 11

Variables

var DefaultFileMergerBufferSize = 1024 * 1024
var NewSegmentBufferAvgBytesPerDocFactor float64 = 1.0
var NewSegmentBufferNumResultsBump int = 100
var NewSegmentBufferNumResultsFactor float64 = 1.0
var NormBits1Hit = uint64(math.Float32bits(float32(1)))
var ValidateDocFields = func(field document.Field) error {
    return nil
}

ValidateDocFields can be set by applications to perform additional checks on fields in a document being added to a new segment; by default it does nothing. This API is experimental and may be removed at any time.

var ValidateMerge = func(segments []*Segment, memSegments []*SegmentBase, drops []*roaring.Bitmap, newSegment *Segment) error {
    return nil
}

ValidateMerge can be set by applications to perform additional checks on a new segment produced by a merge; by default it does nothing. Callers should provide EITHER segments or memSegments, but not both. This API is experimental and may be removed at any time.

func FSTValDecode1Hit Uses

func FSTValDecode1Hit(v uint64) (docNum uint64, normBits uint64)

func FSTValEncode1Hit Uses

func FSTValEncode1Hit(docNum uint64, normBits uint64) uint64

func Merge Uses

func Merge(segments []*Segment, drops []*roaring.Bitmap, path string,
    chunkFactor uint32, closeCh chan struct{}, s seg.StatsReporter) (
    [][]uint64, uint64, error)

Merge takes a slice of zap segments and bit masks describing which documents may be dropped, and creates a new segment containing the remaining data. This new segment is built at the specified path, with the provided chunkFactor.

func MergeSegmentBases Uses

func MergeSegmentBases(segmentBases []*SegmentBase, drops []*roaring.Bitmap, path string,
    chunkFactor uint32, closeCh chan struct{}, s seg.StatsReporter) (
    [][]uint64, uint64, error)

func MergeToWriter Uses

func MergeToWriter(segments []*SegmentBase, drops []*roaring.Bitmap,
    chunkFactor uint32, cr *CountHashWriter, closeCh chan struct{}) (
    newDocNums [][]uint64,
    numDocs, storedIndexOffset, fieldsIndexOffset, docValueOffset uint64,
    dictLocs []uint64, fieldsInv []string, fieldsMap map[string]uint16,
    err error)

func Open Uses

func Open(path string) (segment.Segment, error)

Open returns a zap impl of a segment

func PersistSegmentBase Uses

func PersistSegmentBase(sb *SegmentBase, path string) error

PersistSegmentBase persists SegmentBase in the zap file format.

func ReadDocValueBoundary Uses

func ReadDocValueBoundary(chunk int, metaHeaders []MetaData) (uint64, uint64)

ReadDocValueBoundary extracts the start and end offsets from a MetaData header slice.

type CountHashWriter Uses

type CountHashWriter struct {
    // contains filtered or unexported fields
}

CountHashWriter is a wrapper around a Writer which counts the number of bytes which have been written and computes a crc32 hash

func NewCountHashWriter Uses

func NewCountHashWriter(w io.Writer) *CountHashWriter

NewCountHashWriter returns a CountHashWriter which wraps the provided Writer

func NewCountHashWriterWithStatsReporter Uses

func NewCountHashWriterWithStatsReporter(w io.Writer, s segment.StatsReporter) *CountHashWriter

func (*CountHashWriter) Count Uses

func (c *CountHashWriter) Count() int

Count returns the number of bytes written

func (*CountHashWriter) Sum32 Uses

func (c *CountHashWriter) Sum32() uint32

Sum32 returns the CRC-32 hash of the content written to this writer

func (*CountHashWriter) Write Uses

func (c *CountHashWriter) Write(b []byte) (int, error)

Write writes the provided bytes to the wrapped writer and counts the bytes

type Dictionary Uses

type Dictionary struct {
    // contains filtered or unexported fields
}

Dictionary is the zap representation of the term dictionary

func (*Dictionary) AutomatonIterator Uses

func (d *Dictionary) AutomatonIterator(a vellum.Automaton,
    startKeyInclusive, endKeyExclusive []byte) segment.DictionaryIterator

AutomatonIterator returns an iterator which only visits terms matching the vellum automaton and falling within the start/end key range.

func (*Dictionary) Contains Uses

func (d *Dictionary) Contains(key []byte) (bool, error)

func (*Dictionary) Iterator Uses

func (d *Dictionary) Iterator() segment.DictionaryIterator

Iterator returns an iterator for this dictionary

func (*Dictionary) OnlyIterator Uses

func (d *Dictionary) OnlyIterator(onlyTerms [][]byte,
    includeCount bool) segment.DictionaryIterator

func (*Dictionary) PostingsList Uses

func (d *Dictionary) PostingsList(term []byte, except *roaring.Bitmap,
    prealloc segment.PostingsList) (segment.PostingsList, error)

PostingsList returns the postings list for the specified term

func (*Dictionary) PrefixIterator Uses

func (d *Dictionary) PrefixIterator(prefix string) segment.DictionaryIterator

PrefixIterator returns an iterator which only visits terms having the specified prefix.

func (*Dictionary) RangeIterator Uses

func (d *Dictionary) RangeIterator(start, end string) segment.DictionaryIterator

RangeIterator returns an iterator which only visits terms between the start and end terms. NOTE: bleve.index API specifies the end is inclusive.

type DictionaryIterator Uses

type DictionaryIterator struct {
    // contains filtered or unexported fields
}

DictionaryIterator is an iterator for term dictionary

func (*DictionaryIterator) Next Uses

func (i *DictionaryIterator) Next() (*index.DictEntry, error)

Next returns the next entry in the dictionary

type Location Uses

type Location struct {
    // contains filtered or unexported fields
}

Location represents the location of a single occurrence

func (*Location) ArrayPositions Uses

func (l *Location) ArrayPositions() []uint64

ArrayPositions returns the array position vector associated with this occurrence

func (*Location) End Uses

func (l *Location) End() uint64

End returns the end byte offset of this occurrence

func (*Location) Field Uses

func (l *Location) Field() string

Field returns the name of the field (useful in composite fields to know which original field the value came from)

func (*Location) Pos Uses

func (l *Location) Pos() uint64

Pos returns the 1-based phrase position of this occurrence

func (*Location) Size Uses

func (l *Location) Size() int

func (*Location) Start Uses

func (l *Location) Start() uint64

Start returns the start byte offset of this occurrence

type MetaData Uses

type MetaData struct {
    DocNum      uint64 // docNum of the data inside the chunk
    DocDvOffset uint64 // offset of data inside the chunk for the given docid
}

MetaData represents the data information inside a chunk.

type Posting Uses

type Posting struct {
    // contains filtered or unexported fields
}

Posting is a single entry in a postings list

func (*Posting) Frequency Uses

func (p *Posting) Frequency() uint64

Frequency returns the frequency of occurrence of this term in this doc/field.

func (*Posting) Locations Uses

func (p *Posting) Locations() []segment.Location

Locations returns the location information for each occurrence

func (*Posting) Norm Uses

func (p *Posting) Norm() float64

Norm returns the normalization factor for this posting

func (*Posting) Number Uses

func (p *Posting) Number() uint64

Number returns the document number of this posting in this segment

func (*Posting) Size Uses

func (p *Posting) Size() int

type PostingsIterator Uses

type PostingsIterator struct {
    Actual   roaring.IntPeekable
    ActualBM *roaring.Bitmap
    // contains filtered or unexported fields
}

PostingsIterator provides a way to iterate through the postings list

func PostingsIteratorFrom1Hit Uses

func PostingsIteratorFrom1Hit(docNum1Hit, normBits1Hit uint64,
    includeFreqNorm, includeLocs bool) (*PostingsIterator, error)

PostingsIteratorFrom1Hit constructs a PostingsIterator given a 1-hit docNum.

func PostingsIteratorFromBitmap Uses

func PostingsIteratorFromBitmap(bm *roaring.Bitmap,
    includeFreqNorm, includeLocs bool) (*PostingsIterator, error)

PostingsIteratorFromBitmap constructs a PostingsIterator given an "actual" bitmap.

func (*PostingsIterator) Advance Uses

func (i *PostingsIterator) Advance(docNum uint64) (segment.Posting, error)

Advance returns the posting at the specified docNum or, if it is not present, the next posting, or nil if the end is reached.

func (*PostingsIterator) DocNum1Hit Uses

func (p *PostingsIterator) DocNum1Hit() (uint64, bool)

DocNum1Hit returns the docNum and true if this is "1-hit" optimized and the docNum is available.

func (*PostingsIterator) Next Uses

func (i *PostingsIterator) Next() (segment.Posting, error)

Next returns the next posting on the postings list, or nil at the end

func (*PostingsIterator) Size Uses

func (i *PostingsIterator) Size() int

type PostingsList Uses

type PostingsList struct {
    // contains filtered or unexported fields
}

PostingsList is an in-memory representation of a postings list

func (*PostingsList) Count Uses

func (p *PostingsList) Count() uint64

Count returns the number of items on this postings list

func (*PostingsList) Iterator Uses

func (p *PostingsList) Iterator(includeFreq, includeNorm, includeLocs bool,
    prealloc segment.PostingsIterator) segment.PostingsIterator

Iterator returns an iterator for this postings list

func (*PostingsList) OrInto Uses

func (p *PostingsList) OrInto(receiver *roaring.Bitmap)

func (*PostingsList) Size Uses

func (p *PostingsList) Size() int

type Segment Uses

type Segment struct {
    SegmentBase
    // contains filtered or unexported fields
}

Segment implements a persisted segment.Segment interface, by embedding an mmap()'ed SegmentBase.

func (*Segment) AddRef Uses

func (s *Segment) AddRef()

func (*Segment) CRC Uses

func (s *Segment) CRC() uint32

CRC returns the CRC value stored in the file footer

func (*Segment) ChunkFactor Uses

func (s *Segment) ChunkFactor() uint32

ChunkFactor returns the chunk factor in the file footer

func (*Segment) Close Uses

func (s *Segment) Close() (err error)

Close releases all resources associated with this segment

func (*Segment) Data Uses

func (s *Segment) Data() []byte

Data returns the underlying mmaped data slice

func (*Segment) DecRef Uses

func (s *Segment) DecRef() (err error)

func (*Segment) DictAddr Uses

func (s *Segment) DictAddr(field string) (uint64, error)

DictAddr is a helper function to compute the file offset where the dictionary is stored for the specified field.

func (*Segment) DocValueOffset Uses

func (s *Segment) DocValueOffset() uint64

DocValueOffset returns the docValue offset in the file footer

func (*Segment) FieldsIndexOffset Uses

func (s *Segment) FieldsIndexOffset() uint64

FieldsIndexOffset returns the fields index offset in the file footer

func (*Segment) NumDocs Uses

func (s *Segment) NumDocs() uint64

NumDocs returns the number of documents in the file footer

func (*Segment) Path Uses

func (s *Segment) Path() string

Path returns the path of this segment on disk

func (*Segment) Size Uses

func (s *Segment) Size() int

func (*Segment) StoredIndexOffset Uses

func (s *Segment) StoredIndexOffset() uint64

StoredIndexOffset returns the stored value index offset in the file footer

func (*Segment) Version Uses

func (s *Segment) Version() uint32

Version returns the file version in the file footer

type SegmentBase Uses

type SegmentBase struct {
    // contains filtered or unexported fields
}

SegmentBase is a memory only, read-only implementation of the segment.Segment interface, using zap's data representation.

func AnalysisResultsToSegmentBase Uses

func AnalysisResultsToSegmentBase(results []*index.AnalysisResult,
    chunkFactor uint32) (*SegmentBase, uint64, error)

AnalysisResultsToSegmentBase produces an in-memory zap-encoded SegmentBase from analysis results

func InitSegmentBase Uses

func InitSegmentBase(mem []byte, memCRC uint32, chunkFactor uint32,
    fieldsMap map[string]uint16, fieldsInv []string, numDocs uint64,
    storedIndexOffset uint64, fieldsIndexOffset uint64, docValueOffset uint64,
    dictLocs []uint64) (*SegmentBase, error)

func (*SegmentBase) AddRef Uses

func (sb *SegmentBase) AddRef()

func (*SegmentBase) Close Uses

func (sb *SegmentBase) Close() (err error)

func (*SegmentBase) Count Uses

func (s *SegmentBase) Count() uint64

Count returns the number of documents in this segment.

func (*SegmentBase) DecRef Uses

func (sb *SegmentBase) DecRef() (err error)

func (*SegmentBase) Dictionary Uses

func (s *SegmentBase) Dictionary(field string) (segment.TermDictionary, error)

Dictionary returns the term dictionary for the specified field

func (*SegmentBase) DocID Uses

func (s *SegmentBase) DocID(num uint64) ([]byte, error)

DocID returns the value of the _id field for the given docNum

func (*SegmentBase) DocNumbers Uses

func (s *SegmentBase) DocNumbers(ids []string) (*roaring.Bitmap, error)

DocNumbers returns a bitset corresponding to the doc numbers of all the provided _id strings

func (*SegmentBase) Fields Uses

func (s *SegmentBase) Fields() []string

Fields returns the field names used in this segment

func (*SegmentBase) Size Uses

func (sb *SegmentBase) Size() int

func (*SegmentBase) VisitDocument Uses

func (s *SegmentBase) VisitDocument(num uint64, visitor segment.DocumentFieldValueVisitor) error

VisitDocument invokes the DocumentFieldValueVisitor for each stored field for the specified doc number.

func (*SegmentBase) VisitDocumentFieldTerms Uses

func (s *SegmentBase) VisitDocumentFieldTerms(localDocNum uint64, fields []string,
    visitor index.DocumentFieldTermVisitor, dvsIn segment.DocVisitState) (
    segment.DocVisitState, error)

VisitDocumentFieldTerms is an implementation of the DocumentFieldTermVisitable interface

func (*SegmentBase) VisitableDocValueFields Uses

func (s *SegmentBase) VisitableDocValueFields() ([]string, error)

VisitableDocValueFields returns the list of fields with persisted doc value terms ready to be visitable using the VisitDocumentFieldTerms method.

Package zap imports 21 packages (graph) and is imported by 10 packages. Updated 2019-08-21. Refresh now. Tools for package owners.