package index

Published: Feb 17, 2015 License: Apache-2.0

Constants

const (
	// Creates a new index or overwrites an existing one.
	OPEN_MODE_CREATE = OpenMode(1)
	// Opens an existing index.
	OPEN_MODE_APPEND = OpenMode(2)
	// Creates a new index if one does not exist,
	// otherwise it opens the index and documents will be appended.
	OPEN_MODE_CREATE_OR_APPEND = OpenMode(3)
)
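
For illustration, a minimal sketch of picking an open mode when configuring a writer. It assumes an analyzer (analysis.Analyzer), a directory (store.Directory) and a matchVersion (util.Version) obtained elsewhere; SetOpenMode() itself is only referenced in the IndexWriter documentation below, so treat this as a sketch rather than a verified call:

conf := NewIndexWriterConfig(matchVersion, analyzer)
conf.SetOpenMode(OPEN_MODE_CREATE_OR_APPEND) // create a new index if none exists, otherwise append
w, err := NewIndexWriter(directory, conf)
if err != nil {
	// handle the error, e.g. the write.lock is already held
}
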
const (
	INDEX_FILENAME_SEGMENTS     = "segments"
	INDEX_FILENAME_SEGMENTS_GEN = "segments.gen"
)
const (
	// Merge was triggered by a segment flush.
	MERGE_TRIGGER_SEGMENT_FLUSH = MergeTrigger(1)
	// Merge was triggered by a full flush. Full flushes can be caused
	// by a commit, NRT reader reopen or close call on the index writer.
	MERGE_TRIGGER_FULL_FLUSH = MergeTrigger(2)
	/* Merge has been triggered explicitly by the user. */
	MERGE_TRIGGER_EXPLICIT = MergeTrigger(3)
	/* Merge was triggered by a successfully finished merge. */
	MERGE_FINISHED = MergeTrigger(4)
	// Merge was triggered by a closing IndexWriter.
	MERGE_CLOSING = MergeTrigger(5)
)
const (
	VERSION_40 = 0
	VERSION_46 = 1
	VERSION_48 = 2
	VERSION_49 = 3

	// Used for the segments.gen file only!
	// Whenever you add a new format, make it 1 smaller (negative version logic)!
	FORMAT_SEGMENTS_GEN_47       = -2
	FORMAT_SEGMENTS_GEN_CHECKSUM = -3
	FORMAT_SEGMENTS_GEN_START    = FORMAT_SEGMENTS_GEN_47
	// Current format of segments.gen
	FORMAT_SEGMENTS_GEN_CURRENT = FORMAT_SEGMENTS_GEN_CHECKSUM
)
const BYTES_PER_DEL_DOCID = 2 * util.NUM_BYTES_INT

A Go slice consumes two ints for an extra doc ID, assuming 50% pre-allocation.

const BYTES_PER_DEL_QUERY = 40 + util.NUM_BYTES_OBJECT_REF + util.NUM_BYTES_INT

Go map (amd64) consumes about 40 bytes for an extra entry.

const BYTES_PER_POSTING = 3 * util.NUM_BYTES_INT
const DEFAULT_CHECK_INTEGRITY_AT_MERGE = false

Default value for calling checkIntegrity() before merging segments (set to false). You can set this to true for additional safety.

const DEFAULT_DELETION_POLICY = KeepOnlyLastCommitDeletionPolicy(true)
const DEFAULT_MAX_BUFFERED_DELETE_TERMS = DISABLE_AUTO_FLUSH

Disabled by default (because IndexWriter flushes by RAM usage by default).

const DEFAULT_MAX_BUFFERED_DOCS = DISABLE_AUTO_FLUSH

Disabled by default (because IndexWriter flushes by RAM usage by default).

const DEFAULT_MAX_CFS_SEGMENT_SIZE = math.MaxInt64

Default maximum segment size in order to use the compound file system. Set to MaxInt64.

const DEFAULT_MAX_MERGE_COUNT = 2

Default maxMergeCount.

const DEFAULT_MAX_MERGE_MB = 2048

Default maximum segment size. A segment of this size or larger will never be merged.

const DEFAULT_MAX_ROUTINE_COUNT = 1

Default maxRoutineCount. We default to 1: tests on spinning-magnet drives showed slower indexing performance if more than one merge routine runs at once (though on an SSD it was faster).

const DEFAULT_MAX_THREAD_STATES = 8

The maximum number of simultaneous threads that may be indexing documents at once in IndexWriter; if more than this many threads arrive they will wait for others to finish. Default value is 8.

const DEFAULT_MERGE_FACTOR = 10

Default merge factor, which is how many segments are merged at a time

const DEFAULT_MIN_MERGE_DOCS = 1000

Default minimum segment size.

const DEFAULT_NO_CFS_RATIO = 0.1

Default noCFSRatio. If a merge's size is >= 10% of the index, then we disable compound file for it.

const DEFAULT_RAM_BUFFER_SIZE_MB = 16

Default value is 16 MB (which means flush when buffered docs consume approximately 16 MB RAM)

const DEFAULT_RAM_PER_THREAD_HARD_LIMIT_MB = 1945

Default value is 1945.

const DEFAULT_READER_POOLING = false
const DEFAULT_READER_TERMS_INDEX_DIVISOR = DEFAULT_TERMS_INDEX_DIVISOR

Default value is 1.

const DEFAULT_TERMS_INDEX_DIVISOR = 1
const DEFAULT_TERM_INDEX_INTERVAL = 32 // TODO: this should be private to the codec, not settable here

Default value is 32.

const DEFAULT_USE_COMPOUND_FILE_SYSTEM = true

Default value for compound file system for newly written segments (set to true). For batch indexing with very large RAM buffers, use false.

const DISABLE_AUTO_FLUSH = -1

Denotes a flush trigger is disabled.

const DWPT_VERBOSE = false
const HASH_INIT_SIZE = 4
const LEVEL_LOG_SPAN = 0.75

Defines the allowed range of log(size) for each level. A level is computed by taking the max segment log size, minus LEVEL_LOG_SPAN, and finding all segments falling within that range.
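
As a conceptual sketch (not part of this package's API, and assuming the standard math package), a level's range could be derived like this, where mergeFactor is the log base and maxSegmentSize is the size of the largest segment under consideration:

func levelRange(maxSegmentSize, mergeFactor float64) (bottom, top float64) {
	// log(size) of the largest segment, measured in units of the merge factor
	top = math.Log(maxSegmentSize) / math.Log(mergeFactor)
	// every segment whose log(size) falls within [bottom, top] is on the same level
	bottom = top - LEVEL_LOG_SPAN
	return bottom, top
}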

const MAX_DOCS = math.MaxInt32 - 128

Hard limit on maximum number of documents that may be added to the index. If you try to add more than this, you'll hit a panic.

const MAX_INT = int(math.MaxInt32)
const MAX_TERM_LENGTH = MAX_TERM_LENGTH_UTF8

Absolute hard maximum length for a term, in bytes once encoded as UTF8. If a term arrives from the analyzer longer than this length, it panics and a message is printed to infoStream, if set (see SetInfoStream()).

const MAX_TERM_LENGTH_UTF8 = util.BYTE_BLOCK_SIZE - 2

If you increase this, you must fix the field cache implementation, since Terms/TermsIndex requires <= 32768.

const MISSING int64 = 0
const NO_DELETION_POLICY = NoDeletionPolicy(true)
const SOURCE_FLUSH = "flush"

Source of a segment which results from a flush.

const UNBOUNDED_MAX_MERGE_SEGMENTS = -1
const VERBOSE = false
const VERBOSE_REF_COUNT = false
const WRITE_LOCK_NAME = "write.lock"

Name of the write lock in the index.

const WRITE_LOCK_TIMEOUT = 1000

Default value for the write lock timeout (1,000 ms)

Variables

var DEFAULT_MAX_MERGE_MB_FOR_FORCED_MERGE int64 = math.MaxInt64

Default maximum segment size. A segment of this size or larger will never be merged during forceMerge.

var DEFAULT_MIN_MERGE_MB = 1.6

Default minimum segment size.

var DefaultSimilarity func() Similarity

Used by the search package to assign a default Similarity.

var (
	EMPTY_ARRAY = []ReaderSlice{}
)

Functions

func GenerationFromSegmentsFileName

func GenerationFromSegmentsFileName(fileName string) int64

func GetMultiFields

func GetMultiFields(r IndexReader) Fields

func GetMultiTerms

func GetMultiTerms(r IndexReader, field string) Terms

func IsIndexExists

func IsIndexExists(directory store.Directory) (ok bool, err error)

Returns true if an index likely exists at the specified directory. Note that if a corrupt index exists, or if an index in the process of committing

func IsIndexFileExists

func IsIndexFileExists(files []string) bool

No lock is required

func LastCommitGeneration

func LastCommitGeneration(files []string) int64

func MergeTriggerName

func MergeTriggerName(trigger MergeTrigger) string

func ReadFieldInfos

func ReadFieldInfos(info *SegmentCommitInfo) (fis FieldInfos, err error)

Reads the most recent FieldInfos of the given segment info.

func SubIndex

func SubIndex(n int, leaves []*AtomicReaderContext) int

Returns index of the searcher/reader for document n in the slice used to construct this searcher/reader.

Types

type ARFieldsReader

type ARFieldsReader interface {
	Terms(field string) Terms
	Fields() Fields
	LiveDocs() util.Bits
	/** Returns {@link NumericDocValues} representing norms
	 *  for this field, or null if no {@link NumericDocValues}
	 *  were indexed. The returned instance should only be
	 *  used by a single thread. */
	NormValues(field string) (ndv NumericDocValues, err error)
}

type ApplyDeletesResult

type ApplyDeletesResult struct {
	// contains filtered or unexported fields
}

type AtomicReader

type AtomicReader interface {
	IndexReader
	ARFieldsReader
}

type AtomicReaderContext

type AtomicReaderContext struct {
	*IndexReaderContextImpl
	Ord, DocBase int
	// contains filtered or unexported fields
}

func (*AtomicReaderContext) Children

func (ctx *AtomicReaderContext) Children() []IndexReaderContext

func (*AtomicReaderContext) Leaves

func (ctx *AtomicReaderContext) Leaves() []*AtomicReaderContext

func (*AtomicReaderContext) Reader

func (ctx *AtomicReaderContext) Reader() IndexReader

func (*AtomicReaderContext) String

func (ctx *AtomicReaderContext) String() string

type AtomicReaderImpl

type AtomicReaderImpl struct {
	*IndexReaderImpl
	ARFieldsReader
	// contains filtered or unexported fields
}

func (*AtomicReaderImpl) Context

func (r *AtomicReaderImpl) Context() IndexReaderContext

func (*AtomicReaderImpl) DocCount

func (r *AtomicReaderImpl) DocCount(field string) (n int, err error)

func (*AtomicReaderImpl) DocFreq

func (r *AtomicReaderImpl) DocFreq(term *Term) (int, error)

func (*AtomicReaderImpl) SumDocFreq

func (r *AtomicReaderImpl) SumDocFreq(field string) (n int64, err error)

func (*AtomicReaderImpl) SumTotalTermFreq

func (r *AtomicReaderImpl) SumTotalTermFreq(field string) (n int64, err error)

func (*AtomicReaderImpl) Terms

func (r *AtomicReaderImpl) Terms(field string) Terms

func (*AtomicReaderImpl) TotalTermFreq

func (r *AtomicReaderImpl) TotalTermFreq(term *Term) (n int64, err error)

type AtomicReaderImplSPI

type AtomicReaderImplSPI interface {
	IndexReaderImplSPI
	ARFieldsReader
}

type BaseCompositeReader

type BaseCompositeReader struct {
	*CompositeReaderImpl
	// contains filtered or unexported fields
}

func (*BaseCompositeReader) DocCount

func (r *BaseCompositeReader) DocCount(field string) int

func (*BaseCompositeReader) DocFreq

func (r *BaseCompositeReader) DocFreq(term *Term) (int, error)

func (*BaseCompositeReader) MaxDoc

func (r *BaseCompositeReader) MaxDoc() int

func (*BaseCompositeReader) NumDocs

func (r *BaseCompositeReader) NumDocs() int

func (*BaseCompositeReader) SumDocFreq

func (r *BaseCompositeReader) SumDocFreq(field string) int64

func (*BaseCompositeReader) SumTotalTermFreq

func (r *BaseCompositeReader) SumTotalTermFreq(field string) int64

func (*BaseCompositeReader) TermVectors

func (r *BaseCompositeReader) TermVectors(docID int) error

func (*BaseCompositeReader) TotalTermFreq

func (r *BaseCompositeReader) TotalTermFreq(term *Term) int64

func (*BaseCompositeReader) VisitDocument

func (r *BaseCompositeReader) VisitDocument(docID int, visitor StoredFieldVisitor) error

type BaseCompositeReaderSPI

type BaseCompositeReaderSPI interface {
	IndexReaderImplSPI
	CompositeReaderSPI
}

type BlockedFlush

type BlockedFlush struct {
	// contains filtered or unexported fields
}

type BufferedUpdates

type BufferedUpdates struct {
	// contains filtered or unexported fields
}

Holds buffered deletes, by docID, term or query, for a single segment. This is used to hold buffered pending deletes against the to-be-flushed segment. Once the deletes are pushed (on flush in DW), they are converted to a FrozenBufferedUpdates instance.

NOTE: instances of this class are accessed either via a private instance on DocumentsWriterPerThread, or via sync'd code by DocumentsWriterDeleteQueue.

func (*BufferedUpdates) String

func (bd *BufferedUpdates) String() string

type BufferedUpdatesStream

type BufferedUpdatesStream struct {
	sync.Locker
	// contains filtered or unexported fields
}

Tracks the stream of BufferedUpdates. When DocumentsWriterPerThread flushes, its buffered deletes and updates are appended to this stream. We later apply them (resolve them to the actual docIDs, per segment) when a merge is started (only to the to-be-merged segments). We also apply them to all segments when an NRT reader is pulled, commit/close is called, or when too many deletes or updates are buffered and must be flushed (by RAM usage or by count).

Each packet is assigned a generation, and each flushed or merged segment is also assigned a generation, so we can track which BufferedUpdates packets apply to any given segment.

func (*BufferedUpdatesStream) RamBytesUsed

func (ds *BufferedUpdatesStream) RamBytesUsed() int64

type BySizeDescendingSegments

type BySizeDescendingSegments struct {
	// contains filtered or unexported fields
}

func (*BySizeDescendingSegments) Len

func (a *BySizeDescendingSegments) Len() int

func (*BySizeDescendingSegments) Less

func (a *BySizeDescendingSegments) Less(i, j int) bool

func (*BySizeDescendingSegments) Swap

func (a *BySizeDescendingSegments) Swap(i, j int)

type ByteSliceReader

type ByteSliceReader struct {
	*util.DataInputImpl
	// contains filtered or unexported fields
}

IndexInput that knows how to read the byte slices written by Posting and PostingVector. We read the bytes in each slice until we hit the end of that slice at which point we read the forwarding address of the next slice and then jump to it.

func (*ByteSliceReader) ReadByte

func (r *ByteSliceReader) ReadByte() (byte, error)

func (*ByteSliceReader) ReadBytes

func (r *ByteSliceReader) ReadBytes(buf []byte) error

type CheckAbort

type CheckAbort interface {
	// contains filtered or unexported methods
}

Recording units of work when merging segments.

type CheckAbortNone

type CheckAbortNone int

If you use this: IW.close(false) cannot abort your merge!

type CheckIndex

type CheckIndex struct {
	// contains filtered or unexported fields
}

Basic tool and API to check the health of an index and write a new segments file that removes reference to problematic segments.

As this tool checks every byte in the index, on a large index it can take a long time to run.

func NewCheckIndex

func NewCheckIndex(dir store.Directory, crossCheckTermVectors bool, infoStream io.Writer) *CheckIndex

func (*CheckIndex) CheckIndex

func (ch *CheckIndex) CheckIndex(onlySegments []string) *CheckIndexStatus

Returns a Status instance detailing the state of the index.

As this method checks every byte in the specified segments, on a large index it can take quite a long time to run.

WARNING: make sure you only call this when the index is not opened by any writer.
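
A hedged usage sketch based on the signatures above; dir is assumed to be a store.Directory opened elsewhere, and os.Stdout is just one possible info stream:

ch := NewCheckIndex(dir, false, os.Stdout) // don't cross-check term vectors; log to stdout
status := ch.CheckIndex(nil)               // nil is assumed to mean "check all segments"
if !status.Clean {
	// The index has problems; MissingSegments reports whether segments_N could not be loaded.
	fmt.Println("index not clean, missing segments:", status.MissingSegments)
}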

type CheckIndexStatus

type CheckIndexStatus struct {
	// True if no problems found with the index.
	Clean bool

	// True if we were unable to locate and load the segments_N file.
	MissingSegments bool
	// contains filtered or unexported fields
}

Returned from checkIndex() detailing the health and status of the index

type ClosingControl

type ClosingControl struct {
	// contains filtered or unexported fields
}

Uses a separate goroutine to protect closing control.

type CoalescedUpdates

type CoalescedUpdates struct {
	// contains filtered or unexported fields
}

func (*CoalescedUpdates) String

func (cd *CoalescedUpdates) String() string

type CommitPoint

type CommitPoint struct {
	// contains filtered or unexported fields
}

Holds details for each commit point. This class is also passed to the deletion policy. Note: this class has a natural ordering that is inconsistent with equals.

func (*CommitPoint) Delete

func (cp *CommitPoint) Delete()

func (*CommitPoint) Directory

func (cp *CommitPoint) Directory() store.Directory

func (*CommitPoint) FileNames

func (cp *CommitPoint) FileNames() []string

func (*CommitPoint) Generation

func (cp *CommitPoint) Generation() int64

func (*CommitPoint) IsDeleted

func (cp *CommitPoint) IsDeleted() bool

func (*CommitPoint) SegmentCount

func (cp *CommitPoint) SegmentCount() int

func (*CommitPoint) SegmentsFileName

func (cp *CommitPoint) SegmentsFileName() string

func (*CommitPoint) String

func (cp *CommitPoint) String() string

func (*CommitPoint) UserData

func (cp *CommitPoint) UserData() map[string]string

type CompositeReader

type CompositeReader interface {
	IndexReader
	CompositeReaderSPI
}

type CompositeReaderContext

type CompositeReaderContext struct {
	*IndexReaderContextImpl
	// contains filtered or unexported fields
}

func (*CompositeReaderContext) Children

func (ctx *CompositeReaderContext) Children() []IndexReaderContext

func (*CompositeReaderContext) Leaves

func (*CompositeReaderContext) Reader

func (ctx *CompositeReaderContext) Reader() IndexReader

func (*CompositeReaderContext) String

func (ctx *CompositeReaderContext) String() string

type CompositeReaderContextBuilder

type CompositeReaderContextBuilder struct {
	// contains filtered or unexported fields
}

type CompositeReaderImpl

type CompositeReaderImpl struct {
	*IndexReaderImpl
	CompositeReaderSPI
	// contains filtered or unexported fields
}

func (*CompositeReaderImpl) Context

func (*CompositeReaderImpl) String

func (r *CompositeReaderImpl) String() string

type CompositeReaderSPI

type CompositeReaderSPI interface {
	// contains filtered or unexported methods
}

type ConcurrentMergeScheduler

type ConcurrentMergeScheduler struct {
	sync.Locker
	// contains filtered or unexported fields
}

A MergeScheduler that runs each merge using a separate goroutine.

Specify the max number of goroutines that may run at once, and the maximum number of simultaneous merges with SetMaxMergesAndRoutines().

If the number of merges exceeds the maximum number of goroutines, the largest merges are paused until one of the smaller merges completes.

If more than MaxMergeCount() merges are requested then this class will forcefully throttle the incoming goroutines by pausing until one or more merges complete.

func NewConcurrentMergeScheduler

func NewConcurrentMergeScheduler() *ConcurrentMergeScheduler

func (*ConcurrentMergeScheduler) Close

func (cms *ConcurrentMergeScheduler) Close() error

func (*ConcurrentMergeScheduler) Merge

func (cms *ConcurrentMergeScheduler) Merge(writer *IndexWriter,
	trigger MergeTrigger, newMergesFound bool) error

func (*ConcurrentMergeScheduler) SetMaxMergesAndRoutines

func (cms *ConcurrentMergeScheduler) SetMaxMergesAndRoutines(maxMergeCount, maxRoutineCount int)

Sets the maximum number of merge goroutines and simultaneous merges allowed.
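
A hedged sketch of plugging a scheduler into a config before the writer is created (conf is assumed to be an *IndexWriterConfig):

cms := NewConcurrentMergeScheduler()
cms.SetMaxMergesAndRoutines(4, 2) // up to 4 pending merges, at most 2 merge goroutines at once
conf.SetMergeScheduler(cms)       // only takes effect when the IndexWriter is first created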

func (*ConcurrentMergeScheduler) String

func (cms *ConcurrentMergeScheduler) String() string

type CoreClosedListener

type CoreClosedListener interface {
	// contains filtered or unexported methods
}

type DefaultIndexingChain

type DefaultIndexingChain struct {
	// contains filtered or unexported fields
}

Default general purpose indexing chain, which handles indexing all types of fields

type DeleteSlice

type DeleteSlice struct {
	// contains filtered or unexported fields
}

type DirectoryReader

type DirectoryReader interface {
	IndexReader
	// doOpenIfChanged() error
	// doOpenIfChanged(c IndexCommit) error
	// doOpenIfChanged(w IndexWriter, c IndexCommit) error
	Version() int64
	IsCurrent() bool
}

func OpenDirectoryReader

func OpenDirectoryReader(directory store.Directory) (r DirectoryReader, err error)

type DirectoryReaderImpl

type DirectoryReaderImpl struct {
	*BaseCompositeReader
	// contains filtered or unexported fields
}

type DocConsumer

type DocConsumer interface {
	// contains filtered or unexported methods
}

type DocValuesFieldUpdates

type DocValuesFieldUpdates struct {
}

Holds updates of a single DocValues field, for a set of documents.

type DocValuesFieldUpdatesContainer

type DocValuesFieldUpdatesContainer struct {
}

func (*DocValuesFieldUpdatesContainer) String

type DocValuesStatus

type DocValuesStatus struct {
	// contains filtered or unexported fields
}

type DocValuesUpdate

type DocValuesUpdate struct {
	// contains filtered or unexported fields
}

An in-place update to a DocValues field.

func (*DocValuesUpdate) String

func (u *DocValuesUpdate) String() string

type DocValuesWriter

type DocValuesWriter interface {
	// contains filtered or unexported methods
}

type DocumentsWriter

type DocumentsWriter struct {
	sync.Locker
	// contains filtered or unexported fields
}

This class accepts multiple added documents and directly writes segment files.

Each added document is passed to the indexing chain, which in turn processes the document into the different codec formats. Some formats write bytes to files immediately, e.g. stored fields and term vectors, while others are buffered by the indexing chain and written only on flush.

Other consumers, e.g. FreqProxTermsWriter and NormsConsumer, buffer bytes in RAM and flush only when a new segment is produced.

Once we have used our allowed RAM buffer, or the number of added docs is large enough (in the case we are flushing by doc count instead of RAM usage), we create a real segment and flush it to the Directory.

Goroutines:

Multiple goroutines are allowed into AddDocument at once. There is an initial synchronized call to ThreadState() which allocates a ThreadState for this goroutine. The same goroutine will get the same ThreadState over time (goroutine affinity) so that if there are consistent patterns (for example each goroutine is indexing a different content source) then we make better use of RAM. Then processDocument() is called on that ThreadState without synchronization (most of the "heavy lifting" is in this call). Finally the synchronized "finishDocument" is called to flush changes to the directory.

When flush is called by IndexWriter we forcefully idle all goroutines and flush only once they are all idle. This means you can call flush with a given goroutine even while other goroutines are actively adding/deleting documents.

Exceptions:

Because this class directly updates in-memory posting lists, and flushes stored fields and term vectors directly to files in the directory, there are certain limited times when an error can corrupt this state. For example, a disk full while flushing stored fields leaves this file in a corrupt state. Or, a memory issue while appending to the in-memory posting lists can corrupt that posting list. We call such errors "aborting errors". In these cases we must call abort() to discard all docs added since the last flush.

All other errors ("non-aborting errors") can still partially update the index structures. These updates are consistent, but, they represent only a part of the document seen up until the error was hit. When this happens, we immediately mark the document as deleted so that the document is always atomically ("all or none") added to the index.

type DocumentsWriterDeleteQueue

type DocumentsWriterDeleteQueue struct {
	// contains filtered or unexported fields
}

DocumentsWriterDeleteQueue is a non-blocking linked pending deletes queue. In contrast to other queue implementations we only maintain the tail of the queue. A delete queue is always used in a context of a set of DWPTs and a global delete pool. Each of the DWPTs and the global pool need to maintain their 'own' head of the queue (as a DeleteSlice instance per DWPT). The difference between the DWPT and the global pool is that the DWPT starts maintaining a head once it has added its first document, since for its segment's private deletes only the deletes after that document are relevant. The global pool instead starts maintaining the head once this instance is created, by taking the sentinel instance as its initial head.

Since each DeleteSlice maintains its own head and the list is only singly linked, the garbage collector takes care of pruning the list for us. All nodes in the list that are still relevant should be either directly or indirectly referenced by one of the DWPT's private DeleteSlice or by the global BufferedUpdates slice.

Each DWPT as well as the global delete pool maintain their private DeleteSlice instance. In the DWPT case, updating a slice is equivalent to atomically finishing the document. The slice update guarantees a "happens before" relationship to all other updates in the same indexing session. When a DWPT updates a document it:

  1. consumes a document and finishes its processing
  2. updates its private DeleteSlice either by calling updateSlice() or addTermToDeleteSlice() (if the document has a delTerm)
  3. applies all deletes in the slice to its private BufferedUpdates and resets it
  4. increments its internal document id

The DWPT also doesn't apply its current document's delete term until it has updated its delete slice, which ensures the consistency of the update. If the update fails before the DeleteSlice has been updated, the deleteTerm will not be added to its private deletes, nor to the global deletes.

func (*DocumentsWriterDeleteQueue) RamBytesUsed

func (q *DocumentsWriterDeleteQueue) RamBytesUsed() int64

func (*DocumentsWriterDeleteQueue) String

func (dq *DocumentsWriterDeleteQueue) String() string

type DocumentsWriterFlushControl

type DocumentsWriterFlushControl struct {
	sync.Locker

	*DocumentsWriterStallControl // mixin
	// contains filtered or unexported fields
}

This class controls DocumentsWriterPerThread (DWPT) flushing during indexing. It tracks the memory consumption per DWPT and uses a configured FlushPolicy to decide if a DWPT must flush.

In addition to the FlushPolicy the flush control might set certain DWPT as flush pending iff a DWPT exceeds the RAMPerThreadHardLimitMB() to prevent address space exhaustion.

func (*DocumentsWriterFlushControl) String

func (fc *DocumentsWriterFlushControl) String() string

type DocumentsWriterFlushQueue

type DocumentsWriterFlushQueue struct {
	sync.Locker
	// contains filtered or unexported fields
}

type DocumentsWriterPerThread

type DocumentsWriterPerThread struct {
	// contains filtered or unexported fields
}

func (*DocumentsWriterPerThread) String

func (w *DocumentsWriterPerThread) String() string

type DocumentsWriterPerThreadPool

type DocumentsWriterPerThreadPool struct {
	sync.Locker
	// contains filtered or unexported fields
}

DocumentsWriterPerThreadPool controls ThreadState instances and their goroutine assignment during indexing. Each ThreadState holds a reference to a DocumentsWriterPerThread that, once the ThreadState is obtained from the pool, is used exclusively for indexing a single document by the obtaining thread. Each indexing thread must obtain such a ThreadState to make progress. Depending on the DocumentsWriterPerThreadPool implementation, ThreadState assignments might differ from document to document.

Once a DocumentsWriterPerThread is selected for flush, the thread pool reuses the flushing DocumentsWriterPerThread's ThreadState with a new DocumentsWriterPerThread instance.

Goroutines are different from Java's threads. So instead of thread affinity, I will use channels and concurrently running goroutines to hold individual DocumentsWriterPerThread instances and states.

func NewDocumentsWriterPerThreadPool

func NewDocumentsWriterPerThreadPool(maxNumThreadStates int) *DocumentsWriterPerThreadPool

type DocumentsWriterStallControl

type DocumentsWriterStallControl struct {
	sync.Locker
	*sync.Cond
	// contains filtered or unexported fields
}

Controls the health status of DocumentsWriter sessions. This class is used to block incoming indexing threads if flushing is significantly slower than indexing, to ensure the DocumentsWriter's health. If flushing is significantly slower than indexing, the net memory used within an IndexWriter session can increase very quickly and easily exceed the process's available memory.

To prevent OOM errors and ensure IndexWriter's stability, this class blocks incoming threads from indexing once 2 x the number of available ThreadState(s) in DocumentsWriterPerThreadPool is exceeded. Once flushing catches up and the number of flushing DWPTs is equal to or lower than the number of active ThreadState(s), threads are released and can continue indexing.

type Event

type Event func(writer *IndexWriter, triggerMerge, clearBuffers bool) error

Interface for internal atomic events. See DocumentsWriter for details. Events are executed concurrently and no order is guaranteed. Each event should only rely on the serializability within its process method. All actions that must happen before or after a certain action must be encoded inside the process() method.

type FieldInvertState

type FieldInvertState struct {
	// contains filtered or unexported fields
}

Tracks the number and position / offset parameters of terms being added to the index. The information collected in this class is also used to calculate the normalization factor for a field

func (*FieldInvertState) Boost

func (st *FieldInvertState) Boost() float32

Get boost value. This is the cumulative product of document boost and field boost for all field instances sharing the same field name.

func (*FieldInvertState) Length

func (st *FieldInvertState) Length() int

Get total number of terms in this field.

func (*FieldInvertState) Name

func (st *FieldInvertState) Name() string

Return the field's name

func (*FieldInvertState) NumOverlap

func (st *FieldInvertState) NumOverlap() int

Get the number of terms with positionIncrement == 0.

type FieldNormStatus

type FieldNormStatus struct {
	// contains filtered or unexported fields
}

type FindSegmentsFile

type FindSegmentsFile struct {
	// contains filtered or unexported fields
}

func NewFindSegmentsFile

func NewFindSegmentsFile(directory store.Directory,
	doBody func(segmentFileName string) (interface{}, error)) *FindSegmentsFile

type FlushByRamOrCountsPolicy

type FlushByRamOrCountsPolicy struct {
	*FlushPolicyImpl
}

Default FlushPolicy implementation that flushes new segments based on RAM used and document count depending on the IndexWriter's IndexWriterConfig. It also applies pending deletes based on the number of buffered delete terms.

  1. onDelete() - applies pending delete operations based on the global number of buffered delete terms iff MaxBufferedDeleteTerms() is enabled
  2. onInsert() - flushes either on the number of documents per DocumentsWriterPerThread (NumDocsInRAM()) or on the global active memory consumption in the current indexing session iff MaxBufferedDocs() or RAMBufferSizeMB() is enabled respectively
  3. onUpdate() - calls onInsert() and onDelete() in order

All IndexWriterConfig settings are used to mark DocumentsWriterPerThread as flush pending during indexing with respect to their live updates.

If SetRAMBufferSizeMB() is enabled, the largest RAM-consuming DocumentsWriterPerThread will be marked as pending iff the global active RAM consumption is >= the configured max RAM buffer.

type FlushPolicy

type FlushPolicy interface {
	// contains filtered or unexported methods
}

FlushPolicy controls when segments are flushed from a RAM resident internal data-structure to the IndexWriter's Directory.

Segments are traditionally flushed by:

  1. RAM consumption - configured via IndexWriterConfig.SetRAMBufferSizeMB()
  2. Number of RAM resident documents - configured via IndexWriterConfig.SetMaxBufferedDocs()

The policy also applies pending delete operations (by term and/or query), given the threshold set in IndexWriterConfig.SetMaxBufferedDeleteTerms().

IndexWriter consults the provided FlushPolicy to control the flushing process. The policy is informed for each added or updated document as well as for each delete term. Based on the information provided via ThreadState and DocumentsWriterFlushControl, the FlushPolicy decides if a DocumentsWriterPerThread needs flushing and marks it as flush-pending via DocumentsWriterFlushControl.SetFlushingPending(), or if deletes need to be applied.

type FlushPolicyImpl

type FlushPolicyImpl struct {
	sync.Locker
	// contains filtered or unexported fields
}

type FlushPolicyImplSPI

type FlushPolicyImplSPI interface {
	// contains filtered or unexported methods
}

type FlushTicket

type FlushTicket interface {
	// contains filtered or unexported methods
}

type FlushTicketImpl

type FlushTicketImpl struct {
	// contains filtered or unexported fields
}

type FlushedSegment

type FlushedSegment struct {
	// contains filtered or unexported fields
}

type FreqProxPostingsArray

type FreqProxPostingsArray struct {
	*ParallelPostingsArray
	// contains filtered or unexported fields
}

type FreqProxTermsWriter

type FreqProxTermsWriter struct {
	*TermsHashImpl
}

type FreqProxTermsWriterPerField

type FreqProxTermsWriterPerField struct {
	*TermsHashPerFieldImpl
	// contains filtered or unexported fields
}

TODO: break into separate freq and prox writers as codecs; make separate container (tii/tis/skip/*) that can be configured as any number of files 1..N

type FreqProxTermsWriterPerFields

type FreqProxTermsWriterPerFields []*FreqProxTermsWriterPerField

func (FreqProxTermsWriterPerFields) Len

func (FreqProxTermsWriterPerFields) Less

func (a FreqProxTermsWriterPerFields) Less(i, j int) bool

func (FreqProxTermsWriterPerFields) Swap

func (a FreqProxTermsWriterPerFields) Swap(i, j int)

type FrozenBufferedUpdates

type FrozenBufferedUpdates struct {
	// contains filtered or unexported fields
}

Holds buffered deletes and updates by term or query, once pushed. Pushed deletes/updates are write-once, so we shift to a more memory-efficient data structure to hold them. We don't hold docIDs because these are applied on flush.

func (*FrozenBufferedUpdates) String

func (bd *FrozenBufferedUpdates) String() string

type IndexCommit

type IndexCommit interface {
	// Get the segments file (segments_N) associated with the commit point.
	SegmentsFileName() string
	// Returns all index files referenced by this commit point.
	FileNames() []string
	// Returns the Directory for the index.
	Directory() store.Directory
	/*
		Delete this commit point. This only applies when using the commit
		point in the context of IndexWriter's IndexDeletionPolicy.

		Upon calling this, the writer is notified that this commit point
		should be deleted.

		Decision that a commit-point should be deleted is taken by the
		IndexDeletionPolicy in effect and therefore this should only be
		called by its onInit() or onCommit() methods.
	*/
	Delete()
	// Returns true if this commit should be deleted; this is only used
	// by IndexWriter after invoking the IndexDeletionPolicy.
	IsDeleted() bool
	// returns number of segments referenced by this commit.
	SegmentCount() int
	// Returns the generation (the _N in segments_N) for this IndexCommit
	Generation() int64
	// Returns userData, previously passed to SetCommitData(map) for this commit.
	UserData() map[string]string
}

Expert: represents a single commit into an index as seen by the IndexDeletionPolicy or IndexReader.

Changes to the content of an index are made visible only after the writer who made that change commits by writing a new segments file (segments_N). This point in time, when the act of writing a new segments file to the directory is completed, is an index commit.

Each index commit point has a unique segments file associated with it. The segments file associated with a later index commit point would have a larger N.

type IndexCommits

type IndexCommits []IndexCommit

func (IndexCommits) Len

func (s IndexCommits) Len() int

func (IndexCommits) Less

func (s IndexCommits) Less(i, j int) bool

func (IndexCommits) Swap

func (s IndexCommits) Swap(i, j int)

type IndexDeletionPolicy

type IndexDeletionPolicy interface {
	// contains filtered or unexported methods
}

Expert: policy for deletion of stale index commits.

Implement this interface, and pass it to one of the IndexWriter or IndexReader constructors, to customize when older point-in-time commits are deleted from the index directory. The default deletion policy is KeepOnlyLastCommitDeletionPolicy, which always removes old commits as soon as a new commit is done (this matches the behavior before 2.2).

One expected use case for this (and the reason why it was first created) is to work around problems with an index directory accessed via filesystems like NFS because NFS does not provide the "delete on last close" semantics that Lucene's "point in time" search normally relies on. By implementing a custom deletion policy, such as "a commit is only removed once it has been stale for more than X minutes", you can give your readers time to refresh to the new commit before IndexWriter removes the old commits. Note that doing so will increase the storage requirements of the index. See [LUCENE-710] for details.

Implementers of sub-classes should make sure that Clone() returns an independent instance able to work with any other IndexWriter or Directory instance.

type IndexFileDeleter

type IndexFileDeleter struct {
	// contains filtered or unexported fields
}

This class keeps track of each SegmentInfos instance that is still "live", either because it corresponds to a segments_N file in the Directory (a "commit", i.e. a committed SegmentInfos) or because it's an in-memory SegmentInfos that a writer is actively updating but has not yet committed. This class uses simple reference counting to map the live SegmentInfos instances to individual files in the Directory.

The same directory file may be referenced by more than one IndexCommit, i.e. more than one SegmentInfos. Therefore we count how many commits reference each file. When all the commits referencing a certain file have been deleted, the refcount for that file becomes zero, and the file is deleted.

A separate deletion policy interface (IndexDeletionPolicy) is consulted on creation (onInit) and once per commit (onCommit), to decide when a commit should be removed.

It is the business of the IndexDeletionPolicy to choose when to delete commit points. The actual mechanics of file deletion, retrying, etc, derived from the deletion of commit points is the business of the IndexFileDeleter.

The current default deletion policy is KeepOnlyLastCommitDeletionPolicy, which removes all prior commits when a new commit has completed. This matches the behavior before 2.2.

Note that you must hold the write.lock before instantiating this class. It opens segments_N file(s) directly with no retry logic.

func (*IndexFileDeleter) Close

func (fd *IndexFileDeleter) Close() error

type IndexReader

type IndexReader interface {
	io.Closer

	NumDocs() int
	MaxDoc() int
	/** Expert: visits the fields of a stored document, for
	 *  custom processing/loading of each field.  If you
	 *  simply want to load all fields, use {@link
	 *  #document(int)}.  If you want to load a subset, use
	 *  {@link DocumentStoredFieldVisitor}.  */
	VisitDocument(docID int, visitor StoredFieldVisitor) error
	/**
	 * Returns the stored fields of the <code>n</code><sup>th</sup>
	 * <code>Document</code> in this index.  This is just
	 * sugar for using {@link DocumentStoredFieldVisitor}.
	 * <p>
	 * <b>NOTE:</b> for performance reasons, this method does not check if the
	 * requested document is deleted, and therefore asking for a deleted document
	 * may yield unspecified results. Usually this is not required, however you
	 * can test if the doc is deleted by checking the {@link
	 * Bits} returned from {@link MultiFields#getLiveDocs}.
	 *
	 * <b>NOTE:</b> only the content of a field is returned,
	 * if that field was stored during indexing.  Metadata
	 * like boost, omitNorm, IndexOptions, tokenized, etc.,
	 * are not preserved.
	 *
	 * @throws IOException if there is a low-level IO error
	 */
	// TODO: we need a separate StoredField, so that the
	// Document returned here contains that class not
	//model.IndexableField
	Document(docID int) (doc *docu.Document, err error)

	Context() IndexReaderContext
	Leaves() []*AtomicReaderContext
	// Returns the number of documents containing the term. This method
	// returns 0 if the term of field does not exists. This method does
	// not take into account deleted documents that have not yet been
	// merged away.
	DocFreq(*Term) (int, error)
	// contains filtered or unexported methods
}

type IndexReaderContext

type IndexReaderContext interface {
	Reader() IndexReader
	Parent() *CompositeReaderContext
	Leaves() []*AtomicReaderContext
	Children() []IndexReaderContext
}

func TopLevelContext

func TopLevelContext(ctx IndexReaderContext) IndexReaderContext

Walks up the reader tree and returns the given context's top level reader context, or in other words the reader tree's root context.

type IndexReaderContextImpl

type IndexReaderContextImpl struct {
	// contains filtered or unexported fields
}

func (*IndexReaderContextImpl) Parent

type IndexReaderImpl

type IndexReaderImpl struct {
	IndexReaderImplSPI
	// contains filtered or unexported fields
}

func (*IndexReaderImpl) Close

func (r *IndexReaderImpl) Close() error

func (*IndexReaderImpl) Document

func (r *IndexReaderImpl) Document(docID int) (doc *docu.Document, err error)

func (*IndexReaderImpl) Leaves

func (r *IndexReaderImpl) Leaves() []*AtomicReaderContext

type IndexReaderImplSPI

type IndexReaderImplSPI interface {
	NumDocs() int
	MaxDoc() int
	VisitDocument(int, StoredFieldVisitor) error

	Context() IndexReaderContext
	DocFreq(*Term) (int, error)
	// contains filtered or unexported methods
}

type IndexReaderWarmer

type IndexReaderWarmer interface {
	// contains filtered or unexported methods
}

If openDirectoryReader() has been called (i.e., this writer is in near real-time mode), then after a merge completes, this class can be invoked to warm the reader on the newly merged segment, before the merge commits. This is not required for near real-time search, but will reduce search latency on opening a new near real-time reader after a merge completes.

NOTE: warm is called before any deletes have been carried over to the merged segment.

type IndexWriter

type IndexWriter struct {
	sync.Locker
	*ClosingControl
	*MergeControl
	// contains filtered or unexported fields
}

An IndexWriter creates and maintains an index.

The OpenMode option on IndexWriterConfig.SetOpenMode() determines whether a new index is created, or whether an existing index is opened. Note that you can open an index with OPEN_MODE_CREATE even while readers are using the index. The old readers will continue to search the "point in time" snapshot they had opened, and won't see the newly created index until they re-open. If OPEN_MODE_CREATE_OR_APPEND is used, IndexWriter will create a new index if there is not already an index at the provided path and otherwise open the existing index.

In either case, documents are added with AddDocument() and removed with DeleteDocumentsByTerm() or DeleteDocumentsByQuery(). A document can be updated with UpdateDocuments() (which just deletes and then adds the entire document). When finished adding, deleting and updating documents, Close() should be called.

...

func NewIndexWriter

func NewIndexWriter(d store.Directory, conf *IndexWriterConfig) (w *IndexWriter, err error)

Constructs a new IndexWriter per the settings given in conf. If you want to make "live" changes to this writer instance, use Config().

NOTE: after this writer is created, the given configuration instance cannot be passed to another writer. If you intend to do so, you should clone it beforehand.

func (*IndexWriter) AddDocument

func (w *IndexWriter) AddDocument(doc []IndexableField) error

Adds a document to this index.

Note that if an Error is hit (for example disk full) then the index will be consistent, but this document may not have been added. Furthermore, it's possible the index will have one segment in non-compound format even when using compound files (when a merge has partially succeeded).

This method periodically flushes pending documents to the Directory (see flush()), and also periodically triggers segment merges in the index according to the MergePolicy in use.

Merges temporarily consume space in the directory. The amount of space required is up to 1X the size of all segments being merged, when no readers/searchers are open against the index, and up to 2X the size of all segments being merged when readers/searchers are open against the index (see forceMerge() for details). The sequence of primitive merge operations performed is governed by the merge policy.

Note that each term in the document can be no longer than MAX_TERM_LENGTH in bytes, otherwise an error will be returned.

Note that it's possible to create an invalid Unicode string if a UTF16 surrogate pair is malformed. In this case, the invalid characters are silently replaced with the Unicode replacement character U+FFFD.
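
Putting the pieces together, a minimal end-to-end sketch; dir (a store.Directory), analyzer (an analysis.Analyzer) and util.VERSION_49 are assumptions obtained from the surrounding project, and the field helpers in the document package are left out:

conf := NewIndexWriterConfig(util.VERSION_49, analyzer)
w, err := NewIndexWriter(dir, conf)
if err != nil {
	panic(err) // e.g. the write.lock is already held
}
var fields []IndexableField // populate with the document's indexed/stored fields
if err = w.AddDocument(fields); err != nil {
	panic(err) // e.g. a term longer than MAX_TERM_LENGTH
}
if err = w.Commit(); err != nil { // flush buffered docs and sync so readers can see them
	panic(err)
}
w.Close()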

func (*IndexWriter) AddDocumentWithAnalyzer

func (w *IndexWriter) AddDocumentWithAnalyzer(doc []IndexableField, analyzer analysis.Analyzer) error

Adds a document to this index, using the provided analyzer instead of the value of Analyzer().

See AddDocument() for details on index and IndexWriter state after an error, and flushing/merging temporary free space requirements.

NOTE: if this method hits a memory issue, you should immediately close the writer. See above for details.

func (*IndexWriter) Close

func (w *IndexWriter) Close() error

Commits all changes to an index, waits for pending merges to complete, and closes all associated files.

Note that:

  1. If you called prepareCommit() but failed to call commit(), this method will panic and the IndexWriter will not be closed.
  2. If this method throws any other exception, the IndexWriter will be closed, but changes may have been lost.

Note that this may be a costly operation, so try to re-use a single writer instead of closing and opening a new one. See commit() for caveats about write caching done by some IO devices.

NOTE: You must ensure no other threads are still making changes at the same time that this method is invoked.

func (*IndexWriter) Commit

func (w *IndexWriter) Commit() error

Commits all pending changes (added & deleted documents, segment merges, added indexes, etc.) to the index, and syncs all referenced index files, such that a reader will see the changes and the index updates will survive an OS or machine crash or power loss. Note that this does not wait for any running background merges to finish. This may be a costly operation, so you should test the cost in your application and do it only when really necessary.

Note that this operation calls Directory.sync on the index files. That call should not return until the file contents & metadata are on stable storage. For FSDirectory, this calls the OS's fsync. But, beware: some hardware devices may in fact cache writes even during fsync, and return before the bits are actually on stable storage, to give the appearance of faster performance. If you have such a device, and it does not have a battery backup (for example) then on power loss it may still lose data. Lucene cannot guarantee consistency on such devices.

func (*IndexWriter) Directory

func (w *IndexWriter) Directory() store.Directory

Returns the Directory used by this index.

func (*IndexWriter) MergingSegments

func (w *IndexWriter) MergingSegments() map[*SegmentCommitInfo]bool

Expert: to be used by a MergePolicy to avoid selecting merges for segments already being merged. The returned collection is not cloned, and thus is only safe to access if you hold IndexWriter's lock (which you do when IndexWriter invokes the MergePolicy).

func (*IndexWriter) Rollback

func (w *IndexWriter) Rollback() error

Close the IndexWriter without committing any changes that have occurred since the last commit (or since it was opened, if commit hasn't been called). This removes any temporary files that had been created, after which the state of the index will be the same as it was when commit() was last called or when this writer was first opened. This also clears a previous call to prepareCommit()

func (*IndexWriter) UpdateDocument

func (w *IndexWriter) UpdateDocument(term *Term, doc []IndexableField, analyzer analysis.Analyzer) error

Updates a document by first deleting the document(s) containing term and then adding the new document. The delete and then add are atomic as seen by a reader on the same index (flush may happen only after the add).
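
A hedged sketch; w, newFields and analyzer are assumed to exist already, and NewTerm(field, text) is a hypothetical constructor for Term (the real constructor may differ):

// Replace whichever document(s) currently contain the term id:42.
idTerm := NewTerm("id", "42") // hypothetical Term constructor
if err := w.UpdateDocument(idTerm, newFields, analyzer); err != nil {
	panic(err)
}
// A reader on the same index never observes the delete without the subsequent add.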

type IndexWriterConfig

type IndexWriterConfig struct {
	*LiveIndexWriterConfigImpl
	// contains filtered or unexported fields
}

Holds all the configuration that is used to create an IndexWriter. Once IndexWriter has been created with this object, changes to this object will not affect the IndexWriter instance. For that, use LiveIndexWriterConfig that is returned from IndexWriter.Config().

All setter methods return IndexWriterConfig to allow chaining settings conveniently, for example:

conf := NewIndexWriterConfig(matchVersion, analyzer).
	setter1().
	setter2()

func NewIndexWriterConfig

func NewIndexWriterConfig(matchVersion util.Version, analyzer analysis.Analyzer) *IndexWriterConfig

Creates a new config with defaults that match the specified Version, using the provided Analyzer. If matchVersion is >= 3.2, TieredMergePolicy is used for merging; else LogByteSizeMergePolicy. Note that TieredMergePolicy is free to select non-contiguous merges, which means docIDs may not remain monotonic over time. If this is a problem, you should switch to LogByteSizeMergePolicy or LogDocMergePolicy.

func (*IndexWriterConfig) InfoStream

func (conf *IndexWriterConfig) InfoStream() util.InfoStream

func (*IndexWriterConfig) MergePolicy

func (conf *IndexWriterConfig) MergePolicy() MergePolicy

func (*IndexWriterConfig) SetIndexDeletionPolicy

func (conf *IndexWriterConfig) SetIndexDeletionPolicy(delPolicy IndexDeletionPolicy) *IndexWriterConfig

Expert: allows an optional IndexDeletionPolicy implementation to be specified. You can use this to control when prior commits are deleted from the index. The default policy is KeepOnlyLastCommitDeletionPolicy which removes all prior commits as soon as a new commit is done (this matches behavior before 2.2). Creating your own policy can allow you to explicitly keep previous "point in time" commits alive in the index for some time, to allow readers to refresh to the new commit without having the old commit deleted out from under them. This is necessary on filesystems like NFS that do not support "delete on last close" semantics, which Lucene's "point in time" search normally relies on.

NOTE: the deletion policy cannot be nil.
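
For example, to keep every commit point instead of only the most recent one, the NO_DELETION_POLICY constant defined above can be passed in (a sketch; conf is assumed to be an *IndexWriterConfig):

conf.SetIndexDeletionPolicy(NO_DELETION_POLICY) // never delete old commits
// or restore the default behavior:
conf.SetIndexDeletionPolicy(DEFAULT_DELETION_POLICY)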

func (*IndexWriterConfig) SetInfoStream

func (conf *IndexWriterConfig) SetInfoStream(infoStream util.InfoStream) *IndexWriterConfig

Information about merges, deletes and a message when maxFieldLength is reached will be printed to this. Must not be nil, but NO_OUTPUT may be used to suppress output.

func (*IndexWriterConfig) SetMaxBufferedDocs

func (conf *IndexWriterConfig) SetMaxBufferedDocs(maxBufferedDocs int) *IndexWriterConfig

func (*IndexWriterConfig) SetMergeScheduler

func (conf *IndexWriterConfig) SetMergeScheduler(mergeScheduler MergeScheduler) *IndexWriterConfig

Expert: sets the merge scheduler used by this writer. The default is ConcurrentMergeScheduler.

NOTE: the merge scheduler cannot be nil.

Only takes effect when IndexWriter is first created.

func (*IndexWriterConfig) SetMergedSegmentWarmer

func (conf *IndexWriterConfig) SetMergedSegmentWarmer(mergeSegmentWarmer IndexReaderWarmer) *IndexWriterConfig

func (*IndexWriterConfig) SetReaderPooling

func (conf *IndexWriterConfig) SetReaderPooling(readerPooling bool) *IndexWriterConfig

By default, IndexWriter does not pool the SegmentReaders it must open for deletions and merging, unless a near-real-time reader has been obtained by calling openDirectoryReader(IndexWriter, bool). This method lets you enable pooling without getting a near-real-time reader. NOTE: if you set this to false, IndexWriter will still pool readers once openDirectoryReader(IndexWriter, bool) is called.

func (*IndexWriterConfig) SetReaderTermsIndexDivisor

func (conf *IndexWriterConfig) SetReaderTermsIndexDivisor(divisor int) *IndexWriterConfig

func (*IndexWriterConfig) SetSimilarity

func (conf *IndexWriterConfig) SetSimilarity(similarity Similarity) *IndexWriterConfig

Expert: set the Similarity implementation used by this IndexWriter.

NOTE: the similarity cannot be nil.

Only takes effect when IndexWriter is first created.

func (*IndexWriterConfig) SetUseCompoundFile

func (conf *IndexWriterConfig) SetUseCompoundFile(useCompoundFile bool) *IndexWriterConfig

func (*IndexWriterConfig) String

func (conf *IndexWriterConfig) String() string

type IndexingChain

type IndexingChain func(documentsWriterPerThread *DocumentsWriterPerThread) DocConsumer

Returns the DocConsumer that the DocumentsWriter calls to process the documents.

type IntBlockAllocator

type IntBlockAllocator struct {
	*util.IntAllocatorImpl
	// contains filtered or unexported fields
}

func (*IntBlockAllocator) Recycle

func (alloc *IntBlockAllocator) Recycle(blocks [][]int)

type KeepOnlyLastCommitDeletionPolicy

type KeepOnlyLastCommitDeletionPolicy bool

This IndexDeletionPolicy implementation keeps only the most recent commit and immediately removes all prior commits after a new commit is done. This is the default deletion policy.

func (KeepOnlyLastCommitDeletionPolicy) Clone

type LiveIndexWriterConfig

type LiveIndexWriterConfig interface {
	TermIndexInterval() int
	MaxBufferedDocs() int
	RAMBufferSizeMB() float64
	Similarity() Similarity
	Codec() Codec
	MergePolicy() MergePolicy

	RAMPerThreadHardLimitMB() int

	InfoStream() util.InfoStream

	UseCompoundFile() bool
	// contains filtered or unexported methods
}

Holds all the configuration used by IndexWriter, with a few setters for settings that can be changed on an IndexWriter instance "live".

All the fields are either readonly or volatile.

type LiveIndexWriterConfigImpl

type LiveIndexWriterConfigImpl struct {
	// contains filtered or unexported fields
}

func (*LiveIndexWriterConfigImpl) Codec

func (conf *LiveIndexWriterConfigImpl) Codec() Codec

Returns the current Codec.

func (*LiveIndexWriterConfigImpl) InfoStream

func (conf *LiveIndexWriterConfigImpl) InfoStream() util.InfoStream

Returns InfoStream used for debugging.

func (*LiveIndexWriterConfigImpl) MaxBufferedDocs

func (conf *LiveIndexWriterConfigImpl) MaxBufferedDocs() int

Returns the number of buffered added documents that will trigger a flush if enabled.

func (*LiveIndexWriterConfigImpl) MergePolicy

func (conf *LiveIndexWriterConfigImpl) MergePolicy() MergePolicy

Returns the current MergePolicy in use by this writer.

func (*LiveIndexWriterConfigImpl) RAMBufferSizeMB

func (conf *LiveIndexWriterConfigImpl) RAMBufferSizeMB() float64

func (*LiveIndexWriterConfigImpl) RAMPerThreadHardLimitMB

func (conf *LiveIndexWriterConfigImpl) RAMPerThreadHardLimitMB() int

func (*LiveIndexWriterConfigImpl) SetMaxBufferedDocs

func (conf *LiveIndexWriterConfigImpl) SetMaxBufferedDocs(maxBufferedDocs int) *LiveIndexWriterConfigImpl

Determines the minimal number of documents required before the buffered in-memory documents are flushed as a new Segment. Large values generally give faster indexing.

When this is set, the writer will flush every maxBufferedDocs added documents. Pass in DISABLE_AUTO_FLUSH to prevent triggering a flush due to number of buffered documents. Note that if flushing by RAM usage is also enabled, then the flush will be triggered by whichever comes first.

Disabled by default (writer flushes by RAM usage).

Takes effect immediately, but only the next time a document is added, updated or deleted.
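
A hedged sketch of flushing by document count only (conf is assumed to be an *IndexWriterConfig; SetRAMBufferSizeMB is referenced elsewhere in this package's documentation):

conf.SetMaxBufferedDocs(1000)               // flush every 1000 buffered documents
conf.SetRAMBufferSizeMB(DISABLE_AUTO_FLUSH) // disable the RAM-based trigger so only the doc count applies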

func (*LiveIndexWriterConfigImpl) SetMergePolicy

func (conf *LiveIndexWriterConfigImpl) SetMergePolicy(mergePolicy MergePolicy) *LiveIndexWriterConfigImpl

Expert: MergePolicy is invoked whenever there are changes to the segments in the index. Its role is to select which merges to do, if any, and return a MergeSpecification describing the merges. It also selects merges to do for forceMerge.

func (*LiveIndexWriterConfigImpl) SetMergedSegmentWarmer

func (conf *LiveIndexWriterConfigImpl) SetMergedSegmentWarmer(mergeSegmentWarmer IndexReaderWarmer) *LiveIndexWriterConfigImpl

Sets the merged segment warmer.

Takes effect on the next merge.

func (*LiveIndexWriterConfigImpl) SetReaderTermsIndexDivisor

func (conf *LiveIndexWriterConfigImpl) SetReaderTermsIndexDivisor(divisor int) *LiveIndexWriterConfigImpl

Sets the termsIndexDivisor passed to any readers that IndexWriter opens, for example when applying deletes or creating a near-real-time reader in OpenDirectoryReader(). If you pass -1, the terms index won't be loaded by the readers. This is only useful in advanced situations when you will only .Next() through all terms; attempts to seek will hit an error.

Takes effect immediately, but only applies to readers opened after this call.

NOTE: divisor settings > 1 do not apply to all PostingsFormat implementations, including the default one in this release. It only makes sense for terms indexes that can efficiently re-sample terms at load time.

func (*LiveIndexWriterConfigImpl) SetUseCompoundFile

func (conf *LiveIndexWriterConfigImpl) SetUseCompoundFile(useCompoundFile bool) *LiveIndexWriterConfigImpl

Sets if the IndexWriter should pack newly written segments in a compound file. Default is true.

Use false for batch indexing with very large RAM buffer settings.

Note: To control compound file usage during segment merges see SetNoCFSRatio() and SetMaxCFSSegmentSizeMB(). This setting only applies to newly created segments.
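
A hedged sketch of the batch-indexing case mentioned above; disableCFSForBatchIndexing is a hypothetical helper and conf is assumed to come from an existing IndexWriter.

	// Skip compound-file packing for newly flushed segments during a
	// large batch index; compound-file use during merges is still
	// governed by the merge policy (SetNoCFSRatio / SetMaxCFSSegmentSizeMB).
	func disableCFSForBatchIndexing(conf *LiveIndexWriterConfigImpl) {
		conf.SetUseCompoundFile(false)
	}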

func (*LiveIndexWriterConfigImpl) Similarity

func (conf *LiveIndexWriterConfigImpl) Similarity() Similarity

func (*LiveIndexWriterConfigImpl) String

func (conf *LiveIndexWriterConfigImpl) String() string

func (*LiveIndexWriterConfigImpl) TermIndexInterval

func (conf *LiveIndexWriterConfigImpl) TermIndexInterval() int

func (*LiveIndexWriterConfigImpl) UseCompoundFile

func (conf *LiveIndexWriterConfigImpl) UseCompoundFile() bool

type LogByteSizeMergePolicy

type LogByteSizeMergePolicy struct {
	*LogMergePolicy
}

This is a LogMergePolicy that measures the size of a segment as the total byte size of the segment's files.

func (*LogByteSizeMergePolicy) Size

func (p *LogByteSizeMergePolicy) Size(info *SegmentCommitInfo, w *IndexWriter) (int64, error)

type LogDocMergePolicy

type LogDocMergePolicy struct {
	*LogMergePolicy
}

This is a LogMergePolicy that measures size of a segment as the number of documents (not taking deletions into account).

func (*LogDocMergePolicy) Size

func (p *LogDocMergePolicy) Size(info *SegmentCommitInfo, w *IndexWriter) (int64, error)

type LogMergePolicy

type LogMergePolicy struct {
	*MergePolicyImpl
	// contains filtered or unexported fields
}

This class implements a MergePolicy that tries to merge segments into levels of exponentially increasing size, where each level has fewer segments than the value of the merge factor. Whenever extra segments (beyond the merge factor upper bound) are encountered, all segments within the level are merged. You can get or set the merge factor using MergeFactor() and SetMergeFactor() respectively.

This class is abstract and requires a subclass to define the Size() method which specifies how a segment's size is determined. LogDocMergePolicy is one subclass that measures size by document count in the segment. LogByteSizeMergePolicy is another subclass that measures size as the total byte size of the file(s) for the segment.
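
For illustration, a small sketch (written as if inside this package) of picking between the two provided subclasses; chooseLogPolicy is a hypothetical helper name.

	// Both constructors below return *LogMergePolicy; the subclasses
	// differ only in how Size() measures a segment.
	func chooseLogPolicy(byByteSize bool) *LogMergePolicy {
		if byByteSize {
			return NewLogByteSizeMergePolicy() // size = total byte size of the segment's files
		}
		return NewLogDocMergePolicy() // size = document count (ignoring deletions)
	}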

func NewLogByteSizeMergePolicy

func NewLogByteSizeMergePolicy() *LogMergePolicy

func NewLogDocMergePolicy

func NewLogDocMergePolicy() *LogMergePolicy

func NewLogMergePolicy

func NewLogMergePolicy(min, max int64) *LogMergePolicy

func (*LogMergePolicy) FindForcedMerges

func (mp *LogMergePolicy) FindForcedMerges(infos *SegmentInfos,
	maxSegmentCount int, segmentsToMerge map[*SegmentCommitInfo]bool,
	w *IndexWriter) (MergeSpecification, error)

func (*LogMergePolicy) FindMerges

func (mp *LogMergePolicy) FindMerges(mergeTrigger MergeTrigger,
	infos *SegmentInfos, w *IndexWriter) (spec MergeSpecification, err error)

Checks if any merges are now necessary and returns a MergeSpecification if so. A merge is necessary when there are more than SetMergeFactor() segments at a given level. When multiple levels have too many segments, this method will return multiple merges, allowing the MergeScheduler to use concurrency.

func (*LogMergePolicy) SetCalbrateSizeByDeletes

func (mp *LogMergePolicy) SetCalbrateSizeByDeletes(calibrateSizeByDeletes bool)

Sets whether the segment size should be calibrated by the number of deletes when choosing segments to merge.

func (*LogMergePolicy) SetMergeFactor

func (mp *LogMergePolicy) SetMergeFactor(mergeFactor int)

Determines how often segment indices are merged by AddDocument(). With smaller values, less RAM is used while indexing, and searches are faster, but indexing speed is slower. With larger values, more RAM is used during indexing, and while searches are slower, indexing is faster. Thus larger values (> 10) are best for batch index creation, and smaller values (< 10) for indices that are interactively maintained.
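
A hedged sketch of the batch-indexing tuning described above; newBatchLogPolicy is a hypothetical helper name and 30 is an arbitrary illustrative value, not a recommendation.

	// A merge factor above the default of 10 trades search speed for
	// faster one-off batch index builds.
	func newBatchLogPolicy() *LogMergePolicy {
		mp := NewLogDocMergePolicy()
		mp.SetMergeFactor(30)
		return mp
	}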

func (*LogMergePolicy) String

func (mp *LogMergePolicy) String() string

type MergeAbortedError

type MergeAbortedError string

Thrown when a merge was explicitly aborted because IndexWriter.close() was called with false. Normally this error is privately caught and suppressed by IndexWriter.

func (MergeAbortedError) Error

func (err MergeAbortedError) Error() string

type MergeControl

type MergeControl struct {
	sync.Locker
	// contains filtered or unexported fields
}

type MergeJob

type MergeJob struct {
	// contains filtered or unexported fields
}

type MergePolicy

type MergePolicy interface {
	SetNoCFSRatio(noCFSRatio float64)
	SetMaxCFSSegmentSizeMB(v float64)
	MergeSpecifier
}

Expert: a MergePolicy determines the sequence of primitive merge operations.

Whenever the segments in an index have been altered by IndexWriter, either the addition of a newly flushed segment, addition of many segments from addIndexes* calls, or a previous merge that may now need to cascade, IndexWriter invokes findMerges() to give the MergePolicy a chance to pick merges that are now required. This method returns a MergeSpecification instance describing the set of merges that should be done, or nil if no merges are necessary. When IndexWriter.forceMerge() is called, it calls findForcedMerges() and the MergePolicy should then return the necessary merges.

Note that the policy can return more than one merge at a time. In this case, if the writer is using SerialMergeScheduler, the merges will be run sequentially but if it is using ConcurrentMergeScheduler they will be run concurrently.

The default MergePolicy is TieredMergePolicy.
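
To make the contract concrete, here is a minimal sketch of a policy that never selects merges, built from the interfaces shown here and NewDefaultMergePolicyImpl documented below; noMergePolicy is a hypothetical type and the real package may impose requirements not visible in this listing.

	// A MergeSpecifier whose FindMerges/FindForcedMerges always return a
	// nil MergeSpecification, meaning "no merges are necessary". The
	// embedded MergePolicyImpl supplies the compound-file settings
	// required by the MergePolicy interface.
	type noMergePolicy struct {
		*MergePolicyImpl
	}

	func newNoMergePolicy() *noMergePolicy {
		p := &noMergePolicy{}
		p.MergePolicyImpl = NewDefaultMergePolicyImpl(p)
		return p
	}

	func (p *noMergePolicy) FindMerges(trigger MergeTrigger,
		infos *SegmentInfos, w *IndexWriter) (MergeSpecification, error) {
		return nil, nil
	}

	func (p *noMergePolicy) FindForcedMerges(infos *SegmentInfos,
		maxSegmentCount int, segmentsToMerge map[*SegmentCommitInfo]bool,
		w *IndexWriter) (MergeSpecification, error) {
		return nil, nil
	}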

type MergePolicyImpl

type MergePolicyImpl struct {
	SizeSPI MergePolicyImplSPI
	// contains filtered or unexported fields
}

func NewDefaultMergePolicyImpl

func NewDefaultMergePolicyImpl(self MergeSpecifier) *MergePolicyImpl

Creates a new merge policy instance. Note that if you intend to use it without passing it to IndexWriter, you should call SetIndexWriter()

func (*MergePolicyImpl) SetMaxCFSSegmentSizeMB

func (mp *MergePolicyImpl) SetMaxCFSSegmentSizeMB(v float64)

If a merged segment will be more than this value, leave the segment as non-compound file even if compound file is enabled. Set this to math.Inf(1) (default) and noCFSRatio to 1.0 to always use CFS regardless of merge size.

func (*MergePolicyImpl) SetNoCFSRatio

func (mp *MergePolicyImpl) SetNoCFSRatio(noCFSRatio float64)

If a merged segment will be more than this percentage of the total size of the index, leave the segment as non-compound file even if compound file is enabled. Set to 1.0 to always use CFS regardless of merge size.
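
The "always CFS" combination described above, as a small sketch; alwaysUseCFS is a hypothetical helper and assumes the standard "math" package is imported.

	// Keep every merged segment in compound-file format regardless of
	// its size relative to the index.
	func alwaysUseCFS(mp MergePolicy) {
		mp.SetNoCFSRatio(1.0)
		mp.SetMaxCFSSegmentSizeMB(math.Inf(1))
	}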

func (*MergePolicyImpl) Size

func (mp *MergePolicyImpl) Size(info *SegmentCommitInfo, w *IndexWriter) (n int64, err error)

type MergePolicyImplSPI

type MergePolicyImplSPI interface {
	// Return the byte size of the provided SegmentCommitInfo,
	// pro-rated by percentage of non-deleted documents if
	// SetCalibrateSizeByDeletes() is set.
	Size(*SegmentCommitInfo, *IndexWriter) (int64, error)
}

type MergeScheduler

type MergeScheduler interface {
	io.Closer
	Merge(*IndexWriter, MergeTrigger, bool) error
}

Expert: IndexWriter uses an instance implementing this interface to execute the merges selected by a MergePolicy. The default MergeScheduler is ConcurrentMergeScheduler.

Implementers of sub-classes should make sure that Clone() returns an independent instance able to work with any IndexWriter instance.

type MergeScore

type MergeScore interface{}

type MergeSpecification

type MergeSpecification []*OneMerge

A MergeSpecification instance provides the information necessary to perform multiple merges. It simply contains a list of OneMerge instances.

type MergeSpecifier

type MergeSpecifier interface {
	// Determine what set of merge operations are now necessary on the
	// index. IndexWriter calls this whenever there is a change to the
	// segments. This call is always synchronized on the IndexWriter
	// instance so only one thread at a time will call this method.
	FindMerges(MergeTrigger, *SegmentInfos, *IndexWriter) (MergeSpecification, error)
	// Determine what set of merge operations is necessary in order to
	// merge to <= the specified segment count. IndexWriter calls this
	// when its forceMerge() method is called. This call is always
	// synchronized on the IndexWriter instance so only one thread at a
	// time will call this method.
	FindForcedMerges(*SegmentInfos, int,
		map[*SegmentCommitInfo]bool, *IndexWriter) (MergeSpecification, error)
}

type MergeTrigger

type MergeTrigger int

Passed to MergePolicy.FindMerges(MergeTrigger, SegmentInfos) to indicate the event that triggered the merge

type MultiFields

type MultiFields struct {
	// contains filtered or unexported fields
}

func NewMultiFields

func NewMultiFields(subs []Fields, subSlices []ReaderSlice) MultiFields

func (MultiFields) Terms

func (mf MultiFields) Terms(field string) Terms

type MultiTerms

type MultiTerms struct {
	// contains filtered or unexported fields
}

func NewMultiTerms

func NewMultiTerms(subs []Terms, subSlices []ReaderSlice) *MultiTerms

func (*MultiTerms) DocCount

func (mt *MultiTerms) DocCount() int

func (*MultiTerms) Iterator

func (mt *MultiTerms) Iterator(reuse TermsEnum) TermsEnum

func (*MultiTerms) SumDocFreq

func (mt *MultiTerms) SumDocFreq() int64

func (*MultiTerms) SumTotalTermFreq

func (mt *MultiTerms) SumTotalTermFreq() int64

type NoDeletionPolicy

type NoDeletionPolicy bool

An IndexDeletionPolicy which keeps all index commits around, never deleting them. This class is a singleton and can be accessed by referencing INSTANCE.

func (NoDeletionPolicy) Clone

type Node

type Node struct {
	// contains filtered or unexported fields
}

type NumericDocValuesWriter

type NumericDocValuesWriter struct {
	// contains filtered or unexported fields
}

Buffers up pending long per doc, then flushes when segment flushes.

type NumericIterator

type NumericIterator struct{}

Iterates over the values we have in RAM.

type OneMerge

type OneMerge struct {
	sync.Locker
	// contains filtered or unexported fields
}

OneMerge provides the information necessary to perform an individual primitive merge operation, resulting in a single new segment. The merge spec includes the subset of segments to be merged as well as whether the new segment should use the compound file format.

func NewOneMerge

func NewOneMerge(segments []*SegmentCommitInfo) *OneMerge

type OpenMode

type OpenMode int

Specifies the open mode for IndexWriter.

type ParallelPostingsArray

type ParallelPostingsArray struct {
	PostingsArray
	// contains filtered or unexported fields
}

type PerField

type PerField struct {
	*DefaultIndexingChain // access at least docState, termsHash.
	// contains filtered or unexported fields
}

type PostingsArray

type PostingsArray interface {
	// contains filtered or unexported methods
}

type PostingsBytesStartArray

type PostingsBytesStartArray struct {
	// contains filtered or unexported fields
}

func (*PostingsBytesStartArray) BytesUsed

func (ss *PostingsBytesStartArray) BytesUsed() util.Counter

func (*PostingsBytesStartArray) Clear

func (ss *PostingsBytesStartArray) Clear() []int

func (*PostingsBytesStartArray) Grow

func (ss *PostingsBytesStartArray) Grow() []int

func (*PostingsBytesStartArray) Init

func (ss *PostingsBytesStartArray) Init() []int

type PrefixCodedTerms

type PrefixCodedTerms struct {
	// contains filtered or unexported fields
}

Prefix codes term instances (prefixes are shared)

func (*PrefixCodedTerms) RamBytesUsed

func (terms *PrefixCodedTerms) RamBytesUsed() int64

type PrefixCodedTermsBuilder

type PrefixCodedTermsBuilder struct {
	// contains filtered or unexported fields
}

Builds a PrefixCodedTerms: call add repeatedly, then finish.

type Query

type Query interface{}

type QueryAndLimit

type QueryAndLimit struct {
}

type RandomCodec

type RandomCodec struct {
	*CodecImpl
}

Codec that assigns per-field random postings format.

The same field/format assignment will happen regardless of order, a hash is computed up front that determines the mapping. This means fields can be put into things like HashSets and added to documents in different orders and the tests will still be deterministic and reproducible.

func NewRandomCodec

func NewRandomCodec(r *rand.Rand, avoidCodecs map[string]bool) *RandomCodec

type ReaderClosedListener

type ReaderClosedListener interface {
	// contains filtered or unexported methods
}

A custom listener that's invoked when the IndexReader is closed.

type ReaderPool

type ReaderPool struct {
	sync.Locker
	// contains filtered or unexported fields
}

func (*ReaderPool) Close

func (pool *ReaderPool) Close() error

type ReaderSlice

type ReaderSlice struct {
	// contains filtered or unexported fields
}

func (ReaderSlice) String

func (rs ReaderSlice) String() string

type ReadersAndUpdates

type ReadersAndUpdates struct {
	sync.Locker
	// contains filtered or unexported fields
}

Used by IndexWriter to hold open SegmentReaders (for searching or merging), plus pending deletes and updates, for a given segment.

func (*ReadersAndUpdates) String

func (rld *ReadersAndUpdates) String() string

type RefCount

type RefCount struct {
	// contains filtered or unexported fields
}

Tracks the reference count for a single index file.

type SegInfoByDelGen

type SegInfoByDelGen []*SegmentCommitInfo

func (SegInfoByDelGen) Len

func (a SegInfoByDelGen) Len() int

func (SegInfoByDelGen) Less

func (a SegInfoByDelGen) Less(i, j int) bool

func (SegInfoByDelGen) Swap

func (a SegInfoByDelGen) Swap(i, j int)

type SegmentCoreReaders

type SegmentCoreReaders struct {
	// contains filtered or unexported fields
}

type SegmentFlushTicket

type SegmentFlushTicket struct {
	*FlushTicketImpl
	// contains filtered or unexported fields
}

type SegmentInfoAndLevel

type SegmentInfoAndLevel struct {
	// contains filtered or unexported fields
}

type SegmentInfoAndLevels

type SegmentInfoAndLevels []SegmentInfoAndLevel

func (SegmentInfoAndLevels) Len

func (ss SegmentInfoAndLevels) Len() int

func (SegmentInfoAndLevels) Less

func (ss SegmentInfoAndLevels) Less(i, j int) bool

func (SegmentInfoAndLevels) Swap

func (ss SegmentInfoAndLevels) Swap(i, j int)

type SegmentInfoStatus

type SegmentInfoStatus struct {
	// contains filtered or unexported fields
}

Holds the status of each segment in the index.

type SegmentInfos

type SegmentInfos struct {
	Segments []*SegmentCommitInfo
	// contains filtered or unexported fields
}

A collection of segmentInfo objects with methods for operating on those segments in relation to the file system.

The active segments in the index are stored in the segments file, segments_N. There may be one or more segments_N files in the index; however, the one with the largest generation is the active one (when older segments_N files are present it's because they temporarily cannot be deleted, or a writer is in the process of committing, or a custom IndexDeletionPolicy is in use). This file lists each segment by name and has details about the codec and generation of deletes.

There is also a file segments.gen. This file contains the current generation (the _N in segments_N) of the index. This is used only as a fallback in case the current generation cannot be accurately determined by directory listing alone (as is the case for some NFS clients with time-based directory cache expiration). This file simply contains an Int32 version header (FORMAT_SEGMENTS_GEN_CURRENT), followed by the generation recorded as int64, written twice.
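
The layout just described can be decoded with plain binary reads. The following is an illustrative sketch only, not part of this package's API; it assumes Lucene's big-endian on-disk encoding, uses the hypothetical helper name decodeSegmentsGen, requires "bytes", "encoding/binary" and "errors", and ignores the trailing checksum footer.

	// Decode the int32 version header and the twice-written int64
	// generation from the raw contents of a segments.gen file.
	func decodeSegmentsGen(raw []byte) (version int32, gen int64, err error) {
		r := bytes.NewReader(raw)
		if err = binary.Read(r, binary.BigEndian, &version); err != nil {
			return
		}
		if err = binary.Read(r, binary.BigEndian, &gen); err != nil {
			return
		}
		var gen2 int64
		if err = binary.Read(r, binary.BigEndian, &gen2); err != nil {
			return
		}
		if gen != gen2 {
			err = errors.New("segments.gen: the two recorded generations do not match")
		}
		return
	}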

Files:

  • segments.gen: GenHeader, Generation, Generation, Footer
  • segments_N: Header, Version, NameCounter, SegCount, <SegName, SegCodec, DelGen, DeletionCount, FieldInfosGen, DocValuesGen, UpdatesFiles>^SegCount, CommitUserData, Footer

Data types:

  • Header --> CodecHeader
  • GenHeader, NameCounter, SegCount, DeletionCount --> int32
  • Generation, Version, DelGen, Checksum --> int64
  • SegName, SegCodec --> string
  • CommitUserData --> map[string]string
  • UpdatesFiles --> map[int32]map[string]bool
  • Footer --> CodecFooter

Field Descriptions:

  • Version counts how often the index has been changed by adding or deleting documents.
  • NameCounter is used to generate names for new segment files.
  • SegName is the name of the segment, and is used as the file name prefix for all of the files that compose the segment's index.
  • DelGen is the generation count of the deletes file. If this is -1, there are no deletes. Anything above zero means there are deletes stored by LiveDocsFormat.
  • DeletionCount records the number of deleted documents in this segment.
  • SegCodec is the name of the Codec that encoded this segment.
  • CommitUserData stores an optional user-supplied opaque map[string]string that was passed to SetCommitData().
  • FieldInfosGen is the generation count of the fieldInfos file. If this is -1, there are no updates to the fieldInfos in that segment. Anything above zero means there are updates to the fieldInfos stored by FieldInfosFormat.
  • DocValuesGen is the generation count of the updatable DocValues. If this is -1, there are no updates to DocValues in that segment. Anything above zero means there are updates to DocValues stored by DocValuesFormat.
  • UpdatesFiles stores the set of files that were updated in that segment per file.
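
A hedged usage sketch, written as if inside this package (so the store and "fmt" imports are assumed): describeCommit is a hypothetical helper, and ReadAll is assumed here to locate and read the current segments_N as described above.

	// Report which segments_N file backs the commit and how many
	// segments it lists.
	func describeCommit(sis *SegmentInfos, dir store.Directory) error {
		if err := sis.ReadAll(dir); err != nil {
			return err
		}
		fmt.Printf("%s lists %d segment(s)\n",
			sis.SegmentsFileName(), len(sis.Segments))
		return nil
	}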

func (*SegmentInfos) Clear

func (sis *SegmentInfos) Clear()

func (*SegmentInfos) Clone

func (sis *SegmentInfos) Clone() *SegmentInfos

Returns a copy of this instance, also copying each SegmentInfo.

func (*SegmentInfos) Read

func (sis *SegmentInfos) Read(directory store.Directory, segmentFileName string) (err error)

Read a particular segmentFileName. Note that this may return an IO error if a commit is in progress.

func (*SegmentInfos) ReadAll

func (sis *SegmentInfos) ReadAll(directory store.Directory) error

func (*SegmentInfos) SegmentsFileName

func (sis *SegmentInfos) SegmentsFileName() string

type SegmentReader

type SegmentReader struct {
	*AtomicReaderImpl
	// contains filtered or unexported fields
}

IndexReader implementation over a single segment.

Instances pointing to the same segment (but with different deletes, etc.) may share the same core data.

This type is experimental (@lucene.experimental).

func NewSegmentReader

func NewSegmentReader(si *SegmentCommitInfo,
	termInfosIndexDivisor int, context store.IOContext) (r *SegmentReader, err error)

Constructs a new SegmentReader with a new core. Returns an error if the index is corrupt (CorruptIndexException) or if there is a low-level IO error.

TODO: why is this public?

func (*SegmentReader) BinaryDocValues

func (r *SegmentReader) BinaryDocValues(field string) (v BinaryDocValues, err error)

func (*SegmentReader) CombinedCoreAndDeletesKey

func (r *SegmentReader) CombinedCoreAndDeletesKey() interface{}

func (*SegmentReader) CoreCacheKey

func (r *SegmentReader) CoreCacheKey() interface{}

func (*SegmentReader) Directory

func (r *SegmentReader) Directory() store.Directory

func (*SegmentReader) FieldInfos

func (r *SegmentReader) FieldInfos() FieldInfos

func (*SegmentReader) Fields

func (r *SegmentReader) Fields() Fields

func (*SegmentReader) FieldsReader

func (r *SegmentReader) FieldsReader() StoredFieldsReader

Expert: retrieve thread-private StoredFieldsReader

func (*SegmentReader) LiveDocs

func (r *SegmentReader) LiveDocs() util.Bits

func (*SegmentReader) MaxDoc

func (r *SegmentReader) MaxDoc() int

func (*SegmentReader) NormValues

func (r *SegmentReader) NormValues(field string) (v NumericDocValues, err error)

func (*SegmentReader) NumDocs

func (r *SegmentReader) NumDocs() int

func (*SegmentReader) NumericDocValues

func (r *SegmentReader) NumericDocValues(field string) (v NumericDocValues, err error)

func (*SegmentReader) SegmentInfos

func (r *SegmentReader) SegmentInfos() *SegmentCommitInfo

func (*SegmentReader) SegmentName

func (r *SegmentReader) SegmentName() string

func (*SegmentReader) SortedDocValues

func (r *SegmentReader) SortedDocValues(field string) (v SortedDocValues, err error)

func (*SegmentReader) SortedSetDocValues

func (r *SegmentReader) SortedSetDocValues(field string) (v SortedSetDocValues, err error)

func (*SegmentReader) String

func (r *SegmentReader) String() string

func (*SegmentReader) TermInfosIndexDivisor

func (r *SegmentReader) TermInfosIndexDivisor() int

func (*SegmentReader) TermVectors

func (r *SegmentReader) TermVectors(docID int) (fs Fields, err error)

func (*SegmentReader) TermVectorsReader

func (r *SegmentReader) TermVectorsReader() TermVectorsReader

func (*SegmentReader) VisitDocument

func (r *SegmentReader) VisitDocument(docID int, visitor StoredFieldVisitor) error

type SerialMergeScheduler

type SerialMergeScheduler struct {
	sync.Locker
}

A MergeScheduler that simply does each merge sequentially, using the current thread.

func NewSerialMergeScheduler

func NewSerialMergeScheduler() *SerialMergeScheduler

func (*SerialMergeScheduler) Close

func (ms *SerialMergeScheduler) Close() error

func (*SerialMergeScheduler) Merge

func (ms *SerialMergeScheduler) Merge(writer *IndexWriter,
	trigger MergeTrigger, newMergesFound bool) (err error)

type Similarity

type Similarity interface {
	ComputeNorm(fs *FieldInvertState) int64
}

type SimpleMergedSegmentWarmer

type SimpleMergedSegmentWarmer struct {
	// contains filtered or unexported fields
}

A very simple merged segment warmer that just ensures data structures are initialized.

func NewSimpleMergedSegmentWarmer

func NewSimpleMergedSegmentWarmer(infoStream util.InfoStream) *SimpleMergedSegmentWarmer

Creates a new SimpleMergedSegmentWarmer

type StandardDirectoryReader

type StandardDirectoryReader struct {
	*DirectoryReaderImpl
	// contains filtered or unexported fields
}

func (*StandardDirectoryReader) IsCurrent

func (r *StandardDirectoryReader) IsCurrent() bool

func (*StandardDirectoryReader) String

func (r *StandardDirectoryReader) String() string

func (*StandardDirectoryReader) Version

func (r *StandardDirectoryReader) Version() int64

type StoredFieldStatus

type StoredFieldStatus struct {
	// contains filtered or unexported fields
}

type Term

type Term struct {
	Field string
	Bytes []byte
}

A Term represents a word from text. This is the unit of search. It is composed of two elements, the text of the word, as a string, and the name of the field that the text occurred in.

Note that terms may represent more than just words from text fields; they can also be things like dates, email addresses, URLs, etc.
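
A short sketch using the constructors listed below; termExamples is a hypothetical helper and assumes "fmt" is imported.

	func termExamples() {
		t := NewTerm("body", "lucene")                  // field "body", text "lucene"
		b := NewTermFromBytes("id", []byte{0x01, 0x02}) // terms need not be text
		empty := NewEmptyTerm("body")                   // same field, empty text: reusable as a query pattern
		fmt.Println(t, b, empty)
	}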

func NewEmptyTerm

func NewEmptyTerm(fld string) *Term

Constructs a Term with the given field and empty text. This serves two purposes: 1) reuse of a Term with the same field. 2) pattern for a query.

func NewTerm

func NewTerm(fld string, text string) *Term

func NewTermFromBytes

func NewTermFromBytes(fld string, bytes []byte) *Term

func (*Term) String

func (t *Term) String() string

type TermContext

type TermContext struct {
	TopReaderContext IndexReaderContext

	DocFreq       int
	TotalTermFreq int64
	// contains filtered or unexported fields
}

func NewTermContext

func NewTermContext(ctx IndexReaderContext) *TermContext

Creates an empty TermContext from an IndexReaderContext.

func NewTermContextFromTerm

func NewTermContextFromTerm(ctx IndexReaderContext, t *Term) (tc *TermContext, err error)

Creates a TermContext from a top-level IndexReaderContext and the given Term. This method will look up the given term in all of the context's leaf readers and register each reader containing the term in the returned TermContext, using the leaf reader's ordinal.

Note: the given context must be a top-level context.

func (*TermContext) State

func (tc *TermContext) State(ord int) TermState

type TermIndexStatus

type TermIndexStatus struct {
	// contains filtered or unexported fields
}

type TermSorter

type TermSorter []*Term

func (TermSorter) Len

func (s TermSorter) Len() int

func (TermSorter) Less

func (s TermSorter) Less(i, j int) bool

func (TermSorter) Swap

func (s TermSorter) Swap(i, j int)

type TermVectorStatus

type TermVectorStatus struct {
	// contains filtered or unexported fields
}

type TermVectorsConsumer

type TermVectorsConsumer struct {
	*TermsHashImpl
	// contains filtered or unexported fields
}

type TermVectorsConsumerPerField

type TermVectorsConsumerPerField struct {
	*TermsHashPerFieldImpl
	// contains filtered or unexported fields
}

type TermVectorsConsumerPerFields

type TermVectorsConsumerPerFields []*TermVectorsConsumerPerField

func (TermVectorsConsumerPerFields) Len

func (TermVectorsConsumerPerFields) Less

func (a TermVectorsConsumerPerFields) Less(i, j int) bool

func (TermVectorsConsumerPerFields) Swap

func (a TermVectorsConsumerPerFields) Swap(i, j int)

type TermVectorsPostingArray

type TermVectorsPostingArray struct {
	// contains filtered or unexported fields
}

type TermsHash

type TermsHash interface {
	TermsHashImplSPI
	// contains filtered or unexported methods
}

This class is passed each token produced by the analyzer on each field during indexing, and it stores these tokens in a hash table, and allocates separate byte streams per token. Consumers of this class, eg FreqProxTermsWriter and TermVectorsConsumer, write their own byte streams under each term.

type TermsHashImpl

type TermsHashImpl struct {
	// contains filtered or unexported fields
}

type TermsHashImplSPI

type TermsHashImplSPI interface {
	// contains filtered or unexported methods
}

type TermsHashPerField

type TermsHashPerField interface {
	// contains filtered or unexported methods
}

type TermsHashPerFieldImpl

type TermsHashPerFieldImpl struct {
	// contains filtered or unexported fields
}

type TermsHashPerFieldSPI

type TermsHashPerFieldSPI interface {
	// contains filtered or unexported methods
}

type ThreadState

type ThreadState struct {
	// contains filtered or unexported fields
}

ThreadState references and guards a DocumentsWriterPerThread instance that is used during indexing to build an in-memory index segment. ThreadState also holds all flush-related per-thread data controlled by DocumentsWriterFlushControl.

A ThreadState, its methods and its members should only be accessed by one goroutine at a time. Users must acquire the lock via lock() before accessing the state and release it via unlock() (typically with defer) afterwards.

type TieredMergePolicy

type TieredMergePolicy struct {
	*MergePolicyImpl
	// contains filtered or unexported fields
}

Merges segments of approximately equal size, subject to an allowed number of segments per tier. This is similar to LogByteSizeMergePolicy, except this merge policy is able to merge non-adjacent segments, and separates how many segments are merged at once (SetMaxMergeAtOnce()) from how many segments are allowed per tier (SetSegmentsPerTier()). This merge policy also does not over-merge (i.e. cascade merges).

For normal merging, this policy first computes a "budget" of how many segments are allowed to be in the index. If the index is over-budget, then the policy sorts segments by decreasing size (pro-rating by percent deletes), and then finds the least-cost merge. Merge cost is measured by a combination of the "skew" of the merge (size of the largest segment divided by the smallest segment), total merge size, and percent deletes reclaimed, so that merges with lower skew, smaller size, and more reclaimed deletes are favored.

If a merge will produce a segment that's larger than SetMaxMergedSegmentMB(), then the policy will merge fewer segments (down to 1 at once, if that one has deletions) to keep the segment size under budget.

NOTE: this policy freely merges non-adjacent segments; if this is a problem, use LogMergePolicy.

NOTE: This policy always merges by byte size of the segments, always pro-rates by percent deletes, and does not apply any maximum segment size during forceMerge (unlike LogByteSizeMergePolicy).
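
A hedged configuration sketch; newLargeSegmentTieredPolicy is a hypothetical helper and the numbers are illustrative, not recommendations. The setters documented below return the policy, so calls can be chained, and the result can be installed via the live config's SetMergePolicy().

	func newLargeSegmentTieredPolicy() *TieredMergePolicy {
		tmp := NewTieredMergePolicy()
		// Keep SetSegmentsPerTier >= SetMaxMergeAtOnce to avoid forcing
		// too much merging (see the note on SetSegmentsPerTier below).
		tmp.SetMaxMergeAtOnce(10).
			SetSegmentsPerTier(10).
			SetMaxMergedSegmentMB(5 * 1024).
			SetFloorSegmentMB(2)
		return tmp
	}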

func NewTieredMergePolicy

func NewTieredMergePolicy() *TieredMergePolicy

func (*TieredMergePolicy) FindForcedMerges

func (tmp *TieredMergePolicy) FindForcedMerges(infos *SegmentInfos,
	maxSegmentCount int, segmentsToMerge map[*SegmentCommitInfo]bool,
	w *IndexWriter) (MergeSpecification, error)

func (*TieredMergePolicy) FindMerges

func (tmp *TieredMergePolicy) FindMerges(mergeTrigger MergeTrigger,
	infos *SegmentInfos, w *IndexWriter) (spec MergeSpecification, err error)

func (*TieredMergePolicy) SetFloorSegmentMB

func (tmp *TieredMergePolicy) SetFloorSegmentMB(v float64) *TieredMergePolicy

Segments smaller than this are "rounded up" to this size, ie treated as equal (floor) size for merge selection. This is to prevent frequent flushing of tiny segments from allowing a long tail in the index. Default is 2 MB.

func (*TieredMergePolicy) SetForceMergeDeletesPctAllowed

func (tmp *TieredMergePolicy) SetForceMergeDeletesPctAllowed(v float64) *TieredMergePolicy

When forceMergeDeletes is called, we only merge away a segment if its delete percentage is over this threshold. Default is 10%.

func (*TieredMergePolicy) SetMaxMergeAtOnce

func (tmp *TieredMergePolicy) SetMaxMergeAtOnce(v int) *TieredMergePolicy

Maximum number of segments to be merged at a time during "normal" merging. For explicit merging (e.g., forceMerge or forceMergeDeletes was called), see SetMaxMergeAtOnceExplicit(). Default is 10.

func (*TieredMergePolicy) SetMaxMergeAtOnceExplicit

func (tmp *TieredMergePolicy) SetMaxMergeAtOnceExplicit(v int) *TieredMergePolicy

Maximum number of segments to be merged at a time, during forceMerge or forceMergeDeletes. Default is 30.

func (*TieredMergePolicy) SetMaxMergedSegmentMB

func (tmp *TieredMergePolicy) SetMaxMergedSegmentMB(v float64) *TieredMergePolicy

Maximum sized segment to produce during normal merging. This setting is approximate: the estimate of the merged segment size is made by summing sizes of to-be-merged segments (compensating for percent deleted docs). Default is 5 GB.

func (*TieredMergePolicy) SetReclaimDeletesWeight

func (tmp *TieredMergePolicy) SetReclaimDeletesWeight(v float64) *TieredMergePolicy

Controls how aggressively merges that reclaim more deletions are favored. Higher values will more aggressively target merges that reclaim deletions, but be careful not to go so high that far too much merging takes place; a value of 3.0 is probably nearly too high. A value of 0.0 means deletions don't impact merge selection.

func (*TieredMergePolicy) SetSegmentsPerTier

func (tmp *TieredMergePolicy) SetSegmentsPerTier(v float64) *TieredMergePolicy

Sets the allowed number of segments per tier. Smaller values mean more merging but fewer segments.

NOTE: this value should be >= SetMaxMergeAtOnce(), otherwise you'll force too much merging to occur.

func (*TieredMergePolicy) String

func (tmp *TieredMergePolicy) String() string
