statistics

package

v0.1.6 Latest Latest Go to latest Published: Aug 16, 2022 License: Apache-2.0, Apache-2.0 Imports: 33 Imported by: 0

Details

Valid go.mod file

The Go module system was introduced in Go 1.11 and is the official dependency management solution for Go.
Redistributable license

Redistributable licenses place minimal restrictions on how software can be used, modified, and redistributed.
Tagged version

Modules with tagged versions give importers more predictable builds.
Stable version

When a project reaches major version v1 it is considered stable.
Learn more about best practices

Repository

github.com/squareup/pranadb

Links

Open Source Insights

Documentation ¶

Index ¶

Constants
Variables
func GetIndexPrefixLens(data []byte, numCols int) (prefixLens []int, err error)
func GetOrdinalOfRangeCond(sc *stmtctx.StatementContext, ran *ranger.Range) int
func GetPseudoRowCountByColumnRanges(sc *stmtctx.StatementContext, tableRowCount float64, ...) (float64, error)
func ValueToString(vars *variable.SessionVars, value *types.Datum, idxCols int, ...) (string, error)
type Bucket
type CMSketch
- func (c *CMSketch) CalcDefaultValForAnalyze(NDV uint64)
- func (c *CMSketch) Copy() *CMSketch
- func (c *CMSketch) Equal(rc *CMSketch) bool
- func (c *CMSketch) GetWidthAndDepth() (int32, int32)
- func (c *CMSketch) InsertBytes(bytes []byte)
- func (c *CMSketch) InsertBytesByCount(bytes []byte, count uint64)
- func (c *CMSketch) MemoryUsage() (sum int64)
- func (c *CMSketch) MergeCMSketch(rc *CMSketch) error
- func (c *CMSketch) MergeCMSketch4IncrementalAnalyze(rc *CMSketch, numTopN uint32) error
- func (c *CMSketch) QueryBytes(d []byte) uint64
- func (c *CMSketch) SubValue(h1, h2 uint64, count uint64)
- func (c *CMSketch) TotalCount() uint64
type Column
- func (c *Column) AvgColSize(count int64, isKey bool) float64
- func (c *Column) AvgColSizeChunkFormat(count int64) float64
- func (c *Column) AvgColSizeListInDisk(count int64) float64
- func (c *Column) BetweenRowCount(sc *stmtctx.StatementContext, l, r types.Datum, lowEncoded, highEncoded []byte) float64
- func (c *Column) GetColumnRowCount(sc *stmtctx.StatementContext, ranges []*ranger.Range, modifyCount int64, ...) (float64, error)
- func (c *Column) GetIncreaseFactor(totalCount int64) float64
- func (c *Column) IsInvalid(sc *stmtctx.StatementContext, collPseudo bool) bool
- func (c *Column) MemoryUsage() (sum int64)
- func (c *Column) String() string
- func (c *Column) TotalRowCount() float64
type ErrorRate
- func (e *ErrorRate) Merge(rate *ErrorRate)
- func (e *ErrorRate) NotAccurate() bool
- func (e *ErrorRate) Update(rate float64)
type FMSketch
- func (s *FMSketch) Copy() *FMSketch
- func (s *FMSketch) InsertRowValue(sc *stmtctx.StatementContext, values []types.Datum) error
- func (s *FMSketch) InsertValue(sc *stmtctx.StatementContext, value types.Datum) error
- func (s *FMSketch) MemoryUsage() (sum int64)
- func (s *FMSketch) MergeFMSketch(rs *FMSketch)
- func (s *FMSketch) NDV() int64
type HistColl
- func (coll *HistColl) GenerateHistCollFromColumnInfo(infos []*model.ColumnInfo, columns []*expression.Column) *HistColl
- func (coll *HistColl) GetAvgRowSize(ctx sessionctx.Context, cols []*expression.Column, isEncodedKey bool, ...) (size float64)
- func (coll *HistColl) GetAvgRowSizeListInDisk(cols []*expression.Column) (size float64)
- func (coll *HistColl) GetIndexAvgRowSize(ctx sessionctx.Context, cols []*expression.Column, isUnique bool) (size float64)
- func (coll *HistColl) GetRowCountByColumnRanges(sc *stmtctx.StatementContext, colID int64, colRanges []*ranger.Range) (float64, error)
- func (coll *HistColl) GetRowCountByIndexRanges(sc *stmtctx.StatementContext, idxID int64, indexRanges []*ranger.Range) (float64, error)
- func (coll *HistColl) GetRowCountByIntColumnRanges(sc *stmtctx.StatementContext, colID int64, intRanges []*ranger.Range) (float64, error)
- func (coll *HistColl) GetTableAvgRowSize(ctx sessionctx.Context, cols []*expression.Column, handleInCols bool) (size float64)
- func (coll *HistColl) ID2UniqueID(columns []*expression.Column) *HistColl
- func (coll *HistColl) NewHistCollBySelectivity(sc *stmtctx.StatementContext, statsNodes []*StatsNode) *HistColl
- func (coll *HistColl) Selectivity(ctx sessionctx.Context, exprs []expression.Expression, ...) (float64, []*StatsNode, error)
type Histogram
- func NewHistogram(id, ndv, nullCount int64, version uint64, tp *types.FieldType, bucketSize int, ...) *Histogram
- func (hg *Histogram) AddIdxVals(idxValCntPairs []TopNMeta)
- func (hg *Histogram) AppendBucket(lower *types.Datum, upper *types.Datum, count, repeat int64)
- func (hg *Histogram) AppendBucketWithNDV(lower *types.Datum, upper *types.Datum, count, repeat, ndv int64)
- func (hg *Histogram) AvgCountPerNotNullValue(totalCount int64) float64
- func (hg *Histogram) BetweenRowCount(a, b types.Datum) float64
- func (hg *Histogram) BucketToString(bktID, idxCols int) string
- func (hg *Histogram) ConvertTo(sc *stmtctx.StatementContext, tp *types.FieldType) (*Histogram, error)
- func (hg *Histogram) Copy() *Histogram
- func (hg *Histogram) ExtractTopN(cms *CMSketch, topN *TopN, numCols int, numTopN uint32) error
- func (hg *Histogram) GetIncreaseFactor(totalCount int64) float64
- func (hg *Histogram) GetLower(idx int) *types.Datum
- func (hg *Histogram) GetUpper(idx int) *types.Datum
- func (hg *Histogram) IsIndexHist() bool
- func (hg *Histogram) Len() int
- func (hg *Histogram) LessRowCountWithBktIdx(value types.Datum) (float64, int)
- func (hg *Histogram) MemoryUsage() (sum int64)
- func (hg *Histogram) PreCalculateScalar()
- func (hg *Histogram) RemoveUpperBound() *Histogram
- func (hg *Histogram) RemoveVals(valCntPairs []TopNMeta)
- func (hg *Histogram) SplitRange(sc *stmtctx.StatementContext, oldRanges []*ranger.Range, encoded bool) ([]*ranger.Range, bool)
- func (hg *Histogram) ToString(idxCols int) string
- func (hg *Histogram) TotalRowCount() float64
- func (hg *Histogram) TruncateHistogram(numBkt int) *Histogram
type Index
- func (idx *Index) BetweenRowCount(l, r types.Datum) float64
- func (idx *Index) GetIncreaseFactor(totalCount int64) float64
- func (idx *Index) GetRowCount(sc *stmtctx.StatementContext, coll *HistColl, indexRanges []*ranger.Range, ...) (float64, error)
- func (idx *Index) IsInvalid(collPseudo bool) bool
- func (idx *Index) MemoryUsage() (sum int64)
- func (idx *Index) QueryBytes(d []byte) uint64
- func (idx *Index) String() string
- func (idx *Index) TotalRowCount() float64
type StatsNode
- func GetUsableSetsByGreedy(nodes []*StatsNode) (newBlocks []*StatsNode)
type Table
- func PseudoTable(tblInfo *model.TableInfo) *Table
- func (t *Table) PseudoAvgCountPerValue() float64
type TopN
- func (c *TopN) AppendTopN(data []byte, count uint64)
- func (c *TopN) BetweenCount(l, r []byte) uint64
- func (c *TopN) Copy() *TopN
- func (c *TopN) DecodedString(ctx sessionctx.Context, colTypes []byte) (string, error)
- func (c *TopN) Equal(cc *TopN) bool
- func (c *TopN) LowerBound(d []byte) (idx int, match bool)
- func (c *TopN) Num() int
- func (c *TopN) QueryTopN(d []byte) (uint64, bool)
- func (c *TopN) RemoveVal(val []byte)
- func (c *TopN) Sort()
- func (c *TopN) String() string
- func (c *TopN) TotalCount() uint64
type TopNMeta

Constants ¶

View Source

const (
	IndexType = iota
	PkType
	ColType
)

The type of the StatsNode.

View Source

const (
	// PseudoVersion means the pseudo statistics version is 0.
	PseudoVersion uint64 = 0

	// PseudoRowCount export for other pkg to use.
	// When we haven't analyzed a table, we use pseudo statistics to estimate costs.
	// It has row count 10000, equal condition selects 1/1000 of total rows, less condition selects 1/3 of total rows,
	// between condition selects 1/40 of total rows.
	PseudoRowCount = 10000
)

View Source

const MaxErrorRate = 0.25

MaxErrorRate is the max error rate of estimate row count of a not pseudo column. If the table is pseudo, but the average error rate is less than MaxErrorRate, then the column is not pseudo.

Variables ¶

View Source

var HistogramNeededColumns = neededColumnMap{/* contains filtered or unexported fields */}

HistogramNeededColumns stores the columns whose Histograms need to be loaded from physical kv layer. Currently, we only load index/pk's Histogram from kv automatically. Columns' are loaded by needs.

Functions ¶

func GetIndexPrefixLens ¶

func GetIndexPrefixLens(data []byte, numCols int) (prefixLens []int, err error)

GetIndexPrefixLens returns an array representing

func GetOrdinalOfRangeCond ¶

func GetOrdinalOfRangeCond(sc *stmtctx.StatementContext, ran *ranger.Range) int

GetOrdinalOfRangeCond gets the ordinal of the position range condition, if not exist, it returns the end position.

func GetPseudoRowCountByColumnRanges ¶

func GetPseudoRowCountByColumnRanges(sc *stmtctx.StatementContext, tableRowCount float64, columnRanges []*ranger.Range, colIdx int) (float64, error)

GetPseudoRowCountByColumnRanges calculate the row count by the ranges if there's no statistics information for this column.

func ValueToString ¶

func ValueToString(vars *variable.SessionVars, value *types.Datum, idxCols int, idxColumnTypes []byte) (string, error)

ValueToString converts a possible encoded value to a formatted string. If the value is encoded, then idxCols equals to number of origin values, else idxCols is 0.

Types ¶

type Bucket ¶

type Bucket struct {
	Count  int64
	Repeat int64
	NDV    int64
}

Bucket store the bucket count and repeat.

type CMSketch ¶

type CMSketch struct {
	// contains filtered or unexported fields
}

CMSketch is used to estimate point queries. Refer: https://en.wikipedia.org/wiki/Count-min_sketch

func (*CMSketch) CalcDefaultValForAnalyze ¶

func (c *CMSketch) CalcDefaultValForAnalyze(NDV uint64)

CalcDefaultValForAnalyze calculate the default value for Analyze. The value of it is count / NDV in CMSketch. This means count and NDV are not include topN.

func (*CMSketch) Copy ¶

func (c *CMSketch) Copy() *CMSketch

Copy makes a copy for current CMSketch.

func (*CMSketch) Equal ¶

func (c *CMSketch) Equal(rc *CMSketch) bool

Equal tests if two CM Sketch equal, it is only used for test.

func (*CMSketch) GetWidthAndDepth ¶

func (c *CMSketch) GetWidthAndDepth() (int32, int32)

GetWidthAndDepth returns the width and depth of CM Sketch.

func (*CMSketch) InsertBytes ¶

func (c *CMSketch) InsertBytes(bytes []byte)

InsertBytes inserts the bytes value into the CM Sketch.

func (*CMSketch) InsertBytesByCount ¶

func (c *CMSketch) InsertBytesByCount(bytes []byte, count uint64)

InsertBytesByCount adds the bytes value into the TopN (if value already in TopN) or CM Sketch by delta, this does not updates c.defaultValue.

func (*CMSketch) MemoryUsage ¶

func (c *CMSketch) MemoryUsage() (sum int64)

MemoryUsage returns the total memory usage of a CMSketch. only calc the hashtable size(CMSketch.table) and the CMSketch.topN data are not tracked because size of CMSketch.topN take little influence We ignore the size of other metadata in CMSketch.

func (*CMSketch) MergeCMSketch ¶

func (c *CMSketch) MergeCMSketch(rc *CMSketch) error

MergeCMSketch merges two CM Sketch.

func (*CMSketch) MergeCMSketch4IncrementalAnalyze ¶

func (c *CMSketch) MergeCMSketch4IncrementalAnalyze(rc *CMSketch, numTopN uint32) error

MergeCMSketch4IncrementalAnalyze merges two CM Sketch for incremental analyze. Since there is no value that appears partially in `c` and `rc` for incremental analyze, it uses `max` to merge them. Here is a simple proof: when we query from the CM sketch, we use the `min` to get the answer:

(1): For values that only appears in `c, using `max` to merge them affects the `min` query result less than using `sum`;
(2): For values that only appears in `rc`, it is the same as condition (1);
(3): For values that appears both in `c` and `rc`, if they do not appear partially in `c` and `rc`, for example,
     if `v` appears 5 times in the table, it can appears 5 times in `c` and 3 times in `rc`, then `max` also gives the correct answer.

So in fact, if we can know the number of appearances of each value in the first place, it is better to use `max` to construct the CM sketch rather than `sum`.

func (*CMSketch) QueryBytes ¶

func (c *CMSketch) QueryBytes(d []byte) uint64

QueryBytes is used to query the count of specified bytes.

func (*CMSketch) SubValue ¶

func (c *CMSketch) SubValue(h1, h2 uint64, count uint64)

SubValue remove a value from the CMSketch.

func (*CMSketch) TotalCount ¶

func (c *CMSketch) TotalCount() uint64

TotalCount returns the total count in the sketch, it is only used for test.

type Column ¶

type Column struct {
	Histogram
	*CMSketch
	*TopN
	*FMSketch
	PhysicalID int64
	Count      int64
	Info       *model.ColumnInfo
	IsHandle   bool
	ErrorRate
	Flag           int64
	LastAnalyzePos types.Datum
	StatsVer       int64 // StatsVer is the version of the current stats, used to maintain compatibility
}

Column represents a column histogram.

func (*Column) AvgColSize ¶

func (c *Column) AvgColSize(count int64, isKey bool) float64

AvgColSize is the average column size of the histogram. These sizes are derived from function `encode` and `Datum::ConvertTo`, so we need to update them if those 2 functions are changed.

func (*Column) AvgColSizeChunkFormat ¶

func (c *Column) AvgColSizeChunkFormat(count int64) float64

AvgColSizeChunkFormat is the average column size of the histogram. These sizes are derived from function `Encode` and `DecodeToChunk`, so we need to update them if those 2 functions are changed.

func (*Column) AvgColSizeListInDisk ¶

func (c *Column) AvgColSizeListInDisk(count int64) float64

AvgColSizeListInDisk is the average column size of the histogram. These sizes are derived from `chunk.ListInDisk` so we need to update them if those 2 functions are changed.

func (*Column) BetweenRowCount ¶

func (c *Column) BetweenRowCount(sc *stmtctx.StatementContext, l, r types.Datum, lowEncoded, highEncoded []byte) float64

BetweenRowCount estimates the row count for interval [l, r).

func (*Column) GetColumnRowCount ¶

func (c *Column) GetColumnRowCount(sc *stmtctx.StatementContext, ranges []*ranger.Range, modifyCount int64, pkIsHandle bool) (float64, error)

GetColumnRowCount estimates the row count by a slice of Range.

func (*Column) GetIncreaseFactor ¶

func (c *Column) GetIncreaseFactor(totalCount int64) float64

GetIncreaseFactor get the increase factor to adjust the final estimated count when the table is modified.

func (*Column) IsInvalid ¶

func (c *Column) IsInvalid(sc *stmtctx.StatementContext, collPseudo bool) bool

IsInvalid checks if this column is invalid. If this column has histogram but not loaded yet, then we mark it as need histogram.

func (*Column) MemoryUsage ¶

func (c *Column) MemoryUsage() (sum int64)

MemoryUsage returns the total memory usage of Histogram and CMSketch in Column. We ignore the size of other metadata in Column

func (*Column) String ¶

func (c *Column) String() string

func (*Column) TotalRowCount ¶

func (c *Column) TotalRowCount() float64

TotalRowCount returns the total count of this column.

type ErrorRate ¶

type ErrorRate struct {
	ErrorTotal float64
	QueryTotal int64
}

ErrorRate is the error rate of estimate row count by bucket and cm sketch.

func (*ErrorRate) Merge ¶

func (e *ErrorRate) Merge(rate *ErrorRate)

Merge range merges two ErrorRate.

func (*ErrorRate) NotAccurate ¶

func (e *ErrorRate) NotAccurate() bool

NotAccurate is true when the total of query is zero or the average error rate is greater than MaxErrorRate.

func (*ErrorRate) Update ¶

func (e *ErrorRate) Update(rate float64)

Update updates the ErrorRate.

type FMSketch ¶

type FMSketch struct {
	// contains filtered or unexported fields
}

FMSketch is used to count the number of distinct elements in a set.

func (*FMSketch) Copy ¶

func (s *FMSketch) Copy() *FMSketch

Copy makes a copy for current FMSketch.

func (*FMSketch) InsertRowValue ¶

func (s *FMSketch) InsertRowValue(sc *stmtctx.StatementContext, values []types.Datum) error

InsertRowValue inserts multi-column values to the sketch.

func (*FMSketch) InsertValue ¶

func (s *FMSketch) InsertValue(sc *stmtctx.StatementContext, value types.Datum) error

InsertValue inserts a value into the FM sketch.

func (*FMSketch) MemoryUsage ¶

func (s *FMSketch) MemoryUsage() (sum int64)

MemoryUsage returns the total memory usage of a FMSketch.

func (*FMSketch) MergeFMSketch ¶

func (s *FMSketch) MergeFMSketch(rs *FMSketch)

MergeFMSketch merges two FM Sketch.

func (*FMSketch) NDV ¶

func (s *FMSketch) NDV() int64

NDV returns the ndv of the sketch.

type HistColl ¶

type HistColl struct {
	PhysicalID int64
	Columns    map[int64]*Column
	Indices    map[int64]*Index
	// Idx2ColumnIDs maps the index id to its column ids. It's used to calculate the selectivity in planner.
	Idx2ColumnIDs map[int64][]int64
	// ColID2IdxID maps the column id to index id whose first column is it. It's used to calculate the selectivity in planner.
	ColID2IdxID map[int64]int64
	Count       int64
	ModifyCount int64 // Total modify count in a table.

	// HavePhysicalID is true means this HistColl is from single table and have its ID's information.
	// The physical id is used when try to load column stats from storage.
	HavePhysicalID bool
	Pseudo         bool
}

HistColl is a collection of histogram. It collects enough information for plan to calculate the selectivity.

func (*HistColl) GenerateHistCollFromColumnInfo ¶

func (coll *HistColl) GenerateHistCollFromColumnInfo(infos []*model.ColumnInfo, columns []*expression.Column) *HistColl

GenerateHistCollFromColumnInfo generates a new HistColl whose ColID2IdxID and IdxID2ColIDs is built from the given parameter.

func (*HistColl) GetAvgRowSize ¶

func (coll *HistColl) GetAvgRowSize(ctx sessionctx.Context, cols []*expression.Column, isEncodedKey bool, isForScan bool) (size float64)

GetAvgRowSize computes average row size for given columns.

func (*HistColl) GetAvgRowSizeListInDisk ¶

func (coll *HistColl) GetAvgRowSizeListInDisk(cols []*expression.Column) (size float64)

GetAvgRowSizeListInDisk computes average row size for given columns.

func (*HistColl) GetIndexAvgRowSize ¶

func (coll *HistColl) GetIndexAvgRowSize(ctx sessionctx.Context, cols []*expression.Column, isUnique bool) (size float64)

GetIndexAvgRowSize computes average row size for a index scan.

func (*HistColl) GetRowCountByColumnRanges ¶

func (coll *HistColl) GetRowCountByColumnRanges(sc *stmtctx.StatementContext, colID int64, colRanges []*ranger.Range) (float64, error)

GetRowCountByColumnRanges estimates the row count by a slice of Range.

func (*HistColl) GetRowCountByIndexRanges ¶

func (coll *HistColl) GetRowCountByIndexRanges(sc *stmtctx.StatementContext, idxID int64, indexRanges []*ranger.Range) (float64, error)

GetRowCountByIndexRanges estimates the row count by a slice of Range.

func (*HistColl) GetRowCountByIntColumnRanges ¶

func (coll *HistColl) GetRowCountByIntColumnRanges(sc *stmtctx.StatementContext, colID int64, intRanges []*ranger.Range) (float64, error)

GetRowCountByIntColumnRanges estimates the row count by a slice of IntColumnRange.

func (*HistColl) GetTableAvgRowSize ¶

func (coll *HistColl) GetTableAvgRowSize(ctx sessionctx.Context, cols []*expression.Column, handleInCols bool) (size float64)

GetTableAvgRowSize computes average row size for a table scan, exclude the index key-value pairs.

func (*HistColl) ID2UniqueID ¶

func (coll *HistColl) ID2UniqueID(columns []*expression.Column) *HistColl

ID2UniqueID generates a new HistColl whose `Columns` is built from UniqueID of given columns.

func (*HistColl) NewHistCollBySelectivity ¶

func (coll *HistColl) NewHistCollBySelectivity(sc *stmtctx.StatementContext, statsNodes []*StatsNode) *HistColl

NewHistCollBySelectivity creates new HistColl by the given statsNodes.

func (*HistColl) Selectivity ¶

func (coll *HistColl) Selectivity(ctx sessionctx.Context, exprs []expression.Expression, filledPaths []*planutil.AccessPath) (float64, []*StatsNode, error)

Selectivity is a function calculate the selectivity of the expressions. The definition of selectivity is (row count after filter / row count before filter). And exprs must be CNF now, in other words, `exprs[0] and exprs[1] and ... and exprs[len - 1]` should be held when you call this. Currently the time complexity is o(n^2).

type Histogram ¶

type Histogram struct {
	ID        int64 // Column ID.
	NDV       int64 // Number of distinct values.
	NullCount int64 // Number of null values.
	// LastUpdateVersion is the version that this histogram updated last time.
	LastUpdateVersion uint64

	Tp *types.FieldType

	// Histogram elements.
	//
	// A bucket bound is the smallest and greatest values stored in the bucket. The lower and upper bound
	// are stored in one column.
	//
	// A bucket count is the number of items stored in all previous buckets and the current bucket.
	// Bucket counts are always in increasing order.
	//
	// A bucket repeat is the number of repeats of the bucket value, it can be used to find popular values.
	Bounds  *chunk.Chunk
	Buckets []Bucket

	// TotColSize is the total column size for the histogram.
	// For unfixed-len types, it includes LEN and BYTE.
	TotColSize int64

	// Correlation is the statistical correlation between physical row ordering and logical ordering of
	// the column values. This ranges from -1 to +1, and it is only valid for Column histogram, not for
	// Index histogram.
	Correlation float64
	// contains filtered or unexported fields
}

Histogram represents statistics for a column or index.

func NewHistogram ¶

func NewHistogram(id, ndv, nullCount int64, version uint64, tp *types.FieldType, bucketSize int, totColSize int64) *Histogram

NewHistogram creates a new histogram.

func (*Histogram) AddIdxVals ¶

func (hg *Histogram) AddIdxVals(idxValCntPairs []TopNMeta)

AddIdxVals adds the given values to the histogram.

func (*Histogram) AppendBucket ¶

func (hg *Histogram) AppendBucket(lower *types.Datum, upper *types.Datum, count, repeat int64)

AppendBucket appends a bucket into `hg`.

func (*Histogram) AppendBucketWithNDV ¶

func (hg *Histogram) AppendBucketWithNDV(lower *types.Datum, upper *types.Datum, count, repeat, ndv int64)

AppendBucketWithNDV appends a bucket into `hg` and set value for field `NDV`.

func (*Histogram) AvgCountPerNotNullValue ¶

func (hg *Histogram) AvgCountPerNotNullValue(totalCount int64) float64

AvgCountPerNotNullValue gets the average row count per value by the data of histogram.

func (*Histogram) BetweenRowCount ¶

func (hg *Histogram) BetweenRowCount(a, b types.Datum) float64

BetweenRowCount estimates the row count where column greater or equal to a and less than b.

func (*Histogram) BucketToString ¶

func (hg *Histogram) BucketToString(bktID, idxCols int) string

BucketToString change the given bucket to string format.

func (*Histogram) ConvertTo ¶

func (hg *Histogram) ConvertTo(sc *stmtctx.StatementContext, tp *types.FieldType) (*Histogram, error)

ConvertTo converts the histogram bucket values into `Tp`.

func (*Histogram) Copy ¶

func (hg *Histogram) Copy() *Histogram

Copy deep copies the histogram.

func (*Histogram) ExtractTopN ¶

func (hg *Histogram) ExtractTopN(cms *CMSketch, topN *TopN, numCols int, numTopN uint32) error

ExtractTopN extracts topn from histogram.

func (*Histogram) GetIncreaseFactor ¶

func (hg *Histogram) GetIncreaseFactor(totalCount int64) float64

GetIncreaseFactor will return a factor of data increasing after the last analysis.

func (*Histogram) GetLower ¶

func (hg *Histogram) GetLower(idx int) *types.Datum

GetLower gets the lower bound of bucket `idx`.

func (*Histogram) GetUpper ¶

func (hg *Histogram) GetUpper(idx int) *types.Datum

GetUpper gets the upper bound of bucket `idx`.

func (*Histogram) IsIndexHist ¶

func (hg *Histogram) IsIndexHist() bool

IsIndexHist checks whether current histogram is one for index.

func (*Histogram) Len ¶

func (hg *Histogram) Len() int

Len is the number of buckets in the histogram.

func (*Histogram) LessRowCountWithBktIdx ¶

func (hg *Histogram) LessRowCountWithBktIdx(value types.Datum) (float64, int)

LessRowCountWithBktIdx estimates the row count where the column less than value.

func (*Histogram) MemoryUsage ¶

func (hg *Histogram) MemoryUsage() (sum int64)

MemoryUsage returns the total memory usage of this Histogram. everytime changed the Histogram of the table, it will cost O(n) complexity so calculate the memoryUsage might cost little time. We ignore the size of other metadata in Histogram.

func (*Histogram) PreCalculateScalar ¶

func (hg *Histogram) PreCalculateScalar()

PreCalculateScalar converts the lower and upper to scalar. When the datum type is KindString or KindBytes, we also calculate their common prefix length, because when a value falls between lower and upper, the common prefix of lower and upper equals to the common prefix of the lower, upper and the value. For some simple types like `Int64`, we do not convert it because we can directly infer the scalar value.

func (*Histogram) RemoveUpperBound ¶

func (hg *Histogram) RemoveUpperBound() *Histogram

RemoveUpperBound removes the upper bound from histogram. It is used when merge stats for incremental analyze.

func (*Histogram) RemoveVals ¶

func (hg *Histogram) RemoveVals(valCntPairs []TopNMeta)

RemoveVals remove the given values from the histogram. This function contains an **ASSUMPTION**: valCntPairs is sorted in ascending order.

func (*Histogram) SplitRange ¶

func (hg *Histogram) SplitRange(sc *stmtctx.StatementContext, oldRanges []*ranger.Range, encoded bool) ([]*ranger.Range, bool)

SplitRange splits the range according to the histogram lower bound. Note that we treat first bucket's lower bound as -inf and last bucket's upper bound as +inf, so all the split ranges will totally fall in one of the (-inf, l(1)), [l(1), l(2)),...[l(n-2), l(n-1)), [l(n-1), +inf), where n is the number of buckets, l(i) is the i-th bucket's lower bound.

func (*Histogram) ToString ¶

func (hg *Histogram) ToString(idxCols int) string

ToString gets the string representation for the histogram.

func (*Histogram) TotalRowCount ¶

func (hg *Histogram) TotalRowCount() float64

TotalRowCount returns the total count of this histogram.

func (*Histogram) TruncateHistogram ¶

func (hg *Histogram) TruncateHistogram(numBkt int) *Histogram

TruncateHistogram truncates the histogram to `numBkt` buckets.

type Index ¶

type Index struct {
	Histogram
	*CMSketch
	*TopN
	FMSketch *FMSketch
	ErrorRate
	StatsVer       int64 // StatsVer is the version of the current stats, used to maintain compatibility
	Info           *model.IndexInfo
	Flag           int64
	LastAnalyzePos types.Datum
}

Index represents an index histogram.

func (*Index) BetweenRowCount ¶

func (idx *Index) BetweenRowCount(l, r types.Datum) float64

BetweenRowCount estimates the row count for interval [l, r).

func (*Index) GetIncreaseFactor ¶

func (idx *Index) GetIncreaseFactor(totalCount int64) float64

GetIncreaseFactor get the increase factor to adjust the final estimated count when the table is modified.

func (*Index) GetRowCount ¶

func (idx *Index) GetRowCount(sc *stmtctx.StatementContext, coll *HistColl, indexRanges []*ranger.Range, modifyCount int64) (float64, error)

GetRowCount returns the row count of the given ranges. It uses the modifyCount to adjust the influence of modifications on the table.

func (*Index) IsInvalid ¶

func (idx *Index) IsInvalid(collPseudo bool) bool

IsInvalid checks if this index is invalid.

func (*Index) MemoryUsage ¶

func (idx *Index) MemoryUsage() (sum int64)

MemoryUsage returns the total memory usage of a Histogram and CMSketch in Index. We ignore the size of other metadata in Index.

func (*Index) QueryBytes ¶

func (idx *Index) QueryBytes(d []byte) uint64

QueryBytes is used to query the count of specified bytes.

func (*Index) String ¶

func (idx *Index) String() string

func (*Index) TotalRowCount ¶

func (idx *Index) TotalRowCount() float64

TotalRowCount returns the total count of this index.

type StatsNode ¶

type StatsNode struct {
	Tp int
	ID int64

	// Ranges contains all the Ranges we got.
	Ranges []*ranger.Range
	// Selectivity indicates the Selectivity of this column/index.
	Selectivity float64
	// contains filtered or unexported fields
}

StatsNode is used for calculating selectivity.

func GetUsableSetsByGreedy ¶

func GetUsableSetsByGreedy(nodes []*StatsNode) (newBlocks []*StatsNode)

GetUsableSetsByGreedy will select the indices and pk used for calculate selectivity by greedy algorithm.

type Table ¶

type Table struct {
	HistColl
	Version uint64
}

Table represents statistics for a table.

func PseudoTable ¶

func PseudoTable(tblInfo *model.TableInfo) *Table

PseudoTable creates a pseudo table statistics.

func (*Table) PseudoAvgCountPerValue ¶

func (t *Table) PseudoAvgCountPerValue() float64

PseudoAvgCountPerValue gets a pseudo average count if histogram not exists.

type TopN ¶

type TopN struct {
	TopN []TopNMeta
}

TopN stores most-common values, which is used to estimate point queries.

func (*TopN) AppendTopN ¶

func (c *TopN) AppendTopN(data []byte, count uint64)

AppendTopN appends a topn into the TopN struct.

func (*TopN) BetweenCount ¶

func (c *TopN) BetweenCount(l, r []byte) uint64

BetweenCount estimates the row count for interval [l, r).

func (*TopN) Copy ¶

func (c *TopN) Copy() *TopN

Copy makes a copy for current TopN.

func (*TopN) DecodedString ¶

func (c *TopN) DecodedString(ctx sessionctx.Context, colTypes []byte) (string, error)

DecodedString returns the value with decoded result.

func (*TopN) Equal ¶

func (c *TopN) Equal(cc *TopN) bool

Equal checks whether the two TopN are equal.

func (*TopN) LowerBound ¶

func (c *TopN) LowerBound(d []byte) (idx int, match bool)

LowerBound searches on the sorted top-n items, returns the smallest index i such that the value at element i is not less than `d`.

func (*TopN) Num ¶

func (c *TopN) Num() int

Num returns the ndv of the TopN.

TopN is declared directly in Histogram. So the Len is occupied by the Histogram. We use Num instead.

func (*TopN) QueryTopN ¶

func (c *TopN) QueryTopN(d []byte) (uint64, bool)

QueryTopN returns the results for (h1, h2) in murmur3.Sum128(), if not exists, return (0, false).

func (*TopN) RemoveVal ¶

func (c *TopN) RemoveVal(val []byte)

RemoveVal remove the val from TopN if it exists.

func (*TopN) Sort ¶

func (c *TopN) Sort()

Sort sorts the topn items.

func (*TopN) String ¶

func (c *TopN) String() string

func (*TopN) TotalCount ¶

func (c *TopN) TotalCount() uint64

TotalCount returns how many data is stored in TopN.

type TopNMeta ¶

type TopNMeta struct {
	Encoded []byte
	Count   uint64
}

TopNMeta stores the unit of the TopN.

Source Files ¶

View all Source files

?	: This menu
/	: Search site
f or F	: Jump to
y or Y	: Canonical URL