tidb: github.com/pingcap/tidb/statistics Index | Files | Directories

package statistics

import "github.com/pingcap/tidb/statistics"


Package Files

analyze_jobs.go builder.go cmsketch.go estimate.go feedback.go fmsketch.go histogram.go sample.go scalar.go selectivity.go table.go


const (
    CurStatsVersion = Version1
    Version1        = 1

Constants for the stats version. These constants can be used to solve compatibility issues.

const (
    IndexType = iota

The type of the StatsNode.

const (
    // PseudoVersion means the pseudo statistics version is 0.
    PseudoVersion uint64 = 0

    // PseudoRowCount export for other pkg to use.
    // When we haven't analyzed a table, we use pseudo statistics to estimate costs.
    // It has row count 10000, equal condition selects 1/1000 of total rows, less condition selects 1/3 of total rows,
    // between condition selects 1/40 of total rows.
    PseudoRowCount = 10000
const AnalyzeFlag = 1

AnalyzeFlag is set when the statistics comes from analyze and has not been modified by feedback.

const MaxErrorRate = 0.25

MaxErrorRate is the maximum error rate of the estimated row count for a column to be treated as not pseudo. If the table is pseudo but the average error rate is less than MaxErrorRate, then the column is not pseudo.


var (
    // MaxNumberOfRanges is the max number of ranges before split to collect feedback.
    MaxNumberOfRanges = 20
    // FeedbackProbability is the probability to collect the feedback.
    FeedbackProbability = atomic.NewFloat64(0)
var HistogramNeededColumns = neededColumnMap{/* contains filtered or unexported fields */}

HistogramNeededColumns stores the columns whose histograms need to be loaded from the physical kv layer. Currently, only the histograms of indexes and primary keys are loaded from kv automatically; column histograms are loaded on demand.

var RatioOfPseudoEstimate = atomic.NewFloat64(0.7)

RatioOfPseudoEstimate is a threshold: if modifyCount / statsTblCount is greater than this ratio, the stats are considered invalid and pseudo estimation is used.

func AddNewAnalyzeJob Uses

func AddNewAnalyzeJob(job *AnalyzeJob)

AddNewAnalyzeJob adds new analyze job.

func CMSketchToProto Uses

func CMSketchToProto(c *CMSketch) *tipb.CMSketch

CMSketchToProto converts CMSketch to its protobuf representation.

func ClearHistoryJobs Uses

func ClearHistoryJobs()

ClearHistoryJobs clears all history jobs.

func ConvertDatumsType Uses

func ConvertDatumsType(vals []types.Datum, ft *types.FieldType, loc *time.Location) error

ConvertDatumsType converts the datums type to `ft`.

func DecodeFeedback Uses

func DecodeFeedback(val []byte, q *QueryFeedback, c *CMSketch, ft *types.FieldType) error

DecodeFeedback decodes a byte slice to feedback.

func EncodeCMSketchWithoutTopN Uses

func EncodeCMSketchWithoutTopN(c *CMSketch) ([]byte, error)

EncodeCMSketchWithoutTopN encodes the given CMSketch to byte slice. Note that it does not include the topN.

func EncodeFeedback Uses

func EncodeFeedback(q *QueryFeedback) ([]byte, error)

EncodeFeedback encodes the given feedback to byte slice.

func FMSketchToProto Uses

func FMSketchToProto(s *FMSketch) *tipb.FMSketch

FMSketchToProto converts FMSketch to its protobuf representation.

func GetOrdinalOfRangeCond Uses

func GetOrdinalOfRangeCond(sc *stmtctx.StatementContext, ran *ranger.Range) int

GetOrdinalOfRangeCond gets the ordinal of the position range condition. If it does not exist, it returns the end position.

func GetPseudoRowCountByColumnRanges Uses

func GetPseudoRowCountByColumnRanges(sc *stmtctx.StatementContext, tableRowCount float64, columnRanges []*ranger.Range, colIdx int) (float64, error)

GetPseudoRowCountByColumnRanges calculates the row count by the ranges if there is no statistics information for this column.

func HistogramEqual Uses

func HistogramEqual(a, b *Histogram, ignoreID bool) bool

HistogramEqual tests if two histograms are equal.

func HistogramToProto Uses

func HistogramToProto(hg *Histogram) *tipb.Histogram

HistogramToProto converts Histogram to its protobuf representation. Note that when this is used, the lower/upper bound in the bucket must be BytesDatum.

func IsAnalyzed Uses

func IsAnalyzed(flag int64) bool

IsAnalyzed checks whether this flag contains AnalyzeFlag.

func MoveToHistory Uses

func MoveToHistory(job *AnalyzeJob)

MoveToHistory moves the analyze job to history.

func ResetAnalyzeFlag Uses

func ResetAnalyzeFlag(flag int64) int64

ResetAnalyzeFlag resets the AnalyzeFlag because it has been modified by feedback.

func RowToDatums Uses

func RowToDatums(row chunk.Row, fields []*ast.ResultField) []types.Datum

RowToDatums converts row to datum slice.

func SampleCollectorToProto Uses

func SampleCollectorToProto(c *SampleCollector) *tipb.SampleCollector

SampleCollectorToProto converts SampleCollector to its protobuf representation.

func SortSampleItems Uses

func SortSampleItems(sc *stmtctx.StatementContext, items []*SampleItem) error

SortSampleItems sorts a slice of SampleItem.

func SplitFeedbackByQueryType Uses

func SplitFeedbackByQueryType(feedbacks []Feedback) ([]Feedback, []Feedback)

SplitFeedbackByQueryType splits the feedbacks into equality feedbacks and range feedbacks.

func SupportColumnType Uses

func SupportColumnType(ft *types.FieldType) bool

SupportColumnType checks if the type of the column can be updated by feedback.

func ValueToString Uses

func ValueToString(value *types.Datum, idxCols int) (string, error)

ValueToString converts a possibly encoded value to a formatted string. If the value is encoded, then idxCols equals the number of original values; otherwise idxCols is 0.

type AnalyzeJob Uses

type AnalyzeJob struct {
    DBName        string
    TableName     string
    PartitionName string
    JobInfo       string
    RowCount      int64
    StartTime     time.Time
    State         string
    // contains filtered or unexported fields

AnalyzeJob is used to represent the status of one analyze job.

func GetAllAnalyzeJobs Uses

func GetAllAnalyzeJobs() []*AnalyzeJob

GetAllAnalyzeJobs gets all analyze jobs.

func (*AnalyzeJob) Finish Uses

func (job *AnalyzeJob) Finish(meetError bool)

Finish updates the status of the analyze job to finished or failed according to `meetError`.

func (*AnalyzeJob) Start Uses

func (job *AnalyzeJob) Start()

Start marks the status of the analyze job as running and updates the start time.

func (*AnalyzeJob) Update Uses

func (job *AnalyzeJob) Update(rowCount int64)

Update updates the row count of analyze job.

type Bucket Uses

type Bucket struct {
    Count  int64
    Repeat int64

Bucket stores the bucket count and repeat.

type BucketFeedback Uses

type BucketFeedback struct {
    // contains filtered or unexported fields

BucketFeedback stands for all the feedback for a bucket.

type CMSketch Uses

type CMSketch struct {
    // contains filtered or unexported fields

CMSketch is used to estimate point queries. Refer: https://en.wikipedia.org/wiki/Count-min_sketch

func CMSketchFromProto Uses

func CMSketchFromProto(protoSketch *tipb.CMSketch) *CMSketch

CMSketchFromProto converts CMSketch from its protobuf representation.

func DecodeCMSketch Uses

func DecodeCMSketch(data []byte, topNRows []chunk.Row) (*CMSketch, error)

DecodeCMSketch decodes a CMSketch from the given byte slice.

func NewCMSketch Uses

func NewCMSketch(d, w int32) *CMSketch

NewCMSketch returns a new CM sketch.

func NewCMSketchWithTopN Uses

func NewCMSketchWithTopN(d, w int32, sample [][]byte, numTop uint32, rowCount uint64) (*CMSketch, uint64, uint64)

NewCMSketchWithTopN returns a new CM sketch with TopN elements, the estimate NDV and the scale ratio.

func UpdateCMSketch Uses

func UpdateCMSketch(c *CMSketch, eqFeedbacks []Feedback) *CMSketch

UpdateCMSketch updates the CMSketch by feedback.

func (*CMSketch) AppendTopN Uses

func (c *CMSketch) AppendTopN(data []byte, count uint64)

AppendTopN appends a topn into the cm sketch.

func (*CMSketch) Copy Uses

func (c *CMSketch) Copy() *CMSketch

Copy makes a copy for current CMSketch.

func (*CMSketch) Equal Uses

func (c *CMSketch) Equal(rc *CMSketch) bool

Equal tests if two CM Sketches are equal. It is only used for tests.

func (*CMSketch) GetWidthAndDepth Uses

func (c *CMSketch) GetWidthAndDepth() (int32, int32)

GetWidthAndDepth returns the width and depth of CM Sketch.

func (*CMSketch) InsertBytes Uses

func (c *CMSketch) InsertBytes(bytes []byte)

InsertBytes inserts the bytes value into the CM Sketch.

func (*CMSketch) MergeCMSketch Uses

func (c *CMSketch) MergeCMSketch(rc *CMSketch, numTopN uint32) error

MergeCMSketch merges two CM Sketch.

func (*CMSketch) MergeCMSketch4IncrementalAnalyze Uses

func (c *CMSketch) MergeCMSketch4IncrementalAnalyze(rc *CMSketch, numTopN uint32) error

MergeCMSketch4IncrementalAnalyze merges two CM Sketch for incremental analyze. Since there is no value that appears partially in `c` and `rc` for incremental analyze, it uses `max` to merge them. Here is a simple proof: when we query from the CM sketch, we use the `min` to get the answer:

(1): For values that only appear in `c`, using `max` to merge them affects the `min` query result less than using `sum`;
(2): For values that only appear in `rc`, it is the same as condition (1);
(3): For values that appear both in `c` and `rc`, if they do not appear partially in `c` and `rc`, for example,
     if `v` appears 5 times in the table, it can appear 5 times in `c` and 3 times in `rc`, then `max` also gives the correct answer.

So in fact, if we can know the number of appearances of each value in the first place, it is better to use `max` to construct the CM sketch rather than `sum`.

func (*CMSketch) QueryBytes Uses

func (c *CMSketch) QueryBytes(d []byte) uint64

QueryBytes is used to query the count of specified bytes.

func (*CMSketch) QueryTopN Uses

func (c *CMSketch) QueryTopN(h1, h2 uint64, d []byte) (uint64, bool)

QueryTopN returns the results for (h1, h2) in murmur3.Sum128(). If no result exists, it returns (0, false).

func (*CMSketch) TopN Uses

func (c *CMSketch) TopN() []*TopNMeta

TopN gets all the topN meta.

func (*CMSketch) TopNMap Uses

func (c *CMSketch) TopNMap() map[uint64][]*TopNMeta

TopNMap gets the origin topN map.

func (*CMSketch) TotalCount Uses

func (c *CMSketch) TotalCount() uint64

TotalCount returns the total count in the sketch, it is only used for test.

type Column Uses

type Column struct {
    PhysicalID int64
    Count      int64
    Info       *model.ColumnInfo
    IsHandle   bool
    Flag           int64
    LastAnalyzePos types.Datum

Column represents a column histogram.

func (*Column) AvgColSize Uses

func (c *Column) AvgColSize(count int64, isKey bool) float64

AvgColSize is the average column size of the histogram. These sizes are derived from function `encode` and `Datum::ConvertTo`, so we need to update them if those 2 functions are changed.

func (*Column) AvgColSizeChunkFormat Uses

func (c *Column) AvgColSizeChunkFormat(count int64) float64

AvgColSizeChunkFormat is the average column size of the histogram. These sizes are derived from function `Encode` and `DecodeToChunk`, so we need to update them if those 2 functions are changed.

func (*Column) AvgColSizeListInDisk Uses

func (c *Column) AvgColSizeListInDisk(count int64) float64

AvgColSizeListInDisk is the average column size of the histogram. These sizes are derived from `chunk.ListInDisk`, so we need to update them if it is changed.

func (*Column) GetColumnRowCount Uses

func (c *Column) GetColumnRowCount(sc *stmtctx.StatementContext, ranges []*ranger.Range, modifyCount int64, pkIsHandle bool) (float64, error)

GetColumnRowCount estimates the row count by a slice of Range.

func (*Column) IsInvalid Uses

func (c *Column) IsInvalid(sc *stmtctx.StatementContext, collPseudo bool) bool

IsInvalid checks if this column is invalid. If this column has a histogram that is not loaded yet, then we mark it as needing its histogram.

func (*Column) String Uses

func (c *Column) String() string

type ErrorRate Uses

type ErrorRate struct {
    ErrorTotal float64
    QueryTotal int64

ErrorRate is the error rate of estimate row count by bucket and cm sketch.

func (*ErrorRate) Merge Uses

func (e *ErrorRate) Merge(rate *ErrorRate)

Merge merges two ErrorRate.

func (*ErrorRate) NotAccurate Uses

func (e *ErrorRate) NotAccurate() bool

NotAccurate is true when the total of query is zero or the average error rate is greater than MaxErrorRate.

func (*ErrorRate) Update Uses

func (e *ErrorRate) Update(rate float64)

Update updates the ErrorRate.

type FMSketch Uses

type FMSketch struct {
    // contains filtered or unexported fields

FMSketch is used to count the number of distinct elements in a set.

func FMSketchFromProto Uses

func FMSketchFromProto(protoSketch *tipb.FMSketch) *FMSketch

FMSketchFromProto converts FMSketch from its protobuf representation.

func NewFMSketch Uses

func NewFMSketch(maxSize int) *FMSketch

NewFMSketch returns a new FM sketch.

func (*FMSketch) InsertValue Uses

func (s *FMSketch) InsertValue(sc *stmtctx.StatementContext, value types.Datum) error

InsertValue inserts a value into the FM sketch.

func (*FMSketch) NDV Uses

func (s *FMSketch) NDV() int64

NDV returns the ndv of the sketch.

type Feedback Uses

type Feedback struct {
    Lower  *types.Datum
    Upper  *types.Datum
    Count  int64
    Repeat int64

Feedback represents the total scan count in range [lower, upper).

type HistColl Uses

type HistColl struct {
    PhysicalID int64
    Columns    map[int64]*Column
    Indices    map[int64]*Index
    // Idx2ColumnIDs maps the index id to its column ids. It's used to calculate the selectivity in planner.
    Idx2ColumnIDs map[int64][]int64
    // ColID2IdxID maps the column id to index id whose first column is it. It's used to calculate the selectivity in planner.
    ColID2IdxID map[int64]int64
    Count       int64
    ModifyCount int64 // Total modify count in a table.

    // HavePhysicalID is true means this HistColl is from single table and have its ID's information.
    // The physical id is used when try to load column stats from storage.
    HavePhysicalID bool
    Pseudo         bool

HistColl is a collection of histogram. It collects enough information for plan to calculate the selectivity.

func (*HistColl) GenerateHistCollFromColumnInfo Uses

func (coll *HistColl) GenerateHistCollFromColumnInfo(infos []*model.ColumnInfo, columns []*expression.Column) *HistColl

GenerateHistCollFromColumnInfo generates a new HistColl whose ColID2IdxID and Idx2ColumnIDs are built from the given parameters.

func (*HistColl) GetAvgRowSize Uses

func (coll *HistColl) GetAvgRowSize(ctx sessionctx.Context, cols []*expression.Column, isEncodedKey bool, isForScan bool) (size float64)

GetAvgRowSize computes average row size for given columns.

func (*HistColl) GetAvgRowSizeListInDisk Uses

func (coll *HistColl) GetAvgRowSizeListInDisk(cols []*expression.Column) (size float64)

GetAvgRowSizeListInDisk computes average row size for given columns.

func (*HistColl) GetIndexAvgRowSize Uses

func (coll *HistColl) GetIndexAvgRowSize(ctx sessionctx.Context, cols []*expression.Column, isUnique bool) (size float64)

GetIndexAvgRowSize computes average row size for an index scan.

func (*HistColl) GetRowCountByColumnRanges Uses

func (coll *HistColl) GetRowCountByColumnRanges(sc *stmtctx.StatementContext, colID int64, colRanges []*ranger.Range) (float64, error)

GetRowCountByColumnRanges estimates the row count by a slice of Range.

func (*HistColl) GetRowCountByIndexRanges Uses

func (coll *HistColl) GetRowCountByIndexRanges(sc *stmtctx.StatementContext, idxID int64, indexRanges []*ranger.Range) (float64, error)

GetRowCountByIndexRanges estimates the row count by a slice of Range.

func (*HistColl) GetRowCountByIntColumnRanges Uses

func (coll *HistColl) GetRowCountByIntColumnRanges(sc *stmtctx.StatementContext, colID int64, intRanges []*ranger.Range) (float64, error)

GetRowCountByIntColumnRanges estimates the row count by a slice of IntColumnRange.

func (*HistColl) GetTableAvgRowSize Uses

func (coll *HistColl) GetTableAvgRowSize(ctx sessionctx.Context, cols []*expression.Column, storeType kv.StoreType, handleInCols bool) (size float64)

GetTableAvgRowSize computes average row size for a table scan, exclude the index key-value pairs.

func (*HistColl) ID2UniqueID Uses

func (coll *HistColl) ID2UniqueID(columns []*expression.Column) *HistColl

ID2UniqueID generates a new HistColl whose `Columns` is built from UniqueID of given columns.

func (*HistColl) NewHistCollBySelectivity Uses

func (coll *HistColl) NewHistCollBySelectivity(sc *stmtctx.StatementContext, statsNodes []*StatsNode) *HistColl

NewHistCollBySelectivity creates new HistColl by the given statsNodes.

func (*HistColl) Selectivity Uses

func (coll *HistColl) Selectivity(ctx sessionctx.Context, exprs []expression.Expression, filledPaths []*planutil.AccessPath) (float64, []*StatsNode, error)

Selectivity calculates the selectivity of the expressions. The definition of selectivity is (row count after filter / row count before filter). The exprs must currently be in CNF; in other words, `exprs[0] and exprs[1] and ... and exprs[len - 1]` should hold when you call this. Currently the time complexity is O(n^2).

type Histogram Uses

type Histogram struct {
    ID        int64 // Column ID.
    NDV       int64 // Number of distinct values.
    NullCount int64 // Number of null values.
    // LastUpdateVersion is the version that this histogram updated last time.
    LastUpdateVersion uint64

    Tp  *types.FieldType

    // Histogram elements.
    // A bucket bound is the smallest and greatest values stored in the bucket. The lower and upper bound
    // are stored in one column.
    // A bucket count is the number of items stored in all previous buckets and the current bucket.
    // Bucket counts are always in increasing order.
    // A bucket repeat is the number of repeats of the bucket value, it can be used to find popular values.
    Bounds  *chunk.Chunk
    Buckets []Bucket

    // TotColSize is the total column size for the histogram.
    // For unfixed-len types, it includes LEN and BYTE.
    TotColSize int64

    // Correlation is the statistical correlation between physical row ordering and logical ordering of
    // the column values. This ranges from -1 to +1, and it is only valid for Column histogram, not for
    // Index histogram.
    Correlation float64
    // contains filtered or unexported fields

Histogram represents statistics for a column or index.

func BuildColumn Uses

func BuildColumn(ctx sessionctx.Context, numBuckets, id int64, collector *SampleCollector, tp *types.FieldType) (*Histogram, error)

BuildColumn builds histogram from samples for column.

func BuildColumnHist Uses

func BuildColumnHist(ctx sessionctx.Context, numBuckets, id int64, collector *SampleCollector, tp *types.FieldType, count int64, ndv int64, nullCount int64) (*Histogram, error)

BuildColumnHist builds a histogram for a column. numBuckets: number of buckets for the histogram. id: the id of the table. collector: the collector of samples. tp: the FieldType for the column. count: represents the row count for the column. ndv: represents the number of distinct values for the column. nullCount: represents the number of null values for the column.

func HistogramFromProto Uses

func HistogramFromProto(protoHg *tipb.Histogram) *Histogram

HistogramFromProto converts Histogram from its protobuf representation. Note that we will set BytesDatum for the lower/upper bound in the bucket, the decode will be after all histograms merged.

func MergeHistograms Uses

func MergeHistograms(sc *stmtctx.StatementContext, lh *Histogram, rh *Histogram, bucketSize int) (*Histogram, error)

MergeHistograms merges two histograms.

func NewHistogram Uses

func NewHistogram(id, ndv, nullCount int64, version uint64, tp *types.FieldType, bucketSize int, totColSize int64) *Histogram

NewHistogram creates a new histogram.

func UpdateHistogram Uses

func UpdateHistogram(h *Histogram, feedback *QueryFeedback) *Histogram

UpdateHistogram updates the histogram according to buckets.

func (*Histogram) AppendBucket Uses

func (hg *Histogram) AppendBucket(lower *types.Datum, upper *types.Datum, count, repeat int64)

AppendBucket appends a bucket into `hg`.

func (*Histogram) AvgCountPerNotNullValue Uses

func (hg *Histogram) AvgCountPerNotNullValue(totalCount int64) float64

AvgCountPerNotNullValue gets the average row count per value by the data of histogram.

func (*Histogram) BetweenRowCount Uses

func (hg *Histogram) BetweenRowCount(a, b types.Datum) float64

BetweenRowCount estimates the row count where the column is greater than or equal to a and less than b.

func (*Histogram) BucketToString Uses

func (hg *Histogram) BucketToString(bktID, idxCols int) string

BucketToString changes the given bucket to string format.

func (*Histogram) ConvertTo Uses

func (hg *Histogram) ConvertTo(sc *stmtctx.StatementContext, tp *types.FieldType) (*Histogram, error)

ConvertTo converts the histogram bucket values into `Tp`.

func (*Histogram) Copy Uses

func (hg *Histogram) Copy() *Histogram

Copy deep copies the histogram.

func (*Histogram) DecodeTo Uses

func (hg *Histogram) DecodeTo(tp *types.FieldType, timeZone *time.Location) error

DecodeTo decodes the histogram bucket values into `Tp`.

func (*Histogram) ExtractTopN Uses

func (hg *Histogram) ExtractTopN(cms *CMSketch, numCols int, numTopN uint32) error

ExtractTopN extracts topn from histogram.

func (*Histogram) GetIncreaseFactor Uses

func (hg *Histogram) GetIncreaseFactor(totalCount int64) float64

GetIncreaseFactor will return a factor of data increasing after the last analysis.

func (*Histogram) GetLower Uses

func (hg *Histogram) GetLower(idx int) *types.Datum

GetLower gets the lower bound of bucket `idx`.

func (*Histogram) GetUpper Uses

func (hg *Histogram) GetUpper(idx int) *types.Datum

GetUpper gets the upper bound of bucket `idx`.

func (*Histogram) IsIndexHist Uses

func (hg *Histogram) IsIndexHist() bool

IsIndexHist checks whether current histogram is one for index.

func (*Histogram) Len Uses

func (hg *Histogram) Len() int

Len is the number of buckets in the histogram.

func (*Histogram) LessRowCountWithBktIdx Uses

func (hg *Histogram) LessRowCountWithBktIdx(value types.Datum) (float64, int)

LessRowCountWithBktIdx estimates the row count where the column is less than value.

func (*Histogram) PreCalculateScalar Uses

func (hg *Histogram) PreCalculateScalar()

PreCalculateScalar converts the lower and upper to scalar. When the datum type is KindString or KindBytes, we also calculate their common prefix length, because when a value falls between lower and upper, the common prefix of lower and upper equals to the common prefix of the lower, upper and the value. For some simple types like `Int64`, we do not convert it because we can directly infer the scalar value.

func (*Histogram) RemoveUpperBound Uses

func (hg *Histogram) RemoveUpperBound() *Histogram

RemoveUpperBound removes the upper bound from the histogram. It is used when merging stats for incremental analyze.

func (*Histogram) SplitRange Uses

func (hg *Histogram) SplitRange(sc *stmtctx.StatementContext, oldRanges []*ranger.Range, encoded bool) ([]*ranger.Range, bool)

SplitRange splits the range according to the histogram lower bound. Note that we treat first bucket's lower bound as -inf and last bucket's upper bound as +inf, so all the split ranges will totally fall in one of the (-inf, l(1)), [l(1), l(2)),...[l(n-2), l(n-1)), [l(n-1), +inf), where n is the number of buckets, l(i) is the i-th bucket's lower bound.

func (*Histogram) ToString Uses

func (hg *Histogram) ToString(idxCols int) string

ToString gets the string representation for the histogram.

func (*Histogram) TotalRowCount Uses

func (hg *Histogram) TotalRowCount() float64

TotalRowCount returns the total count of this histogram.

func (*Histogram) TruncateHistogram Uses

func (hg *Histogram) TruncateHistogram(numBkt int) *Histogram

TruncateHistogram truncates the histogram to `numBkt` buckets.

type Index Uses

type Index struct {
    StatsVer       int64 // StatsVer is the version of the current stats, used to maintain compatibility
    Info           *model.IndexInfo
    Flag           int64
    LastAnalyzePos types.Datum

Index represents an index histogram.

func (*Index) GetRowCount Uses

func (idx *Index) GetRowCount(sc *stmtctx.StatementContext, indexRanges []*ranger.Range, modifyCount int64) (float64, error)

GetRowCount returns the row count of the given ranges. It uses the modifyCount to adjust the influence of modifications on the table.

func (*Index) IsInvalid Uses

func (idx *Index) IsInvalid(collPseudo bool) bool

IsInvalid checks if this index is invalid.

func (*Index) String Uses

func (idx *Index) String() string

type QueryFeedback Uses

type QueryFeedback struct {
    PhysicalID int64
    Hist       *Histogram
    Tp         int
    Feedback   []Feedback
    Expected   int64 // Expected is the Expected scan count of corresponding query.

    Valid bool // Valid represents whether this query feedback is still valid.
    // contains filtered or unexported fields

QueryFeedback is used to represent the query feedback info. It contains the query's scan ranges and number of rows in each range.

func NewQueryFeedback Uses

func NewQueryFeedback(physicalID int64, hist *Histogram, expected int64, desc bool) *QueryFeedback

NewQueryFeedback returns a new query feedback.

func (*QueryFeedback) Actual Uses

func (q *QueryFeedback) Actual() int64

Actual gets the actual row count.

func (*QueryFeedback) CalcErrorRate Uses

func (q *QueryFeedback) CalcErrorRate() float64

CalcErrorRate calculates the error rate of the current QueryFeedback.

func (*QueryFeedback) CollectFeedback Uses

func (q *QueryFeedback) CollectFeedback(numOfRanges int) bool

CollectFeedback decides whether to collect the feedback. It returns false when: 1: the histogram is nil or has no buckets; 2: the number of scan ranges exceeds the limit because it may affect the performance; 3: it does not pass the probabilistic sampler.

func (*QueryFeedback) DecodeIntValues Uses

func (q *QueryFeedback) DecodeIntValues() *QueryFeedback

DecodeIntValues is called when the current Feedback stores encoded int values.

func (*QueryFeedback) DecodeToRanges Uses

func (q *QueryFeedback) DecodeToRanges(isIndex bool) ([]*ranger.Range, error)

DecodeToRanges decodes the feedback to ranges.

func (*QueryFeedback) Invalidate Uses

func (q *QueryFeedback) Invalidate()

Invalidate is used to invalidate the query feedback.

func (*QueryFeedback) StoreRanges Uses

func (q *QueryFeedback) StoreRanges(ranges []*ranger.Range)

StoreRanges stores the ranges for update.

func (*QueryFeedback) Update Uses

func (q *QueryFeedback) Update(startKey kv.Key, counts []int64)

Update updates the query feedback. `startKey` is the start scan key of the partial result, used to find the range for update. `counts` is the scan counts of each range, used to update the feedback count info.

type SampleBuilder Uses

type SampleBuilder struct {
    Sc              *stmtctx.StatementContext
    RecordSet       sqlexec.RecordSet
    ColLen          int // ColLen is the number of columns need to be sampled.
    PkBuilder       *SortedBuilder
    MaxBucketSize   int64
    MaxSampleSize   int64
    MaxFMSketchSize int64
    CMSketchDepth   int32
    CMSketchWidth   int32

SampleBuilder is used to build samples for columns. Also, if primary key is handle, it will directly build histogram for it.

func (SampleBuilder) CollectColumnStats Uses

func (s SampleBuilder) CollectColumnStats() ([]*SampleCollector, *SortedBuilder, error)

CollectColumnStats collects sample from the result set using Reservoir Sampling algorithm, and estimates NDVs using FM Sketch during the collecting process. It returns the sample collectors which contain total count, null count, distinct values count and CM Sketch. It also returns the statistic builder for PK which contains the histogram. See https://en.wikipedia.org/wiki/Reservoir_sampling

type SampleCollector Uses

type SampleCollector struct {
    Samples []*SampleItem

    IsMerger      bool
    NullCount     int64
    Count         int64 // Count is the number of non-null rows.
    MaxSampleSize int64
    FMSketch      *FMSketch
    CMSketch      *CMSketch
    TotalSize     int64 // TotalSize is the total size of column.
    // contains filtered or unexported fields

SampleCollector will collect Samples and calculate the count and ndv of an attribute.

func SampleCollectorFromProto Uses

func SampleCollectorFromProto(collector *tipb.SampleCollector) *SampleCollector

SampleCollectorFromProto converts SampleCollector from its protobuf representation.

func (*SampleCollector) CalcTotalSize Uses

func (c *SampleCollector) CalcTotalSize()

CalcTotalSize is to calculate total size based on samples.

func (*SampleCollector) ExtractTopN Uses

func (c *SampleCollector) ExtractTopN(numTop uint32, sc *stmtctx.StatementContext, tp *types.FieldType, timeZone *time.Location) error

ExtractTopN extracts the topn from the CM Sketch.

func (*SampleCollector) MergeSampleCollector Uses

func (c *SampleCollector) MergeSampleCollector(sc *stmtctx.StatementContext, rc *SampleCollector)

MergeSampleCollector merges two sample collectors.

type SampleItem Uses

type SampleItem struct {
    // Value is the sampled column value.
    Value types.Datum
    // Ordinal is original position of this item in SampleCollector before sorting. This
    // is used for computing correlation.
    Ordinal int
    // RowID is the row id of the sample in its key.
    // This property is used to calculate Ordinal in fast analyze.
    RowID int64

SampleItem is an item of sampled column value.

type SortedBuilder Uses

type SortedBuilder struct {
    Count int64
    // contains filtered or unexported fields

SortedBuilder is used to build histograms for PK and index.

func NewSortedBuilder Uses

func NewSortedBuilder(sc *stmtctx.StatementContext, numBuckets, id int64, tp *types.FieldType) *SortedBuilder

NewSortedBuilder creates a new SortedBuilder.

func (*SortedBuilder) Hist Uses

func (b *SortedBuilder) Hist() *Histogram

Hist returns the histogram built by SortedBuilder.

func (*SortedBuilder) Iterate Uses

func (b *SortedBuilder) Iterate(data types.Datum) error

Iterate updates the histogram incrementally.

type StatsNode Uses

type StatsNode struct {
    Tp  int
    ID  int64

    // Ranges contains all the Ranges we got.
    Ranges []*ranger.Range
    // Selectivity indicates the Selectivity of this column/index.
    Selectivity float64
    // contains filtered or unexported fields

StatsNode is used for calculating selectivity.

func GetUsableSetsByGreedy Uses

func GetUsableSetsByGreedy(nodes []*StatsNode) (newBlocks []*StatsNode)

GetUsableSetsByGreedy selects the indices and pk used for calculating selectivity, using a greedy algorithm.

func MockStatsNode Uses

func MockStatsNode(id int64, m int64, num int) *StatsNode

MockStatsNode is only used for test.

type Table Uses

type Table struct {
    Version uint64
    Name    string

Table represents statistics for a table.

func PseudoTable Uses

func PseudoTable(tblInfo *model.TableInfo) *Table

PseudoTable creates a pseudo table statistics.

func (*Table) ColumnBetweenRowCount Uses

func (t *Table) ColumnBetweenRowCount(sc *stmtctx.StatementContext, a, b types.Datum, colID int64) float64

ColumnBetweenRowCount estimates the row count where the column is greater than or equal to a and less than b.

func (*Table) ColumnByName Uses

func (t *Table) ColumnByName(colName string) *Column

ColumnByName finds the statistics.Column for the given column.

func (*Table) ColumnEqualRowCount Uses

func (t *Table) ColumnEqualRowCount(sc *stmtctx.StatementContext, value types.Datum, colID int64) (float64, error)

ColumnEqualRowCount estimates the row count where the column equals to value.

func (*Table) ColumnGreaterRowCount Uses

func (t *Table) ColumnGreaterRowCount(sc *stmtctx.StatementContext, value types.Datum, colID int64) float64

ColumnGreaterRowCount estimates the row count where the column is greater than value.

func (*Table) ColumnLessRowCount Uses

func (t *Table) ColumnLessRowCount(sc *stmtctx.StatementContext, value types.Datum, colID int64) float64

ColumnLessRowCount estimates the row count where the column is less than value. Note that null values are not counted.

func (*Table) Copy Uses

func (t *Table) Copy() *Table

Copy copies the current table.

func (*Table) IndexStartWithColumn Uses

func (t *Table) IndexStartWithColumn(colName string) *Index

IndexStartWithColumn finds the first index whose first column is the given column.

func (*Table) IsOutdated Uses

func (t *Table) IsOutdated() bool

IsOutdated returns true if the table stats is outdated.

func (*Table) PseudoAvgCountPerValue Uses

func (t *Table) PseudoAvgCountPerValue() float64

PseudoAvgCountPerValue gets a pseudo average count if the histogram does not exist.

func (*Table) String Uses

func (t *Table) String() string

String implements Stringer interface.

type TopNMeta Uses

type TopNMeta struct {
    Data  []byte
    Count uint64
    // contains filtered or unexported fields

TopNMeta is a simple counter used by BuildTopN.

func (*TopNMeta) GetH2 Uses

func (t *TopNMeta) GetH2() uint64

GetH2 gets the second part of `murmur3.Sum128()`, just for test.



Package statistics imports 41 packages (graph) and is imported by 214 packages. Updated 2020-05-20. Refresh now. Tools for package owners.