From a02061d0ed78561268d3ee07e6e75cd2400cd38f Mon Sep 17 00:00:00 2001 From: tpp <146148086+terry1purcell@users.noreply.github.com> Date: Fri, 8 Nov 2024 00:05:49 +0800 Subject: [PATCH] This is an automated cherry-pick of #56848 Signed-off-by: ti-chi-bot --- pkg/planner/cardinality/BUILD.bazel | 97 ++++ pkg/planner/cardinality/row_count_column.go | 414 ++++++++++++++ pkg/planner/cardinality/row_count_index.go | 597 ++++++++++++++++++++ statistics/selectivity_test.go | 50 ++ 4 files changed, 1158 insertions(+) create mode 100644 pkg/planner/cardinality/BUILD.bazel create mode 100644 pkg/planner/cardinality/row_count_column.go create mode 100644 pkg/planner/cardinality/row_count_index.go diff --git a/pkg/planner/cardinality/BUILD.bazel b/pkg/planner/cardinality/BUILD.bazel new file mode 100644 index 0000000000000..36657805f4bce --- /dev/null +++ b/pkg/planner/cardinality/BUILD.bazel @@ -0,0 +1,97 @@ +load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test") + +go_library( + name = "cardinality", + srcs = [ + "cross_estimation.go", + "join.go", + "ndv.go", + "pseudo.go", + "row_count_column.go", + "row_count_index.go", + "row_size.go", + "selectivity.go", + "trace.go", + ], + importpath = "github.com/pingcap/tidb/pkg/planner/cardinality", + visibility = ["//visibility:public"], + deps = [ + "//pkg/expression", + "//pkg/kv", + "//pkg/meta/model", + "//pkg/parser/ast", + "//pkg/parser/format", + "//pkg/parser/mysql", + "//pkg/planner/planctx", + "//pkg/planner/property", + "//pkg/planner/util", + "//pkg/planner/util/debugtrace", + "//pkg/planner/util/fixcontrol", + "//pkg/sessionctx/stmtctx", + "//pkg/statistics", + "//pkg/tablecodec", + "//pkg/types", + "//pkg/types/parser_driver", + "//pkg/util/chunk", + "//pkg/util/codec", + "//pkg/util/collate", + "//pkg/util/logutil", + "//pkg/util/mathutil", + "//pkg/util/ranger", + "//pkg/util/set", + "//pkg/util/tracing", + "@com_github_pingcap_errors//:errors", + "@com_github_pingcap_failpoint//:failpoint", + 
"@org_uber_go_zap//:zap", + ], +) + +go_test( + name = "cardinality_test", + timeout = "short", + srcs = [ + "main_test.go", + "row_count_test.go", + "row_size_test.go", + "selectivity_test.go", + "trace_test.go", + ], + data = glob(["testdata/**"]), + embed = [":cardinality"], + flaky = True, + shard_count = 29, + deps = [ + "//pkg/config", + "//pkg/domain", + "//pkg/executor", + "//pkg/expression", + "//pkg/infoschema", + "//pkg/kv", + "//pkg/meta/model", + "//pkg/parser", + "//pkg/parser/model", + "//pkg/parser/mysql", + "//pkg/planner/core", + "//pkg/planner/core/base", + "//pkg/planner/core/operator/logicalop", + "//pkg/planner/core/resolve", + "//pkg/session", + "//pkg/sessionctx", + "//pkg/sessionctx/stmtctx", + "//pkg/sessionctx/variable", + "//pkg/statistics", + "//pkg/testkit", + "//pkg/testkit/testdata", + "//pkg/testkit/testmain", + "//pkg/testkit/testsetup", + "//pkg/types", + "//pkg/util/codec", + "//pkg/util/collate", + "//pkg/util/mock", + "//pkg/util/ranger", + "//pkg/util/tracing", + "@com_github_pingcap_failpoint//:failpoint", + "@com_github_stretchr_testify//require", + "@org_uber_go_goleak//:goleak", + ], +) diff --git a/pkg/planner/cardinality/row_count_column.go b/pkg/planner/cardinality/row_count_column.go new file mode 100644 index 0000000000000..fa1d8364ed6e3 --- /dev/null +++ b/pkg/planner/cardinality/row_count_column.go @@ -0,0 +1,414 @@ +// Copyright 2023 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package cardinality + +import ( + "math" + + "github.com/pingcap/errors" + "github.com/pingcap/tidb/pkg/planner/planctx" + "github.com/pingcap/tidb/pkg/planner/util/debugtrace" + "github.com/pingcap/tidb/pkg/planner/util/fixcontrol" + "github.com/pingcap/tidb/pkg/statistics" + "github.com/pingcap/tidb/pkg/types" + "github.com/pingcap/tidb/pkg/util/codec" + "github.com/pingcap/tidb/pkg/util/collate" + "github.com/pingcap/tidb/pkg/util/mathutil" + "github.com/pingcap/tidb/pkg/util/ranger" +) + +func init() { + statistics.GetRowCountByColumnRanges = GetRowCountByColumnRanges + statistics.GetRowCountByIntColumnRanges = GetRowCountByIntColumnRanges + statistics.GetRowCountByIndexRanges = GetRowCountByIndexRanges +} + +// GetRowCountByColumnRanges estimates the row count by a slice of Range. +func GetRowCountByColumnRanges(sctx planctx.PlanContext, coll *statistics.HistColl, colUniqueID int64, colRanges []*ranger.Range) (result float64, err error) { + var name string + if sctx.GetSessionVars().StmtCtx.EnableOptimizerDebugTrace { + debugtrace.EnterContextCommon(sctx) + debugTraceGetRowCountInput(sctx, colUniqueID, colRanges) + defer func() { + debugtrace.RecordAnyValuesWithNames(sctx, "Name", name, "Result", result) + debugtrace.LeaveContextCommon(sctx) + }() + } + sc := sctx.GetSessionVars().StmtCtx + c := coll.GetCol(colUniqueID) + colInfoID := colUniqueID + if len(coll.UniqueID2colInfoID) > 0 { + colInfoID = coll.UniqueID2colInfoID[colUniqueID] + } + recordUsedItemStatsStatus(sctx, c, coll.PhysicalID, colInfoID) + if c != nil && c.Info != nil { + name = c.Info.Name.O + } + if statistics.ColumnStatsIsInvalid(c, sctx, coll, colUniqueID) { + result, err = getPseudoRowCountByColumnRanges(sc.TypeCtx(), float64(coll.RealtimeCount), colRanges, 0) + if err == nil && sc.EnableOptimizerCETrace && c != nil { + ceTraceRange(sctx, coll.PhysicalID, []string{c.Info.Name.O}, colRanges, "Column Stats-Pseudo", uint64(result)) + } + return result, err + } + if 
sctx.GetSessionVars().StmtCtx.EnableOptimizerDebugTrace { + debugtrace.RecordAnyValuesWithNames(sctx, + "Histogram NotNull Count", c.Histogram.NotNullCount(), + "TopN total count", c.TopN.TotalCount(), + "Increase Factor", c.GetIncreaseFactor(coll.RealtimeCount), + ) + } + result, err = GetColumnRowCount(sctx, c, colRanges, coll.RealtimeCount, coll.ModifyCount, false) + if sc.EnableOptimizerCETrace { + ceTraceRange(sctx, coll.PhysicalID, []string{c.Info.Name.O}, colRanges, "Column Stats", uint64(result)) + } + return result, errors.Trace(err) +} + +// GetRowCountByIntColumnRanges estimates the row count by a slice of IntColumnRange. +func GetRowCountByIntColumnRanges(sctx planctx.PlanContext, coll *statistics.HistColl, colUniqueID int64, intRanges []*ranger.Range) (result float64, err error) { + var name string + if sctx.GetSessionVars().StmtCtx.EnableOptimizerDebugTrace { + debugtrace.EnterContextCommon(sctx) + debugTraceGetRowCountInput(sctx, colUniqueID, intRanges) + defer func() { + debugtrace.RecordAnyValuesWithNames(sctx, "Name", name, "Result", result) + debugtrace.LeaveContextCommon(sctx) + }() + } + sc := sctx.GetSessionVars().StmtCtx + c := coll.GetCol(colUniqueID) + colInfoID := colUniqueID + if len(coll.UniqueID2colInfoID) > 0 { + colInfoID = coll.UniqueID2colInfoID[colUniqueID] + } + recordUsedItemStatsStatus(sctx, c, coll.PhysicalID, colInfoID) + if c != nil && c.Info != nil { + name = c.Info.Name.O + } + if statistics.ColumnStatsIsInvalid(c, sctx, coll, colUniqueID) { + if len(intRanges) == 0 { + return 0, nil + } + if intRanges[0].LowVal[0].Kind() == types.KindInt64 { + result = getPseudoRowCountBySignedIntRanges(intRanges, float64(coll.RealtimeCount)) + } else { + result = getPseudoRowCountByUnsignedIntRanges(intRanges, float64(coll.RealtimeCount)) + } + if sc.EnableOptimizerCETrace && c != nil { + ceTraceRange(sctx, coll.PhysicalID, []string{c.Info.Name.O}, intRanges, "Column Stats-Pseudo", uint64(result)) + } + return result, nil + } + if 
sctx.GetSessionVars().StmtCtx.EnableOptimizerDebugTrace { + debugtrace.RecordAnyValuesWithNames(sctx, + "Histogram NotNull Count", c.Histogram.NotNullCount(), + "TopN total count", c.TopN.TotalCount(), + "Increase Factor", c.GetIncreaseFactor(coll.RealtimeCount), + ) + } + result, err = GetColumnRowCount(sctx, c, intRanges, coll.RealtimeCount, coll.ModifyCount, true) + if sc.EnableOptimizerCETrace { + ceTraceRange(sctx, coll.PhysicalID, []string{c.Info.Name.O}, intRanges, "Column Stats", uint64(result)) + } + return result, errors.Trace(err) +} + +// equalRowCountOnColumn estimates the number of rows in column c whose value equals val (encodedVal is the encoded form of val). +func equalRowCountOnColumn(sctx planctx.PlanContext, c *statistics.Column, val types.Datum, encodedVal []byte, realtimeRowCount, modifyCount int64) (result float64, err error) { + if sctx.GetSessionVars().StmtCtx.EnableOptimizerDebugTrace { + debugtrace.EnterContextCommon(sctx) + debugtrace.RecordAnyValuesWithNames(sctx, "Value", val.String(), "Encoded", encodedVal) + defer func() { + debugtrace.RecordAnyValuesWithNames(sctx, "Result", result, "Error", err) + debugtrace.LeaveContextCommon(sctx) + }() + } + if val.IsNull() { + return float64(c.NullCount), nil + } + if c.StatsVer < statistics.Version2 { + // All the values are null. + if c.Histogram.Bounds.NumRows() == 0 { + return 0.0, nil + } + if c.Histogram.NDV > 0 && c.OutOfRange(val) { + return outOfRangeEQSelectivity(sctx, c.Histogram.NDV, realtimeRowCount, int64(c.TotalRowCount())) * c.TotalRowCount(), nil + } + if c.CMSketch != nil { + count, err := statistics.QueryValue(sctx, c.CMSketch, c.TopN, val) + return float64(count), errors.Trace(err) + } + histRowCount, _ := c.Histogram.EqualRowCount(sctx, val, false) + return histRowCount, nil + } + + // Stats version == 2 + // All the values are null. + if c.Histogram.Bounds.NumRows() == 0 && c.TopN.Num() == 0 { + return 0, nil + } + // 1.
try to find this value in TopN + if c.TopN != nil { + rowcount, ok := c.TopN.QueryTopN(sctx, encodedVal) + if ok { + return float64(rowcount), nil + } + } + // 2. try to find this value in bucket.Repeat(the last value in every bucket) + histCnt, matched := c.Histogram.EqualRowCount(sctx, val, true) + if matched { + return histCnt, nil + } + // 3. use uniform distribution assumption for the rest (even when this value is not covered by the range of stats) + histNDV := float64(c.Histogram.NDV - int64(c.TopN.Num())) + if histNDV <= 0 { + // If histNDV is zero - we have all NDV's in TopN - and no histograms. This function uses + // c.NotNullCount rather than c.Histogram.NotNullCount() since the histograms are empty. + // + // If the table hasn't been modified, it's safe to return 0. + if modifyCount == 0 { + return 0, nil + } + // ELSE calculate an approximate estimate based upon newly inserted rows. + // + // Reset to the original NDV, or if no NDV - derive an NDV using sqrt + if c.Histogram.NDV > 0 { + histNDV = float64(c.Histogram.NDV) + } else { + histNDV = math.Sqrt(max(c.NotNullCount(), float64(realtimeRowCount))) + } + // As a conservative estimate - take the smaller of the original totalRows or the additions. + // "realtimeRowCount - original count" is a better measure of inserts than modifyCount + totalRowCount := min(c.NotNullCount(), float64(realtimeRowCount)-c.NotNullCount()) + return max(1, totalRowCount/histNDV), nil + } + // return the average histogram rows (which excludes topN) and NDV that excluded topN + return c.Histogram.NotNullCount() / histNDV, nil +} + +// GetColumnRowCount estimates the row count by a slice of Range.
+func GetColumnRowCount(sctx planctx.PlanContext, c *statistics.Column, ranges []*ranger.Range, realtimeRowCount, modifyCount int64, pkIsHandle bool) (float64, error) { + sc := sctx.GetSessionVars().StmtCtx + debugTrace := sc.EnableOptimizerDebugTrace + if debugTrace { + debugtrace.EnterContextCommon(sctx) + defer debugtrace.LeaveContextCommon(sctx) + } + var rowCount float64 + for _, rg := range ranges { + highVal := *rg.HighVal[0].Clone() + lowVal := *rg.LowVal[0].Clone() + if highVal.Kind() == types.KindString { + highVal.SetBytes(collate.GetCollator(highVal.Collation()).Key(highVal.GetString())) + } + if lowVal.Kind() == types.KindString { + lowVal.SetBytes(collate.GetCollator(lowVal.Collation()).Key(lowVal.GetString())) + } + cmp, err := lowVal.Compare(sc.TypeCtx(), &highVal, collate.GetBinaryCollator()) + if err != nil { + return 0, errors.Trace(err) + } + lowEncoded, err := codec.EncodeKey(sc.TimeZone(), nil, lowVal) + err = sc.HandleError(err) + if err != nil { + return 0, err + } + highEncoded, err := codec.EncodeKey(sc.TimeZone(), nil, highVal) + err = sc.HandleError(err) + if err != nil { + return 0, err + } + if debugTrace { + debugTraceStartEstimateRange(sctx, rg, lowEncoded, highEncoded, rowCount) + } + if cmp == 0 { + // case 1: it's a point + if !rg.LowExclude && !rg.HighExclude { + // In this case, the row count is at most 1. + if pkIsHandle { + rowCount++ + if debugTrace { + debugTraceEndEstimateRange(sctx, 1, debugTraceUniquePoint) + } + continue + } + var cnt float64 + cnt, err = equalRowCountOnColumn(sctx, c, lowVal, lowEncoded, realtimeRowCount, modifyCount) + if err != nil { + return 0, errors.Trace(err) + } + // If the current table row count has changed, we should scale the row count accordingly. 
+ cnt *= c.GetIncreaseFactor(realtimeRowCount) + rowCount += cnt + if debugTrace { + debugTraceEndEstimateRange(sctx, cnt, debugTracePoint) + } + } + continue + } + // In stats ver 1, we use CM Sketch to estimate row count for point condition, which is more accurate. + // So for the small range, we convert it to points. + if c.StatsVer < 2 { + rangeVals := statistics.EnumRangeValues(lowVal, highVal, rg.LowExclude, rg.HighExclude) + + // case 2: it's a small range && using ver1 stats + if rangeVals != nil { + for _, val := range rangeVals { + cnt, err := equalRowCountOnColumn(sctx, c, val, lowEncoded, realtimeRowCount, modifyCount) + if err != nil { + return 0, err + } + // If the current table row count has changed, we should scale the row count accordingly. + cnt *= c.GetIncreaseFactor(realtimeRowCount) + if debugTrace { + debugTraceEndEstimateRange(sctx, cnt, debugTraceVer1SmallRange) + } + rowCount += cnt + } + + continue + } + } + + // case 3: it's an interval + cnt := betweenRowCountOnColumn(sctx, c, lowVal, highVal, lowEncoded, highEncoded) + // `betweenRowCount` returns count for [l, h) range, we adjust cnt for boundaries here. + // Note that, `cnt` does not include null values, we need specially handle cases + // where null is the lower bound. + // And because we use (2, MaxValue] to represent expressions like a > 2 and use [MinNotNull, 3) to represent + // expressions like b < 3, we need to exclude the special values. 
+ if rg.LowExclude && !lowVal.IsNull() && lowVal.Kind() != types.KindMaxValue && lowVal.Kind() != types.KindMinNotNull { + lowCnt, err := equalRowCountOnColumn(sctx, c, lowVal, lowEncoded, realtimeRowCount, modifyCount) + if err != nil { + return 0, errors.Trace(err) + } + cnt -= lowCnt + cnt = mathutil.Clamp(cnt, 0, c.NotNullCount()) + } + if !rg.LowExclude && lowVal.IsNull() { + cnt += float64(c.NullCount) + } + if !rg.HighExclude && highVal.Kind() != types.KindMaxValue && highVal.Kind() != types.KindMinNotNull { + highCnt, err := equalRowCountOnColumn(sctx, c, highVal, highEncoded, realtimeRowCount, modifyCount) + if err != nil { + return 0, errors.Trace(err) + } + cnt += highCnt + } + + cnt = mathutil.Clamp(cnt, 0, c.TotalRowCount()) + + // If the current table row count has changed, we should scale the row count accordingly. + increaseFactor := c.GetIncreaseFactor(realtimeRowCount) + cnt *= increaseFactor + + // handling the out-of-range part + if (c.OutOfRange(lowVal) && !lowVal.IsNull()) || c.OutOfRange(highVal) { + histNDV := c.NDV + // Exclude the TopN + if c.StatsVer == statistics.Version2 { + histNDV -= int64(c.TopN.Num()) + } + cnt += c.Histogram.OutOfRangeRowCount(sctx, &lowVal, &highVal, modifyCount, histNDV, increaseFactor) + } + + if debugTrace { + debugTraceEndEstimateRange(sctx, cnt, debugTraceRange) + } + rowCount += cnt + } + allowZeroEst := fixcontrol.GetBoolWithDefault( + sctx.GetSessionVars().GetOptimizerFixControlMap(), + fixcontrol.Fix47400, + false, + ) + if allowZeroEst { + rowCount = mathutil.Clamp(rowCount, 0, float64(realtimeRowCount)) + } else { + // Don't allow the final result to go below 1 row + rowCount = mathutil.Clamp(rowCount, 1, float64(realtimeRowCount)) + } + return rowCount, nil +} + +// betweenRowCountOnColumn estimates the row count for interval [l, r). 
+func betweenRowCountOnColumn(sctx planctx.PlanContext, c *statistics.Column, l, r types.Datum, lowEncoded, highEncoded []byte) float64 { + histBetweenCnt := c.Histogram.BetweenRowCount(sctx, l, r) + if c.StatsVer <= statistics.Version1 { + return histBetweenCnt + } + return float64(c.TopN.BetweenCount(sctx, lowEncoded, highEncoded)) + histBetweenCnt +} + +// The functions below are mainly for testing. + +// ColumnGreaterRowCount estimates the row count where the column is greater than value. +func ColumnGreaterRowCount(sctx planctx.PlanContext, t *statistics.Table, value types.Datum, colID int64) float64 { + c := t.GetCol(colID) + if statistics.ColumnStatsIsInvalid(c, sctx, &t.HistColl, colID) { + return float64(t.RealtimeCount) / pseudoLessRate + } + return c.GreaterRowCount(value) * c.GetIncreaseFactor(t.RealtimeCount) +} + +// columnLessRowCount estimates the row count where the column is less than value. Note that null values are not counted. +func columnLessRowCount(sctx planctx.PlanContext, t *statistics.Table, value types.Datum, colID int64) float64 { + c := t.GetCol(colID) + if statistics.ColumnStatsIsInvalid(c, sctx, &t.HistColl, colID) { + return float64(t.RealtimeCount) / pseudoLessRate + } + return c.LessRowCount(sctx, value) * c.GetIncreaseFactor(t.RealtimeCount) +} + +// columnBetweenRowCount estimates the row count where the column is greater than or equal to a and less than b.
+func columnBetweenRowCount(sctx planctx.PlanContext, t *statistics.Table, a, b types.Datum, colID int64) (float64, error) { + sc := sctx.GetSessionVars().StmtCtx + c := t.GetCol(colID) + if statistics.ColumnStatsIsInvalid(c, sctx, &t.HistColl, colID) { + return float64(t.RealtimeCount) / pseudoBetweenRate, nil + } + aEncoded, err := codec.EncodeKey(sc.TimeZone(), nil, a) + err = sc.HandleError(err) + if err != nil { + return 0, err + } + bEncoded, err := codec.EncodeKey(sc.TimeZone(), nil, b) + err = sc.HandleError(err) + if err != nil { + return 0, err + } + count := betweenRowCountOnColumn(sctx, c, a, b, aEncoded, bEncoded) + if a.IsNull() { + count += float64(c.NullCount) + } + return count * c.GetIncreaseFactor(t.RealtimeCount), nil +} + +// ColumnEqualRowCount estimates the row count where the column equals to value. +func ColumnEqualRowCount(sctx planctx.PlanContext, t *statistics.Table, value types.Datum, colID int64) (float64, error) { + c := t.GetCol(colID) + if statistics.ColumnStatsIsInvalid(c, sctx, &t.HistColl, colID) { + return float64(t.RealtimeCount) / pseudoEqualRate, nil + } + encodedVal, err := codec.EncodeKey(sctx.GetSessionVars().StmtCtx.TimeZone(), nil, value) + err = sctx.GetSessionVars().StmtCtx.HandleError(err) + if err != nil { + return 0, err + } + result, err := equalRowCountOnColumn(sctx, c, value, encodedVal, t.RealtimeCount, t.ModifyCount) + result *= c.GetIncreaseFactor(t.RealtimeCount) + return result, errors.Trace(err) +} diff --git a/pkg/planner/cardinality/row_count_index.go b/pkg/planner/cardinality/row_count_index.go new file mode 100644 index 0000000000000..928395ae76eef --- /dev/null +++ b/pkg/planner/cardinality/row_count_index.go @@ -0,0 +1,597 @@ +// Copyright 2023 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package cardinality + +import ( + "bytes" + "math" + "slices" + "strings" + "time" + + "github.com/pingcap/errors" + "github.com/pingcap/failpoint" + "github.com/pingcap/tidb/pkg/kv" + "github.com/pingcap/tidb/pkg/planner/planctx" + "github.com/pingcap/tidb/pkg/planner/util/debugtrace" + "github.com/pingcap/tidb/pkg/planner/util/fixcontrol" + "github.com/pingcap/tidb/pkg/sessionctx/stmtctx" + "github.com/pingcap/tidb/pkg/statistics" + "github.com/pingcap/tidb/pkg/types" + "github.com/pingcap/tidb/pkg/util/chunk" + "github.com/pingcap/tidb/pkg/util/codec" + "github.com/pingcap/tidb/pkg/util/collate" + "github.com/pingcap/tidb/pkg/util/mathutil" + "github.com/pingcap/tidb/pkg/util/ranger" +) + +// GetRowCountByIndexRanges estimates the row count by a slice of Range. 
+func GetRowCountByIndexRanges(sctx planctx.PlanContext, coll *statistics.HistColl, idxID int64, indexRanges []*ranger.Range) (result float64, err error) { + var name string + if sctx.GetSessionVars().StmtCtx.EnableOptimizerDebugTrace { + debugtrace.EnterContextCommon(sctx) + debugTraceGetRowCountInput(sctx, idxID, indexRanges) + defer func() { + debugtrace.RecordAnyValuesWithNames(sctx, "Name", name, "Result", result) + debugtrace.LeaveContextCommon(sctx) + }() + } + sc := sctx.GetSessionVars().StmtCtx + idx := coll.GetIdx(idxID) + colNames := make([]string, 0, 8) + if idx != nil { + if idx.Info != nil { + name = idx.Info.Name.O + for _, col := range idx.Info.Columns { + colNames = append(colNames, col.Name.O) + } + } + } + recordUsedItemStatsStatus(sctx, idx, coll.PhysicalID, idxID) + if statistics.IndexStatsIsInvalid(sctx, idx, coll, idxID) { + colsLen := -1 + if idx != nil && idx.Info != nil && idx.Info.Unique { + colsLen = len(idx.Info.Columns) + } + result, err = getPseudoRowCountByIndexRanges(sc.TypeCtx(), indexRanges, float64(coll.RealtimeCount), colsLen) + if err == nil && sc.EnableOptimizerCETrace && idx != nil { + ceTraceRange(sctx, coll.PhysicalID, colNames, indexRanges, "Index Stats-Pseudo", uint64(result)) + } + return result, err + } + realtimeCnt, modifyCount := coll.GetScaledRealtimeAndModifyCnt(idx) + if sctx.GetSessionVars().StmtCtx.EnableOptimizerDebugTrace { + debugtrace.RecordAnyValuesWithNames(sctx, + "Histogram NotNull Count", idx.Histogram.NotNullCount(), + "TopN total count", idx.TopN.TotalCount(), + "Increase Factor", idx.GetIncreaseFactor(realtimeCnt), + ) + } + if idx.CMSketch != nil && idx.StatsVer == statistics.Version1 { + result, err = getIndexRowCountForStatsV1(sctx, coll, idxID, indexRanges) + } else { + result, err = getIndexRowCountForStatsV2(sctx, idx, coll, indexRanges, realtimeCnt, modifyCount) + } + if sc.EnableOptimizerCETrace { + ceTraceRange(sctx, coll.PhysicalID, colNames, indexRanges, "Index Stats", uint64(result)) + } + return result,
errors.Trace(err) +} + +func getIndexRowCountForStatsV1(sctx planctx.PlanContext, coll *statistics.HistColl, idxID int64, indexRanges []*ranger.Range) (float64, error) { + sc := sctx.GetSessionVars().StmtCtx + debugTrace := sc.EnableOptimizerDebugTrace + if debugTrace { + debugtrace.EnterContextCommon(sctx) + defer debugtrace.LeaveContextCommon(sctx) + } + idx := coll.GetIdx(idxID) + totalCount := float64(0) + for _, ran := range indexRanges { + if debugTrace { + debugTraceStartEstimateRange(sctx, ran, nil, nil, totalCount) + } + rangePosition := getOrdinalOfRangeCond(sc, ran) + var rangeVals []types.Datum + // Try to enum the last range values. + if rangePosition != len(ran.LowVal) { + rangeVals = statistics.EnumRangeValues(ran.LowVal[rangePosition], ran.HighVal[rangePosition], ran.LowExclude, ran.HighExclude) + if rangeVals != nil { + rangePosition++ + } + } + // If first one is range, just use the previous way to estimate; if it is [NULL, NULL] range + // on single-column index, use previous way as well, because CMSketch does not contain null + // values in this case. + if rangePosition == 0 || isSingleColIdxNullRange(idx, ran) { + realtimeCnt, modifyCount := coll.GetScaledRealtimeAndModifyCnt(idx) + count, err := getIndexRowCountForStatsV2(sctx, idx, nil, []*ranger.Range{ran}, realtimeCnt, modifyCount) + if err != nil { + return 0, errors.Trace(err) + } + if debugTrace { + debugTraceEndEstimateRange(sctx, count, debugTraceRange) + } + totalCount += count + continue + } + var selectivity float64 + // use CM Sketch to estimate the equal conditions + if rangeVals == nil { + encoded, err := codec.EncodeKey(sc.TimeZone(), nil, ran.LowVal[:rangePosition]...) + err = sc.HandleError(err) + if err != nil { + return 0, errors.Trace(err) + } + selectivity, err = getEqualCondSelectivity(sctx, coll, idx, encoded, rangePosition, ran) + if err != nil { + return 0, errors.Trace(err) + } + } else { + encoded, err := codec.EncodeKey(sc.TimeZone(), nil, ran.LowVal[:rangePosition-1]...)
+ err = sc.HandleError(err) + if err != nil { + return 0, errors.Trace(err) + } + prefixLen := len(encoded) + for _, val := range rangeVals { + encoded = encoded[:prefixLen] + encoded, err = codec.EncodeKey(sc.TimeZone(), encoded, val) + err = sc.HandleError(err) + if err != nil { + return 0, err + } + res, err := getEqualCondSelectivity(sctx, coll, idx, encoded, rangePosition, ran) + if err != nil { + return 0, errors.Trace(err) + } + selectivity += res + } + } + // use histogram to estimate the range condition + if rangePosition != len(ran.LowVal) { + rang := ranger.Range{ + LowVal: []types.Datum{ran.LowVal[rangePosition]}, + LowExclude: ran.LowExclude, + HighVal: []types.Datum{ran.HighVal[rangePosition]}, + HighExclude: ran.HighExclude, + Collators: []collate.Collator{ran.Collators[rangePosition]}, + } + var count float64 + var err error + colUniqueIDs := coll.Idx2ColUniqueIDs[idxID] + var colUniqueID int64 + if rangePosition >= len(colUniqueIDs) { + colUniqueID = -1 + } else { + colUniqueID = colUniqueIDs[rangePosition] + } + // prefer index stats over column stats + if idxIDs, ok := coll.ColUniqueID2IdxIDs[colUniqueID]; ok && len(idxIDs) > 0 { + idxID := idxIDs[0] + count, err = GetRowCountByIndexRanges(sctx, coll, idxID, []*ranger.Range{&rang}) + } else { + count, err = GetRowCountByColumnRanges(sctx, coll, colUniqueID, []*ranger.Range{&rang}) + } + if err != nil { + return 0, errors.Trace(err) + } + selectivity = selectivity * count / idx.TotalRowCount() + } + count := selectivity * idx.TotalRowCount() + if debugTrace { + debugTraceEndEstimateRange(sctx, count, debugTraceRange) + } + totalCount += count + } + if totalCount > idx.TotalRowCount() { + totalCount = idx.TotalRowCount() + } + return totalCount, nil +} + +// isSingleColIdxNullRange checks if a range is [NULL, NULL] on a single-column index.
+func isSingleColIdxNullRange(idx *statistics.Index, ran *ranger.Range) bool { + if len(idx.Info.Columns) > 1 { + return false + } + l, h := ran.LowVal[0], ran.HighVal[0] + if l.IsNull() && h.IsNull() { + return true + } + return false +} + +// It uses the modifyCount to adjust the influence of modifications on the table. +func getIndexRowCountForStatsV2(sctx planctx.PlanContext, idx *statistics.Index, coll *statistics.HistColl, indexRanges []*ranger.Range, realtimeRowCount, modifyCount int64) (float64, error) { + sc := sctx.GetSessionVars().StmtCtx + debugTrace := sc.EnableOptimizerDebugTrace + if debugTrace { + debugtrace.EnterContextCommon(sctx) + defer debugtrace.LeaveContextCommon(sctx) + } + totalCount := float64(0) + isSingleColIdx := len(idx.Info.Columns) == 1 + for _, indexRange := range indexRanges { + var count float64 + lb, err := codec.EncodeKey(sc.TimeZone(), nil, indexRange.LowVal...) + err = sc.HandleError(err) + if err != nil { + return 0, err + } + rb, err := codec.EncodeKey(sc.TimeZone(), nil, indexRange.HighVal...) + err = sc.HandleError(err) + if err != nil { + return 0, err + } + if debugTrace { + debugTraceStartEstimateRange(sctx, indexRange, lb, rb, totalCount) + } + fullLen := len(indexRange.LowVal) == len(indexRange.HighVal) && len(indexRange.LowVal) == len(idx.Info.Columns) + if bytes.Equal(lb, rb) { + // case 1: it's a point + if indexRange.LowExclude || indexRange.HighExclude { + if debugTrace { + debugTraceEndEstimateRange(sctx, 0, debugTraceImpossible) + } + continue + } + if fullLen { + // At most 1 in this case. 
+ if idx.Info.Unique { + if !indexRange.IsOnlyNull() { + totalCount++ + if debugTrace { + debugTraceEndEstimateRange(sctx, 1, debugTraceUniquePoint) + } + continue + } + totalCount = float64(idx.NullCount) + if debugTrace { + debugTraceEndEstimateRange(sctx, float64(idx.NullCount), debugTraceUniquePoint) + } + continue + } + count = equalRowCountOnIndex(sctx, idx, lb, realtimeRowCount, modifyCount) + // If the current table row count has changed, we should scale the row count accordingly. + count *= idx.GetIncreaseFactor(realtimeRowCount) + if debugTrace { + debugTraceEndEstimateRange(sctx, count, debugTracePoint) + } + totalCount += count + continue + } + } + + // case 2: it's an interval + // The final interval is [low, high) + if indexRange.LowExclude { + lb = kv.Key(lb).PrefixNext() + } + if !indexRange.HighExclude { + rb = kv.Key(rb).PrefixNext() + } + l := types.NewBytesDatum(lb) + r := types.NewBytesDatum(rb) + lowIsNull := bytes.Equal(lb, nullKeyBytes) + if isSingleColIdx && lowIsNull { + count += float64(idx.Histogram.NullCount) + } + expBackoffSuccess := false + // Due to the limitation of calcFraction and convertDatumToScalar, the histogram actually won't estimate anything. + // If the first column's range is point. 
+ if rangePosition := getOrdinalOfRangeCond(sc, indexRange); rangePosition > 0 && idx.StatsVer >= statistics.Version2 && coll != nil { + var expBackoffSel float64 + expBackoffSel, expBackoffSuccess, err = expBackoffEstimation(sctx, idx, coll, indexRange) + if err != nil { + return 0, err + } + if expBackoffSuccess { + expBackoffCnt := expBackoffSel * idx.TotalRowCount() + + upperLimit := expBackoffCnt + // Use the multi-column stats to calculate the max possible row count of [l, r) + if idx.Histogram.Len() > 0 { + _, lowerBkt, _, _ := idx.Histogram.LocateBucket(sctx, l) + _, upperBkt, _, _ := idx.Histogram.LocateBucket(sctx, r) + if debugTrace { + statistics.DebugTraceBuckets(sctx, &idx.Histogram, []int{lowerBkt - 1, upperBkt}) + } + // Use Count of the Bucket before l as the lower bound. + preCount := float64(0) + if lowerBkt > 0 { + preCount = float64(idx.Histogram.Buckets[lowerBkt-1].Count) + } + // Use Count of the Bucket where r exists as the upper bound. + upperCnt := float64(idx.Histogram.Buckets[upperBkt].Count) + upperLimit = upperCnt - preCount + upperLimit += float64(idx.TopN.BetweenCount(sctx, lb, rb)) + } + + // If the result of exponential backoff strategy is larger than the result from multi-column stats, + // use the upper limit from multi-column histogram instead. + if expBackoffCnt > upperLimit { + expBackoffCnt = upperLimit + } + count += expBackoffCnt + } + } + if !expBackoffSuccess { + count += betweenRowCountOnIndex(sctx, idx, l, r) + } + + // If the current table row count has changed, we should scale the row count accordingly. 
+		increaseFactor := idx.GetIncreaseFactor(realtimeRowCount)
+		count *= increaseFactor
+
+		// handling the out-of-range part
+		if (outOfRangeOnIndex(idx, l) && !(isSingleColIdx && lowIsNull)) || outOfRangeOnIndex(idx, r) {
+			histNDV := idx.NDV
+			// Exclude the TopN in Stats Version 2
+			if idx.StatsVer == statistics.Version2 {
+				c := coll.GetCol(idx.Histogram.ID)
+				// If this is single column of a multi-column index - use the column's NDV rather than index NDV
+				isSingleColRange := len(indexRange.LowVal) == len(indexRange.HighVal) && len(indexRange.LowVal) == 1
+				if isSingleColRange && !isSingleColIdx && c != nil && c.Histogram.NDV > 0 {
+					histNDV = c.Histogram.NDV - int64(c.TopN.Num())
+				} else {
+					histNDV -= int64(idx.TopN.Num())
+				}
+			}
+			count += idx.Histogram.OutOfRangeRowCount(sctx, &l, &r, modifyCount, histNDV, increaseFactor)
+		}
+
+		if debugTrace {
+			debugTraceEndEstimateRange(sctx, count, debugTraceRange)
+		}
+		totalCount += count
+	}
+	allowZeroEst := fixcontrol.GetBoolWithDefault(
+		sctx.GetSessionVars().GetOptimizerFixControlMap(),
+		fixcontrol.Fix47400,
+		false,
+	)
+	if allowZeroEst {
+		totalCount = mathutil.Clamp(totalCount, 0, float64(realtimeRowCount))
+	} else {
+		// Don't allow the final result to go below 1 row
+		totalCount = mathutil.Clamp(totalCount, 1, float64(realtimeRowCount))
+	}
+	return totalCount, nil
+}
+
+var nullKeyBytes, _ = codec.EncodeKey(time.UTC, nil, types.NewDatum(nil)) // encoded key of one NULL datum; NOTE(review): the encode error is discarded - confirm it cannot fail
+
+// equalRowCountOnIndex estimates how many rows have an index key equal to the encoded value b.
+// Stats version 1 uses the CMSketch/histogram; newer versions use TopN, bucket repeats, then an average per distinct value.
+func equalRowCountOnIndex(sctx planctx.PlanContext, idx *statistics.Index, b []byte, realtimeRowCount, modifyCount int64) (result float64) {
+	if sctx.GetSessionVars().StmtCtx.EnableOptimizerDebugTrace {
+		debugtrace.EnterContextCommon(sctx)
+		debugtrace.RecordAnyValuesWithNames(sctx, "Encoded Value", b)
+		defer func() {
+			debugtrace.RecordAnyValuesWithNames(sctx, "Result", result)
+			debugtrace.LeaveContextCommon(sctx)
+		}()
+	}
+	if len(idx.Info.Columns) == 1 && bytes.Equal(b, nullKeyBytes) { // single-column index probed with NULL: answer from the histogram's NullCount
+		return float64(idx.Histogram.NullCount)
+	}
+	val := types.NewBytesDatum(b)
+	if idx.StatsVer < statistics.Version2 {
+		if idx.Histogram.NDV > 0 && outOfRangeOnIndex(idx, val) {
+			return outOfRangeEQSelectivity(sctx, idx.Histogram.NDV, realtimeRowCount, int64(idx.TotalRowCount())) * idx.TotalRowCount()
+		}
+		if idx.CMSketch != nil {
+			return float64(idx.QueryBytes(sctx, b))
+		}
+		histRowCount, _ := idx.Histogram.EqualRowCount(sctx, val, false)
+		return histRowCount
+	}
+	// stats version >= 2
+	// 1. try to find this value in TopN
+	if idx.TopN != nil {
+		count, found := idx.TopN.QueryTopN(sctx, b)
+		if found {
+			return float64(count)
+		}
+	}
+	// 2. try to find this value in bucket.Repeat(the last value in every bucket)
+	histCnt, matched := idx.Histogram.EqualRowCount(sctx, val, true)
+	if matched {
+		return histCnt
+	}
+	// 3. use uniform distribution assumption for the rest (even when this value is not covered by the range of stats)
+	histNDV := float64(idx.Histogram.NDV - int64(idx.TopN.Num()))
+	if histNDV <= 0 {
+		// If histNDV is zero - we have all NDV's in TopN - and no histograms. This function uses
+		// idx.TotalRowCount rather than idx.Histogram.NotNullCount() since the histograms are empty.
+		//
+		// If the table hasn't been modified, it's safe to return 0.
+		if modifyCount == 0 {
+			return 0
+		}
+		// ELSE calculate an approximate estimate based upon newly inserted rows.
+		//
+		// Reset to the original NDV, or if no NDV - derive an NDV using sqrt
+		if idx.Histogram.NDV > 0 {
+			histNDV = float64(idx.Histogram.NDV)
+		} else {
+			histNDV = math.Sqrt(max(idx.TotalRowCount(), float64(realtimeRowCount)))
+		}
+		// As a conservative estimate - take the smaller of the original totalRows or the additions.
+		// "realtimeRowCount - original count" is a better measure of inserts than modifyCount
+		totalRowCount := min(idx.TotalRowCount(), float64(realtimeRowCount)-idx.TotalRowCount())
+		return max(1, totalRowCount/histNDV) // never estimate below one row on this path
+	}
+	// return the average histogram rows (which excludes topN) and NDV that excluded topN
+	return idx.Histogram.NotNullCount() / histNDV
+}
+
+// expBackoffEstimation estimates the selectivity of a multi-column index range using Exponential Backoff (details below); success is false when no column yields an estimate.
+func expBackoffEstimation(sctx planctx.PlanContext, idx *statistics.Index, coll *statistics.HistColl, indexRange *ranger.Range) (sel float64, success bool, err error) {
+	if sctx.GetSessionVars().StmtCtx.EnableOptimizerDebugTrace {
+		debugtrace.EnterContextCommon(sctx)
+		defer func() {
+			debugtrace.RecordAnyValuesWithNames(sctx,
+				"Result", sel,
+				"Success", success,
+				"error", err,
+			)
+			debugtrace.LeaveContextCommon(sctx)
+		}()
+	}
+	tmpRan := []*ranger.Range{
+		{
+			LowVal:    make([]types.Datum, 1),
+			HighVal:   make([]types.Datum, 1),
+			Collators: make([]collate.Collator, 1),
+		},
+	}
+	colsIDs := coll.Idx2ColUniqueIDs[idx.Histogram.ID]
+	singleColumnEstResults := make([]float64, 0, len(indexRange.LowVal))
+	// The following code uses Exponential Backoff to reduce the impact of the independence assumption. It works like:
+	//   1. Calc the selectivity of each column.
+	//   2. Sort them and choose the 4 most selective filters; their selectivities are sel_1, sel_2, sel_3, sel_4 where i < j => sel_i < sel_j.
+	//   3. The final selectivity would be sel_1 * sel_2^{1/2} * sel_3^{1/4} * sel_4^{1/8}.
+	// This calculation weakens the independence assumption and can work better than assuming full independence.
+	for i := 0; i < len(indexRange.LowVal); i++ {
+		tmpRan[0].LowVal[0] = indexRange.LowVal[i]
+		tmpRan[0].HighVal[0] = indexRange.HighVal[i]
+		tmpRan[0].Collators[0] = indexRange.Collators[0] // NOTE(review): always Collators[0], not Collators[i] - confirm this is intended
+		if i == len(indexRange.LowVal)-1 {
+			tmpRan[0].LowExclude = indexRange.LowExclude
+			tmpRan[0].HighExclude = indexRange.HighExclude
+		}
+		colID := colsIDs[i]
+		var (
+			count       float64
+			selectivity float64
+			err         error // shadows the named result; it is returned explicitly below
+			foundStats  bool
+		)
+		if !statistics.ColumnStatsIsInvalid(coll.GetCol(colID), sctx, coll, colID) {
+			foundStats = true
+			count, err = GetRowCountByColumnRanges(sctx, coll, colID, tmpRan)
+			selectivity = count / float64(coll.RealtimeCount)
+		}
+		if idxIDs, ok := coll.ColUniqueID2IdxIDs[colID]; ok && !foundStats && len(indexRange.LowVal) > 1 {
+			// Note the `len(indexRange.LowVal) > 1` condition here, it means we only recursively call
+			// `GetRowCountByIndexRanges()` when the input `indexRange` is a multi-column range. This
+			// check avoids infinite recursion.
+			for _, idxID := range idxIDs {
+				idxStats := coll.GetIdx(idxID)
+				if idxStats == nil || statistics.IndexStatsIsInvalid(sctx, idxStats, coll, idxID) {
+					continue
+				}
+				foundStats = true
+				count, err = GetRowCountByIndexRanges(sctx, coll, idxID, tmpRan)
+				if err == nil { // record the selectivity before leaving the loop; on error fall through and try the next index
+					realtimeCnt, _ := coll.GetScaledRealtimeAndModifyCnt(idxStats)
+					selectivity = count / float64(realtimeCnt)
+					break
+				}
+			}
+		}
+		if !foundStats {
+			continue
+		}
+		if err != nil {
+			return 0, false, err
+		}
+		singleColumnEstResults = append(singleColumnEstResults, selectivity)
+	}
+	// Sort ascending so the most selective (smallest) estimates come first.
+	slices.Sort(singleColumnEstResults)
+	l := len(singleColumnEstResults)
+	failpoint.Inject("cleanEstResults", func() {
+		singleColumnEstResults = singleColumnEstResults[:0]
+		l = 0
+	})
+	if l == 1 {
+		return singleColumnEstResults[0], true, nil
+	} else if l == 0 {
+		return 0, false, nil
+	}
+	// Do not allow the exponential backoff to go below the available index bound. If the number of predicates
+	// is less than the number of index columns - use 90% of the bound to differentiate a subset from full index match.
+	// If there is an individual column selectivity that goes below this bound, use that selectivity only.
+	histNDV := coll.RealtimeCount
+	if idx.NDV > 0 {
+		histNDV = idx.NDV
+	}
+	idxLowBound := 1 / float64(min(histNDV, coll.RealtimeCount))
+	if l < len(idx.Info.Columns) {
+		idxLowBound /= 0.9 // i.e. 1 / (0.9 * NDV)
+	}
+	minTwoCol := min(singleColumnEstResults[0], singleColumnEstResults[1], idxLowBound)
+	multTwoCol := singleColumnEstResults[0] * math.Sqrt(singleColumnEstResults[1])
+	if l == 2 {
+		return max(minTwoCol, multTwoCol), true, nil
+	}
+	minThreeCol := min(minTwoCol, singleColumnEstResults[2])
+	multThreeCol := multTwoCol * math.Sqrt(math.Sqrt(singleColumnEstResults[2]))
+	if l == 3 {
+		return max(minThreeCol, multThreeCol), true, nil
+	}
+	minFourCol := min(minThreeCol, singleColumnEstResults[3])
+	multFourCol := multThreeCol * math.Sqrt(math.Sqrt(math.Sqrt(singleColumnEstResults[3])))
+	return max(minFourCol, multFourCol), true, nil // estimates beyond the fourth are ignored
+}
+
+// outOfRangeOnIndex reports whether val is outside the histogram range; a val that prefixes the first row of the histogram bounds counts as in range.
+func outOfRangeOnIndex(idx *statistics.Index, val types.Datum) bool {
+	if !idx.Histogram.OutOfRange(val) {
+		return false
+	}
+	if idx.Histogram.Len() > 0 && matchPrefix(idx.Histogram.Bounds.GetRow(0), 0, &val) {
+		return false
+	}
+	return true
+}
+
+// matchPrefix reports whether the string/bytes-like datum ad is a prefix of column colIdx of row; other datum kinds never match.
+func matchPrefix(row chunk.Row, colIdx int, ad *types.Datum) bool {
+	switch ad.Kind() {
+	case types.KindString, types.KindBytes, types.KindBinaryLiteral, types.KindMysqlBit:
+		return strings.HasPrefix(row.GetString(colIdx), ad.GetString())
+	}
+	return false
+}
+
+// betweenRowCountOnIndex estimates the row count for interval [l, r).
+// The input sctx is just for debug trace, you can pass nil safely if that's not needed.
+func betweenRowCountOnIndex(sctx planctx.PlanContext, idx *statistics.Index, l, r types.Datum) float64 {
+	histBetweenCnt := idx.Histogram.BetweenRowCount(sctx, l, r)
+	if idx.StatsVer == statistics.Version1 { // version 1: the histogram count alone is returned
+		return histBetweenCnt
+	}
+	return float64(idx.TopN.BetweenCount(sctx, l.GetBytes(), r.GetBytes())) + histBetweenCnt // otherwise add the TopN count between the raw bytes of l and r
+}
+
+// getOrdinalOfRangeCond returns the position of the first column whose low and high bounds differ (the range condition);
+// if every column has equal bounds it returns len(ran.LowVal). A comparison error yields 0.
+func getOrdinalOfRangeCond(sc *stmtctx.StatementContext, ran *ranger.Range) int {
+	for i := range ran.LowVal {
+		a, b := ran.LowVal[i], ran.HighVal[i]
+		cmp, err := a.Compare(sc.TypeCtx(), &b, ran.Collators[0]) // NOTE(review): always Collators[0], not Collators[i] - confirm this is intended
+		if err != nil {
+			return 0
+		}
+		if cmp != 0 {
+			return i
+		}
+	}
+	return len(ran.LowVal)
+}
diff --git a/statistics/selectivity_test.go b/statistics/selectivity_test.go
index fca2f9261c632..46245bbe5137d 100644
--- a/statistics/selectivity_test.go
+++ b/statistics/selectivity_test.go
@@ -274,6 +274,56 @@ func TestEstimationForUnknownValues(t *testing.T) {
 	require.Equal(t, 0.0, count)
 }
 
+func TestEstimationForUnknownValuesAfterModify(t *testing.T) {
+	store, dom := testkit.CreateMockStoreAndDomain(t)
+	testKit := testkit.NewTestKit(t, store)
+	testKit.MustExec("use test")
+	testKit.MustExec("drop table if exists t")
+	testKit.MustExec("create table t(a int, key idx(a))")
+	testKit.MustExec("set @@tidb_analyze_version=2")
+	testKit.MustExec("set @@global.tidb_enable_auto_analyze='OFF'")
+	for i := 1; i <= 10; i++ {
+		testKit.MustExec(fmt.Sprintf("insert into t values (%d)", i)) // five single-row inserts of value i ...
+		testKit.MustExec(fmt.Sprintf("insert into t values (%d)", i))
+		testKit.MustExec(fmt.Sprintf("insert into t values (%d)", i))
+		testKit.MustExec(fmt.Sprintf("insert into t values (%d)", i))
+		testKit.MustExec(fmt.Sprintf("insert into t values (%d)", i))
+		testKit.MustExec(fmt.Sprintf("insert into t select a from t where a = %d", i)) // ... doubled to 10 rows per value (100 rows in total)
+	}
+	testKit.MustExec("analyze table t")
+	h := dom.StatsHandle()
+	require.Nil(t, h.DumpStatsDeltaToKV(true))
+
+	table, err := dom.InfoSchema().TableByName(context.Background(), pmodel.NewCIStr("test"), pmodel.NewCIStr("t"))
+	require.NoError(t, err)
+	statsTbl := h.GetTableStats(table.Meta())
+
+	// A value covered by the stats (a = 5, inserted 10 times) must be estimated at exactly 10 rows.
+	sctx := mock.NewContext()
+	col := statsTbl.GetCol(table.Meta().Columns[0].ID)
+	count, err := cardinality.GetColumnRowCount(sctx, col, getRange(5, 5), statsTbl.RealtimeCount, statsTbl.ModifyCount, false)
+	require.NoError(t, err)
+	require.Equal(t, 10.0, count)
+
+	// A value absent from the stats (a = 11), queried right after ANALYZE with no later changes: the test expects an estimate of 1.0.
+	count, err = cardinality.GetColumnRowCount(sctx, col, getRange(11, 11), statsTbl.RealtimeCount, statsTbl.ModifyCount, false)
+	require.NoError(t, err)
+	require.Equal(t, 1.0, count)
+
+	// Add another 200 rows to the table (values 11..20, 20 rows each) without re-running ANALYZE.
+	testKit.MustExec("insert into t select a+10 from t")
+	testKit.MustExec("insert into t select a+10 from t where a <= 10")
+	require.Nil(t, h.DumpStatsDeltaToKV(true))
+	require.Nil(t, h.Update(context.Background(), dom.InfoSchema()))
+	statsTblnew := h.GetTableStats(table.Meta())
+
+	// a = 15 is still unknown to col (taken from the earlier statsTbl); only the realtime/modify counts come from statsTblnew. The estimate should be >= 10 and <= 40.
+	count, err = cardinality.GetColumnRowCount(sctx, col, getRange(15, 15), statsTblnew.RealtimeCount, statsTblnew.ModifyCount, false)
+	require.NoError(t, err)
+	require.Truef(t, count < 41, "expected: between 10 to 40, got: %v", count)
+	require.Truef(t, count > 9, "expected: between 10 to 40, got: %v", count)
+}
+
 func TestEstimationUniqueKeyEqualConds(t *testing.T) {
 	store, dom := testkit.CreateMockStoreAndDomain(t)
 	testKit := testkit.NewTestKit(t, store)