From a02061d0ed78561268d3ee07e6e75cd2400cd38f Mon Sep 17 00:00:00 2001
From: tpp <146148086+terry1purcell@users.noreply.github.com>
Date: Fri, 8 Nov 2024 00:05:49 +0800
Subject: [PATCH] This is an automated cherry-pick of #56848

Signed-off-by: ti-chi-bot <ti-community-prow-bot@tidb.io>
---
 pkg/planner/cardinality/BUILD.bazel         |  97 ++++
 pkg/planner/cardinality/row_count_column.go | 414 ++++++++++++++
 pkg/planner/cardinality/row_count_index.go  | 597 ++++++++++++++++++++
 statistics/selectivity_test.go              |  50 ++
 4 files changed, 1158 insertions(+)
 create mode 100644 pkg/planner/cardinality/BUILD.bazel
 create mode 100644 pkg/planner/cardinality/row_count_column.go
 create mode 100644 pkg/planner/cardinality/row_count_index.go

diff --git a/pkg/planner/cardinality/BUILD.bazel b/pkg/planner/cardinality/BUILD.bazel
new file mode 100644
index 0000000000000..36657805f4bce
--- /dev/null
+++ b/pkg/planner/cardinality/BUILD.bazel
@@ -0,0 +1,97 @@
+load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test")
+
+go_library(
+    name = "cardinality",
+    srcs = [
+        "cross_estimation.go",
+        "join.go",
+        "ndv.go",
+        "pseudo.go",
+        "row_count_column.go",
+        "row_count_index.go",
+        "row_size.go",
+        "selectivity.go",
+        "trace.go",
+    ],
+    importpath = "github.com/pingcap/tidb/pkg/planner/cardinality",
+    visibility = ["//visibility:public"],
+    deps = [
+        "//pkg/expression",
+        "//pkg/kv",
+        "//pkg/meta/model",
+        "//pkg/parser/ast",
+        "//pkg/parser/format",
+        "//pkg/parser/mysql",
+        "//pkg/planner/planctx",
+        "//pkg/planner/property",
+        "//pkg/planner/util",
+        "//pkg/planner/util/debugtrace",
+        "//pkg/planner/util/fixcontrol",
+        "//pkg/sessionctx/stmtctx",
+        "//pkg/statistics",
+        "//pkg/tablecodec",
+        "//pkg/types",
+        "//pkg/types/parser_driver",
+        "//pkg/util/chunk",
+        "//pkg/util/codec",
+        "//pkg/util/collate",
+        "//pkg/util/logutil",
+        "//pkg/util/mathutil",
+        "//pkg/util/ranger",
+        "//pkg/util/set",
+        "//pkg/util/tracing",
+        "@com_github_pingcap_errors//:errors",
+        "@com_github_pingcap_failpoint//:failpoint",
+        "@org_uber_go_zap//:zap",
+    ],
+)
+
+go_test(
+    name = "cardinality_test",
+    timeout = "short",
+    srcs = [
+        "main_test.go",
+        "row_count_test.go",
+        "row_size_test.go",
+        "selectivity_test.go",
+        "trace_test.go",
+    ],
+    data = glob(["testdata/**"]),
+    embed = [":cardinality"],
+    flaky = True,
+    shard_count = 29,
+    deps = [
+        "//pkg/config",
+        "//pkg/domain",
+        "//pkg/executor",
+        "//pkg/expression",
+        "//pkg/infoschema",
+        "//pkg/kv",
+        "//pkg/meta/model",
+        "//pkg/parser",
+        "//pkg/parser/model",
+        "//pkg/parser/mysql",
+        "//pkg/planner/core",
+        "//pkg/planner/core/base",
+        "//pkg/planner/core/operator/logicalop",
+        "//pkg/planner/core/resolve",
+        "//pkg/session",
+        "//pkg/sessionctx",
+        "//pkg/sessionctx/stmtctx",
+        "//pkg/sessionctx/variable",
+        "//pkg/statistics",
+        "//pkg/testkit",
+        "//pkg/testkit/testdata",
+        "//pkg/testkit/testmain",
+        "//pkg/testkit/testsetup",
+        "//pkg/types",
+        "//pkg/util/codec",
+        "//pkg/util/collate",
+        "//pkg/util/mock",
+        "//pkg/util/ranger",
+        "//pkg/util/tracing",
+        "@com_github_pingcap_failpoint//:failpoint",
+        "@com_github_stretchr_testify//require",
+        "@org_uber_go_goleak//:goleak",
+    ],
+)
diff --git a/pkg/planner/cardinality/row_count_column.go b/pkg/planner/cardinality/row_count_column.go
new file mode 100644
index 0000000000000..fa1d8364ed6e3
--- /dev/null
+++ b/pkg/planner/cardinality/row_count_column.go
@@ -0,0 +1,414 @@
+// Copyright 2023 PingCAP, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package cardinality
+
+import (
+	"math"
+
+	"github.com/pingcap/errors"
+	"github.com/pingcap/tidb/pkg/planner/planctx"
+	"github.com/pingcap/tidb/pkg/planner/util/debugtrace"
+	"github.com/pingcap/tidb/pkg/planner/util/fixcontrol"
+	"github.com/pingcap/tidb/pkg/statistics"
+	"github.com/pingcap/tidb/pkg/types"
+	"github.com/pingcap/tidb/pkg/util/codec"
+	"github.com/pingcap/tidb/pkg/util/collate"
+	"github.com/pingcap/tidb/pkg/util/mathutil"
+	"github.com/pingcap/tidb/pkg/util/ranger"
+)
+
+func init() { // Point the function variables exposed by package statistics at this package's estimators.
+	statistics.GetRowCountByColumnRanges = GetRowCountByColumnRanges
+	statistics.GetRowCountByIntColumnRanges = GetRowCountByIntColumnRanges
+	statistics.GetRowCountByIndexRanges = GetRowCountByIndexRanges // implemented in row_count_index.go
+}
+
+// GetRowCountByColumnRanges estimates how many rows fall in colRanges for column colUniqueID, using pseudo estimation when its stats are invalid.
+func GetRowCountByColumnRanges(sctx planctx.PlanContext, coll *statistics.HistColl, colUniqueID int64, colRanges []*ranger.Range) (result float64, err error) {
+	var name string // column name for tracing; stays empty when the column or its Info is missing
+	if sctx.GetSessionVars().StmtCtx.EnableOptimizerDebugTrace {
+		debugtrace.EnterContextCommon(sctx)
+		debugTraceGetRowCountInput(sctx, colUniqueID, colRanges)
+		defer func() {
+			debugtrace.RecordAnyValuesWithNames(sctx, "Name", name, "Result", result)
+			debugtrace.LeaveContextCommon(sctx)
+		}()
+	}
+	sc := sctx.GetSessionVars().StmtCtx
+	c := coll.GetCol(colUniqueID)
+	colInfoID := colUniqueID
+	if len(coll.UniqueID2colInfoID) > 0 {
+		colInfoID = coll.UniqueID2colInfoID[colUniqueID]
+	}
+	recordUsedItemStatsStatus(sctx, c, coll.PhysicalID, colInfoID)
+	if c != nil && c.Info != nil {
+		name = c.Info.Name.O
+	}
+	if statistics.ColumnStatsIsInvalid(c, sctx, coll, colUniqueID) {
+		result, err = getPseudoRowCountByColumnRanges(sc.TypeCtx(), float64(coll.RealtimeCount), colRanges, 0)
+		if err == nil && sc.EnableOptimizerCETrace && c != nil {
+			ceTraceRange(sctx, coll.PhysicalID, []string{name}, colRanges, "Column Stats-Pseudo", uint64(result)) // name, not c.Info.Name.O: c.Info may be nil
+		}
+		return result, err
+	}
+	if sctx.GetSessionVars().StmtCtx.EnableOptimizerDebugTrace {
+		debugtrace.RecordAnyValuesWithNames(sctx,
+			"Histogram NotNull Count", c.Histogram.NotNullCount(),
+			"TopN total count", c.TopN.TotalCount(),
+			"Increase Factor", c.GetIncreaseFactor(coll.RealtimeCount),
+		)
+	}
+	result, err = GetColumnRowCount(sctx, c, colRanges, coll.RealtimeCount, coll.ModifyCount, false)
+	if sc.EnableOptimizerCETrace {
+		ceTraceRange(sctx, coll.PhysicalID, []string{name}, colRanges, "Column Stats", uint64(result)) // same nil-safe name as above
+	}
+	return result, errors.Trace(err)
+}
+
+// GetRowCountByIntColumnRanges estimates how many rows fall in intRanges (integer ranges given as []*ranger.Range) for column colUniqueID.
+func GetRowCountByIntColumnRanges(sctx planctx.PlanContext, coll *statistics.HistColl, colUniqueID int64, intRanges []*ranger.Range) (result float64, err error) {
+	var name string // column name for tracing; stays empty when the column or its Info is missing
+	if sctx.GetSessionVars().StmtCtx.EnableOptimizerDebugTrace {
+		debugtrace.EnterContextCommon(sctx)
+		debugTraceGetRowCountInput(sctx, colUniqueID, intRanges)
+		defer func() {
+			debugtrace.RecordAnyValuesWithNames(sctx, "Name", name, "Result", result)
+			debugtrace.LeaveContextCommon(sctx)
+		}()
+	}
+	sc := sctx.GetSessionVars().StmtCtx
+	c := coll.GetCol(colUniqueID)
+	colInfoID := colUniqueID
+	if len(coll.UniqueID2colInfoID) > 0 {
+		colInfoID = coll.UniqueID2colInfoID[colUniqueID]
+	}
+	recordUsedItemStatsStatus(sctx, c, coll.PhysicalID, colInfoID)
+	if c != nil && c.Info != nil {
+		name = c.Info.Name.O
+	}
+	if statistics.ColumnStatsIsInvalid(c, sctx, coll, colUniqueID) {
+		if len(intRanges) == 0 {
+			return 0, nil
+		}
+		if intRanges[0].LowVal[0].Kind() == types.KindInt64 { // the kind of the first lower bound selects signed vs unsigned pseudo estimation
+			result = getPseudoRowCountBySignedIntRanges(intRanges, float64(coll.RealtimeCount))
+		} else {
+			result = getPseudoRowCountByUnsignedIntRanges(intRanges, float64(coll.RealtimeCount))
+		}
+		if sc.EnableOptimizerCETrace && c != nil {
+			ceTraceRange(sctx, coll.PhysicalID, []string{name}, intRanges, "Column Stats-Pseudo", uint64(result)) // name, not c.Info.Name.O: c.Info may be nil
+		}
+		return result, nil
+	}
+	if sctx.GetSessionVars().StmtCtx.EnableOptimizerDebugTrace {
+		debugtrace.RecordAnyValuesWithNames(sctx,
+			"Histogram NotNull Count", c.Histogram.NotNullCount(),
+			"TopN total count", c.TopN.TotalCount(),
+			"Increase Factor", c.GetIncreaseFactor(coll.RealtimeCount),
+		)
+	}
+	result, err = GetColumnRowCount(sctx, c, intRanges, coll.RealtimeCount, coll.ModifyCount, true)
+	if sc.EnableOptimizerCETrace {
+		ceTraceRange(sctx, coll.PhysicalID, []string{name}, intRanges, "Column Stats", uint64(result)) // same nil-safe name as above
+	}
+	return result, errors.Trace(err)
+}
+
+// equalRowCountOnColumn estimates the number of rows whose column value equals val; encodedVal is the key passed to the TopN lookup.
+func equalRowCountOnColumn(sctx planctx.PlanContext, c *statistics.Column, val types.Datum, encodedVal []byte, realtimeRowCount, modifyCount int64) (result float64, err error) {
+	if sctx.GetSessionVars().StmtCtx.EnableOptimizerDebugTrace {
+		debugtrace.EnterContextCommon(sctx)
+		debugtrace.RecordAnyValuesWithNames(sctx, "Value", val.String(), "Encoded", encodedVal)
+		defer func() {
+			debugtrace.RecordAnyValuesWithNames(sctx, "Result", result, "Error", err)
+			debugtrace.LeaveContextCommon(sctx)
+		}()
+	}
+	if val.IsNull() {
+		return float64(c.NullCount), nil
+	}
+	if c.StatsVer < statistics.Version2 {
+		// The histogram has no bounds: all the values are null.
+		if c.Histogram.Bounds.NumRows() == 0 {
+			return 0.0, nil
+		}
+		if c.Histogram.NDV > 0 && c.OutOfRange(val) {
+			return outOfRangeEQSelectivity(sctx, c.Histogram.NDV, realtimeRowCount, int64(c.TotalRowCount())) * c.TotalRowCount(), nil
+		}
+		if c.CMSketch != nil {
+			count, err := statistics.QueryValue(sctx, c.CMSketch, c.TopN, val)
+			return float64(count), errors.Trace(err)
+		}
+		histRowCount, _ := c.Histogram.EqualRowCount(sctx, val, false) // the "matched" flag is not needed on this path
+		return histRowCount, nil
+	}
+
+	// Stats version >= 2
+	// Neither histogram bounds nor TopN entries exist: all the values are null.
+	if c.Histogram.Bounds.NumRows() == 0 && c.TopN.Num() == 0 {
+		return 0, nil
+	}
+	// 1. try to find this value in TopN
+	if c.TopN != nil {
+		rowcount, ok := c.TopN.QueryTopN(sctx, encodedVal)
+		if ok {
+			return float64(rowcount), nil
+		}
+	}
+	// 2. try to find this value in bucket.Repeat (the last value in every bucket)
+	histCnt, matched := c.Histogram.EqualRowCount(sctx, val, true)
+	if matched {
+		return histCnt, nil
+	}
+	// 3. use uniform distribution assumption for the rest (even when this value is not covered by the range of stats)
+	histNDV := float64(c.Histogram.NDV - int64(c.TopN.Num()))
+	if histNDV <= 0 {
+		// If histNDV is not positive, all NDVs are in TopN and there are no histograms. This branch uses
+		// c.NotNullCount rather than c.Histogram.NotNullCount() since the histograms are empty.
+		//
+		// If the table hasn't been modified, it's safe to return 0.
+		if modifyCount == 0 {
+			return 0, nil
+		}
+		// Otherwise calculate an approximate estimate based upon newly inserted rows.
+		//
+		// Reset to the original NDV, or if there is no NDV, derive one using sqrt.
+		if c.Histogram.NDV > 0 {
+			histNDV = float64(c.Histogram.NDV)
+		} else {
+			histNDV = math.Sqrt(max(c.NotNullCount(), float64(realtimeRowCount)))
+		}
+		// As a conservative estimate, take the smaller of the original totalRows or the additions.
+		// "realtimeRowCount - original count" is a better measure of inserts than modifyCount.
+		totalRowCount := min(c.NotNullCount(), float64(realtimeRowCount)-c.NotNullCount())
+		return max(1, totalRowCount/histNDV), nil
+	}
+	// return the average rows per distinct value, using the histogram rows and the NDV that both exclude TopN
+	return c.Histogram.NotNullCount() / histNDV, nil
+}
+
+// GetColumnRowCount estimates the total row count of column c over ranges, handling each range as a point, a ver1 small range, or an interval.
+func GetColumnRowCount(sctx planctx.PlanContext, c *statistics.Column, ranges []*ranger.Range, realtimeRowCount, modifyCount int64, pkIsHandle bool) (float64, error) {
+	sc := sctx.GetSessionVars().StmtCtx
+	debugTrace := sc.EnableOptimizerDebugTrace
+	if debugTrace {
+		debugtrace.EnterContextCommon(sctx)
+		defer debugtrace.LeaveContextCommon(sctx)
+	}
+	var rowCount float64
+	for _, rg := range ranges {
+		highVal := *rg.HighVal[0].Clone() // cloned because string bounds are rewritten below
+		lowVal := *rg.LowVal[0].Clone()
+		if highVal.Kind() == types.KindString { // replace a string bound with its collation key
+			highVal.SetBytes(collate.GetCollator(highVal.Collation()).Key(highVal.GetString()))
+		}
+		if lowVal.Kind() == types.KindString {
+			lowVal.SetBytes(collate.GetCollator(lowVal.Collation()).Key(lowVal.GetString()))
+		}
+		cmp, err := lowVal.Compare(sc.TypeCtx(), &highVal, collate.GetBinaryCollator())
+		if err != nil {
+			return 0, errors.Trace(err)
+		}
+		lowEncoded, err := codec.EncodeKey(sc.TimeZone(), nil, lowVal)
+		err = sc.HandleError(err)
+		if err != nil {
+			return 0, err
+		}
+		highEncoded, err := codec.EncodeKey(sc.TimeZone(), nil, highVal)
+		err = sc.HandleError(err)
+		if err != nil {
+			return 0, err
+		}
+		if debugTrace {
+			debugTraceStartEstimateRange(sctx, rg, lowEncoded, highEncoded, rowCount)
+		}
+		if cmp == 0 {
+			// case 1: low == high, so it's a point; it contributes nothing when either bound is excluded
+			if !rg.LowExclude && !rg.HighExclude {
+				// With pkIsHandle set, the point is counted as exactly one row.
+				if pkIsHandle {
+					rowCount++
+					if debugTrace {
+						debugTraceEndEstimateRange(sctx, 1, debugTraceUniquePoint)
+					}
+					continue
+				}
+				var cnt float64
+				cnt, err = equalRowCountOnColumn(sctx, c, lowVal, lowEncoded, realtimeRowCount, modifyCount)
+				if err != nil {
+					return 0, errors.Trace(err)
+				}
+				// If the current table row count has changed, we should scale the row count accordingly.
+				cnt *= c.GetIncreaseFactor(realtimeRowCount)
+				rowCount += cnt
+				if debugTrace {
+					debugTraceEndEstimateRange(sctx, cnt, debugTracePoint)
+				}
+			}
+			continue
+		}
+		// In stats ver 1, we use CM Sketch to estimate row count for point condition, which is more accurate.
+		// So for a small range, we convert it to points and sum the per-point estimates.
+		if c.StatsVer < 2 {
+			rangeVals := statistics.EnumRangeValues(lowVal, highVal, rg.LowExclude, rg.HighExclude)
+
+			// case 2: a small range with ver1 stats. NOTE(review): lowEncoded is passed for every enumerated value; confirm the ver1 path only traces it.
+			if rangeVals != nil {
+				for _, val := range rangeVals {
+					cnt, err := equalRowCountOnColumn(sctx, c, val, lowEncoded, realtimeRowCount, modifyCount)
+					if err != nil {
+						return 0, err
+					}
+					// If the current table row count has changed, we should scale the row count accordingly.
+					cnt *= c.GetIncreaseFactor(realtimeRowCount)
+					if debugTrace {
+						debugTraceEndEstimateRange(sctx, cnt, debugTraceVer1SmallRange)
+					}
+					rowCount += cnt
+				}
+
+				continue
+			}
+		}
+
+		// case 3: it's an interval
+		cnt := betweenRowCountOnColumn(sctx, c, lowVal, highVal, lowEncoded, highEncoded)
+		// `betweenRowCountOnColumn` returns the count for the [l, h) range; we adjust cnt for the boundaries here.
+		// Note that `cnt` does not include null values, so we need to specially handle cases
+		//   where null is the lower bound.
+		// And because we use (2, MaxValue] to represent expressions like a > 2 and use [MinNotNull, 3) to represent
+		//   expressions like b < 3, we need to exclude the special values.
+		if rg.LowExclude && !lowVal.IsNull() && lowVal.Kind() != types.KindMaxValue && lowVal.Kind() != types.KindMinNotNull {
+			lowCnt, err := equalRowCountOnColumn(sctx, c, lowVal, lowEncoded, realtimeRowCount, modifyCount)
+			if err != nil {
+				return 0, errors.Trace(err)
+			}
+			cnt -= lowCnt
+			cnt = mathutil.Clamp(cnt, 0, c.NotNullCount())
+		}
+		if !rg.LowExclude && lowVal.IsNull() {
+			cnt += float64(c.NullCount)
+		}
+		if !rg.HighExclude && highVal.Kind() != types.KindMaxValue && highVal.Kind() != types.KindMinNotNull {
+			highCnt, err := equalRowCountOnColumn(sctx, c, highVal, highEncoded, realtimeRowCount, modifyCount)
+			if err != nil {
+				return 0, errors.Trace(err)
+			}
+			cnt += highCnt
+		}
+
+		cnt = mathutil.Clamp(cnt, 0, c.TotalRowCount())
+
+		// If the current table row count has changed, we should scale the row count accordingly.
+		increaseFactor := c.GetIncreaseFactor(realtimeRowCount)
+		cnt *= increaseFactor
+
+		// handling the out-of-range part; a NULL lower bound does not count as out of range
+		if (c.OutOfRange(lowVal) && !lowVal.IsNull()) || c.OutOfRange(highVal) {
+			histNDV := c.NDV
+			// Exclude the TopN entries from the NDV for version 2 stats
+			if c.StatsVer == statistics.Version2 {
+				histNDV -= int64(c.TopN.Num())
+			}
+			cnt += c.Histogram.OutOfRangeRowCount(sctx, &lowVal, &highVal, modifyCount, histNDV, increaseFactor)
+		}
+
+		if debugTrace {
+			debugTraceEndEstimateRange(sctx, cnt, debugTraceRange)
+		}
+		rowCount += cnt
+	}
+	allowZeroEst := fixcontrol.GetBoolWithDefault(
+		sctx.GetSessionVars().GetOptimizerFixControlMap(),
+		fixcontrol.Fix47400,
+		false,
+	)
+	if allowZeroEst {
+		rowCount = mathutil.Clamp(rowCount, 0, float64(realtimeRowCount))
+	} else {
+		// Unless fix control Fix47400 is enabled, don't allow the final result to go below 1 row
+		rowCount = mathutil.Clamp(rowCount, 1, float64(realtimeRowCount))
+	}
+	return rowCount, nil
+}
+
+// betweenRowCountOnColumn returns the estimate for [l, r): the histogram count, plus the TopN count for stats newer than version 1.
+func betweenRowCountOnColumn(sctx planctx.PlanContext, c *statistics.Column, l, r types.Datum, lowEncoded, highEncoded []byte) float64 {
+	cnt := c.Histogram.BetweenRowCount(sctx, l, r)
+	if c.StatsVer > statistics.Version1 {
+		cnt = float64(c.TopN.BetweenCount(sctx, lowEncoded, highEncoded)) + cnt
+	}
+	return cnt
+}
+
+// The functions below are mainly for testing.
+
+// ColumnGreaterRowCount returns the estimated number of rows whose column value is greater than value.
+func ColumnGreaterRowCount(sctx planctx.PlanContext, t *statistics.Table, value types.Datum, colID int64) float64 {
+	col := t.GetCol(colID)
+	if !statistics.ColumnStatsIsInvalid(col, sctx, &t.HistColl, colID) {
+		return col.GreaterRowCount(value) * col.GetIncreaseFactor(t.RealtimeCount)
+	}
+	return float64(t.RealtimeCount) / pseudoLessRate
+}
+
+// columnLessRowCount returns the estimated number of rows whose column value is less than value; null values are not counted.
+func columnLessRowCount(sctx planctx.PlanContext, t *statistics.Table, value types.Datum, colID int64) float64 {
+	col := t.GetCol(colID)
+	if !statistics.ColumnStatsIsInvalid(col, sctx, &t.HistColl, colID) {
+		return col.LessRowCount(sctx, value) * col.GetIncreaseFactor(t.RealtimeCount)
+	}
+	return float64(t.RealtimeCount) / pseudoLessRate
+}
+
+// columnBetweenRowCount returns the estimated number of rows with a <= column < b,
+// scaled by the column's increase factor for the table's current row count.
+func columnBetweenRowCount(sctx planctx.PlanContext, t *statistics.Table, a, b types.Datum, colID int64) (float64, error) {
+	stmtCtx := sctx.GetSessionVars().StmtCtx
+	col := t.GetCol(colID)
+	if statistics.ColumnStatsIsInvalid(col, sctx, &t.HistColl, colID) {
+		return float64(t.RealtimeCount) / pseudoBetweenRate, nil
+	}
+	lowKey, err := codec.EncodeKey(stmtCtx.TimeZone(), nil, a)
+	if err = stmtCtx.HandleError(err); err != nil {
+		return 0, err
+	}
+	highKey, err := codec.EncodeKey(stmtCtx.TimeZone(), nil, b)
+	if err = stmtCtx.HandleError(err); err != nil {
+		return 0, err
+	}
+	rows := betweenRowCountOnColumn(sctx, col, a, b, lowKey, highKey)
+	if a.IsNull() {
+		// A NULL lower bound also selects the NULL rows.
+		rows += float64(col.NullCount)
+	}
+	return rows * col.GetIncreaseFactor(t.RealtimeCount), nil
+}
+
+// ColumnEqualRowCount returns the estimated number of rows whose column value equals value.
+func ColumnEqualRowCount(sctx planctx.PlanContext, t *statistics.Table, value types.Datum, colID int64) (float64, error) {
+	col := t.GetCol(colID)
+	if statistics.ColumnStatsIsInvalid(col, sctx, &t.HistColl, colID) {
+		return float64(t.RealtimeCount) / pseudoEqualRate, nil
+	}
+	stmtCtx := sctx.GetSessionVars().StmtCtx
+	encoded, err := codec.EncodeKey(stmtCtx.TimeZone(), nil, value)
+	if err = stmtCtx.HandleError(err); err != nil {
+		return 0, err
+	}
+	// The raw estimate is scaled by the increase factor before being returned together with the traced error.
+	cnt, err := equalRowCountOnColumn(sctx, col, value, encoded, t.RealtimeCount, t.ModifyCount)
+	return cnt * col.GetIncreaseFactor(t.RealtimeCount), errors.Trace(err)
+}
diff --git a/pkg/planner/cardinality/row_count_index.go b/pkg/planner/cardinality/row_count_index.go
new file mode 100644
index 0000000000000..928395ae76eef
--- /dev/null
+++ b/pkg/planner/cardinality/row_count_index.go
@@ -0,0 +1,597 @@
+// Copyright 2023 PingCAP, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package cardinality
+
+import (
+	"bytes"
+	"math"
+	"slices"
+	"strings"
+	"time"
+
+	"github.com/pingcap/errors"
+	"github.com/pingcap/failpoint"
+	"github.com/pingcap/tidb/pkg/kv"
+	"github.com/pingcap/tidb/pkg/planner/planctx"
+	"github.com/pingcap/tidb/pkg/planner/util/debugtrace"
+	"github.com/pingcap/tidb/pkg/planner/util/fixcontrol"
+	"github.com/pingcap/tidb/pkg/sessionctx/stmtctx"
+	"github.com/pingcap/tidb/pkg/statistics"
+	"github.com/pingcap/tidb/pkg/types"
+	"github.com/pingcap/tidb/pkg/util/chunk"
+	"github.com/pingcap/tidb/pkg/util/codec"
+	"github.com/pingcap/tidb/pkg/util/collate"
+	"github.com/pingcap/tidb/pkg/util/mathutil"
+	"github.com/pingcap/tidb/pkg/util/ranger"
+)
+
+// GetRowCountByIndexRanges estimates how many rows fall in indexRanges for index idxID. It uses pseudo
+// estimation when the index stats are invalid, and otherwise the stats-v1 or stats-v2 estimator.
+func GetRowCountByIndexRanges(sctx planctx.PlanContext, coll *statistics.HistColl, idxID int64, indexRanges []*ranger.Range) (result float64, err error) {
+	var name string
+	if sctx.GetSessionVars().StmtCtx.EnableOptimizerDebugTrace {
+		debugtrace.EnterContextCommon(sctx)
+		debugTraceGetRowCountInput(sctx, idxID, indexRanges)
+		defer func() {
+			debugtrace.RecordAnyValuesWithNames(sctx, "Name", name, "Result", result)
+			debugtrace.LeaveContextCommon(sctx)
+		}()
+	}
+	sc := sctx.GetSessionVars().StmtCtx
+	idx := coll.GetIdx(idxID)
+	colNames := make([]string, 0, 8)
+	hasInfo := idx != nil && idx.Info != nil // one guard for every idx.Info access in this function
+	if hasInfo {
+		name = idx.Info.Name.O
+		for _, col := range idx.Info.Columns {
+			colNames = append(colNames, col.Name.O)
+		}
+	}
+	recordUsedItemStatsStatus(sctx, idx, coll.PhysicalID, idxID)
+	if statistics.IndexStatsIsInvalid(sctx, idx, coll, idxID) {
+		colsLen := -1 // stays -1 unless the index is known to be unique
+		if hasInfo && idx.Info.Unique {
+			colsLen = len(idx.Info.Columns)
+		}
+		result, err = getPseudoRowCountByIndexRanges(sc.TypeCtx(), indexRanges, float64(coll.RealtimeCount), colsLen)
+		if err == nil && sc.EnableOptimizerCETrace && idx != nil {
+			ceTraceRange(sctx, coll.PhysicalID, colNames, indexRanges, "Index Stats-Pseudo", uint64(result))
+		}
+		return result, err
+	}
+	realtimeCnt, modifyCount := coll.GetScaledRealtimeAndModifyCnt(idx)
+	if sctx.GetSessionVars().StmtCtx.EnableOptimizerDebugTrace {
+		debugtrace.RecordAnyValuesWithNames(sctx,
+			"Histogram NotNull Count", idx.Histogram.NotNullCount(),
+			"TopN total count", idx.TopN.TotalCount(),
+			"Increase Factor", idx.GetIncreaseFactor(realtimeCnt),
+		)
+	}
+	if idx.CMSketch != nil && idx.StatsVer == statistics.Version1 {
+		result, err = getIndexRowCountForStatsV1(sctx, coll, idxID, indexRanges)
+	} else {
+		result, err = getIndexRowCountForStatsV2(sctx, idx, coll, indexRanges, realtimeCnt, modifyCount)
+	}
+	if sc.EnableOptimizerCETrace {
+		ceTraceRange(sctx, coll.PhysicalID, colNames, indexRanges, "Index Stats", uint64(result))
+	}
+	return result, errors.Trace(err)
+}
+
+// getIndexRowCountForStatsV1 estimates the rows matched by indexRanges on index idxID; its caller uses it for
+// version 1 stats with a CM Sketch. Per range, the values before rangePosition are estimated as equal conditions
+// and the value at rangePosition (if any) by single-column stats; the two selectivities are then multiplied.
+func getIndexRowCountForStatsV1(sctx planctx.PlanContext, coll *statistics.HistColl, idxID int64, indexRanges []*ranger.Range) (float64, error) {
+	sc := sctx.GetSessionVars().StmtCtx
+	debugTrace := sc.EnableOptimizerDebugTrace
+	if debugTrace {
+		debugtrace.EnterContextCommon(sctx)
+		defer debugtrace.LeaveContextCommon(sctx)
+	}
+	idx := coll.GetIdx(idxID)
+	totalCount := float64(0)
+	for _, ran := range indexRanges {
+		if debugTrace {
+			debugTraceStartEstimateRange(sctx, ran, nil, nil, totalCount)
+		}
+		rangePosition := getOrdinalOfRangeCond(sc, ran)
+		var rangeVals []types.Datum
+		// Try to enum the last range values.
+		if rangePosition != len(ran.LowVal) {
+			rangeVals = statistics.EnumRangeValues(ran.LowVal[rangePosition], ran.HighVal[rangePosition], ran.LowExclude, ran.HighExclude)
+			if rangeVals != nil {
+				rangePosition++
+			}
+		}
+		// If the first column is already a range, fall back to the stats-v2 estimator; if it is a
+		// [NULL, NULL] range on a single-column index, fall back as well, because CMSketch does not
+		// contain null values in this case.
+		if rangePosition == 0 || isSingleColIdxNullRange(idx, ran) {
+			realtimeCnt, modifyCount := coll.GetScaledRealtimeAndModifyCnt(idx)
+			count, err := getIndexRowCountForStatsV2(sctx, idx, nil, []*ranger.Range{ran}, realtimeCnt, modifyCount)
+			if err != nil {
+				return 0, errors.Trace(err)
+			}
+			if debugTrace {
+				debugTraceEndEstimateRange(sctx, count, debugTraceRange)
+			}
+			totalCount += count
+			continue
+		}
+		var selectivity float64
+		// use CM Sketch to estimate the equal conditions
+		if rangeVals == nil {
+			encoded, err := codec.EncodeKey(sc.TimeZone(), nil, ran.LowVal[:rangePosition]...)
+			err = sc.HandleError(err)
+			if err != nil {
+				return 0, errors.Trace(err)
+			}
+			selectivity, err = getEqualCondSelectivity(sctx, coll, idx, encoded, rangePosition, ran)
+			if err != nil {
+				return 0, errors.Trace(err)
+			}
+		} else {
+			encoded, err := codec.EncodeKey(sc.TimeZone(), nil, ran.LowVal[:rangePosition-1]...)
+			err = sc.HandleError(err)
+			if err != nil {
+				return 0, errors.Trace(err)
+			}
+			prefixLen := len(encoded)
+			for _, val := range rangeVals {
+				encoded = encoded[:prefixLen] // keep the shared prefix and re-encode only the enumerated value
+				encoded, err = codec.EncodeKey(sc.TimeZone(), encoded, val)
+				err = sc.HandleError(err)
+				if err != nil {
+					return 0, err
+				}
+				res, err := getEqualCondSelectivity(sctx, coll, idx, encoded, rangePosition, ran)
+				if err != nil {
+					return 0, errors.Trace(err)
+				}
+				selectivity += res
+			}
+		}
+		// use histogram to estimate the range condition
+		if rangePosition != len(ran.LowVal) {
+			rang := ranger.Range{
+				LowVal:      []types.Datum{ran.LowVal[rangePosition]},
+				LowExclude:  ran.LowExclude,
+				HighVal:     []types.Datum{ran.HighVal[rangePosition]},
+				HighExclude: ran.HighExclude,
+				Collators:   []collate.Collator{ran.Collators[rangePosition]},
+			}
+			var count float64
+			var err error
+			colUniqueIDs := coll.Idx2ColUniqueIDs[idxID]
+			colUniqueID := int64(-1) // -1 when the index has no column recorded at rangePosition
+			if rangePosition < len(colUniqueIDs) {
+				colUniqueID = colUniqueIDs[rangePosition]
+			}
+			// prefer index stats over column stats
+			if idxIDs, ok := coll.ColUniqueID2IdxIDs[colUniqueID]; ok && len(idxIDs) > 0 {
+				count, err = GetRowCountByIndexRanges(sctx, coll, idxIDs[0], []*ranger.Range{&rang})
+			} else {
+				count, err = GetRowCountByColumnRanges(sctx, coll, colUniqueID, []*ranger.Range{&rang})
+			}
+			if err != nil {
+				return 0, errors.Trace(err)
+			}
+			selectivity = selectivity * count / idx.TotalRowCount()
+		}
+		count := selectivity * idx.TotalRowCount()
+		if debugTrace {
+			debugTraceEndEstimateRange(sctx, count, debugTraceRange)
+		}
+		totalCount += count
+	}
+	if totalCount > idx.TotalRowCount() {
+		totalCount = idx.TotalRowCount()
+	}
+	return totalCount, nil
+}
+
+// isSingleColIdxNullRange reports whether ran is the [NULL, NULL] range on an
+// index that does not have more than one column. Only the first low and high
+// values of ran are inspected.
+func isSingleColIdxNullRange(idx *statistics.Index, ran *ranger.Range) bool {
+	if len(idx.Info.Columns) > 1 {
+		return false
+	}
+	lo, hi := ran.LowVal[0], ran.HighVal[0]
+	// Both ends must be NULL for the range to be [NULL, NULL].
+	return lo.IsNull() && hi.IsNull()
+}
+
+// It uses the modifyCount to adjust the influence of modifications on the table.
+func getIndexRowCountForStatsV2(sctx planctx.PlanContext, idx *statistics.Index, coll *statistics.HistColl, indexRanges []*ranger.Range, realtimeRowCount, modifyCount int64) (float64, error) {
+	sc := sctx.GetSessionVars().StmtCtx
+	debugTrace := sc.EnableOptimizerDebugTrace
+	if debugTrace {
+		debugtrace.EnterContextCommon(sctx)
+		defer debugtrace.LeaveContextCommon(sctx)
+	}
+	totalCount := float64(0)
+	isSingleColIdx := len(idx.Info.Columns) == 1
+	for _, indexRange := range indexRanges {
+		var count float64
+		lb, err := codec.EncodeKey(sc.TimeZone(), nil, indexRange.LowVal...)
+		err = sc.HandleError(err)
+		if err != nil {
+			return 0, err
+		}
+		rb, err := codec.EncodeKey(sc.TimeZone(), nil, indexRange.HighVal...)
+		err = sc.HandleError(err)
+		if err != nil {
+			return 0, err
+		}
+		if debugTrace {
+			debugTraceStartEstimateRange(sctx, indexRange, lb, rb, totalCount)
+		}
+		fullLen := len(indexRange.LowVal) == len(indexRange.HighVal) && len(indexRange.LowVal) == len(idx.Info.Columns)
+		if bytes.Equal(lb, rb) {
+			// case 1: it's a point
+			if indexRange.LowExclude || indexRange.HighExclude {
+				if debugTrace {
+					debugTraceEndEstimateRange(sctx, 0, debugTraceImpossible)
+				}
+				continue
+			}
+			if fullLen {
+				// At most 1 in this case.
+				if idx.Info.Unique {
+					if !indexRange.IsOnlyNull() {
+						totalCount++
+						if debugTrace {
+							debugTraceEndEstimateRange(sctx, 1, debugTraceUniquePoint)
+						}
+						continue
+					}
+					totalCount = float64(idx.NullCount)
+					if debugTrace {
+						debugTraceEndEstimateRange(sctx, float64(idx.NullCount), debugTraceUniquePoint)
+					}
+					continue
+				}
+				count = equalRowCountOnIndex(sctx, idx, lb, realtimeRowCount, modifyCount)
+				// If the current table row count has changed, we should scale the row count accordingly.
+				count *= idx.GetIncreaseFactor(realtimeRowCount)
+				if debugTrace {
+					debugTraceEndEstimateRange(sctx, count, debugTracePoint)
+				}
+				totalCount += count
+				continue
+			}
+		}
+
+		// case 2: it's an interval
+		// The final interval is [low, high)
+		if indexRange.LowExclude {
+			lb = kv.Key(lb).PrefixNext()
+		}
+		if !indexRange.HighExclude {
+			rb = kv.Key(rb).PrefixNext()
+		}
+		l := types.NewBytesDatum(lb)
+		r := types.NewBytesDatum(rb)
+		lowIsNull := bytes.Equal(lb, nullKeyBytes)
+		if isSingleColIdx && lowIsNull {
+			count += float64(idx.Histogram.NullCount)
+		}
+		expBackoffSuccess := false
+		// Due to the limitation of calcFraction and convertDatumToScalar, the histogram actually won't estimate anything.
+		// If the first column's range is point.
+		if rangePosition := getOrdinalOfRangeCond(sc, indexRange); rangePosition > 0 && idx.StatsVer >= statistics.Version2 && coll != nil {
+			var expBackoffSel float64
+			expBackoffSel, expBackoffSuccess, err = expBackoffEstimation(sctx, idx, coll, indexRange)
+			if err != nil {
+				return 0, err
+			}
+			if expBackoffSuccess {
+				expBackoffCnt := expBackoffSel * idx.TotalRowCount()
+
+				upperLimit := expBackoffCnt
+				// Use the multi-column stats to calculate the max possible row count of [l, r)
+				if idx.Histogram.Len() > 0 {
+					_, lowerBkt, _, _ := idx.Histogram.LocateBucket(sctx, l)
+					_, upperBkt, _, _ := idx.Histogram.LocateBucket(sctx, r)
+					if debugTrace {
+						statistics.DebugTraceBuckets(sctx, &idx.Histogram, []int{lowerBkt - 1, upperBkt})
+					}
+					// Use Count of the Bucket before l as the lower bound.
+					preCount := float64(0)
+					if lowerBkt > 0 {
+						preCount = float64(idx.Histogram.Buckets[lowerBkt-1].Count)
+					}
+					// Use Count of the Bucket where r exists as the upper bound.
+					upperCnt := float64(idx.Histogram.Buckets[upperBkt].Count)
+					upperLimit = upperCnt - preCount
+					upperLimit += float64(idx.TopN.BetweenCount(sctx, lb, rb))
+				}
+
+				// If the result of exponential backoff strategy is larger than the result from multi-column stats,
+				// 	use the upper limit from multi-column histogram instead.
+				if expBackoffCnt > upperLimit {
+					expBackoffCnt = upperLimit
+				}
+				count += expBackoffCnt
+			}
+		}
+		if !expBackoffSuccess {
+			count += betweenRowCountOnIndex(sctx, idx, l, r)
+		}
+
+		// If the current table row count has changed, we should scale the row count accordingly.
+		increaseFactor := idx.GetIncreaseFactor(realtimeRowCount)
+		count *= increaseFactor
+
+		// handling the out-of-range part
+		if (outOfRangeOnIndex(idx, l) && !(isSingleColIdx && lowIsNull)) || outOfRangeOnIndex(idx, r) {
+			histNDV := idx.NDV
+			// Exclude the TopN in Stats Version 2
+			if idx.StatsVer == statistics.Version2 {
+				c := coll.GetCol(idx.Histogram.ID)
+				// If this is single column of a multi-column index - use the column's NDV rather than index NDV
+				isSingleColRange := len(indexRange.LowVal) == len(indexRange.HighVal) && len(indexRange.LowVal) == 1
+				if isSingleColRange && !isSingleColIdx && c != nil && c.Histogram.NDV > 0 {
+					histNDV = c.Histogram.NDV - int64(c.TopN.Num())
+				} else {
+					histNDV -= int64(idx.TopN.Num())
+				}
+			}
+			count += idx.Histogram.OutOfRangeRowCount(sctx, &l, &r, modifyCount, histNDV, increaseFactor)
+		}
+
+		if debugTrace {
+			debugTraceEndEstimateRange(sctx, count, debugTraceRange)
+		}
+		totalCount += count
+	}
+	allowZeroEst := fixcontrol.GetBoolWithDefault(
+		sctx.GetSessionVars().GetOptimizerFixControlMap(),
+		fixcontrol.Fix47400,
+		false,
+	)
+	if allowZeroEst {
+		totalCount = mathutil.Clamp(totalCount, 0, float64(realtimeRowCount))
+	} else {
+		// Don't allow the final result to go below 1 row
+		totalCount = mathutil.Clamp(totalCount, 1, float64(realtimeRowCount))
+	}
+	return totalCount, nil
+}
+
+var nullKeyBytes, _ = codec.EncodeKey(time.UTC, nil, types.NewDatum(nil)) // encoded key of a single nil datum; probe keys are compared against it to detect NULL lookups (EncodeKey's second result is discarded)
+
+// equalRowCountOnIndex estimates how many rows have an index key equal to the encoded value b.
+// Stats older than version 2 rely on the out-of-range estimate, the CMSketch, or the histogram;
+// version 2 tries TopN, then the histogram, then an average over the remaining distinct values.
+func equalRowCountOnIndex(sctx planctx.PlanContext, idx *statistics.Index, b []byte, realtimeRowCount, modifyCount int64) (result float64) {
+	if sctx.GetSessionVars().StmtCtx.EnableOptimizerDebugTrace {
+		debugtrace.EnterContextCommon(sctx)
+		debugtrace.RecordAnyValuesWithNames(sctx, "Encoded Value", b)
+		defer func() {
+			debugtrace.RecordAnyValuesWithNames(sctx, "Result", result)
+			debugtrace.LeaveContextCommon(sctx)
+		}()
+	}
+	// A NULL probe on a single-column index is answered directly by the histogram's NULL count.
+	if len(idx.Info.Columns) == 1 && bytes.Equal(b, nullKeyBytes) {
+		return float64(idx.Histogram.NullCount)
+	}
+	encoded := types.NewBytesDatum(b)
+	// Before stats version 2: out-of-range estimate first, then the CMSketch, then the histogram.
+	if idx.StatsVer < statistics.Version2 {
+		switch {
+		case idx.Histogram.NDV > 0 && outOfRangeOnIndex(idx, encoded):
+			return outOfRangeEQSelectivity(sctx, idx.Histogram.NDV, realtimeRowCount, int64(idx.TotalRowCount())) * idx.TotalRowCount()
+		case idx.CMSketch != nil:
+			return float64(idx.QueryBytes(sctx, b))
+		}
+		rows, _ := idx.Histogram.EqualRowCount(sctx, encoded, false)
+		return rows
+	}
+	// Stats version 2.
+	// 1. Look the value up in TopN.
+	if idx.TopN != nil {
+		if topNCnt, ok := idx.TopN.QueryTopN(sctx, b); ok {
+			return float64(topNCnt)
+		}
+	}
+	// 2. Look the value up in bucket.Repeat (the last value in every bucket).
+	if rows, ok := idx.Histogram.EqualRowCount(sctx, encoded, true); ok {
+		return rows
+	}
+	// 3. Assume a uniform distribution for everything else, even when the value is not covered
+	// by the range of the stats.
+	ndvOutsideTopN := float64(idx.Histogram.NDV - int64(idx.TopN.Num()))
+	if ndvOutsideTopN > 0 {
+		// Average rows per distinct value: histogram rows (TopN excluded) over the NDV without TopN.
+		return idx.Histogram.NotNullCount() / ndvOutsideTopN
+	}
+	// No NDV is left outside TopN, so all NDVs are in TopN and there are no histograms. The fallback
+	// below uses idx.TotalRowCount rather than idx.Histogram.NotNullCount() because of that.
+	//
+	// If the table hasn't been modified, it's safe to return 0.
+	if modifyCount == 0 {
+		return 0
+	}
+	// Otherwise calculate an approximate estimate based upon newly inserted rows.
+	// Start from the original NDV; if there is no NDV, derive one using sqrt.
+	fallbackNDV := float64(idx.Histogram.NDV)
+	if idx.Histogram.NDV <= 0 {
+		fallbackNDV = math.Sqrt(max(idx.TotalRowCount(), float64(realtimeRowCount)))
+	}
+	// As a conservative estimate, take the smaller of the original total rows or the additions.
+	// "realtimeRowCount - original count" is a better measure of inserts than modifyCount.
+	baseRows := min(idx.TotalRowCount(), float64(realtimeRowCount)-idx.TotalRowCount())
+	return max(1, baseRows/fallbackNDV)
+}
+
+// expBackoffEstimation estimates the selectivity of a multi-column index range with Exponential Backoff (see the comment below).
+// It returns the combined selectivity, whether at least one per-column estimate was available, and any error hit while estimating a column.
+func expBackoffEstimation(sctx planctx.PlanContext, idx *statistics.Index, coll *statistics.HistColl, indexRange *ranger.Range) (sel float64, success bool, err error) {
+	if sctx.GetSessionVars().StmtCtx.EnableOptimizerDebugTrace {
+		debugtrace.EnterContextCommon(sctx)
+		defer func() {
+			debugtrace.RecordAnyValuesWithNames(sctx,
+				"Result", sel,
+				"Success", success,
+				"error", err,
+			)
+			debugtrace.LeaveContextCommon(sctx)
+		}()
+	}
+	tmpRan := []*ranger.Range{
+		{
+			LowVal:    make([]types.Datum, 1),
+			HighVal:   make([]types.Datum, 1),
+			Collators: make([]collate.Collator, 1),
+		},
+	}
+	colsIDs := coll.Idx2ColUniqueIDs[idx.Histogram.ID]
+	singleColumnEstResults := make([]float64, 0, len(indexRange.LowVal))
+	// The following code uses Exponential Backoff to reduce the impact of the independence assumption. It works like:
+	//   1. Calc the selectivity of each column.
+	//   2. Sort them and choose the first 4 most selective filter and the corresponding selectivity is sel_1, sel_2, sel_3, sel_4 where i < j => sel_i < sel_j.
+	//   3. The final selectivity would be sel_1 * sel_2^{1/2} * sel_3^{1/4} * sel_4^{1/8}.
+	// For selectivities within [0, 1] this is never smaller than the plain product that full independence would give.
+	for i := 0; i < len(indexRange.LowVal); i++ {
+		tmpRan[0].LowVal[0] = indexRange.LowVal[i]
+		tmpRan[0].HighVal[0] = indexRange.HighVal[i]
+		tmpRan[0].Collators[0] = indexRange.Collators[0] // NOTE(review): column i reuses Collators[0] — confirm this is intended
+		if i == len(indexRange.LowVal)-1 {
+			tmpRan[0].LowExclude = indexRange.LowExclude
+			tmpRan[0].HighExclude = indexRange.HighExclude
+		}
+		colID := colsIDs[i]
+		var (
+			count, selectivity float64
+			err                error
+			foundStats         bool
+		)
+		if !statistics.ColumnStatsIsInvalid(coll.GetCol(colID), sctx, coll, colID) {
+			foundStats = true
+			count, err = GetRowCountByColumnRanges(sctx, coll, colID, tmpRan)
+			selectivity = count / float64(coll.RealtimeCount)
+		}
+		if idxIDs, ok := coll.ColUniqueID2IdxIDs[colID]; ok && !foundStats && len(indexRange.LowVal) > 1 {
+			// Note the `len(indexRange.LowVal) > 1` condition here, it means we only recursively call
+			// `GetRowCountByIndexRanges()` when the input `indexRange` is a multi-column range. This
+			// check avoids infinite recursion.
+			for _, idxID := range idxIDs {
+				idxStats := coll.GetIdx(idxID)
+				if idxStats == nil || statistics.IndexStatsIsInvalid(sctx, idxStats, coll, idxID) {
+					continue
+				}
+				foundStats = true
+				if count, err = GetRowCountByIndexRanges(sctx, coll, idxID, tmpRan); err != nil {
+					continue // try the next index; err is kept and reported below if none succeeds
+				}
+				realtimeCnt, _ := coll.GetScaledRealtimeAndModifyCnt(idxStats)
+				selectivity = count / float64(realtimeCnt)
+				break // the first successful index estimate is used; selectivity must be set before leaving the loop
+			}
+		}
+		if !foundStats {
+			continue
+		}
+		if err != nil {
+			return 0, false, err
+		}
+		singleColumnEstResults = append(singleColumnEstResults, selectivity)
+	}
+	// Sort them.
+	slices.Sort(singleColumnEstResults)
+	l := len(singleColumnEstResults)
+	failpoint.Inject("cleanEstResults", func() {
+		singleColumnEstResults = singleColumnEstResults[:0]
+		l = 0
+	})
+	if l == 1 {
+		return singleColumnEstResults[0], true, nil
+	} else if l == 0 {
+		return 0, false, nil
+	}
+	// Do not allow the exponential backoff to go below the available index bound (1 / index NDV). If fewer columns were
+	// estimated than the index has, the bound is divided by 0.9 (raised slightly) to differentiate a subset from a full index match.
+	// If there is an individual column selectivity that goes below this bound, use that selectivity only.
+	histNDV := coll.RealtimeCount
+	if idx.NDV > 0 {
+		histNDV = idx.NDV
+	}
+	idxLowBound := 1 / float64(min(histNDV, coll.RealtimeCount))
+	if l < len(idx.Info.Columns) {
+		idxLowBound /= 0.9
+	}
+	minTwoCol := min(singleColumnEstResults[0], singleColumnEstResults[1], idxLowBound)
+	multTwoCol := singleColumnEstResults[0] * math.Sqrt(singleColumnEstResults[1])
+	if l == 2 {
+		return max(minTwoCol, multTwoCol), true, nil
+	}
+	minThreeCol := min(minTwoCol, singleColumnEstResults[2])
+	multThreeCol := multTwoCol * math.Sqrt(math.Sqrt(singleColumnEstResults[2]))
+	if l == 3 {
+		return max(minThreeCol, multThreeCol), true, nil
+	}
+	minFourCol := min(minThreeCol, singleColumnEstResults[3])
+	multFourCol := multThreeCol * math.Sqrt(math.Sqrt(math.Sqrt(singleColumnEstResults[3])))
+	return max(minFourCol, multFourCol), true, nil
+}
+
+// outOfRangeOnIndex reports whether val falls outside the range covered by the index histogram.
+// A value flagged by Histogram.OutOfRange still counts as in range when matchPrefix accepts it
+// against row 0 of the histogram bounds.
+func outOfRangeOnIndex(idx *statistics.Index, val types.Datum) bool {
+	if idx.Histogram.OutOfRange(val) {
+		coveredByFirstBound := idx.Histogram.Len() > 0 && matchPrefix(idx.Histogram.Bounds.GetRow(0), 0, &val)
+		return !coveredByFirstBound
+	}
+	return false
+}
+
+// matchPrefix reports whether the string form of ad is a prefix of column colIdx of row; kinds other than string, bytes, binary literal and bit never match.
+func matchPrefix(row chunk.Row, colIdx int, ad *types.Datum) bool {
+	kind := ad.Kind()
+	if kind != types.KindString && kind != types.KindBytes && kind != types.KindBinaryLiteral && kind != types.KindMysqlBit {
+		return false
+	}
+	return strings.HasPrefix(row.GetString(colIdx), ad.GetString())
+}
+
+// betweenRowCountOnIndex estimates the row count for interval [l, r): the histogram estimate, plus the TopN rows in the
+// interval unless the stats are version 1. The input sctx is just for debug trace, you can pass nil safely if that's not needed.
+func betweenRowCountOnIndex(sctx planctx.PlanContext, idx *statistics.Index, l, r types.Datum) float64 {
+	rowCount := idx.Histogram.BetweenRowCount(sctx, l, r)
+	if idx.StatsVer != statistics.Version1 {
+		rowCount += float64(idx.TopN.BetweenCount(sctx, l.GetBytes(), r.GetBytes()))
+	}
+	return rowCount
+}
+
+// getOrdinalOfRangeCond returns the ordinal of the first column whose low and high values differ, i.e. the range condition.
+// If every column is a point it returns len(ran.LowVal); if a comparison fails it returns 0.
+func getOrdinalOfRangeCond(sc *stmtctx.StatementContext, ran *ranger.Range) int {
+	for pos, low := range ran.LowVal {
+		high := ran.HighVal[pos]
+		res, err := low.Compare(sc.TypeCtx(), &high, ran.Collators[0]) // NOTE(review): Collators[0] is used for every column — confirm
+		switch {
+		case err != nil:
+			return 0
+		case res != 0:
+			return pos
+		}
+	}
+	return len(ran.LowVal)
+}
diff --git a/statistics/selectivity_test.go b/statistics/selectivity_test.go
index fca2f9261c632..46245bbe5137d 100644
--- a/statistics/selectivity_test.go
+++ b/statistics/selectivity_test.go
@@ -274,6 +274,56 @@ func TestEstimationForUnknownValues(t *testing.T) {
 	require.Equal(t, 0.0, count)
 }
 
+// TestEstimationForUnknownValuesAfterModify checks equal-condition row estimates for a value missing from the stats, before and after the table grows.
+func TestEstimationForUnknownValuesAfterModify(t *testing.T) {
+	store, dom := testkit.CreateMockStoreAndDomain(t)
+	tk := testkit.NewTestKit(t, store)
+	tk.MustExec("use test")
+	tk.MustExec("drop table if exists t")
+	tk.MustExec("create table t(a int, key idx(a))")
+	tk.MustExec("set @@tidb_analyze_version=2")
+	tk.MustExec("set @@global.tidb_enable_auto_analyze='OFF'")
+	for v := 1; v <= 10; v++ {
+		// Five single-row inserts doubled by a self-copy: 10 rows for each value.
+		for n := 0; n < 5; n++ {
+			tk.MustExec(fmt.Sprintf("insert into t values (%d)", v))
+		}
+		tk.MustExec(fmt.Sprintf("insert into t select a from t where a = %d", v))
+	}
+	tk.MustExec("analyze table t")
+	statsHandle := dom.StatsHandle()
+	require.Nil(t, statsHandle.DumpStatsDeltaToKV(true))
+
+	tbl, err := dom.InfoSchema().TableByName(context.Background(), pmodel.NewCIStr("test"), pmodel.NewCIStr("t"))
+	require.NoError(t, err)
+	analyzedStats := statsHandle.GetTableStats(tbl.Meta())
+
+	// A value present in the stats: 10 rows are expected.
+	sctx := mock.NewContext()
+	col := analyzedStats.GetCol(tbl.Meta().Columns[0].ID)
+	count, err := cardinality.GetColumnRowCount(sctx, col, getRange(5, 5), analyzedStats.RealtimeCount, analyzedStats.ModifyCount, false)
+	require.NoError(t, err)
+	require.Equal(t, 10.0, count)
+
+	// A value absent from the stats while modifyCount is still zero: the estimate defaults to 1.0.
+	count, err = cardinality.GetColumnRowCount(sctx, col, getRange(11, 11), analyzedStats.RealtimeCount, analyzedStats.ModifyCount, false)
+	require.NoError(t, err)
+	require.Equal(t, 1.0, count)
+
+	// Grow the table by another 200 rows (two copies of the original 100, shifted by 10).
+	tk.MustExec("insert into t select a+10 from t")
+	tk.MustExec("insert into t select a+10 from t where a <= 10")
+	require.Nil(t, statsHandle.DumpStatsDeltaToKV(true))
+	require.Nil(t, statsHandle.Update(context.Background(), dom.InfoSchema()))
+	refreshedStats := statsHandle.GetTableStats(tbl.Meta())
+
+	// A value absent from the analyzed stats after the growth: the estimate should be >= 10 and <= 40.
+	count, err = cardinality.GetColumnRowCount(sctx, col, getRange(15, 15), refreshedStats.RealtimeCount, refreshedStats.ModifyCount, false)
+	require.NoError(t, err)
+	require.Truef(t, count < 41, "expected: between 10 to 40, got: %v", count)
+	require.Truef(t, count > 9, "expected: between 10 to 40, got: %v", count)
+}
+
 func TestEstimationUniqueKeyEqualConds(t *testing.T) {
 	store, dom := testkit.CreateMockStoreAndDomain(t)
 	testKit := testkit.NewTestKit(t, store)