Skip to content

Commit

Permalink
This is an automated cherry-pick of pingcap#56848
Browse files Browse the repository at this point in the history
Signed-off-by: ti-chi-bot <[email protected]>
  • Loading branch information
terry1purcell authored and ti-chi-bot committed Dec 11, 2024
1 parent 42e82c4 commit 0bab466
Show file tree
Hide file tree
Showing 4 changed files with 102 additions and 0 deletions.
4 changes: 4 additions & 0 deletions pkg/planner/cardinality/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,11 @@ go_test(
data = glob(["testdata/**"]),
embed = [":cardinality"],
flaky = True,
<<<<<<< HEAD
shard_count = 25,
=======
shard_count = 29,
>>>>>>> cec48bb1649 (planner: Use realtimeRowCount when all topN collected (#56848))
deps = [
"//pkg/config",
"//pkg/domain",
Expand Down
25 changes: 25 additions & 0 deletions pkg/planner/cardinality/row_count_column.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
package cardinality

import (
"math"

"github.com/pingcap/errors"
"github.com/pingcap/tidb/pkg/planner/util/debugtrace"
"github.com/pingcap/tidb/pkg/sessionctx"
Expand Down Expand Up @@ -172,8 +174,31 @@ func equalRowCountOnColumn(sctx sessionctx.Context, c *statistics.Column, val ty
// 3. use uniform distribution assumption for the rest (even when this value is not covered by the range of stats)
histNDV := float64(c.Histogram.NDV - int64(c.TopN.Num()))
if histNDV <= 0 {
<<<<<<< HEAD
return 0, nil
=======
// If histNDV is zero - we have all NDV's in TopN - and no histograms. This function uses
// c.NotNullCount rather than c.Histogram.NotNullCount() since the histograms are empty.
//
// If the table hasn't been modified, it's safe to return 0.
if modifyCount == 0 {
return 0, nil
}
// ELSE calculate an approximate estimate based upon newly inserted rows.
//
// Reset to the original NDV, or if no NDV - derive an NDV using sqrt
if c.Histogram.NDV > 0 {
histNDV = float64(c.Histogram.NDV)
} else {
histNDV = math.Sqrt(max(c.NotNullCount(), float64(realtimeRowCount)))
}
// As a conservative estimate - take the smaller of the original totalRows or the additions.
// "realtimeRowCount - original count" is a better measure of inserts than modifyCount
totalRowCount := min(c.NotNullCount(), float64(realtimeRowCount)-c.NotNullCount())
return max(1, totalRowCount/histNDV), nil
>>>>>>> cec48bb1649 (planner: Use realtimeRowCount when all topN collected (#56848))
}
// return the average histogram rows (which excludes topN) and NDV that excluded topN
return c.Histogram.NotNullCount() / histNDV, nil
}

Expand Down
23 changes: 23 additions & 0 deletions pkg/planner/cardinality/row_count_index.go
Original file line number Diff line number Diff line change
Expand Up @@ -391,8 +391,31 @@ func equalRowCountOnIndex(sctx sessionctx.Context, idx *statistics.Index, b []by
// 3. use uniform distribution assumption for the rest (even when this value is not covered by the range of stats)
histNDV := float64(idx.Histogram.NDV - int64(idx.TopN.Num()))
if histNDV <= 0 {
<<<<<<< HEAD
return 0
=======
// If histNDV is zero - we have all NDV's in TopN - and no histograms. This function uses
// idx.TotalRowCount rather than idx.Histogram.NotNullCount() since the histograms are empty.
//
// If the table hasn't been modified, it's safe to return 0.
if modifyCount == 0 {
return 0
}
// ELSE calculate an approximate estimate based upon newly inserted rows.
//
// Reset to the original NDV, or if no NDV - derive an NDV using sqrt
if idx.Histogram.NDV > 0 {
histNDV = float64(idx.Histogram.NDV)
} else {
histNDV = math.Sqrt(max(idx.TotalRowCount(), float64(realtimeRowCount)))
}
// As a conservative estimate - take the smaller of the original totalRows or the additions.
// "realtimeRowCount - original count" is a better measure of inserts than modifyCount
totalRowCount := min(idx.TotalRowCount(), float64(realtimeRowCount)-idx.TotalRowCount())
return max(1, totalRowCount/histNDV)
>>>>>>> cec48bb1649 (planner: Use realtimeRowCount when all topN collected (#56848))
}
// return the average histogram rows (which excludes topN) and NDV that excluded topN
return idx.Histogram.NotNullCount() / histNDV
}

Expand Down
50 changes: 50 additions & 0 deletions pkg/planner/cardinality/selectivity_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -284,6 +284,56 @@ func TestEstimationForUnknownValues(t *testing.T) {
require.Equal(t, 0.0, count)
}

// TestEstimationForUnknownValuesAfterModify exercises equal-condition row count
// estimation on column a of table t, first right after ANALYZE and then again
// after more rows have been inserted without re-analyzing.
func TestEstimationForUnknownValuesAfterModify(t *testing.T) {
	store, dom := testkit.CreateMockStoreAndDomain(t)
	tk := testkit.NewTestKit(t, store)

	// Session and schema setup, executed in this exact order.
	for _, stmt := range []string{
		"use test",
		"drop table if exists t",
		"create table t(a int, key idx(a))",
		"set @@tidb_analyze_version=2",
		"set @@global.tidb_enable_auto_analyze='OFF'",
	} {
		tk.MustExec(stmt)
	}

	// For each value 1..10: five single-row inserts, then one insert-select that
	// copies every existing row holding that value.
	for v := 1; v <= 10; v++ {
		insertOne := fmt.Sprintf("insert into t values (%d)", v)
		for n := 0; n < 5; n++ {
			tk.MustExec(insertOne)
		}
		tk.MustExec(fmt.Sprintf("insert into t select a from t where a = %d", v))
	}
	tk.MustExec("analyze table t")

	statsHandle := dom.StatsHandle()
	require.Nil(t, statsHandle.DumpStatsDeltaToKV(true))

	tbl, err := dom.InfoSchema().TableByName(context.Background(), pmodel.NewCIStr("test"), pmodel.NewCIStr("t"))
	require.NoError(t, err)
	tblInfo := tbl.Meta()
	analyzedStats := statsHandle.GetTableStats(tblInfo)

	sctx := mock.NewContext()
	col := analyzedStats.GetCol(tblInfo.Columns[0].ID)

	// A value that exists in the table (5) must be estimated at exactly 10 rows.
	count, err := cardinality.GetColumnRowCount(sctx, col, getRange(5, 5), analyzedStats.RealtimeCount, analyzedStats.ModifyCount, false)
	require.NoError(t, err)
	require.Equal(t, 10.0, count)

	// A value that was never inserted (11), looked up before any further
	// modification, must be estimated at exactly 1 row.
	count, err = cardinality.GetColumnRowCount(sctx, col, getRange(11, 11), analyzedStats.RealtimeCount, analyzedStats.ModifyCount, false)
	require.NoError(t, err)
	require.Equal(t, 1.0, count)

	// Insert further rows shifted by +10 with two insert-selects, then flush the
	// stats delta and refresh the stats handle; the table is not re-analyzed.
	tk.MustExec("insert into t select a+10 from t")
	tk.MustExec("insert into t select a+10 from t where a <= 10")
	require.Nil(t, statsHandle.DumpStatsDeltaToKV(true))
	require.Nil(t, statsHandle.Update(context.Background(), dom.InfoSchema()))
	refreshedStats := statsHandle.GetTableStats(tblInfo)

	// The same column stats object, combined with the refreshed realtime and
	// modify counts, must now estimate value 15 at between 10 and 40 rows.
	count, err = cardinality.GetColumnRowCount(sctx, col, getRange(15, 15), refreshedStats.RealtimeCount, refreshedStats.ModifyCount, false)
	require.NoError(t, err)
	require.Truef(t, count < 41, "expected: between 10 to 40, got: %v", count)
	require.Truef(t, count > 9, "expected: between 10 to 40, got: %v", count)
}

func TestEstimationUniqueKeyEqualConds(t *testing.T) {
store, dom := testkit.CreateMockStoreAndDomain(t)
testKit := testkit.NewTestKit(t, store)
Expand Down

0 comments on commit 0bab466

Please sign in to comment.