Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

planner: Use realtimeRowCount when all topN collected (#56848) #57689

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pkg/planner/cardinality/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ go_test(
data = glob(["testdata/**"]),
embed = [":cardinality"],
flaky = True,
shard_count = 28,
shard_count = 29,
deps = [
"//pkg/config",
"//pkg/domain",
Expand Down
21 changes: 19 additions & 2 deletions pkg/planner/cardinality/row_count_column.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
package cardinality

import (
"math"

"github.com/pingcap/errors"
"github.com/pingcap/tidb/pkg/planner/planctx"
"github.com/pingcap/tidb/pkg/planner/util/debugtrace"
Expand Down Expand Up @@ -173,12 +175,27 @@ func equalRowCountOnColumn(sctx planctx.PlanContext, c *statistics.Column, val t
// 3. use uniform distribution assumption for the rest (even when this value is not covered by the range of stats)
histNDV := float64(c.Histogram.NDV - int64(c.TopN.Num()))
if histNDV <= 0 {
// If the table hasn't been modified, it's safe to return 0. Otherwise, the TopN could be stale - return 1.
// If histNDV is zero - we have all NDV's in TopN - and no histograms. This function uses
// c.NotNullCount rather than c.Histogram.NotNullCount() since the histograms are empty.
//
// If the table hasn't been modified, it's safe to return 0.
if modifyCount == 0 {
return 0, nil
}
return 1, nil
// ELSE calculate an approximate estimate based upon newly inserted rows.
//
// Reset to the original NDV, or if no NDV - derive an NDV using sqrt
if c.Histogram.NDV > 0 {
histNDV = float64(c.Histogram.NDV)
} else {
histNDV = math.Sqrt(max(c.NotNullCount(), float64(realtimeRowCount)))
}
// As a conservative estimate - take the smaller of the original totalRows or the additions.
// "realtimeRowCount - original count" is a better measure of inserts than modifyCount
totalRowCount := min(c.NotNullCount(), float64(realtimeRowCount)-c.NotNullCount())
return max(1, totalRowCount/histNDV), nil
}
// return the average histogram rows (which excludes topN) and NDV that excluded topN
return c.Histogram.NotNullCount() / histNDV, nil
}

Expand Down
19 changes: 17 additions & 2 deletions pkg/planner/cardinality/row_count_index.go
Original file line number Diff line number Diff line change
Expand Up @@ -415,12 +415,27 @@ func equalRowCountOnIndex(sctx planctx.PlanContext, idx *statistics.Index, b []b
// 3. use uniform distribution assumption for the rest (even when this value is not covered by the range of stats)
histNDV := float64(idx.Histogram.NDV - int64(idx.TopN.Num()))
if histNDV <= 0 {
// If the table hasn't been modified, it's safe to return 0. Otherwise, the TopN could be stale - return 1.
// If histNDV is zero - we have all NDV's in TopN - and no histograms. This function uses
// idx.TotalRowCount rather than idx.Histogram.NotNullCount() since the histograms are empty.
//
// If the table hasn't been modified, it's safe to return 0.
if modifyCount == 0 {
return 0
}
return 1
// ELSE calculate an approximate estimate based upon newly inserted rows.
//
// Reset to the original NDV, or if no NDV - derive an NDV using sqrt
if idx.Histogram.NDV > 0 {
histNDV = float64(idx.Histogram.NDV)
} else {
histNDV = math.Sqrt(max(idx.TotalRowCount(), float64(realtimeRowCount)))
}
// As a conservative estimate - take the smaller of the original totalRows or the additions.
// "realtimeRowCount - original count" is a better measure of inserts than modifyCount
totalRowCount := min(idx.TotalRowCount(), float64(realtimeRowCount)-idx.TotalRowCount())
return max(1, totalRowCount/histNDV)
}
// return the average histogram rows (which excludes topN) and NDV that excluded topN
return idx.Histogram.NotNullCount() / histNDV
}

Expand Down
50 changes: 50 additions & 0 deletions pkg/planner/cardinality/selectivity_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -289,6 +289,56 @@ func TestEstimationForUnknownValues(t *testing.T) {
require.Equal(t, 0.0, count)
}

// TestEstimationForUnknownValuesAfterModify checks cardinality.GetColumnRowCount on
// column `a` of table `t` in three situations: a value present when the table was
// analyzed, a value absent at analyze time, and a value that only exists in rows
// inserted after the analyze.
func TestEstimationForUnknownValuesAfterModify(t *testing.T) {
	store, dom := testkit.CreateMockStoreAndDomain(t)
	testKit := testkit.NewTestKit(t, store)
	testKit.MustExec("use test")
	testKit.MustExec("drop table if exists t")
	testKit.MustExec("create table t(a int, key idx(a))")
	testKit.MustExec("set @@tidb_analyze_version=2")
	testKit.MustExec("set @@global.tidb_enable_auto_analyze='OFF'")

	// Populate values 1..10 with 10 rows each (100 rows in total): five single-row
	// inserts per value, then one insert-select that duplicates the rows of that value.
	const singleInsertsPerValue = 5
	for i := 1; i <= 10; i++ {
		for j := 0; j < singleInsertsPerValue; j++ {
			testKit.MustExec(fmt.Sprintf("insert into t values (%d)", i))
		}
		testKit.MustExec(fmt.Sprintf("insert into t select a from t where a = %d", i))
	}
	testKit.MustExec("analyze table t")
	h := dom.StatsHandle()
	require.NoError(t, h.DumpStatsDeltaToKV(true))

	table, err := dom.InfoSchema().TableByName(context.Background(), pmodel.NewCIStr("test"), pmodel.NewCIStr("t"))
	require.NoError(t, err)
	statsTbl := h.GetTableStats(table.Meta())

	// Search for a found value == 10.0
	sctx := mock.NewContext()
	col := statsTbl.GetCol(table.Meta().Columns[0].ID)
	count, err := cardinality.GetColumnRowCount(sctx, col, getRange(5, 5), statsTbl.RealtimeCount, statsTbl.ModifyCount, false)
	require.NoError(t, err)
	require.Equal(t, 10.0, count)

	// Search for a not found value with zero modifyCount. Defaults to count == 1.0
	count, err = cardinality.GetColumnRowCount(sctx, col, getRange(11, 11), statsTbl.RealtimeCount, statsTbl.ModifyCount, false)
	require.NoError(t, err)
	require.Equal(t, 1.0, count)

	// Add another 200 rows to the table without re-analyzing: the first statement
	// copies all 100 existing rows as a+10, the second copies the 100 rows with
	// a <= 10 as a+10 again.
	testKit.MustExec("insert into t select a+10 from t")
	testKit.MustExec("insert into t select a+10 from t where a <= 10")
	require.NoError(t, h.DumpStatsDeltaToKV(true))
	require.NoError(t, h.Update(context.Background(), dom.InfoSchema()))
	statsTblnew := h.GetTableStats(table.Meta())

	// Search for a not found value based upon statistics - count should be >= 10 and <=40.
	// `col` is still the column statistics object fetched before the new inserts; only the
	// realtime and modify counts come from the refreshed table statistics.
	count, err = cardinality.GetColumnRowCount(sctx, col, getRange(15, 15), statsTblnew.RealtimeCount, statsTblnew.ModifyCount, false)
	require.NoError(t, err)
	require.Truef(t, count < 41, "expected: between 10 to 40, got: %v", count)
	require.Truef(t, count > 9, "expected: between 10 to 40, got: %v", count)
}

func TestEstimationUniqueKeyEqualConds(t *testing.T) {
store, dom := testkit.CreateMockStoreAndDomain(t)
testKit := testkit.NewTestKit(t, store)
Expand Down