Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

planner: Use realtimeRowCount when all topN collected (#56848) #58174

Open
wants to merge 2 commits into
base: release-7.5
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 25 additions & 0 deletions pkg/planner/cardinality/row_count_column.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
package cardinality

import (
"math"

"github.com/pingcap/errors"
"github.com/pingcap/tidb/pkg/planner/util/debugtrace"
"github.com/pingcap/tidb/pkg/sessionctx"
Expand Down Expand Up @@ -172,8 +174,31 @@ func equalRowCountOnColumn(sctx sessionctx.Context, c *statistics.Column, val ty
// 3. use uniform distribution assumption for the rest (even when this value is not covered by the range of stats)
histNDV := float64(c.Histogram.NDV - int64(c.TopN.Num()))
if histNDV <= 0 {
<<<<<<< HEAD
return 0, nil
=======
// If histNDV is zero - we have all NDV's in TopN - and no histograms. This function uses
// c.NotNullCount rather than c.Histogram.NotNullCount() since the histograms are empty.
//
// If the table hasn't been modified, it's safe to return 0.
if modifyCount == 0 {
return 0, nil
}
// ELSE calculate an approximate estimate based upon newly inserted rows.
//
// Reset to the original NDV, or if no NDV - derive an NDV using sqrt
if c.Histogram.NDV > 0 {
histNDV = float64(c.Histogram.NDV)
} else {
histNDV = math.Sqrt(max(c.NotNullCount(), float64(realtimeRowCount)))
}
// As a conservative estimate - take the smaller of the original totalRows or the additions.
// "realtimeRowCount - original count" is a better measure of inserts than modifyCount
totalRowCount := min(c.NotNullCount(), float64(realtimeRowCount)-c.NotNullCount())
return max(1, totalRowCount/histNDV), nil
>>>>>>> cec48bb1649 (planner: Use realtimeRowCount when all topN collected (#56848))
}
// return the average histogram rows (which excludes topN) and NDV that excluded topN
return c.Histogram.NotNullCount() / histNDV, nil
}

Expand Down
23 changes: 23 additions & 0 deletions pkg/planner/cardinality/row_count_index.go
Original file line number Diff line number Diff line change
Expand Up @@ -391,8 +391,31 @@ func equalRowCountOnIndex(sctx sessionctx.Context, idx *statistics.Index, b []by
// 3. use uniform distribution assumption for the rest (even when this value is not covered by the range of stats)
histNDV := float64(idx.Histogram.NDV - int64(idx.TopN.Num()))
if histNDV <= 0 {
<<<<<<< HEAD
return 0
=======
// If histNDV is zero - we have all NDV's in TopN - and no histograms. This function uses
// idx.TotalRowCount rather than idx.Histogram.NotNullCount() since the histograms are empty.
//
// If the table hasn't been modified, it's safe to return 0.
if modifyCount == 0 {
return 0
}
// ELSE calculate an approximate estimate based upon newly inserted rows.
//
// Reset to the original NDV, or if no NDV - derive an NDV using sqrt
if idx.Histogram.NDV > 0 {
histNDV = float64(idx.Histogram.NDV)
} else {
histNDV = math.Sqrt(max(idx.TotalRowCount(), float64(realtimeRowCount)))
}
// As a conservative estimate - take the smaller of the original totalRows or the additions.
// "realtimeRowCount - original count" is a better measure of inserts than modifyCount
totalRowCount := min(idx.TotalRowCount(), float64(realtimeRowCount)-idx.TotalRowCount())
return max(1, totalRowCount/histNDV)
>>>>>>> cec48bb1649 (planner: Use realtimeRowCount when all topN collected (#56848))
}
// return the average histogram rows (which excludes topN) and NDV that excluded topN
return idx.Histogram.NotNullCount() / histNDV
}

Expand Down
50 changes: 50 additions & 0 deletions pkg/planner/cardinality/selectivity_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -284,6 +284,56 @@ func TestEstimationForUnknownValues(t *testing.T) {
require.Equal(t, 0.0, count)
}

// TestEstimationForUnknownValuesAfterModify checks the equality row-count
// estimate on a column, both right after ANALYZE and after further rows have
// been inserted without re-analyzing.
//
// The table is loaded with the values 1..10, ten rows each, and analyzed with
// analyze version 2 while auto-analyze is switched off. The test then asserts:
//  1. a value present in the stats (5) is estimated at exactly 10 rows;
//  2. a value absent from the stats (11) is estimated at 1 row before any
//     further modification;
//  3. after inserting 200 more rows (values 11..20), a value that the stats
//     have never seen (15) is estimated between 10 and 40 rows.
func TestEstimationForUnknownValuesAfterModify(t *testing.T) {
	store, dom := testkit.CreateMockStoreAndDomain(t)
	testKit := testkit.NewTestKit(t, store)
	testKit.MustExec("use test")
	testKit.MustExec("drop table if exists t")
	testKit.MustExec("create table t(a int, key idx(a))")
	testKit.MustExec("set @@tidb_analyze_version=2")
	testKit.MustExec("set @@global.tidb_enable_auto_analyze='OFF'")

	// For every value 1..10: five single-row inserts, then one self-insert that
	// copies those five rows, giving 10 rows per value (100 rows in total).
	const singleInsertsPerValue = 5
	for i := 1; i <= 10; i++ {
		insertOne := fmt.Sprintf("insert into t values (%d)", i)
		for j := 0; j < singleInsertsPerValue; j++ {
			testKit.MustExec(insertOne)
		}
		testKit.MustExec(fmt.Sprintf("insert into t select a from t where a = %d", i))
	}
	testKit.MustExec("analyze table t")
	h := dom.StatsHandle()
	require.NoError(t, h.DumpStatsDeltaToKV(true))

	table, err := dom.InfoSchema().TableByName(context.Background(), pmodel.NewCIStr("test"), pmodel.NewCIStr("t"))
	require.NoError(t, err)
	statsTbl := h.GetTableStats(table.Meta())

	// Search for a found value == 10.0
	sctx := mock.NewContext()
	col := statsTbl.GetCol(table.Meta().Columns[0].ID)
	count, err := cardinality.GetColumnRowCount(sctx, col, getRange(5, 5), statsTbl.RealtimeCount, statsTbl.ModifyCount, false)
	require.NoError(t, err)
	require.Equal(t, 10.0, count)

	// Search for a not found value with zero modifyCount. Defaults to count == 1.0
	count, err = cardinality.GetColumnRowCount(sctx, col, getRange(11, 11), statsTbl.RealtimeCount, statsTbl.ModifyCount, false)
	require.NoError(t, err)
	require.Equal(t, 1.0, count)

	// Add another 200 rows to the table: the first statement copies all 100
	// rows shifted by 10, the second copies the original 100 rows (a <= 10)
	// shifted by 10 once more.
	testKit.MustExec("insert into t select a+10 from t")
	testKit.MustExec("insert into t select a+10 from t where a <= 10")
	require.NoError(t, h.DumpStatsDeltaToKV(true))
	require.NoError(t, h.Update(context.Background(), dom.InfoSchema()))
	statsTblNew := h.GetTableStats(table.Meta())

	// Search for a not found value based upon statistics - count should be >= 10 and <= 40.
	// NOTE(review): col still comes from the pre-insert stats snapshot; only the
	// realtime/modify counts are taken from the refreshed table stats — confirm
	// this is intended.
	count, err = cardinality.GetColumnRowCount(sctx, col, getRange(15, 15), statsTblNew.RealtimeCount, statsTblNew.ModifyCount, false)
	require.NoError(t, err)
	require.LessOrEqual(t, count, 40.0, "expected: between 10 to 40, got: %v", count)
	require.GreaterOrEqual(t, count, 10.0, "expected: between 10 to 40, got: %v", count)
}

func TestEstimationUniqueKeyEqualConds(t *testing.T) {
store, dom := testkit.CreateMockStoreAndDomain(t)
testKit := testkit.NewTestKit(t, store)
Expand Down