From 1c88893a6e83506190e63d050f027419d514f1cd Mon Sep 17 00:00:00 2001 From: Rohit Nayak Date: Mon, 18 Nov 2024 12:20:32 +0100 Subject: [PATCH] Let reference use schema file and table counts. Update tests Signed-off-by: Rohit Nayak --- go/cmd/reference.go | 6 +-- go/reference/reference.go | 70 ++++++++++++++++++++++++++++++---- go/reference/reference_test.go | 10 ++++- t/sakila/sakila.test | 4 +- 4 files changed, 75 insertions(+), 15 deletions(-) diff --git a/go/cmd/reference.go b/go/cmd/reference.go index b85aac3..ed4165b 100644 --- a/go/cmd/reference.go +++ b/go/cmd/reference.go @@ -27,12 +27,12 @@ func referenceCmd() *cobra.Command { cmd := &cobra.Command{ Use: "reference ", Short: "Suggests potential reference tables based on query logs and database schema", - Example: "vt reference file.test ", + Example: "vt reference ", Args: cobra.ExactArgs(2), RunE: func(_ *cobra.Command, args []string) error { cfg := reference.Config{ - FileName: args[0], - ConnectionString: args[1], + KeysOutputFile: args[0], + SchemaInfoFile: args[1], } loader, err := configureLoader(inputType, false) diff --git a/go/reference/reference.go b/go/reference/reference.go index 7e47ce7..254a8d6 100644 --- a/go/reference/reference.go +++ b/go/reference/reference.go @@ -17,17 +17,19 @@ limitations under the License. package reference import ( + "encoding/json" "fmt" "github.com/vitessio/vt/go/data" "github.com/vitessio/vt/go/keys" + "github.com/vitessio/vt/go/schema" "io" "os" "strings" ) type Config struct { - FileName string - ConnectionString string + KeysOutputFile string + SchemaInfoFile string Loader data.Loader } @@ -51,22 +53,52 @@ func Find(cfg Config) (*ReferenceInfo, error) { return float64(ts.NumWrites) / float64(ts.NumWrites+ts.NumReads) } writePercentageThreshold := 1 / 100.0 // 1% + tableCountThreshold := 1000 for _, ts := range ri.TableSummaries { - if ts.JoinCount > thresholdJoins && writePercentage(ts) < writePercentageThreshold { - ri.ChosenTables = append(ri.ChosenTables, ts.TableName) + tableName := strings.Trim(ts.TableName, "'`\"") + numRows := ri.TableRows[tableName] + if ts.JoinCount > thresholdJoins && writePercentage(ts) < writePercentageThreshold && numRows < tableCountThreshold { + ri.ChosenTables = append(ri.ChosenTables, tableName) + } else { + fmt.Printf("Table: %s, Reads: %d, Writes: %d, Joins: %d, Rows: %d\n", + ts.TableName, ts.NumReads, ts.NumWrites, ts.JoinCount, ri.TableRows[tableName]) } } return ri, nil } +type TableInfo struct { + Name string + NumWrites int + NumReads int + JoinCount int + Rows int +} +type ReferenceOutput struct { + Tables []TableInfo +} + func run(out io.Writer, cfg Config) error { ri, err := Find(cfg) if err != nil { return err } + ro := ReferenceOutput{} for _, table := range ri.ChosenTables { - fmt.Fprintf(out, "%s:: %+v\n", table, ri.TableSummaries[table]) + ts := ri.TableSummaries[table] + ro.Tables = append(ro.Tables, TableInfo{ + Name: table, + NumWrites: ts.NumWrites, + NumReads: ts.NumReads, + JoinCount: ts.JoinCount, + Rows: ri.TableRows[table], + }) } + b, err := json.MarshalIndent(ro, "", " ") + if err != nil { + return err + } + out.Write(b) return nil } @@ -84,14 +116,20 @@ func (ts TableSummary) String() string { type ReferenceInfo struct { TableSummaries map[string]*TableSummary ChosenTables []string + TableRows map[string]int } -func GetReferenceInfo(cfg Config) (*ReferenceInfo, error) { - ri := &ReferenceInfo{ +func NewReferenceInfo() *ReferenceInfo { + return &ReferenceInfo{ TableSummaries: make(map[string]*TableSummary), + TableRows: make(map[string]int), } +} + +func GetReferenceInfo(cfg Config) (*ReferenceInfo, error) { + ri := NewReferenceInfo() keysConfig := keys.Config{ - FileName: cfg.FileName, + FileName: cfg.KeysOutputFile, Loader: cfg.Loader, } keysOutput, err := keys.GetKeysInfo(keysConfig) @@ -99,6 +137,7 @@ func GetReferenceInfo(cfg Config) (*ReferenceInfo, error) { return nil, err } getRit := func(table string) *TableSummary { + table = strings.Trim(table, "'`\"") summary, ok := ri.TableSummaries[table] if !ok { summary = &TableSummary{ @@ -124,6 +163,7 @@ func GetReferenceInfo(cfg Config) (*ReferenceInfo, error) { } for _, table := range query.TableNames { + rit := getRit(table) if isRead { rit.NumReads += usageCount @@ -140,5 +180,19 @@ func GetReferenceInfo(cfg Config) (*ReferenceInfo, error) { rit2.JoinCount += usageCount } } + + si, err := schema.Load(cfg.SchemaInfoFile) + if err != nil { + return nil, err + } + for _, table := range ri.TableSummaries { + for _, table2 := range si.Tables { + t := strings.Trim(table.TableName, "'`\"") + t2 := strings.Trim(table2.Name, "'`\"") + if t == t2 { + ri.TableRows[t] = table2.Rows + } + } + } return ri, nil } diff --git a/go/reference/reference_test.go b/go/reference/reference_test.go index 84251c8..46cadee 100644 --- a/go/reference/reference_test.go +++ b/go/reference/reference_test.go @@ -3,13 +3,15 @@ package reference import ( "github.com/stretchr/testify/require" "github.com/vitessio/vt/go/data" + "sort" "testing" ) func TestReference(t *testing.T) { cfg := Config{ - FileName: "../../t/sakila/sakila.test", - Loader: data.SQLScriptLoader{}, + KeysOutputFile: "../../t/sakila/sakila.test", + Loader: data.SQLScriptLoader{}, + SchemaInfoFile: "../../t/sakila/sakila-schema-info.json", } ri, err := Find(cfg) @@ -17,7 +19,11 @@ func TestReference(t *testing.T) { require.NotNil(t, ri) require.NotEmpty(t, ri.TableSummaries) validReferenceTables := []string{"actor", "address", "category", "city", "country", "film", "language", "staff"} + expectedTables := []string{"city", "language", "country", "address"} for _, table := range ri.ChosenTables { require.Containsf(t, validReferenceTables, table, "table %s is not a valid reference table", table) } + sort.Strings(expectedTables) + sort.Strings(ri.ChosenTables) + require.EqualValuesf(t, expectedTables, ri.ChosenTables, "expected tables %v, got %v", expectedTables, ri.ChosenTables) } diff --git a/t/sakila/sakila.test b/t/sakila/sakila.test index 22b7c68..82d4b93 100644 --- a/t/sakila/sakila.test +++ b/t/sakila/sakila.test @@ -347,10 +347,10 @@ WHERE category_id = 7; INSERT /*vt+ VT_USAGE_COUNT=5 */ INTO language (name, last_update) VALUES ('Mandarin', NOW()); -DELETE /*vt+ VT_USAGE_COUNT=20 */ FROM language +DELETE /*vt+ VT_USAGE_COUNT=1 */ FROM language WHERE language_id = 5; -UPDATE /*vt+ VT_USAGE_COUNT=14 */ language +UPDATE /*vt+ VT_USAGE_COUNT=3 */ language SET name = 'French' WHERE language_id = 2;