From 50f2cb663b25ddf7d6e440c9b530fede04ef6780 Mon Sep 17 00:00:00 2001 From: Florent Poinsard Date: Wed, 13 Nov 2024 11:21:16 -0600 Subject: [PATCH 01/10] feat: include query timings in json log from keys Signed-off-by: Florent Poinsard Signed-off-by: Andres Taylor --- go/cmd/keys.go | 2 +- go/data/query.go | 5 + go/data/slow_query_log_loader.go | 330 ++++++++++++++++++++++ go/data/slow_query_log_loader_test.go | 48 ++++ go/data/sql_script_loader.go | 184 ------------ go/data/testdata/slow_query_log | 69 +++++ go/keys/keys.go | 14 + go/keys/keys_test.go | 9 +- go/summarize/testdata/slow-query-log.json | 76 +++++ 9 files changed, 551 insertions(+), 186 deletions(-) create mode 100644 go/data/slow_query_log_loader.go create mode 100644 go/data/slow_query_log_loader_test.go delete mode 100644 go/data/sql_script_loader.go create mode 100644 go/data/testdata/slow_query_log create mode 100644 go/summarize/testdata/slow-query-log.json diff --git a/go/cmd/keys.go b/go/cmd/keys.go index 8bfdd7c..8569db8 100644 --- a/go/cmd/keys.go +++ b/go/cmd/keys.go @@ -61,7 +61,7 @@ func addInputTypeFlag(cmd *cobra.Command, s *string) { func configureLoader(inputType string, needsBindVars bool) (data.Loader, error) { switch inputType { case "sql": - return data.SQLScriptLoader{}, nil + return data.SlowQueryLogLoader{}, nil case "mysql-log": return data.MySQLLogLoader{}, nil case "vtgate-log": diff --git a/go/data/query.go b/go/data/query.go index 7087fad..bd6f722 100644 --- a/go/data/query.go +++ b/go/data/query.go @@ -40,6 +40,11 @@ type ( Query string Line int Type CmdType + + // These fields are only set if the log file is a slow query log + QueryTime, LockTime float64 + RowsSent, RowsExamined int + Timestamp int64 } errLoader struct { diff --git a/go/data/slow_query_log_loader.go b/go/data/slow_query_log_loader.go new file mode 100644 index 0000000..d268789 --- /dev/null +++ b/go/data/slow_query_log_loader.go @@ -0,0 +1,330 @@ +/* +Copyright 2024 The Vitess Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package data + +import ( + "bufio" + "bytes" + "errors" + "fmt" + "io" + "net/http" + "os" + "strconv" + "strings" +) + +type SlowQueryLogLoader struct{} + +type slowQueryLogReaderState struct { + logReaderState +} + +type lineProcessorState struct { + currentQuery Query + newStmt bool + hasQueryMetadata bool +} + +func (s *slowQueryLogReaderState) Next() (Query, bool) { + s.mu.Lock() + defer s.mu.Unlock() + + if s.closed || s.err != nil { + return Query{}, false + } + + state := &lineProcessorState{ + newStmt: true, + } + + for s.scanner.Scan() { + s.lineNumber++ + line := strings.TrimSpace(s.scanner.Text()) + + result, done, err := s.processLine(line, state) + if err != nil { + s.err = err + return Query{}, false + } + if done { + return result, true + } + } + + if err := s.scanner.Err(); err != nil { + s.err = err + return Query{}, false + } + + if !state.newStmt && state.currentQuery.Query != "" { + s.err = errors.New("EOF: missing semicolon") + } + return Query{}, false +} + +func (s *slowQueryLogReaderState) processLine(line string, state *lineProcessorState) (Query, bool, error) { + switch { + case len(line) == 0: + return Query{}, false, nil + case strings.HasPrefix(line, "#"): + hasMetadata, err := s.processCommentLine(line, state) + if err != nil { + return Query{}, false, fmt.Errorf("line %d: %w", s.lineNumber, err) + } + if hasMetadata { + state.hasQueryMetadata = true + } + return Query{}, false, nil + case strings.HasPrefix(line, "SET timestamp=") && state.hasQueryMetadata: + err := s.processSetTimestampLine(line, state) + if err != nil { + return Query{}, false, fmt.Errorf("line %d: %w", s.lineNumber, err) + } + state.hasQueryMetadata = false + return Query{}, false, nil + case strings.HasPrefix(line, "--"): + pq, err := s.processStatementLine(line, state) + if err != nil { + return Query{}, false, fmt.Errorf("line %d: %w", s.lineNumber, err) + } + if pq != nil { + return *pq, true, nil + } + return Query{}, false, nil + case state.newStmt: + s.startNewQuery(line, state) + default: + s.appendToCurrentQuery(line, state) + } + + state.newStmt = strings.HasSuffix(line, ";") + if state.newStmt { + pq, err := s.processEndOfStatement(line, state) + if err != nil { + return Query{}, false, fmt.Errorf("line %d: %w", s.lineNumber, err) + } + if pq != nil { + return *pq, true, nil + } + } + + return Query{}, false, nil +} + +func (s *slowQueryLogReaderState) processCommentLine(line string, state *lineProcessorState) (bool, error) { + if strings.HasPrefix(line, "# Query_time:") { + if err := parseQueryMetrics(line, &state.currentQuery); err != nil { + return false, err + } + return true, nil + } + return false, nil +} + +func (s *slowQueryLogReaderState) processSetTimestampLine(line string, state *lineProcessorState) error { + tsStr := strings.TrimPrefix(line, "SET timestamp=") + tsStr = strings.TrimSuffix(tsStr, ";") + ts, err := strconv.ParseInt(tsStr, 10, 64) + if err != nil { + return fmt.Errorf("invalid timestamp '%s': %w", tsStr, err) + } + state.currentQuery.Timestamp = ts + return nil +} + +func (s *slowQueryLogReaderState) processStatementLine(line string, state *lineProcessorState) (*Query, error) { + state.newStmt = true + q := Query{Query: line, Line: s.lineNumber} + pq, err := parseQuery(q) + if err != nil { + return nil, err + } + return pq, nil +} + +func (s *slowQueryLogReaderState) processEndOfStatement(line string, state *lineProcessorState) (*Query, error) { + if strings.HasPrefix(line, "SET timestamp=") && state.currentQuery.QueryTime > 0 { + return nil, nil 
+ } + pq, err := parseQuery(state.currentQuery) + if err != nil { + return nil, err + } + return pq, nil +} + +func (s *slowQueryLogReaderState) startNewQuery(line string, state *lineProcessorState) { + state.currentQuery.Query = line + state.currentQuery.Line = s.lineNumber +} + +func (s *slowQueryLogReaderState) appendToCurrentQuery(line string, state *lineProcessorState) { + state.currentQuery.Query = fmt.Sprintf("%s\n%s", state.currentQuery.Query, line) +} + +// parseQueryMetrics parses the metrics from the comment line and assigns them to the Query struct. +func parseQueryMetrics(line string, q *Query) error { + line = strings.TrimPrefix(line, "#") + line = strings.TrimSpace(line) + + fields := strings.Fields(line) + + i := 0 + for i < len(fields) { + field := fields[i] + if !strings.HasSuffix(field, ":") { + return fmt.Errorf("unexpected field format '%s'", field) + } + + // Remove the trailing colon to get the key + key := strings.TrimSuffix(field, ":") + if i+1 >= len(fields) { + return fmt.Errorf("missing value for key '%s'", key) + } + value := fields[i+1] + + // Assign to Query struct based on key + switch key { + case "Query_time": + fval, err := strconv.ParseFloat(value, 64) + if err != nil { + return fmt.Errorf("invalid Query_time value '%s'", value) + } + q.QueryTime = fval + case "Lock_time": + fval, err := strconv.ParseFloat(value, 64) + if err != nil { + return fmt.Errorf("invalid Lock_time value '%s'", value) + } + q.LockTime = fval + case "Rows_sent": + ival, err := strconv.Atoi(value) + if err != nil { + return fmt.Errorf("invalid Rows_sent value '%s'", value) + } + q.RowsSent = ival + case "Rows_examined": + ival, err := strconv.Atoi(value) + if err != nil { + return fmt.Errorf("invalid Rows_examined value '%s'", value) + } + q.RowsExamined = ival + } + i += 2 // Move to the next key-value pair + } + + return nil +} + +func readData(url string) ([]byte, error) { + client := http.Client{} + res, err := client.Get(url) + if err != nil { + return nil, err + } + if res.StatusCode != http.StatusOK { + return nil, fmt.Errorf("failed to get data from %s, status code %d", url, res.StatusCode) + } + defer res.Body.Close() + return io.ReadAll(res.Body) +} + +func (SlowQueryLogLoader) Load(filename string) IteratorLoader { + var scanner *bufio.Scanner + var fd *os.File + + if strings.HasPrefix(filename, "http") { + data, err := readData(filename) + if err != nil { + return &errLoader{err: err} + } + scanner = bufio.NewScanner(bytes.NewReader(data)) + } else { + var err error + fd, err = os.OpenFile(filename, os.O_RDONLY, 0) + if err != nil { + return &errLoader{err: err} + } + scanner = bufio.NewScanner(fd) + } + + return &slowQueryLogReaderState{ + logReaderState: logReaderState{ + scanner: scanner, + fd: fd, + }, + } +} + +// Helper function to parse individual queries +func parseQuery(rs Query) (*Query, error) { + realS := rs.Query + s := rs.Query + q := Query{ + Line: rs.Line, + Type: Unknown, + QueryTime: rs.QueryTime, + LockTime: rs.LockTime, + RowsSent: rs.RowsSent, + RowsExamined: rs.RowsExamined, + Timestamp: rs.Timestamp, + } + + if len(s) < 3 { + return nil, nil + } + + switch { + case strings.HasPrefix(s, "#"): + q.Type = Comment + return &q, nil + case strings.HasPrefix(s, "--"): + q.Type = CommentWithCommand + if len(s) > 2 && s[2] == ' ' { + s = s[3:] + } else { + s = s[2:] + } + case s[0] == '\n': + q.Type = EmptyLine + return &q, nil + } + + i := findFirstWord(s) + if i > 0 { + q.FirstWord = s[:i] + } + q.Query = s[i:] + + if q.Type == Unknown || q.Type == 
CommentWithCommand { + if err := q.getQueryType(realS); err != nil { + return nil, err + } + } + + return &q, nil +} + +// findFirstWord calculates the length of the first word in the string +func findFirstWord(s string) int { + i := 0 + for i < len(s) && s[i] != '(' && s[i] != ' ' && s[i] != ';' && s[i] != '\n' { + i++ + } + return i +} diff --git a/go/data/slow_query_log_loader_test.go b/go/data/slow_query_log_loader_test.go new file mode 100644 index 0000000..1b774d2 --- /dev/null +++ b/go/data/slow_query_log_loader_test.go @@ -0,0 +1,48 @@ +/* +Copyright 2024 The Vitess Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package data + +import ( + "testing" + + "github.com/stretchr/testify/require" +) + +func TestLoadSlowQueryLogWithMetadata(t *testing.T) { + loader := SlowQueryLogLoader{}.Load("./testdata/slow_query_log") + queries, err := makeSlice(loader) + require.NoError(t, err) + + expected := []Query{ + {FirstWord: "/bin/mysqld,", Query: "/bin/mysqld, Version: 8.0.26 (Source distribution). started with:\nTcp port: 3306 Unix socket: /tmp/mysql.sock\nTime Id Command Argument\nuse testdb;", Line: 1, Type: 0, QueryTime: 0.000153, LockTime: 6.3e-05, RowsSent: 1, RowsExamined: 1, Timestamp: 0}, + {FirstWord: "SET", Query: "SET timestamp=1690891201;", Line: 8}, + {FirstWord: "select", Query: "select m1_0.id, m1_0.name, m1_0.value, m1_0.date from items m1_0 where m1_0.id=2343274;", Line: 9}, + {FirstWord: "FLUSH", Query: "FLUSH SLOW LOGS;", Line: 19, QueryTime: 0.005047, LockTime: 0, RowsSent: 0, RowsExamined: 0, Timestamp: 1690891201}, + {FirstWord: "select", Query: "select m1_0.id, m1_0.name, m1_0.value, m1_0.date from items m1_0 where m1_0.id=2343272;", Line: 24, QueryTime: 0.000162, LockTime: 6.7e-05, RowsSent: 1, RowsExamined: 1, Timestamp: 1690891201}, + {FirstWord: "select", Query: "select s1_0.id, s1_0.code, s1_0.token, s1_0.date from stores s1_0 where s1_0.id=11393;", Line: 29, QueryTime: 0.000583, LockTime: 0.000322, RowsSent: 1, RowsExamined: 1, Timestamp: 1690891201}, + {FirstWord: "select", Query: "select m1_0.id, m1_0.name, m1_0.value, m1_0.date from items m1_0 where m1_0.id=2343265;", Line: 34, QueryTime: 0.000148, LockTime: 6.2e-05, RowsSent: 1, RowsExamined: 1, Timestamp: 1690891201}, + {FirstWord: "select", Query: "select m1_0.id, m1_0.name, m1_0.value, m1_0.date from items m1_0 where m1_0.id=2343188;", Line: 39, QueryTime: 0.000159, LockTime: 6.5e-05, RowsSent: 1, RowsExamined: 1, Timestamp: 1690891201}, + {FirstWord: "select", Query: "select m1_0.id, m1_0.name, m1_0.value, m1_0.date from items m1_0 where m1_0.id=2343180;", Line: 44, QueryTime: 0.000152, LockTime: 6.3e-05, RowsSent: 1, RowsExamined: 1, Timestamp: 1690891201}, + {FirstWord: "select", Query: "select m1_0.id, m1_0.name, m1_0.value, m1_0.date from items m1_0 where m1_0.id=2343011;", Line: 49, QueryTime: 0.000149, LockTime: 6.1e-05, RowsSent: 666, RowsExamined: 1, Timestamp: 1690891201}, + {FirstWord: "select", Query: "select m1_0.id, m1_0.name, m1_0.value, m1_0.date from items m1_0 where m1_0.id=2342469;", Line: 54, 
QueryTime: 0.000153, LockTime: 6.2e-05, RowsSent: 1, RowsExamined: 1, Timestamp: 1690891201}, + {FirstWord: "select", Query: "select m1_0.id, m1_0.name, m1_0.value, m1_0.date from items m1_0 where m1_0.id=2342465;", Line: 59, QueryTime: 0.000151, LockTime: 6.2e-05, RowsSent: 1, RowsExamined: 1, Timestamp: 1690891201}, + {FirstWord: "select", Query: "select m1_0.id, m1_0.name, m1_0.value, m1_0.date from items m1_0 where m1_0.id=2342439;", Line: 64, QueryTime: 0.000148, LockTime: 6.1e-05, RowsSent: 1, RowsExamined: 731, Timestamp: 1690891201}, + {FirstWord: "select", Query: "select m1_0.id, m1_0.name, m1_0.value, m1_0.date from items m1_0 where m1_0.id=2342389;", Line: 69, QueryTime: 0.000163, LockTime: 6.7e-05, RowsSent: 1, RowsExamined: 1, Timestamp: 1690891201}, + } + + require.Equal(t, expected, queries) +} diff --git a/go/data/sql_script_loader.go b/go/data/sql_script_loader.go deleted file mode 100644 index 2a2faa1..0000000 --- a/go/data/sql_script_loader.go +++ /dev/null @@ -1,184 +0,0 @@ -/* -Copyright 2024 The Vitess Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package data - -import ( - "bufio" - "bytes" - "errors" - "fmt" - "io" - "net/http" - "os" - "strings" -) - -type SQLScriptLoader struct{} - -type sqlLogReaderState struct { - logReaderState -} - -func (s *sqlLogReaderState) Next() (Query, bool) { - s.mu.Lock() - defer s.mu.Unlock() - - if s.closed || s.err != nil { - return Query{}, false - } - - var currentQuery Query - newStmt := true - for s.scanner.Scan() { - s.lineNumber++ - line := s.scanner.Text() - line = strings.TrimSpace(line) - - switch { - case len(line) == 0: - continue - case strings.HasPrefix(line, "#"): - continue - case strings.HasPrefix(line, "--"): - newStmt = true - q := Query{Query: line, Line: s.lineNumber} - pq, err := parseQuery(q) - if err != nil { - s.err = err - return Query{}, false - } - if pq == nil { - continue - } - return *pq, true - } - - if newStmt { - currentQuery = Query{Query: line, Line: s.lineNumber} - } else { - currentQuery.Query = fmt.Sprintf("%s\n%s", currentQuery.Query, line) - } - - // Treat new line as a new statement if line ends with ';' - newStmt = strings.HasSuffix(line, ";") - if newStmt { - pq, err := parseQuery(currentQuery) - if err != nil { - s.err = err - return Query{}, false - } - if pq == nil { - continue - } - return *pq, true - } - } - if !newStmt { - s.err = errors.New("EOF: missing semicolon") - } - return Query{}, false -} - -func readData(url string) ([]byte, error) { - client := http.Client{} - res, err := client.Get(url) - if err != nil { - return nil, err - } - if res.StatusCode != http.StatusOK { - return nil, fmt.Errorf("failed to get data from %s, status code %d", url, res.StatusCode) - } - defer res.Body.Close() - return io.ReadAll(res.Body) -} - -func (SQLScriptLoader) Load(filename string) IteratorLoader { - var scanner *bufio.Scanner - var fd *os.File - - if strings.HasPrefix(filename, "http") { - data, err := readData(filename) - if err != nil { - return &errLoader{err: err} - } - scanner = 
bufio.NewScanner(bytes.NewReader(data)) - } else { - var err error - fd, err = os.OpenFile(filename, os.O_RDONLY, 0) - if err != nil { - return &errLoader{err: err} - } - scanner = bufio.NewScanner(fd) - } - - return &sqlLogReaderState{ - logReaderState: logReaderState{ - scanner: scanner, - fd: fd, - }, - } -} - -// Helper function to parse individual queries -func parseQuery(rs Query) (*Query, error) { - realS := rs.Query - s := rs.Query - q := Query{Line: rs.Line, Type: Unknown} - - if len(s) < 3 { - return nil, nil - } - - switch { - case strings.HasPrefix(s, "#"): - q.Type = Comment - return &q, nil - case strings.HasPrefix(s, "--"): - q.Type = CommentWithCommand - if len(s) > 2 && s[2] == ' ' { - s = s[3:] - } else { - s = s[2:] - } - case s[0] == '\n': - q.Type = EmptyLine - return &q, nil - } - - i := findFirstWord(s) - if i > 0 { - q.FirstWord = s[:i] - } - q.Query = s[i:] - - if q.Type == Unknown || q.Type == CommentWithCommand { - if err := q.getQueryType(realS); err != nil { - return nil, err - } - } - - return &q, nil -} - -// findFirstWord calculates the length of the first word in the string -func findFirstWord(s string) int { - i := 0 - for i < len(s) && s[i] != '(' && s[i] != ' ' && s[i] != ';' && s[i] != '\n' { - i++ - } - return i -} diff --git a/go/data/testdata/slow_query_log b/go/data/testdata/slow_query_log new file mode 100644 index 0000000..2b2132f --- /dev/null +++ b/go/data/testdata/slow_query_log @@ -0,0 +1,69 @@ +/bin/mysqld, Version: 8.0.26 (Source distribution). started with: +Tcp port: 3306 Unix socket: /tmp/mysql.sock +Time Id Command Argument +# Time: 2023-08-01T12:00:01.852235Z +# User@Host: user[user] @ [XXX.XXX.XXX.XXX] Id: 780496 +# Query_time: 0.000153 Lock_time: 0.000063 Rows_sent: 1 Rows_examined: 1 +use testdb; +SET timestamp=1690891201; +select m1_0.id, m1_0.name, m1_0.value, m1_0.date from items m1_0 where m1_0.id=2343274; +# Time: 2023-08-01T12:00:01.852861Z +# User@Host: user[user] @ [XXX.XXX.XXX.XXX] Id: 779060 +# Query_time: 0.000043 Lock_time: 0.000000 Rows_sent: 0 Rows_examined: 0 +SET timestamp=1690891201; +# administrator command: Prepare; +# Time: 2023-08-01T12:00:01.856654Z +# User@Host: admin[admin] @ localhost [] Id: 341291 +# Query_time: 0.005047 Lock_time: 0.000000 Rows_sent: 0 Rows_examined: 0 +SET timestamp=1690891201; +FLUSH SLOW LOGS; +# Time: 2023-08-01T12:00:01.857984Z +# User@Host: user[user] @ [XXX.XXX.XXX.XXX] Id: 780496 +# Query_time: 0.000162 Lock_time: 0.000067 Rows_sent: 1 Rows_examined: 1 +SET timestamp=1690891201; +select m1_0.id, m1_0.name, m1_0.value, m1_0.date from items m1_0 where m1_0.id=2343272; +# Time: 2023-08-01T12:00:01.858711Z +# User@Host: user[user] @ [XXX.XXX.XXX.XXX] Id: 780506 +# Query_time: 0.000583 Lock_time: 0.000322 Rows_sent: 1 Rows_examined: 1 +SET timestamp=1690891201; +select s1_0.id, s1_0.code, s1_0.token, s1_0.date from stores s1_0 where s1_0.id=11393; +# Time: 2023-08-01T12:00:01.859281Z +# User@Host: user[user] @ [XXX.XXX.XXX.XXX] Id: 780496 +# Query_time: 0.000148 Lock_time: 0.000062 Rows_sent: 1 Rows_examined: 1 +SET timestamp=1690891201; +select m1_0.id, m1_0.name, m1_0.value, m1_0.date from items m1_0 where m1_0.id=2343265; +# Time: 2023-08-01T12:00:01.860595Z +# User@Host: user[user] @ [XXX.XXX.XXX.XXX] Id: 780496 +# Query_time: 0.000159 Lock_time: 0.000065 Rows_sent: 1 Rows_examined: 1 +SET timestamp=1690891201; +select m1_0.id, m1_0.name, m1_0.value, m1_0.date from items m1_0 where m1_0.id=2343188; +# Time: 2023-08-01T12:00:01.861900Z +# User@Host: user[user] @ [XXX.XXX.XXX.XXX] Id: 
780496 +# Query_time: 0.000152 Lock_time: 0.000063 Rows_sent: 1 Rows_examined: 1 +SET timestamp=1690891201; +select m1_0.id, m1_0.name, m1_0.value, m1_0.date from items m1_0 where m1_0.id=2343180; +# Time: 2023-08-01T12:00:01.863201Z +# User@Host: user[user] @ [XXX.XXX.XXX.XXX] Id: 780496 +# Query_time: 0.000149 Lock_time: 0.000061 Rows_sent: 666 Rows_examined: 1 +SET timestamp=1690891201; +select m1_0.id, m1_0.name, m1_0.value, m1_0.date from items m1_0 where m1_0.id=2343011; +# Time: 2023-08-01T12:00:01.864517Z +# User@Host: user[user] @ [XXX.XXX.XXX.XXX] Id: 780496 +# Query_time: 0.000153 Lock_time: 0.000062 Rows_sent: 1 Rows_examined: 1 +SET timestamp=1690891201; +select m1_0.id, m1_0.name, m1_0.value, m1_0.date from items m1_0 where m1_0.id=2342469; +# Time: 2023-08-01T12:00:01.865820Z +# User@Host: user[user] @ [XXX.XXX.XXX.XXX] Id: 780496 +# Query_time: 0.000151 Lock_time: 0.000062 Rows_sent: 1 Rows_examined: 1 +SET timestamp=1690891201; +select m1_0.id, m1_0.name, m1_0.value, m1_0.date from items m1_0 where m1_0.id=2342465; +# Time: 2023-08-01T12:00:01.867130Z +# User@Host: user[user] @ [XXX.XXX.XXX.XXX] Id: 780496 +# Query_time: 0.000148 Lock_time: 0.000061 Rows_sent: 1 Rows_examined: 731 +SET timestamp=1690891201; +select m1_0.id, m1_0.name, m1_0.value, m1_0.date from items m1_0 where m1_0.id=2342439; +# Time: 2023-08-01T12:00:01.868511Z +# User@Host: user[user] @ [XXX.XXX.XXX.XXX] Id: 780496 +# Query_time: 0.000163 Lock_time: 0.000067 Rows_sent: 1 Rows_examined: 1 +SET timestamp=1690891201; +select m1_0.id, m1_0.name, m1_0.value, m1_0.date from items m1_0 where m1_0.id=2342389; \ No newline at end of file diff --git a/go/keys/keys.go b/go/keys/keys.go index 3b854b3..31a6d4d 100644 --- a/go/keys/keys.go +++ b/go/keys/keys.go @@ -138,6 +138,10 @@ func (ql *queryList) processQuery(si *schemaInfo, ast sqlparser.Statement, q dat if found { r.UsageCount++ r.LineNumbers = append(r.LineNumbers, q.Line) + r.QueryTime += q.QueryTime + r.LockTime += q.LockTime + r.RowsSent += q.RowsSent + r.RowsExamined += q.RowsExamined return } @@ -160,6 +164,11 @@ func (ql *queryList) processQuery(si *schemaInfo, ast sqlparser.Statement, q dat GroupingColumns: result.GroupingColumns, JoinPredicates: result.JoinPredicates, FilterColumns: result.FilterColumns, + QueryTime: q.QueryTime, + LockTime: q.LockTime, + RowsSent: q.RowsSent, + RowsExamined: q.RowsExamined, + Timestamp: q.Timestamp, } } @@ -223,6 +232,11 @@ type QueryAnalysisResult struct { JoinPredicates []operators.JoinPredicate `json:"joinPredicates,omitempty"` FilterColumns []operators.ColumnUse `json:"filterColumns,omitempty"` StatementType string `json:"statementType"` + QueryTime float64 `json:"queryTime,omitempty"` + LockTime float64 `json:"lockTime,omitempty"` + RowsSent int `json:"rowsSent,omitempty"` + RowsExamined int `json:"rowsExamined,omitempty"` + Timestamp int64 `json:"timestamp,omitempty"` } type QueryFailedResult struct { diff --git a/go/keys/keys_test.go b/go/keys/keys_test.go index c376676..6b616e6 100644 --- a/go/keys/keys_test.go +++ b/go/keys/keys_test.go @@ -34,7 +34,7 @@ func TestKeys(t *testing.T) { { cfg: Config{ FileName: "../../t/tpch_failing_queries.test", - Loader: data.SQLScriptLoader{}, + Loader: data.SlowQueryLogLoader{}, }, expectedFile: "../summarize/testdata/keys-log.json", }, @@ -45,6 +45,13 @@ func TestKeys(t *testing.T) { }, expectedFile: "../summarize/testdata/keys-log-vtgate.json", }, + { + cfg: Config{ + FileName: "../data/testdata/slow_query_log", + Loader: data.SlowQueryLogLoader{}, + }, + expectedFile: 
"../summarize/testdata/slow-query-log.json", + }, } for _, tcase := range cases { diff --git a/go/summarize/testdata/slow-query-log.json b/go/summarize/testdata/slow-query-log.json new file mode 100644 index 0000000..16e4228 --- /dev/null +++ b/go/summarize/testdata/slow-query-log.json @@ -0,0 +1,76 @@ +{ + "queries": [ + { + "queryStructure": "SELECT `m1_0`.`id`, `m1_0`.`name`, `m1_0`.`value`, `m1_0`.`date` FROM `items` AS `m1_0` WHERE `m1_0`.`id` = :_m1_0_id /* INT64 */", + "usageCount": 10, + "lineNumbers": [ + 9, + 24, + 34, + 39, + 44, + 49, + 54, + 59, + 64, + 69 + ], + "tableNames": [ + "items" + ], + "filterColumns": [ + "items.id =" + ], + "statementType": "SELECT", + "queryTime": 0.001385, + "lockTime": 0.00057, + "rowsSent": 674, + "rowsExamined": 739 + }, + { + "queryStructure": "FLUSH SLOW LOGS", + "usageCount": 1, + "lineNumbers": [ + 19 + ], + "statementType": "FLUSH", + "queryTime": 0.005047, + "timestamp": 1690891201 + }, + { + "queryStructure": "SELECT `s1_0`.`id`, `s1_0`.`code`, `s1_0`.`token`, `s1_0`.`date` FROM `stores` AS `s1_0` WHERE `s1_0`.`id` = :_s1_0_id /* INT64 */", + "usageCount": 1, + "lineNumbers": [ + 29 + ], + "tableNames": [ + "stores" + ], + "filterColumns": [ + "stores.id =" + ], + "statementType": "SELECT", + "queryTime": 0.000583, + "lockTime": 0.000322, + "rowsSent": 1, + "rowsExamined": 1, + "timestamp": 1690891201 + } + ], + "failed": [ + { + "query": "/bin/mysqld, Version: 8.0.26 (Source distribution). started with:\nTcp port: 3306 Unix socket: /tmp/mysql.sock\nTime Id Command Argument\nuse testdb;", + "lineNumbers": [ + 1 + ], + "error": "syntax error at position 2" + }, + { + "query": "SET timestamp=1690891201;", + "lineNumbers": [ + 8 + ], + "error": "syntax error at position 14 near 'timestamp'" + } + ] + } \ No newline at end of file From e53f3e135c3ed11c2ee254413736fb4a5827b9b9 Mon Sep 17 00:00:00 2001 From: Andres Taylor Date: Fri, 15 Nov 2024 09:46:47 +0100 Subject: [PATCH 02/10] refactor: move to budio.Reader instead of Scanner Signed-off-by: Andres Taylor --- go/data/query_log_parse.go | 108 ++++++++++++++++++++----------- go/data/slow_query_log_loader.go | 27 ++++---- go/data/vtgate_log_parse.go | 19 ++++-- 3 files changed, 96 insertions(+), 58 deletions(-) diff --git a/go/data/query_log_parse.go b/go/data/query_log_parse.go index 496510d..cf5c87b 100644 --- a/go/data/query_log_parse.go +++ b/go/data/query_log_parse.go @@ -19,6 +19,8 @@ package data import ( "bufio" "errors" + "fmt" + "io" "os" "regexp" "sync" @@ -29,7 +31,7 @@ type ( logReaderState struct { fd *os.File - scanner *bufio.Scanner + reader *bufio.Reader reg *regexp.Regexp mu sync.Mutex lineNumber int @@ -65,9 +67,18 @@ func (s *mysqlLogReaderState) Next() (Query, bool) { return Query{}, false } - for s.scanner.Scan() { + for { + line, done, err := s.readLine() + if err != nil { + s.err = fmt.Errorf("error reading file: %w", err) + return Query{}, false + } + if done { + break + } + s.lineNumber++ - line := s.scanner.Text() + if len(line) == 0 { continue } @@ -82,20 +93,7 @@ func (s *mysqlLogReaderState) Next() (Query, bool) { // If we have a previous query, return it before processing the new line if s.prevQuery != "" { - query := Query{ - Query: s.prevQuery, - Line: s.queryStart, - Type: QueryT, - } - s.prevQuery = "" - - // If the new line is a query, store it for next iteration - if matches[3] == "Query" { - s.prevQuery = matches[4] - s.queryStart = s.lineNumber - } - - return query, true + return s.processQuery(matches), true } // Start a new query if this line is a 
query @@ -108,19 +106,62 @@ func (s *mysqlLogReaderState) Next() (Query, bool) { // Return the last query if we have one if s.prevQuery != "" { - query := Query{ - Query: s.prevQuery, - Line: s.queryStart, - Type: QueryT, - } - s.prevQuery = "" - return query, true + return s.finalizeQuery(), true } - s.err = s.scanner.Err() return Query{}, false } +func (s *logReaderState) readLine() (string, bool, error) { + line, isPrefix, err := s.reader.ReadLine() + if err == io.EOF { + return "", true, nil + } + if err != nil { + return "", false, err + } + + // Handle lines longer than the buffer size + totalLine := append([]byte{}, line...) + for isPrefix { + line, isPrefix, err = s.reader.ReadLine() + if err == io.EOF { + break + } + if err != nil { + return "", false, err + } + totalLine = append(totalLine, line...) + } + return string(totalLine), false, nil +} + +func (s *mysqlLogReaderState) finalizeQuery() Query { + query := Query{ + Query: s.prevQuery, + Line: s.queryStart, + Type: QueryT, + } + s.prevQuery = "" + return query +} + +func (s *mysqlLogReaderState) processQuery(matches []string) Query { + query := Query{ + Query: s.prevQuery, + Line: s.queryStart, + Type: QueryT, + } + s.prevQuery = "" + + // If the new line is a query, store it for next iteration + if matches[3] == "Query" { + s.prevQuery = matches[4] + s.queryStart = s.lineNumber + } + return query +} + func (s *logReaderState) Close() error { s.mu.Lock() defer s.mu.Unlock() @@ -136,15 +177,6 @@ func (s *logReaderState) Close() error { return s.err } -func (s *logReaderState) NextLine() (string, bool) { - more := s.scanner.Scan() - if !more { - return "", false - } - - return s.scanner.Text(), true -} - func (MySQLLogLoader) Load(fileName string) IteratorLoader { reg := regexp.MustCompile(`^(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{6}Z)\s+(\d+)\s+(\w+)\s+(.*)`) @@ -153,13 +185,11 @@ func (MySQLLogLoader) Load(fileName string) IteratorLoader { return &errLoader{err} } - scanner := bufio.NewScanner(fd) - return &mysqlLogReaderState{ logReaderState: logReaderState{ - scanner: scanner, - reg: reg, - fd: fd, + reader: bufio.NewReader(fd), + reg: reg, + fd: fd, }, } } diff --git a/go/data/slow_query_log_loader.go b/go/data/slow_query_log_loader.go index d268789..d302cda 100644 --- a/go/data/slow_query_log_loader.go +++ b/go/data/slow_query_log_loader.go @@ -52,9 +52,17 @@ func (s *slowQueryLogReaderState) Next() (Query, bool) { newStmt: true, } - for s.scanner.Scan() { + for { + line, done, err := s.readLine() + if err != nil { + s.err = fmt.Errorf("error reading file: %w", err) + return Query{}, false + } + if done { + break + } s.lineNumber++ - line := strings.TrimSpace(s.scanner.Text()) + line = strings.TrimSpace(line) result, done, err := s.processLine(line, state) if err != nil { @@ -66,11 +74,6 @@ func (s *slowQueryLogReaderState) Next() (Query, bool) { } } - if err := s.scanner.Err(); err != nil { - s.err = err - return Query{}, false - } - if !state.newStmt && state.currentQuery.Query != "" { s.err = errors.New("EOF: missing semicolon") } @@ -245,7 +248,7 @@ func readData(url string) ([]byte, error) { } func (SlowQueryLogLoader) Load(filename string) IteratorLoader { - var scanner *bufio.Scanner + var reader *bufio.Reader var fd *os.File if strings.HasPrefix(filename, "http") { @@ -253,20 +256,20 @@ func (SlowQueryLogLoader) Load(filename string) IteratorLoader { if err != nil { return &errLoader{err: err} } - scanner = bufio.NewScanner(bytes.NewReader(data)) + reader = bufio.NewReader(bytes.NewReader(data)) } else { var err 
error fd, err = os.OpenFile(filename, os.O_RDONLY, 0) if err != nil { return &errLoader{err: err} } - scanner = bufio.NewScanner(fd) + reader = bufio.NewReader(fd) } return &slowQueryLogReaderState{ logReaderState: logReaderState{ - scanner: scanner, - fd: fd, + fd: fd, + reader: reader, }, } } diff --git a/go/data/vtgate_log_parse.go b/go/data/vtgate_log_parse.go index 889ec50..76644a5 100644 --- a/go/data/vtgate_log_parse.go +++ b/go/data/vtgate_log_parse.go @@ -52,13 +52,11 @@ func (vll VtGateLogLoader) Load(fileName string) IteratorLoader { return &errLoader{err: err} } - scanner := bufio.NewScanner(fd) - return &vtgateLogReaderState{ logReaderState: logReaderState{ - scanner: scanner, - reg: reg, - fd: fd, + reader: bufio.NewReader(fd), + reg: reg, + fd: fd, }, NeedsBindVars: vll.NeedsBindVars, } @@ -76,9 +74,16 @@ func (s *vtgateLogReaderState) Next() (Query, bool) { return Query{}, false } - for s.scanner.Scan() { + for { + line, done, err := s.readLine() + if err != nil { + s.fail(fmt.Errorf("error reading file: %w", err)) + return Query{}, false + } + if done { + break + } s.lineNumber++ - line := s.scanner.Text() if len(line) == 0 { continue From 5de0332cbc02585c059d8061ae5dca52736d9778 Mon Sep 17 00:00:00 2001 From: Andres Taylor Date: Mon, 18 Nov 2024 07:02:00 +0100 Subject: [PATCH 03/10] feat: remove printing of failed queries Signed-off-by: Andres Taylor --- go/data/query_log_parse.go | 3 +-- go/data/slow_query_log_loader.go | 1 - go/data/vtgate_log_parse.go | 1 - go/keys/keys.go | 10 +++++----- go/summarize/summarize-keys.go | 6 ++---- 5 files changed, 8 insertions(+), 13 deletions(-) diff --git a/go/data/query_log_parse.go b/go/data/query_log_parse.go index cf5c87b..6042687 100644 --- a/go/data/query_log_parse.go +++ b/go/data/query_log_parse.go @@ -77,8 +77,6 @@ func (s *mysqlLogReaderState) Next() (Query, bool) { break } - s.lineNumber++ - if len(line) == 0 { continue } @@ -113,6 +111,7 @@ func (s *mysqlLogReaderState) Next() (Query, bool) { } func (s *logReaderState) readLine() (string, bool, error) { + s.lineNumber++ line, isPrefix, err := s.reader.ReadLine() if err == io.EOF { return "", true, nil diff --git a/go/data/slow_query_log_loader.go b/go/data/slow_query_log_loader.go index d302cda..4e533c5 100644 --- a/go/data/slow_query_log_loader.go +++ b/go/data/slow_query_log_loader.go @@ -61,7 +61,6 @@ func (s *slowQueryLogReaderState) Next() (Query, bool) { if done { break } - s.lineNumber++ line = strings.TrimSpace(line) result, done, err := s.processLine(line, state) diff --git a/go/data/vtgate_log_parse.go b/go/data/vtgate_log_parse.go index 76644a5..efd354d 100644 --- a/go/data/vtgate_log_parse.go +++ b/go/data/vtgate_log_parse.go @@ -83,7 +83,6 @@ func (s *vtgateLogReaderState) Next() (Query, bool) { if done { break } - s.lineNumber++ if len(line) == 0 { continue diff --git a/go/keys/keys.go b/go/keys/keys.go index 31a6d4d..1b4d5b2 100644 --- a/go/keys/keys.go +++ b/go/keys/keys.go @@ -18,6 +18,7 @@ package keys import ( "encoding/json" + "errors" "fmt" "io" "os" @@ -74,11 +75,10 @@ func run(out io.Writer, cfg Config) error { } } - if err := loader.Close(); err != nil { - return err - } + closeErr := loader.Close() + jsonWriteErr := ql.writeJSONTo(out) - return ql.writeJSONTo(out) + return errors.Join(closeErr, jsonWriteErr) } func process(q data.Query, si *schemaInfo, ql *queryList) { @@ -240,7 +240,7 @@ type QueryAnalysisResult struct { } type QueryFailedResult struct { - Query string `json:"query"` + Query string LineNumbers []int `json:"lineNumbers"` Error 
string `json:"error"` } diff --git a/go/summarize/summarize-keys.go b/go/summarize/summarize-keys.go index ba5631b..5afee0f 100644 --- a/go/summarize/summarize-keys.go +++ b/go/summarize/summarize-keys.go @@ -55,7 +55,6 @@ type ( } FailuresSummary struct { - Query string Error string Count int } @@ -299,10 +298,10 @@ func renderFailures(md *markdown.MarkDown, failures []FailuresSummary) { } md.PrintHeader("Failures", 2) - headers := []string{"Query", "Error", "Count"} + headers := []string{"Error", "Count"} var rows [][]string for _, failure := range failures { - rows = append(rows, []string{failure.Query, failure.Error, strconv.Itoa(failure.Count)}) + rows = append(rows, []string{failure.Error, strconv.Itoa(failure.Count)}) } md.PrintTable(headers, rows) } @@ -369,7 +368,6 @@ func summarizeKeysQueries(queries *keys.Output) ([]TableSummary, []FailuresSumma var failures []FailuresSummary for _, query := range queries.Failed { failures = append(failures, FailuresSummary{ - Query: query.Query, Error: query.Error, Count: len(query.LineNumbers), }) From bc0e08a32684c32dd6e0dbb8a0d3aaf4704eb78f Mon Sep 17 00:00:00 2001 From: Andres Taylor Date: Mon, 18 Nov 2024 07:56:09 +0100 Subject: [PATCH 04/10] refactor: collect all types in one block Signed-off-by: Andres Taylor --- go/keys/keys.go | 78 ++++++++++++++++++++++++------------------------- 1 file changed, 38 insertions(+), 40 deletions(-) diff --git a/go/keys/keys.go b/go/keys/keys.go index 1b4d5b2..6cb55df 100644 --- a/go/keys/keys.go +++ b/go/keys/keys.go @@ -33,10 +33,44 @@ import ( "github.com/vitessio/vt/go/data" ) -type Config struct { - FileName string - Loader data.Loader -} +type ( + Config struct { + FileName string + Loader data.Loader + } + // Output represents the output generated by 'vt keys' + Output struct { + Queries []QueryAnalysisResult `json:"queries"` + Failed []QueryFailedResult `json:"failed,omitempty"` + } + queryList struct { + queries map[string]*QueryAnalysisResult + failed map[string]*QueryFailedResult + } + // QueryAnalysisResult represents the result of analyzing a query in a query log. It contains the query structure, the number of + // times the query was used, the line numbers where the query was used, the table name, grouping columns, join columns, + // filter columns, and the statement type. 
+ QueryAnalysisResult struct { + QueryStructure string `json:"queryStructure"` + UsageCount int `json:"usageCount"` + LineNumbers []int `json:"lineNumbers"` + TableNames []string `json:"tableNames,omitempty"` + GroupingColumns []operators.Column `json:"groupingColumns,omitempty"` + JoinPredicates []operators.JoinPredicate `json:"joinPredicates,omitempty"` + FilterColumns []operators.ColumnUse `json:"filterColumns,omitempty"` + StatementType string `json:"statementType"` + QueryTime float64 `json:"queryTime,omitempty"` + LockTime float64 `json:"lockTime,omitempty"` + RowsSent int `json:"rowsSent,omitempty"` + RowsExamined int `json:"rowsExamined,omitempty"` + Timestamp int64 `json:"timestamp,omitempty"` + } + QueryFailedResult struct { + Query string + LineNumbers []int `json:"lineNumbers"` + Error string `json:"error"` + } +) func Run(cfg Config) error { return run(os.Stdout, cfg) @@ -96,17 +130,6 @@ func process(q data.Query, si *schemaInfo, ql *queryList) { } } -// Output represents the output generated by 'vt keys' -type Output struct { - Queries []QueryAnalysisResult `json:"queries"` - Failed []QueryFailedResult `json:"failed,omitempty"` -} - -type queryList struct { - queries map[string]*QueryAnalysisResult - failed map[string]*QueryFailedResult -} - func (ql *queryList) processQuery(si *schemaInfo, ast sqlparser.Statement, q data.Query, bv sqlparser.BindVars) { // handle panics defer func() { @@ -219,28 +242,3 @@ func (ql *queryList) writeJSONTo(w io.Writer) error { return err } - -// QueryAnalysisResult represents the result of analyzing a query in a query log. It contains the query structure, the number of -// times the query was used, the line numbers where the query was used, the table name, grouping columns, join columns, -// filter columns, and the statement type. -type QueryAnalysisResult struct { - QueryStructure string `json:"queryStructure"` - UsageCount int `json:"usageCount"` - LineNumbers []int `json:"lineNumbers"` - TableNames []string `json:"tableNames,omitempty"` - GroupingColumns []operators.Column `json:"groupingColumns,omitempty"` - JoinPredicates []operators.JoinPredicate `json:"joinPredicates,omitempty"` - FilterColumns []operators.ColumnUse `json:"filterColumns,omitempty"` - StatementType string `json:"statementType"` - QueryTime float64 `json:"queryTime,omitempty"` - LockTime float64 `json:"lockTime,omitempty"` - RowsSent int `json:"rowsSent,omitempty"` - RowsExamined int `json:"rowsExamined,omitempty"` - Timestamp int64 `json:"timestamp,omitempty"` -} - -type QueryFailedResult struct { - Query string - LineNumbers []int `json:"lineNumbers"` - Error string `json:"error"` -} From 591e697cb66cc305797abf27e7630575a7dbb2c7 Mon Sep 17 00:00:00 2001 From: Andres Taylor Date: Mon, 18 Nov 2024 09:13:06 +0100 Subject: [PATCH 05/10] test: add bigger slow query log reading test Signed-off-by: Andres Taylor --- go/data/testdata/bigger_slow_query_log.log | 219 +++++++++++++ go/keys/keys.go | 2 +- go/keys/keys_test.go | 13 +- go/keys/testdata/bigger_slow_query_log.json | 336 ++++++++++++++++++++ 4 files changed, 568 insertions(+), 2 deletions(-) create mode 100644 go/data/testdata/bigger_slow_query_log.log create mode 100644 go/keys/testdata/bigger_slow_query_log.json diff --git a/go/data/testdata/bigger_slow_query_log.log b/go/data/testdata/bigger_slow_query_log.log new file mode 100644 index 0000000..983b9f5 --- /dev/null +++ b/go/data/testdata/bigger_slow_query_log.log @@ -0,0 +1,219 @@ +/bin/mysqld, Version: 8.0.26 (Source distribution). 
started with: +Tcp port: 3306 Unix socket: /tmp/mysql.sock +Time Id Command Argument +# Time: 2023-08-01T12:00:01.000123Z +# User@Host: user1[user1] @ [192.168.1.101] Id: 800001 +# Query_time: 0.150623 Lock_time: 0.002000 Rows_sent: 5 Rows_examined: 2000 +use testdb; +SET timestamp=1690891201; +SELECT p.name, AVG(r.rating) as avg_rating FROM products p JOIN reviews r ON p.id = r.product_id GROUP BY p.id ORDER BY avg_rating DESC LIMIT 5; +# Time: 2023-08-01T12:00:02.000456Z +# User@Host: admin[admin] @ localhost [] Id: 800002 +# Query_time: 0.200012 Lock_time: 0.003000 Rows_sent: 10 Rows_examined: 5000 +SET timestamp=1690891202; +SELECT u.username, SUM(o.total_amount) as total_spent FROM users u JOIN orders o ON u.id = o.user_id WHERE o.created_at BETWEEN '2023-07-01' AND '2023-07-31' GROUP BY u.id HAVING total_spent > 500; +# Time: 2023-08-01T12:00:03.000789Z +# User@Host: user2[user2] @ [192.168.1.102] Id: 800003 +# Query_time: 0.180234 Lock_time: 0.001500 Rows_sent: 20 Rows_examined: 10000 +SET timestamp=1690891203; +SELECT c.name, COUNT(o.id) as order_count +FROM categories c +JOIN products p ON c.id = p.category_id +JOIN order_items oi ON p.id = oi.product_id +JOIN orders o ON oi.order_id = o.id +GROUP BY c.id; +# Time: 2023-08-01T12:00:04.001012Z +# User@Host: user3[user3] @ [192.168.1.103] Id: 800004 +# Query_time: 0.130456 Lock_time: 0.002300 Rows_sent: 15 Rows_examined: 8000 +SET timestamp=1690891204; +SELECT p.name, i.stock_level +FROM products p +JOIN inventory i ON p.id = i.product_id +WHERE i.stock_level < 5; +# Time: 2023-08-01T12:00:05.001345Z +# User@Host: user4[user4] @ [192.168.1.104] Id: 800005 +# Query_time: 0.210678 Lock_time: 0.004000 Rows_sent: 25 Rows_examined: 15000 +SET timestamp=1690891205; +SELECT c.name, SUM(oi.price * oi.quantity) as total_sales +FROM categories c +JOIN products p ON c.id = p.category_id +JOIN order_items oi ON p.id = oi.product_id +GROUP BY c.id +ORDER BY total_sales DESC +LIMIT 3; +# Time: 2023-08-01T12:00:06.001678Z +# User@Host: user5[user5] @ [192.168.1.105] Id: 800006 +# Query_time: 0.140789 Lock_time: 0.001800 Rows_sent: 1 Rows_examined: 4000 +SET timestamp=1690891206; +SELECT o.id, o.created_at +FROM orders o +LEFT JOIN shipments s ON o.id = s.order_id +WHERE s.shipped_date IS NULL AND o.created_at < DATE_SUB(NOW(), INTERVAL 7 DAY); +# Time: 2023-08-01T12:00:07.002001Z +# User@Host: user1[user1] @ [192.168.1.101] Id: 800001 +# Query_time: 0.160123 Lock_time: 0.002100 Rows_sent: 3 Rows_examined: 2500 +SET timestamp=1690891207; +SELECT p.payment_method, AVG(o.total_amount) as avg_order_value +FROM payments p +JOIN orders o ON p.order_id = o.id +GROUP BY p.payment_method; +# Time: 2023-08-01T12:00:08.002334Z +# User@Host: user2[user2] @ [192.168.1.102] Id: 800003 +# Query_time: 0.190456 Lock_time: 0.001600 Rows_sent: 50 Rows_examined: 12000 +SET timestamp=1690891208; +SELECT DATE(o.created_at) as order_date, COUNT(*) as order_count +FROM orders o +WHERE o.created_at >= DATE_SUB(NOW(), INTERVAL 7 DAY) +GROUP BY DATE(o.created_at); +# Time: 2023-08-01T12:00:09.002667Z +# User@Host: user3[user3] @ [192.168.1.103] Id: 800004 +# Query_time: 0.220789 Lock_time: 0.003300 Rows_sent: 30 Rows_examined: 18000 +SET timestamp=1690891209; +SELECT m.sender_id, COUNT(DISTINCT m.receiver_id) as unique_receivers +FROM messages m +GROUP BY m.sender_id +HAVING unique_receivers > 10; +# Time: 2023-08-01T12:00:10.003000Z +# User@Host: user4[user4] @ [192.168.1.104] Id: 800005 +# Query_time: 0.250012 Lock_time: 0.005000 Rows_sent: 2 Rows_examined: 6000 +SET 
timestamp=1690891210; +SELECT u.id, u.username +FROM users u +LEFT JOIN orders o ON u.id = o.user_id +WHERE o.id IS NULL; +# Time: 2023-08-01T12:00:11.003333Z +# User@Host: user5[user5] @ [192.168.1.105] Id: 800006 +# Query_time: 0.170345 Lock_time: 0.001700 Rows_sent: 100 Rows_examined: 5000 +SET timestamp=1690891211; +SELECT u.username, SUM(o.total_amount) as total_spent +FROM users u +JOIN orders o ON u.id = o.user_id +WHERE o.created_at BETWEEN '2023-06-01' AND '2023-06-30' +GROUP BY u.id +HAVING total_spent > 2000; +# Time: 2023-08-01T12:00:12.003666Z +# User@Host: user1[user1] @ [192.168.1.101] Id: 800001 +# Query_time: 0.210456 Lock_time: 0.002500 Rows_sent: 7 Rows_examined: 3000 +SET timestamp=1690891212; +SELECT p.name, AVG(r.rating) as avg_rating +FROM products p +JOIN reviews r ON p.id = r.product_id +GROUP BY p.id +ORDER BY avg_rating DESC +LIMIT 10; +# Time: 2023-08-01T12:00:13.003999Z +# User@Host: user2[user2] @ [192.168.1.102] Id: 800003 +# Query_time: 0.180789 Lock_time: 0.002200 Rows_sent: 12 Rows_examined: 7000 +SET timestamp=1690891213; +SELECT p.name, i.stock_level +FROM products p +JOIN inventory i ON p.id = i.product_id +WHERE i.stock_level < 15; +# Time: 2023-08-01T12:00:14.004332Z +# User@Host: user3[user3] @ [192.168.1.103] Id: 800004 +# Query_time: 0.200123 Lock_time: 0.001900 Rows_sent: 1 Rows_examined: 4500 +SET timestamp=1690891214; +SELECT o.id, o.created_at +FROM orders o +LEFT JOIN shipments s ON o.id = s.order_id +WHERE s.shipped_date IS NULL AND o.created_at < DATE_SUB(NOW(), INTERVAL 14 DAY); +# Time: 2023-08-01T12:00:15.004665Z +# User@Host: user4[user4] @ [192.168.1.104] Id: 800005 +# Query_time: 0.240456 Lock_time: 0.003500 Rows_sent: 5 Rows_examined: 10000 +SET timestamp=1690891215; +SELECT u.id, u.username +FROM users u +LEFT JOIN orders o ON u.id = o.user_id +WHERE o.id IS NULL; +# Time: 2023-08-01T12:00:16.004998Z +# User@Host: user5[user5] @ [192.168.1.105] Id: 800006 +# Query_time: 0.190789 Lock_time: 0.002600 Rows_sent: 8 Rows_examined: 6000 +SET timestamp=1690891216; +SELECT c.name, COUNT(o.id) as order_count +FROM categories c +JOIN products p ON c.id = p.category_id +JOIN order_items oi ON p.id = oi.product_id +JOIN orders o ON oi.order_id = o.id +GROUP BY c.id; +# Time: 2023-08-01T12:00:17.005331Z +# User@Host: user1[user1] @ [192.168.1.101] Id: 800001 +# Query_time: 0.170123 Lock_time: 0.001800 Rows_sent: 4 Rows_examined: 3500 +SET timestamp=1690891217; +SELECT p.payment_method, AVG(o.total_amount) as avg_order_value +FROM payments p +JOIN orders o ON p.order_id = o.id +GROUP BY p.payment_method; +# Time: 2023-08-01T12:00:18.005664Z +# User@Host: user2[user2] @ [192.168.1.102] Id: 800003 +# Query_time: 0.160456 Lock_time: 0.001400 Rows_sent: 6 Rows_examined: 3000 +SET timestamp=1690891218; +SELECT m.sender_id, COUNT(DISTINCT m.receiver_id) as unique_receivers +FROM messages m +GROUP BY m.sender_id +HAVING unique_receivers > 5; +# Time: 2023-08-01T12:00:19.005997Z +# User@Host: user3[user3] @ [192.168.1.103] Id: 800004 +# Query_time: 0.210789 Lock_time: 0.003000 Rows_sent: 9 Rows_examined: 7000 +SET timestamp=1690891219; +SELECT u.username, SUM(o.total_amount) as total_spent +FROM users u +JOIN orders o ON u.id = o.user_id +WHERE o.created_at BETWEEN '2023-07-01' AND '2023-07-31' +GROUP BY u.id +HAVING total_spent > 1500; +# Time: 2023-08-01T12:00:20.006330Z +# User@Host: user4[user4] @ [192.168.1.104] Id: 800005 +# Query_time: 0.180456 Lock_time: 0.002300 Rows_sent: 3 Rows_examined: 4000 +SET timestamp=1690891220; +SELECT 
DATE(o.created_at) as order_date, COUNT(*) as order_count +FROM orders o +WHERE o.created_at >= DATE_SUB(NOW(), INTERVAL 30 DAY) +GROUP BY DATE(o.created_at); +# Time: 2023-08-01T12:00:21.006663Z +# User@Host: user5[user5] @ [192.168.1.105] Id: 800006 +# Query_time: 0.220123 Lock_time: 0.003800 Rows_sent: 2 Rows_examined: 8000 +SET timestamp=1690891221; +SELECT u.id, u.username +FROM users u +JOIN orders o ON u.id = o.user_id +JOIN reviews r ON u.id = r.user_id +WHERE o.created_at >= DATE_SUB(NOW(), INTERVAL 1 MONTH) +AND r.created_at >= DATE_SUB(NOW(), INTERVAL 1 MONTH); +# Time: 2023-08-01T12:00:22.006996Z +# User@Host: user1[user1] @ [192.168.1.101] Id: 800001 +# Query_time: 0.190789 Lock_time: 0.002100 Rows_sent: 6 Rows_examined: 5000 +SET timestamp=1690891222; +SELECT c.name, SUM(oi.price * oi.quantity) as total_sales +FROM categories c +JOIN products p ON c.id = p.category_id +JOIN order_items oi ON p.id = oi.product_id +GROUP BY c.id +ORDER BY total_sales DESC +LIMIT 5; +# Time: 2023-08-01T12:00:23.007329Z +# User@Host: user2[user2] @ [192.168.1.102] Id: 800003 +# Query_time: 0.160456 Lock_time: 0.001500 Rows_sent: 10 Rows_examined: 3500 +SET timestamp=1690891223; +SELECT p.name, AVG(r.rating) as avg_rating +FROM products p +JOIN reviews r ON p.id = r.product_id +WHERE r.created_at >= DATE_SUB(NOW(), INTERVAL 1 WEEK) +GROUP BY p.id +ORDER BY avg_rating DESC +LIMIT 5; +# Time: 2023-08-01T12:00:24.007662Z +# User@Host: user3[user3] @ [192.168.1.103] Id: 800004 +# Query_time: 0.200123 Lock_time: 0.002700 Rows_sent: 15 Rows_examined: 6500 +SET timestamp=1690891224; +SELECT p.name, i.stock_level +FROM products p +JOIN inventory i ON p.id = i.product_id +WHERE i.stock_level BETWEEN 5 AND 15; +# Time: 2023-08-01T12:00:25.007995Z +# User@Host: user4[user4] @ [192.168.1.104] Id: 800005 +# Query_time: 0.230789 Lock_time: 0.003100 Rows_sent: 20 Rows_examined: 9000 +SET timestamp=1690891225; +SELECT m.sender_id, COUNT(DISTINCT m.receiver_id) as unique_receivers +FROM messages m +GROUP BY m.sender_id +HAVING unique_receivers > 15; \ No newline at end of file diff --git a/go/keys/keys.go b/go/keys/keys.go index 6cb55df..bc5b995 100644 --- a/go/keys/keys.go +++ b/go/keys/keys.go @@ -66,7 +66,7 @@ type ( Timestamp int64 `json:"timestamp,omitempty"` } QueryFailedResult struct { - Query string + Query string `json:"query"` LineNumbers []int `json:"lineNumbers"` Error string `json:"error"` } diff --git a/go/keys/keys_test.go b/go/keys/keys_test.go index 6b616e6..bd47506 100644 --- a/go/keys/keys_test.go +++ b/go/keys/keys_test.go @@ -21,6 +21,7 @@ import ( "strings" "testing" + "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "github.com/vitessio/vt/go/data" @@ -52,6 +53,13 @@ func TestKeys(t *testing.T) { }, expectedFile: "../summarize/testdata/slow-query-log.json", }, + { + cfg: Config{ + FileName: "../data/testdata/bigger_slow_query_log.log", + Loader: data.SlowQueryLogLoader{}, + }, + expectedFile: "./testdata/bigger_slow_query_log.json", + }, } for _, tcase := range cases { @@ -63,7 +71,10 @@ func TestKeys(t *testing.T) { out, err := os.ReadFile(tcase.expectedFile) require.NoError(t, err) - require.Equal(t, string(out), sb.String()) + assert.Equal(t, string(out), sb.String()) + if t.Failed() { + _ = os.WriteFile(tcase.expectedFile+".correct", []byte(sb.String()), 0o644) + } }) } } diff --git a/go/keys/testdata/bigger_slow_query_log.json b/go/keys/testdata/bigger_slow_query_log.json new file mode 100644 index 0000000..3ae6e26 --- /dev/null +++ 
b/go/keys/testdata/bigger_slow_query_log.json @@ -0,0 +1,336 @@ +{ + "queries": [ + { + "queryStructure": "SELECT `p`.`name`, avg(`r`.`rating`) AS `avg_rating` FROM `products` AS `p` JOIN `reviews` AS `r` ON `p`.`id` = `r`.`product_id` GROUP BY `p`.`id` ORDER BY avg(`r`.`rating`) DESC LIMIT :1 /* INT64 */", + "usageCount": 2, + "lineNumbers": [ + 9, + 98 + ], + "tableNames": [ + "products", + "reviews" + ], + "groupingColumns": [ + "products.id" + ], + "joinPredicates": [ + "products.id = reviews.product_id" + ], + "statementType": "SELECT", + "queryTime": 0.210456, + "lockTime": 0.0025, + "rowsSent": 7, + "rowsExamined": 3000 + }, + { + "queryStructure": "SELECT `u`.`username`, sum(`o`.`total_amount`) AS `total_spent` FROM `users` AS `u` JOIN `orders` AS `o` ON `u`.`id` = `o`.`user_id` WHERE `o`.`created_at` BETWEEN :1 /* VARCHAR */ AND :2 /* VARCHAR */ GROUP BY `u`.`id` HAVING sum(`o`.`total_amount`) \u003e :_total_spent /* INT64 */", + "usageCount": 3, + "lineNumbers": [ + 14, + 88, + 158 + ], + "tableNames": [ + "users", + "orders" + ], + "groupingColumns": [ + "users.id" + ], + "joinPredicates": [ + "orders.user_id = users.id" + ], + "filterColumns": [ + "orders.created_at ge", + "orders.created_at le" + ], + "statementType": "SELECT", + "queryTime": 0.5811459999999999, + "lockTime": 0.0077, + "rowsSent": 119, + "rowsExamined": 17000, + "timestamp": 1690891202 + }, + { + "queryStructure": "SELECT `c`.`name`, COUNT(`o`.`id`) AS `order_count` FROM `categories` AS `c` JOIN `products` AS `p` ON `c`.`id` = `p`.`category_id` JOIN `order_items` AS `oi` ON `p`.`id` = `oi`.`product_id` JOIN `orders` AS `o` ON `oi`.`order_id` = `o`.`id` GROUP BY `c`.`id`", + "usageCount": 2, + "lineNumbers": [ + 19, + 132 + ], + "tableNames": [ + "categories", + "products", + "order_items", + "orders" + ], + "groupingColumns": [ + "categories.id" + ], + "joinPredicates": [ + "categories.id = products.category_id", + "order_items.order_id = orders.id", + "order_items.product_id = products.id" + ], + "statementType": "SELECT", + "queryTime": 0.371023, + "lockTime": 0.0040999999999999995, + "rowsSent": 28, + "rowsExamined": 16000, + "timestamp": 1690891203 + }, + { + "queryStructure": "SELECT `p`.`name`, `i`.`stock_level` FROM `products` AS `p` JOIN `inventory` AS `i` ON `p`.`id` = `i`.`product_id` WHERE `i`.`stock_level` \u003c :_i_stock_level /* INT64 */", + "usageCount": 2, + "lineNumbers": [ + 29, + 108 + ], + "tableNames": [ + "products", + "inventory" + ], + "joinPredicates": [ + "inventory.product_id = products.id" + ], + "filterColumns": [ + "inventory.stock_level lt" + ], + "statementType": "SELECT", + "queryTime": 0.311245, + "lockTime": 0.0045000000000000005, + "rowsSent": 27, + "rowsExamined": 15000, + "timestamp": 1690891204 + }, + { + "queryStructure": "SELECT `c`.`name`, sum(`oi`.`price` * `oi`.`quantity`) AS `total_sales` FROM `categories` AS `c` JOIN `products` AS `p` ON `c`.`id` = `p`.`category_id` JOIN `order_items` AS `oi` ON `p`.`id` = `oi`.`product_id` GROUP BY `c`.`id` ORDER BY sum(`oi`.`price` * `oi`.`quantity`) DESC LIMIT :1 /* INT64 */", + "usageCount": 2, + "lineNumbers": [ + 37, + 186 + ], + "tableNames": [ + "categories", + "products", + "order_items" + ], + "groupingColumns": [ + "categories.id" + ], + "joinPredicates": [ + "categories.id = products.category_id", + "order_items.product_id = products.id" + ], + "statementType": "SELECT", + "queryTime": 0.401467, + "lockTime": 0.0060999999999999995, + "rowsSent": 31, + "rowsExamined": 20000, + "timestamp": 1690891205 + }, + { + 
"queryStructure": "SELECT `o`.`id`, `o`.`created_at` FROM `orders` AS `o` LEFT JOIN `shipments` AS `s` ON `o`.`id` = `s`.`order_id` WHERE `s`.`shipped_date` IS NULL AND `o`.`created_at` \u003c DATE_SUB(now(), INTERVAL :1 /* INT64 */ day)", + "usageCount": 2, + "lineNumbers": [ + 48, + 116 + ], + "tableNames": [ + "orders", + "shipments" + ], + "joinPredicates": [ + "orders.id = shipments.order_id" + ], + "filterColumns": [ + "orders.created_at lt" + ], + "statementType": "SELECT", + "queryTime": 0.340912, + "lockTime": 0.0037, + "rowsSent": 2, + "rowsExamined": 8500, + "timestamp": 1690891206 + }, + { + "queryStructure": "SELECT `p`.`payment_method`, avg(`o`.`total_amount`) AS `avg_order_value` FROM `payments` AS `p` JOIN `orders` AS `o` ON `p`.`order_id` = `o`.`id` GROUP BY `p`.`payment_method`", + "usageCount": 2, + "lineNumbers": [ + 56, + 142 + ], + "tableNames": [ + "payments", + "orders" + ], + "groupingColumns": [ + "payments.payment_method" + ], + "joinPredicates": [ + "orders.id = payments.order_id" + ], + "statementType": "SELECT", + "queryTime": 0.330246, + "lockTime": 0.0039, + "rowsSent": 7, + "rowsExamined": 6000, + "timestamp": 1690891207 + }, + { + "queryStructure": "SELECT DATE(`o`.`created_at`) AS `order_date`, count(*) AS `order_count` FROM `orders` AS `o` WHERE `o`.`created_at` \u003e= DATE_SUB(now(), INTERVAL :1 /* INT64 */ day) GROUP BY DATE(`o`.`created_at`)", + "usageCount": 2, + "lineNumbers": [ + 64, + 168 + ], + "tableNames": [ + "orders" + ], + "filterColumns": [ + "orders.created_at ge" + ], + "statementType": "SELECT", + "queryTime": 0.370912, + "lockTime": 0.0039, + "rowsSent": 53, + "rowsExamined": 16000, + "timestamp": 1690891208 + }, + { + "queryStructure": "SELECT `m`.`sender_id`, COUNT(DISTINCT `m`.`receiver_id`) AS `unique_receivers` FROM `messages` AS `m` GROUP BY `m`.`sender_id` HAVING COUNT(DISTINCT `m`.`receiver_id`) \u003e :_unique_receivers /* INT64 */", + "usageCount": 3, + "lineNumbers": [ + 72, + 150, + 216 + ], + "tableNames": [ + "messages" + ], + "groupingColumns": [ + "messages.sender_id" + ], + "statementType": "SELECT", + "queryTime": 0.612034, + "lockTime": 0.0078, + "rowsSent": 56, + "rowsExamined": 30000, + "timestamp": 1690891209 + }, + { + "queryStructure": "SELECT `u`.`id`, `u`.`username` FROM `users` AS `u` LEFT JOIN `orders` AS `o` ON `u`.`id` = `o`.`user_id` WHERE `o`.`id` IS NULL", + "usageCount": 2, + "lineNumbers": [ + 80, + 124 + ], + "tableNames": [ + "users", + "orders" + ], + "joinPredicates": [ + "orders.user_id = users.id" + ], + "statementType": "SELECT", + "queryTime": 0.490468, + "lockTime": 0.0085, + "rowsSent": 7, + "rowsExamined": 16000, + "timestamp": 1690891210 + }, + { + "queryStructure": "SELECT `u`.`id`, `u`.`username` FROM `users` AS `u` JOIN `orders` AS `o` ON `u`.`id` = `o`.`user_id` JOIN `reviews` AS `r` ON `u`.`id` = `r`.`user_id` WHERE `o`.`created_at` \u003e= DATE_SUB(now(), INTERVAL :1 /* INT64 */ month) AND `r`.`created_at` \u003e= DATE_SUB(now(), INTERVAL :1 /* INT64 */ month)", + "usageCount": 1, + "lineNumbers": [ + 176 + ], + "tableNames": [ + "users", + "orders", + "reviews" + ], + "joinPredicates": [ + "orders.user_id = users.id", + "reviews.user_id = users.id" + ], + "filterColumns": [ + "orders.created_at ge", + "reviews.created_at ge" + ], + "statementType": "SELECT", + "queryTime": 0.220123, + "lockTime": 0.0038, + "rowsSent": 2, + "rowsExamined": 8000, + "timestamp": 1690891221 + }, + { + "queryStructure": "SELECT `p`.`name`, avg(`r`.`rating`) AS `avg_rating` FROM `products` AS `p` JOIN 
`reviews` AS `r` ON `p`.`id` = `r`.`product_id` WHERE `r`.`created_at` \u003e= DATE_SUB(now(), INTERVAL :1 /* INT64 */ week) GROUP BY `p`.`id` ORDER BY avg(`r`.`rating`) DESC LIMIT :2 /* INT64 */", + "usageCount": 1, + "lineNumbers": [ + 197 + ], + "tableNames": [ + "products", + "reviews" + ], + "groupingColumns": [ + "products.id" + ], + "joinPredicates": [ + "products.id = reviews.product_id" + ], + "filterColumns": [ + "reviews.created_at ge" + ], + "statementType": "SELECT", + "queryTime": 0.160456, + "lockTime": 0.0015, + "rowsSent": 10, + "rowsExamined": 3500, + "timestamp": 1690891223 + }, + { + "queryStructure": "SELECT `p`.`name`, `i`.`stock_level` FROM `products` AS `p` JOIN `inventory` AS `i` ON `p`.`id` = `i`.`product_id` WHERE `i`.`stock_level` BETWEEN :1 /* INT64 */ AND :2 /* INT64 */", + "usageCount": 1, + "lineNumbers": [ + 208 + ], + "tableNames": [ + "products", + "inventory" + ], + "joinPredicates": [ + "inventory.product_id = products.id" + ], + "filterColumns": [ + "inventory.stock_level ge", + "inventory.stock_level le" + ], + "statementType": "SELECT", + "queryTime": 0.200123, + "lockTime": 0.0027, + "rowsSent": 15, + "rowsExamined": 6500, + "timestamp": 1690891224 + } + ], + "failed": [ + { + "query": "/bin/mysqld, Version: 8.0.26 (Source distribution). started with:\nTcp port: 3306 Unix socket: /tmp/mysql.sock\nTime Id Command Argument\nuse testdb;", + "lineNumbers": [ + 1 + ], + "error": "syntax error at position 2" + }, + { + "query": "SET timestamp=1690891201;", + "lineNumbers": [ + 8 + ], + "error": "syntax error at position 14 near 'timestamp'" + } + ] + } \ No newline at end of file From c0ab4d9fedc5535dec76e02bb5658ab851ee3740 Mon Sep 17 00:00:00 2001 From: Andres Taylor Date: Mon, 18 Nov 2024 13:40:18 +0100 Subject: [PATCH 06/10] refactor: introduce summary holder Signed-off-by: Andres Taylor --- go/summarize/summarize-keys.go | 15 ++++++++++----- go/summarize/testdata/keys-summary.md | 6 +++--- 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/go/summarize/summarize-keys.go b/go/summarize/summarize-keys.go index 5afee0f..eb042d8 100644 --- a/go/summarize/summarize-keys.go +++ b/go/summarize/summarize-keys.go @@ -45,6 +45,11 @@ type ( Pos Position } + Summary struct { + tables []TableSummary + failures []FailuresSummary + } + TableSummary struct { Table string ReadQueryCount int @@ -153,11 +158,11 @@ func printKeysSummary(out io.Writer, file readingSummary, now time.Time) { ` md.Printf(msg, now.Format(time.DateTime), file.Name) - tableSummaries, failuresSummaries := summarizeKeysQueries(file.AnalysedQueries) + summary := summarizeKeysQueries(file.AnalysedQueries) - renderTableUsage(tableSummaries, md) + renderTableUsage(summary.tables, md) renderTablesJoined(md, file.AnalysedQueries) - renderFailures(md, failuresSummaries) + renderFailures(md, summary.failures) _, err := md.WriteTo(out) if err != nil { @@ -315,7 +320,7 @@ func makeKey(lhs, rhs operators.Column) graphKey { return graphKey{rhs.Table, lhs.Table} } -func summarizeKeysQueries(queries *keys.Output) ([]TableSummary, []FailuresSummary) { +func summarizeKeysQueries(queries *keys.Output) Summary { tableSummaries := make(map[string]*TableSummary) tableUsageWriteCounts := make(map[string]int) tableUsageReadCounts := make(map[string]int) @@ -373,7 +378,7 @@ func summarizeKeysQueries(queries *keys.Output) ([]TableSummary, []FailuresSumma }) } - return result, failures + return Summary{tables: result, failures: failures} } func summarizeColumnUsage(tableSummary *TableSummary, query 
keys.QueryAnalysisResult) { diff --git a/go/summarize/testdata/keys-summary.md b/go/summarize/testdata/keys-summary.md index 64fd2f6..6ce1434 100644 --- a/go/summarize/testdata/keys-summary.md +++ b/go/summarize/testdata/keys-summary.md @@ -148,7 +148,7 @@ partsupp ↔ supplier (Occurrences: 1) ``` ## Failures -|Query|Error|Count| -|---|---|---| -|I am a failing query;|syntax error at position 2 near 'I'|2| +|Error|Count| +|---|---| +|syntax error at position 2 near 'I'|2| From 42ce9cc31d4d2f124f931fbb25b822ba626c0ffb Mon Sep 17 00:00:00 2001 From: Andres Taylor Date: Mon, 18 Nov 2024 14:25:02 +0100 Subject: [PATCH 07/10] test: add test for summarizing logs with query timing Signed-off-by: Andres Taylor --- go/summarize/summarize-keys.go | 63 ++++++++---- go/summarize/summarize-keys_test.go | 24 +++-- go/summarize/testdata/bigger_slow_log.md | 117 +++++++++++++++++++++++ 3 files changed, 179 insertions(+), 25 deletions(-) create mode 100644 go/summarize/testdata/bigger_slow_log.md diff --git a/go/summarize/summarize-keys.go b/go/summarize/summarize-keys.go index eb042d8..b47afcc 100644 --- a/go/summarize/summarize-keys.go +++ b/go/summarize/summarize-keys.go @@ -34,12 +34,16 @@ import ( "github.com/vitessio/vt/go/markdown" ) +const HotQueryCount = 10 + type ( - Position int + Position int + ColumnUsage struct { Percentage float64 Count int } + ColumnInformation struct { Name string Pos Position @@ -324,27 +328,12 @@ func summarizeKeysQueries(queries *keys.Output) Summary { tableSummaries := make(map[string]*TableSummary) tableUsageWriteCounts := make(map[string]int) tableUsageReadCounts := make(map[string]int) + var hotQueries []keys.QueryAnalysisResult // First pass: collect all data and count occurrences for _, query := range queries.Queries { - for _, table := range query.TableNames { - if _, exists := tableSummaries[table]; !exists { - tableSummaries[table] = &TableSummary{ - Table: table, - Columns: make(map[ColumnInformation]ColumnUsage), - } - } - - switch query.StatementType { - case "INSERT", "DELETE", "UPDATE", "REPLACE": - tableUsageWriteCounts[table] += query.UsageCount - default: - tableUsageReadCounts[table] += query.UsageCount - } - - summarizeColumnUsage(tableSummaries[table], query) - summarizeJoinPredicates(query.JoinPredicates, table, tableSummaries) - } + gatherTableInfo(query, tableSummaries, tableUsageWriteCounts, tableUsageReadCounts) + checkQueryForHotness(&hotQueries, query) } // Second pass: calculate percentages @@ -381,6 +370,42 @@ func summarizeKeysQueries(queries *keys.Output) Summary { return Summary{tables: result, failures: failures} } +func checkQueryForHotness(hotQueries *[]keys.QueryAnalysisResult, query keys.QueryAnalysisResult) { + // todo: we should be able to choose different metrics for hotness - e.g. total time spent on query, number of rows examined, etc. 
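+	// Keep a bounded list of hot queries: while the list has fewer than
+	// HotQueryCount entries every query is admitted, and once it is full a
+	// new query only enters by displacing the entry at index 0.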
+ switch { + case len(*hotQueries) < HotQueryCount: + // If we have not yet reached the limit, add the query + *hotQueries = append(*hotQueries, query) + case query.UsageCount > (*hotQueries)[0].UsageCount: + // If the current query has more usage than the least used hot query, replace it + (*hotQueries)[0] = query + sort.Slice(*hotQueries, func(i, j int) bool { + return (*hotQueries)[i].UsageCount > (*hotQueries)[j].UsageCount + }) + } +} + +func gatherTableInfo(query keys.QueryAnalysisResult, tableSummaries map[string]*TableSummary, tableUsageWriteCounts map[string]int, tableUsageReadCounts map[string]int) { + for _, table := range query.TableNames { + if _, exists := tableSummaries[table]; !exists { + tableSummaries[table] = &TableSummary{ + Table: table, + Columns: make(map[ColumnInformation]ColumnUsage), + } + } + + switch query.StatementType { + case "INSERT", "DELETE", "UPDATE", "REPLACE": + tableUsageWriteCounts[table] += query.UsageCount + default: + tableUsageReadCounts[table] += query.UsageCount + } + + summarizeColumnUsage(tableSummaries[table], query) + summarizeJoinPredicates(query.JoinPredicates, table, tableSummaries) + } +} + func summarizeColumnUsage(tableSummary *TableSummary, query keys.QueryAnalysisResult) { updateColumnUsage := func(columns []ColumnInformation) { sort.Slice(columns, func(i, j int) bool { diff --git a/go/summarize/summarize-keys_test.go b/go/summarize/summarize-keys_test.go index 55f91da..e51d5aa 100644 --- a/go/summarize/summarize-keys_test.go +++ b/go/summarize/summarize-keys_test.go @@ -66,10 +66,22 @@ func TestTableSummary(t *testing.T) { } func TestSummarizeKeysFile(t *testing.T) { - file := readTraceFile("testdata/keys-log.json") - sb := &strings.Builder{} - printKeysSummary(sb, file, time.Date(2024, time.January, 1, 1, 2, 3, 0, time.UTC)) - expected, err := os.ReadFile("testdata/keys-summary.md") - require.NoError(t, err) - require.Equal(t, string(expected), sb.String()) + tests := []struct { + inputFile string + expectedFile string + }{ + {"testdata/keys-log.json", "testdata/keys-summary.md"}, + {"../keys/testdata/bigger_slow_query_log.json", "testdata/bigger_slow_log.md"}, + } + + for _, tt := range tests { + t.Run(fmt.Sprintf("input: %s, expected: %s", tt.inputFile, tt.expectedFile), func(t *testing.T) { + file := readTraceFile(tt.inputFile) + sb := &strings.Builder{} + printKeysSummary(sb, file, time.Date(2024, time.January, 1, 1, 2, 3, 0, time.UTC)) + expected, err := os.ReadFile(tt.expectedFile) + require.NoError(t, err) + require.Equal(t, string(expected), sb.String()) + }) + } } diff --git a/go/summarize/testdata/bigger_slow_log.md b/go/summarize/testdata/bigger_slow_log.md new file mode 100644 index 0000000..1e80d4b --- /dev/null +++ b/go/summarize/testdata/bigger_slow_log.md @@ -0,0 +1,117 @@ +# Query Analysis Report + +**Date of Analysis**: 2024-01-01 01:02:03 +**Analyzed File**: `../keys/testdata/bigger_slow_query_log.json` + +## Tables +|Table Name|Reads|Writes| +|---|---|---| +|orders|14|0| +|products|10|0| +|users|6|0| +|categories|4|0| +|order_items|4|0| +|reviews|4|0| +|inventory|3|0| +|messages|3|0| +|payments|2|0| +|shipments|2|0| + +### Column Usage +#### Table: `orders` (14 reads and 0 writes) +|Column|Position|Used %| +|---|---|---| +|created_at|WHERE RANGE|57%| +|id|JOIN|43%| +|user_id|JOIN|43%| + +#### Table: `products` (10 reads and 0 writes) +|Column|Position|Used %| +|---|---|---| +|id|JOIN|100%| +||GROUP|30%| +|category_id|JOIN|40%| + +#### Table: `users` (6 reads and 0 writes) +|Column|Position|Used %| +|---|---|---| 
+|id|JOIN|100%| +||GROUP|50%| + +#### Table: `categories` (4 reads and 0 writes) +|Column|Position|Used %| +|---|---|---| +|id|JOIN|100%| +||GROUP|100%| + +#### Table: `order_items` (4 reads and 0 writes) +|Column|Position|Used %| +|---|---|---| +|product_id|JOIN|100%| +|order_id|JOIN|50%| + +#### Table: `reviews` (4 reads and 0 writes) +|Column|Position|Used %| +|---|---|---| +|product_id|JOIN|75%| +|created_at|WHERE RANGE|50%| +|user_id|JOIN|25%| + +#### Table: `inventory` (3 reads and 0 writes) +|Column|Position|Used %| +|---|---|---| +|product_id|JOIN|100%| +|stock_level|WHERE RANGE|100%| + +#### Table: `messages` (3 reads and 0 writes) +|Column|Position|Used %| +|---|---|---| +|sender_id|GROUP|100%| + +#### Table: `payments` (2 reads and 0 writes) +|Column|Position|Used %| +|---|---|---| +|order_id|JOIN|100%| +|payment_method|GROUP|100%| + +#### Table: `shipments` (2 reads and 0 writes) +|Column|Position|Used %| +|---|---|---| +|order_id|JOIN|100%| + +## Tables Joined +``` +orders ↔ users (Occurrences: 3) +└─ orders.user_id = users.id + +categories ↔ products (Occurrences: 2) +└─ categories.id = products.category_id + +inventory ↔ products (Occurrences: 2) +└─ inventory.product_id = products.id + +order_items ↔ products (Occurrences: 2) +└─ order_items.product_id = products.id + +products ↔ reviews (Occurrences: 2) +└─ products.id = reviews.product_id + +order_items ↔ orders (Occurrences: 1) +└─ order_items.order_id = orders.id + +orders ↔ payments (Occurrences: 1) +└─ orders.id = payments.order_id + +orders ↔ shipments (Occurrences: 1) +└─ orders.id = shipments.order_id + +reviews ↔ users (Occurrences: 1) +└─ reviews.user_id = users.id + +``` +## Failures +|Error|Count| +|---|---| +|syntax error at position 2|1| +|syntax error at position 14 near 'timestamp'|1| + From dcea201d208eacca6fbc2bf3f408f0ca7e2bb03e Mon Sep 17 00:00:00 2001 From: Andres Taylor Date: Mon, 18 Nov 2024 14:37:45 +0100 Subject: [PATCH 08/10] test: collect all testdata into a single directory Signed-off-by: Andres Taylor --- go/data/query_log_parse_test.go | 4 ++-- go/data/slow_query_log_loader_test.go | 2 +- go/data/vtgate_log_parse_test.go | 10 +++++----- go/keys/keys_test.go | 14 +++++++------- go/summarize/reading.go | 13 +++++++------ go/summarize/summarize-keys_test.go | 9 +++++---- go/summarize/summarize.go | 6 +++++- go/summarize/summarize_test.go | 4 +++- go/{summarize => }/testdata/bigger_slow_log.md | 2 +- go/{keys => }/testdata/bigger_slow_query_log.json | 0 go/{data => }/testdata/bigger_slow_query_log.log | 0 go/{summarize => }/testdata/keys-log-vtgate.json | 0 go/{summarize => }/testdata/keys-log.json | 0 go/{summarize => }/testdata/keys-summary.md | 2 +- go/{data => }/testdata/mysql.query.log | 0 go/{data => }/testdata/mysql.small-query.log | 0 go/{summarize => }/testdata/slow-query-log.json | 0 go/{data => }/testdata/slow_query_log | 0 go/{summarize => }/testdata/trace-log.json | 0 go/{data => }/testdata/vtgate.query.log | 0 .../testdata/vtgate.query.log.parsed.bv.txt | 0 go/{data => }/testdata/vtgate.query.log.parsed.txt | 0 go/{data => }/testdata/vtgate.query.log.redacted | 0 23 files changed, 37 insertions(+), 29 deletions(-) rename go/{summarize => }/testdata/bigger_slow_log.md (97%) rename go/{keys => }/testdata/bigger_slow_query_log.json (100%) rename go/{data => }/testdata/bigger_slow_query_log.log (100%) rename go/{summarize => }/testdata/keys-log-vtgate.json (100%) rename go/{summarize => }/testdata/keys-log.json (100%) rename go/{summarize => }/testdata/keys-summary.md (98%) rename 
go/{data => }/testdata/mysql.query.log (100%) rename go/{data => }/testdata/mysql.small-query.log (100%) rename go/{summarize => }/testdata/slow-query-log.json (100%) rename go/{data => }/testdata/slow_query_log (100%) rename go/{summarize => }/testdata/trace-log.json (100%) rename go/{data => }/testdata/vtgate.query.log (100%) rename go/{data => }/testdata/vtgate.query.log.parsed.bv.txt (100%) rename go/{data => }/testdata/vtgate.query.log.parsed.txt (100%) rename go/{data => }/testdata/vtgate.query.log.redacted (100%) diff --git a/go/data/query_log_parse_test.go b/go/data/query_log_parse_test.go index d873dc2..d0eaf5d 100644 --- a/go/data/query_log_parse_test.go +++ b/go/data/query_log_parse_test.go @@ -23,14 +23,14 @@ import ( ) func TestParseMySQLQueryLog(t *testing.T) { - loader := MySQLLogLoader{}.Load("./testdata/mysql.query.log") + loader := MySQLLogLoader{}.Load("../testdata/mysql.query.log") gotQueries, err := makeSlice(loader) require.NoError(t, err) require.Equal(t, 1517, len(gotQueries), "expected 1517 queries") //nolint:testifylint // too many elements for the output to be readable } func TestSmallSnippet(t *testing.T) { - loader := MySQLLogLoader{}.Load("./testdata/mysql.small-query.log") + loader := MySQLLogLoader{}.Load("../testdata/mysql.small-query.log") gotQueries, err := makeSlice(loader) require.NoError(t, err) expected := []Query{ diff --git a/go/data/slow_query_log_loader_test.go b/go/data/slow_query_log_loader_test.go index 1b774d2..32de1b0 100644 --- a/go/data/slow_query_log_loader_test.go +++ b/go/data/slow_query_log_loader_test.go @@ -23,7 +23,7 @@ import ( ) func TestLoadSlowQueryLogWithMetadata(t *testing.T) { - loader := SlowQueryLogLoader{}.Load("./testdata/slow_query_log") + loader := SlowQueryLogLoader{}.Load("../testdata/slow_query_log") queries, err := makeSlice(loader) require.NoError(t, err) diff --git a/go/data/vtgate_log_parse_test.go b/go/data/vtgate_log_parse_test.go index a58ea03..6d4e2a6 100644 --- a/go/data/vtgate_log_parse_test.go +++ b/go/data/vtgate_log_parse_test.go @@ -25,19 +25,19 @@ import ( ) func TestParseVtGateQueryLogRedacted(t *testing.T) { - loader := VtGateLogLoader{NeedsBindVars: true}.Load("./testdata/vtgate.query.log.redacted") + loader := VtGateLogLoader{NeedsBindVars: true}.Load("../testdata/vtgate.query.log.redacted") _, err := makeSlice(loader) require.EqualError(t, err, "line 1: query has redacted bind variables, cannot parse them") } func TestParseVtGateQueryLog(t *testing.T) { - loader := VtGateLogLoader{NeedsBindVars: true}.Load("./testdata/vtgate.query.log") + loader := VtGateLogLoader{NeedsBindVars: true}.Load("../testdata/vtgate.query.log") gotQueries, err := makeSlice(loader) require.NoError(t, err) require.Len(t, gotQueries, 25) - expect, err := os.ReadFile("./testdata/vtgate.query.log.parsed.bv.txt") + expect, err := os.ReadFile("../testdata/vtgate.query.log.parsed.bv.txt") require.NoError(t, err) var got []string @@ -49,12 +49,12 @@ func TestParseVtGateQueryLog(t *testing.T) { } func TestParseVtGateQueryLogNoBindVars(t *testing.T) { - loader := VtGateLogLoader{NeedsBindVars: false}.Load("./testdata/vtgate.query.log") + loader := VtGateLogLoader{NeedsBindVars: false}.Load("../testdata/vtgate.query.log") gotQueries, err := makeSlice(loader) require.NoError(t, err) require.Len(t, gotQueries, 25) - expect, err := os.ReadFile("./testdata/vtgate.query.log.parsed.txt") + expect, err := os.ReadFile("../testdata/vtgate.query.log.parsed.txt") require.NoError(t, err) var got []string diff --git a/go/keys/keys_test.go 
b/go/keys/keys_test.go index bd47506..f588634 100644 --- a/go/keys/keys_test.go +++ b/go/keys/keys_test.go @@ -37,28 +37,28 @@ func TestKeys(t *testing.T) { FileName: "../../t/tpch_failing_queries.test", Loader: data.SlowQueryLogLoader{}, }, - expectedFile: "../summarize/testdata/keys-log.json", + expectedFile: "../testdata/keys-log.json", }, { cfg: Config{ - FileName: "../data/testdata/vtgate.query.log", + FileName: "../testdata/vtgate.query.log", Loader: data.VtGateLogLoader{NeedsBindVars: false}, }, - expectedFile: "../summarize/testdata/keys-log-vtgate.json", + expectedFile: "../testdata/keys-log-vtgate.json", }, { cfg: Config{ - FileName: "../data/testdata/slow_query_log", + FileName: "../testdata/slow_query_log", Loader: data.SlowQueryLogLoader{}, }, - expectedFile: "../summarize/testdata/slow-query-log.json", + expectedFile: "../testdata/slow-query-log.json", }, { cfg: Config{ - FileName: "../data/testdata/bigger_slow_query_log.log", + FileName: "../testdata/bigger_slow_query_log.log", Loader: data.SlowQueryLogLoader{}, }, - expectedFile: "./testdata/bigger_slow_query_log.json", + expectedFile: "../testdata/bigger_slow_query_log.json", }, } diff --git a/go/summarize/reading.go b/go/summarize/reading.go index 09b91b8..b8fb0f0 100644 --- a/go/summarize/reading.go +++ b/go/summarize/reading.go @@ -18,6 +18,8 @@ package summarize import ( "encoding/json" + "errors" + "fmt" "io" "os" "sort" @@ -26,11 +28,11 @@ import ( "github.com/vitessio/vt/go/keys" ) -func readTraceFile(fileName string) readingSummary { +func readTraceFile(fileName string) (readingSummary, error) { // Open the JSON file file, err := os.Open(fileName) if err != nil { - exit("Error opening file: " + err.Error()) + return readingSummary{}, fmt.Errorf("error opening file: %w", err) } defer file.Close() @@ -39,13 +41,12 @@ func readTraceFile(fileName string) readingSummary { // Determine the type based on the first delimiter of the JSON file switch val { case json.Delim('['): - return readTracedQueryFile(decoder, fileName) + return readTracedQueryFile(decoder, fileName), nil case json.Delim('{'): - return readAnalysedQueryFile(decoder, fileName) + return readAnalysedQueryFile(decoder, fileName), nil } - exit("Unknown file format") - panic("unreachable") + return readingSummary{}, errors.New("unknown file format") } func getDecoderAndDelim(file *os.File) (*json.Decoder, json.Delim) { diff --git a/go/summarize/summarize-keys_test.go b/go/summarize/summarize-keys_test.go index e51d5aa..c993086 100644 --- a/go/summarize/summarize-keys_test.go +++ b/go/summarize/summarize-keys_test.go @@ -70,16 +70,17 @@ func TestSummarizeKeysFile(t *testing.T) { inputFile string expectedFile string }{ - {"testdata/keys-log.json", "testdata/keys-summary.md"}, - {"../keys/testdata/bigger_slow_query_log.json", "testdata/bigger_slow_log.md"}, + {"keys-log.json", "keys-summary.md"}, + {"bigger_slow_query_log.json", "bigger_slow_log.md"}, } for _, tt := range tests { t.Run(fmt.Sprintf("input: %s, expected: %s", tt.inputFile, tt.expectedFile), func(t *testing.T) { - file := readTraceFile(tt.inputFile) + file, err := readTraceFile("../testdata/" + tt.inputFile) + require.NoError(t, err) sb := &strings.Builder{} printKeysSummary(sb, file, time.Date(2024, time.January, 1, 1, 2, 3, 0, time.UTC)) - expected, err := os.ReadFile(tt.expectedFile) + expected, err := os.ReadFile("../testdata/" + tt.expectedFile) require.NoError(t, err) require.Equal(t, string(expected), sb.String()) }) diff --git a/go/summarize/summarize.go b/go/summarize/summarize.go index 
f328cd2..dbc939f 100644 --- a/go/summarize/summarize.go +++ b/go/summarize/summarize.go @@ -41,8 +41,12 @@ type ( func Run(args []string) { traces := make([]readingSummary, len(args)) + var err error for i, arg := range args { - traces[i] = readTraceFile(arg) + traces[i], err = readTraceFile(arg) + if err != nil { + exit(err.Error()) + } } firstTrace := traces[0] diff --git a/go/summarize/summarize_test.go b/go/summarize/summarize_test.go index c3c23da..4b03b6b 100644 --- a/go/summarize/summarize_test.go +++ b/go/summarize/summarize_test.go @@ -21,6 +21,7 @@ import ( "testing" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" ) func tf1() readingSummary { @@ -183,7 +184,8 @@ Summary: } func TestSummarizeTraceFile(t *testing.T) { - file := readTraceFile("testdata/trace-log.json") + file, err := readTraceFile("testdata/trace-log.json") + require.NoError(t, err) sb := &strings.Builder{} printTraceSummary(sb, 80, noHighlight, file) expected := `Query: INSERT INTO region (R_REGIONKEY, R_NAME, R_COMMENT) VALUES (1, 'ASIA',... diff --git a/go/summarize/testdata/bigger_slow_log.md b/go/testdata/bigger_slow_log.md similarity index 97% rename from go/summarize/testdata/bigger_slow_log.md rename to go/testdata/bigger_slow_log.md index 1e80d4b..15f1336 100644 --- a/go/summarize/testdata/bigger_slow_log.md +++ b/go/testdata/bigger_slow_log.md @@ -1,7 +1,7 @@ # Query Analysis Report **Date of Analysis**: 2024-01-01 01:02:03 -**Analyzed File**: `../keys/testdata/bigger_slow_query_log.json` +**Analyzed File**: `../testdata/bigger_slow_query_log.json` ## Tables |Table Name|Reads|Writes| diff --git a/go/keys/testdata/bigger_slow_query_log.json b/go/testdata/bigger_slow_query_log.json similarity index 100% rename from go/keys/testdata/bigger_slow_query_log.json rename to go/testdata/bigger_slow_query_log.json diff --git a/go/data/testdata/bigger_slow_query_log.log b/go/testdata/bigger_slow_query_log.log similarity index 100% rename from go/data/testdata/bigger_slow_query_log.log rename to go/testdata/bigger_slow_query_log.log diff --git a/go/summarize/testdata/keys-log-vtgate.json b/go/testdata/keys-log-vtgate.json similarity index 100% rename from go/summarize/testdata/keys-log-vtgate.json rename to go/testdata/keys-log-vtgate.json diff --git a/go/summarize/testdata/keys-log.json b/go/testdata/keys-log.json similarity index 100% rename from go/summarize/testdata/keys-log.json rename to go/testdata/keys-log.json diff --git a/go/summarize/testdata/keys-summary.md b/go/testdata/keys-summary.md similarity index 98% rename from go/summarize/testdata/keys-summary.md rename to go/testdata/keys-summary.md index 6ce1434..303f10d 100644 --- a/go/summarize/testdata/keys-summary.md +++ b/go/testdata/keys-summary.md @@ -1,7 +1,7 @@ # Query Analysis Report **Date of Analysis**: 2024-01-01 01:02:03 -**Analyzed File**: `testdata/keys-log.json` +**Analyzed File**: `../testdata/keys-log.json` ## Tables |Table Name|Reads|Writes| diff --git a/go/data/testdata/mysql.query.log b/go/testdata/mysql.query.log similarity index 100% rename from go/data/testdata/mysql.query.log rename to go/testdata/mysql.query.log diff --git a/go/data/testdata/mysql.small-query.log b/go/testdata/mysql.small-query.log similarity index 100% rename from go/data/testdata/mysql.small-query.log rename to go/testdata/mysql.small-query.log diff --git a/go/summarize/testdata/slow-query-log.json b/go/testdata/slow-query-log.json similarity index 100% rename from go/summarize/testdata/slow-query-log.json rename to 
go/testdata/slow-query-log.json diff --git a/go/data/testdata/slow_query_log b/go/testdata/slow_query_log similarity index 100% rename from go/data/testdata/slow_query_log rename to go/testdata/slow_query_log diff --git a/go/summarize/testdata/trace-log.json b/go/testdata/trace-log.json similarity index 100% rename from go/summarize/testdata/trace-log.json rename to go/testdata/trace-log.json diff --git a/go/data/testdata/vtgate.query.log b/go/testdata/vtgate.query.log similarity index 100% rename from go/data/testdata/vtgate.query.log rename to go/testdata/vtgate.query.log diff --git a/go/data/testdata/vtgate.query.log.parsed.bv.txt b/go/testdata/vtgate.query.log.parsed.bv.txt similarity index 100% rename from go/data/testdata/vtgate.query.log.parsed.bv.txt rename to go/testdata/vtgate.query.log.parsed.bv.txt diff --git a/go/data/testdata/vtgate.query.log.parsed.txt b/go/testdata/vtgate.query.log.parsed.txt similarity index 100% rename from go/data/testdata/vtgate.query.log.parsed.txt rename to go/testdata/vtgate.query.log.parsed.txt diff --git a/go/data/testdata/vtgate.query.log.redacted b/go/testdata/vtgate.query.log.redacted similarity index 100% rename from go/data/testdata/vtgate.query.log.redacted rename to go/testdata/vtgate.query.log.redacted From 2021a65f4924bd63353e3ec4fdb5483b3bb23386 Mon Sep 17 00:00:00 2001 From: Andres Taylor Date: Tue, 19 Nov 2024 07:33:29 +0100 Subject: [PATCH 09/10] feat: list hot queries when query timing is available Signed-off-by: Andres Taylor --- go/summarize/summarize-keys.go | 73 ++++++++++++++++++++++++++--- go/summarize/summarize-keys_test.go | 6 ++- go/summarize/summarize_test.go | 2 +- go/testdata/bigger_slow_log.md | 65 +++++++++++++++++++++++++ 4 files changed, 137 insertions(+), 9 deletions(-) diff --git a/go/summarize/summarize-keys.go b/go/summarize/summarize-keys.go index b47afcc..9e4fcbf 100644 --- a/go/summarize/summarize-keys.go +++ b/go/summarize/summarize-keys.go @@ -50,8 +50,9 @@ type ( } Summary struct { - tables []TableSummary - failures []FailuresSummary + tables []TableSummary + failures []FailuresSummary + hotQueries []keys.QueryAnalysisResult } TableSummary struct { @@ -164,6 +165,7 @@ func printKeysSummary(out io.Writer, file readingSummary, now time.Time) { summary := summarizeKeysQueries(file.AnalysedQueries) + renderHotQueries(md, summary.hotQueries) renderTableUsage(summary.tables, md) renderTablesJoined(md, file.AnalysedQueries) renderFailures(md, summary.failures) @@ -174,6 +176,57 @@ func printKeysSummary(out io.Writer, file readingSummary, now time.Time) { } } +func renderHotQueries(md *markdown.MarkDown, queries []keys.QueryAnalysisResult) { + if len(queries) == 0 { + return + } + + hasTime := false + // Sort the queries in descending order of total query time + sort.Slice(queries, func(i, j int) bool { + if queries[i].QueryTime != 0 { + hasTime = true + } + return queries[i].QueryTime > queries[j].QueryTime + }) + + if !hasTime { + return + } + + md.PrintHeader("Top Queries", 2) + + // Prepare table headers and rows + headers := []string{"Query ID", "Usage Count", "Total Query Time (ms)", "Avg Query Time (ms)", "Total Rows Examined"} + var rows [][]string + + for i, query := range queries { + queryID := fmt.Sprintf("Q%d", i+1) + avgQueryTime := query.QueryTime / float64(query.UsageCount) + rows = append(rows, []string{ + queryID, + strconv.Itoa(query.UsageCount), + fmt.Sprintf("%.2f", query.QueryTime), + fmt.Sprintf("%.2f", avgQueryTime), + strconv.Itoa(query.RowsExamined), + }) + } + + // Print the table + 
md.PrintTable(headers, rows)
+
+	// After the table, list the full queries with their IDs
+	md.PrintHeader("Query Details", 3)
+	for i, query := range queries {
+		queryID := fmt.Sprintf("Q%d", i+1)
+		md.PrintHeader(queryID, 4)
+		md.Println("```sql")
+		md.Println(query.QueryStructure)
+		md.Println("```")
+		md.NewLine()
+	}
+}
+
 func renderTableUsage(tableSummaries []TableSummary, md *markdown.MarkDown) {
 	if len(tableSummaries) == 0 {
 		return
@@ -367,7 +420,7 @@ func summarizeKeysQueries(queries *keys.Output) Summary {
 		})
 	}
 
-	return Summary{tables: result, failures: failures}
+	return Summary{tables: result, failures: failures, hotQueries: hotQueries}
 }
 
 func checkQueryForHotness(hotQueries *[]keys.QueryAnalysisResult, query keys.QueryAnalysisResult) {
@@ -376,13 +429,19 @@ func checkQueryForHotness(hotQueries *[]keys.Que
 	// todo: we should be able to choose different metrics for hotness - e.g. total time spent on query, number of rows examined, etc.
 	switch {
 	case len(*hotQueries) < HotQueryCount:
 		// If we have not yet reached the limit, add the query
 		*hotQueries = append(*hotQueries, query)
-	case query.UsageCount > (*hotQueries)[0].UsageCount:
-		// If the current query has more usage than the least used hot query, replace it
+	case query.QueryTime > (*hotQueries)[0].QueryTime:
+		// If the current query is hotter than the coldest hot query, replace it
 		(*hotQueries)[0] = query
-		sort.Slice(*hotQueries, func(i, j int) bool {
-			return (*hotQueries)[i].UsageCount > (*hotQueries)[j].UsageCount
-		})
+	default:
+		// If the current query is not hot enough, just return
+		return
 	}
+
+	// Sort the hot queries so that the least hot query is always at the front
+	sort.Slice(*hotQueries,
+		func(i, j int) bool {
+			return (*hotQueries)[i].QueryTime < (*hotQueries)[j].QueryTime
+		})
 }
 
 func gatherTableInfo(query keys.QueryAnalysisResult, tableSummaries map[string]*TableSummary, tableUsageWriteCounts map[string]int, tableUsageReadCounts map[string]int) {
diff --git a/go/summarize/summarize-keys_test.go b/go/summarize/summarize-keys_test.go
index c993086..cf89e2f 100644
--- a/go/summarize/summarize-keys_test.go
+++ b/go/summarize/summarize-keys_test.go
@@ -23,6 +23,7 @@ import (
 	"testing"
 	"time"
 
+	"github.com/stretchr/testify/assert"
 	"github.com/stretchr/testify/require"
 )
 
@@ -82,7 +83,10 @@ func TestSummarizeKeysFile(t *testing.T) {
 			printKeysSummary(sb, file, time.Date(2024, time.January, 1, 1, 2, 3, 0, time.UTC))
 			expected, err := os.ReadFile("../testdata/" + tt.expectedFile)
 			require.NoError(t, err)
-			require.Equal(t, string(expected), sb.String())
+			assert.Equal(t, string(expected), sb.String())
+			if t.Failed() {
+				_ = os.WriteFile("../testdata/"+tt.expectedFile+".correct", []byte(sb.String()), 0o644)
+			}
 		})
 	}
 }
diff --git a/go/summarize/summarize_test.go b/go/summarize/summarize_test.go
index 4b03b6b..7cc0306 100644
--- a/go/summarize/summarize_test.go
+++ b/go/summarize/summarize_test.go
@@ -184,7 +184,7 @@ Summary:
 }
 
 func TestSummarizeTraceFile(t *testing.T) {
-	file, err := readTraceFile("testdata/trace-log.json")
+	file, err := readTraceFile("../testdata/trace-log.json")
 	require.NoError(t, err)
 	sb := &strings.Builder{}
 	printTraceSummary(sb, 80, noHighlight, file)
diff --git a/go/testdata/bigger_slow_log.md b/go/testdata/bigger_slow_log.md
index 15f1336..ca8b4c4 100644
--- a/go/testdata/bigger_slow_log.md
+++ b/go/testdata/bigger_slow_log.md
@@ -3,6 +3,71 @@
 **Date of Analysis**: 2024-01-01 01:02:03
 **Analyzed File**: `../testdata/bigger_slow_query_log.json`
 
+## Top Queries
+|Query ID|Usage Count|Total Query Time (ms)|Avg Query Time (ms)|Total Rows Examined|
+|---|---|---|---|---|
+|Q1|3|0.61|0.20|30000|
+|Q2|3|0.58|0.19|17000|
+|Q3|2|0.49|0.25|16000|
+|Q4|2|0.40|0.20|20000| +|Q5|2|0.37|0.19|16000| +|Q6|2|0.37|0.19|16000| +|Q7|2|0.34|0.17|8500| +|Q8|2|0.33|0.17|6000| +|Q9|2|0.31|0.16|15000| +|Q10|1|0.22|0.22|8000| + +### Query Details +#### Q1 +```sql +SELECT `m`.`sender_id`, COUNT(DISTINCT `m`.`receiver_id`) AS `unique_receivers` FROM `messages` AS `m` GROUP BY `m`.`sender_id` HAVING COUNT(DISTINCT `m`.`receiver_id`) > :_unique_receivers /* INT64 */ +``` + +#### Q2 +```sql +SELECT `u`.`username`, sum(`o`.`total_amount`) AS `total_spent` FROM `users` AS `u` JOIN `orders` AS `o` ON `u`.`id` = `o`.`user_id` WHERE `o`.`created_at` BETWEEN :1 /* VARCHAR */ AND :2 /* VARCHAR */ GROUP BY `u`.`id` HAVING sum(`o`.`total_amount`) > :_total_spent /* INT64 */ +``` + +#### Q3 +```sql +SELECT `u`.`id`, `u`.`username` FROM `users` AS `u` LEFT JOIN `orders` AS `o` ON `u`.`id` = `o`.`user_id` WHERE `o`.`id` IS NULL +``` + +#### Q4 +```sql +SELECT `c`.`name`, sum(`oi`.`price` * `oi`.`quantity`) AS `total_sales` FROM `categories` AS `c` JOIN `products` AS `p` ON `c`.`id` = `p`.`category_id` JOIN `order_items` AS `oi` ON `p`.`id` = `oi`.`product_id` GROUP BY `c`.`id` ORDER BY sum(`oi`.`price` * `oi`.`quantity`) DESC LIMIT :1 /* INT64 */ +``` + +#### Q5 +```sql +SELECT `c`.`name`, COUNT(`o`.`id`) AS `order_count` FROM `categories` AS `c` JOIN `products` AS `p` ON `c`.`id` = `p`.`category_id` JOIN `order_items` AS `oi` ON `p`.`id` = `oi`.`product_id` JOIN `orders` AS `o` ON `oi`.`order_id` = `o`.`id` GROUP BY `c`.`id` +``` + +#### Q6 +```sql +SELECT DATE(`o`.`created_at`) AS `order_date`, count(*) AS `order_count` FROM `orders` AS `o` WHERE `o`.`created_at` >= DATE_SUB(now(), INTERVAL :1 /* INT64 */ day) GROUP BY DATE(`o`.`created_at`) +``` + +#### Q7 +```sql +SELECT `o`.`id`, `o`.`created_at` FROM `orders` AS `o` LEFT JOIN `shipments` AS `s` ON `o`.`id` = `s`.`order_id` WHERE `s`.`shipped_date` IS NULL AND `o`.`created_at` < DATE_SUB(now(), INTERVAL :1 /* INT64 */ day) +``` + +#### Q8 +```sql +SELECT `p`.`payment_method`, avg(`o`.`total_amount`) AS `avg_order_value` FROM `payments` AS `p` JOIN `orders` AS `o` ON `p`.`order_id` = `o`.`id` GROUP BY `p`.`payment_method` +``` + +#### Q9 +```sql +SELECT `p`.`name`, `i`.`stock_level` FROM `products` AS `p` JOIN `inventory` AS `i` ON `p`.`id` = `i`.`product_id` WHERE `i`.`stock_level` < :_i_stock_level /* INT64 */ +``` + +#### Q10 +```sql +SELECT `u`.`id`, `u`.`username` FROM `users` AS `u` JOIN `orders` AS `o` ON `u`.`id` = `o`.`user_id` JOIN `reviews` AS `r` ON `u`.`id` = `r`.`user_id` WHERE `o`.`created_at` >= DATE_SUB(now(), INTERVAL :1 /* INT64 */ month) AND `r`.`created_at` >= DATE_SUB(now(), INTERVAL :1 /* INT64 */ month) +``` + ## Tables |Table Name|Reads|Writes| |---|---|---| From fc9bb649cbfcad86f7e3f3433797354343f93b94 Mon Sep 17 00:00:00 2001 From: Andres Taylor Date: Tue, 19 Nov 2024 08:53:59 +0100 Subject: [PATCH 10/10] feat: make it possible to choose which metric to sort hotness by Signed-off-by: Andres Taylor --- go/cmd/summarize.go | 10 +- go/summarize/summarize-keys.go | 54 ++++-- go/summarize/summarize-keys_test.go | 40 ++-- go/summarize/summarize.go | 25 ++- .../bigger_slow_log_avg-rows-examined.md | 182 ++++++++++++++++++ go/testdata/bigger_slow_log_avg-time.md | 182 ++++++++++++++++++ .../bigger_slow_log_total-rows-examined.md | 182 ++++++++++++++++++ ...w_log.md => bigger_slow_log_total-time.md} | 0 go/testdata/bigger_slow_log_usage-count.md | 182 ++++++++++++++++++ 9 files changed, 821 insertions(+), 36 deletions(-) create mode 100644 go/testdata/bigger_slow_log_avg-rows-examined.md 
create mode 100644 go/testdata/bigger_slow_log_avg-time.md create mode 100644 go/testdata/bigger_slow_log_total-rows-examined.md rename go/testdata/{bigger_slow_log.md => bigger_slow_log_total-time.md} (100%) create mode 100644 go/testdata/bigger_slow_log_usage-count.md diff --git a/go/cmd/summarize.go b/go/cmd/summarize.go index 92f0bc7..24690d5 100644 --- a/go/cmd/summarize.go +++ b/go/cmd/summarize.go @@ -23,14 +23,20 @@ import ( ) func summarizeCmd() *cobra.Command { - return &cobra.Command{ + var hotMetric string + + cmd := &cobra.Command{ Use: "summarize old_file.json [new_file.json]", Aliases: []string{"benchstat"}, Short: "Compares and analyses a trace output", Example: "vt summarize old.json new.json", Args: cobra.RangeArgs(1, 2), Run: func(_ *cobra.Command, args []string) { - summarize.Run(args) + summarize.Run(args, hotMetric) }, } + + cmd.Flags().StringVar(&hotMetric, "hot-metric", "total-time", "Metric to determine hot queries (options: usage-count, total-rows-examined, avg-rows-examined, avg-time, total-time)") + + return cmd } diff --git a/go/summarize/summarize-keys.go b/go/summarize/summarize-keys.go index 9e4fcbf..d8ac285 100644 --- a/go/summarize/summarize-keys.go +++ b/go/summarize/summarize-keys.go @@ -152,7 +152,7 @@ func (ts TableSummary) UseCount() int { // printKeysSummary goes over all the analysed queries, gathers information about column usage per table, // and prints this summary information to the output. -func printKeysSummary(out io.Writer, file readingSummary, now time.Time) { +func printKeysSummary(out io.Writer, file readingSummary, now time.Time, hotMetric string) { md := &markdown.MarkDown{} msg := `# Query Analysis Report @@ -162,10 +162,10 @@ func printKeysSummary(out io.Writer, file readingSummary, now time.Time) { ` md.Printf(msg, now.Format(time.DateTime), file.Name) + metricReader := getMetricForHotness(hotMetric) + summary := summarizeKeysQueries(file.AnalysedQueries, metricReader) - summary := summarizeKeysQueries(file.AnalysedQueries) - - renderHotQueries(md, summary.hotQueries) + renderHotQueries(md, summary.hotQueries, metricReader) renderTableUsage(summary.tables, md) renderTablesJoined(md, file.AnalysedQueries) renderFailures(md, summary.failures) @@ -176,18 +176,48 @@ func printKeysSummary(out io.Writer, file readingSummary, now time.Time) { } } -func renderHotQueries(md *markdown.MarkDown, queries []keys.QueryAnalysisResult) { +type getMetric = func(q keys.QueryAnalysisResult) float64 + +func getMetricForHotness(metric string) getMetric { + switch metric { + case "usage-count": + return func(q keys.QueryAnalysisResult) float64 { + return float64(q.UsageCount) + } + case "total-rows-examined": + return func(q keys.QueryAnalysisResult) float64 { + return float64(q.RowsExamined) + } + case "avg-rows-examined": + return func(q keys.QueryAnalysisResult) float64 { + return float64(q.RowsExamined) / float64(q.UsageCount) + } + case "total-time", "": + return func(q keys.QueryAnalysisResult) float64 { + return q.QueryTime + } + case "avg-time": + return func(q keys.QueryAnalysisResult) float64 { + return q.QueryTime / float64(q.UsageCount) + } + default: + exit(fmt.Sprintf("unknown metric: %s", metric)) + panic("unreachable") + } +} + +func renderHotQueries(md *markdown.MarkDown, queries []keys.QueryAnalysisResult, metricReader getMetric) { if len(queries) == 0 { return } hasTime := false - // Sort the queries in descending order of total query time + // Sort the queries in descending order of hotness sort.Slice(queries, func(i, j int) bool { if 
queries[i].QueryTime != 0 {
 			hasTime = true
 		}
-		return queries[i].QueryTime > queries[j].QueryTime
+		return metricReader(queries[i]) > metricReader(queries[j])
 	})
 
 	if !hasTime {
@@ -377,7 +407,7 @@ func makeKey(lhs, rhs operators.Column) graphKey {
 	return graphKey{rhs.Table, lhs.Table}
 }
 
-func summarizeKeysQueries(queries *keys.Output) Summary {
+func summarizeKeysQueries(queries *keys.Output, metricReader getMetric) Summary {
 	tableSummaries := make(map[string]*TableSummary)
 	tableUsageWriteCounts := make(map[string]int)
 	tableUsageReadCounts := make(map[string]int)
@@ -386,7 +416,7 @@ func summarizeKeysQueries(queries *keys.Output) Summary {
 	// First pass: collect all data and count occurrences
 	for _, query := range queries.Queries {
 		gatherTableInfo(query, tableSummaries, tableUsageWriteCounts, tableUsageReadCounts)
-		checkQueryForHotness(&hotQueries, query)
+		checkQueryForHotness(&hotQueries, query, metricReader)
 	}
 
 	// Second pass: calculate percentages
@@ -423,13 +453,12 @@
 	return Summary{tables: result, failures: failures, hotQueries: hotQueries}
 }
 
-func checkQueryForHotness(hotQueries *[]keys.QueryAnalysisResult, query keys.QueryAnalysisResult) {
-	// todo: we should be able to choose different metrics for hotness - e.g. total time spent on query, number of rows examined, etc.
+func checkQueryForHotness(hotQueries *[]keys.QueryAnalysisResult, query keys.QueryAnalysisResult, metricReader getMetric) {
 	switch {
 	case len(*hotQueries) < HotQueryCount:
 		// If we have not yet reached the limit, add the query
 		*hotQueries = append(*hotQueries, query)
-	case query.QueryTime > (*hotQueries)[0].QueryTime:
+	case metricReader(query) > metricReader((*hotQueries)[0]):
 		// If the current query is hotter than the coldest hot query, replace it
 		(*hotQueries)[0] = query
 	default:
@@ -440,7 +469,7 @@ func checkQueryForHotness(hotQueries *[]keys.QueryAnalysisResult, query keys.Que
 
 	// Sort the hot queries so that the least hot query is always at the front
 	sort.Slice(*hotQueries,
 		func(i, j int) bool {
-			return (*hotQueries)[i].QueryTime < (*hotQueries)[j].QueryTime
+			return metricReader((*hotQueries)[i]) < metricReader((*hotQueries)[j])
 		})
 }
diff --git a/go/summarize/summarize-keys_test.go b/go/summarize/summarize-keys_test.go
index cf89e2f..b06af43 100644
--- a/go/summarize/summarize-keys_test.go
+++ b/go/summarize/summarize-keys_test.go
@@ -67,25 +67,41 @@ func TestTableSummary(t *testing.T) {
 }
 
 func TestSummarizeKeysFile(t *testing.T) {
-	tests := []struct {
-		inputFile    string
-		expectedFile string
-	}{
-		{"keys-log.json", "keys-summary.md"},
-		{"bigger_slow_query_log.json", "bigger_slow_log.md"},
+	file, err := readTraceFile("../testdata/keys-log.json")
+	require.NoError(t, err)
+	sb := &strings.Builder{}
+	now := time.Date(2024, time.January, 1, 1, 2, 3, 0, time.UTC)
+	printKeysSummary(sb, file, now, "")
+	expected, err := os.ReadFile("../testdata/keys-summary.md")
+	require.NoError(t, err)
+	assert.Equal(t, string(expected), sb.String())
+	if t.Failed() {
+		_ = os.WriteFile("../testdata/keys-summary.md.correct", []byte(sb.String()), 0o644)
+	}
+}
+
+func TestSummarizeKeysWithHotnessFile(t *testing.T) {
+	tests := []string{
+		"usage-count",
+		"total-rows-examined",
+		"avg-rows-examined",
+		"avg-time",
+		"total-time",
 	}
 
-	for _, tt := range tests {
+	for _, metric := range
tests { + t.Run(metric, func(t *testing.T) { + file, err := readTraceFile("../testdata/bigger_slow_query_log.json") require.NoError(t, err) sb := &strings.Builder{} - printKeysSummary(sb, file, time.Date(2024, time.January, 1, 1, 2, 3, 0, time.UTC)) - expected, err := os.ReadFile("../testdata/" + tt.expectedFile) + now := time.Date(2024, time.January, 1, 1, 2, 3, 0, time.UTC) + printKeysSummary(sb, file, now, metric) + expected, err := os.ReadFile(fmt.Sprintf("../testdata/bigger_slow_log_%s.md", metric)) require.NoError(t, err) assert.Equal(t, string(expected), sb.String()) if t.Failed() { - _ = os.WriteFile("../testdata/"+tt.expectedFile+".correct", []byte(sb.String()), 0o644) + _ = os.Mkdir("../testdata/expected", 0o755) + _ = os.WriteFile(fmt.Sprintf("../testdata/expected/bigger_slow_log_%s.md", metric), []byte(sb.String()), 0o644) } }) } diff --git a/go/summarize/summarize.go b/go/summarize/summarize.go index dbc939f..e2d7752 100644 --- a/go/summarize/summarize.go +++ b/go/summarize/summarize.go @@ -39,25 +39,30 @@ type ( } ) -func Run(args []string) { - traces := make([]readingSummary, len(args)) +func Run(files []string, hotMetric string) { + traces := make([]readingSummary, len(files)) var err error - for i, arg := range args { + for i, arg := range files { traces[i], err = readTraceFile(arg) if err != nil { exit(err.Error()) } } + if hotMetric != "" && traces[0].AnalysedQueries == nil { + exit("hotMetric flag is only supported for 'vt keys' output") + } + firstTrace := traces[0] - if len(traces) == 1 { - if firstTrace.AnalysedQueries == nil { - printTraceSummary(os.Stdout, terminalWidth(), highlightQuery, firstTrace) - } else { - printKeysSummary(os.Stdout, firstTrace, time.Now()) - } - } else { + if len(traces) != 1 { compareTraces(os.Stdout, terminalWidth(), highlightQuery, firstTrace, traces[1]) + return + } + + if firstTrace.AnalysedQueries == nil { + printTraceSummary(os.Stdout, terminalWidth(), highlightQuery, firstTrace) + } else { + printKeysSummary(os.Stdout, firstTrace, time.Now(), hotMetric) } } diff --git a/go/testdata/bigger_slow_log_avg-rows-examined.md b/go/testdata/bigger_slow_log_avg-rows-examined.md new file mode 100644 index 0000000..968e154 --- /dev/null +++ b/go/testdata/bigger_slow_log_avg-rows-examined.md @@ -0,0 +1,182 @@ +# Query Analysis Report + +**Date of Analysis**: 2024-01-01 01:02:03 +**Analyzed File**: `../testdata/bigger_slow_query_log.json` + +## Top Queries +|Query ID|Usage Count|Total Query Time (ms)|Avg Query Time (ms)|Total Rows Examined| +|---|---|---|---|---| +|Q1|2|0.40|0.20|20000| +|Q2|3|0.61|0.20|30000| +|Q3|1|0.22|0.22|8000| +|Q4|2|0.37|0.19|16000| +|Q5|2|0.37|0.19|16000| +|Q6|2|0.49|0.25|16000| +|Q7|2|0.31|0.16|15000| +|Q8|1|0.20|0.20|6500| +|Q9|3|0.58|0.19|17000| +|Q10|2|0.34|0.17|8500| + +### Query Details +#### Q1 +```sql +SELECT `c`.`name`, sum(`oi`.`price` * `oi`.`quantity`) AS `total_sales` FROM `categories` AS `c` JOIN `products` AS `p` ON `c`.`id` = `p`.`category_id` JOIN `order_items` AS `oi` ON `p`.`id` = `oi`.`product_id` GROUP BY `c`.`id` ORDER BY sum(`oi`.`price` * `oi`.`quantity`) DESC LIMIT :1 /* INT64 */ +``` + +#### Q2 +```sql +SELECT `m`.`sender_id`, COUNT(DISTINCT `m`.`receiver_id`) AS `unique_receivers` FROM `messages` AS `m` GROUP BY `m`.`sender_id` HAVING COUNT(DISTINCT `m`.`receiver_id`) > :_unique_receivers /* INT64 */ +``` + +#### Q3 +```sql +SELECT `u`.`id`, `u`.`username` FROM `users` AS `u` JOIN `orders` AS `o` ON `u`.`id` = `o`.`user_id` JOIN `reviews` AS `r` ON `u`.`id` = `r`.`user_id` WHERE `o`.`created_at` 
>= DATE_SUB(now(), INTERVAL :1 /* INT64 */ month) AND `r`.`created_at` >= DATE_SUB(now(), INTERVAL :1 /* INT64 */ month) +``` + +#### Q4 +```sql +SELECT `c`.`name`, COUNT(`o`.`id`) AS `order_count` FROM `categories` AS `c` JOIN `products` AS `p` ON `c`.`id` = `p`.`category_id` JOIN `order_items` AS `oi` ON `p`.`id` = `oi`.`product_id` JOIN `orders` AS `o` ON `oi`.`order_id` = `o`.`id` GROUP BY `c`.`id` +``` + +#### Q5 +```sql +SELECT DATE(`o`.`created_at`) AS `order_date`, count(*) AS `order_count` FROM `orders` AS `o` WHERE `o`.`created_at` >= DATE_SUB(now(), INTERVAL :1 /* INT64 */ day) GROUP BY DATE(`o`.`created_at`) +``` + +#### Q6 +```sql +SELECT `u`.`id`, `u`.`username` FROM `users` AS `u` LEFT JOIN `orders` AS `o` ON `u`.`id` = `o`.`user_id` WHERE `o`.`id` IS NULL +``` + +#### Q7 +```sql +SELECT `p`.`name`, `i`.`stock_level` FROM `products` AS `p` JOIN `inventory` AS `i` ON `p`.`id` = `i`.`product_id` WHERE `i`.`stock_level` < :_i_stock_level /* INT64 */ +``` + +#### Q8 +```sql +SELECT `p`.`name`, `i`.`stock_level` FROM `products` AS `p` JOIN `inventory` AS `i` ON `p`.`id` = `i`.`product_id` WHERE `i`.`stock_level` BETWEEN :1 /* INT64 */ AND :2 /* INT64 */ +``` + +#### Q9 +```sql +SELECT `u`.`username`, sum(`o`.`total_amount`) AS `total_spent` FROM `users` AS `u` JOIN `orders` AS `o` ON `u`.`id` = `o`.`user_id` WHERE `o`.`created_at` BETWEEN :1 /* VARCHAR */ AND :2 /* VARCHAR */ GROUP BY `u`.`id` HAVING sum(`o`.`total_amount`) > :_total_spent /* INT64 */ +``` + +#### Q10 +```sql +SELECT `o`.`id`, `o`.`created_at` FROM `orders` AS `o` LEFT JOIN `shipments` AS `s` ON `o`.`id` = `s`.`order_id` WHERE `s`.`shipped_date` IS NULL AND `o`.`created_at` < DATE_SUB(now(), INTERVAL :1 /* INT64 */ day) +``` + +## Tables +|Table Name|Reads|Writes| +|---|---|---| +|orders|14|0| +|products|10|0| +|users|6|0| +|categories|4|0| +|order_items|4|0| +|reviews|4|0| +|inventory|3|0| +|messages|3|0| +|payments|2|0| +|shipments|2|0| + +### Column Usage +#### Table: `orders` (14 reads and 0 writes) +|Column|Position|Used %| +|---|---|---| +|created_at|WHERE RANGE|57%| +|id|JOIN|43%| +|user_id|JOIN|43%| + +#### Table: `products` (10 reads and 0 writes) +|Column|Position|Used %| +|---|---|---| +|id|JOIN|100%| +||GROUP|30%| +|category_id|JOIN|40%| + +#### Table: `users` (6 reads and 0 writes) +|Column|Position|Used %| +|---|---|---| +|id|JOIN|100%| +||GROUP|50%| + +#### Table: `categories` (4 reads and 0 writes) +|Column|Position|Used %| +|---|---|---| +|id|JOIN|100%| +||GROUP|100%| + +#### Table: `order_items` (4 reads and 0 writes) +|Column|Position|Used %| +|---|---|---| +|product_id|JOIN|100%| +|order_id|JOIN|50%| + +#### Table: `reviews` (4 reads and 0 writes) +|Column|Position|Used %| +|---|---|---| +|product_id|JOIN|75%| +|created_at|WHERE RANGE|50%| +|user_id|JOIN|25%| + +#### Table: `inventory` (3 reads and 0 writes) +|Column|Position|Used %| +|---|---|---| +|product_id|JOIN|100%| +|stock_level|WHERE RANGE|100%| + +#### Table: `messages` (3 reads and 0 writes) +|Column|Position|Used %| +|---|---|---| +|sender_id|GROUP|100%| + +#### Table: `payments` (2 reads and 0 writes) +|Column|Position|Used %| +|---|---|---| +|order_id|JOIN|100%| +|payment_method|GROUP|100%| + +#### Table: `shipments` (2 reads and 0 writes) +|Column|Position|Used %| +|---|---|---| +|order_id|JOIN|100%| + +## Tables Joined +``` +orders ↔ users (Occurrences: 3) +└─ orders.user_id = users.id + +categories ↔ products (Occurrences: 2) +└─ categories.id = products.category_id + +inventory ↔ products (Occurrences: 2) +└─ 
inventory.product_id = products.id + +order_items ↔ products (Occurrences: 2) +└─ order_items.product_id = products.id + +products ↔ reviews (Occurrences: 2) +└─ products.id = reviews.product_id + +order_items ↔ orders (Occurrences: 1) +└─ order_items.order_id = orders.id + +orders ↔ payments (Occurrences: 1) +└─ orders.id = payments.order_id + +orders ↔ shipments (Occurrences: 1) +└─ orders.id = shipments.order_id + +reviews ↔ users (Occurrences: 1) +└─ reviews.user_id = users.id + +``` +## Failures +|Error|Count| +|---|---| +|syntax error at position 2|1| +|syntax error at position 14 near 'timestamp'|1| + diff --git a/go/testdata/bigger_slow_log_avg-time.md b/go/testdata/bigger_slow_log_avg-time.md new file mode 100644 index 0000000..4d70414 --- /dev/null +++ b/go/testdata/bigger_slow_log_avg-time.md @@ -0,0 +1,182 @@ +# Query Analysis Report + +**Date of Analysis**: 2024-01-01 01:02:03 +**Analyzed File**: `../testdata/bigger_slow_query_log.json` + +## Top Queries +|Query ID|Usage Count|Total Query Time (ms)|Avg Query Time (ms)|Total Rows Examined| +|---|---|---|---|---| +|Q1|2|0.49|0.25|16000| +|Q2|1|0.22|0.22|8000| +|Q3|3|0.61|0.20|30000| +|Q4|2|0.40|0.20|20000| +|Q5|1|0.20|0.20|6500| +|Q6|3|0.58|0.19|17000| +|Q7|2|0.37|0.19|16000| +|Q8|2|0.37|0.19|16000| +|Q9|2|0.34|0.17|8500| +|Q10|2|0.33|0.17|6000| + +### Query Details +#### Q1 +```sql +SELECT `u`.`id`, `u`.`username` FROM `users` AS `u` LEFT JOIN `orders` AS `o` ON `u`.`id` = `o`.`user_id` WHERE `o`.`id` IS NULL +``` + +#### Q2 +```sql +SELECT `u`.`id`, `u`.`username` FROM `users` AS `u` JOIN `orders` AS `o` ON `u`.`id` = `o`.`user_id` JOIN `reviews` AS `r` ON `u`.`id` = `r`.`user_id` WHERE `o`.`created_at` >= DATE_SUB(now(), INTERVAL :1 /* INT64 */ month) AND `r`.`created_at` >= DATE_SUB(now(), INTERVAL :1 /* INT64 */ month) +``` + +#### Q3 +```sql +SELECT `m`.`sender_id`, COUNT(DISTINCT `m`.`receiver_id`) AS `unique_receivers` FROM `messages` AS `m` GROUP BY `m`.`sender_id` HAVING COUNT(DISTINCT `m`.`receiver_id`) > :_unique_receivers /* INT64 */ +``` + +#### Q4 +```sql +SELECT `c`.`name`, sum(`oi`.`price` * `oi`.`quantity`) AS `total_sales` FROM `categories` AS `c` JOIN `products` AS `p` ON `c`.`id` = `p`.`category_id` JOIN `order_items` AS `oi` ON `p`.`id` = `oi`.`product_id` GROUP BY `c`.`id` ORDER BY sum(`oi`.`price` * `oi`.`quantity`) DESC LIMIT :1 /* INT64 */ +``` + +#### Q5 +```sql +SELECT `p`.`name`, `i`.`stock_level` FROM `products` AS `p` JOIN `inventory` AS `i` ON `p`.`id` = `i`.`product_id` WHERE `i`.`stock_level` BETWEEN :1 /* INT64 */ AND :2 /* INT64 */ +``` + +#### Q6 +```sql +SELECT `u`.`username`, sum(`o`.`total_amount`) AS `total_spent` FROM `users` AS `u` JOIN `orders` AS `o` ON `u`.`id` = `o`.`user_id` WHERE `o`.`created_at` BETWEEN :1 /* VARCHAR */ AND :2 /* VARCHAR */ GROUP BY `u`.`id` HAVING sum(`o`.`total_amount`) > :_total_spent /* INT64 */ +``` + +#### Q7 +```sql +SELECT `c`.`name`, COUNT(`o`.`id`) AS `order_count` FROM `categories` AS `c` JOIN `products` AS `p` ON `c`.`id` = `p`.`category_id` JOIN `order_items` AS `oi` ON `p`.`id` = `oi`.`product_id` JOIN `orders` AS `o` ON `oi`.`order_id` = `o`.`id` GROUP BY `c`.`id` +``` + +#### Q8 +```sql +SELECT DATE(`o`.`created_at`) AS `order_date`, count(*) AS `order_count` FROM `orders` AS `o` WHERE `o`.`created_at` >= DATE_SUB(now(), INTERVAL :1 /* INT64 */ day) GROUP BY DATE(`o`.`created_at`) +``` + +#### Q9 +```sql +SELECT `o`.`id`, `o`.`created_at` FROM `orders` AS `o` LEFT JOIN `shipments` AS `s` ON `o`.`id` = `s`.`order_id` WHERE `s`.`shipped_date` IS 
NULL AND `o`.`created_at` < DATE_SUB(now(), INTERVAL :1 /* INT64 */ day) +``` + +#### Q10 +```sql +SELECT `p`.`payment_method`, avg(`o`.`total_amount`) AS `avg_order_value` FROM `payments` AS `p` JOIN `orders` AS `o` ON `p`.`order_id` = `o`.`id` GROUP BY `p`.`payment_method` +``` + +## Tables +|Table Name|Reads|Writes| +|---|---|---| +|orders|14|0| +|products|10|0| +|users|6|0| +|categories|4|0| +|order_items|4|0| +|reviews|4|0| +|inventory|3|0| +|messages|3|0| +|payments|2|0| +|shipments|2|0| + +### Column Usage +#### Table: `orders` (14 reads and 0 writes) +|Column|Position|Used %| +|---|---|---| +|created_at|WHERE RANGE|57%| +|id|JOIN|43%| +|user_id|JOIN|43%| + +#### Table: `products` (10 reads and 0 writes) +|Column|Position|Used %| +|---|---|---| +|id|JOIN|100%| +||GROUP|30%| +|category_id|JOIN|40%| + +#### Table: `users` (6 reads and 0 writes) +|Column|Position|Used %| +|---|---|---| +|id|JOIN|100%| +||GROUP|50%| + +#### Table: `categories` (4 reads and 0 writes) +|Column|Position|Used %| +|---|---|---| +|id|JOIN|100%| +||GROUP|100%| + +#### Table: `order_items` (4 reads and 0 writes) +|Column|Position|Used %| +|---|---|---| +|product_id|JOIN|100%| +|order_id|JOIN|50%| + +#### Table: `reviews` (4 reads and 0 writes) +|Column|Position|Used %| +|---|---|---| +|product_id|JOIN|75%| +|created_at|WHERE RANGE|50%| +|user_id|JOIN|25%| + +#### Table: `inventory` (3 reads and 0 writes) +|Column|Position|Used %| +|---|---|---| +|product_id|JOIN|100%| +|stock_level|WHERE RANGE|100%| + +#### Table: `messages` (3 reads and 0 writes) +|Column|Position|Used %| +|---|---|---| +|sender_id|GROUP|100%| + +#### Table: `payments` (2 reads and 0 writes) +|Column|Position|Used %| +|---|---|---| +|order_id|JOIN|100%| +|payment_method|GROUP|100%| + +#### Table: `shipments` (2 reads and 0 writes) +|Column|Position|Used %| +|---|---|---| +|order_id|JOIN|100%| + +## Tables Joined +``` +orders ↔ users (Occurrences: 3) +└─ orders.user_id = users.id + +categories ↔ products (Occurrences: 2) +└─ categories.id = products.category_id + +inventory ↔ products (Occurrences: 2) +└─ inventory.product_id = products.id + +order_items ↔ products (Occurrences: 2) +└─ order_items.product_id = products.id + +products ↔ reviews (Occurrences: 2) +└─ products.id = reviews.product_id + +order_items ↔ orders (Occurrences: 1) +└─ order_items.order_id = orders.id + +orders ↔ payments (Occurrences: 1) +└─ orders.id = payments.order_id + +orders ↔ shipments (Occurrences: 1) +└─ orders.id = shipments.order_id + +reviews ↔ users (Occurrences: 1) +└─ reviews.user_id = users.id + +``` +## Failures +|Error|Count| +|---|---| +|syntax error at position 2|1| +|syntax error at position 14 near 'timestamp'|1| + diff --git a/go/testdata/bigger_slow_log_total-rows-examined.md b/go/testdata/bigger_slow_log_total-rows-examined.md new file mode 100644 index 0000000..ce5ddd1 --- /dev/null +++ b/go/testdata/bigger_slow_log_total-rows-examined.md @@ -0,0 +1,182 @@ +# Query Analysis Report + +**Date of Analysis**: 2024-01-01 01:02:03 +**Analyzed File**: `../testdata/bigger_slow_query_log.json` + +## Top Queries +|Query ID|Usage Count|Total Query Time (ms)|Avg Query Time (ms)|Total Rows Examined| +|---|---|---|---|---| +|Q1|3|0.61|0.20|30000| +|Q2|2|0.40|0.20|20000| +|Q3|3|0.58|0.19|17000| +|Q4|2|0.37|0.19|16000| +|Q5|2|0.37|0.19|16000| +|Q6|2|0.49|0.25|16000| +|Q7|2|0.31|0.16|15000| +|Q8|2|0.34|0.17|8500| +|Q9|1|0.22|0.22|8000| +|Q10|1|0.20|0.20|6500| + +### Query Details +#### Q1 +```sql +SELECT `m`.`sender_id`, COUNT(DISTINCT `m`.`receiver_id`) AS 
`unique_receivers` FROM `messages` AS `m` GROUP BY `m`.`sender_id` HAVING COUNT(DISTINCT `m`.`receiver_id`) > :_unique_receivers /* INT64 */ +``` + +#### Q2 +```sql +SELECT `c`.`name`, sum(`oi`.`price` * `oi`.`quantity`) AS `total_sales` FROM `categories` AS `c` JOIN `products` AS `p` ON `c`.`id` = `p`.`category_id` JOIN `order_items` AS `oi` ON `p`.`id` = `oi`.`product_id` GROUP BY `c`.`id` ORDER BY sum(`oi`.`price` * `oi`.`quantity`) DESC LIMIT :1 /* INT64 */ +``` + +#### Q3 +```sql +SELECT `u`.`username`, sum(`o`.`total_amount`) AS `total_spent` FROM `users` AS `u` JOIN `orders` AS `o` ON `u`.`id` = `o`.`user_id` WHERE `o`.`created_at` BETWEEN :1 /* VARCHAR */ AND :2 /* VARCHAR */ GROUP BY `u`.`id` HAVING sum(`o`.`total_amount`) > :_total_spent /* INT64 */ +``` + +#### Q4 +```sql +SELECT `c`.`name`, COUNT(`o`.`id`) AS `order_count` FROM `categories` AS `c` JOIN `products` AS `p` ON `c`.`id` = `p`.`category_id` JOIN `order_items` AS `oi` ON `p`.`id` = `oi`.`product_id` JOIN `orders` AS `o` ON `oi`.`order_id` = `o`.`id` GROUP BY `c`.`id` +``` + +#### Q5 +```sql +SELECT DATE(`o`.`created_at`) AS `order_date`, count(*) AS `order_count` FROM `orders` AS `o` WHERE `o`.`created_at` >= DATE_SUB(now(), INTERVAL :1 /* INT64 */ day) GROUP BY DATE(`o`.`created_at`) +``` + +#### Q6 +```sql +SELECT `u`.`id`, `u`.`username` FROM `users` AS `u` LEFT JOIN `orders` AS `o` ON `u`.`id` = `o`.`user_id` WHERE `o`.`id` IS NULL +``` + +#### Q7 +```sql +SELECT `p`.`name`, `i`.`stock_level` FROM `products` AS `p` JOIN `inventory` AS `i` ON `p`.`id` = `i`.`product_id` WHERE `i`.`stock_level` < :_i_stock_level /* INT64 */ +``` + +#### Q8 +```sql +SELECT `o`.`id`, `o`.`created_at` FROM `orders` AS `o` LEFT JOIN `shipments` AS `s` ON `o`.`id` = `s`.`order_id` WHERE `s`.`shipped_date` IS NULL AND `o`.`created_at` < DATE_SUB(now(), INTERVAL :1 /* INT64 */ day) +``` + +#### Q9 +```sql +SELECT `u`.`id`, `u`.`username` FROM `users` AS `u` JOIN `orders` AS `o` ON `u`.`id` = `o`.`user_id` JOIN `reviews` AS `r` ON `u`.`id` = `r`.`user_id` WHERE `o`.`created_at` >= DATE_SUB(now(), INTERVAL :1 /* INT64 */ month) AND `r`.`created_at` >= DATE_SUB(now(), INTERVAL :1 /* INT64 */ month) +``` + +#### Q10 +```sql +SELECT `p`.`name`, `i`.`stock_level` FROM `products` AS `p` JOIN `inventory` AS `i` ON `p`.`id` = `i`.`product_id` WHERE `i`.`stock_level` BETWEEN :1 /* INT64 */ AND :2 /* INT64 */ +``` + +## Tables +|Table Name|Reads|Writes| +|---|---|---| +|orders|14|0| +|products|10|0| +|users|6|0| +|categories|4|0| +|order_items|4|0| +|reviews|4|0| +|inventory|3|0| +|messages|3|0| +|payments|2|0| +|shipments|2|0| + +### Column Usage +#### Table: `orders` (14 reads and 0 writes) +|Column|Position|Used %| +|---|---|---| +|created_at|WHERE RANGE|57%| +|id|JOIN|43%| +|user_id|JOIN|43%| + +#### Table: `products` (10 reads and 0 writes) +|Column|Position|Used %| +|---|---|---| +|id|JOIN|100%| +||GROUP|30%| +|category_id|JOIN|40%| + +#### Table: `users` (6 reads and 0 writes) +|Column|Position|Used %| +|---|---|---| +|id|JOIN|100%| +||GROUP|50%| + +#### Table: `categories` (4 reads and 0 writes) +|Column|Position|Used %| +|---|---|---| +|id|JOIN|100%| +||GROUP|100%| + +#### Table: `order_items` (4 reads and 0 writes) +|Column|Position|Used %| +|---|---|---| +|product_id|JOIN|100%| +|order_id|JOIN|50%| + +#### Table: `reviews` (4 reads and 0 writes) +|Column|Position|Used %| +|---|---|---| +|product_id|JOIN|75%| +|created_at|WHERE RANGE|50%| +|user_id|JOIN|25%| + +#### Table: `inventory` (3 reads and 0 writes) +|Column|Position|Used %| 
+|---|---|---| +|product_id|JOIN|100%| +|stock_level|WHERE RANGE|100%| + +#### Table: `messages` (3 reads and 0 writes) +|Column|Position|Used %| +|---|---|---| +|sender_id|GROUP|100%| + +#### Table: `payments` (2 reads and 0 writes) +|Column|Position|Used %| +|---|---|---| +|order_id|JOIN|100%| +|payment_method|GROUP|100%| + +#### Table: `shipments` (2 reads and 0 writes) +|Column|Position|Used %| +|---|---|---| +|order_id|JOIN|100%| + +## Tables Joined +``` +orders ↔ users (Occurrences: 3) +└─ orders.user_id = users.id + +categories ↔ products (Occurrences: 2) +└─ categories.id = products.category_id + +inventory ↔ products (Occurrences: 2) +└─ inventory.product_id = products.id + +order_items ↔ products (Occurrences: 2) +└─ order_items.product_id = products.id + +products ↔ reviews (Occurrences: 2) +└─ products.id = reviews.product_id + +order_items ↔ orders (Occurrences: 1) +└─ order_items.order_id = orders.id + +orders ↔ payments (Occurrences: 1) +└─ orders.id = payments.order_id + +orders ↔ shipments (Occurrences: 1) +└─ orders.id = shipments.order_id + +reviews ↔ users (Occurrences: 1) +└─ reviews.user_id = users.id + +``` +## Failures +|Error|Count| +|---|---| +|syntax error at position 2|1| +|syntax error at position 14 near 'timestamp'|1| + diff --git a/go/testdata/bigger_slow_log.md b/go/testdata/bigger_slow_log_total-time.md similarity index 100% rename from go/testdata/bigger_slow_log.md rename to go/testdata/bigger_slow_log_total-time.md diff --git a/go/testdata/bigger_slow_log_usage-count.md b/go/testdata/bigger_slow_log_usage-count.md new file mode 100644 index 0000000..015a38a --- /dev/null +++ b/go/testdata/bigger_slow_log_usage-count.md @@ -0,0 +1,182 @@ +# Query Analysis Report + +**Date of Analysis**: 2024-01-01 01:02:03 +**Analyzed File**: `../testdata/bigger_slow_query_log.json` + +## Top Queries +|Query ID|Usage Count|Total Query Time (ms)|Avg Query Time (ms)|Total Rows Examined| +|---|---|---|---|---| +|Q1|3|0.58|0.19|17000| +|Q2|3|0.61|0.20|30000| +|Q3|2|0.21|0.11|3000| +|Q4|2|0.37|0.19|16000| +|Q5|2|0.31|0.16|15000| +|Q6|2|0.40|0.20|20000| +|Q7|2|0.34|0.17|8500| +|Q8|2|0.33|0.17|6000| +|Q9|2|0.37|0.19|16000| +|Q10|2|0.49|0.25|16000| + +### Query Details +#### Q1 +```sql +SELECT `u`.`username`, sum(`o`.`total_amount`) AS `total_spent` FROM `users` AS `u` JOIN `orders` AS `o` ON `u`.`id` = `o`.`user_id` WHERE `o`.`created_at` BETWEEN :1 /* VARCHAR */ AND :2 /* VARCHAR */ GROUP BY `u`.`id` HAVING sum(`o`.`total_amount`) > :_total_spent /* INT64 */ +``` + +#### Q2 +```sql +SELECT `m`.`sender_id`, COUNT(DISTINCT `m`.`receiver_id`) AS `unique_receivers` FROM `messages` AS `m` GROUP BY `m`.`sender_id` HAVING COUNT(DISTINCT `m`.`receiver_id`) > :_unique_receivers /* INT64 */ +``` + +#### Q3 +```sql +SELECT `p`.`name`, avg(`r`.`rating`) AS `avg_rating` FROM `products` AS `p` JOIN `reviews` AS `r` ON `p`.`id` = `r`.`product_id` GROUP BY `p`.`id` ORDER BY avg(`r`.`rating`) DESC LIMIT :1 /* INT64 */ +``` + +#### Q4 +```sql +SELECT `c`.`name`, COUNT(`o`.`id`) AS `order_count` FROM `categories` AS `c` JOIN `products` AS `p` ON `c`.`id` = `p`.`category_id` JOIN `order_items` AS `oi` ON `p`.`id` = `oi`.`product_id` JOIN `orders` AS `o` ON `oi`.`order_id` = `o`.`id` GROUP BY `c`.`id` +``` + +#### Q5 +```sql +SELECT `p`.`name`, `i`.`stock_level` FROM `products` AS `p` JOIN `inventory` AS `i` ON `p`.`id` = `i`.`product_id` WHERE `i`.`stock_level` < :_i_stock_level /* INT64 */ +``` + +#### Q6 +```sql +SELECT `c`.`name`, sum(`oi`.`price` * `oi`.`quantity`) AS `total_sales` FROM 
`categories` AS `c` JOIN `products` AS `p` ON `c`.`id` = `p`.`category_id` JOIN `order_items` AS `oi` ON `p`.`id` = `oi`.`product_id` GROUP BY `c`.`id` ORDER BY sum(`oi`.`price` * `oi`.`quantity`) DESC LIMIT :1 /* INT64 */ +``` + +#### Q7 +```sql +SELECT `o`.`id`, `o`.`created_at` FROM `orders` AS `o` LEFT JOIN `shipments` AS `s` ON `o`.`id` = `s`.`order_id` WHERE `s`.`shipped_date` IS NULL AND `o`.`created_at` < DATE_SUB(now(), INTERVAL :1 /* INT64 */ day) +``` + +#### Q8 +```sql +SELECT `p`.`payment_method`, avg(`o`.`total_amount`) AS `avg_order_value` FROM `payments` AS `p` JOIN `orders` AS `o` ON `p`.`order_id` = `o`.`id` GROUP BY `p`.`payment_method` +``` + +#### Q9 +```sql +SELECT DATE(`o`.`created_at`) AS `order_date`, count(*) AS `order_count` FROM `orders` AS `o` WHERE `o`.`created_at` >= DATE_SUB(now(), INTERVAL :1 /* INT64 */ day) GROUP BY DATE(`o`.`created_at`) +``` + +#### Q10 +```sql +SELECT `u`.`id`, `u`.`username` FROM `users` AS `u` LEFT JOIN `orders` AS `o` ON `u`.`id` = `o`.`user_id` WHERE `o`.`id` IS NULL +``` + +## Tables +|Table Name|Reads|Writes| +|---|---|---| +|orders|14|0| +|products|10|0| +|users|6|0| +|categories|4|0| +|order_items|4|0| +|reviews|4|0| +|inventory|3|0| +|messages|3|0| +|payments|2|0| +|shipments|2|0| + +### Column Usage +#### Table: `orders` (14 reads and 0 writes) +|Column|Position|Used %| +|---|---|---| +|created_at|WHERE RANGE|57%| +|id|JOIN|43%| +|user_id|JOIN|43%| + +#### Table: `products` (10 reads and 0 writes) +|Column|Position|Used %| +|---|---|---| +|id|JOIN|100%| +||GROUP|30%| +|category_id|JOIN|40%| + +#### Table: `users` (6 reads and 0 writes) +|Column|Position|Used %| +|---|---|---| +|id|JOIN|100%| +||GROUP|50%| + +#### Table: `categories` (4 reads and 0 writes) +|Column|Position|Used %| +|---|---|---| +|id|JOIN|100%| +||GROUP|100%| + +#### Table: `order_items` (4 reads and 0 writes) +|Column|Position|Used %| +|---|---|---| +|product_id|JOIN|100%| +|order_id|JOIN|50%| + +#### Table: `reviews` (4 reads and 0 writes) +|Column|Position|Used %| +|---|---|---| +|product_id|JOIN|75%| +|created_at|WHERE RANGE|50%| +|user_id|JOIN|25%| + +#### Table: `inventory` (3 reads and 0 writes) +|Column|Position|Used %| +|---|---|---| +|product_id|JOIN|100%| +|stock_level|WHERE RANGE|100%| + +#### Table: `messages` (3 reads and 0 writes) +|Column|Position|Used %| +|---|---|---| +|sender_id|GROUP|100%| + +#### Table: `payments` (2 reads and 0 writes) +|Column|Position|Used %| +|---|---|---| +|order_id|JOIN|100%| +|payment_method|GROUP|100%| + +#### Table: `shipments` (2 reads and 0 writes) +|Column|Position|Used %| +|---|---|---| +|order_id|JOIN|100%| + +## Tables Joined +``` +orders ↔ users (Occurrences: 3) +└─ orders.user_id = users.id + +categories ↔ products (Occurrences: 2) +└─ categories.id = products.category_id + +inventory ↔ products (Occurrences: 2) +└─ inventory.product_id = products.id + +order_items ↔ products (Occurrences: 2) +└─ order_items.product_id = products.id + +products ↔ reviews (Occurrences: 2) +└─ products.id = reviews.product_id + +order_items ↔ orders (Occurrences: 1) +└─ order_items.order_id = orders.id + +orders ↔ payments (Occurrences: 1) +└─ orders.id = payments.order_id + +orders ↔ shipments (Occurrences: 1) +└─ orders.id = shipments.order_id + +reviews ↔ users (Occurrences: 1) +└─ reviews.user_id = users.id + +``` +## Failures +|Error|Count| +|---|---| +|syntax error at position 2|1| +|syntax error at position 14 near 'timestamp'|1| +