Skip to content

Commit

Permalink
Merge pull request #63 from planetscale/txs
Browse files Browse the repository at this point in the history
Analyze transactions in query log
  • Loading branch information
systay authored Nov 26, 2024
2 parents 54a784d + f5fffc2 commit d4506d4
Show file tree
Hide file tree
Showing 27 changed files with 1,217 additions and 169 deletions.
21 changes: 0 additions & 21 deletions CONTRIBUTING.md

This file was deleted.

14 changes: 13 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ The `vt` binary encapsulates several utility tools for Vitess, providing a compr
## Tools Included
- **`vt test`**: A testing utility using the same test files as the [MySQL Test Framework](https://github.com/mysql/mysql-server/tree/8.0/mysql-test). It compares the results of identical queries executed on both MySQL and Vitess (vtgate), helping to ensure compatibility.
- **`vt keys`**: A utility that analyzes query logs and provides information about queries, tables, joins, and column usage.
- **`vt transactions`**: A tool that analyzes query logs to identify transaction patterns and outputs a JSON report detailing these patterns.
- **`vt trace`**: A tool that generates execution traces for queries without comparing against MySQL. It helps analyze query behavior and performance in Vitess environments.
- **`vt summarize`**: A tool used to summarize or compare trace logs or key logs for easier human consumption.

Expand Down Expand Up @@ -116,6 +117,11 @@ This command generates a `keys-log.json` file that contains a detailed analysis
This command summarizes the key analysis, providing insight into which tables and columns are used across queries, and how frequently they are involved in filters, groupings, and joins.
[Here](https://github.com/vitessio/vt/blob/main/go/summarize/testdata/keys-summary.md) is an example summary report.

## Transaction Analysis with vt transactions
The `vt transactions` command is designed to analyze query logs and identify patterns of transactional queries.
It processes the logs to find sequences of queries that form transactions and outputs a JSON report summarizing these patterns.
Read more about how to use and how to read the output in the [vt transactions documentation](./go/transactions/README.md).

## Using `--backup-path` Flag

The `--backup-path` flag allows `vt test` and `vt trace` to initialize tests from a database backup rather than an empty database.
Expand All @@ -136,7 +142,13 @@ We welcome contributions in the following areas:
- Fixing bugs in the test framework
- Adding features from the MySQL test framework that are missing in this implementation

For more details, see our [CONTRIBUTING.md](./CONTRIBUTING.md).
After cloning the repo, make sure to run

```bash
make install-hooks
```

to install the pre-commit hooks.

## License

Expand Down
1 change: 1 addition & 0 deletions go/cmd/root.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ func Execute() {
root.AddCommand(tracerCmd())
root.AddCommand(keysCmd())
root.AddCommand(schemaCmd())
root.AddCommand(transactionsCmd())

err := root.Execute()
if err != nil {
Expand Down
53 changes: 53 additions & 0 deletions go/cmd/transactions.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
/*
Copyright 2024 The Vitess Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package cmd

import (
"github.com/spf13/cobra"

"github.com/vitessio/vt/go/transactions"
)

func transactionsCmd() *cobra.Command {
var inputType string

cmd := &cobra.Command{
Use: "transactions ",
Aliases: []string{"txs"},
Short: "Analyze transactions on a query log",
Example: "vt transactions file.log",
Args: cobra.ExactArgs(1),
RunE: func(_ *cobra.Command, args []string) error {
cfg := transactions.Config{
FileName: args[0],
}

loader, err := configureLoader(inputType, false)
if err != nil {
return err
}
cfg.Loader = loader

transactions.Run(cfg)
return nil
},
}

addInputTypeFlag(cmd, &inputType)

return cmd
}
5 changes: 3 additions & 2 deletions go/data/query.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ type (
UsageCount int

// These fields are only set if the log file is a slow query log
ConnectionID int
QueryTime, LockTime float64
RowsSent, RowsExamined int
Timestamp int64
Expand Down Expand Up @@ -79,7 +80,7 @@ func ForeachSQLQuery(loader IteratorLoader, f func(Query) error) error {
return fmt.Errorf("unknown command type: %s", query.Type)
case Comment, CommentWithCommand, EmptyLine, WaitForAuthoritative, SkipIfBelowVersion:
// no-op for keys
case QueryT:
case SQLQuery:
if skip {
skip = false
continue
Expand All @@ -106,7 +107,7 @@ func (q *Query) getQueryType(qu string) error {
if q.Type != CommentWithCommand {
// A query that will sent to vitess
q.Query = qu
q.Type = QueryT
q.Type = SQLQuery
} else {
log.WithFields(log.Fields{"line": q.Line, "command": q.FirstWord, "arguments": q.Query}).Error("invalid command")
return fmt.Errorf("invalid command %s", q.FirstWord)
Expand Down
34 changes: 26 additions & 8 deletions go/data/query_log_parse.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import (
"io"
"os"
"regexp"
"strconv"
"sync"
)

Expand All @@ -41,8 +42,9 @@ type (

mysqlLogReaderState struct {
logReaderState
prevQuery string
queryStart int
prevQuery string
queryStart int
prevConnectionID int
}
)

Expand Down Expand Up @@ -98,6 +100,12 @@ func (s *mysqlLogReaderState) Next() (Query, bool) {
if matches[3] == "Query" {
s.prevQuery = matches[4]
s.queryStart = s.lineNumber
connID, err := strconv.Atoi(matches[2])
if err != nil {
s.err = fmt.Errorf("invalid connection id at line %d: %w", s.lineNumber, err)
return Query{}, false
}
s.prevConnectionID = connID
}
}
s.closed = true
Expand Down Expand Up @@ -137,26 +145,36 @@ func (s *logReaderState) readLine() (string, bool, error) {

func (s *mysqlLogReaderState) finalizeQuery() Query {
query := Query{
Query: s.prevQuery,
Line: s.queryStart,
Type: QueryT,
Query: s.prevQuery,
Line: s.queryStart,
Type: SQLQuery,
ConnectionID: s.prevConnectionID,
}
s.prevQuery = ""
s.prevConnectionID = 0
return query
}

func (s *mysqlLogReaderState) processQuery(matches []string) Query {
query := Query{
Query: s.prevQuery,
Line: s.queryStart,
Type: QueryT,
Query: s.prevQuery,
Line: s.queryStart,
Type: SQLQuery,
ConnectionID: s.prevConnectionID,
}
s.prevQuery = ""
s.prevConnectionID = 0

// If the new line is a query, store it for next iteration
if matches[3] == "Query" {
s.prevQuery = matches[4]
s.queryStart = s.lineNumber
connID, err := strconv.Atoi(matches[2])
if err != nil {
s.err = fmt.Errorf("invalid connection id at line %d: %w", s.lineNumber, err)
return Query{}
}
s.prevConnectionID = connID
}
return query
}
Expand Down
23 changes: 15 additions & 8 deletions go/data/query_log_parse_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,17 @@ package data
import (
"testing"

"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)

func TestParseMySQLQueryLog(t *testing.T) {
loader := MySQLLogLoader{}.Load("../testdata/mysql.query.log")
gotQueries, err := makeSlice(loader)
require.NoError(t, err)
for _, query := range gotQueries {
assert.NotZero(t, query.ConnectionID, query.Query)
}
require.Equal(t, 1517, len(gotQueries), "expected 1517 queries") //nolint:testifylint // too many elements for the output to be readable
}

Expand All @@ -35,13 +39,15 @@ func TestSmallSnippet(t *testing.T) {
require.NoError(t, err)
expected := []Query{
{
Query: "SET GLOBAL log_output = 'FILE'",
Line: 4,
Type: QueryT,
Query: "SET GLOBAL log_output = 'FILE'",
Line: 4,
Type: SQLQuery,
ConnectionID: 32,
}, {
Query: "show databases",
Line: 5,
Type: QueryT,
Query: "show databases",
Line: 5,
Type: SQLQuery,
ConnectionID: 32,
}, {
Query: `UPDATE _vt.schema_migrations
SET
Expand All @@ -67,8 +73,9 @@ WHERE
AND retries=0
)
LIMIT 1`,
Line: 6,
Type: QueryT,
Line: 6,
Type: SQLQuery,
ConnectionID: 24,
},
}

Expand Down
16 changes: 11 additions & 5 deletions go/data/slow_query_log_loader.go
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ func (s *slowQueryLogReaderState) processLine(line string, state *lineProcessorS
}

func (s *slowQueryLogReaderState) processCommentLine(line string, state *lineProcessorState) (bool, error) {
if strings.HasPrefix(line, "# Query_time:") {
if strings.HasPrefix(line, "# Query_time:") || strings.HasPrefix(line, "# User@Host:") {
if err := parseQueryMetrics(line, &state.currentQuery); err != nil {
return false, err
}
Expand Down Expand Up @@ -190,17 +190,16 @@ func parseQueryMetrics(line string, q *Query) error {
for i < len(fields) {
field := fields[i]
if !strings.HasSuffix(field, ":") {
return fmt.Errorf("unexpected field format '%s'", field)
i++
continue
}

// Remove the trailing colon to get the key
key := strings.TrimSuffix(field, ":")
if i+1 >= len(fields) {
return fmt.Errorf("missing value for key '%s'", key)
}
value := fields[i+1]

// Assign to Query struct based on key
switch key {
case "Query_time":
fval, err := strconv.ParseFloat(value, 64)
Expand All @@ -214,6 +213,12 @@ func parseQueryMetrics(line string, q *Query) error {
return fmt.Errorf("invalid Lock_time value '%s'", value)
}
q.LockTime = fval
case "Id":
ival, err := strconv.Atoi(value)
if err != nil {
return fmt.Errorf("invalid connection id value '%s'", value)
}
q.ConnectionID = ival
case "Rows_sent":
ival, err := strconv.Atoi(value)
if err != nil {
Expand All @@ -227,7 +232,7 @@ func parseQueryMetrics(line string, q *Query) error {
}
q.RowsExamined = ival
}
i += 2 // Move to the next key-value pair
i += 2
}

return nil
Expand Down Expand Up @@ -285,6 +290,7 @@ func parseQuery(rs Query) (*Query, error) {
RowsSent: rs.RowsSent,
RowsExamined: rs.RowsExamined,
Timestamp: rs.Timestamp,
ConnectionID: rs.ConnectionID,
}

if len(s) < 3 {
Expand Down
30 changes: 16 additions & 14 deletions go/data/slow_query_log_loader_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,21 +28,23 @@ func TestLoadSlowQueryLogWithMetadata(t *testing.T) {
require.NoError(t, err)

expected := []Query{
{FirstWord: "/bin/mysqld,", Query: "/bin/mysqld, Version: 8.0.26 (Source distribution). started with:\nTcp port: 3306 Unix socket: /tmp/mysql.sock\nTime Id Command Argument\nuse testdb;", Line: 1, Type: 0, QueryTime: 0.000153, LockTime: 6.3e-05, RowsSent: 1, RowsExamined: 1, Timestamp: 0},
{FirstWord: "/bin/mysqld,", Query: "/bin/mysqld, Version: 8.0.26 (Source distribution). started with:\nTcp port: 3306 Unix socket: /tmp/mysql.sock\nTime Id Command Argument\nuse testdb;", Line: 1, Type: 0, QueryTime: 0.000153, LockTime: 6.3e-05, RowsSent: 1, RowsExamined: 1, Timestamp: 0, ConnectionID: 780496},
{FirstWord: "SET", Query: "SET timestamp=1690891201;", Line: 8},
{FirstWord: "select", Query: "select m1_0.id, m1_0.name, m1_0.value, m1_0.date from items m1_0 where m1_0.id=2343274;", Line: 9},
{FirstWord: "FLUSH", Query: "FLUSH SLOW LOGS;", Line: 19, QueryTime: 0.005047, LockTime: 0, RowsSent: 0, RowsExamined: 0, Timestamp: 1690891201},
{FirstWord: "select", Query: "select m1_0.id, m1_0.name, m1_0.value, m1_0.date from items m1_0 where m1_0.id=2343272;", Line: 24, QueryTime: 0.000162, LockTime: 6.7e-05, RowsSent: 1, RowsExamined: 1, Timestamp: 1690891201},
{FirstWord: "select", Query: "select s1_0.id, s1_0.code, s1_0.token, s1_0.date from stores s1_0 where s1_0.id=11393;", Line: 29, QueryTime: 0.000583, LockTime: 0.000322, RowsSent: 1, RowsExamined: 1, Timestamp: 1690891201},
{FirstWord: "select", Query: "select m1_0.id, m1_0.name, m1_0.value, m1_0.date from items m1_0 where m1_0.id=2343265;", Line: 34, QueryTime: 0.000148, LockTime: 6.2e-05, RowsSent: 1, RowsExamined: 1, Timestamp: 1690891201},
{FirstWord: "select", Query: "select m1_0.id, m1_0.name, m1_0.value, m1_0.date from items m1_0 where m1_0.id=2343188;", Line: 39, QueryTime: 0.000159, LockTime: 6.5e-05, RowsSent: 1, RowsExamined: 1, Timestamp: 1690891201},
{FirstWord: "select", Query: "select m1_0.id, m1_0.name, m1_0.value, m1_0.date from items m1_0 where m1_0.id=2343180;", Line: 44, QueryTime: 0.000152, LockTime: 6.3e-05, RowsSent: 1, RowsExamined: 1, Timestamp: 1690891201},
{FirstWord: "select", Query: "select m1_0.id, m1_0.name, m1_0.value, m1_0.date from items m1_0 where m1_0.id=2343011;", Line: 49, QueryTime: 0.000149, LockTime: 6.1e-05, RowsSent: 666, RowsExamined: 1, Timestamp: 1690891201},
{FirstWord: "select", Query: "select m1_0.id, m1_0.name, m1_0.value, m1_0.date from items m1_0 where m1_0.id=2342469;", Line: 54, QueryTime: 0.000153, LockTime: 6.2e-05, RowsSent: 1, RowsExamined: 1, Timestamp: 1690891201},
{FirstWord: "select", Query: "select m1_0.id, m1_0.name, m1_0.value, m1_0.date from items m1_0 where m1_0.id=2342465;", Line: 59, QueryTime: 0.000151, LockTime: 6.2e-05, RowsSent: 1, RowsExamined: 1, Timestamp: 1690891201},
{FirstWord: "select", Query: "select m1_0.id, m1_0.name, m1_0.value, m1_0.date from items m1_0 where m1_0.id=2342439;", Line: 64, QueryTime: 0.000148, LockTime: 6.1e-05, RowsSent: 1, RowsExamined: 731, Timestamp: 1690891201},
{FirstWord: "select", Query: "select m1_0.id, m1_0.name, m1_0.value, m1_0.date from items m1_0 where m1_0.id=2342389;", Line: 69, QueryTime: 0.000163, LockTime: 6.7e-05, RowsSent: 1, RowsExamined: 1, Timestamp: 1690891201},
{FirstWord: "FLUSH", Query: "FLUSH SLOW LOGS;", Line: 19, QueryTime: 0.005047, LockTime: 0, RowsSent: 0, RowsExamined: 0, Timestamp: 1690891201, ConnectionID: 341291},
{FirstWord: "select", Query: "select m1_0.id, m1_0.name, m1_0.value, m1_0.date from items m1_0 where m1_0.id=2343272;", Line: 24, QueryTime: 0.000162, LockTime: 6.7e-05, RowsSent: 1, RowsExamined: 1, Timestamp: 1690891201, ConnectionID: 780496},
{FirstWord: "select", Query: "select s1_0.id, s1_0.code, s1_0.token, s1_0.date from stores s1_0 where s1_0.id=11393;", Line: 29, QueryTime: 0.000583, LockTime: 0.000322, RowsSent: 1, RowsExamined: 1, Timestamp: 1690891201, ConnectionID: 780506},
{FirstWord: "select", Query: "select m1_0.id, m1_0.name, m1_0.value, m1_0.date from items m1_0 where m1_0.id=2343265;", Line: 34, QueryTime: 0.000148, LockTime: 6.2e-05, RowsSent: 1, RowsExamined: 1, Timestamp: 1690891201, ConnectionID: 780496},
{FirstWord: "select", Query: "select m1_0.id, m1_0.name, m1_0.value, m1_0.date from items m1_0 where m1_0.id=2343188;", Line: 39, QueryTime: 0.000159, LockTime: 6.5e-05, RowsSent: 1, RowsExamined: 1, Timestamp: 1690891201, ConnectionID: 780496},
{FirstWord: "select", Query: "select m1_0.id, m1_0.name, m1_0.value, m1_0.date from items m1_0 where m1_0.id=2343180;", Line: 44, QueryTime: 0.000152, LockTime: 6.3e-05, RowsSent: 1, RowsExamined: 1, Timestamp: 1690891201, ConnectionID: 780496},
{FirstWord: "select", Query: "select m1_0.id, m1_0.name, m1_0.value, m1_0.date from items m1_0 where m1_0.id=2343011;", Line: 49, QueryTime: 0.000149, LockTime: 6.1e-05, RowsSent: 666, RowsExamined: 1, Timestamp: 1690891201, ConnectionID: 780496},
{FirstWord: "select", Query: "select m1_0.id, m1_0.name, m1_0.value, m1_0.date from items m1_0 where m1_0.id=2342469;", Line: 54, QueryTime: 0.000153, LockTime: 6.2e-05, RowsSent: 1, RowsExamined: 1, Timestamp: 1690891201, ConnectionID: 780496},
{FirstWord: "select", Query: "select m1_0.id, m1_0.name, m1_0.value, m1_0.date from items m1_0 where m1_0.id=2342465;", Line: 59, QueryTime: 0.000151, LockTime: 6.2e-05, RowsSent: 1, RowsExamined: 1, Timestamp: 1690891201, ConnectionID: 780496},
{FirstWord: "select", Query: "select m1_0.id, m1_0.name, m1_0.value, m1_0.date from items m1_0 where m1_0.id=2342439;", Line: 64, QueryTime: 0.000148, LockTime: 6.1e-05, RowsSent: 1, RowsExamined: 731, Timestamp: 1690891201, ConnectionID: 780496},
{FirstWord: "select", Query: "select m1_0.id, m1_0.name, m1_0.value, m1_0.date from items m1_0 where m1_0.id=2342389;", Line: 69, QueryTime: 0.000163, LockTime: 6.7e-05, RowsSent: 1, RowsExamined: 1, Timestamp: 1690891201, ConnectionID: 780496},
}
for i, expectedQuery := range expected {
query := queries[i]
require.Equal(t, expectedQuery, query, "Unexpected query at index %d", i)
}

require.Equal(t, expected, queries)
}
4 changes: 2 additions & 2 deletions go/data/typ.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ import "strings"
type CmdType int

const (
QueryT CmdType = iota
SQLQuery CmdType = iota
Error
Skip
Unknown
Expand All @@ -38,7 +38,7 @@ const (
)

var commandMap = map[string]CmdType{ //nolint:gochecknoglobals // this is instead of a const
"query": QueryT,
"query": SQLQuery,
"error": Error,
"skip": Skip,
"skip_if_below_version": SkipIfBelowVersion,
Expand Down
Loading

0 comments on commit d4506d4

Please sign in to comment.