From 51eb05981d12df0daf190fe702e83c76646d7e2a Mon Sep 17 00:00:00 2001 From: Maximilian Hoffman Date: Fri, 7 Mar 2025 12:12:24 -0800 Subject: [PATCH] Sql debug cmd (#8920) * debug command * basic debugging working * type * bump * basic bats * fmt * license * fmt --- go/cmd/dolt/commands/debug.go | 438 ++++++++++++++++++++++++++++++ go/cmd/dolt/commands/sql.go | 2 + go/cmd/dolt/dolt.go | 1 + go/cmd/dolt/doltcmd/doltcmd.go | 1 + integration-tests/bats/debug.bats | 65 +++++ 5 files changed, 507 insertions(+) create mode 100644 go/cmd/dolt/commands/debug.go create mode 100644 integration-tests/bats/debug.bats diff --git a/go/cmd/dolt/commands/debug.go b/go/cmd/dolt/commands/debug.go new file mode 100644 index 0000000000..79339b90b7 --- /dev/null +++ b/go/cmd/dolt/commands/debug.go @@ -0,0 +1,438 @@ +// Copyright 2025 Dolthub, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package commands + +import ( + "archive/tar" + "bufio" + "bytes" + "compress/gzip" + "context" + "errors" + "fmt" + "io" + "io/fs" + "log" + "os" + "path/filepath" + "time" + + "github.com/dolthub/go-mysql-server/sql" + "github.com/dolthub/go-mysql-server/sql/analyzer" + "github.com/pkg/profile" + "github.com/sirupsen/logrus" + textunicode "golang.org/x/text/encoding/unicode" + "golang.org/x/text/transform" + + "github.com/dolthub/dolt/go/cmd/dolt/cli" + "github.com/dolthub/dolt/go/cmd/dolt/commands/engine" + "github.com/dolthub/dolt/go/cmd/dolt/errhand" + eventsapi "github.com/dolthub/dolt/go/gen/proto/dolt/services/eventsapi/v1alpha1" + "github.com/dolthub/dolt/go/libraries/doltcore/env" + "github.com/dolthub/dolt/go/libraries/utils/argparser" +) + +type DebugCmd struct { + VersionStr string +} + +const defaultDebugTime = 10 + +// The SQL shell installs its own signal handlers so that you can cancel a running query without ending the entire +// process +func (cmd DebugCmd) InstallsSignalHandlers() bool { + return true +} + +var _ cli.SignalCommand = DebugCmd{} + +// Name returns the name of the Dolt cli command. This is what is used on the command line to invoke the command +func (cmd DebugCmd) Name() string { + return "debug" +} + +// Description returns a description of the command +func (cmd DebugCmd) Description() string { + return "Run a query in profile and trace mode" +} + +func (cmd DebugCmd) Docs() *cli.CommandDocumentation { + ap := cmd.ArgParser() + return cli.NewCommandDocumentation(sqlDocs, ap) +} + +func (cmd DebugCmd) ArgParser() *argparser.ArgParser { + ap := argparser.NewArgParserWithMaxArgs(cmd.Name(), 0) + ap.SupportsString(QueryFlag, "q", "SQL query to run", "Runs a single query and exits.") + ap.SupportsString(FormatFlag, "r", "result output format", "How to format result output. Valid values are tabular, csv, json, vertical, and parquet. Defaults to tabular.") + ap.SupportsFlag(continueFlag, "c", "Continue running queries on an error. Used for batch mode only.") + ap.SupportsString(fileInputFlag, "f", "input file", "Execute statements from the file given.") + ap.SupportsInt(timeFlag, "t", "benchmark time", "Execute for at least time seconds.") + ap.SupportsString(outputFlag, "o", "output dir", "Result directory (Defaults to temporary director)") + return ap +} + +// EventType returns the type of the event to log +func (cmd DebugCmd) EventType() eventsapi.ClientEventType { + return eventsapi.ClientEventType_SQL +} + +// RequiresRepo indicates that this command does not have to be run from within a dolt data repository directory. +// In this case it is because this command supports the DataDirFlag which can pass in a directory. In the event that +// that parameter is not provided there is additional error handling within this command to make sure that this was in +// fact run from within a dolt data repository directory. +func (cmd DebugCmd) RequiresRepo() bool { + return false +} + +// Exec executes the command +// Unlike other commands, sql doesn't set a new working root directly, as the SQL layer updates the working set as +// necessary when committing work. +func (cmd DebugCmd) Exec(ctx context.Context, commandStr string, args []string, dEnv *env.DoltEnv, cliCtx cli.CliContext) int { + ap := cmd.ArgParser() + help, usage := cli.HelpAndUsagePrinters(cli.CommandDocsForCommandString(commandStr, sqlDocs, ap)) + apr, err := ap.Parse(args) + if err != nil { + if errors.Is(err, argparser.ErrHelp) { + help() + return 0 + } + return HandleVErrAndExitCode(errhand.VerboseErrorFromError(err), usage) + } + + // We need a username and password for many SQL commands, so set defaults if they don't exist + dEnv.Config.SetFailsafes(env.DefaultFailsafeConfig) + + format := engine.FormatTabular + if formatSr, ok := apr.GetValue(FormatFlag); ok { + var verr errhand.VerboseError + format, verr = GetResultFormat(formatSr) + if verr != nil { + return HandleVErrAndExitCode(verr, usage) + } + } + + // restrict LOAD FILE invocations to current directory + wd, err := os.Getwd() + if err != nil { + wd = "/dev/null" + } + err = sql.SystemVariables.AssignValues(map[string]interface{}{ + "secure_file_priv": wd, + }) + if err != nil { + return HandleVErrAndExitCode(errhand.VerboseErrorFromError(err), usage) + } + + queryist, sqlCtx, closeFunc, err := cliCtx.QueryEngine(ctx) + if err != nil { + return HandleVErrAndExitCode(errhand.VerboseErrorFromError(err), usage) + } + if closeFunc != nil { + defer closeFunc() + } + + sqlEng, ok := queryist.(*engine.SqlEngine) + if !ok { + return sqlHandleVErrAndExitCode(queryist, errhand.BuildDError("cannot run debug mode on a pre-existing server").Build(), usage) + } + + _, continueOnError := apr.GetValue(continueFlag) + outDir, outputDirSpecified := apr.GetValue(outputFlag) + if !outputDirSpecified { + outDir, err = os.MkdirTemp("", "dolt-debug-*") + if err != nil { + return sqlHandleVErrAndExitCode(queryist, errhand.BuildDError("couldn't create tempdir %s", err.Error()).Build(), usage) + } + } else { + err := os.Mkdir(outDir, os.ModePerm) + if err != nil { + return sqlHandleVErrAndExitCode(queryist, errhand.BuildDError("failed to create output directory %s", err.Error()).Build(), usage) + } + } + + if outputDirSpecified { + defer func() { + file, err := os.Create(outDir + ".tar.gz") + if err != nil { + cli.Println("failed to create output file " + err.Error()) + return + } + defer file.Close() + + gzipWriter := gzip.NewWriter(file) + defer gzipWriter.Close() + + tarWriter := tar.NewWriter(gzipWriter) + defer tarWriter.Close() + + if err := filepath.WalkDir(outDir, func(path string, d fs.DirEntry, err error) error { + if d.IsDir() { + return nil + } + return addFileToTar(tarWriter, path) + }); err != nil { + cli.Println("failed to create output tar " + err.Error()) + } + + cli.Println("zipped results in: " + outDir + ".tar.gz") + }() + } + + runTime := apr.GetIntOrDefault(timeFlag, defaultDebugTime) + + var input io.Reader = os.Stdin + if query, queryOK := apr.GetValue(QueryFlag); queryOK { + input = bytes.NewBuffer([]byte(query)) + } else if fileInput, ok := apr.GetValue(fileInputFlag); ok { + input, err = os.OpenFile(fileInput, os.O_RDONLY, os.ModePerm) + if err != nil { + return sqlHandleVErrAndExitCode(queryist, errhand.BuildDError("couldn't open file %s", fileInput).Build(), usage) + } + info, err := os.Stat(fileInput) + if err != nil { + return sqlHandleVErrAndExitCode(queryist, errhand.BuildDError("couldn't get file size %s", fileInput).Build(), usage) + } + + input = transform.NewReader(input, textunicode.BOMOverride(transform.Nop)) + + // initialize fileReadProg global variable if there is a file to process queries from + fileReadProg = &fileReadProgress{bytesRead: 0, totalBytes: info.Size(), printed: 0, displayStrLen: 0} + defer fileReadProg.close() + } + + queryFile, err := os.Create(filepath.Join(outDir, "input.sql")) + if err != nil { + return sqlHandleVErrAndExitCode(queryist, errhand.BuildDError("couldn't create file %s", err.Error()).Build(), usage) + } + defer queryFile.Close() + + input = bufio.NewReader(transform.NewReader(input, textunicode.BOMOverride(transform.Nop))) + _, err = io.Copy(queryFile, input) + if err != nil { + return sqlHandleVErrAndExitCode(queryist, errhand.BuildDError("couldn't copy input sql %s", err.Error()).Build(), usage) + } + _, err = queryFile.Seek(0, 0) + if err != nil { + return sqlHandleVErrAndExitCode(queryist, errhand.BuildDError("seek input sql %s", err.Error()).Build(), usage) + } + + err = debugAnalyze(sqlCtx, outDir, sqlEng, queryFile) + if err != nil { + return sqlHandleVErrAndExitCode(queryist, errhand.VerboseErrorFromError(err), usage) + } + + debugFile, err := os.Create(filepath.Join(outDir, "exec.txt")) + if err != nil { + if err != nil { + return sqlHandleVErrAndExitCode(queryist, errhand.VerboseErrorFromError(err), usage) + } + } + defer debugFile.Close() + + func() { + defer profile.Start(profile.ProfilePath(outDir), profile.CPUProfile).Stop() + cli.Println("starting cpu profile...") + + origStdout := cli.CliOut + origStderr := cli.CliErr + cli.CliOut = debugFile + cli.CliErr = debugFile + defer func() { + cli.CliOut = origStdout + cli.CliErr = origStderr + }() + + var done bool + wait := time.Tick(time.Duration(runTime) * time.Second) + for !done { + select { + case <-wait: + done = true + default: + execDebugMode(sqlCtx, sqlEng, queryFile, continueOnError, format) + } + } + }() + + func() { + defer profile.Start(profile.ProfilePath(outDir), profile.MemProfile).Stop() + cli.Println("starting mem profile...") + + origStdout := cli.CliOut + origStderr := cli.CliErr + cli.CliOut = debugFile + cli.CliErr = debugFile + defer func() { + cli.CliOut = origStdout + cli.CliErr = origStderr + }() + + var done bool + wait := time.Tick(time.Duration(runTime) * time.Second) + for !done { + select { + case <-wait: + done = true + default: + execDebugMode(sqlCtx, sqlEng, queryFile, continueOnError, format) + } + } + }() + + func() { + defer profile.Start(profile.ProfilePath(outDir), profile.TraceProfile).Stop() + cli.Println("starting trace profile...") + + origStdout := cli.CliOut + origStderr := cli.CliErr + cli.CliOut = debugFile + cli.CliErr = debugFile + defer func() { + cli.CliOut = origStdout + cli.CliErr = origStderr + }() + + var done bool + wait := time.Tick(time.Duration(runTime) * time.Second) + for !done { + select { + case <-wait: + done = true + default: + execDebugMode(sqlCtx, sqlEng, queryFile, continueOnError, format) + } + } + }() + + defer cli.Printf("debug results in: %s\n", outDir) + + return 0 +} + +func addFileToTar(tarWriter *tar.Writer, filePath string) error { + file, err := os.Open(filePath) + if err != nil { + return err + } + defer file.Close() + + stat, err := file.Stat() + if err != nil { + return err + } + + header, err := tar.FileInfoHeader(stat, stat.Name()) + if err != nil { + return err + } + + header.Name = filePath + err = tarWriter.WriteHeader(header) + if err != nil { + return err + } + + _, err = io.Copy(tarWriter, file) + return err +} + +func debugAnalyze(ctx *sql.Context, tempDir string, sqlEng *engine.SqlEngine, sqlFile *os.File) error { + _, err := sqlFile.Seek(0, 0) + if err != nil { + return err + } + defer func() { + sqlFile.Seek(0, 0) + }() + + eng := sqlEng.GetUnderlyingEngine() + eng.Analyzer.Debug = true + defer func() { + eng.Analyzer.Debug = false + }() + analysisFile, err := os.Create(filepath.Join(tempDir, "analysis.txt")) + if err != nil { + return err + } + defer analysisFile.Close() + + planFile, err := os.Create(filepath.Join(tempDir, "plan.txt")) + if err != nil { + return err + } + defer planFile.Close() + planBuf := bufio.NewWriter(planFile) + + analyzer.SetOutput(analysisFile) + logrus.SetOutput(analysisFile) + log.SetOutput(analysisFile) + origStdout := os.Stdout + origStderr := os.Stderr + origCliErr := cli.CliErr + origCliOut := cli.CliOut + os.Stdout = analysisFile + os.Stderr = analysisFile + cli.CliErr = analysisFile + cli.CliOut = analysisFile + defer func() { + logrus.SetOutput(os.Stderr) + analyzer.SetOutput(os.Stderr) + log.SetOutput(os.Stderr) + os.Stdout = origStdout + os.Stderr = origStderr + cli.CliOut = origCliOut + cli.CliErr = origCliErr + }() + + scanner := NewStreamScanner(sqlFile) + var query string + for scanner.Scan() { + if fileReadProg != nil { + updateFileReadProgressOutput() + fileReadProg.setReadBytes(int64(len(scanner.Bytes()))) + } + query += scanner.Text() + if len(query) == 0 || query == "\n" { + continue + } + + cli.Println("analyzing: " + query[:min(len(query), 60)]) + + planned, err := eng.AnalyzeQuery(ctx, query) + if err != nil { + return err + } + + fmt.Fprintf(planBuf, "query: %s\n", query) + fmt.Fprintf(planBuf, "plan: \n%s", planned.String()) + fmt.Fprintf(planBuf, "debug plan: \n%s", sql.DebugString(planned)) + } + + return planBuf.Flush() +} + +func execDebugMode(ctx *sql.Context, qryist cli.Queryist, queryFile *os.File, continueOnErr bool, format engine.PrintResultFormat) error { + _, err := queryFile.Seek(0, 0) + if err != nil { + return err + } + defer func() { + queryFile.Seek(0, 0) + }() + input := bufio.NewReader(transform.NewReader(queryFile, textunicode.BOMOverride(transform.Nop))) + + return execBatchMode(ctx, qryist, input, continueOnErr, format) +} diff --git a/go/cmd/dolt/commands/sql.go b/go/cmd/dolt/commands/sql.go index 4aa95c79b4..02f5735b60 100644 --- a/go/cmd/dolt/commands/sql.go +++ b/go/cmd/dolt/commands/sql.go @@ -93,6 +93,8 @@ const ( DefaultHost = "localhost" UseDbFlag = "use-db" ProfileFlag = "profile" + timeFlag = "time" + outputFlag = "output" welcomeMsg = `# Welcome to the DoltSQL shell. # Statements must be terminated with ';'. diff --git a/go/cmd/dolt/dolt.go b/go/cmd/dolt/dolt.go index 44dde399ec..83e2eb1f99 100644 --- a/go/cmd/dolt/dolt.go +++ b/go/cmd/dolt/dolt.go @@ -109,6 +109,7 @@ var commandsWithoutGlobalArgSupport = []cli.Command{ commands.VersionCmd{VersionStr: doltversion.Version}, commands.ConfigCmd{}, ci.Commands, + commands.DebugCmd{}, } // commands that do not need write access for the current directory diff --git a/go/cmd/dolt/doltcmd/doltcmd.go b/go/cmd/dolt/doltcmd/doltcmd.go index 1b00ad7d22..22814d2190 100644 --- a/go/cmd/dolt/doltcmd/doltcmd.go +++ b/go/cmd/dolt/doltcmd/doltcmd.go @@ -91,6 +91,7 @@ var doltSubCommands = []cli.Command{ commands.RebaseCmd{}, commands.ArchiveCmd{}, ci.Commands, + commands.DebugCmd{}, } var DoltCommand = cli.NewSubCommandHandler("dolt", "it's git for data", doltSubCommands) diff --git a/integration-tests/bats/debug.bats b/integration-tests/bats/debug.bats new file mode 100644 index 0000000000..2f827915ff --- /dev/null +++ b/integration-tests/bats/debug.bats @@ -0,0 +1,65 @@ +#!/usr/bin/env bats +load $BATS_TEST_DIRNAME/helper/common.bash +load $BATS_TEST_DIRNAME/helper/query-server-common.bash + +setup() { + skiponwindows "tests are flaky on Windows" + if [ "$SQL_ENGINE" = "remote-engine" ]; then + skip "This test tests remote connections directly, SQL_ENGINE is not needed." + fi + + setup_common + + TMPDIRS=$(pwd)/tmpdirs + mkdir -p $TMPDIRS/repo1 + + cd $TMPDIRS/repo1 + dolt init + + dolt sql <