diff --git a/.gitignore b/.gitignore index cc7c3d2..0f6e5e8 100644 --- a/.gitignore +++ b/.gitignore @@ -9,3 +9,4 @@ docs tests/testthat/doltdb inst/doc dolt +.sqlhistory diff --git a/DESCRIPTION b/DESCRIPTION index 4443d27..61a13ce 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -45,7 +45,7 @@ VignetteBuilder: Encoding: UTF-8 Language: en-US Roxygen: list(markdown = TRUE) -RoxygenNote: 7.1.2 +RoxygenNote: 7.2.0 SystemRequirements: dolt Collate: 'cli.R' diff --git a/R/cli.R b/R/cli.R index e95ee43..a8b1211 100644 --- a/R/cli.R +++ b/R/cli.R @@ -40,7 +40,8 @@ dolt_init <- function(dir = Sys.getenv("DOLT_DIR", "doltdb")) { #' Export data from a dolt database #' @param dir path to dolt database on-disk -#' @param format the export data format. One of `"sql"`, `"csv"`, or `"json"` +#' @param format the export data format. One of `"sql"`, `"csv"`, `"json"`, or +#' `"parquet"` #' @param out the location on-disk for export. In the case of `"sql"`, format, #' a single file path (default `doltdump.sql`), otherwise a directory for all #' tables to be dumped as separate files (default "doltdump") @@ -49,7 +50,7 @@ dolt_init <- function(dir = Sys.getenv("DOLT_DIR", "doltdb")) { #' @importFrom R.utils getAbsolutePath #' @return the path(s) of exported files #' @export -dolt_dump <- function(format = c("sql", "csv", "json"), +dolt_dump <- function(format = c("sql", "csv", "json", "parquet"), out = NULL, overwrite = FALSE, dir = Sys.getenv("DOLT_DIR", "doltdb")) { diff --git a/R/dolt-diffs.R b/R/dolt-diffs.R index 3f310f5..038054c 100644 --- a/R/dolt-diffs.R +++ b/R/dolt-diffs.R @@ -2,12 +2,14 @@ #' #' @param table [character] the name of a table in the database #' @param to commit to compare to +#' @param from commit to compare from #' @inheritParams dolt_branches #' @export #' @rdname dolt-diffs -dolt_diffs <- function(table, to, conn = dolt(), collect = NULL, show_sql = NULL) { +dolt_diffs <- function(table, to, from, conn = dolt(), collect = NULL, show_sql = NULL) { collect <- .collect(collect); show_sql <- .show_sql(show_sql) - query <- paste0("select * from dolt_commit_diff_", table, " WHERE to_commit = ", to) + query <- paste0("select * from dolt_commit_diff_", table, + " where to_commit='", to, "' and from_commit='", from, "'") dolt_query(query, conn, collect, show_sql) } diff --git a/R/dolt-local-connection.R b/R/dolt-local-connection.R index 8195e7e..9176566 100644 --- a/R/dolt-local-connection.R +++ b/R/dolt-local-connection.R @@ -129,14 +129,14 @@ setMethod("dbDisconnect", "DoltLocalConnection", function(conn, ...) { if (dbIsValid(conn) && ps_is_running(conn@server)) { # On disconnection, kill the server only if it was started by doltr and no - # no other processes connect to it + # no other processes connect to it. is_doltr_server <- isTRUE(ps_environ(conn@server)["R_DOLT"] == "1") procs <- ps() - procs <- procs[procs$status == "running" & procs$pid != ps_pid(ps_handle()),] + procs <- procs[(procs$status == "running" | procs$status == "sleeping") & procs$pid != ps_pid(ps_handle()),] procs <- procs[vapply(procs$ps_handle, function(x) { conns <- try(ps_connections(x), silent = TRUE) - out <- !inherits(conns, "try-error") && nrow(conns) && conn@port %in% conns$rport + out <- !inherits(conns, "try-error") && nrow(conns) && conn@port %in% conns$lport out }, logical(1)),] other_sessions <- as.logical(nrow(procs)) @@ -149,9 +149,9 @@ setMethod("dbDisconnect", "DoltLocalConnection", function(conn, ...) { } getMethod(dbDisconnect, "DoltConnection")(conn) - if (kill_server) - try(dkill(conn@server), silent = TRUE) - invisible(TRUE) + if (kill_server) { + try(dkill(conn@server), silent = T) + } }) #' @export @@ -159,6 +159,7 @@ setMethod("dbDisconnect", "DoltLocalConnection", function(conn, ...) { #' @rdname dolt_local setMethod("dbIsValid", "DoltLocalConnection", function(dbObj, ...) { valid <- getMethod(dbIsValid, "MariaDBConnection")(dbObj) && + class(try(dbGetQuery(dbObj, "SELECT 1"), silent = TRUE)) != "try-error" && ps_is_running(dbObj@server) if (!valid && inherits(dbObj@server, "ps_handle")) try(dkill(dbObj@server), silent = TRUE) diff --git a/R/dolt-nav.R b/R/dolt-nav.R index d8e5228..e8a8f31 100644 --- a/R/dolt-nav.R +++ b/R/dolt-nav.R @@ -17,9 +17,9 @@ dolt_checkout <- function(branch, b = FALSE, start_point = NULL, branch = sql_quote(branch, "'") if (b) branch <- paste0(sql_quote("-b", "'"), ", ", branch) if (!is.null(start_point)) branch <- paste0(branch, ", ", start_point) - query <- paste0("select dolt_checkout(", branch, ")") - dolt_query(query, conn, collect, show_sql) - invisible(dolt_state()) + query <- paste0("CALL dolt_checkout(", branch, ")") + dolt_call(query, conn, show_sql) + dolt_state() # I don't think this needs to be invisible. } #' @export diff --git a/R/dolt-remote.R b/R/dolt-remote.R index 6eb469c..8721702 100644 --- a/R/dolt-remote.R +++ b/R/dolt-remote.R @@ -1,6 +1,7 @@ #' Work with dolt repository remotes #' #' @param remote the name of the remote. "origin" is used by default +#' @param remote_branch the name of the remote branch to use with set_upstream. Current local branch is used by default #' @param ref the branch reference #' @param set_upstream whether to set the remote branch reference to track #' @param force whether to overwrite any conflicting history @@ -10,17 +11,20 @@ #' @rdname dolt-remote #' @family dolt-sql-commands #' @importFrom dbplyr sql_quote -dolt_push <- function(remote = NULL, ref = NULL, set_upstream = FALSE, - force = FALSE, conn = dolt(), collect = NULL, - show_sql = NULL) { +dolt_push <- function(remote = NULL, remote_branch = NULL, ref = NULL, + set_upstream = FALSE, force = FALSE, conn = dolt(), + collect = NULL, show_sql = NULL) { collect <- .collect(collect); show_sql <- .show_sql(show_sql) args <- character(0) + if (set_upstream & is.null (remote)) remote = "origin" + if (set_upstream & is.null (remote_branch)) remote_branch = sub(".*/", "", dolt_state()$head_ref) if (!is.null (remote)) args <- c(args, sql_quote(remote, "'")) + if (!is.null (remote_branch)) args <- c(args, sql_quote(remote_branch, "'")) if (!is.null (ref)) args <- c(args, sql_quote(ref, "'")) - if (set_upstream) args <- c("'--set-upstream'", args) + if (set_upstream) args <- c("'--set-upstream' ", args) if (force) args <- c(args, "'--force'") - query <- paste0("select dolt_push(", paste0(args, collapse = ", "), ")") - dolt_query(query, conn, collect, show_sql) + query <- paste0("call dolt_push(", paste0(args, collapse = ", "), ")") + dolt_call(query, conn, show_sql) invisible(dolt_state()) } @@ -35,8 +39,8 @@ dolt_pull <- function(remote = NULL, squash = FALSE, conn = dolt(), args <- "" if (!is.null(remote)) args <- c(args, sql_quote(remote, "'")) if (squash) args <- c(args, "'--squash'") - query <- paste0("select dolt_pull(", paste0(args, collapse = ", "), ")") - dolt_query(query, conn, collect, show_sql) + query <- paste0("call dolt_pull(", paste0(args, collapse = ", "), ")") + dolt_call(query, conn, show_sql) invisible(dolt_state()) } @@ -50,8 +54,8 @@ dolt_fetch <- function(remote = NULL, ref = FALSE, force = FALSE, if (!is.null(remote)) args <- c(args, sql_quote(remote, "'")) if (!is.null(ref)) args <- c(args, sql_quote(remote, "'")) if (force) args <- c(args, "'--force'") - query <- paste0("select dolt_fetch(", paste0(args, collapse = ", "), ")") - dolt_query(query, conn, collect, show_sql) + query <- paste0("call dolt_fetch(", paste0(args, collapse = ", "), ")") + dolt_call(query, conn, show_sql) invisible(dolt_state()) } diff --git a/R/dolt-stage-commit.R b/R/dolt-stage-commit.R index 15cf58d..db4f524 100644 --- a/R/dolt-stage-commit.R +++ b/R/dolt-stage-commit.R @@ -9,8 +9,8 @@ dolt_add <- function(tables = NULL, conn = dolt(), collect = NULL, show_sql = NU tables <- "'--all'" else tables <- paste0("'", tables, "'", collapse = ", ") - query <- paste0("select dolt_add(", tables, ")"); - dolt_query(query, conn, collect, show_sql) + query <- paste0("call dolt_add(", tables, ")"); + dolt_call(query, conn, show_sql) invisible(dolt_status()) } @@ -44,9 +44,11 @@ dolt_commit <- function(all = TRUE, message = NULL, author = NULL, date = NULL, if (!is.null(author)) args <- c(args, "'--author'", paste0("'", author, "'")) if (!is.null(date)) args <- c(args, "'--date'", paste0("'", date, "'")) if (allow_empty) args <- c(args, "'--allow-empty'") - query <- paste0("select dolt_commit(", paste0(args, collapse = ", "), ")"); - dolt_query(query, conn, collect, show_sql) - invisible(dolt_state()) + query <- paste0("call dolt_commit(", paste0(args, collapse = ", "), ")"); + dolt_call(query, conn, show_sql) + state <- dolt_state() + message(paste0("head commit: ", state$head)) + invisible(state) } #' @param hard Reset working and staged tables? If FALSE (default), a "soft" @@ -62,7 +64,7 @@ dolt_reset <- function(hard = FALSE, tables = NULL, conn = dolt(), args <- c() if (!is.null(tables)) args <- paste0(sql_quote(tables, "'"), collapse = ", ") if (hard) args <- c(sql_quote("--hard", "'"), args) - query <- paste0("select dolt_reset(", paste(args, collapse = ", ", ")")) - dolt_query(query, conn, collect, show_sql) + query <- paste0("call dolt_reset(", paste(args, collapse = ", ", ")")) + dolt_call(query, conn, show_sql) invisible(dolt_state()) } diff --git a/R/dolt-types.R b/R/dolt-types.R index 16bb440..49aa9c6 100644 --- a/R/dolt-types.R +++ b/R/dolt-types.R @@ -80,7 +80,7 @@ dolt_text_type <- function(obj, min_varchar, max_varchar) { #' @importFrom blob as_blob dolt_blob_type <- function(obj) { if (!all(vapply(obj, is.raw, logical(1)))) "Stop only lists of raw vectors (blobs) allowed" - nb <- max(vapply(obj, \(x) length(x), 1), 1, na.rm = TRUE) + nb <- max(vapply(obj, length, 1), 1, na.rm = TRUE) if (nb <= 65535) { return(structure("BLOB", max_size = 65535)) } else if (nb > 65535L && nb <= 16777215) { diff --git a/R/query.R b/R/query.R index e39a3ef..a29d309 100644 --- a/R/query.R +++ b/R/query.R @@ -9,7 +9,6 @@ dolt_query <- function(query, conn = dolt(), result } - .collect <- function(collect) { if (is.null(collect)) return(Sys.getboolenv("DOLT_COLLECT", TRUE)) @@ -23,3 +22,13 @@ dolt_query <- function(query, conn = dolt(), else return(show_sql) } + +dolt_call <- function(query, conn = dolt(), + show_sql = Sys.getboolenv("DOLT_VERBOSE", FALSE)) { + + query <- sql(query) + if (show_sql) message(query) + result <- RMariaDB::dbExecute(conn, query) + result +} + diff --git a/R/read-table.R b/R/read-table.R index 8ba8478..92de575 100644 --- a/R/read-table.R +++ b/R/read-table.R @@ -28,7 +28,13 @@ NULL #' @export #' @rdname dolt-read setMethod("dbReadTable", c("DoltConnection", "character"), - function(conn, name, as_of = NULL, ..., row.names = FALSE, check.names = TRUE) { + function(conn, name, + as_of = NULL, + ..., + row.names = FALSE, + check.names = TRUE, + show_sql = F) { + row.names <- compatRowNames(row.names) if ((!is.logical(row.names) && !is.character(row.names)) || length(row.names) != 1L) { @@ -39,11 +45,21 @@ setMethod("dbReadTable", c("DoltConnection", "character"), stopc("`check.names` must be a logical scalar") } - name <- dbQuoteIdentifier(conn, name) - query <- paste("SELECT * FROM ", name) - if (!is.null(as_of)) query <- query_as_of(query, as_of) + if (!is.null(as_of)) { + table_type <- dbGetTableType(conn, name, as_of) + if(!length(table_type)) warning("table does not exist at as_of commit") + name <- query_hash_qualified(conn, name, as_of) + } else { + name <- dbQuoteIdentifier(conn, name) + } + + query <- paste("SELECT * FROM", name) - out <- dbGetQuery(conn, query, + if(!is.null(as_of)) query <- paste(query, "AS OF", DBI::dbQuoteString(dolt(), as_of)) + if(show_sql) print(query) + + out <- DBI::dbGetQuery(conn, + query, row.names = row.names) if (check.names) { @@ -54,26 +70,36 @@ setMethod("dbReadTable", c("DoltConnection", "character"), } ) -query_as_of <- function(query, as_of) { +query_as_of <- function(name, as_of) { as_of <- tryCatch( paste0("TIMESTAMP('", as.character(as.POSIXct(as_of)), "')"), - error = function(e) paste("'", as_of, "'") + error = function(e) paste0("'", as_of, "'") ) - query <- paste0(query, " AS OF ", as_of) + name <- paste0(name, " AS OF ", as_of) + name +} + +query_hash_qualified <- function(conn, name, as_of) { + dbname <- dbGetQuery(conn, "select DATABASE()")[[1]] + name <- paste0("`", dbname, "/", as_of, "`.", name) + name } #' @export #' @rdname dolt-read setMethod("dbListTables", "DoltConnection", function(conn, as_of = NULL, ...) { - # DATABASE(): https://stackoverflow.com/a/8096574/946850 - query <- paste0("SELECT table_name FROM INFORMATION_SCHEMA.tables\n", - "WHERE table_schema = DATABASE()") - if (!is.null(as_of)) query <- query_as_of(query, as_of) - - dbGetQuery(conn, query)[[1]] - + query <- 'show full tables' + if(!is.null(as_of)) query <- paste0(query, " as of '", as_of, "'") + out <- RMariaDB::dbGetQuery(conn, query) + out[[1]] }) +dbGetTableType <- function(conn, name, as_of = NULL) { + query <- 'show full tables' + if(!is.null(as_of)) query <- paste0(query, " as of '", as_of, "'") + out <- RMariaDB::dbGetQuery(conn, query) + out[out[,1] == name, 2] +} #' @export #' @inheritParams DBI::dbListObjects diff --git a/R/server.R b/R/server.R index 8e2f6fd..a47e5e3 100644 --- a/R/server.R +++ b/R/server.R @@ -20,7 +20,7 @@ #' to `std_out()`, if `NULL` (default), it is suppressed. Can also take #' a filename. See [processx::run()]. #' @param timeout Defines the timeout, in seconds, used for connections -#' A value of `0` represents an infinite timeout (default `28800000`) +#' (default `28800000`) #' @param query_parallelism Set the number of go routines spawned to handle each #' query (default `2`) #' @param max_connections Set the number of connections handled by the server @@ -44,7 +44,7 @@ dolt_server <- function(dir = Sys.getenv("DOLT_DIR", "doltdb"), read_only = FALSE, log_level = "info", log_out = NULL, - timeout = 0, + timeout = 28800000, query_parallelism = 2, max_connections = 100, config_file = Sys.getenv("DOLT_CONFIG_FILE", "")) { @@ -90,6 +90,7 @@ dolt_server <- function(dir = Sys.getenv("DOLT_DIR", "doltdb"), stdout = log_out, stderr = "2>&1", env = c("current", R_DOLT=1), supervise = FALSE, cleanup = FALSE, cleanup_tree = FALSE) + p <- proc$as_ps_handle() rm(proc) @@ -131,7 +132,7 @@ setOldClass("dolt_server") #' @importFrom ps ps ps_connections ps_cwd ps_environ ps_cmdline dolt_server_find <- function(dir = NULL, port = NULL, doltr_only = FALSE) { dp <- ps() - dp <- dp[dp$name == "dolt" & dp$status == "running",] + dp <- dp[dp$name == "dolt" & (dp$status == "running" | dp$status == "sleeping"),] if (nrow(dp)) dp <- dp[vapply(dp$ps_handle, function(x) { isTRUE(try(ps_cmdline(x)[2], silent = TRUE) == "sql-server") @@ -163,18 +164,35 @@ dolt_server_kill <- function(dir = NULL, port = NULL, doltr_only = FALSE, verbos dp <- dolt_server_find(dir, port, doltr_only) lapply(dp$ps_handle, dkill) if (verbose) message(nrow(dp), " processes killed") + + # dolt now uses a lock file which needs to be cleaned up after process killed + # This might be a little complicated with multi_db = T invisible(dp) } #' @importFrom ps signals ps_terminate ps_kill dkill <- function(p = ps_handle) { + # We should prefer SIGTERM over SIGKILL when possible + # is.null(ps::signals()$SIGTERM)) asks if SIGTERM + # is NOT available. if (is.null(ps::signals()$SIGTERM)) { - ps_terminate(p) - } else { + # If SIGTERM signal is NOT available kill the + # process and clean up the lock file manually. ps_kill(p) + unlink(paste0(ps::ps_cwd(p), "/.dolt/sql-server.lock")) + } else { + ps_terminate(p) # sql-server.lock should be cleaned up automatically. Just in case though... + unlink(paste0(ps::ps_cwd(p), "/.dolt/sql-server.lock")) } + invisible(NULL) } - +# +# is_dolt_server_valid <- function(srv) { +# dbConnect(dolt_remote(), dbname = basename(ps_cwd(srv)), username = username, +# password = password, host = host, port = ps_connections(srv)$lport, +# autocommit = autocommit, ...) +# } +# diff --git a/R/utils.R b/R/utils.R index ead9bce..5b8109e 100644 --- a/R/utils.R +++ b/R/utils.R @@ -121,3 +121,8 @@ as_table <- function(schema, table) { args <- args[!is.na(args) & args != ""] do.call(Id, as.list(args)) } + +compact <- function(x) { + is_empty <- vapply(x, function(x) length(x) == 0, logical(1)) + x[!is_empty] +} diff --git a/doltr.Rproj b/doltr.Rproj index 517abe7..70ecf7a 100644 --- a/doltr.Rproj +++ b/doltr.Rproj @@ -17,5 +17,6 @@ StripTrailingWhitespace: Yes BuildType: Package PackageUseDevtools: Yes +PackageCleanBeforeInstall: Yes PackageInstallArgs: --no-multiarch --with-keep.source PackageRoxygenize: rd,collate,namespace diff --git a/man/dolt-diffs.Rd b/man/dolt-diffs.Rd index a21b2de..e5c4aaf 100644 --- a/man/dolt-diffs.Rd +++ b/man/dolt-diffs.Rd @@ -5,7 +5,7 @@ \alias{dolt_table_history} \title{Examine information about dolt tables and diffs} \usage{ -dolt_diffs(table, to, conn = dolt(), collect = NULL, show_sql = NULL) +dolt_diffs(table, to, from, conn = dolt(), collect = NULL, show_sql = NULL) dolt_table_history(table, conn = dolt(), collect = NULL, show_sql = NULL) } @@ -14,6 +14,8 @@ dolt_table_history(table, conn = dolt(), collect = NULL, show_sql = NULL) \item{to}{commit to compare to} +\item{from}{commit to compare from} + \item{conn}{the database connection} \item{collect}{whether to collect the result into R or return a \code{\link[dbplyr:tbl_lazy]{dbplyr::tbl_lazy()}} diff --git a/man/dolt-remote.Rd b/man/dolt-remote.Rd index 69b1ca1..a7749c7 100644 --- a/man/dolt-remote.Rd +++ b/man/dolt-remote.Rd @@ -9,6 +9,7 @@ \usage{ dolt_push( remote = NULL, + remote_branch = NULL, ref = NULL, set_upstream = FALSE, force = FALSE, @@ -44,6 +45,8 @@ dolt_clone( \arguments{ \item{remote}{the name of the remote. "origin" is used by default} +\item{remote_branch}{the name of the remote branch to use with set_upstream. Current local branch is used by default} + \item{ref}{the branch reference} \item{set_upstream}{whether to set the remote branch reference to track} diff --git a/man/dolt_dump.Rd b/man/dolt_dump.Rd index 7abdd91..30aea17 100644 --- a/man/dolt_dump.Rd +++ b/man/dolt_dump.Rd @@ -5,14 +5,15 @@ \title{Export data from a dolt database} \usage{ dolt_dump( - format = c("sql", "csv", "json"), + format = c("sql", "csv", "json", "parquet"), out = NULL, overwrite = FALSE, dir = Sys.getenv("DOLT_DIR", "doltdb") ) } \arguments{ -\item{format}{the export data format. One of \code{"sql"}, \code{"csv"}, or \code{"json"}} +\item{format}{the export data format. One of \code{"sql"}, \code{"csv"}, \code{"json"}, or +\code{"parquet"}} \item{out}{the location on-disk for export. In the case of \code{"sql"}, format, a single file path (default \code{doltdump.sql}), otherwise a directory for all diff --git a/man/dolt_server.Rd b/man/dolt_server.Rd index 531013d..6b7987e 100644 --- a/man/dolt_server.Rd +++ b/man/dolt_server.Rd @@ -17,7 +17,7 @@ dolt_server( read_only = FALSE, log_level = "info", log_out = NULL, - timeout = 0, + timeout = 28800000, query_parallelism = 2, max_connections = 100, config_file = Sys.getenv("DOLT_CONFIG_FILE", "") @@ -59,7 +59,7 @@ to \code{std_out()}, if \code{NULL} (default), it is suppressed. Can also take a filename. See \code{\link[processx:run]{processx::run()}}.} \item{timeout}{Defines the timeout, in seconds, used for connections -A value of \code{0} represents an infinite timeout (default \code{28800000})} +(default \code{28800000})} \item{query_parallelism}{Set the number of go routines spawned to handle each query (default \code{2})} diff --git a/vignettes/doltr.Rmd b/vignettes/doltr.Rmd new file mode 100644 index 0000000..7c6ec98 --- /dev/null +++ b/vignettes/doltr.Rmd @@ -0,0 +1,182 @@ +--- +title: "Getting Started with Doltr" +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{Getting Started with Doltr} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +```{r, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>" +) +``` + +`{doltr}` is a package to interface with [Dolt](https://www.dolthub.com), an +SQL database with git-like versioning. + +# Installation + +You will need the **dolt** command-line utility installed on your computer to use `{doltr}`. Installation instructions +for Linux, macOS, and Windows can be found [here](https://docs.dolthub.com/getting-started/installation). + +Install the R package with + +``` r +remotes::install_github("ecohealthalliance/doltr") +``` + +# Usage + +`{doltr}` package provides two [DBI-compliant drivers](https://github.com/r-dbi/DBI#dbi) +to connect to a dolt database `dolt_remote()` connects to a dolt server via TCP, +and is a thin wrapper around the [RMariaDB](https://rmariadb.r-dbi.org/) package +because Dolt shares a communication protocol with MySQL and MariaDB. +`dolt_local()` connects to a Dolt database directory locally on-disk. Behind the +scenes `dolt_local()` launches and manages a background server process, which +can also be done manually with `dolt_server()`. Both can be used to connect to +a database as you would with other DBI packages: + + +``` +library(doltr) +remote_conn <- DBI::dbConnect(dolt_remote(), dname = "dbname", username = "user", ...) +``` + +``` +local_conn <- DBI::dbConnect(dolt_local(), dir = "/path/to/my/dolt/db/directory") +``` + +Since Dolt has git-like versioning concepts, `{doltr}`'s API design includes +both components drawn from `{DBI}` and also from git interfaces like `{gert}` +and `{git2r}` (as well as Dolt's [command-line interface](https://docs.dolthub.com/interfaces/cli)). + + +`{doltr}` has the concept of a "default database" for a project. When +working with git (or `{git2r}` or `{gert}`), commands apply to the current +working directory by default. Similarly, with `{doltr}`, many commands use +the default database. The default database is set with the environment +directory `DOLT_DIR`, which is `doltdb` by default. +For a project you might set `DOLT_DIR` in a project-level `.Renviron` or +[`.env` file](https://cran.r-project.org/web/packages/dotenv/) + +To explore `{doltr}`'s capabilities, let's pull an existing database. `dolt_clone()`, +like `git clone` clones a database to a local directory, using [`DoltHub`](https://www.dolthub.com/discover) +as the default remote source (though [dolt Database remotes can be hosted elsewhere](https://www.dolthub.com/discover)). +We'll clone [`doltr/nycflights`](https://www.dolthub.com/repositories/doltr/nycflights), which contains a subset of +the data from the [{`nycflights13`} package](https://nycflights13.tidyverse.org/). + +```{r cleanfirst, include = FALSE} +if (dir.exists("nycflights")) unlink("nycflights", force = TRUE, recursive = TRUE) +``` + +```{r clone-flights} +library(doltr) +dolt_clone("doltr/nycflights") +``` + +This creates an `nycflights` directory. Let's set it as our default database for +this session: + +```{r setenv} +Sys.setenv(DOLT_DIR="nycflights") +``` + +You can use the `dolt()` function to connect to the database. `dolt()` is a +shortcut for `dbConnect(dolt_local/dolt_remote(), ...)`. It also caches the database +connection, so it can be called repeatedly in place of a connection variable. +`dolt()` is also the default argument for a database connection in many functions. + +Running `dolt()` prints a summary of the database state: + +```{r doltcmd} +dolt() +``` + +You can use `dolt()` with `{DBI}` or `{dbplyr}` functions to read from or write +to the database: + +```{r pkgs, message=FALSE} +library(DBI) +library(dbplyr) +library(dplyr) +``` + +```{r dbi} +dbListTables(dolt()) +dbReadTable(dolt(), "airlines") +``` + +```{r dbplyr} +tbl(dolt(), "flights") %>% + filter(origin == "EWR", dest == "MDW") %>% + head() %>% + collect() +``` + +```{r writetbl} +dbWriteTable(dolt(), "mtcars", mtcars) +``` + +With the last command, we changed the database by adding a table. This is +reflected in the change to the database working state when we print `dolt()` + +```{r state} +dolt() +``` + +The summary no longer says "Working database clean" but shows that the _working state_ +of the database now includes a new table called `mtcars`. As with a new file in +a git repository, we can _stage_ this table for comitting, with `dolt_add()`. +Rather than printing the whole database summary, we can get just the last +bullet with `dolt_status()` + +```{r} +dolt_add("mtcars") +dolt_status() +``` + +`dolt_status()` pretty-prints but actually yields a table of working or stages +changes to the database: + +```{r} +as.data.frame(dolt_status()) +``` + +With the table staged, we can commit it to the database and provide a message: + +```{r} +dolt_commit(message = "Add mtcars table") +dolt_status() +dolt_last_commit() +``` + +# Exploring Dolt history + +You can view the commit history of the database with `dolt_log()`, which collects +the Dolt system table `dolt_log`: + +```{r} +dolt_log() +``` + +# + +## The Connection Pane + +For RStudio users, `{doltr}` provides a [connection pane](https://db.rstudio.com/tooling/connections/) with which you can explore the database. + + +```{r, eval=FALSE} +dolt_pane() +``` + +`{doltr}`'s connection pane shows information about the versioning state of your +database in addition to your tables, the Dolt system tables and the database +information schema. + + + +[![Created by EcoHealth Alliance](figures/eha-footer.png)](https://www.ecohealthalliance.org/)