tidymodels · simonpcouch · Apr 25, 2024 · Apr 15, 2024 · Apr 15, 2024 · Apr 15, 2024
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -6,18 +6,26 @@ Authors@R: c(
     person("Hannah", "Frick", , "[email protected]", role = "aut"),
     person("Emil", "HvitFeldt", , "[email protected]", role = "aut"),
     person("Max", "Kuhn", , "[email protected]", role = c("aut", "cre")),
-    person(given = "Posit Software, PBC", role = c("cph", "fnd"))
+    person("Posit Software, PBC", role = c("cph", "fnd"))
   )
 Description: Sandbox for a postprocessor object.
 License: MIT + file LICENSE
+URL: https://github.com/tidymodels/container
+BugReports: https://github.com/tidymodels/container/issues
+Imports: 
+    cli,
+    dplyr,
+    generics,
+    hardhat,
+    probably,
+    purrr,
+    rlang (>= 1.1.0),
+    tibble,
+    tidyselect
 Suggests: 
+    modeldata,
     testthat (>= 3.0.0)
 Config/testthat/edition: 3
 Encoding: UTF-8
 Roxygen: list(markdown = TRUE)
 RoxygenNote: 7.3.1
-URL: https://github.com/tidymodels/container
-BugReports: https://github.com/tidymodels/container/issues
-Imports: 
-    cli,
-    rlang (>= 1.1.0)
diff --git a/NAMESPACE b/NAMESPACE
@@ -1,6 +1,63 @@
 # Generated by roxygen2: do not edit by hand
 
+S3method(fit,container)
+S3method(fit,equivocal_zone)
+S3method(fit,numeric_calibration)
+S3method(fit,numeric_range)
+S3method(fit,predictions_custom)
+S3method(fit,probability_calibration)
+S3method(fit,probability_threshold)
+S3method(predict,container)
+S3method(predict,equivocal_zone)
+S3method(predict,numeric_calibration)
+S3method(predict,numeric_range)
+S3method(predict,predictions_custom)
+S3method(predict,probability_calibration)
+S3method(predict,probability_threshold)
+S3method(print,container)
+S3method(print,equivocal_zone)
+S3method(print,numeric_calibration)
+S3method(print,numeric_range)
+S3method(print,predictions_custom)
+S3method(print,probability_calibration)
+S3method(print,probability_threshold)
+S3method(required_pkgs,equivocal_zone)
+S3method(required_pkgs,numeric_calibration)
+S3method(required_pkgs,numeric_range)
+S3method(required_pkgs,predictions_custom)
+S3method(required_pkgs,probability_calibration)
+S3method(required_pkgs,probability_threshold)
+S3method(tunable,equivocal_zone)
+S3method(tunable,numeric_calibration)
+S3method(tunable,numeric_range)
+S3method(tunable,predictions_custom)
+S3method(tunable,probability_calibration)
+S3method(tunable,probability_threshold)
+export("%>%")
+export(adjust_equivocal_zone)
+export(adjust_numeric_calibration)
+export(adjust_numeric_range)
+export(adjust_predictions_custom)
+export(adjust_probability_calibration)
+export(adjust_probability_threshold)
+export(container)
+export(extract_parameter_dials)
+export(extract_parameter_set_dials)
+export(fit)
+export(required_pkgs)
+export(tidy)
+export(tunable)
+export(tune_args)
 import(rlang)
 importFrom(cli,cli_abort)
 importFrom(cli,cli_inform)
 importFrom(cli,cli_warn)
+importFrom(dplyr,"%>%")
+importFrom(generics,fit)
+importFrom(generics,required_pkgs)
+importFrom(generics,tidy)
+importFrom(generics,tunable)
+importFrom(generics,tune_args)
+importFrom(hardhat,extract_parameter_dials)
+importFrom(hardhat,extract_parameter_set_dials)
+importFrom(stats,predict)
diff --git a/R/container-package.R b/R/container-package.R
@@ -1,8 +1,12 @@
 #' @import rlang
 #' @importFrom cli cli_abort cli_warn cli_inform
+#' @importFrom stats predict
 #' @keywords internal
 "_PACKAGE"
 
 ## usethis namespace: start
+utils::globalVariables("data")
 ## usethis namespace: end
 NULL
+
+
diff --git a/R/container.R b/R/container.R
@@ -0,0 +1,174 @@
+#' Declare post-processing for model predictions
+#'
+#' Creates a container with no operations attached and an empty prototype.
+#'
+#' @param mode The model's mode, one of `"unknown"`, `"classification"`, or
+#' `"regression"`. Modes of `"censored regression"` are not currently supported.
+#' @param type The model sub-type. Possible values are `"unknown"`, `"regression"`,
+#' `"binary"`, or `"multiclass"`.
+#' @param outcome The name of the outcome variable.
+#' @param estimate The name of the point estimate (e.g. predicted class).
+#' @param probabilities The names of class probability estimates (if any). For
+#' classification, these should be given in the order of the factor levels of
+#' the `estimate`.
+#' @param time The name of the predicted event time. (not yet supported)
+#' @param call The call to be displayed in warnings or errors.
+#' @return An object of class `container` holding an empty list of operations.
+#' @examples
+#' container()
+#' @export
+container <- function(mode = "unknown", type = "unknown", outcome = character(0),
+                      estimate = character(0), probabilities = character(0),
+                      time = character(0), call = rlang::current_env()) {
+  # Column roles exactly as supplied; the sub-type is recorded here as well.
+  column_roles <- list(
+    outcome = outcome, type = type, estimate = estimate,
+    probabilities = probabilities, time = time
+  )
+
+  new_container(
+    mode = mode,
+    type = type,
+    operations = list(),
+    columns = column_roles,
+    ptype = tibble::tibble(),
+    call = call
+  )
+}
+
+# Low-level constructor: validates its inputs and assembles the classed list.
+# `call` is passed to the cli_abort() calls and to validate_oper_order().
+new_container <- function(mode, type, operations, columns, ptype, call) {
+  mode <- rlang::arg_match0(mode, c("unknown", "regression", "classification", "censored regression"))
+  # A regression mode fixes the sub-type regardless of what was supplied.
+  if ( mode == "regression" ) {
+    type <- "regression"
+  }
+  type <- rlang::arg_match0(type, c("unknown", "regression", "binary", "multiclass"))
+
+  if ( !is.list(operations) ) {
+    cli::cli_abort("The {.arg operations} argument should be a list.", call = call)
+  }
+
+  # Fail if ANY element is not an operation; report names, else positions.
+  is_oper <- purrr::map_lgl(operations, ~ inherits(.x, "operation"))
+  if ( !all(is_oper) ) {
+    bad_oper <- names(operations)[!is_oper] %||% which(!is_oper)
+    cli::cli_abort("The following {.arg operations} do not have the class \\
+                   {.val operation}: {bad_oper}.", call = call)
+  }
+
+  # validate operation order and check duplicates
+  validate_oper_order(operations, mode, call)
+
+  # todo: check columns
+  res <- list(mode = mode, type = type, operations = operations,
+              columns = columns, ptype = ptype)
+  class(res) <- "container"
+  res
+}
+
+#' @export
+print.container <- function(x, ...) {
+  # todo emulate Emil's recipe printing
+  num_op <- length(x$operations)
+  cli::cli_inform("{x$type} post-processing object with {num_op} operation{?s}")
+
+  if (num_op > 0) {
+    cat("\n")
+    # Delegate to each operation's own print method, in order.
+    purrr::walk(x$operations, print)
+  }
+
+  invisible(x)
+}
+
+
+# ------------------------------------------------------------------------------
+
+#' @export
+fit.container <- function(object, .data, outcome, estimate, probabilities = c(),
+                          time = c(), call = rlang::current_env(), ...) {
+  # Resolve the column roles, rebuild the container around them, then train
+  # each operation in order. `call` is forwarded to new_container().
+  # ------------------------------------------------------------------------------
+  # set columns via tidyselect
+
+  dat <- list()
+  dat$outcome <- names(tidyselect::eval_select(rlang::enquo(outcome), .data))
+  dat$estimate <- names(tidyselect::eval_select(rlang::enquo(estimate), .data))
+
+  probabilities <- tidyselect::eval_select(rlang::enquo(probabilities), .data)
+  if (length(probabilities) > 0) {
+    dat$probabilities <- names(probabilities)
+  } else {
+    dat$probabilities <- character(0)
+  }
+
+  time <- tidyselect::eval_select(rlang::enquo(time), .data)
+  if (length(time) > 0) {
+    dat$time <- names(time)
+  } else {
+    dat$time <- character(0)
+  }
+
+  # drop = FALSE keeps a data frame even if only one column is retained.
+  .data <- .data[, names(.data) %in% unlist(dat), drop = FALSE]
+  .data <- tibble::as_tibble(.data)
+  ptype <- .data[0, ]
+
+  object <- set_container_type(object, .data[[ dat$outcome ]])
+  object <- new_container(object$mode, object$type,
+                          operations = object$operations,
+                          columns = dat, ptype = ptype, call = call)
+
+  # ------------------------------------------------------------------------------
+  # seq_along() makes this loop a no-op for a container with no operations.
+  # Each operation is fit on `.data` as already adjusted by the operations
+  # before it, because `.data` is overwritten with every predict() result.
+  for (op in seq_along(object$operations)) {
+    object$operations[[op]] <- fit(object$operations[[op]], .data, object)
+    .data <- predict(object$operations[[op]], .data, object)
+  }
+
+  # todo Add a fitted container class?
+  object
+}
+
+#' @export
+predict.container <- function(object, new_data, ...) {
+  # Run each operation's predict() in order, threading `new_data` through.
+  # seq_along() means a container without operations just returns a tibble.
+  # todo validate levels/classes
+  for (op in seq_along(object$operations)) {
+    new_data <- predict(object$operations[[op]], new_data, object)
+  }
+  tibble::as_tibble(new_data)
+}
+
+# Infer the container's sub-type from the outcome `y` while it is still
+# "unknown"; an already declared sub-type is returned untouched.
+set_container_type <- function(object, y) {
+  if (object$type != "unknown") {
+    return(object)
+  }
+
+  if (!is.factor(y) && !is.numeric(y)) {
+    cli::cli_abort("Only factor and numeric outcomes are currently supported.")
+  }
+  if (is.factor(y)) {
+    # Two levels means binary; any other number of levels is multiclass.
+    object$type <- if (length(levels(y)) == 2) "binary" else "multiclass"
+  } else {
+    object$type <- "regression"
+  }
+  object
+}
+
+# todo: where to validate #levels?
+# todo setup eval_time
+# todo missing methods:
+# todo tune_args
+# todo tidy
+# todo extract_parameter_set_dials
+
diff --git a/R/equivocal_zone.R b/R/equivocal_zone.R
@@ -0,0 +1,110 @@
+#' Apply an equivocal zone to a binary classification model.
+#'
+#' @param x A [container()].
+#' @param value A numeric value (between zero and 1/2) or [hardhat::tune()]. The
+#' value is the size of the buffer around the threshold.
+#' @param threshold A numeric value (between zero and one) or [hardhat::tune()].
+#' The probability cutoff handed to [probably::make_two_class_pred()].
+#' @return The [container()] `x` with an equivocal zone operation appended.
+#' @examples
+#' library(dplyr)
+#' library(modeldata)
+#'
+#' post_obj <-
+#'   container(mode = "classification") %>%
+#'   adjust_equivocal_zone(value = 1 / 4)
+#'
+#' post_res <- fit(
+#'   post_obj,
+#'   two_class_example,
+#'   outcome = c(truth),
+#'   estimate = c(predicted),
+#'   probabilities = c(Class1, Class2)
+#' )
+#'
+#' predict(post_res, two_class_example)
+#' @export
+adjust_equivocal_zone <- function(x, value = 0.1, threshold = 1 / 2) {
+  # Values still marked with tune() cannot be range-checked yet.
+  if ( !is_tune(value) ) {
+    check_number_decimal(value, min = 0, max = 1 / 2)
+  }
+  if ( !is_tune(threshold) ) {
+    check_number_decimal(threshold, min = 10^-10, max = 1 - 10^-10)
+  }
+  op <- new_operation(
+    "equivocal_zone",
+    inputs = "probability",
+    outputs = "class",
+    arguments = list(value = value, threshold = threshold),
+    results = list(trained = FALSE)
+  )
+
+  # Column roles live in `x$columns`; carry them forward with the new operation.
+  new_container(
+    mode = x$mode,
+    type = x$type,
+    operations = c(x$operations, list(op)),
+    columns = x$columns,
+    ptype = x$ptype,
+    call = rlang::current_env()
+  )
+}
+
+#' @export
+print.equivocal_zone <- function(x, ...) {
+  # A buffer still marked with tune() has no numeric size to report yet.
+  if ( is_tune(x$arguments$value) ) {
+    cli::cli_inform("Add equivocal zone to optimized value.")
+    return(invisible(x))
+  }
+  # Report the buffer to three significant digits, flagging trained operations.
+  trn <- ifelse(x$results$trained, " [trained]", "")
+  cli::cli_inform(c("Add equivocal zone of size   \\
+                    {signif(x$arguments$value, digits = 3)}{trn}"))
+  invisible(x)
+}
+
+#' @export
+fit.equivocal_zone <- function(object, data, parent = NULL, ...) {
+  # Nothing is estimated from `data`: the operation is rebuilt from its own
+  # fields, with its results now flagging it as trained.
+  fitted_results <- list(trained = TRUE)
+  new_operation(
+    class(object), inputs = object$inputs, outputs = object$outputs,
+    arguments = object$arguments, results = fitted_results
+  )
+}
+
+#' @export
+predict.equivocal_zone <- function(object, new_data, parent, ...) {
+  # Column roles come from the parent; only the first probability column is used.
+  est_nm <- parent$columns$estimate
+  prob_nm <- parent$columns$probabilities[1]
+  lvls <- levels(new_data[[ est_nm ]])
+  cls_pred <- probably::make_two_class_pred(new_data[[prob_nm]], levels = lvls,
+                                            buffer = object$arguments$value,
+                                            threshold = object$arguments$threshold)
+  new_data[[ est_nm ]] <- cls_pred # todo convert to factor?
+  new_data
+}
+
+#' @export
+required_pkgs.equivocal_zone <- function(x, ...) {
+  c("container", "probably") # probably supplies make_two_class_pred()
+}
+
+#' @export
+tunable.equivocal_zone <- function(x, ...) {
+  # One row: a parameter named "buffer" whose dials function is dials::buffer.
+  dials_info <- list(pkg = "dials", fun = "buffer")
+  tibble::tibble(
+    name = "buffer", call_info = list(dials_info), source = "container",
+    component = "equivocal_zone", component_id = "equivocal_zone"
+  )
+}
+
+# todo missing methods:
+# todo tune_args
+# todo tidy
+# todo extract_parameter_set_dials