Merge pull request #138 from nt-williams/devel
Devel
nt-williams authored Jun 26, 2024
2 parents 432551c + 5dfc790 commit d16cb0f
Showing 39 changed files with 870 additions and 460 deletions.
3 changes: 3 additions & 0 deletions CRAN-SUBMISSION
@@ -0,0 +1,3 @@
Version: 1.4.0
Date: 2024-06-13 18:14:55 UTC
SHA: 2452cb048b6037b7a1ff84e89ec102ef6883ca62
9 changes: 5 additions & 4 deletions DESCRIPTION
@@ -1,6 +1,6 @@
Package: lmtp
Title: Non-Parametric Causal Effects of Feasible Interventions Based on Modified Treatment Policies
Version: 1.3.3
Version: 1.4.0
Authors@R:
c(person(given = "Nicholas",
family = "Williams",
@@ -14,7 +14,7 @@ Authors@R:
comment = c(ORCID = "0000-0001-9056-2047")))
Description: Non-parametric estimators for causal effects based on longitudinal modified treatment
policies as described in Diaz, Williams, Hoffman, and Schenck <doi:10.1080/01621459.2021.1955691>, traditional point treatment,
and traditional longitudinal effects. Continuous, binary, and categorical treatments are allowed as well are
and traditional longitudinal effects. Continuous, binary, categorical, and multivariate treatments are allowed, as are
censored outcomes. The treatment mechanism is estimated via a density ratio classification procedure
irrespective of treatment variable type. For both continuous and binary outcomes, additive treatment effects
can be calculated and relative risks and odds ratios may be calculated for binary outcomes.
@@ -24,7 +24,7 @@ License: AGPL-3
Encoding: UTF-8
LazyData: true
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.2.1
RoxygenNote: 7.3.1
Imports:
stats,
nnls,
@@ -36,7 +36,8 @@ Imports:
progressr,
data.table (>= 1.13.0),
checkmate (>= 2.1.0),
SuperLearner
SuperLearner,
schoolmath
URL: http://beyondtheate.com, https://github.com/nt-williams/lmtp
BugReports: https://github.com/nt-williams/lmtp/issues
Suggests:
3 changes: 3 additions & 0 deletions NAMESPACE
@@ -5,7 +5,9 @@ S3method(print,lmtp_contrast)
S3method(tidy,lmtp)
export(create_node_list)
export(event_locf)
export(ipsi)
export(lmtp_contrast)
export(lmtp_control)
export(lmtp_ipw)
export(lmtp_sdr)
export(lmtp_sub)
@@ -31,6 +33,7 @@ importFrom(stats,predict)
importFrom(stats,qlogis)
importFrom(stats,qnorm)
importFrom(stats,quantile)
importFrom(stats,runif)
importFrom(stats,sd)
importFrom(stats,var)
importFrom(stats,weighted.mean)
18 changes: 18 additions & 0 deletions NEWS.md
@@ -1,3 +1,21 @@
# lmtp 1.4.0

### New Features

- Can now estimate the effects of simultaneous interventions on multiple variables (see the sketch after these notes).
- New pre-packaged shift function, `ipsi()`, for estimating IPSI effects using the risk ratio.
- `lmtp_control()` now replaces extra estimator arguments.

### Bug Fixes

- Standard errors now incorporate survey weights (see issue #134).
- Fixed a bug when `shift` is `NULL` and `data` is a tibble (see issue #137).

### General

- The `intervention_type` argument has been fully deprecated.
- Now attempting to detect intervention type errors (see issue #98).
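
As a companion to the notes above, a minimal sketch of the 1.4.0 interface using the `multivariate_data` set added in this release. The shift function and `.trim` value are illustrative assumptions, not prescribed usage:

```r
library(lmtp)

# Multivariate treatment: a list of character vectors, one per time point;
# here, one time point with two simultaneous treatment variables.
fit <- lmtp_sdr(
  data = multivariate_data,
  trt = list(c("D1", "D2")),
  outcome = "Y",
  baseline = c("C1", "C2", "C3"),
  shift = function(data, trt) data[[trt]] - 0.5,  # illustrative MTP
  mtp = TRUE,
  outcome_type = "continuous",
  control = lmtp_control(.trim = 0.995)  # extra estimator args live here now
)
```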

# lmtp 1.3.3

### New Features
63 changes: 60 additions & 3 deletions R/checks.R
@@ -2,7 +2,11 @@ check_lmtp_data <- function(x, trt, outcome, baseline, time_vary, cens, id) {
for (t in 1:determine_tau(outcome, trt)) {
ci <- censored(x, cens, t)$j
di <- at_risk(x, risk_indicators(outcome), t, TRUE)
trt_t <- ifelse(length(trt) == 1, trt, trt[t])
if (length(trt) > 1) {
trt_t <- trt[[t]]
} else {
trt_t <- trt[[1]]
}
data_t <- x[ci & di, c(trt_t, baseline, unlist(time_vary[t])), drop = FALSE]

if (any(is.na(data_t))) {
@@ -15,6 +19,14 @@ check_lmtp_data <- function(x, trt, outcome, baseline, time_vary, cens, id) {

assertLmtpData <- checkmate::makeAssertionFunction(check_lmtp_data)

assert_trt <- function(trt, tau) {
is_list <- is.list(trt)
if (!isTRUE(is_list)) {
return(assertTrtCharacter(trt, tau))
}
assertTrtList(trt, tau)
}

check_trt_character <- function(trt, tau) {
is_character <- checkmate::check_character(trt)
if (!isTRUE(is_character)) {
@@ -30,6 +42,21 @@ check_trt_character <- function(trt, tau) {

assertTrtCharacter <- checkmate::makeAssertionFunction(check_trt_character)

check_trt_list <- function(trt, tau) {
is_list <- checkmate::check_list(trt)
if (!isTRUE(is_list)) {
return(is_list)
}

if (length(trt) != 1 && length(trt) != tau) {
return(paste0("'trt' should be of length 1 or ", tau))
}

TRUE
}

assertTrtList <- checkmate::makeAssertionFunction(check_trt_list)
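
For orientation: the `check_*()`/`assert*()` pairs in checks.R follow checkmate's standard pattern, where a check function returns `TRUE` or a failure message and `checkmate::makeAssertionFunction()` builds the throwing `assert*()` variant. A self-contained sketch with a hypothetical `check_positive()` (not package code):

```r
library(checkmate)

check_positive <- function(x) {
  if (!is.numeric(x) || any(x <= 0)) {
    return("Must be a positive numeric vector")
  }
  TRUE
}

# Builds assertPositive(), which throws the message above on failure
assertPositive <- makeAssertionFunction(check_positive)

assertPositive(c(1, 2, 3))  # passes; invisibly returns its argument
```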

check_reserved_names <- function(x) {
bad_names <- c("lmtp_id", "tmp_lmtp_stack_indicator", "tmp_lmtp_scaled_outcome") %in% names(x)
if (!any(bad_names)) {
@@ -135,12 +162,42 @@ assertDr <- checkmate::makeAssertionFunction(check_dr)

check_ref_class <- function(x) {
if (!is.lmtp(x)) {
is_num <- checkmate::check_number(x)
if (!isTRUE(is_num)) {
is_num <- checkmate::test_number(x)
if (isFALSE(is_num)) {
return("Must either be a single numeric value or another lmtp object")
}
}
TRUE
}

assertRefClass <- checkmate::makeAssertionFunction(check_ref_class)

check_trt_type <- function(data, trt, mtp) {
is_decimal <- vector("logical", length(trt))
for (i in seq_along(trt)) {
a <- data[[trt[i]]]
if (is.character(a) | is.factor(a)) next
is_decimal[i] <- any(schoolmath::is.decimal(a[!is.na(a)]))
}
if (any(is_decimal) & isFALSE(mtp)) {
cli::cli_warn("Detected decimalish `trt` values and {.code mtp = FALSE}. Consider setting {.code mtp = TRUE} if getting errors.")
}
}
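
`check_trt_type()` leans on `schoolmath::is.decimal()` (the dependency newly added to DESCRIPTION) as a heuristic for continuous-looking treatments. A rough illustration, assuming `is.decimal()` flags values with a fractional part:

```r
a <- c(1, 2, 3)       # integer-valued treatment: should not be flagged
b <- c(0.25, 1, 2.5)  # fractional values: should be flagged

any(schoolmath::is.decimal(a[!is.na(a)]))  # FALSE -> no warning
any(schoolmath::is.decimal(b[!is.na(b)]))  # TRUE  -> warns when mtp = FALSE
```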

check_same_weights <- function(weights) {
if (length(weights) == 1) {
check <- TRUE
} else if (length(weights) == 2) {
check <- identical(weights[[1]], weights[[2]])
} else {
check <- all(sapply(1:(length(weights) - 1), function(i) identical(weights[[i]], weights[[i + 1]])))
}

if (isFALSE(check)) {
return("Weights must all be the same.")
}
TRUE
}

assertSameWeights <- checkmate::makeAssertionFunction(check_same_weights)

14 changes: 11 additions & 3 deletions R/contrasts.R
@@ -25,6 +25,12 @@ lmtp_contrast <- function(..., ref, type = c("additive", "rr", "or")) {
assertRefClass(ref)
assertContrastType(match.arg(type), fits, .var.name = "type")

weights <- lapply(fits, function(x) x$weights)
if (isFALSE(is.numeric(ref))) {
weights <- append(weights, list(ref$weights))
}
assertSameWeights(weights)

if (is.numeric(ref)) {
type <- "additive"
message("Non-estimated reference value, defaulting type = 'additive'")
@@ -68,7 +74,7 @@ contrast_additive_single <- function(fit, ref) {
fit$id <- 1:length(eif)
}

clusters <- split(eif, fit$id)
clusters <- split(eif*fit$weights, fit$id)
j <- length(clusters)
std.error <- sqrt(var(vapply(clusters, function(x) mean(x), 1)) / j)
conf.low <- theta - qnorm(0.975) * std.error
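
The survey-weight fix is visible here: the EIF is multiplied by `fit$weights` before clustering on `id`. A standalone sketch of that variance computation (the helper name is hypothetical; the body mirrors the lines above):

```r
# Influence-function SE with weights and id clustering: weight the EIF,
# average within clusters, then take var of cluster means over j clusters.
weighted_clustered_se <- function(eif, weights, id) {
  clusters <- split(eif * weights, id)
  j <- length(clusters)
  sqrt(var(vapply(clusters, mean, numeric(1))) / j)
}

set.seed(1)
weighted_clustered_se(rnorm(100), rep(1, 100), rep(1:25, each = 4))
```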
@@ -107,7 +113,8 @@ contrast_rr <- function(fits, ref) {

contrast_rr_single <- function(fit, ref) {
theta <- fit$theta / ref$theta
log_eif <- (fit$eif / fit$theta) - (ref$eif / ref$theta)
log_eif <- (fit$eif*fit$weights / fit$theta) -
(ref$eif*ref$weights / ref$theta)

if (is.null(fit$id)) {
fit$id <- 1:length(eif)
@@ -151,7 +158,8 @@ contrast_or <- function(fits, ref) {

contrast_or_single <- function(fit, ref) {
theta <- (fit$theta / (1 - fit$theta)) / (ref$theta / (1 - ref$theta))
log_eif <- (fit$eif / (fit$theta * (1 - fit$theta))) - (ref$eif / (ref$theta * (1 - ref$theta)))
log_eif <- (fit$eif*fit$weights / (fit$theta * (1 - fit$theta))) -
(ref$eif*ref$weights / (ref$theta * (1 - ref$theta)))

if (is.null(fit$id)) {
fit$id <- 1:length(eif)
16 changes: 16 additions & 0 deletions R/data.R
@@ -82,3 +82,19 @@
#' \item{Y2}{Final outcome node.}
#' }
"sim_timevary_surv"

#' Simulated Multivariate Exposure Data
#'
#' A dataset with a continuous outcome, three baseline covariates,
#' and two treatment variables.
#'
#' @format A data frame with 2000 rows and 6 variables:
#' \describe{
#' \item{C1}{Continuous baseline variable.}
#' \item{C2}{Continuous baseline variable.}
#' \item{C3}{Continuous baseline variable.}
#' \item{D1}{Treatment variable one at baseline.}
#' \item{D2}{Treatment variable two at baseline.}
#'   \item{Y}{Continuous outcome.}
#' }
"multivariate_data"
48 changes: 28 additions & 20 deletions R/density_ratios.R
@@ -1,32 +1,36 @@
cf_r <- function(Task, learners, mtp, lrnr_folds, trim, full_fits, pb) {
fopts <- options("lmtp.bound", "lmtp.trt.length")
out <- list()

cf_r <- function(task, learners, mtp, control, pb) {
out <- vector("list", length = length(task$folds))

if (length(learners) == 1 && learners == "SL.mean") {
warning("Using 'SL.mean' as the only learner of the density ratios will always result in a misspecified model! If your exposure is randomized, consider using `c('SL.glm', 'SL.glmnet')`.",
call. = FALSE)
}

for (fold in seq_along(Task$folds)) {
for (fold in seq_along(task$folds)) {
out[[fold]] <- future::future({
options(fopts)

estimate_r(
get_folded_data(Task$natural, Task$folds, fold),
get_folded_data(Task$shifted, Task$folds, fold),
Task$trt, Task$cens, Task$risk, Task$tau, Task$node_list$trt,
learners, pb, mtp, lrnr_folds, full_fits
get_folded_data(task$natural, task$folds, fold),
get_folded_data(task$shifted, task$folds, fold),
task$trt,
task$cens,
task$risk,
task$tau,
task$node_list$trt,
learners,
pb,
mtp,
control
)
},
seed = TRUE)
}

trim_ratios(recombine_ratios(future::value(out), Task$folds), trim)
trim_ratios(recombine_ratios(future::value(out), task$folds), control$.trim)
}
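
`cf_r()` parallelizes over cross-fitting folds with futures: one `future::future()` per fold, collected by `future::value()`. A minimal sketch of the same pattern, with a toy computation standing in for `estimate_r()`:

```r
library(future)
plan(multisession)  # any plan works; plan(sequential) for serial execution

folds <- 1:5
out <- vector("list", length(folds))
for (fold in folds) {
  out[[fold]] <- future({
    mean(rnorm(1e4))  # stand-in for the per-fold density ratio estimation
  }, seed = TRUE)     # seed = TRUE requests parallel-safe RNG, as in cf_r()
}
results <- value(out)  # resolves the whole list of futures
```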

estimate_r <- function(natural, shifted, trt, cens, risk, tau, node_list, learners, pb, mtp, lrnr_folds, full_fits) {
estimate_r <- function(natural, shifted, trt, cens, risk, tau, node_list, learners, pb, mtp, control) {
densratios <- matrix(nrow = nrow(natural$valid), ncol = tau)
fits <- list()
fits <- vector("list", length = tau)

for (t in 1:tau) {
jrt <- rep(censored(natural$train, cens, t)$j, 2)
@@ -35,9 +39,13 @@ estimate_r <- function(natural, shifted, trt, cens, risk, tau, node_list, learne
jrv <- censored(natural$valid, cens, t)$j
drv <- at_risk(natural$valid, risk, t)

trt_t <- ifelse(length(trt) > 1, trt[t], trt)
if (length(trt) > 1) {
trt_t <- trt[[t]]
} else {
trt_t <- trt[[1]]
}

frv <- followed_rule(natural$valid[[trt_t]], shifted$valid[[trt_t]], mtp)
frv <- followed_rule(natural$valid[, trt_t], shifted$valid[, trt_t], mtp)

vars <- c(node_list[[t]], cens[[t]])
stacked <- stack_data(natural$train, shifted$train, trt, cens, t)
@@ -48,10 +56,10 @@ estimate_r <- function(natural, shifted, trt, cens, risk, tau, node_list, learne
learners,
"binomial",
stacked[jrt & drt, ]$lmtp_id,
lrnr_folds
control$.learners_trt_folds
)

if (full_fits) {
if (control$.return_full_fits) {
fits[[t]] <- fit
} else {
fits[[t]] <- extract_sl_weights(fit)
@@ -73,7 +81,7 @@ stack_data <- function(natural, shifted, trt, cens, tau) {
shifted_half <- natural

if (length(trt) > 1 || tau == 1) {
shifted_half[[trt[tau]]] <- shifted[[trt[tau]]]
shifted_half[, trt[[tau]]] <- shifted[, trt[[tau]]]
}

if (!is.null(cens)) {
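
The stacking above feeds the density-ratio-by-classification procedure mentioned in DESCRIPTION: label natural rows 0 and shifted rows 1, fit a binary classifier, and convert predicted probabilities into odds. A toy sketch of the idea, with a plain GLM standing in for the SuperLearner call:

```r
set.seed(1)
n <- 500
natural <- data.frame(L = rnorm(n))
natural$A <- natural$L + rnorm(n)  # treatment depends on L
shifted <- natural
shifted$A <- shifted$A + 1         # additive shift d(a, l) = a + 1

stacked <- rbind(
  cbind(natural, lmtp_stack = 0),
  cbind(shifted, lmtp_stack = 1)
)
fit <- glm(lmtp_stack ~ L + A, data = stacked, family = binomial())
p <- predict(fit, newdata = natural, type = "response")
ratio <- p / (1 - p)  # estimated density ratio of shifted vs natural law
```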