Skip to content

Commit

Permalink
Fixes for other engines to project with data.frame #136
Browse files Browse the repository at this point in the history
  • Loading branch information
Martin-Jung committed Feb 2, 2025
1 parent 1b6e0cd commit 05b87cf
Show file tree
Hide file tree
Showing 14 changed files with 319 additions and 221 deletions.
64 changes: 35 additions & 29 deletions R/engine_bart.R
Original file line number Diff line number Diff line change
Expand Up @@ -264,28 +264,30 @@ engine_bart <- function(x,
!predictors %in% vf)
}

# Prediction container
pred_cov <- model$predictors[,c('x','y',model$biodiversity[[1]]$predictors_names)]
if(any(model$predictors_types$type=='factor')){
vf <- model$predictors_types$predictors[which(model$predictors_types$type == "factor")]
# Get factors
for (i in 1:length(vf)) {
z <- explode_factor(pred_cov[[vf[i]]], name = vf[i])
# Remove variables from train_cov and append
pred_cov[[vf[i]]] <- NULL
pred_cov <- cbind(pred_cov, z)
model$predictors_types <- rbind(model$predictors_types,
data.frame(predictors = colnames(z), type = "numeric"))
}
if(!settings$get("inference_only")){
# Prediction container
pred_cov <- model$predictors[,c('x','y',model$biodiversity[[1]]$predictors_names)]
if(any(model$predictors_types$type=='factor')){
vf <- model$predictors_types$predictors[which(model$predictors_types$type == "factor")]
# Get factors
for (i in 1:length(vf)) {
z <- explode_factor(pred_cov[[vf[i]]], name = vf[i])
# Remove variables from train_cov and append
pred_cov[[vf[i]]] <- NULL
pred_cov <- cbind(pred_cov, z)
model$predictors_types <- rbind(model$predictors_types,
data.frame(predictors = colnames(z), type = "numeric"))
}

pred_cov <- pred_cov[,c("x", "y", colnames(train_cov))]
model$predictors <- pred_cov # Save new in model object
pred_cov <- pred_cov[,c("x", "y", colnames(train_cov))]
model$predictors <- pred_cov # Save new in model object

model$biodiversity[[1]]$predictors_names <- colnames(train_cov)
model$predictors_names <- colnames(pred_cov)
assertthat::assert_that(all( colnames(train_cov) %in% colnames(pred_cov) ))
model$biodiversity[[1]]$predictors_names <- colnames(train_cov)
model$predictors_names <- colnames(pred_cov)
assertthat::assert_that(all( colnames(train_cov) %in% colnames(pred_cov) ))
}
rm(train_cov, pred_cov)
}
rm(train_cov, pred_cov)

# Process and add priors if set
params <- self$get_data("params")
Expand Down Expand Up @@ -334,20 +336,24 @@ engine_bart <- function(x,
data <- subset(data, select = c('observed', model$biodiversity[[1]]$predictors_names) )
if(model$biodiversity[[1]]$family=='binomial') data$observed <- factor(data$observed)
w <- model$biodiversity[[1]]$expect # The expected weight
full <- model$predictors # All predictors

# Select predictors
full <- subset(full, select = c('x','y', model$biodiversity[[1]]$predictors_names))
full$cellid <- rownames(full) # Add rownames
full <- subset(full, stats::complete.cases(full))
# Select predictors for full prediction if needed
if(!settings$get("inference_only")){
full <- model$predictors # All predictors
full <- subset(full, select = c('x','y', model$biodiversity[[1]]$predictors_names))
full$cellid <- rownames(full) # Add rownames
full <- subset(full, stats::complete.cases(full))

# Clamp?
if( settings$get("clamp") ) full <- clamp_predictions(model, full)
# Clamp?
if( settings$get("clamp") ) full <- clamp_predictions(model, full)
assertthat::assert_that(
all( model$biodiversity[[1]]$predictors_names %in% names(full) )
)
}

assertthat::assert_that(
is.null(w) || length(w) == nrow(data),
is.formula(equation),
all( model$biodiversity[[1]]$predictors_names %in% names(full) )
is.formula(equation)
)

if(!is.Waiver(model$offset)){
Expand Down Expand Up @@ -632,7 +638,7 @@ engine_bart <- function(x,
settings$set("type", type)

# Clamp?
if( settings$get("clamp") ) newdata <- clamp_predictions(model, newdata)
if( settings$get("clamp") ) newdata <- clamp_predictions(model = model, pred = newdata)

if(!is.Waiver(settings$get('bias_variable'))){
for(i in 1:length(settings$get('bias_variable'))){
Expand Down
42 changes: 23 additions & 19 deletions R/engine_breg.R
Original file line number Diff line number Diff line change
Expand Up @@ -267,29 +267,33 @@ engine_breg <- function(x,
model$biodiversity[[1]]$predictors_types <- dplyr::filter(model$biodiversity[[1]]$predictors_types,
!predictors %in% vf)
}
rm(train_cov)

# Prediction container
pred_cov <- model$predictors[,c('x','y',model$biodiversity[[1]]$predictors_names)]
if(any(model$predictors_types$type=='factor')){
vf <- model$predictors_types$predictors[which(model$predictors_types$type == "factor")]
# Get factors
for (i in 1:length(vf)) {
z <- explode_factor(pred_cov[[vf[i]]], name = vf[i])
# Remove variables from train_cov and append
pred_cov[[vf[i]]] <- NULL
pred_cov <- cbind(pred_cov, z)
model$predictors_types <- rbind(model$predictors_types,
data.frame(predictors = colnames(z), type = "numeric"))
}
# Create prediction container if needed
if(!settings$get('inference_only')){
# Prediction container
pred_cov <- model$predictors[,c('x','y',model$biodiversity[[1]]$predictors_names)]
if(any(model$predictors_types$type=='factor')){
vf <- model$predictors_types$predictors[which(model$predictors_types$type == "factor")]
# Get factors
for (i in 1:length(vf)) {
z <- explode_factor(pred_cov[[vf[i]]], name = vf[i])
# Remove variables from train_cov and append
pred_cov[[vf[i]]] <- NULL
pred_cov <- cbind(pred_cov, z)
model$predictors_types <- rbind(model$predictors_types,
data.frame(predictors = colnames(z), type = "numeric"))
}

pred_cov <- pred_cov[,c("x", "y", colnames(train_cov))]
model$predictors <- pred_cov # Save new in model object
pred_cov <- pred_cov[,c("x", "y", colnames(train_cov))]
model$predictors <- pred_cov # Save new in model object

model$biodiversity[[1]]$predictors_names <- colnames(train_cov)
model$predictors_names <- colnames(pred_cov)
assertthat::assert_that(all( colnames(train_cov) %in% colnames(pred_cov) ))
model$biodiversity[[1]]$predictors_names <- colnames(train_cov)
model$predictors_names <- colnames(pred_cov)
assertthat::assert_that(all( colnames(train_cov) %in% colnames(pred_cov) ))
}
rm(pred_cov)
}
rm(train_cov, pred_cov)

# Instead of invisible return the model object
return( model )
Expand Down
52 changes: 27 additions & 25 deletions R/engine_gdb.R
Original file line number Diff line number Diff line change
Expand Up @@ -330,48 +330,50 @@ engine_gdb <- function(x,
data.frame(observed = model$biodiversity[[1]]$observations[,'observed', drop = TRUE]) )
w <- model$biodiversity[[1]]$expect

# Select predictors
full <- model$predictors
full <- subset(full, select = c('x','y',model$biodiversity[[1]]$predictors_names))
full$cellid <- rownames(full) # Add row.names
full$w <- model$exposure
full$Intercept <- 1
full <- subset(full, stats::complete.cases(full))
# Select predictors for full if needed
if(!settings$get('inference_only')){
full <- model$predictors
full <- subset(full, select = c('x','y',model$biodiversity[[1]]$predictors_names))
full$cellid <- rownames(full) # Add row.names
full$w <- model$exposure
full$Intercept <- 1
full <- subset(full, stats::complete.cases(full))
# Clamp?
if( settings$get("clamp") ) full <- clamp_predictions(model, full)

# Clamp?
if( settings$get("clamp") ) full <- clamp_predictions(model, full)
full$w <- scale_weight(full$w)
assertthat::assert_that(
all(model$biodiversity[[1]]$predictors_names %in% names(full)),
all(names(full[,model$biodiversity[[1]]$predictors_names]) %in% names(data)),
all( model$biodiversity[[1]]$predictors_names %in% names(full) )
)
} else { full <- NULL }

# Rescale exposure
check_package('scales')
w <- scales::rescale(w, to = c(1e-6, 1))
full$w <- scales::rescale(full$w, to = c(1e-6, 1))
if(anyNA(w)){
w[is.na(w)] <- 1e-6
full$w[is.na(full$w)] <- 1e-6
}
w <- scale_weight(w)

assertthat::assert_that(
is.null(w) || length(w) == nrow(data),
is.formula(equation),
all(model$biodiversity[[1]]$predictors_names %in% names(full)),
all(names(full[,model$biodiversity[[1]]$predictors_names]) %in% names(data)),
all( model$biodiversity[[1]]$predictors_names %in% names(full) )
msg = "(Internal) Number of weights do not match number of observations?"
)

if(!is.Waiver(model$offset)){
# Add offset to full prediction and load vector
n <- data.frame(model$offset[as.numeric(full$cellid), "spatial_offset"], model$offset[as.numeric(full$cellid), "spatial_offset"] )
names(n) <- c( "spatial_offset", paste0('offset(',"spatial_offset",')') )
# Add weights
# n <- n + full$w
full <- cbind(full, n)
# And for biodiversity object
n <- cbind(model$biodiversity[[1]]$offset[,"spatial_offset"],
model$biodiversity[[1]]$offset[,"spatial_offset"]) |> as.data.frame()
names(n) <- c( "spatial_offset", paste0('offset(',"spatial_offset",')') )
# Add weights
# n <- n + w
data <- cbind(data, n)
if(!settings$get("inference_only")){
# Add offset to full prediction and load vector
n <- data.frame(model$offset[as.numeric(full$cellid), "spatial_offset"], model$offset[as.numeric(full$cellid), "spatial_offset"] )
names(n) <- c( "spatial_offset", paste0('offset(',"spatial_offset",')') )
# Add weights
# n <- n + full$w
full <- cbind(full, n)
}
}

# --- #
Expand Down
41 changes: 22 additions & 19 deletions R/engine_stan.R
Original file line number Diff line number Diff line change
Expand Up @@ -150,25 +150,28 @@ engine_stan <- function(x,
# Set cores
options(mc.cores = self$stan_param$cores)

# FIXME: Stan should handle factors directly. For now outsourced to split up
if(any(model$predictors_types$type=="factor")){
vf <- model$predictors_types$predictors[model$predictors_types$type=="factor"]
for(k in vf){
o <- explode_factor(model$predictors[[k]],name = k)
model$predictors <- cbind(model$predictors, o)
model$predictors_names <- c(model$predictors_names, colnames(o))
model$predictors_types <- rbind(model$predictors_types,
data.frame(predictors = colnames(o), type = "numeric") )
# Finally remove the original column from the predictor object
model$predictors[[k]] <- NULL
model$predictors_names <- model$predictors_names[-which( model$predictors_names == k )]
model$predictors_types <- subset(model$predictors_types, subset = predictors != k)
# Explode the columns in the raster object
model$predictors_object$data <- c(
model$predictors_object$data,
explode_factorized_raster(model$predictors_object$data[[k]])
)
model$predictors_object$data <- terra::subset(model$predictors_object$data, k, negate = TRUE)
if(!settings$get('inference_only')){
# FIXME: Stan should handle factors directly. For now outsourced to split up
cli::cli_alert_warning("Splitting factors up for prediction!")
if(any(model$predictors_types$type=="factor")){
vf <- model$predictors_types$predictors[model$predictors_types$type=="factor"]
for(k in vf){
o <- explode_factor(model$predictors[[k]],name = k)
model$predictors <- cbind(model$predictors, o)
model$predictors_names <- c(model$predictors_names, colnames(o))
model$predictors_types <- rbind(model$predictors_types,
data.frame(predictors = colnames(o), type = "numeric") )
# Finally remove the original column from the predictor object
model$predictors[[k]] <- NULL
model$predictors_names <- model$predictors_names[-which( model$predictors_names == k )]
model$predictors_types <- subset(model$predictors_types, subset = predictors != k)
# Explode the columns in the raster object
model$predictors_object$data <- c(
model$predictors_object$data,
explode_factorized_raster(model$predictors_object$data[[k]])
)
model$predictors_object$data <- terra::subset(model$predictors_object$data, k, negate = TRUE)
}
}
}

Expand Down
Loading

0 comments on commit 05b87cf

Please sign in to comment.