diff --git a/NAMESPACE b/NAMESPACE index ce4a5818..6665b1d9 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -135,6 +135,7 @@ export(predictor_homogenize_na) export(predictor_transform) export(priors) export(project.BiodiversityScenario) +export(project.DistributionModel) export(pseudoabs_settings) export(render_html) export(rm_biodiversity) diff --git a/NEWS.md b/NEWS.md index 2e4efc72..d15c2932 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,6 +1,8 @@ # ibis.iSDM 0.1.6 (current dev branch) #### New features +* Support for [`data.frame`] as predictors in `add_predictors()` #136 +* Convenience function to allow [`data.frame`] and [`SpatRaster`] to be supplied directly via `project()` #136 #### Minor improvements and bug fixes * IIASA internal functionalities such as preparation of GLOBIOM data have been transferred to [BNRTools](https://github.com/iiasa/BNRTools) diff --git a/R/add_predictors.R b/R/add_predictors.R index 76342470..e7f0650a 100644 --- a/R/add_predictors.R +++ b/R/add_predictors.R @@ -10,7 +10,7 @@ NULL #' create derivates of provided predictors. #' #' @param x [distribution()] (i.e. [`BiodiversityDistribution-class`]) object. -#' @param env A [`SpatRaster`] or [`stars`] object. +#' @param env A [`SpatRaster`], [`stars`] or [`data.frame`] object. #' @param names A [`vector`] of character names describing the environmental #' stack in case they should be renamed. #' @param transform A [`vector`] stating whether predictors should be preprocessed @@ -49,9 +49,9 @@ NULL #' * \code{'pca'} - Converts the predictors to principal components. Note that this #' results in a renaming of the variables to principal component axes! #' * \code{'scale'} - Transforms all predictors by applying [scale] on them. -#' * \code{'norm'} - Normalizes all predictors by transforming them to a scale from 0 to 1. +#' * \code{'norm'} - Normalizes all predictors by transforming them to a scale from \code{0} to \code{1}. #' * \code{'windsor'} - Applies a windsorization to the target predictors. By default -#' this effectively cuts the predictors to the 0.05 and 0.95, thus helping to +#' this effectively cuts the predictors to the \code{0.05} and \code{0.95}, thus helping to #' remove extreme outliers. #' #' Available options for creating derivates are: @@ -79,7 +79,7 @@ NULL #' Certain names such \code{"offset"} are forbidden as predictor variable names. #' The function will return an error message if these are used. #' -#' Some engines use binary variables regardless of the parameter \code{explode_factors} +#' Some engines use binary variables regardless of the parameter \code{"explode_factors"} #' set here. #' #' @examples @@ -292,6 +292,72 @@ methods::setMethod( } ) +#' @rdname add_predictors +methods::setMethod( + "add_predictors", + methods::signature(x = "BiodiversityDistribution", env = "data.frame"), + function(x, env, names = NULL, priors = NULL, state = NULL, ... ) { + assertthat::assert_that(inherits(x, "BiodiversityDistribution"), + is.data.frame(env), + is.null(names) || assertthat::is.scalar(names) || is.vector(names), + is.null(priors) || inherits(priors,'PriorList'), + is.matrix(state) || is.null(state) + ) + # Messenger + if(getOption('ibis.setupmessages', default = TRUE)) myLog('[Setup]','green','Adding predictors...') + + if(!is.null(names)) { + assertthat::assert_that(colnames(env) == length(names), + all(is.character(names)), + msg = 'Provided names not of same length as environmental data.') + # Set names of env + colnames(env) <- names + } + + # Check that all names allowed + problematic_names <- grep("offset|w|weight|spatial_offset|observed|Intercept|spatial.field", names(env),fixed = TRUE) + if( length(problematic_names)>0 ){ + cli::cli_abort(paste0("Some predictor names are not allowed as they might interfere with model fitting:", paste0(names(env)[problematic_names],collapse = " | "))) + } + + # Make a clone copy of the object + y <- x$clone(deep = TRUE) + + # If priors have been set, save them in the distribution object + if(!is.null(priors)) { + assertthat::assert_that( all( priors$varnames() %in% names(env) ) ) + y <- y$set_priors(priors) + } + + # Grep all vars that are not x or y + vars <- colnames(env) + assertthat::assert_that(all(c("x","y") %in% vars), + msg = "Coordinates x & y need to be in the supplied data.frame!") + vars <- grep("x|y", vars,value = TRUE, invert = TRUE,fixed = FALSE,useBytes = TRUE) + assertthat::assert_that( + nrow(env)>1, length(vars)>0, + msg = "Supplied data.frame not correct?" + ) + + # Check for attribute to this object to keep track of it + transform <- attr(env,'transform') + if(is.null(transform)) transform <- "none" + + # Check whether predictors already exist, if so overwrite + if(!is.Waiver(x$predictors)) myLog('[Setup]','yellow','Overwriting existing predictors.') + + # Sanitize names if specified + if(getOption('ibis.cleannames', default = TRUE)) names(env) <- sanitize_names(names(env)) + + # Finally set the data to the BiodiversityDistribution object + pd <- PredictorDataset$new(id = new_id(), + data = env, + transformed = ifelse('none' %notin% transform, TRUE, FALSE ), + ...) + y$set_predictors(pd) + } +) + # Add elevational delineation as predictor ---- #' Create lower and upper limits for an elevational range and add them as diff --git a/R/class-predictors.R b/R/class-predictors.R index 354779e2..0dffad67 100644 --- a/R/class-predictors.R +++ b/R/class-predictors.R @@ -23,11 +23,13 @@ PredictorDataset <- R6::R6Class( #' @field name A name for this object. #' @field transformed Saves whether the predictors have been transformed somehow. #' @field timeperiod A timeperiod field + #' @field is.spatial A [`logical`] flag on whether the predictor is spatial or not. id = character(0), data = new_waiver(), name = character(0), transformed = logical(0), timeperiod = new_waiver(), + is.spatial = NULL, #' @description #' Initializes the object and creates an empty list @@ -50,6 +52,8 @@ PredictorDataset <- R6::R6Class( for(el in names(dots)){ self[[el]] <- dots[[el]] } + # Check for spatial + self$is.spatial <- ifelse(is.Raster(data) || inherits(data, "stars"), TRUE, FALSE) }, #' @description @@ -117,7 +121,7 @@ PredictorDataset <- R6::R6Class( terra::as.data.frame(self$data, xy = TRUE, na.rm = na.rm, ...) } } else { - # For scenario files + # For scenario files and other data.frames as.data.frame( self$data ) } } else self$data @@ -131,18 +135,22 @@ PredictorDataset <- R6::R6Class( # Get data d <- self$get_data() if(is.Waiver(d)) return(new_waiver()) - if(!inherits(d, 'stars')){ + if(is.Raster(d)){ # Try and get a z dimension from the raster object terra::time(d) } else { - # Get dimensions - o <- stars::st_dimensions(d) - # Take third entry as the one likely to be the time variable - return( - to_POSIXct( - stars::st_get_dimension_values(d, names(o)[3], center = TRUE) + if(inherits(d, 'stars')){ + # Get dimensions + o <- stars::st_dimensions(d) + # Take third entry as the one likely to be the time variable + return( + to_POSIXct( + stars::st_get_dimension_values(d, names(o)[3], center = TRUE) + ) ) - ) + } else { + return( new_waiver() ) + } } }, @@ -150,6 +158,10 @@ PredictorDataset <- R6::R6Class( #' Get Projection #' @return A [`vector`] with the geographical projection of the object. get_projection = function(){ + if(is.data.frame(self$data)){ + cli::cli_alert_danger("Predictors not in spatial format but projection requested?") + return( NULL ) + } assertthat::assert_that(is.Raster(self$data) || inherits(self$data,'stars')) sf::st_crs(self$data) }, @@ -158,6 +170,10 @@ PredictorDataset <- R6::R6Class( #' Get Resolution #' @return A [`numeric`] [`vector`] with the spatial resolution of the data. get_resolution = function(){ + if(is.data.frame(self$data)){ + cli::cli_alert_danger("Predictors not in spatial format but resolution requested?") + return( NULL ) + } assertthat::assert_that(is.Raster(self$data) || inherits(self$data,'stars')) if(is.Raster(self$data)){ terra::res(self$data) @@ -170,6 +186,10 @@ PredictorDataset <- R6::R6Class( #' Get Extent of predictors #' @return A [`numeric`] [`vector`] with the spatial resolution of the data. get_ext = function(){ + if(is.data.frame(self$data)){ + cli::cli_alert_danger("Predictors not in spatial format but extent requested?") + return( NULL ) + } assertthat::assert_that(is.Raster(self$data) || inherits(self$data,'stars')) if(is.Raster(self$data)){ terra::ext(self$get_data()) |> sf::st_bbox() @@ -181,12 +201,17 @@ PredictorDataset <- R6::R6Class( #' @description #' Utility function to clip the predictor dataset by another dataset #' @details - #' This code now also + #' This code now also is able to determine the temporally closest + #' layer. In case a [`data.frame`] exists as predictor, only that is returned. #' - #' @param pol A [`sf`] object used for cropping the data + #' @param pol A [`sf`] object used for cropping the data. #' @param apply_time A [`logical`] flag indicating if time should be acknowledged in cropping. #' @return Invisible TRUE crop_data = function(pol, apply_time = FALSE){ + if(is.data.frame(self$data)){ + cli::cli_abort("Predictors not in spatial format but should be cropped with another layer?") + return( NULL ) + } assertthat::assert_that(is.Raster(self$data) || inherits(self$data,'stars'), inherits(pol, 'sf'), is.logical(apply_time), @@ -256,6 +281,10 @@ PredictorDataset <- R6::R6Class( #' @param ... Any other parameters passed on to masking. #' @return Invisible mask = function(mask, inverse = FALSE, ...){ + if(is.data.frame(self$data)){ + cli::cli_abort("Predictors not in spatial format but should be masked with another layer?") + return( NULL ) + } if(inherits(self$get_data(), "stars")) invisible(self) # Check whether prediction has been created prediction <- self$get_data(df = FALSE) @@ -282,13 +311,18 @@ PredictorDataset <- R6::R6Class( #' @param value A new [`SpatRaster`] or [`stars`] object. #' @return This object set_data = function(value){ - assertthat::assert_that(is.Raster(value) || inherits(value, "stars")) + assertthat::assert_that((is.Raster(value) || inherits(value, "stars"))||is.data.frame(value)) if(is.Raster(self$get_data()) && is.Raster(value)){ assertthat::assert_that(is_comparable_raster(self$get_data(), value)) self$data <- suppressWarnings( c(self$get_data(), value) ) } else if(inherits(self$get_data(), "stars") && is.Raster(value)) { # Stars assumed self$data <- st_add_raster(self$get_data(), value) + } else if(is.data.frame(self$get_data())) { + # Data.frame supplied + assertthat::assert_that(nrow(value)>0, + nrow(value)==nrow(self$data)) + self$data <- cbind(self$get_data(), value) } else { # Simply try combining them self$data <- suppressWarnings( c(self$get_data(), value) ) @@ -354,10 +388,16 @@ PredictorDataset <- R6::R6Class( summary( as.data.frame(d) ) ) } else { - # Assume raster - return( - terra::summary( d, digits = digits) - ) + if(is.Raster(d)){ + # Assume raster + return( + terra::summary( d, digits = digits) + ) + } else { + return( + summary(d) + ) + } } } rm(d) @@ -383,6 +423,15 @@ PredictorDataset <- R6::R6Class( ) }, + #' @description + #' Is Predictor dataset spatial? + #' @return A [`logical`] flag. + is_spatial = function(){ + return( + self$is.spatial + ) + }, + #' @description #' Get transformation params. #' @seealso [predictor_transform()] @@ -411,6 +460,8 @@ PredictorDataset <- R6::R6Class( ncell = function() { if(inherits(self$get_data(),'SpatRaster')) terra::ncell(self$get_data()) + else if(is.data.frame(self$get_data())) + nrow(self$get_data()) else terra::ncell(self$get_data()) |> as.numeric() }, @@ -424,8 +475,17 @@ PredictorDataset <- R6::R6Class( if(is.Raster(self$data)){ terra::plot( self$data, col = ibis_colours[['viridis_cividis']] ) } else { - # Assume stars scenario files - stars:::plot.stars(self$data, col = ibis_colours[['viridis_cividis']]) + if(inherits(self$data, "stars")){ + # Assume stars scenario files + stars:::plot.stars(self$data, col = ibis_colours[['viridis_cividis']]) + } else { + if(ncol(self$data)>5) cli::cli_alert_info("Printing only the first 5 columns") + if(ncol(self$data)>5) { + graphics::pairs.default(self$data[,1:5]) + } else { + graphics::pairs.default(self$data) + } + } } graphics::par(par.ori) } @@ -433,7 +493,5 @@ PredictorDataset <- R6::R6Class( # Any private entries private = list( - finalize = function() { - } ) ) diff --git a/R/engine_bart.R b/R/engine_bart.R index e9d05a23..cefc6961 100644 --- a/R/engine_bart.R +++ b/R/engine_bart.R @@ -78,25 +78,14 @@ engine_bart <- function(x, if(nburn > iter) nburn <- floor( iter / 4) # Create a background raster - if(is.Waiver(x$predictors)){ - # Create from background - template <- terra::rast( - ext = terra::ext(x$background), - crs = terra::crs(x$background), - res = c(diff( (sf::st_bbox(x$background)[c(1,3)]) ) / 100, # Simplified assumption for resolution - diff( (sf::st_bbox(x$background)[c(1,3)]) ) / 100 - ) - ) - } else { - # If predictor existing, use them - template <- emptyraster(x$predictors$get_data() ) - } + template <- create_background(x) - # Burn in the background - template <- terra::rasterize(x$background, template, field = 0) - - # mask template where all predictor layers are NA; change na.rm = FALSE for comeplete.cases - if (!is.Waiver(x$predictors)) template <- terra::mask(template, sum(x$predictors$get_data(), na.rm = TRUE)) + # mask template where all predictor layers are NA; change na.rm = FALSE for complete.cases + if (!is.Waiver(x$predictors)){ + if(x$predictors$is_spatial()){ + template <- terra::mask(template, sum(x$predictors$get_data(), na.rm = TRUE)) + } + } # Set up dbarts control with some parameters, rest default dc <- dbarts::dbartsControl(keepTrees = TRUE, # Keep trees diff --git a/R/engine_breg.R b/R/engine_breg.R index be8c4ec9..2aa3d32e 100644 --- a/R/engine_breg.R +++ b/R/engine_breg.R @@ -64,25 +64,14 @@ engine_breg <- function(x, if(type=="predictor") type <- "link" # Convenience conversion # Create a background raster - if(is.Waiver(x$predictors)){ - # Create from background - template <- terra::rast( - ext = terra::ext(x$background), - crs = terra::crs(x$background), - res = c(diff( (sf::st_bbox(x$background)[c(1,3)]) ) / 100, # Simplified assumption for resolution - diff( (sf::st_bbox(x$background)[c(1,3)]) ) / 100 - ) - ) - } else { - # If predictor existing, use them - template <- emptyraster(x$predictors$get_data() ) - } + template <- create_background(x) - # Burn in the background - template <- terra::rasterize(x$background, template, field = 0) - - # mask template where all predictor layers are NA; change na.rm = FALSE for comeplete.cases - if (!is.Waiver(x$predictors)) template <- terra::mask(template, sum(x$predictors$get_data(), na.rm = TRUE)) + # mask template where all predictor layers are NA; change na.rm = FALSE for complete.cases + if (!is.Waiver(x$predictors)){ + if(x$predictors$is_spatial()){ + template <- terra::mask(template, sum(x$predictors$get_data(), na.rm = TRUE)) + } + } # Set up the parameter list params <- list( diff --git a/R/engine_gdb.R b/R/engine_gdb.R index c537fbd2..25422a49 100644 --- a/R/engine_gdb.R +++ b/R/engine_gdb.R @@ -86,25 +86,14 @@ engine_gdb <- function(x, background <- x$background # Create a background raster - if(is.Waiver(x$predictors)){ - # Create from background - template <- terra::rast( - ext = terra::ext(background), - crs = terra::crs(background), - res = c(diff( (sf::st_bbox(background)[c(1,3)]) ) / 100, # Simplified assumption for resolution - diff( (sf::st_bbox(background)[c(1,3)]) ) / 100 - ) - ) - } else { - # If predictor existing, use them - template <- emptyraster(x$predictors$get_data() ) - } - - # Burn in the background - template <- terra::rasterize(background, template, field = 0) + template <- create_background(x) - # mask template where all predictor layers are NA; change na.rm = FALSE for comeplete.cases - if (!is.Waiver(x$predictors)) template <- terra::mask(template, sum(x$predictors$get_data(), na.rm = TRUE)) + # mask template where all predictor layers are NA; change na.rm = FALSE for complete.cases + if (!is.Waiver(x$predictors)){ + if(x$predictors$is_spatial()){ + template <- terra::mask(template, sum(x$predictors$get_data(), na.rm = TRUE)) + } + } # Set up boosting control bc <- mboost::boost_control(mstop = iter, @@ -272,7 +261,7 @@ engine_gdb <- function(x, # Add exposure to full model predictor model$exposure <- w_full * (1/unique(model$biodiversity[[1]]$expect)[1]) - } else if(model$biodiversity[[1]]$family != 'poisson'){ + } else if(model$biodiversity[[1]]$family == 'binomial'){ # calculating the case weights (equal weights) # the order of weights should be the same as presences and backgrounds in the training data prNum <- as.numeric(table(model$biodiversity[[1]]$observations[['observed']])["1"]) # number of presences diff --git a/R/engine_glm.R b/R/engine_glm.R index 397ea6d0..553dced4 100644 --- a/R/engine_glm.R +++ b/R/engine_glm.R @@ -73,25 +73,14 @@ engine_glm <- function(x, if(type=="predictor") type <- "link" # Convenience conversion # Create a background raster - if(is.Waiver(x$predictors)){ - # Create from background - template <- terra::rast( - ext = terra::ext(x$background), - crs = terra::crs(x$background), - res = c(diff( (sf::st_bbox(x$background)[c(1,3)]) ) / 100, # Simplified assumption for resolution - diff( (sf::st_bbox(x$background)[c(1,3)]) ) / 100 - ) - ) - } else { - # If predictor existing, use them - template <- emptyraster(x$predictors$get_data() ) - } + template <- create_background(x) - # Burn in the background - template <- terra::rasterize(x$background, template, field = 0) - - # mask template where all predictor layers are NA; change na.rm = FALSE for comeplete.cases - if (!is.Waiver(x$predictors)) template <- terra::mask(template, sum(x$predictors$get_data(), na.rm = TRUE)) + # mask template where all predictor layers are NA; change na.rm = FALSE for complete.cases + if (!is.Waiver(x$predictors)){ + if(x$predictors$is_spatial()){ + template <- terra::mask(template, sum(x$predictors$get_data(), na.rm = TRUE)) + } + } # Specify default control if(is.null(control)){ diff --git a/R/engine_glmnet.R b/R/engine_glmnet.R index e10ab760..1c739ac4 100644 --- a/R/engine_glmnet.R +++ b/R/engine_glmnet.R @@ -97,25 +97,14 @@ engine_glmnet <- function(x, if(type=="predictor") type <- "link" # Convenience conversion # Create a background raster - if(is.Waiver(x$predictors)){ - # Create from background - template <- terra::rast( - ext = terra::ext(x$background), - crs = terra::crs(x$background), - res = c(diff( (sf::st_bbox(x$background)[c(1,3)]) ) / 100, # Simplified assumption for resolution - diff( (sf::st_bbox(x$background)[c(1,3)]) ) / 100 - ) - ) - } else { - # If predictor existing, use them - template <- emptyraster(x$predictors$get_data() ) - } + template <- create_background(x) - # Burn in the background - template <- terra::rasterize(x$background, template, field = 0) - - # mask template where all predictor layers are NA; change na.rm = FALSE for comeplete.cases - if (!is.Waiver(x$predictors)) template <- terra::mask(template, sum(x$predictors$get_data(), na.rm = TRUE)) + # mask template where all predictor layers are NA; change na.rm = FALSE for complete.cases + if (!is.Waiver(x$predictors)){ + if(x$predictors$is_spatial()){ + template <- terra::mask(template, sum(x$predictors$get_data(), na.rm = TRUE)) + } + } # Set up the parameter list params <- list( @@ -150,7 +139,7 @@ engine_glmnet <- function(x, invisible() },overwrite = TRUE) - # Setup function + #### Setup function ---- eg$set("public", "setup", function(model, settings = NULL, ...){ # Simple security checks assertthat::assert_that( @@ -190,47 +179,55 @@ engine_glmnet <- function(x, bg <- self$get_data("template") assertthat::assert_that(!is.na( terra::global(bg, "min", na.rm = TRUE)[,1])) - # Add pseudo-absence points - presabs <- add_pseudoabsence(df = model$biodiversity[[1]]$observations, - field_occurrence = 'observed', - template = bg, - settings = model$biodiversity[[1]]$pseudoabsence_settings) - if(inherits(presabs, 'sf')) presabs <- presabs |> sf::st_drop_geometry() - # Sample environmental points for absence only points - abs <- subset(presabs, observed == 0) - # Re-extract environmental information for absence points - envs <- get_rastervalue(coords = abs[,c('x','y')], - env = model$predictors_object$get_data(df = FALSE), - rm.na = FALSE) - if(assertthat::has_name(model$biodiversity[[1]]$predictors, "Intercept")){ envs$Intercept <- 1} - - # Format out - df <- rbind(model$biodiversity[[1]]$predictors[,c('x','y','Intercept', model$biodiversity[[1]]$predictors_names)], - envs[,c('x','y','Intercept', model$biodiversity[[1]]$predictors_names)] ) - any_missing <- which(apply(df, 1, function(x) any(is.na(x)))) - if(length(any_missing)>0) { - presabs <- presabs[-any_missing,] # This works as they are in the same order - model$biodiversity[[1]]$expect <- model$biodiversity[[1]]$expect[-any_missing] - } - df <- subset(df, stats::complete.cases(df)) - assertthat::assert_that(nrow(presabs) == nrow(df)) - - # Check that expect matches - if(length(model$biodiversity[[1]]$expect)!=nrow(df)){ - # Fill the absences with 1 as multiplier. This works since absences follow the presences - model$biodiversity[[1]]$expect <- c( model$biodiversity[[1]]$expect, - rep(1, nrow(presabs)-length(model$biodiversity[[1]]$expect) )) - } + len <- nrow(model$biodiversity[[1]]$observations) + if(sum(model$biodiversity[[1]]$observations[['observed']]==0) sf::st_drop_geometry() + # Sample environmental points for absence only points + abs <- subset(presabs, observed == 0) + # Re-extract environmental information for absence points + envs <- get_rastervalue(coords = abs[,c('x','y')], + env = model$predictors_object$get_data(df = FALSE), + rm.na = FALSE) + if(assertthat::has_name(model$biodiversity[[1]]$predictors, "Intercept")){ envs$Intercept <- 1} + + # Format out + df <- rbind(model$biodiversity[[1]]$predictors[,c('x','y','Intercept', model$biodiversity[[1]]$predictors_names)], + envs[,c('x','y','Intercept', model$biodiversity[[1]]$predictors_names)] ) + any_missing <- which(apply(df, 1, function(x) any(is.na(x)))) + if(length(any_missing)>0) { + presabs <- presabs[-any_missing,] # This works as they are in the same order + model$biodiversity[[1]]$expect <- model$biodiversity[[1]]$expect[-any_missing] + } + df <- subset(df, stats::complete.cases(df)) + assertthat::assert_that(nrow(presabs) == nrow(df)) + + # Check that expect matches + if(length(model$biodiversity[[1]]$expect)!=nrow(df)){ + # Fill the absences with 1 as multiplier. This works since absences follow the presences + model$biodiversity[[1]]$expect <- c( model$biodiversity[[1]]$expect, + rep(1, nrow(presabs)-length(model$biodiversity[[1]]$expect) )) + } - # Overwrite observation data - model$biodiversity[[1]]$observations <- presabs + # Overwrite observation data + model$biodiversity[[1]]$observations <- presabs - # Preprocessing security checks - assertthat::assert_that( all( model$biodiversity[[1]]$observations[['observed']] >= 0 ), - any(!is.na(presabs[['observed']])), - length(model$biodiversity[[1]]$expect) == nrow(model$biodiversity[[1]]$observations), - nrow(df) == nrow(model$biodiversity[[1]]$observations) - ) + # Preprocessing security checks + assertthat::assert_that( all( model$biodiversity[[1]]$observations[['observed']] >= 0 ), + any(!is.na(presabs[['observed']])), + length(model$biodiversity[[1]]$expect) == nrow(model$biodiversity[[1]]$observations), + nrow(df) == nrow(model$biodiversity[[1]]$observations) + ) + } else { + if(getOption('ibis.setupmessages', default = TRUE)) cli::cli_alert_info("Not adding pseudo-absence points.") + df <- model$biodiversity[[1]]$observations + } # Add offset if existent if(!is.Waiver(model$offset)){ @@ -257,15 +254,19 @@ engine_glmnet <- function(x, # Rasterize observed presences pres <- terra::rasterize( guess_sf(model$biodiversity[[1]]$observations[,c("x","y")]), bg, fun = 'count', background = 0) - # Get for the full dataset - w_full <- ppm_weights(df = model$predictors, - pa = pres[], - bg = bg, - weight = 1 # Set those to 1 so that absences become ratio of pres/abs - ) - # Add exposure to full model predictor - model$exposure <- w_full * (1/unique(model$biodiversity[[1]]$expect)[1]) # Multiply with prior weight (first value) + # Unless set to inference only, also use full weights + if(!settings$get("inference_only")){ + # Get for the full dataset + w_full <- ppm_weights(df = model$predictors, + pa = pres[], + bg = bg, + weight = 1 # Set those to 1 so that absences become ratio of pres/abs + ) + + # Add exposure to full model predictor + model$exposure <- w_full * (1/unique(model$biodiversity[[1]]$expect)[1]) # Multiply with prior weight (first value) + } } else if(fam == "binomial"){ # Check that observations are all <=1 diff --git a/R/engine_scampr.R b/R/engine_scampr.R index a03a09f6..5680009a 100644 --- a/R/engine_scampr.R +++ b/R/engine_scampr.R @@ -81,25 +81,14 @@ engine_scampr <- function(x, dens <- match.arg(dens, choices = c("posterior", "prior"), several.ok = FALSE) # Create a background raster - if(is.Waiver(x$predictors)){ - # Create from background - template <- terra::rast( - ext = terra::ext(x$background), - crs = terra::crs(x$background), - res = c(diff( (sf::st_bbox(x$background)[c(1,3)]) ) / 100, # Simplified assumption for resolution - diff( (sf::st_bbox(x$background)[c(1,3)]) ) / 100 - ) - ) - } else { - # If predictor existing, use them - template <- emptyraster(x$predictors$get_data() ) - } + template <- create_background(x) - # Burn in the background - template <- terra::rasterize(x$background, template, field = 0) - - # mask template where all predictor layers are NA; change na.rm = FALSE for comeplete.cases - if (!is.Waiver(x$predictors)) template <- terra::mask(template, sum(x$predictors$get_data(), na.rm = TRUE)) + # mask template where all predictor layers are NA; change na.rm = FALSE for complete.cases + if (!is.Waiver(x$predictors)){ + if(x$predictors$is_spatial()){ + template <- terra::mask(template, sum(x$predictors$get_data(), na.rm = TRUE)) + } + } # Set up the parameter list params <- list( diff --git a/R/engine_stan.R b/R/engine_stan.R index bd9ae3a7..54a1f564 100644 --- a/R/engine_stan.R +++ b/R/engine_stan.R @@ -100,25 +100,14 @@ engine_stan <- function(x, if(is.null(cores)) cores <- getOption('ibis.nthread') # Create a background raster - if(is.Waiver(x$predictors)){ - # Create from background - template <- terra::rast( - ext = terra::ext(x$background), - crs = terra::crs(x$background), - res = c(diff( (sf::st_bbox(x$background)[c(1,3)]) ) / 100, # Simplified assumption for resolution - diff( (sf::st_bbox(x$background)[c(1,3)]) ) / 100 - ) - ) - } else { - # If predictor existing, use them - template <- emptyraster(x$predictors$get_data() ) - } + template <- create_background(x) - # Burn in the background - template <- terra::rasterize(x$background, template, field = 0) - - # mask template where all predictor layers are NA; change na.rm = FALSE for comeplete.cases - if (!is.Waiver(x$predictors)) template <- terra::mask(template, sum(x$predictors$get_data(), na.rm = TRUE)) + # mask template where all predictor layers are NA; change na.rm = FALSE for complete.cases + if (!is.Waiver(x$predictors)){ + if(x$predictors$is_spatial()){ + template <- terra::mask(template, sum(x$predictors$get_data(), na.rm = TRUE)) + } + } # Define new engine object of class eg <- Engine diff --git a/R/engine_xgboost.R b/R/engine_xgboost.R index c9f58a62..124c0c2e 100644 --- a/R/engine_xgboost.R +++ b/R/engine_xgboost.R @@ -98,25 +98,14 @@ engine_xgboost <- function(x, ) # Create a background raster - if(is.Waiver(x$predictors)){ - # Create from background - template <- terra::rast( - ext = terra::ext(x$background), - crs = terra::crs(sf::st_crs(x$background)$wkt), - res = c(diff( (sf::st_bbox(x$background)[c(1,3)]) ) / 100, # Simplified assumption for resolution - diff( (sf::st_bbox(x$background)[c(1,3)]) ) / 100 - ) - ) - } else { - # If predictor existing, use them - template <- emptyraster(x$predictors$get_data() ) - } + template <- create_background(x) - # Burn in the background - template <- terra::rasterize(x$background, template, field = 0) - - # mask template where all predictor layers are NA; change na.rm = FALSE for comeplete.cases - if (!is.Waiver(x$predictors)) template <- terra::mask(template, sum(x$predictors$get_data(), na.rm = TRUE)) + # mask template where all predictor layers are NA; change na.rm = FALSE for complete.cases + if (!is.Waiver(x$predictors)){ + if(x$predictors$is_spatial()){ + template <- terra::mask(template, sum(x$predictors$get_data(), na.rm = TRUE)) + } + } # Set up the parameter list params <- list( diff --git a/R/project.R b/R/project.R index aefd8220..74602968 100644 --- a/R/project.R +++ b/R/project.R @@ -13,6 +13,8 @@ NULL #' #' @param x A [`BiodiversityScenario`] object with set predictors. Note that some #' constrains such as \code{MigClim} can still simulate future change without projections. +#' Alternatively a [`DistributionModel`] object can be supplied if other scenario +#' functions are not needed. In this case provide a \code{env} parameter value. #' @param date_interpolation A [`character`] on whether dates should be interpolated. #' Options include \code{"none"} (Default), \code{"annual"}, \code{"monthly"}, \code{"daily"}. #' @param stabilize A [`logical`] value indicating whether the suitability projection @@ -20,6 +22,8 @@ NULL #' @param stabilize_method [`character`] stating the stabilization method to be #' applied. Currently supported is \code{`loess`}. #' @param layer A [`character`] specifying the layer to be projected (Default: \code{"mean"}). +#' @param env An optional [`SpatRaster`] or [`data.frame`] object for prediction. +#' Ignored unless a [`DistributionModel`] object is supplied (Default: \code{NULL}). #' @param verbose Setting this [`logical`] value to \code{TRUE} prints out further #' information during the model fitting (Default: \code{FALSE}). #' @param ... passed on parameters. @@ -97,7 +101,7 @@ methods::setMethod( "project", methods::signature(x = "BiodiversityScenario"), function(x, date_interpolation = "none", stabilize = FALSE, stabilize_method = "loess", - layer = "mean", verbose = getOption('ibis.setupmessages', default = TRUE), ...){ + layer = "mean", env = NULL, verbose = getOption('ibis.setupmessages', default = TRUE), ...){ # MJ: Workaround to ensure project generic does not conflict with terra::project mod <- x # date_interpolation = "none"; stabilize = FALSE; stabilize_method = "loess"; layer="mean" @@ -742,3 +746,71 @@ methods::setMethod( return(out) } ) + +#' @rdname project +#' @export +project.DistributionModel <- function(x,...) project(x,...) + +#' @rdname project +#' @export +methods::setMethod( + "project", + methods::signature(x = "DistributionModel"), + function(x, env, layer = "mean", verbose = getOption('ibis.setupmessages', default = TRUE), ...){ + assertthat::assert_that( + is.Raster(env) || is.data.frame(env), + is.character(layer) + ) + # Get coefficients and model + # co <- x$get_coefficients()[,1] + co <- x$model$biodiversity[[1]]$predictors_names + model <- x$model + settings <- x$settings + + # Further checks + assertthat::assert_that( + length(co)>0, + is.list(model) + ) + # If names are to be sanitized, cleanl + if(settings$get("ibis.cleannames")){ + nn <- sanitize_names(names(env)) + names(env) <- nn + } + + # Check that all predictor names are present + assertthat::assert_that( + all(co %in% names(env)), + msg = "Not all coefficients are found in the fitted model..." + ) + + # Make a template + if(is.Raster(env)) { + template <- emptyraster(env) + } else { + assertthat::assert_that( + hasName(env, "x") && hasName(env, "y"), + msg = "Coordinates as x and y need to be supplied!" + ) + # Create template + template <- try({ + terra::rast(env[,c("x", "y")], + crs = terra::crs(model$background), + type = "xyz") |> + emptyraster() + },silent = TRUE) + } + + # If raster convert to data.frame for further predictions + if(is.Raster(env)) env <- as.data.frame(env, xy = TRUE) + + # --- # + # Now predict + out <- x$project(newdata = env, layer = layer) + names(out) <- paste0("suitability", "_", layer) + if(is.na(terra::crs(out))) terra::crs(out) <- terra::crs( model$background ) + # --- # + + return(out) + } +) diff --git a/R/train.R b/R/train.R index 27c9ca55..1056af3e 100644 --- a/R/train.R +++ b/R/train.R @@ -242,7 +242,50 @@ methods::setMethod( model[['background']] <- x$background # Get overall Predictor data - if(is.Waiver(x$get_predictor_names())) { + if(!is.Waiver(x$get_predictor_names())) { + # Check for predictor type + if(x$predictors$is_spatial()){ + # Convert Predictors to data.frame + model[['predictors']] <- x$predictors$get_data(df = TRUE, na.rm = FALSE) + + # Check whether any of the variables are fully NA, if so exclude + if( any( apply(model[['predictors']], 2, function(z) all(is.na(z))) )){ + chk <- which( apply(model[['predictors']], 2, function(z) all(is.na(z))) ) + if(getOption('ibis.setupmessages', default = TRUE)) myLog('[Setup]','red', + paste0('The following variables are fully missing and are removed:\n', + paste(names(chk),collapse = " | ")) + ) + model[['predictors']] <- model[['predictors']][,-chk] + x$predictors$rm_data(names(chk)) # Remove the variables + } + + # Also set predictor names + model[['predictors_names']] <- x$get_predictor_names() + # Get predictor types + model[['predictors_types']] <- predictor_type(model[['predictors']]) + # Assign attribute to predictors to store the name of object + model[['predictors_object']] <- x$predictors$clone(deep = TRUE) + } else { + if(!inference_only){ + cli::cli_alert_warning("No spatial predictors found. Set to inference only!") + inference_only <- TRUE + settings$set('inference_only', inference_only) + } + # Dummy covariate of background raster + if(is.Raster(x$engine$get_data("template"))){ + dummy <- emptyraster(x$engine$get_data("template"));names(dummy) <- "dummy" + dummy[] <- 1 ; dummy <- terra::mask(dummy, x$background) + } else { + dummy <- terra::rast( terra::ext(x$background), + nrow=100, ncol=100, val=1, + crs = terra::crs(x$background));names(dummy) <- 'dummy' + } + model[['predictors']] <- terra::as.data.frame(dummy, xy = TRUE, na.rm = FALSE) + model[['predictors_names']] <- 'dummy' + model[['predictors_types']] <- predictor_type(dummy) + model[['predictors_object']] <- PredictorDataset$new(id = new_id(), data = dummy) + } + } else { if(getOption('ibis.setupmessages', default = TRUE)) myLog('[Setup]','yellow',paste0('No predictor terms found. Using dummy.')) # Dummy covariate of background raster # Check if the engine has a template and if so use that one @@ -256,32 +299,8 @@ methods::setMethod( } model[['predictors']] <- terra::as.data.frame(dummy, xy = TRUE, na.rm = FALSE) model[['predictors_names']] <- 'dummy' - model[['predictors_types']] <- data.frame(predictors = 'dummy', type = 'numeric') + model[['predictors_types']] <- predictor_type(dummy) model[['predictors_object']] <- PredictorDataset$new(id = new_id(), data = dummy) - } else { - # Convert Predictors to data.frame - model[['predictors']] <- x$predictors$get_data(df = TRUE, na.rm = FALSE) - - # Check whether any of the variables are fully NA, if so exclude - if( any( apply(model[['predictors']], 2, function(z) all(is.na(z))) )){ - chk <- which( apply(model[['predictors']], 2, function(z) all(is.na(z))) ) - if(getOption('ibis.setupmessages', default = TRUE)) myLog('[Setup]','red', - paste0('The following variables are fully missing and are removed:\n', - paste(names(chk),collapse = " | ")) - ) - model[['predictors']] <- model[['predictors']][,-chk] - x$predictors$rm_data(names(chk)) # Remove the variables - } - - # Also set predictor names - model[['predictors_names']] <- x$get_predictor_names() - # Get predictor types - lu <- sapply(model[['predictors']][model[['predictors_names']]], is.factor) - model[['predictors_types']] <- data.frame(predictors = names(lu), type = ifelse(lu,'factor', 'numeric'), - row.names = NULL) - # Assign attribute to predictors to store the name of object - model[['predictors_object']] <- x$predictors$clone(deep = TRUE) - rm(lu) } # Calculate latent variables if set @@ -300,8 +319,10 @@ methods::setMethod( if(getOption('ibis.setupmessages', default = TRUE)) myLog('[Setup]','yellow',paste0(m, ' terms are not supported for engine. Switching to poly...')) x$set_latent(type = '', 'poly') } + # Calculate latent spatial terms (saved in engine data) if( length( grep('Spatial',x$get_latent() ) ) > 0 ){ + if(!x$predictors$is_spatial()) cli::cli_abort("Spatial terms currently not supported without providing predictors.") # If model is polynominal, get coordinates of first entry for names of transformation if(m == 'poly' & x$get_engine() %notin% c("","")){ # And the full predictor container @@ -309,7 +330,7 @@ methods::setMethod( model$predictors <- cbind(model$predictors, coords_poly) model$predictors_names <- c(model$predictors_names, names(coords_poly)) model$predictors_types <- rbind(model$predictors_types, - data.frame(predictors = names(coords_poly), type = "numeric")) + predictor_type(coords_poly)) # Also add to predictor object pred <- model$predictors_object$get_data(df = FALSE) new <- fill_rasters(coords_poly, emptyraster(pred)) @@ -332,19 +353,18 @@ methods::setMethod( o <- subset(o, select = "geometry") poi <- rbind(poi, o) } - # Ensure we have a backgroudn raster + # Ensure we have a background raster if(inherits(x$background, "sf")){ bg <- terra::rasterize(x$background, model$predictors_object$get_data(), 1) } else {bg <- x$background } - # Then calculate + # Then calculate Kernel density with a small bandwidth ras <- st_kde(points = poi, background = bg, bandwidth = 3) # Add to predictor objects, names, types and the object model[['predictors']] <- cbind.data.frame( model[['predictors']], terra::as.data.frame(ras, na.rm = FALSE) ) model[['predictors_names']] <- c( model[['predictors_names']], names(ras) ) model[['predictors_types']] <- rbind.data.frame(model[['predictors_types']], - data.frame(predictors = names(ras), - type = "numeric" ) - ) + predictor_type(ras) + ) if( !all(names(ras) %in% model[['predictors_object']]$get_names()) ){ model[['predictors_object']]$data <- c(model[['predictors_object']]$data, ras) } @@ -369,14 +389,12 @@ methods::setMethod( model[['predictors']] <- cbind.data.frame( model[['predictors']], terra::as.data.frame(cc, na.rm = FALSE) ) model[['predictors_names']] <- c( model[['predictors_names']], names(cc) ) model[['predictors_types']] <- rbind.data.frame(model[['predictors_types']], - data.frame(predictors = names(cc), - type = "numeric" ) - ) + predictor_type(cc) + ) if( !all(names(cc) %in% model[['predictors_object']]$get_names()) ){ model[['predictors_object']]$data <- c(model[['predictors_object']]$data, cc) } rm(cc, biodiversity_ids) - } } @@ -385,7 +403,7 @@ methods::setMethod( } else { model[["latent"]] <- new_waiver() settings$set('has_latent', FALSE) - }# End of latent factor loop + } # End of latent factor loop # Set offset if existing if(!is.Waiver(x$offset)){ @@ -430,10 +448,8 @@ methods::setMethod( model[['predictors']] <- model$predictors_object$get_data(df = TRUE, na.rm = FALSE) # Get predictor types lu <- sapply(model[['predictors']][model[['predictors_names']]], is.factor) - model[['predictors_types']] <- data.frame(predictors = names(lu), - type = ifelse(lu, 'factor', 'numeric') ) + model[['predictors_types']] <- predictor_type(lu) } - assertthat::assert_that(nrow(model[['predictors']]) == terra::ncell(model$predictors_object$get_data())) } } } @@ -442,6 +458,9 @@ methods::setMethod( model[['biodiversity']] <- list() # Specify list of ids biodiversity_ids <- as.character( x$biodiversity$get_ids() ) + if(length(biodiversity_ids)>1 && !(x$predictors$is_spatial())) + cli::cli_abort("Non-spatial predictors do currently not work for more than one biodiversity dataset!") + for(id in biodiversity_ids) { model[['biodiversity']][[id]][['name']] <- x$biodiversity$data[[id]]$name # Name of the species model[['biodiversity']][[id]][['observations']] <- x$biodiversity$get_data(id) # Observational data @@ -500,24 +519,40 @@ methods::setMethod( # Aggregate observations if poipo if(aggregate_observations && model[['biodiversity']][[id]][['type']] == "poipo"){ + if(!(x$predictors$is_spatial())) cli::cli_alert_warning("Aggregation with non-spatial predictors likely does not work...") model$biodiversity[[id]]$observations <- aggregate_observations2grid( df = model$biodiversity[[id]]$observations, - template = emptyraster(x$predictors$get_data(df = FALSE)), + template = emptyraster(model$predictors_object$get_data()), field_occurrence = "observed") # Check and reset multiplication weights model[['biodiversity']][[id]][['expect']] <- rep(unique( model[['biodiversity']][[id]][['expect']] )[1], nrow(model$biodiversity[[id]]$observations)) } - # Now extract coordinates and extract estimates, shifted to raster extraction by default to improve speed! - env <- get_rastervalue(coords = model$biodiversity[[id]]$observations, - env = model$predictors_object$get_data(df = FALSE), - rm.na = FALSE) + if(x$predictors$is_spatial()){ + # Now extract coordinates and extract estimates, shifted to raster extraction by default to improve speed! + env <- get_rastervalue(coords = model$biodiversity[[id]]$observations, + env = model$predictors_object$get_data(df = FALSE), + rm.na = FALSE) - # select only columns needed by equation - if (model$biodiversity[[id]]$equation != "") { - env <- subset(env, select = c("ID", "x", "y", attr(stats::terms.formula(model$biodiversity[[id]]$equation), - "term.labels"))) + # select only columns needed by equation + if (model$biodiversity[[id]]$equation != "") { + env <- subset(env, select = c("ID", "x", "y", attr(stats::terms.formula(model$biodiversity[[id]]$equation), + "term.labels"))) + } + } else { + # Predictors provided directly + assertthat::assert_that( + nrow(model$biodiversity[[id]]$observations) == nrow(x$predictors$get_data()), + msg = "There are different number of observations and predictor rows!" + ) + env <- x$predictors$get_data() + # Filter to target variables + if (model$biodiversity[[id]]$equation != "") { + env <- env |> dplyr::select(dplyr::any_of(c("x","y", + attr(stats::terms.formula(model$biodiversity[[id]]$equation), + "term.labels")))) + } } # Check for common issues and exclude variables if affected (could be outsourced) @@ -603,11 +638,16 @@ methods::setMethod( # Save predictors extracted for biodiversity extraction model[['biodiversity']][[id]][['predictors']] <- env model[['biodiversity']][[id]][['predictors_names']] <- names(env)[names(env) %notin% c("ID", "x", "y", "Intercept")] - model[['biodiversity']][[id]][['predictors_types']] <- model[['predictors_types']][model[['predictors_types']][, "predictors"] %in% names(env), ] - # makes sure ordering is identical - sort_id <- match(model[['biodiversity']][[id]][['predictors_names']], model[['biodiversity']][[id]][['predictors_types']]$predictors) - model[['biodiversity']][[id]][['predictors_types']] <- model[['biodiversity']][[id]][['predictors_types']][sort_id, ] + if(!x$predictors$is_spatial()){ + # Small work around for cases when none spatial predictors are supplied + model[['biodiversity']][[id]][['predictors_types']] <- predictor_type(env) + } else { + model[['biodiversity']][[id]][['predictors_types']] <- model[['predictors_types']][model[['predictors_types']][, "predictors"] %in% names(env), ] + # makes sure ordering is identical + sort_id <- match(model[['biodiversity']][[id]][['predictors_names']], model[['biodiversity']][[id]][['predictors_types']]$predictors) + model[['biodiversity']][[id]][['predictors_types']] <- model[['biodiversity']][[id]][['predictors_types']][sort_id, ] } + } # If the method of integration is weights and there are more than 2 datasets, combine if(method_integration == "weight" && length(model$biodiversity)>=2){ @@ -912,17 +952,20 @@ methods::setMethod( if(model$biodiversity[[id]]$family == 'binomial') model$biodiversity[[id]][['expect']] <- rep(1, nrow(model$biodiversity[[id]]$predictors) ) * model$biodiversity[[id]]$expect } - # remove unused predictors - pred_tmp <- unique(unlist(lapply(model$biodiversity, function(i) i$predictors_names), use.names = FALSE)) - pred_prs <- model$predictors_object$get_names() - model$predictors_names <- pred_tmp - model$predictors_types <- model$predictors_types[model$predictors_type$predictors %in% pred_tmp, ] - # make sure all in same order - model$predictors_types <- model$predictors_types[match(model$predictors_names, model$predictors_types$predictors), ] - model$predictors <- dplyr::select(model$predictors, dplyr::any_of(c("x", "y", pred_tmp))) - model$predictors_object <- model$predictors_object$clone(deep = TRUE) - if (length(pred_prs[!pred_prs %in% pred_tmp]) > 0){ - model$predictors_object$rm_data(pred_prs[!pred_prs %in% pred_tmp]) + # Work around for cases with non-spatial predictors + if(x$predictors$is_spatial()){ + # remove unused predictors + pred_tmp <- unique(unlist(lapply(model$biodiversity, function(i) i$predictors_names), use.names = FALSE)) + pred_prs <- model$predictors_object$get_names() + model$predictors_names <- pred_tmp + model$predictors_types <- model$predictors_types[model$predictors_type$predictors %in% pred_tmp, ] + # make sure all in same order + model$predictors_types <- model$predictors_types[match(model$predictors_names, model$predictors_types$predictors), ] + model$predictors <- dplyr::select(model$predictors, dplyr::any_of(c("x", "y", pred_tmp))) + model$predictors_object <- model$predictors_object$clone(deep = TRUE) + if (length(pred_prs[!pred_prs %in% pred_tmp]) > 0){ + model$predictors_object$rm_data(pred_prs[!pred_prs %in% pred_tmp]) + } } # Run the engine setup script @@ -956,17 +999,20 @@ methods::setMethod( settings = settings) } - # remove unused predictors - pred_tmp <- unique(unlist(lapply(model$biodiversity, function(i) i$predictors_names), use.names = FALSE)) - pred_prs <- model$predictors_object$get_names() - model$predictors_names <- pred_tmp - model$predictors_types <- model$predictors_types[model$predictors_type$predictors %in% pred_tmp, ] - # make sure all in same order - model$predictors_types <- model$predictors_types[match(model$predictors_names, model$predictors_types$predictors), ] - model$predictors <- dplyr::select(model$predictors, dplyr::any_of(c("x", "y", pred_tmp))) - model$predictors_object <- model$predictors_object$clone(deep = TRUE) - if (length(pred_prs[!pred_prs %in% pred_tmp]) > 0){ - model$predictors_object$rm_data(pred_prs[!pred_prs %in% pred_tmp]) + # Work around for cases with non-spatial predictors + if(x$predictors$is_spatial()){ + # remove unused predictors + pred_tmp <- unique(unlist(lapply(model$biodiversity, function(i) i$predictors_names), use.names = FALSE)) + pred_prs <- model$predictors_object$get_names() + model$predictors_names <- pred_tmp + model$predictors_types <- model$predictors_types[model$predictors_type$predictors %in% pred_tmp, ] + # make sure all in same order + model$predictors_types <- model$predictors_types[match(model$predictors_names, model$predictors_types$predictors), ] + model$predictors <- dplyr::select(model$predictors, dplyr::any_of(c("x", "y", pred_tmp))) + model$predictors_object <- model$predictors_object$clone(deep = TRUE) + if (length(pred_prs[!pred_prs %in% pred_tmp]) > 0){ + model$predictors_object$rm_data(pred_prs[!pred_prs %in% pred_tmp]) + } } # Run the engine setup script @@ -991,17 +1037,20 @@ methods::setMethod( model2 <- model model2$biodiversity <- NULL; model2$biodiversity[[id]] <- model$biodiversity[[id]] - # remove unused predictors - pred_tmp <- model2$biodiversity[[1]]$predictors_names - pred_prs <- model$predictors_object$get_names() - model2$predictors_object <- model$predictors_object$clone(deep = TRUE) - model2$predictors_names <- pred_tmp - model2$predictors_types <- model2$predictors_types[model2$predictors_type$predictors %in% pred_tmp, ] - # make sure all in same order - model2$predictors_types <- model2$predictors_types[match(model2$predictors_names, model2$predictors_types$predictors), ] - model2$predictors <- dplyr::select(model2$predictors, dplyr::any_of(c("x", "y", pred_tmp))) - if (length(pred_prs[!pred_prs %in% pred_tmp]) > 0){ - model2$predictors_object$rm_data(pred_prs[!pred_prs %in% pred_tmp]) + # Work around for cases with non-spatial predictors + if(x$predictors$is_spatial()){ + # remove unused predictors + pred_tmp <- model2$biodiversity[[1]]$predictors_names + pred_prs <- model$predictors_object$get_names() + model2$predictors_object <- model$predictors_object$clone(deep = TRUE) + model2$predictors_names <- pred_tmp + model2$predictors_types <- model2$predictors_types[model2$predictors_type$predictors %in% pred_tmp, ] + # make sure all in same order + model2$predictors_types <- model2$predictors_types[match(model2$predictors_names, model2$predictors_types$predictors), ] + model2$predictors <- dplyr::select(model2$predictors, dplyr::any_of(c("x", "y", pred_tmp))) + if (length(pred_prs[!pred_prs %in% pred_tmp]) > 0){ + model2$predictors_object$rm_data(pred_prs[!pred_prs %in% pred_tmp]) + } } # remove unused priors @@ -1124,17 +1173,20 @@ methods::setMethod( model2 <- model model2$biodiversity <- NULL; model2$biodiversity[[id]] <- model$biodiversity[[id]] - # remove unused predictors - pred_tmp <- model2$biodiversity[[1]]$predictors_names - pred_prs <- model$predictors_object$get_names() - model2$predictors_object <- model$predictors_object$clone(deep = TRUE) - model2$predictors_names <- pred_tmp - model2$predictors_types <- model2$predictors_types[model2$predictors_type$predictors %in% pred_tmp, ] - # make sure all in same order - model2$predictors_types <- model2$predictors_types[match(model2$predictors_names, model2$predictors_types$predictors), ] - model2$predictors <- dplyr::select(model2$predictors, dplyr::any_of(c("x", "y", pred_tmp))) - if (length(pred_prs[!pred_prs %in% pred_tmp]) > 0){ - model2$predictors_object$rm_data(pred_prs[!pred_prs %in% pred_tmp]) + # Work around for cases with non-spatial predictors + if(x$predictors$is_spatial()){ + # remove unused predictors + pred_tmp <- model2$biodiversity[[1]]$predictors_names + pred_prs <- model$predictors_object$get_names() + model2$predictors_object <- model$predictors_object$clone(deep = TRUE) + model2$predictors_names <- pred_tmp + model2$predictors_types <- model2$predictors_types[model2$predictors_type$predictors %in% pred_tmp, ] + # make sure all in same order + model2$predictors_types <- model2$predictors_types[match(model2$predictors_names, model2$predictors_types$predictors), ] + model2$predictors <- dplyr::select(model2$predictors, dplyr::any_of(c("x", "y", pred_tmp))) + if (length(pred_prs[!pred_prs %in% pred_tmp]) > 0){ + model2$predictors_object$rm_data(pred_prs[!pred_prs %in% pred_tmp]) + } } # remove unused priors @@ -1256,17 +1308,20 @@ methods::setMethod( model2 <- model model2$biodiversity <- NULL; model2$biodiversity[[id]] <- model$biodiversity[[id]] - # remove unused predictors - pred_tmp <- model2$biodiversity[[1]]$predictors_names - pred_prs <- model$predictors_object$get_names() - model2$predictors_object <- model$predictors_object$clone(deep = TRUE) - model2$predictors_names <- pred_tmp - model2$predictors_types <- model2$predictors_types[model2$predictors_type$predictors %in% pred_tmp, ] - # make sure all in same order - model2$predictors_types <- model2$predictors_types[match(model2$predictors_names, model2$predictors_types$predictors), ] - model2$predictors <- dplyr::select(model2$predictors, dplyr::any_of(c("x", "y", pred_tmp))) - if (length(pred_prs[!pred_prs %in% pred_tmp]) > 0){ - model2$predictors_object$rm_data(pred_prs[!pred_prs %in% pred_tmp]) + # Work around for cases with non-spatial predictors + if(x$predictors$is_spatial()){ + # remove unused predictors + pred_tmp <- model2$biodiversity[[1]]$predictors_names + pred_prs <- model$predictors_object$get_names() + model2$predictors_object <- model$predictors_object$clone(deep = TRUE) + model2$predictors_names <- pred_tmp + model2$predictors_types <- model2$predictors_types[model2$predictors_type$predictors %in% pred_tmp, ] + # make sure all in same order + model2$predictors_types <- model2$predictors_types[match(model2$predictors_names, model2$predictors_types$predictors), ] + model2$predictors <- dplyr::select(model2$predictors, dplyr::any_of(c("x", "y", pred_tmp))) + if (length(pred_prs[!pred_prs %in% pred_tmp]) > 0){ + model2$predictors_object$rm_data(pred_prs[!pred_prs %in% pred_tmp]) + } } # remove unused priors @@ -1412,17 +1467,20 @@ methods::setMethod( model2 <- model model2$biodiversity <- NULL; model2$biodiversity[[id]] <- model$biodiversity[[id]] - # remove unused predictors - pred_tmp <- model2$biodiversity[[1]]$predictors_names - pred_prs <- model$predictors_object$get_names() - model2$predictors_object <- model$predictors_object$clone(deep = TRUE) - model2$predictors_names <- pred_tmp - model2$predictors_types <- model2$predictors_types[model2$predictors_type$predictors %in% pred_tmp, ] - # make sure all in same order - model2$predictors_types <- model2$predictors_types[match(model2$predictors_names, model2$predictors_types$predictors), ] - model2$predictors <- dplyr::select(model2$predictors, dplyr::any_of(c("x", "y", pred_tmp))) - if (length(pred_prs[!pred_prs %in% pred_tmp]) > 0){ - model2$predictors_object$rm_data(pred_prs[!pred_prs %in% pred_tmp]) + # Work around for cases with non-spatial predictors + if(x$predictors$is_spatial()){ + # remove unused predictors + pred_tmp <- model2$biodiversity[[1]]$predictors_names + pred_prs <- model$predictors_object$get_names() + model2$predictors_object <- model$predictors_object$clone(deep = TRUE) + model2$predictors_names <- pred_tmp + model2$predictors_types <- model2$predictors_types[model2$predictors_type$predictors %in% pred_tmp, ] + # make sure all in same order + model2$predictors_types <- model2$predictors_types[match(model2$predictors_names, model2$predictors_types$predictors), ] + model2$predictors <- dplyr::select(model2$predictors, dplyr::any_of(c("x", "y", pred_tmp))) + if (length(pred_prs[!pred_prs %in% pred_tmp]) > 0){ + model2$predictors_object$rm_data(pred_prs[!pred_prs %in% pred_tmp]) + } } # remove unused priors @@ -1540,17 +1598,20 @@ methods::setMethod( model2 <- model model2$biodiversity <- NULL; model2$biodiversity[[id]] <- model$biodiversity[[id]] - # remove unused predictors - pred_tmp <- model2$biodiversity[[1]]$predictors_names - pred_prs <- model$predictors_object$get_names() - model2$predictors_object <- model$predictors_object$clone(deep = TRUE) - model2$predictors_names <- pred_tmp - model2$predictors_types <- model2$predictors_types[model2$predictors_type$predictors %in% pred_tmp, ] - # make sure all in same order - model2$predictors_types <- model2$predictors_types[match(model2$predictors_names, model2$predictors_types$predictors), ] - model2$predictors <- dplyr::select(model2$predictors, dplyr::any_of(c("x", "y", pred_tmp))) - if (length(pred_prs[!pred_prs %in% pred_tmp]) > 0){ - model2$predictors_object$rm_data(pred_prs[!pred_prs %in% pred_tmp]) + # Work around for cases with non-spatial predictors + if(x$predictors$is_spatial()){ + # remove unused predictors + pred_tmp <- model2$biodiversity[[id]]$predictors_names + pred_prs <- model$predictors_object$get_names() + model2$predictors_object <- model$predictors_object$clone(deep = TRUE) + model2$predictors_names <- pred_tmp + model2$predictors_types <- model2$predictors_types[model2$predictors_type$predictors %in% pred_tmp, ] + # make sure all in same order + model2$predictors_types <- model2$predictors_types[match(model2$predictors_names, model2$predictors_types$predictors), ] + model2$predictors <- dplyr::select(model2$predictors, dplyr::any_of(c("x", "y", pred_tmp))) + if (length(pred_prs[!pred_prs %in% pred_tmp]) > 0){ + model2$predictors_object$rm_data(pred_prs[!pred_prs %in% pred_tmp]) + } } # remove unused priors @@ -1670,17 +1731,20 @@ methods::setMethod( model2 <- model model2$biodiversity <- NULL; model2$biodiversity[[id]] <- model$biodiversity[[id]] - # remove unused predictors - pred_tmp <- model2$biodiversity[[1]]$predictors_names - pred_prs <- model$predictors_object$get_names() - model2$predictors_object <- model$predictors_object$clone(deep = TRUE) - model2$predictors_names <- pred_tmp - model2$predictors_types <- model2$predictors_types[model2$predictors_type$predictors %in% pred_tmp, ] - # make sure all in same order - model2$predictors_types <- model2$predictors_types[match(model2$predictors_names, model2$predictors_types$predictors), ] - model2$predictors <- dplyr::select(model2$predictors, dplyr::any_of(c("x", "y", pred_tmp))) - if (length(pred_prs[!pred_prs %in% pred_tmp]) > 0){ - model2$predictors_object$rm_data(pred_prs[!pred_prs %in% pred_tmp]) + # Work around for cases with non-spatial predictors + if(x$predictors$is_spatial()){ + # remove unused predictors + pred_tmp <- model2$biodiversity[[1]]$predictors_names + pred_prs <- model$predictors_object$get_names() + model2$predictors_object <- model$predictors_object$clone(deep = TRUE) + model2$predictors_names <- pred_tmp + model2$predictors_types <- model2$predictors_types[model2$predictors_type$predictors %in% pred_tmp, ] + # make sure all in same order + model2$predictors_types <- model2$predictors_types[match(model2$predictors_names, model2$predictors_types$predictors), ] + model2$predictors <- dplyr::select(model2$predictors, dplyr::any_of(c("x", "y", pred_tmp))) + if (length(pred_prs[!pred_prs %in% pred_tmp]) > 0){ + model2$predictors_object$rm_data(pred_prs[!pred_prs %in% pred_tmp]) + } } # remove unused priors diff --git a/R/utils-predictors.R b/R/utils-predictors.R index afb51cdb..b3d86959 100644 --- a/R/utils-predictors.R +++ b/R/utils-predictors.R @@ -1108,6 +1108,101 @@ predictor_check <- function(env){ return(env) } +#' Helper function to obtain the data types for a given predictor set +#' +#' @description +#' This function checks a provided predictor either in the form of a +#' [`SpatRaster`] or [`data.frame`] object. Multidimensional datasets such as +#' \code{'stars'} are also supported. +#' +#' Returns a [`data.frame`] with the name and type of the predictor. +#' +#' @details +#' Variables can be checked on whether they are factors either by type or by +#' values. For example, in cases where very few integer values are found, a factor +#' value could be assumed. +#' +#' If \code{env} is set to NULL, then a dummy [`data.frame`] is returned. +#' +#' @param env A [`data.frame`], [`SpatRaster`] or [`stars`] object with +#' all predictor variables. +#' @param guess_factor A [`logical`] estimation on whether a given variable is likely a +#' factor (Default: \code{FALSE}). +#' @return A [`data.frame`] with the name and type. +#' +#' @examples +#' # Example +#' predictor_type(datasets::trees) +#' +#' @keywords utils, internal +#' +#' @author Martin Jung +#' @noRd +predictor_type <- function(env, guess_factor = FALSE){ + assertthat::assert_that( + is.logical(guess_factor) + ) + + # Internal factor guessing method + gf <- function(x, threshold = 0.05) { + unique_count <- length(unique(x)) + len <- length(x) + + # Heuristic: If the number of unique values is small or < threshold% of total rows, it’s categorical + if(is.character(x) || is.factor(x) || unique_count < max(10, len * threshold)) { + return("factor") + } else { + return("numeric") + } + } + + # Process depending on type + if(is.null(env)){ + result <- data.frame( + predictors = "dummy", + type = "numeric" + ) + } else if(is.data.frame(env)){ + # Get variable names and types for a data frame + result <- data.frame( + predictors = colnames(env), + type = sapply(env, class), + stringsAsFactors = FALSE, + row.names = NULL + ) + # Guess factors if set + if(guess_factor) result$type <- sapply(env, gf) + + } else if(is.Raster(env)){ + # Get variable names + result <- data.frame( + predictors = names(env), + stringsAsFactors = FALSE, + row.names = NULL + ) + # Check for factors + result$type <- ifelse(terra::is.factor(env),"factor", "numeric") + + # Guess factors if set + if(guess_factor) result$type <- terra::global(predictors, function(i) gf(i))[,1] + } else if(inherits(env, "stars")){ + stop("Not yet implemented as no need (yet)?") + } else { + # Dummy + result <- data.frame( + predictors = "dummy", + type = "numeric" + ) + } + + assertthat::assert_that( + is.data.frame(result), + nrow(result)>0 + ) + + return(result) +} + #### Filter predictor functions ---- #' Filter a set of correlated predictors to fewer ones diff --git a/R/utils-spatial.R b/R/utils-spatial.R index f253b08b..2bb5d6dd 100644 --- a/R/utils-spatial.R +++ b/R/utils-spatial.R @@ -932,6 +932,55 @@ emptyraster <- function(x, ...) { # add name, filename, } } +#' @title Create a background layer for prediction. +#' +#' @description This function creates a background layer to be used for predictions. +#' Opposed to [emptyraster()] this function internally works with the class objects. +#' +#' @param x A \code{BiodiversityDistribution} object. +#' +#' @return an empty [`SpatRaster`] object. +#' +#' @keywords internal +#' @noRd +create_background <- function(x) { + assertthat::assert_that( + R6::is.R6(x), + inherits(x, "BiodiversityDistribution") + ) + # Create a background raster + if(is.Waiver(x$predictors)){ + # Create from background + template <- terra::rast( + ext = terra::ext(x$background), + crs = terra::crs(x$background), + res = c(diff( (sf::st_bbox(x$background)[c(1,3)]) ) / 100, # Simplified assumption for resolution + diff( (sf::st_bbox(x$background)[c(1,3)]) ) / 100 + ) + ) + } else { + # If predictor existing, use them + dat <- x$predictors$get_data() + if(is.Raster(dat)){ + template <- emptyraster(dat) + } else { + template <- terra::rast( + ext = terra::ext(x$background), + crs = terra::crs(x$background), + res = c(diff( (sf::st_bbox(x$background)[c(1,3)]) ) / 100, # Simplified assumption for resolution + diff( (sf::st_bbox(x$background)[c(1,3)]) ) / 100 + ) + ) + } + } + # Burn in the background + if(is.Raster(template)){ + template <- terra::rasterize(x$background, template, field = 0) + } + + return(template) +} + #' Function to extract nearest neighbour predictor values of provided points #' #' @description This function performs nearest neighbour matching between diff --git a/ibis.Rproj b/ibis.Rproj index 75e8d80e..3e8e406e 100644 --- a/ibis.Rproj +++ b/ibis.Rproj @@ -1,4 +1,5 @@ Version: 1.0 +ProjectId: 7177649d-318e-45cd-bf97-845fed56daea RestoreWorkspace: No SaveWorkspace: No diff --git a/man/PredictorDataset-class.Rd b/man/PredictorDataset-class.Rd index 733d82f3..d1de9c14 100644 --- a/man/PredictorDataset-class.Rd +++ b/man/PredictorDataset-class.Rd @@ -27,6 +27,8 @@ This class describes the PredictorDataset and is used to store covariates within \item{\code{transformed}}{Saves whether the predictors have been transformed somehow.} \item{\code{timeperiod}}{A timeperiod field} + +\item{\code{is.spatial}}{A \code{\link{logical}} flag on whether the predictor is spatial or not.} } \if{html}{\out{}} } @@ -52,6 +54,7 @@ This class describes the PredictorDataset and is used to store covariates within \item \href{#method-PredictorDataset-summary}{\code{PredictorDataset$summary()}} \item \href{#method-PredictorDataset-has_derivates}{\code{PredictorDataset$has_derivates()}} \item \href{#method-PredictorDataset-is_transformed}{\code{PredictorDataset$is_transformed()}} +\item \href{#method-PredictorDataset-is_spatial}{\code{PredictorDataset$is_spatial()}} \item \href{#method-PredictorDataset-get_transformed_params}{\code{PredictorDataset$get_transformed_params()}} \item \href{#method-PredictorDataset-length}{\code{PredictorDataset$length()}} \item \href{#method-PredictorDataset-ncell}{\code{PredictorDataset$ncell()}} @@ -252,14 +255,15 @@ Utility function to clip the predictor dataset by another dataset \subsection{Arguments}{ \if{html}{\out{
}} \describe{ -\item{\code{pol}}{A \code{\link{sf}} object used for cropping the data} +\item{\code{pol}}{A \code{\link{sf}} object used for cropping the data.} \item{\code{apply_time}}{A \code{\link{logical}} flag indicating if time should be acknowledged in cropping.} } \if{html}{\out{
}} } \subsection{Details}{ -This code now also +This code now also is able to determine the temporally closest +layer. In case a \code{\link{data.frame}} exists as predictor, only that is returned. } \subsection{Returns}{ @@ -385,6 +389,19 @@ Predictors have been transformed? \if{html}{\out{
}}\preformatted{PredictorDataset$is_transformed()}\if{html}{\out{
}} } +\subsection{Returns}{ +A \code{\link{logical}} flag. +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-PredictorDataset-is_spatial}{}}} +\subsection{Method \code{is_spatial()}}{ +Is Predictor dataset spatial? +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{PredictorDataset$is_spatial()}\if{html}{\out{
}} +} + \subsection{Returns}{ A \code{\link{logical}} flag. } diff --git a/man/add_predictors.Rd b/man/add_predictors.Rd index 8bd0bbc7..54911da2 100644 --- a/man/add_predictors.Rd +++ b/man/add_predictors.Rd @@ -5,6 +5,7 @@ \alias{add_predictors,BiodiversityDistribution,SpatRasterCollection-method} \alias{add_predictors,BiodiversityDistribution,SpatRaster-method} \alias{add_predictors,BiodiversityDistribution,stars-method} +\alias{add_predictors,BiodiversityDistribution,data.frame-method} \alias{add_predictors,BiodiversityScenario,SpatRaster-method} \alias{add_predictors,BiodiversityScenario,stars-method} \title{Add predictors to a Biodiversity distribution object} @@ -73,6 +74,22 @@ add_predictors( ... ) +\S4method{add_predictors}{BiodiversityDistribution,data.frame}( + x, + env, + names = NULL, + transform = "none", + derivates = "none", + derivate_knots = 4, + int_variables = NULL, + bgmask = TRUE, + harmonize_na = FALSE, + explode_factors = FALSE, + priors = NULL, + state = NULL, + ... +) + \S4method{add_predictors}{BiodiversityScenario,SpatRaster}( x, env, @@ -108,7 +125,7 @@ add_predictors( \arguments{ \item{x}{\code{\link[=distribution]{distribution()}} (i.e. \code{\linkS4class{BiodiversityDistribution}}) object.} -\item{env}{A \code{\link{SpatRaster}} or \code{\link{stars}} object.} +\item{env}{A \code{\link{SpatRaster}}, \code{\link{stars}} or \code{\link{data.frame}} object.} \item{names}{A \code{\link{vector}} of character names describing the environmental stack in case they should be renamed.} @@ -166,9 +183,9 @@ also be combined. Available options for transformation are: \item \code{'pca'} - Converts the predictors to principal components. Note that this results in a renaming of the variables to principal component axes! \item \code{'scale'} - Transforms all predictors by applying \link{scale} on them. -\item \code{'norm'} - Normalizes all predictors by transforming them to a scale from 0 to 1. +\item \code{'norm'} - Normalizes all predictors by transforming them to a scale from \code{0} to \code{1}. \item \code{'windsor'} - Applies a windsorization to the target predictors. By default -this effectively cuts the predictors to the 0.05 and 0.95, thus helping to +this effectively cuts the predictors to the \code{0.05} and \code{0.95}, thus helping to remove extreme outliers. } @@ -199,7 +216,7 @@ estimate. Certain names such \code{"offset"} are forbidden as predictor variable names. The function will return an error message if these are used. -Some engines use binary variables regardless of the parameter \code{explode_factors} +Some engines use binary variables regardless of the parameter \code{"explode_factors"} set here. } \examples{ diff --git a/man/project.Rd b/man/project.Rd index 8556a3a9..bf64057f 100644 --- a/man/project.Rd +++ b/man/project.Rd @@ -4,6 +4,8 @@ \alias{project} \alias{project.BiodiversityScenario} \alias{project,BiodiversityScenario-method} +\alias{project.DistributionModel} +\alias{project,DistributionModel-method} \title{Project a fitted model to a new environment and covariates} \usage{ project.BiodiversityScenario(x, ...) @@ -14,13 +16,26 @@ project.BiodiversityScenario(x, ...) stabilize = FALSE, stabilize_method = "loess", layer = "mean", + env = NULL, + verbose = getOption("ibis.setupmessages", default = TRUE), + ... +) + +project.DistributionModel(x, ...) + +\S4method{project}{DistributionModel}( + x, + env, + layer = "mean", verbose = getOption("ibis.setupmessages", default = TRUE), ... ) } \arguments{ \item{x}{A \code{\link{BiodiversityScenario}} object with set predictors. Note that some -constrains such as \code{MigClim} can still simulate future change without projections.} +constrains such as \code{MigClim} can still simulate future change without projections. +Alternatively a \code{\link{DistributionModel}} object can be supplied if other scenario +functions are not needed. In this case provide a \code{env} parameter value.} \item{...}{passed on parameters.} @@ -35,6 +50,9 @@ applied. Currently supported is \code{`loess`}.} \item{layer}{A \code{\link{character}} specifying the layer to be projected (Default: \code{"mean"}).} +\item{env}{An optional \code{\link{SpatRaster}} or \code{\link{data.frame}} object for prediction. +Ignored unless a \code{\link{DistributionModel}} object is supplied (Default: \code{NULL}).} + \item{verbose}{Setting this \code{\link{logical}} value to \code{TRUE} prints out further information during the model fitting (Default: \code{FALSE}).} }