Skip to content

Commit

Permalink
🚀 Support for data.frames as predictors #136
Browse files Browse the repository at this point in the history
  • Loading branch information
Martin-Jung committed Feb 1, 2025
1 parent 016222e commit 1b6e0cd
Show file tree
Hide file tree
Showing 20 changed files with 750 additions and 366 deletions.
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,7 @@ export(predictor_homogenize_na)
export(predictor_transform)
export(priors)
export(project.BiodiversityScenario)
export(project.DistributionModel)
export(pseudoabs_settings)
export(render_html)
export(rm_biodiversity)
Expand Down
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
# ibis.iSDM 0.1.6 (current dev branch)

#### New features
* Support for [`data.frame`] as predictors in `add_predictors()` #136
* Convenience function to allow [`data.frame`] and [`SpatRaster`] to be supplied directly via `project()` #136

#### Minor improvements and bug fixes
* IIASA internal functionalities such as preparation of GLOBIOM data have been transferred to [BNRTools](https://github.com/iiasa/BNRTools)
Expand Down
74 changes: 70 additions & 4 deletions R/add_predictors.R
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ NULL
#' create derivates of provided predictors.
#'
#' @param x [distribution()] (i.e. [`BiodiversityDistribution-class`]) object.
#' @param env A [`SpatRaster`] or [`stars`] object.
#' @param env A [`SpatRaster`], [`stars`] or [`data.frame`] object.
#' @param names A [`vector`] of character names describing the environmental
#' stack in case they should be renamed.
#' @param transform A [`vector`] stating whether predictors should be preprocessed
Expand Down Expand Up @@ -49,9 +49,9 @@ NULL
#' * \code{'pca'} - Converts the predictors to principal components. Note that this
#' results in a renaming of the variables to principal component axes!
#' * \code{'scale'} - Transforms all predictors by applying [scale] on them.
#' * \code{'norm'} - Normalizes all predictors by transforming them to a scale from 0 to 1.
#' * \code{'norm'} - Normalizes all predictors by transforming them to a scale from \code{0} to \code{1}.
#' * \code{'windsor'} - Applies a windsorization to the target predictors. By default
#' this effectively cuts the predictors to the 0.05 and 0.95, thus helping to
#' this effectively cuts the predictors to the \code{0.05} and \code{0.95}, thus helping to
#' remove extreme outliers.
#'
#' Available options for creating derivates are:
Expand Down Expand Up @@ -79,7 +79,7 @@ NULL
#' Certain names such \code{"offset"} are forbidden as predictor variable names.
#' The function will return an error message if these are used.
#'
#' Some engines use binary variables regardless of the parameter \code{explode_factors}
#' Some engines use binary variables regardless of the parameter \code{"explode_factors"}
#' set here.
#'
#' @examples
Expand Down Expand Up @@ -292,6 +292,72 @@ methods::setMethod(
}
)

#' @rdname add_predictors
methods::setMethod(
"add_predictors",
methods::signature(x = "BiodiversityDistribution", env = "data.frame"),
function(x, env, names = NULL, priors = NULL, state = NULL, ... ) {
assertthat::assert_that(inherits(x, "BiodiversityDistribution"),
is.data.frame(env),
is.null(names) || assertthat::is.scalar(names) || is.vector(names),
is.null(priors) || inherits(priors,'PriorList'),
is.matrix(state) || is.null(state)
)
# Messenger
if(getOption('ibis.setupmessages', default = TRUE)) myLog('[Setup]','green','Adding predictors...')

if(!is.null(names)) {
assertthat::assert_that(colnames(env) == length(names),
all(is.character(names)),
msg = 'Provided names not of same length as environmental data.')
# Set names of env
colnames(env) <- names
}

# Check that all names allowed
problematic_names <- grep("offset|w|weight|spatial_offset|observed|Intercept|spatial.field", names(env),fixed = TRUE)
if( length(problematic_names)>0 ){
cli::cli_abort(paste0("Some predictor names are not allowed as they might interfere with model fitting:", paste0(names(env)[problematic_names],collapse = " | ")))
}

# Make a clone copy of the object
y <- x$clone(deep = TRUE)

# If priors have been set, save them in the distribution object
if(!is.null(priors)) {
assertthat::assert_that( all( priors$varnames() %in% names(env) ) )
y <- y$set_priors(priors)
}

# Grep all vars that are not x or y
vars <- colnames(env)
assertthat::assert_that(all(c("x","y") %in% vars),
msg = "Coordinates x & y need to be in the supplied data.frame!")
vars <- grep("x|y", vars,value = TRUE, invert = TRUE,fixed = FALSE,useBytes = TRUE)
assertthat::assert_that(
nrow(env)>1, length(vars)>0,
msg = "Supplied data.frame not correct?"
)

# Check for attribute to this object to keep track of it
transform <- attr(env,'transform')
if(is.null(transform)) transform <- "none"

# Check whether predictors already exist, if so overwrite
if(!is.Waiver(x$predictors)) myLog('[Setup]','yellow','Overwriting existing predictors.')

# Sanitize names if specified
if(getOption('ibis.cleannames', default = TRUE)) names(env) <- sanitize_names(names(env))

# Finally set the data to the BiodiversityDistribution object
pd <- PredictorDataset$new(id = new_id(),
data = env,
transformed = ifelse('none' %notin% transform, TRUE, FALSE ),
...)
y$set_predictors(pd)
}
)

# Add elevational delineation as predictor ----

#' Create lower and upper limits for an elevational range and add them as
Expand Down
98 changes: 78 additions & 20 deletions R/class-predictors.R
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,13 @@ PredictorDataset <- R6::R6Class(
#' @field name A name for this object.
#' @field transformed Saves whether the predictors have been transformed somehow.
#' @field timeperiod A timeperiod field
#' @field is.spatial A [`logical`] flag on whether the predictor is spatial or not.
id = character(0),
data = new_waiver(),
name = character(0),
transformed = logical(0),
timeperiod = new_waiver(),
is.spatial = NULL,

#' @description
#' Initializes the object and creates an empty list
Expand All @@ -50,6 +52,8 @@ PredictorDataset <- R6::R6Class(
for(el in names(dots)){
self[[el]] <- dots[[el]]
}
# Check for spatial
self$is.spatial <- ifelse(is.Raster(data) || inherits(data, "stars"), TRUE, FALSE)
},

#' @description
Expand Down Expand Up @@ -117,7 +121,7 @@ PredictorDataset <- R6::R6Class(
terra::as.data.frame(self$data, xy = TRUE, na.rm = na.rm, ...)
}
} else {
# For scenario files
# For scenario files and other data.frames
as.data.frame( self$data )
}
} else self$data
Expand All @@ -131,25 +135,33 @@ PredictorDataset <- R6::R6Class(
# Get data
d <- self$get_data()
if(is.Waiver(d)) return(new_waiver())
if(!inherits(d, 'stars')){
if(is.Raster(d)){
# Try and get a z dimension from the raster object
terra::time(d)
} else {
# Get dimensions
o <- stars::st_dimensions(d)
# Take third entry as the one likely to be the time variable
return(
to_POSIXct(
stars::st_get_dimension_values(d, names(o)[3], center = TRUE)
if(inherits(d, 'stars')){
# Get dimensions
o <- stars::st_dimensions(d)
# Take third entry as the one likely to be the time variable
return(
to_POSIXct(
stars::st_get_dimension_values(d, names(o)[3], center = TRUE)
)
)
)
} else {
return( new_waiver() )
}
}
},

#' @description
#' Get Projection
#' @return A [`vector`] with the geographical projection of the object.
get_projection = function(){
if(is.data.frame(self$data)){
cli::cli_alert_danger("Predictors not in spatial format but projection requested?")
return( NULL )
}
assertthat::assert_that(is.Raster(self$data) || inherits(self$data,'stars'))
sf::st_crs(self$data)
},
Expand All @@ -158,6 +170,10 @@ PredictorDataset <- R6::R6Class(
#' Get Resolution
#' @return A [`numeric`] [`vector`] with the spatial resolution of the data.
get_resolution = function(){
if(is.data.frame(self$data)){
cli::cli_alert_danger("Predictors not in spatial format but resolution requested?")
return( NULL )
}
assertthat::assert_that(is.Raster(self$data) || inherits(self$data,'stars'))
if(is.Raster(self$data)){
terra::res(self$data)
Expand All @@ -170,6 +186,10 @@ PredictorDataset <- R6::R6Class(
#' Get Extent of predictors
#' @return A [`numeric`] [`vector`] with the spatial resolution of the data.
get_ext = function(){
if(is.data.frame(self$data)){
cli::cli_alert_danger("Predictors not in spatial format but extent requested?")
return( NULL )
}
assertthat::assert_that(is.Raster(self$data) || inherits(self$data,'stars'))
if(is.Raster(self$data)){
terra::ext(self$get_data()) |> sf::st_bbox()
Expand All @@ -181,12 +201,17 @@ PredictorDataset <- R6::R6Class(
#' @description
#' Utility function to clip the predictor dataset by another dataset
#' @details
#' This code now also
#' This code now also is able to determine the temporally closest
#' layer. In case a [`data.frame`] exists as predictor, only that is returned.
#'
#' @param pol A [`sf`] object used for cropping the data
#' @param pol A [`sf`] object used for cropping the data.
#' @param apply_time A [`logical`] flag indicating if time should be acknowledged in cropping.
#' @return Invisible TRUE
crop_data = function(pol, apply_time = FALSE){
if(is.data.frame(self$data)){
cli::cli_abort("Predictors not in spatial format but should be cropped with another layer?")
return( NULL )
}
assertthat::assert_that(is.Raster(self$data) || inherits(self$data,'stars'),
inherits(pol, 'sf'),
is.logical(apply_time),
Expand Down Expand Up @@ -256,6 +281,10 @@ PredictorDataset <- R6::R6Class(
#' @param ... Any other parameters passed on to masking.
#' @return Invisible
mask = function(mask, inverse = FALSE, ...){
if(is.data.frame(self$data)){
cli::cli_abort("Predictors not in spatial format but should be masked with another layer?")
return( NULL )
}
if(inherits(self$get_data(), "stars")) invisible(self)
# Check whether prediction has been created
prediction <- self$get_data(df = FALSE)
Expand All @@ -282,13 +311,18 @@ PredictorDataset <- R6::R6Class(
#' @param value A new [`SpatRaster`] or [`stars`] object.
#' @return This object
set_data = function(value){
assertthat::assert_that(is.Raster(value) || inherits(value, "stars"))
assertthat::assert_that((is.Raster(value) || inherits(value, "stars"))||is.data.frame(value))
if(is.Raster(self$get_data()) && is.Raster(value)){
assertthat::assert_that(is_comparable_raster(self$get_data(), value))
self$data <- suppressWarnings( c(self$get_data(), value) )
} else if(inherits(self$get_data(), "stars") && is.Raster(value)) {
# Stars assumed
self$data <- st_add_raster(self$get_data(), value)
} else if(is.data.frame(self$get_data())) {
# Data.frame supplied
assertthat::assert_that(nrow(value)>0,
nrow(value)==nrow(self$data))
self$data <- cbind(self$get_data(), value)
} else {
# Simply try combining them
self$data <- suppressWarnings( c(self$get_data(), value) )
Expand Down Expand Up @@ -354,10 +388,16 @@ PredictorDataset <- R6::R6Class(
summary( as.data.frame(d) )
)
} else {
# Assume raster
return(
terra::summary( d, digits = digits)
)
if(is.Raster(d)){
# Assume raster
return(
terra::summary( d, digits = digits)
)
} else {
return(
summary(d)
)
}
}
}
rm(d)
Expand All @@ -383,6 +423,15 @@ PredictorDataset <- R6::R6Class(
)
},

#' @description
#' Is Predictor dataset spatial?
#' @return A [`logical`] flag.
is_spatial = function(){
return(
self$is.spatial
)
},

#' @description
#' Get transformation params.
#' @seealso [predictor_transform()]
Expand Down Expand Up @@ -411,6 +460,8 @@ PredictorDataset <- R6::R6Class(
ncell = function() {
if(inherits(self$get_data(),'SpatRaster'))
terra::ncell(self$get_data())
else if(is.data.frame(self$get_data()))
nrow(self$get_data())
else
terra::ncell(self$get_data()) |> as.numeric()
},
Expand All @@ -424,16 +475,23 @@ PredictorDataset <- R6::R6Class(
if(is.Raster(self$data)){
terra::plot( self$data, col = ibis_colours[['viridis_cividis']] )
} else {
# Assume stars scenario files
stars:::plot.stars(self$data, col = ibis_colours[['viridis_cividis']])
if(inherits(self$data, "stars")){
# Assume stars scenario files
stars:::plot.stars(self$data, col = ibis_colours[['viridis_cividis']])
} else {
if(ncol(self$data)>5) cli::cli_alert_info("Printing only the first 5 columns")
if(ncol(self$data)>5) {
graphics::pairs.default(self$data[,1:5])
} else {
graphics::pairs.default(self$data)
}
}
}
graphics::par(par.ori)
}
),

# Any private entries
private = list(
finalize = function() {
}
)
)
25 changes: 7 additions & 18 deletions R/engine_bart.R
Original file line number Diff line number Diff line change
Expand Up @@ -78,25 +78,14 @@ engine_bart <- function(x,
if(nburn > iter) nburn <- floor( iter / 4)

# Create a background raster
if(is.Waiver(x$predictors)){
# Create from background
template <- terra::rast(
ext = terra::ext(x$background),
crs = terra::crs(x$background),
res = c(diff( (sf::st_bbox(x$background)[c(1,3)]) ) / 100, # Simplified assumption for resolution
diff( (sf::st_bbox(x$background)[c(1,3)]) ) / 100
)
)
} else {
# If predictor existing, use them
template <- emptyraster(x$predictors$get_data() )
}
template <- create_background(x)

# Burn in the background
template <- terra::rasterize(x$background, template, field = 0)

# mask template where all predictor layers are NA; change na.rm = FALSE for comeplete.cases
if (!is.Waiver(x$predictors)) template <- terra::mask(template, sum(x$predictors$get_data(), na.rm = TRUE))
# mask template where all predictor layers are NA; change na.rm = FALSE for complete.cases
if (!is.Waiver(x$predictors)){
if(x$predictors$is_spatial()){
template <- terra::mask(template, sum(x$predictors$get_data(), na.rm = TRUE))
}
}

# Set up dbarts control with some parameters, rest default
dc <- dbarts::dbartsControl(keepTrees = TRUE, # Keep trees
Expand Down
Loading

0 comments on commit 1b6e0cd

Please sign in to comment.