From 25f29105850ee50161ab607bba5d5b502408167b Mon Sep 17 00:00:00 2001 From: Michael Mahoney Date: Mon, 18 Dec 2023 20:31:26 -0500 Subject: [PATCH] Warp non-VRT output files (#15) * Warp non-VRT output files Fixes #13 * Add test, fix condition --- R/stack_rasters.R | 85 ++++++++++++++++++++++++----- man/stack_rasters.Rd | 46 +++++++++++----- tests/testthat/test-stack_rasters.R | 36 ++++++++++++ vignettes/rsi.Rmd | 3 +- 4 files changed, 139 insertions(+), 31 deletions(-) diff --git a/R/stack_rasters.R b/R/stack_rasters.R index a018139..f97a68e 100644 --- a/R/stack_rasters.R +++ b/R/stack_rasters.R @@ -1,18 +1,22 @@ -#' Create and save a multi-band VRT by combining input rasters +#' Create and save a multi-band output raster by combining input rasters #' -#' This function creates a VRT that "stacks" all the bands of its input rasters, -#' as though they were loaded one after another into a GIS. The VRT is fast -#' to create and does not require much space, but does require the input rasters -#' not be moved or altered. Run -#' `sf::gdal_utils("warp", output_filename, "some_path.tif")` to turn the output -#' VRT into a standalone TIFF file. +#' This function creates an output raster that "stacks" all the bands of its +#' input rasters, as though they were loaded one after another into a GIS. It +#' does this by first constructing a GDAL virtual raster, or "VRT", and then +#' optionally uses GDAL's warper to convert this VRT into a standalone file. +#' The VRT is fast to create and does not require much space, but does require +#' the input rasters not be moved or altered. Creating a standalone raster from +#' this file may take a long time and a large amount of disk space. #' -#' @param rasters A list or character vector of file paths to rasters to combine -#' into a single multi-band raster. Files will be read by [terra::rast()]. -#' Rasters will be "stacked" upon one another, preserving values. -#' They must share a CRS. -#' @param output_filename The location to save the final "stacked" raster. Must be -#' a VRT file. +#' @param rasters A list of rasters to combine into a single multi-band raster, +#' either as SpatRaster objects from [terra::rast()] or character file paths +#' to files that can be read by [terra::rast()]. Rasters will be "stacked" upon +#' one another, preserving values. They must share CRS. +#' @param output_filename The location to save the final "stacked" raster. If +#' this filename has a "vrt" extension as determined by `tools::file_ext()`, +#' then this function exits after creating a VRT; otherwise, this function will +#' create a VRT and then use `sf::gdal_utils("warp")` to convert the VRT into +#' another format. #' @inheritParams rlang::args_dots_empty #' @param resolution Numeric of length 2, representing the target X and Y #' resolution of the output raster. If only a single value is provided, it will @@ -29,6 +33,12 @@ #' @param band_names Either a character vector of band names, or a function that #' when given a character vector of band names, returns a character vector of #' the same length containing new band names. +#' @param gdalwarp_options Options passed to `gdalwarp` through the `options` +#' argument of [sf::gdal_utils()]. This argument is ignored (with a warning) +#' if `output_filename` is a VRT. +#' @param gdal_config_options Options passed to `gdalwarp` through the +#' `config_options` argument of [sf::gdal_utils()]. This argument is ignored +#' (with a warning) if `output_filename` is a VRT. #' #' @returns `output_filename`, unchanged. #' @@ -49,7 +59,26 @@ stack_rasters <- function(rasters, extent, reference_raster = 1, resampling_method = "bilinear", - band_names) { + band_names, + gdalwarp_options = c( + "-r", "bilinear", + "-multi", + "-overwrite", + "-co", "COMPRESS=DEFLATE", + "-co", "PREDICTOR=2", + "-co", "NUM_THREADS=ALL_CPUS" + ), + gdal_config_options = c( + VSI_CACHE = "TRUE", + GDAL_CACHEMAX = "30%", + VSI_CACHE_SIZE = "10000000", + GDAL_HTTP_MULTIPLEX = "YES", + GDAL_INGESTED_BYTES_AT_OPEN = "32000", + GDAL_DISABLE_READDIR_ON_OPEN = "EMPTY_DIR", + GDAL_HTTP_VERSION = "2", + GDAL_HTTP_MERGE_CONSECUTIVE_RANGES = "YES", + GDAL_NUM_THREADS = "ALL_CPUS" + )) { rlang::check_dots_empty() check_type_and_length( @@ -67,6 +96,15 @@ stack_rasters <- function(rasters, out_dir <- dirname(output_filename) + use_warper <- tolower(tools::file_ext(output_filename)) != "vrt" + + if (!use_warper && (!missing(gdal_config_options) || !missing(gdalwarp_options))) { + rlang::warn( + "`gdal_config_options` and `gdalwarp_options` are both ignored when `output_filename` ends in 'vrt'.", + class = "rsi_gdal_options_ignored" + ) + } + if (!(reference_raster %in% seq_along(rasters) || reference_raster %in% names(rasters))) { if (is.numeric(reference_raster)) { @@ -212,6 +250,12 @@ stack_rasters <- function(rasters, band_no <- band_no + 1 } + vrt_destination <- ifelse( + use_warper, + tempfile(fileext = ".vrt"), + output_filename + ) + band_def <- grep("VRTRasterBand", vrt_container) writeLines( c( @@ -219,7 +263,18 @@ stack_rasters <- function(rasters, unlist(vrt_bands), vrt_container[(band_def[[2]] + 1):length(vrt_container)] ), - output_filename + vrt_destination ) + + if (use_warper) { + sf::gdal_utils( + "warp", + source = vrt_destination, + destination = output_filename, + options = gdalwarp_options, + config_options = gdal_config_options + ) + } + output_filename } diff --git a/man/stack_rasters.Rd b/man/stack_rasters.Rd index 4db0974..b2d45d7 100644 --- a/man/stack_rasters.Rd +++ b/man/stack_rasters.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/stack_rasters.R \name{stack_rasters} \alias{stack_rasters} -\title{Create and save a multi-band VRT by combining input rasters} +\title{Create and save a multi-band output raster by combining input rasters} \usage{ stack_rasters( rasters, @@ -12,17 +12,26 @@ stack_rasters( extent, reference_raster = 1, resampling_method = "bilinear", - band_names + band_names, + gdalwarp_options = c("-r", "bilinear", "-multi", "-overwrite", "-co", + "COMPRESS=DEFLATE", "-co", "PREDICTOR=2", "-co", "NUM_THREADS=ALL_CPUS"), + gdal_config_options = c(VSI_CACHE = "TRUE", GDAL_CACHEMAX = "30\%", VSI_CACHE_SIZE = + "10000000", GDAL_HTTP_MULTIPLEX = "YES", GDAL_INGESTED_BYTES_AT_OPEN = "32000", + GDAL_DISABLE_READDIR_ON_OPEN = "EMPTY_DIR", GDAL_HTTP_VERSION = "2", + GDAL_HTTP_MERGE_CONSECUTIVE_RANGES = "YES", GDAL_NUM_THREADS = "ALL_CPUS") ) } \arguments{ -\item{rasters}{A list or character vector of file paths to rasters to combine -into a single multi-band raster. Files will be read by \code{\link[terra:rast]{terra::rast()}}. -Rasters will be "stacked" upon one another, preserving values. -They must share a CRS.} +\item{rasters}{A list of rasters to combine into a single multi-band raster, +either as SpatRaster objects from \code{\link[terra:rast]{terra::rast()}} or character file paths +to files that can be read by \code{\link[terra:rast]{terra::rast()}}. Rasters will be "stacked" upon +one another, preserving values. They must share CRS.} -\item{output_filename}{The location to save the final "stacked" raster. Must be -a VRT file.} +\item{output_filename}{The location to save the final "stacked" raster. If +this filename has a "vrt" extension as determined by \code{tools::file_ext()}, +then this function exits after creating a VRT; otherwise, this function will +create a VRT and then use \code{sf::gdal_utils("warp")} to convert the VRT into +another format.} \item{...}{These dots are for future extensions and must be empty.} @@ -45,17 +54,26 @@ resolutions.} \item{band_names}{Either a character vector of band names, or a function that when given a character vector of band names, returns a character vector of the same length containing new band names.} + +\item{gdalwarp_options}{Options passed to \code{gdalwarp} through the \code{options} +argument of \code{\link[sf:gdal_utils]{sf::gdal_utils()}}. This argument is ignored (with a warning) +if \code{output_filename} is a VRT.} + +\item{gdal_config_options}{Options passed to \code{gdalwarp} through the +\code{config_options} argument of \code{\link[sf:gdal_utils]{sf::gdal_utils()}}. This argument is ignored +(with a warning) if \code{output_filename} is a VRT.} } \value{ \code{output_filename}, unchanged. } \description{ -This function creates a VRT that "stacks" all the bands of its input rasters, -as though they were loaded one after another into a GIS. The VRT is fast -to create and does not require much space, but does require the input rasters -not be moved or altered. Run -\code{sf::gdal_utils("warp", output_filename, "some_path.tif")} to turn the output -VRT into a standalone TIFF file. +This function creates an output raster that "stacks" all the bands of its +input rasters, as though they were loaded one after another into a GIS. It +does this by first constructing a GDAL virtual raster, or "VRT", and then +optionally uses GDAL's warper to convert this VRT into a standalone file. +The VRT is fast to create and does not require much space, but does require +the input rasters not be moved or altered. Creating a standalone raster from +this file may take a long time and a large amount of disk space. } \examples{ stack_rasters( diff --git a/tests/testthat/test-stack_rasters.R b/tests/testthat/test-stack_rasters.R index 2eda1d4..b7502eb 100644 --- a/tests/testthat/test-stack_rasters.R +++ b/tests/testthat/test-stack_rasters.R @@ -14,6 +14,29 @@ test_that("stack_rasters works", { ) }) +test_that("stack_rasters works with non-VRT outputs", { + expect_no_error( + out_tif <- stack_rasters( + list( + system.file("rasters/example_sentinel1.tif", package = "rsi") + ), + tempfile(fileext = ".tif") + ) + ) + + # the re-compression means we don't expect this to necessarily be the same size + # but it should still be a decent sized file, not just a text file + expect_true( + file.info(out_tif)$size > + (file.info(system.file("rasters/example_sentinel1.tif", package = "rsi"))$size / 2) + ) + + expect_equal( + terra::values(terra::rast(out_tif, drivers = "GTiff")), + terra::values(terra::rast(system.file("rasters/example_sentinel1.tif", package = "rsi"))) + ) +}) + test_that("stack_rasters fails when reference_raster isn't in the vector", { expect_error( stack_rasters( @@ -96,6 +119,19 @@ test_that("stack_rasters fails when rasters are not character vectors", { ) }) +test_that("stack_rasters warns when arguments are being ignored", { + expect_warning( + stack_rasters( + list( + system.file("rasters/example_sentinel1.tif", package = "rsi") + ), + tempfile(fileext = ".vrt"), + gdalwarp_options = c("THIS IS IGNORED") + ), + class = "rsi_gdal_options_ignored" + ) +}) + test_that("type_and_length checks", { expect_snapshot( stack_rasters("a", c("a", "b")), diff --git a/vignettes/rsi.Rmd b/vignettes/rsi.Rmd index e79d044..b41be66 100644 --- a/vignettes/rsi.Rmd +++ b/vignettes/rsi.Rmd @@ -87,8 +87,7 @@ filter_platforms(platforms = "Landsat-OLI") |> There's an equivalent function to filter indices based upon the bands that require. For instance, we can filter the list to only indices that use the red and blue band of images: ```{r} -filter_bands(bands = c("R", "B")) |> - head() +filter_bands(bands = c("R", "B")) ``` Arguments to these functions let you control whether you're looking for indices that match _all_ platforms and bands you've specified, or _any_ of them.