Skip to content

Commit

Permalink
Make get_stac_data() more modular (#54)
Browse files Browse the repository at this point in the history
* Make item filter function always a function

* Refactor non-download pieces out of download

* Separate progress bars out for each subfunction

* Save test files to tmp

* Remove noop

* Build symmetry

* Remove mask_band argument

* Make function calls identical

* Single download function call

* Edit rescale_bands first

* Name assets in simple method

* Remove nesting

* Fix simple download

* Reduce download forks

* Start making the download function prettier

* Start working on exposing composite, download functions

And style

* Rename default_query_function()

* Use new function name

* CHECK notes

* Fix progress bars

* Add NEWS

* Add returns documentation

* Clean up slightly

* Fixes #53

* Add new functions to pkgdown

* Fix snaps

* Add common use cases vignette

Fixes #51
  • Loading branch information
mikemahoney218 authored Mar 28, 2024
1 parent 6e8a240 commit 4a74c00
Show file tree
Hide file tree
Showing 25 changed files with 578 additions and 310 deletions.
1 change: 1 addition & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ Imports:
future.apply,
glue,
jsonlite,
lifecycle,
proceduralnames,
rlang,
rstac,
Expand Down
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ export(get_sentinel2_imagery)
export(get_stac_data)
export(landsat_mask_function)
export(landsat_platform_filter)
export(rsi_download_rasters)
export(rsi_query_api)
export(sentinel2_mask_function)
export(sign_planetary_computer)
export(spectral_indices)
Expand Down
14 changes: 14 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,19 @@
# rsi (development version)

* Progress bars have been split into separate bars for downloading, masking,
compositing and so on.

* `get_stac_data()` gains an argument, `download_function`, which takes a
`STACItemCollection` object and returns a data frame, where columns correspond
to distinct assets, rows correspond to distinct items, and cells contain file
paths to the downloaded data.

* `rsi_download_rasters()` is a new function that exposes how `get_stac_data()`
downloads assets.

* `default_query_function()` has been renamed to `rsi_query_api()`. Please
update any code using the old name; it will be removed in a future release.

* `get_alos_palsar_imagery()` and `alos_palsar_mask_function()` are new
functions to help you get and mask ALOS PALSAR imagery, respectively.

Expand Down
36 changes: 36 additions & 0 deletions R/deprecated.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
#' Deprecated functions
#'
#' @description
#' `r lifecycle::badge("deprecated")`
#'
#' These functions have been deprecated in favor of better approaches.
#'
#' * `default_query_function()` was renamed to `rsi_query_api()`. These
#' functions are identical, and the older name will be removed in a future
#' release.
#'
#' @name deprecated
#' @keywords internal
#' @export
default_query_function <- function(bbox,
                                   stac_source,
                                   collection,
                                   start_date,
                                   end_date,
                                   limit,
                                   ...) {
  # Tell callers the old name is on its way out and name its replacement.
  lifecycle::deprecate_warn(
    when = "0.2.0",
    what = "default_query_function()",
    with = "rsi_query_api()"
  )

  # Forward every argument untouched to the renamed function.
  rsi_query_api(
    bbox = bbox,
    stac_source = stac_source,
    collection = collection,
    start_date = start_date,
    end_date = end_date,
    limit = limit,
    ...
  )
}
135 changes: 135 additions & 0 deletions R/download.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
#' Download specific assets from a set of STAC items
#'
#' @param items A `StacItemCollection` object, as returned by [rsi_query_api()].
#' @param aoi Either an sf(c) object outlining the area of interest to get
#' imagery for, or a `bbox` object containing the bounding box of your AOI.
#' @param merge Logical: for each asset, should data from multiple items be
#' merged into a single downloaded file? If `TRUE`, this returns a single file
#' for each asset, which has been merged via gdalwarp. No resampling or
#' compositing is performed, but rather each pixel uses the last data
#' downloaded. This is fast, but precludes per-item masking and compositing.
#' If `FALSE`, each asset from each item is saved as a separate file.
#' @inheritParams get_stac_data
#'
#' @returns A data frame where columns correspond to distinct assets, rows
#' correspond to distinct items, and cells contain file paths to the downloaded
#' data. If any asset of an item fails to download, a warning is raised and
#' that item's row is dropped from the result.
#'
#' @export
rsi_download_rasters <- function(items,
                                 aoi,
                                 asset_names,
                                 sign_function = NULL,
                                 merge = FALSE,
                                 gdalwarp_options = c(
                                   "-r", "bilinear",
                                   "-multi",
                                   "-overwrite",
                                   "-co", "COMPRESS=DEFLATE",
                                   "-co", "PREDICTOR=2",
                                   "-co", "NUM_THREADS=ALL_CPUS"
                                 ),
                                 gdal_config_options = c(
                                   VSI_CACHE = "TRUE",
                                   GDAL_CACHEMAX = "30%",
                                   VSI_CACHE_SIZE = "10000000",
                                   GDAL_HTTP_MULTIPLEX = "YES",
                                   GDAL_INGESTED_BYTES_AT_OPEN = "32000",
                                   GDAL_DISABLE_READDIR_ON_OPEN = "EMPTY_DIR",
                                   GDAL_HTTP_VERSION = "2",
                                   GDAL_HTTP_MERGE_CONSECUTIVE_RANGES = "YES",
                                   GDAL_NUM_THREADS = "ALL_CPUS"
                                 ),
                                 ...) {
  if (!inherits(aoi, "bbox")) aoi <- sf::st_bbox(aoi)

  check_type_and_length(
    merge = logical(1)
  )

  # The names of `asset_names` are the asset keys requested from each item.
  # Without names there would be nothing to iterate over below, so an unnamed
  # vector falls back to using its values as the keys.
  if (is.null(names(asset_names))) names(asset_names) <- asset_names

  # `merge` is a scalar, so plain if/else is used rather than ifelse().
  n_tiles_out <- if (merge) 1L else length(items$features)
  p <- build_progressr(length(asset_names) * n_tiles_out)

  # One temporary file per (item, asset) pair; a single row when merging.
  download_locations <- data.frame(
    matrix(
      data = replicate(
        length(asset_names) * n_tiles_out,
        tempfile(fileext = ".tif")
      ),
      ncol = length(asset_names),
      nrow = n_tiles_out
    )
  )
  names(download_locations) <- names(asset_names)

  if (merge) {
    gdalwarp_options <- set_gdalwarp_extent(gdalwarp_options, aoi, NULL)
  }

  # Iterate over assets in parallel only when there are fewer output tiles
  # than assets (or when merging); otherwise the inner per-item loop is the
  # one that runs through futures.
  asset_iterator <- if (merge || (n_tiles_out < ncol(download_locations))) {
    function(...) future.apply::future_lapply(..., future.seed = TRUE)
  } else {
    lapply
  }

  # Each element is a logical vector (one entry per output row) recording
  # whether that asset downloaded successfully. Success is reported as a
  # return value because assignments made inside the error handler, possibly
  # in another process, never reach this frame.
  download_ok <- asset_iterator(
    names(download_locations),
    function(asset) {
      feature_iter <- seq_along(items$features)
      if (length(download_locations[[asset]]) == 1) {
        # A single output file consumes every item at once.
        feature_iter <- list(feature_iter)
      }

      ok <- future.apply::future_mapply(
        function(which_item, dl_location) {
          p(glue::glue("Downloading {asset}"))
          signed_items <- maybe_sign_items(items, sign_function)
          url <- rstac::assets_url(signed_items, asset)[which_item]

          current_options <- gdalwarp_options
          if (!merge) {
            item_bbox <- items$features[[which_item]]$bbox
            current_options <- set_gdalwarp_extent(
              gdalwarp_options,
              aoi,
              item_bbox
            )
          }

          tryCatch(
            {
              sf::gdal_utils(
                "warp",
                paste0("/vsicurl/", url),
                dl_location,
                options = current_options,
                quiet = TRUE,
                config_options = gdal_config_options
              )
              TRUE
            },
            error = function(e) {
              # `which_item` may index several items when merging, so the
              # message is built from a list subset rather than `[[`.
              failed <- items$features[which_item]
              ids <- toString(vapply(
                failed,
                function(f) toString(f$id %||% "UNKNOWN"),
                character(1)
              ))
              datetimes <- toString(vapply(
                failed,
                function(f) toString(f$properties$datetime %||% "UNKNOWN"),
                character(1)
              ))
              rlang::warn(
                glue::glue("Failed to download {ids} from {datetimes}")
              )
              FALSE
            }
          )
        },
        which_item = feature_iter,
        dl_location = download_locations[[asset]],
        SIMPLIFY = FALSE,
        future.seed = TRUE
      )
      as.logical(unlist(ok, use.names = FALSE))
    }
  )

  # An output row is discarded if any of its assets failed to download.
  item_failed <- !Reduce(`&`, download_ok, rep(TRUE, n_tiles_out))
  if (any(item_failed)) download_locations[item_failed, ] <- NA

  as.data.frame(as.list(stats::na.omit(download_locations)))
}

# Apply `sign_function` to `items` when one is supplied; otherwise hand
# `items` back unchanged.
maybe_sign_items <- function(items, sign_function) {
  if (is.null(sign_function)) {
    return(items)
  }
  signed <- sign_function(items)
  signed
}
Loading

0 comments on commit 4a74c00

Please sign in to comment.