Skip to content

Commit

Permalink
fix(query): use meta-grid node status url (ideas-lab-nus#82)
Browse files Browse the repository at this point in the history
* fix(query): use meta-grid node status url

* fix(test): add `status` as a global variable

* fix(esgf): fix base url for query

* fix(test): correct experiment name when testing `init_cmip6_index()`

* fix(query): do no check project input

* fix(query): ignore 0-length param when building query url

* fix(query): only build facet cache for specified project

* fix(test): use ceda index node for testing

* fix(query): set timeout to 5 mins when building facet listing

* fix(test): do not hard code shard url

* fix(query): fix NSE in `EsgfQuery$project()` and etc

* fix(query): force parent frame before eval in `eval_with_bang()`
  • Loading branch information
hongyuanjia authored Mar 9, 2024
1 parent 849da10 commit 0a545f8
Show file tree
Hide file tree
Showing 7 changed files with 129 additions and 78 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: epwshiftr
Title: Create Future 'EnergyPlus' Weather Files using 'CMIP6' Data
Version: 0.1.3.9014
Version: 0.1.3.9015
Authors@R: c(
person(given = "Hongyuan",
family = "Jia",
Expand Down
1 change: 1 addition & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@
climate variables (#25).
* Fix the wrong warning messages when `combined` method is used in
`morphing_epw()` (#25).
* Now `get_data_node()` works again (#80)

## Internal refactor

Expand Down
57 changes: 19 additions & 38 deletions R/esgf.R
Original file line number Diff line number Diff line change
Expand Up @@ -252,7 +252,7 @@ esgf_query <- function(activity = "ScenarioMIP",
assert_choice(type, choices = c("Dataset", "File"))
assert_character(data_node, any.missing = FALSE, null.ok = TRUE)

url_base <- "http://esgf-node.llnl.gov/esg-search/search/?"
url_base <- "https://esgf-node.llnl.gov/esg-search/search?"

dict <- c(
activity = "activity_id",
Expand Down Expand Up @@ -771,48 +771,29 @@ get_data_dir <- function() {
#'
#' @export
get_data_node <- function(speed_test = FALSE, timeout = 3) {
# read html page
f <- tempfile()
utils::download.file("https://esgf-node.llnl.gov/status/", f, "libcurl", quiet = TRUE)
l <- readLines(f)
# use the metagrid-backend to get the data node status
# see: https://github.com/esgf2-us/metagrid/blob/2e90dd10317506a82f120217e39c4a3cde6a7560/backend/.envs/.django#L30
# https://github.com/ESGF/esgf-utils/blob/master/node_status/query_prom.py
res <- tryCatch(
jsonlite::fromJSON("https://aims2.llnl.gov/metagrid-backend/proxy/status"),
error = function(e) NULL
)

# locate table
l_s <- grep("<!--load block main-->", l, fixed = TRUE)
# nocov start
if (!length(l_s)) stop("Internal Error: Failed to read data node table")
# nocov end
l <- l[l_s:length(l)]
l_s <- grep("<table>", l, fixed = TRUE)[1L]
l_e <- grep("</table>", l, fixed = TRUE)[1L]
# nocov start
if (!length(l_s) || !length(l_e)) stop("Internal Error: Failed to read data node table")
if (is.null(res) || res$status != "success") {
message("Failed to retrieve the data node status from aims2.llnl.gov.")
return(data.table::data.table())
}
# nocov end
l <- l[l_s:l_e]

# extract nodes
loc <- regexec("\\t<td>(.+)</td>", l)
nodes <- vapply(seq_along(l), function(i) {
if (all(loc[[i]][1] == -1L)) {
return(NA_character_)
}
substr(l[i], loc[[i]][2], loc[[i]][2] + attr(loc[[i]], "match.length")[2] - 1L)
}, NA_character_)
nodes <- nodes[!is.na(nodes)]

# extract status
loc <- regexec('\\t\\t<font color="#\\S{6}"><b>(UP|DOWN)</b>', l)
status <- vapply(seq_along(l), function(i) {
if (all(loc[[i]][1] == -1L)) {
return(NA_character_)
}
substr(l[i], loc[[i]][2], loc[[i]][2] + attr(loc[[i]], "match.length")[2] - 1L)
}, NA_character_)
status <- status[!is.na(status)]
res <- data.table::data.table(
data_node = res$data$result$metric$instance,
status = data.table::fifelse(
vapply(res$data$result$value, .subset2, character(1L), 2L) == "1",
"UP", "DOWN"
)
)

# nocov start
if (length(nodes) != length(status)) stop("Internal Error: Failed to read data node table")
# nocov end
res <- data.table::data.table(data_node = nodes, status = status)
data.table::setorderv(res, "status", -1)

if (!speed_test) {
Expand Down
98 changes: 82 additions & 16 deletions R/query.R
Original file line number Diff line number Diff line change
Expand Up @@ -388,8 +388,12 @@ EsgfQuery <- R6::R6Class("EsgfQuery",
#' }
project = function(value = "CMIP6") {
if (missing(value)) return(private$param_project)
private$param_project <- private$new_facet_param("project", value)
self
# See: https://stackoverflow.com/questions/75543796/how-to-use-substitute-and-quote-with-nested-functions-in-r
env <- parent.frame()
private$param_project <- eval(bquote(
private$new_facet_param("project", .(substitute(value)), env = env)
))
invisible(self)
},

#' @description
Expand Down Expand Up @@ -417,8 +421,12 @@ EsgfQuery <- R6::R6Class("EsgfQuery",
#' }
activity_id = function(value) {
if (missing(value)) return(private$param_activity_id)
private$param_activity_id <- private$new_facet_param("activity_id", value)
self
# see: https://stackoverflow.com/questions/75543796/how-to-use-substitute-and-quote-with-nested-functions-in-r
env <- parent.frame()
private$param_activity_id <- eval(bquote(
private$new_facet_param("activity_id", .(substitute(value)), env = env)
))
invisible(self)
},

#' @description
Expand Down Expand Up @@ -446,7 +454,11 @@ EsgfQuery <- R6::R6Class("EsgfQuery",
#' }
experiment_id = function(value) {
if (missing(value)) return(private$param_experiment_id)
private$param_experiment_id <- private$new_facet_param("experiment_id", value)
# see: https://stackoverflow.com/questions/75543796/how-to-use-substitute-and-quote-with-nested-functions-in-r
env <- parent.frame()
private$param_experiment_id <- eval(bquote(
private$new_facet_param("experiment_id", .(substitute(value)), env = env)
))
invisible(self)
},

Expand Down Expand Up @@ -475,7 +487,11 @@ EsgfQuery <- R6::R6Class("EsgfQuery",
#' }
source_id = function(value) {
if (missing(value)) return(private$param_source_id)
private$param_source_id <- private$new_facet_param("source_id", value)
# see: https://stackoverflow.com/questions/75543796/how-to-use-substitute-and-quote-with-nested-functions-in-r
env <- parent.frame()
private$param_source_id <- eval(bquote(
private$new_facet_param("source_id", .(substitute(value)), env = env)
))
invisible(self)
},

Expand Down Expand Up @@ -504,7 +520,11 @@ EsgfQuery <- R6::R6Class("EsgfQuery",
#' }
variable_id = function(value) {
if (missing(value)) return(private$param_variable_id)
private$param_variable_id <- private$new_facet_param("variable_id", value)
# see: https://stackoverflow.com/questions/75543796/how-to-use-substitute-and-quote-with-nested-functions-in-r
env <- parent.frame()
private$param_variable_id <- eval(bquote(
private$new_facet_param("variable_id", .(substitute(value)), env = env)
))
invisible(self)
},

Expand Down Expand Up @@ -533,7 +553,11 @@ EsgfQuery <- R6::R6Class("EsgfQuery",
#' }
frequency = function(value) {
if (missing(value)) return(private$param_frequency)
private$param_frequency <- private$new_facet_param("frequency", value)
# see: https://stackoverflow.com/questions/75543796/how-to-use-substitute-and-quote-with-nested-functions-in-r
env <- parent.frame()
private$param_frequency <- eval(bquote(
private$new_facet_param("frequency", .(substitute(value)), env = env)
))
invisible(self)
},

Expand Down Expand Up @@ -562,7 +586,11 @@ EsgfQuery <- R6::R6Class("EsgfQuery",
#' }
variant_label = function(value) {
if (missing(value)) return(private$param_variant_label)
private$param_variant_label <- private$new_facet_param("variant_label", value)
# see: https://stackoverflow.com/questions/75543796/how-to-use-substitute-and-quote-with-nested-functions-in-r
env <- parent.frame()
private$param_variant_label <- eval(bquote(
private$new_facet_param("variant_label", .(substitute(value)), env = env)
))
invisible(self)
},

Expand Down Expand Up @@ -591,7 +619,11 @@ EsgfQuery <- R6::R6Class("EsgfQuery",
#' }
nominal_resolution = function(value) {
if (missing(value)) return(private$param_nominal_resolution)
param <- private$new_facet_param("nominal_resolution", value)
# see: https://stackoverflow.com/questions/75543796/how-to-use-substitute-and-quote-with-nested-functions-in-r
env <- parent.frame()
param <- eval(bquote(
private$new_facet_param("nominal_resolution", .(substitute(value)), env = env)
))

if (!is.null(param)) {
# handle nominal resolution specially
Expand Down Expand Up @@ -637,7 +669,11 @@ EsgfQuery <- R6::R6Class("EsgfQuery",
#' }
data_node = function(value) {
if (missing(value)) return(private$param_data_node)
private$param_data_node <- private$new_facet_param("data_node", value)
# See: https://stackoverflow.com/questions/75543796/how-to-use-substitute-and-quote-with-nested-functions-in-r
env <- parent.frame()
private$param_data_node <- eval(bquote(
private$new_facet_param("data_node", .(substitute(value)), env = env)
))
invisible(self)
},

Expand Down Expand Up @@ -987,7 +1023,8 @@ EsgfQuery <- R6::R6Class("EsgfQuery",
return(invisible(self))
}

params <- eval_with_bang(...)
env <- parent.frame()
params <- eval_with_bang(..., .env = env)
checkmate::assert_list(
lapply(params, .subset2, "value"),
# allow NULL
Expand Down Expand Up @@ -1319,6 +1356,10 @@ EsgfQuery <- R6::R6Class("EsgfQuery",
if (length(choices)) {
choices <- gsub("(?<=/solr).+", "", choices, perl = TRUE)
}
} else if (facet == "project") {
# TODO: find a way to programmatically get all project names
# right now no validation is performed
return(value)
} else {
choices <- self$list_all_values(facet)
}
Expand All @@ -1327,7 +1368,22 @@ EsgfQuery <- R6::R6Class("EsgfQuery",

new_facet_param = function(facet, value, allow_negate = TRUE, env = parent.frame()) {
if (allow_negate) {
value <- eval(substitute(eval_with_bang(value)[[1L]], env))
# NOTE: We have to force `env` first otherwise R's lazy
# evaluation feature will cause lexical scoping error
# Take a look at the following example:
# q <- 1
# f1 <- function(x) {
# eval(bquote(f2(.(substitute(x)), parent.frame())))
# }
# f2 <- function(y, env = parent.frame()) {
# print(eval(substitute(y), env))
# }
#
# f1(q) will return `base::q()` instead of 1
force(env)

# see: https://stackoverflow.com/questions/75543796/how-to-use-substitute-and-quote-with-nested-functions-in-r
value <- eval(bquote(eval_with_bang(.(substitute(value)), .env = env)[[1L]]))
} else {
value <- list(value = value, negate = FALSE)
}
Expand Down Expand Up @@ -1401,7 +1457,7 @@ query_build <- function(host, params, type = "search") {
}

# skip empty parameter
params <- params[!vapply(params, is.null, logical(1L))]
params <- params[vapply(params, length, integer(1L)) > 0L]

if (type == "wget") {
params <- params[!names(params) %in% c("type", "format")]
Expand All @@ -1416,10 +1472,20 @@ query_build <- function(host, params, type = "search") {
}

query_build_facet_cache <- function(host, project = "CMIP6") {
# NOTE: not all index nodes support facet listing without project one
# example is https://esgf-node.llnl.gov/esg-search/search
# It will return status '500' and 'Read timed out' for queries with large
# size. So currently we only support facet cache for a single project

# set the timeout to 5 minutes temporarily
old <- getOption("timeout")
on.exit(options(timeout = old), add = TRUE)
options(timeout = 300)

# build a query without project to get facet names and values
url <- query_build(host,
list(
project = NULL,
project = project,
facets = "*",
limit = 0,
distrib = TRUE,
Expand All @@ -1431,7 +1497,7 @@ query_build_facet_cache <- function(host, project = "CMIP6") {
# build a query with project to get the Shards
url <- query_build(host,
list(
project = "CMIP6",
project = project,
limit = 0,
distrib = TRUE,
format = "application/solr+json"
Expand Down
2 changes: 1 addition & 1 deletion R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -170,5 +170,5 @@ utils::globalVariables(c(
"variable", "wmo_number", "file_mtime", "i.file_path", "i.interval",
"interval", "time_calendar", "time_units", "overlap", "frequency",
"ind_lon", "ind_lat", "ord_lon", "ord_lat", "dist", "num_years", "type",
"year"
"year", "status"
))
2 changes: 1 addition & 1 deletion tests/testthat/test-esgf.R
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ test_that("init_cmip6_index()", {

expect_s3_class(
idx <- init_cmip6_index(variable = "tas", source = "EC-Earth3",
experiment = "ssp858", years = 2060, limit = 1, save = TRUE
experiment = "ssp585", years = 2060, limit = 1, save = TRUE
),
"data.table"
)
Expand Down
Loading

0 comments on commit 0a545f8

Please sign in to comment.