Skip to content

Commit

Permalink
patch: reinstate date batching within regions
Browse files Browse the repository at this point in the history
 - add more splitting on years
 - add handling for 500 error when end_year is 2024 (how many series have been established this year?)
  • Loading branch information
brownag committed Feb 19, 2024
1 parent 8265a67 commit c147b43
Show file tree
Hide file tree
Showing 4 changed files with 63 additions and 40 deletions.
10 changes: 6 additions & 4 deletions .github/workflows/refresh-osd.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,9 @@ name: refresh-osd
on:
schedule:
- cron: '0 0 * * 1'

push:
paths:
- 'refresh-osd.yml'
workflow_dispatch:

# see: https://docs.github.com/en/free-pro-team@latest/actions/reference/workflow-syntax-for-github-actions
Expand All @@ -17,7 +19,7 @@ jobs:
runs-on: ubuntu-22.04

steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4

# try fixing a specific firefox version 109.0 for now
- run: |
Expand All @@ -41,7 +43,7 @@ jobs:
with:
name: OSD-data-snapshot
path: "OSD-data-snapshot.zip"
- uses: actions/upload-artifact@v3
- uses: actions/upload-artifact@v4
with:
name: SC-data-snapshot
path: "SC-data-snapshot.zip"
Expand All @@ -53,7 +55,7 @@ jobs:
- run: Rscript -e 'system(sprintf("git commit -am \"SC Data Refresh - %s\"", Sys.Date()))'
- run: git push
- name: Upload snapshot to release
uses: svenstaro/upload-release-action@v2
uses: svenstaro/upload-release-action@v3
with:
repo_token: ${{ secrets.GITHUB_TOKEN }}
file: "*-data-snapshot.zip"
Expand Down
5 changes: 2 additions & 3 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: OSDRegistry
Type: Package
Title: Official Series Description (OSD) Registry
Version: 0.6.1
Version: 0.6.2
Author: Soil Survey Staff
Maintainer: Andrew G. Brown <[email protected]>
Description: Version control solution for Official Series Descriptions (OSDs; <https://soilseries.sc.egov.usda.gov/>) and the Series Classification database. Official "series" are soil types used by the USDA-NRCS and the National Cooperative Soil Survey program.
Expand All @@ -12,6 +12,5 @@ Encoding: UTF-8
LazyData: true
URL: https://github.com/ncss-tech/OSDRegistry
BugReports: https://github.com/ncss-tech/OSDRegistry/issues
RoxygenNote: 7.2.3
RoxygenNote: 7.3.1
Roxygen: list(markdown = TRUE)

84 changes: 53 additions & 31 deletions R/registry.R
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
#' Automatic update by regional queries to NASIS soil series server
#'
#' @param test Default: `FALSE`; run on a pair of small regions (MO 3, 7)
#' @param test Default: `FALSE`; run on a pair of small regions (MO 12, 13)
#' @param port Passed to [RSelenium::rsDriver()]. Default: `4567L`.
#' @param moID Region ID codes (Default `1:13`, or `c(3,7)` when `test=TRUE`)
#' @param moID Region ID codes (Default `c(2, 3, 4, 9, 10, 12, 13)`, or `c(12, 13)` when `test=TRUE`)
#'
#' @description Text files are written to alphabetical (first letter) folders containing raw Official Series Descriptions (OSDs). This method is for use in automatic pipeline (e.g. a GitHub action) to regularly replicate changes that occur across the entire set of series for commit.
#'
Expand All @@ -23,11 +23,11 @@
#'
#' @importFrom utils unzip write.csv
#' @importFrom RSelenium rsDriver makeFirefoxProfile
refresh_registry <- function(test = FALSE, moID = 1:13, port = 4567L) {
refresh_registry <- function(test = FALSE, moID = c(2, 3, 4, 9, 10, 12, 13), port = 4567L) {

message("Setting up RSelenium...")

if(!requireNamespace("RSelenium"))
if (!requireNamespace("RSelenium"))
stop("package `RSelenium` is required to download ZIP files")

target_dir <- file.path(path.expand("~"), "Downloads") # file.path(getwd(), 'raw')
Expand All @@ -43,10 +43,14 @@ refresh_registry <- function(test = FALSE, moID = 1:13, port = 4567L) {
"moz:firefoxOptions" = list(args = list('--headless'))
)

res <- try(rD <- RSelenium::rsDriver(browser = "firefox",
chromever = NULL,
extraCapabilities = eCaps,
port = as.integer(port)))
res <- try({
rD <- RSelenium::rsDriver(
browser = "firefox",
chromever = NULL,
extraCapabilities = eCaps,
port = as.integer(port)
)
})

## chrome driver setup
# eCaps <- list(chromeOptions =
Expand Down Expand Up @@ -88,39 +92,57 @@ refresh_registry <- function(test = FALSE, moID = 1:13, port = 4567L) {
message("Refreshing OSDs...")

idx <- moID
if(test == TRUE)
idx <- c(3,7)
if (test == TRUE)
idx <- c(12, 13)

# iterate over the requested MO responsible codes in `idx` (valid codes are 1:13)
zips <- character()
for(i in idx) {
res <- .query_series_by_region(remDr, i)
for (i in idx) {

# try up to two additional times
if (inherits(res, 'try-error')) {
res <- .query_series_by_region(remDr, i)
# SWR and NWR have by far the most series; don't bother trying to query them in one shot
if (!i %in% c(2, 4)) {
res <- try(.query_series_by_region(remDr, i))

# on failure, retry the whole-region query once
if (inherits(res, 'try-error')) {
res <- .query_series_by_region(remDr, i)
res <- try(.query_series_by_region(remDr, i))
}
}

## batching within region no longer needed (it seems)
# if (inherits(res, 'try-error')) {
# res1 <- .query_series_by_region(remDr, i,
# start_year = 1800,
# end_year = 1980)
# res2 <- .query_series_by_region(remDr,
# i,
# start_year = 1980,
# end_year = format(Sys.Date(), "%Y"))
# if (!inherits(res1, 'try-error') &&
# !inherits(res2, 'try-error')) {
# res <- c(res1, res2)
# }
# }
if (i %in% c(2, 4) || inherits(res, 'try-error')) {
res1 <- try(.query_series_by_region(remDr, i,
start_year = 1800,
end_year = 1975))
res2 <- try(.query_series_by_region(remDr, i,
start_year = 1976,
end_year = 1990))
res3 <- try(.query_series_by_region(remDr, i,
start_year = 1991,
end_year = 2005))
res4 <- try(.query_series_by_region(remDr, i,
start_year = 2006,
end_year = format(Sys.Date(), "%Y")))

# TODO: why does above cause 500 error? no established series in current year? strange
if (inherits(res4, 'try-error')) {
res4 <- try(.query_series_by_region(remDr, i,
start_year = 2006,
end_year = as.numeric(format(Sys.Date(), "%Y")) - 1))

}

if (!inherits(res1, 'try-error') &&
!inherits(res2, 'try-error') &&
!inherits(res3, 'try-error') &&
!inherits(res4, 'try-error')) {
res <- c(res1, res2, res3, res4)
} else {
res <- try(stop("splitting region " , i, " by year failed"))
}
}

if (!inherits(res, 'try-error')) {
if (!is.na(res))
if (!is.na(all(res)))
zips <- c(zips, res)
} else {
message(paste0("Error querying OSDs region (", i, ")"))
Expand Down
4 changes: 2 additions & 2 deletions man/refresh_registry.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit c147b43

Please sign in to comment.