Merge pull request #10 from stephenturner/dev

misc fixes
stephenturner · Sep 27, 2024 · a53b93b · a53b93b
2 parents 4f12715 + d0d2be9
commit a53b93b
Show file tree

Hide file tree

Showing 7 changed files with 61 additions and 16 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -15,14 +15,15 @@ Imports:
     ollamar (>= 1.2.1),
     rlang,
     rmarkdown,
+    tibble,
     tidyRSS,
     tinytable
 Suggests: 
     knitr,
     markdown,
     testthat (>= 3.0.0)
+Config/testthat/edition: 3
 Encoding: UTF-8
 LazyData: true
 Roxygen: list(markdown = TRUE)
 RoxygenNote: 7.3.2
-Config/testthat/edition: 3
diff --git a/NAMESPACE b/NAMESPACE
@@ -8,6 +8,7 @@ export(build_prompt_preprint)
 export(build_prompt_subject)
 export(get_preprints)
 export(list_models)
+export(safely_query_rss)
 export(test_connection)
 export(tt_preprints)
 importFrom(ollamar,list_models)

diff --git a/NEWS.md b/NEWS.md
@@ -1,6 +1,8 @@
 # biorecap 0.2.1
 
 - Added a `host` argument to `biorecap_report()` and `add_summary()` that gets passed to `ollamar::generate()`, allowing the user to choose the host for ollama. By default, `host=NULL` uses ollama's default base URL (using the local ollama server). This can be used to connect to an ollama server running remotely or within a firewalled network (fixes #7).
+- Fixed list of medRxiv subjects (see `?subjects`).
+- Fixed an error in `get_preprints()` that occurred when a valid subject had no preprints associated with it; such subjects are now skipped with a message.
 - Updated tests.
 
 # biorecap 0.2.0

diff --git a/R/biorecap.R b/R/biorecap.R
@@ -57,6 +57,37 @@ build_prompt_subject <- function(subject,
   return(prompt)
 }
 
+#' Safely query bioRxiv/medRxiv RSS feeds
+#'
+#' @param subject A character vector of subjects to query on `server`, one feed request per element. A subject whose request errors is reported with a message and contributes no rows. See [subjects].
+#' @param server The server to query: either "biorxiv" or "medrxiv" (a single choice, validated with `rlang::arg_match()`).
+#'
+#' @return A data frame with columns `source` (the value of `server`), `subject`, `title`, `url` and `abstract`, all whitespace-trimmed. Returned invisibly, because the function body ends with an assignment.
+#' @export
+#'
+safely_query_rss <- function(subject, server=c("biorxiv", "medrxiv")) {
+  server <- rlang::arg_match(server) # validate `server` against the allowed choices
+  baseurl <- sprintf("https://connect.%s.org/%s_xml.php?subject=", server, server) # feed URL prefix; the subject name is appended per request
+  out <-
+    lapply(subject, \(x) {
+      tryCatch({
+        suppressMessages(preprints <- tidyRSS::tidyfeed(paste0(baseurl, x)))
+      },
+      error = function(e) { # any error from the fetch falls back to a zero-row tibble
+        message("No preprints found for subject: ", x)
+        tibble::tibble(feed_title=character(), feed_link=character(), feed_description=character(), item_title=character(),
+                       item_link=character(), item_description=character(), item_category=list())
+      })
+    })
+  out <-
+    out |>
+    stats::setNames(subject) |> # list names become the `subject` column via `.id` below
+    dplyr::bind_rows(.id="subject") |>
+    dplyr::select("subject", title="item_title", url="item_link", abstract="item_description") |>
+    dplyr::mutate(dplyr::across(dplyr::everything(), trimws)) |> # strip surrounding whitespace from every column
+    dplyr::mutate("source"=server, .before=1) # prepend `source` holding the server name
+}
+
 #' Get bioRxiv/medRxiv preprints
 #'
 #' @param subject A character vector of valid bioRxiv and/or medRxiv subjects. See [subjects].
@@ -80,26 +111,17 @@ get_preprints <- function(subject="all", clean=TRUE) {
   subject_bio <- subject[subject %in% biorecap::subjects$biorxiv]
   if (length(subject_bio)>0) {
     preprints$bio <-
-      lapply(subject_bio, \(x) suppressMessages(preprints <- tidyRSS::tidyfeed(paste0("https://connect.biorxiv.org/biorxiv_xml.php?subject=", x)))) |>
-      stats::setNames(subject_bio) |>
-      dplyr::bind_rows(.id="subject") |>
-      dplyr::select("subject", title="item_title", url="item_link", abstract="item_description") |>
-      dplyr::mutate(dplyr::across(dplyr::everything(), trimws)) |>
+      safely_query_rss(subject=subject_bio, server="biorxiv") |>
       dplyr::mutate("source"="bioRxiv", .before=1)
-    if (nrow(preprints$bio)<1L) stop("Something went wrong. No papers found for subject ", subject) #nocov
+    if (nrow(preprints$bio)<1L) warning("Something went wrong. No papers found for subject ", subject) #nocov
   }
 
-
   subject_med <- subject[subject %in% biorecap::subjects$medrxiv]
   if (length(subject_med)>0) {
     preprints$med <-
-      lapply(subject_med, \(x) suppressMessages(preprints <- tidyRSS::tidyfeed(paste0("https://connect.medrxiv.org/medrxiv_xml.php?subject=", x)))) |>
-      stats::setNames(subject_med) |>
-      dplyr::bind_rows(.id="subject") |>
-      dplyr::select("subject", title="item_title", url="item_link", abstract="item_description") |>
-      dplyr::mutate(dplyr::across(dplyr::everything(), trimws)) |>
+      safely_query_rss(subject=subject_med, server="medrxiv") |>
       dplyr::mutate("source"="medRxiv", .before=1)
-    if (nrow(preprints$med)<1L) stop("Something went wrong. No papers found for subject ", subject) #nocov
+    if (nrow(preprints$med)<1L) warning("Something went wrong. No papers found for subject ", subject) #nocov
   }
 
   preprints <- dplyr::bind_rows(preprints)

diff --git a/data-raw/biorecap_data.R b/data-raw/biorecap_data.R
@@ -39,10 +39,10 @@ subjects$medrxiv <- c("all",
                       "Cardiovascular_Medicine",
                       "Dentistry_and_Oral_Medicine",
                       "Dermatology",
-                      "Dermatology",
+                      "Emergency_Medicine",
                       "endocrinology",
                       "Epidemiology",
-                      "ecology",
+                      "endocrinology",
                       "epidemiology",
                       "Forensic_Medicine",
                       "Gastroenterology",

diff --git a/data/subjects.rda b/data/subjects.rda
diff --git a/man/safely_query_rss.Rd b/man/safely_query_rss.Rd