Skip to content

Commit

Permalink
Merge pull request #10 from stephenturner/dev
Browse files Browse the repository at this point in the history
misc fixes
  • Loading branch information
stephenturner authored Sep 27, 2024
2 parents 4f12715 + d0d2be9 commit a53b93b
Show file tree
Hide file tree
Showing 7 changed files with 61 additions and 16 deletions.
3 changes: 2 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,15 @@ Imports:
ollamar (>= 1.2.1),
rlang,
rmarkdown,
tibble,
tidyRSS,
tinytable
Suggests:
knitr,
markdown,
testthat (>= 3.0.0)
Config/testthat/edition: 3
Encoding: UTF-8
LazyData: true
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.3.2
Config/testthat/edition: 3
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ export(build_prompt_preprint)
export(build_prompt_subject)
export(get_preprints)
export(list_models)
export(safely_query_rss)
export(test_connection)
export(tt_preprints)
importFrom(ollamar,list_models)
Expand Down
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
# biorecap 0.2.1

- Added a `host` argument to `biorecap_report()` and `add_summary()` that gets passed to `ollamar::generate()`, allowing the user to choose the host for ollama. By default, `host=NULL` uses ollama's default base URL (using the local ollama server). This can be used to connect to an ollama server running remotely or within a firewalled network (fixes #7).
- Fixed list of medRxiv subjects (see `?subjects`).
- Fixed an issue where `get_preprints()` failed when a valid subject had no preprints associated with it at all.
- Updated tests.

# biorecap 0.2.0
Expand Down
48 changes: 35 additions & 13 deletions R/biorecap.R
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,37 @@ build_prompt_subject <- function(subject,
return(prompt)
}

#' Safely query bioRxiv/medRxiv RSS feeds
#'
#' Queries the RSS feed of a single server once per subject. If the query for
#' a subject raises an error, the error is caught, a message naming that
#' subject is emitted, and the subject contributes zero rows to the result, so
#' one failing subject does not abort the others.
#'
#' @param subject A character vector of valid bioRxiv and/or medRxiv subjects. See [subjects].
#' @param server Which server to query: either `"biorxiv"` (the default) or `"medrxiv"`.
#'
#' @return A data frame of preprints from the chosen server, with character
#'   columns `source`, `subject`, `title`, `url`, and `abstract`. It has zero
#'   rows when no preprints could be retrieved.
#' @export
#'
safely_query_rss <- function(subject, server=c("biorxiv", "medrxiv")) {
  server <- rlang::arg_match(server)

  # Nothing to query: return the empty result shape instead of failing later
  # in select() on a frame that has no columns at all.
  if (length(subject) == 0L) {
    return(tibble::tibble(source=character(), subject=character(), title=character(),
                          url=character(), abstract=character()))
  }

  baseurl <- sprintf("https://connect.%s.org/%s_xml.php?subject=", server, server)

  # Zero-row stand-in carrying the feed columns selected below, so a failed
  # subject still binds cleanly with the subjects that succeeded.
  empty_feed <- tibble::tibble(feed_title=character(), feed_link=character(), feed_description=character(),
                               item_title=character(), item_link=character(), item_description=character(),
                               item_category=list())

  feeds <- lapply(subject, \(x) {
    tryCatch(
      suppressMessages(tidyRSS::tidyfeed(paste0(baseurl, x))),
      error = function(e) {
        message("No preprints found for subject: ", x)
        empty_feed
      }
    )
  })

  out <-
    feeds |>
    stats::setNames(subject) |>
    dplyr::bind_rows(.id="subject") |>
    dplyr::select("subject", title="item_title", url="item_link", abstract="item_description") |>
    dplyr::mutate(dplyr::across(dplyr::everything(), trimws)) |>
    dplyr::mutate("source"=server, .before=1)

  # Return visibly: ending the function on the assignment above would make the
  # result invisible, so a console call would print nothing.
  out
}

#' Get bioRxiv/medRxiv preprints
#'
#' @param subject A character vector of valid bioRxiv and/or medRxiv subjects. See [subjects].
Expand All @@ -80,26 +111,17 @@ get_preprints <- function(subject="all", clean=TRUE) {
subject_bio <- subject[subject %in% biorecap::subjects$biorxiv]
if (length(subject_bio)>0) {
preprints$bio <-
lapply(subject_bio, \(x) suppressMessages(preprints <- tidyRSS::tidyfeed(paste0("https://connect.biorxiv.org/biorxiv_xml.php?subject=", x)))) |>
stats::setNames(subject_bio) |>
dplyr::bind_rows(.id="subject") |>
dplyr::select("subject", title="item_title", url="item_link", abstract="item_description") |>
dplyr::mutate(dplyr::across(dplyr::everything(), trimws)) |>
safely_query_rss(subject=subject_bio, server="biorxiv") |>
dplyr::mutate("source"="bioRxiv", .before=1)
if (nrow(preprints$bio)<1L) stop("Something went wrong. No papers found for subject ", subject) #nocov
if (nrow(preprints$bio)<1L) warning("Something went wrong. No papers found for subject ", subject) #nocov
}


subject_med <- subject[subject %in% biorecap::subjects$medrxiv]
if (length(subject_med)>0) {
preprints$med <-
lapply(subject_med, \(x) suppressMessages(preprints <- tidyRSS::tidyfeed(paste0("https://connect.medrxiv.org/medrxiv_xml.php?subject=", x)))) |>
stats::setNames(subject_med) |>
dplyr::bind_rows(.id="subject") |>
dplyr::select("subject", title="item_title", url="item_link", abstract="item_description") |>
dplyr::mutate(dplyr::across(dplyr::everything(), trimws)) |>
safely_query_rss(subject=subject_med, server="medrxiv") |>
dplyr::mutate("source"="medRxiv", .before=1)
if (nrow(preprints$med)<1L) stop("Something went wrong. No papers found for subject ", subject) #nocov
if (nrow(preprints$med)<1L) warning("Something went wrong. No papers found for subject ", subject) #nocov
}

preprints <- dplyr::bind_rows(preprints)
Expand Down
4 changes: 2 additions & 2 deletions data-raw/biorecap_data.R
Original file line number Diff line number Diff line change
Expand Up @@ -39,10 +39,10 @@ subjects$medrxiv <- c("all",
"Cardiovascular_Medicine",
"Dentistry_and_Oral_Medicine",
"Dermatology",
"Dermatology",
"Emergency_Medicine",
"endocrinology",
"Epidemiology",
"ecology",
"endocrinology",
"epidemiology",
"Forensic_Medicine",
"Gastroenterology",
Expand Down
Binary file modified data/subjects.rda
Binary file not shown.
19 changes: 19 additions & 0 deletions man/safely_query_rss.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit a53b93b

Please sign in to comment.