From 0d40d977e52767b699c5fa3f64f1019e8bc45552 Mon Sep 17 00:00:00 2001 From: Stephen Turner Date: Fri, 27 Sep 2024 05:15:25 -0400 Subject: [PATCH 1/2] add tibble --- DESCRIPTION | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index e3a54d7..139ec11 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -15,14 +15,15 @@ Imports: ollamar (>= 1.2.1), rlang, rmarkdown, + tibble, tidyRSS, tinytable Suggests: knitr, markdown, testthat (>= 3.0.0) +Config/testthat/edition: 3 Encoding: UTF-8 LazyData: true Roxygen: list(markdown = TRUE) RoxygenNote: 7.3.2 -Config/testthat/edition: 3 From d0d2be9d4a113a3bd1e5a33a1cf3805833db32ad Mon Sep 17 00:00:00 2001 From: Stephen Turner Date: Fri, 27 Sep 2024 05:15:44 -0400 Subject: [PATCH 2/2] - Fixed list of medRxiv subjects (see `?subjects`). - Fixed an issue with `get_preprints()` where a valid subject has no preprints associated with it at all. --- NAMESPACE | 1 + NEWS.md | 2 ++ R/biorecap.R | 48 ++++++++++++++++++++++++++++----------- data-raw/biorecap_data.R | 4 ++-- data/subjects.rda | Bin 824 -> 819 bytes man/safely_query_rss.Rd | 19 ++++++++++++++++ 6 files changed, 59 insertions(+), 15 deletions(-) create mode 100644 man/safely_query_rss.Rd diff --git a/NAMESPACE b/NAMESPACE index 4fe132c..9420a28 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -8,6 +8,7 @@ export(build_prompt_preprint) export(build_prompt_subject) export(get_preprints) export(list_models) +export(safely_query_rss) export(test_connection) export(tt_preprints) importFrom(ollamar,list_models) diff --git a/NEWS.md b/NEWS.md index e4b0d91..8c4200b 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,6 +1,8 @@ # biorecap 0.2.1 - Added a `host` argument to `biorecap_report()` and `add_summary()` that gets passed to `ollamar::generate()`, allowing the user to choose the host for ollama. By default, `host=NULL` uses ollama's default base URL (using the local ollama server). This can be used to connect to a llama server running remotely or within a firewalled network (fixes #7). +- Fixed list of medRxiv subjects (see `?subjects`). +- Fixed an issue with `get_preprints()` where a valid subject has no preprints associated with it at all. - Updated tests. # biorecap 0.2.0 diff --git a/R/biorecap.R b/R/biorecap.R index 799a14e..9b54648 100644 --- a/R/biorecap.R +++ b/R/biorecap.R @@ -57,6 +57,37 @@ build_prompt_subject <- function(subject, return(prompt) } +#' Safely query bioRxiv/medRxiv RSS feeds +#' +#' @param subject A character vector of valid bioRxiv and/or medRxiv subjects. See [subjects]. +#' @param server A character vector of either "biorxiv" or "medrxiv". +#' +#' @return A data frame of preprints from bioRxiv and/or medRxiv. +#' @export +#' +safely_query_rss <- function(subject, server=c("biorxiv", "medrxiv")) { + server <- rlang::arg_match(server) + baseurl <- sprintf("https://connect.%s.org/%s_xml.php?subject=", server, server) + out <- + lapply(subject, \(x) { + tryCatch({ + suppressMessages(preprints <- tidyRSS::tidyfeed(paste0(baseurl, x))) + }, + error = function(e) { + message("No preprints found for subject: ", x) + tibble::tibble(feed_title=character(), feed_link=character(), feed_description=character(), item_title=character(), + item_link=character(), item_description=character(), item_category=list()) + }) + }) + out <- + out |> + stats::setNames(subject) |> + dplyr::bind_rows(.id="subject") |> + dplyr::select("subject", title="item_title", url="item_link", abstract="item_description") |> + dplyr::mutate(dplyr::across(dplyr::everything(), trimws)) |> + dplyr::mutate("source"=server, .before=1) +} + #' Get bioRxiv/medRxiv preprints #' #' @param subject A character vector of valid bioRxiv and/or medRxiv subjects. See [subjects]. @@ -80,26 +111,17 @@ get_preprints <- function(subject="all", clean=TRUE) { subject_bio <- subject[subject %in% biorecap::subjects$biorxiv] if (length(subject_bio)>0) { preprints$bio <- - lapply(subject_bio, \(x) suppressMessages(preprints <- tidyRSS::tidyfeed(paste0("https://connect.biorxiv.org/biorxiv_xml.php?subject=", x)))) |> - stats::setNames(subject_bio) |> - dplyr::bind_rows(.id="subject") |> - dplyr::select("subject", title="item_title", url="item_link", abstract="item_description") |> - dplyr::mutate(dplyr::across(dplyr::everything(), trimws)) |> + safely_query_rss(subject=subject_bio, server="biorxiv") |> dplyr::mutate("source"="bioRxiv", .before=1) - if (nrow(preprints$bio)<1L) stop("Something went wrong. No papers found for subject ", subject) #nocov + if (nrow(preprints$bio)<1L) warning("Something went wrong. No papers found for subject ", subject) #nocov } - subject_med <- subject[subject %in% biorecap::subjects$medrxiv] if (length(subject_med)>0) { preprints$med <- - lapply(subject_med, \(x) suppressMessages(preprints <- tidyRSS::tidyfeed(paste0("https://connect.medrxiv.org/medrxiv_xml.php?subject=", x)))) |> - stats::setNames(subject_med) |> - dplyr::bind_rows(.id="subject") |> - dplyr::select("subject", title="item_title", url="item_link", abstract="item_description") |> - dplyr::mutate(dplyr::across(dplyr::everything(), trimws)) |> + safely_query_rss(subject=subject_med, server="medrxiv") |> dplyr::mutate("source"="medRxiv", .before=1) - if (nrow(preprints$med)<1L) stop("Something went wrong. No papers found for subject ", subject) #nocov + if (nrow(preprints$med)<1L) warning("Something went wrong. No papers found for subject ", subject) #nocov } preprints <- dplyr::bind_rows(preprints) diff --git a/data-raw/biorecap_data.R b/data-raw/biorecap_data.R index d6d673b..2ee228d 100644 --- a/data-raw/biorecap_data.R +++ b/data-raw/biorecap_data.R @@ -39,10 +39,10 @@ subjects$medrxiv <- c("all", "Cardiovascular_Medicine", "Dentistry_and_Oral_Medicine", "Dermatology", - "Dermatology", + "Emergency_Medicine", "endocrinology", "Epidemiology", - "ecology", + "endocrinology", "epidemiology", "Forensic_Medicine", "Gastroenterology", diff --git a/data/subjects.rda b/data/subjects.rda index a1dd9d55a2892e9cef02b2b0242adc496eace161..c5c6603a1bd53fa9de8df62c7283e9c48c3f46c2 100644 GIT binary patch delta 809 zcmV+^1J?Yw2D1hZLRx4!F+o`-Q(1Gw3I>r58Gkm)buoyRMSB98U}%&00xGJKyv_?0WbgwfB={P2@(iQ zhK&RX=z~odjW(cYG!SdX`^y-Cek=Tv5mpKL|Fa^-Re%ciyEscp3y+p&7Ry)5EEa+o zIe)1XY8N0Tg7qN+OnF48I3T3B&a#5FinSJKFlbv)gft`#jA(|E5e)(tkAw*#Yy~Id zmj4_4^y81z#OZ_^E98D&D^+-a)3Gk=sd+?=#cUkd*{Jzd`dNlokR+)m^w@v%^y zHeB$RInY%K-v_qYk))hjM9`&sE!jIz7Q*cK@wEZfT{v7)=4X<fw16;D31(;l8G;H1}d1I$|SbVqe?fEqmZtfCtR9|!R9s=H7brMYkRP3RDT}P z-Lgh*r<#ee0Rn86D{BppQ4?5Z-}?65a9YmIR41D)9ir;3Xg- zjj2oXrh+rOh_uvPbT1%M2VI)be70rVuCnSHifRUnR<=LmJs3-w6nBcHiKv?< znwwKV0009(Xfy&+OiAe)Xf$X8KmY&$1Q~9wvja#M#XlqnqXhK-Vnzm0fC{#{H|3%d z=YPBMqP!)%ETT7%p@@FA#OQx&&+p>Wpv$o=m$A4gkFU2w-dqC#@$a z2i0XBBt(d+2S>Z$eee-6jte3pA~Hot0X1w4rUV>^0?U?0bq*#-9Xn(xT5Xh0iGP}$ z5e1Up7OQ#?@b>hX>%#o*$fiJmV+&bYU}HWYj8nmv=c-ZygiFl~PzEPlc{06@_rB(l zln|gN0T2x>q1r=MgyYgJ&z`DXW@H501l;WpQN_f)lZc|D3*OU4HxFWzjdu)p=p_`Cg*Pc#((@4_F*lH zjG+ezk=u;tN>h<{2GA%tc!?WhOF@y)Vp4+^Y5+#%4t(PFzg12P!MZ*}omls{>UczI z*@$-L!9fUU2oBOLuL{$csRT_%HzOD`$YGJoW>|q<%}TVqQgLkMBGNknLj>5dv=>(7#>Q?t2}EWYkL3O?