Merge pull request #9 from stephenturner/dev

add host
stephenturner · Sep 27, 2024 · 4f12715 · 4f12715
2 parents 20dcdd9 + 13e89e3
commit 4f12715
Show file tree

Hide file tree

Showing 9 changed files with 35 additions and 12 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: biorecap
 Title: Retrieve and summarize bioRxiv and medRxiv preprints with a local LLM using ollama
-Version: 0.2.0
+Version: 0.2.1
 Authors@R: 
     person("Stephen", "Turner", , "[email protected]", role = c("aut", "cre"),
            comment = c(ORCID = "0000-0001-9140-9028"))

diff --git a/NEWS.md b/NEWS.md
@@ -1,3 +1,8 @@
+# biorecap 0.2.1
+
+- Added a `host` argument to `biorecap_report()` and `add_summary()` that gets passed to `ollamar::generate()`, allowing the user to choose the host for ollama. By default, `host=NULL` uses ollama's default base URL (using the local ollama server). This can be used to connect to an ollama server running remotely or within a firewalled network (fixes #7).
+- Updated tests.
+
 # biorecap 0.2.0
 
 - Added medRxiv support. The `get_preprints()` function will now pull from either the bioRxiv or medRxiv RSS feed depending on the subject passed to it. All downstream functions and reporting updated to reflect this change (fixes #5).

diff --git a/R/biorecap.R b/R/biorecap.R
@@ -151,6 +151,7 @@ add_prompt <- function(preprints, ...) {
 #'
 #' @param preprints Output from [get_preprints()] followed by [add_prompt()].
 #' @param model A model available to Ollama (run `ollamar::list_models()`) to see what's available.
+#' @param host The base URL to use. Default is `NULL`, which uses Ollama's default base URL.
 #'
 #' @return A tibble, with a response column added.
 #' @export
@@ -165,7 +166,7 @@ add_prompt <- function(preprints, ...) {
 #' preprints
 #' }
 #'
-add_summary <- function(preprints, model="llama3.2") {
+add_summary <- function(preprints, model="llama3.2", host=NULL) {
 
   if (!inherits(preprints, "preprints_prompt")) warning("Expecting a tibble of class 'preprints_prompt' returned from get_preprints() |> add_prompt().")
   if (!inherits(preprints, "data.frame")) stop("Expecting a data frame.")
@@ -175,7 +176,7 @@ add_summary <- function(preprints, model="llama3.2") {
   suppressMessages({
     preprints <-
       preprints |>
-      dplyr::mutate("summary" = as.vector(sapply(.data$prompt, \(x) ollamar::generate(model=model, prompt=x, output="text"))))
+      dplyr::mutate("summary" = as.vector(sapply(.data$prompt, \(x) ollamar::generate(model=model, prompt=x, output="text", host=host))))
   })
 
   # Remove newlines anywhere within any text
@@ -249,6 +250,7 @@ tt_preprints <- function(preprints, cols=c("title", "summary"), width=c(1,3)) {
 #' @param subject Character vector of subjects to include in the report.
 #' @param nsentences Number of sentences to summarize each paper in.
 #' @param model The model to use for generating summaries. See [ollamar::list_models()].
+#' @param host The base URL to use. Default is `NULL`, which uses Ollama's default base URL.
 #' @param use_example_preprints Use the example preprints data included with the package instead of fetching new data from bioRxiv/medRxiv. For diagnostic/testing purposes only.
 #' @param ... Other arguments passed to [rmarkdown::render()].
 #'
@@ -262,7 +264,7 @@ tt_preprints <- function(preprints, cols=c("title", "summary"), width=c(1,3)) {
 #' biorecap_report(subject=c("bioinformatics", "genomics", "synthetic_biology"),
 #'                 output_dir=output_dir)
 #' }
-biorecap_report <- function(output_dir=".", subject=NULL, nsentences=2L, model="llama3.1", use_example_preprints=FALSE, ...) {
+biorecap_report <- function(output_dir=".", subject=NULL, nsentences=2L, model="llama3.2", host = NULL, use_example_preprints=FALSE, ...) {
   skeleton <- system.file("rmarkdown/templates/biorecap/skeleton/skeleton.Rmd", package="biorecap", mustWork = TRUE)
   output_dir <- normalizePath(output_dir)
   output_file <- paste0("biorecap-report-", format(Sys.time(), "%Y-%m-%d-%H%M%S"), ".html")
@@ -272,6 +274,6 @@ biorecap_report <- function(output_dir=".", subject=NULL, nsentences=2L, model="
   rmarkdown::render(input=skeleton,
                     output_file=output_file,
                     output_dir=output_dir,
-                    params=list(subject=subject, nsentences=nsentences, model=model, use_example_preprints=use_example_preprints, output_csv=output_csv),
+                    params=list(subject=subject, nsentences=nsentences, model=model, host=host, use_example_preprints=use_example_preprints, output_csv=output_csv),
                     ...)
 }
diff --git a/README.Rmd b/README.Rmd
@@ -86,7 +86,7 @@ Write an HTML report containing summaries of recent preprints in select subject
 ```{r, eval=FALSE}
 biorecap_report(output_dir=".", 
                 subject=c("bioinformatics", "infectious_diseases"), 
-                model="llama3.1")
+                model="llama3.2")
 ```
 
 Example HTML report generated from bioRxiv (bioinformatics) and infectious diseases (medRxiv) subjects on September 25, 2024:
@@ -172,6 +172,6 @@ You could create a report for _all_ subjects like this (note, this could take so
 
 ```{r, eval=FALSE}
 biorecap_report(output_dir=".", 
-                subject=subjects, 
+                subject=c(subjects$biorxiv, subjects$medrxiv),
                 model="llama3.2")
 ```
diff --git a/README.md b/README.md
@@ -73,7 +73,7 @@ use.
 ``` r
 biorecap_report(output_dir=".", 
                 subject=c("bioinformatics", "infectious_diseases"), 
-                model="llama3.1")
+                model="llama3.2")
 ```
 
 Example HTML report generated from bioRxiv (bioinformatics) and
@@ -317,6 +317,6 @@ take some time):
 
 ``` r
 biorecap_report(output_dir=".", 
-                subject=subjects, 
+                subject=c(subjects$biorxiv, subjects$medrxiv),
                 model="llama3.2")
 ```
diff --git a/inst/rmarkdown/templates/biorecap/skeleton/skeleton.Rmd b/inst/rmarkdown/templates/biorecap/skeleton/skeleton.Rmd
@@ -8,6 +8,7 @@ params:
   subject: NULL
   nsentences: NULL
   model: NULL
+  host: NULL
   output_csv: NULL
   use_example_preprints: FALSE
 ---
@@ -34,7 +35,7 @@ if (params$use_example_preprints) {
   pp <- 
     get_preprints(subject=params$subject) |> 
     add_prompt() |> 
-    add_summary(model=params$model)
+    add_summary(model=params$model, host=params$host)
 }
 if (!is.null(output_csv)) {
   write.csv(pp, output_csv, row.names = FALSE)

diff --git a/man/add_summary.Rd b/man/add_summary.Rd
diff --git a/man/biorecap_report.Rd b/man/biorecap_report.Rd
diff --git a/tests/testthat/test-biorecap.R b/tests/testthat/test-biorecap.R
@@ -3,6 +3,13 @@ test_that("build_prompt_preprint", {
   expect_true(is.character(res))
   expect_error(build_prompt_preprint(title="A great paper", abstract="This is the abstract.", nsentences=0))
 })
+test_that("build_prompt_subject", {
+  # Build a subject-level prompt from the bundled example data: all titles and
+  # summaries belonging to the "bioinformatics" subject.
+  title <- example_preprints |> dplyr::filter(subject=="bioinformatics") |> dplyr::pull(title)
+  summary <- example_preprints |> dplyr::filter(subject=="bioinformatics") |> dplyr::pull(summary)
+  res <- build_prompt_subject(subject="bioinformatics", title=title, summary=summary)
+  expect_true(is.character(res))
+  # Exercise the function under test. The previous expectation called
+  # build_prompt_preprint() with an undefined `subject` object, so it did not
+  # test build_prompt_subject() at all.
+  # NOTE(review): assumes build_prompt_subject() accepts `nsentences` and
+  # rejects non-positive values, as build_prompt_preprint() does -- confirm.
+  expect_error(build_prompt_subject(subject="bioinformatics", title=title, summary=summary, nsentences=-1))
+})
 
 test_that("get_preprints", {
   expect_silent(preprints <- get_preprints(subject="all"))
@@ -25,13 +32,16 @@ test_that("add_prompt", {
 
 # Input validation for add_summary(). Every input here is either rejected or
 # has an empty `prompt` column.
 test_that("add_summary", {
   # Empty data frame lacking the 'preprints_prompt' class: expected to warn
   # and then error.
   expect_error(expect_warning(add_summary(data.frame())))
   # Not a data frame at all: expected to warn about the class and then error.
+  expect_warning(expect_error(add_summary("invalid")))
   # Has a `prompt` column but not the 'preprints_prompt' class: warning only.
   expect_warning(add_summary(data.frame(prompt=character())))
   # Correctly classed input with zero rows: no warning, message or output.
   expect_silent(add_summary(structure(data.frame(prompt=character()), class=c("preprints_prompt", "data.frame"))))
 })
 
 # tt_preprints(): table creation from example data, plus invalid-input checks.
 test_that("tt_preprints", {
   # First two rows of the bundled example data should render silently...
   expect_silent(res <- tt_preprints(example_preprints[1:2,]))
   # ...and yield an object inheriting from "tinytable".
   expect_true(inherits(res, "tinytable"))
   # A non-data-frame and an empty data frame are each expected to both warn
   # and error.
+  expect_warning(expect_error(tt_preprints("invalid")))
+  expect_warning(expect_error(tt_preprints(data.frame())))
 })
 
 test_that("tt_preprints", {