Updates to code and documentation (#46)

* up * up * up
EBI-Metagenomics · Sep 8, 2024 · 9ec7d4b · 9ec7d4b
1 parent 7ba865f
commit 9ec7d4b
Show file tree

Hide file tree

Showing 15 changed files with 115 additions and 106 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: MGnifyR
 Type: Package
-Version: 0.99.29
+Version: 0.99.30
 Authors@R:
     c(person(given = "Tuomas", family = "Borman", role = c("aut", "cre"),
              email = "[email protected]",
@@ -54,7 +54,7 @@ Suggests:
 URL: https://github.com/EBI-Metagenomics/MGnifyR
 BugReports: https://github.com/EBI-Metagenomics/MGnifyR/issues
 VignetteBuilder: knitr
-RoxygenNote: 7.3.0
+RoxygenNote: 7.3.1
 Collate:
     'utils.R'
     'MgnifyClient.R'

diff --git a/NAMESPACE b/NAMESPACE
@@ -76,7 +76,7 @@ importFrom(httr,write_disk)
 importFrom(methods,is)
 importFrom(methods,new)
 importFrom(mia,checkTaxonomy)
-importFrom(mia,loadFromBiom)
+importFrom(mia,importBIOM)
 importFrom(plyr,llply)
 importFrom(plyr,rbind.fill)
 importFrom(reshape2,dcast)

diff --git a/R/MgnifyClient.R b/R/MgnifyClient.R
@@ -3,10 +3,9 @@
 #'
 #' @details
 #' All functions in the MGnifyR package take a \code{MgnifyClient} object as
-#' their first argument. While not essential to querying the raw MGnify API
-#' (which is exposed as relative standard JSONAPI), the object allows the
-#' simple handling of both user authentication and access to private data,
-#' and local on-disk caching of results.
+#' their first argument. The object allows the simple handling of both user
+#' authentication and access to private data, and manages general options for
+#' querying the MGnify database.
 #'
 #' @param username A single character value specifying an optional username for
 #' authentication. (By default: \code{username = NULL})

diff --git a/R/doQuery.R b/R/doQuery.R
@@ -6,10 +6,10 @@
 #' power of the JSONAPI MGnify search filters. Search results may be filtered
 #' by metadata value, associated study/sample/analyse etc.
 #'
-#' See [Api browser](https://www.ebi.ac.uk/metagenomics/api/v1/) for
+#' See \href{https://www.ebi.ac.uk/metagenomics/api/v1/}{Api browser} for
 #' information on MGnify database filters.
 #' You can find help on customizing queries from
-#' [here](https://emg-docs.readthedocs.io/en/latest/api.html#customising-queries).
+#' \href{https://emg-docs.readthedocs.io/en/latest/api.html#customising-queries}{here}.
 #'
 #' For example the following filters are available:
 #' \itemize{
@@ -79,7 +79,7 @@
 #'
 #' @param ... Remaining parameter key/value pairs may be supplied to filter
 #' the returned values. Available options differ between \code{types}.
-#' See discussion above for details.
+#' See the Details section for more information.
 #'
 #' @return A nested list or data.frame containing the results of the query.
 #'

diff --git a/R/getFile.R b/R/getFile.R
@@ -12,11 +12,11 @@
 #' we wish to download.
 #'
 #' @param file A single character value or NULL specifying an
-#' optional local filename to use for saving the file. If NULL (default),
+#' optional local filename to use for saving the file. If \code{NULL},
 #' MGNify local cache settings will be used. If the file is intended to be
 #' processed in a separate program, it may be sensible to provide a
 #' meaningful \code{file}, rather than having to hunt through the
-#' cache folders. If \code{file} is NULL \emph{and} \code{useCache(client)}
+#' cache folders. If \code{file} is \code{NULL} and \code{useCache(client)}
 #' is \code{FALSE}, the \code{read.func} parameter must be supplied or the
 #' file will be downloaded and then deleted.
 #' (By default: \code{file = NULL})
@@ -31,9 +31,9 @@
 #'
 #' @param ... Additional arguments; not used currently.
 #'
-#' @return Either the local filename of the downloaded file, be it either the
-#' location in the MGNifyR cache or file. If \code{read.func} is
-#' used, its result will be returned.
+#' @return For \code{getFile()}, the local filename of the downloaded file,
+#' which is either the location in the MGNifyR cache or \code{file}. If
+#' \code{read.func} is used, its result will be returned.
 #'
 #' @examples
 #' # Make a client object
@@ -102,12 +102,12 @@ setMethod("getFile", signature = c(x = "MgnifyClient"), function(
 #' Listing files available for download
 #'
 #' @details
-#' THe function is a wrapper function allowing easy enumeration
-#' of downloads available for a given accession (or list thereof). Returns a
-#' single data.frame containing all available downloads and associated
+#' \code{searchFile()} is a wrapper function allowing easy enumeration of
+#' downloads available for the given accession IDs.
+#' Returns a single data.frame containing all available downloads and associated
 #' metadata, including the url location and description. This can then be
 #' filtered to extract the urls of interest, before actually
-#' retrieving the files using \code{fetFile()}
+#' retrieving the files using \code{getFile()}.
 #'
 #' @param accession A single character value or a vector of character values
 #' specifying accession IDs to return results for.
@@ -117,9 +117,9 @@ setMethod("getFile", signature = c(x = "MgnifyClient"), function(
 #' \code{studies}, \code{assembly}, \code{genome} or \code{run}.
 #' (By default: \code{type = "samples"})
 #'
-#' @return \code{data.frame} containing all discovered downloads. If
-#' multiple \code{accessions} are queried, the \code{accessions} column
-#' may to filter the results - since rownames are not set (and wouldn;'t
+#' @return For \code{searchFile()}, a \code{data.frame} containing all
+#' discovered downloads. If multiple \code{accessions} are queried, the
+#' \code{accessions} column may be used to filter the results, since rownames
+#' are not set (and wouldn't
 #' make sense as each query will return multiple items)
 #'
 #' @examples

diff --git a/R/getMetadata.R b/R/getMetadata.R
@@ -1,9 +1,9 @@
-#' Get all Study, Sample and Analysis metadata for the supplied analyses
+#' Get all study, sample and analysis metadata for the supplied analysis
 #' accessions
 #'
 #' @details
-#' The function retrieves all associated study, sample and analysis
-#' metadata attributes as a list of analyses accessions.
+#' The function retrieves all study, sample and analysis metadata associated
+#' with provided analysis accessions.
 #'
 #' @param x A \code{MgnifyClient} object.
 #'
@@ -12,8 +12,8 @@
 #' 
 #' @param ... Optional arguments; not currently used.
 #'
-#' @return \code{data.frame} of metadata for each analysis in the
-#' \code{accession} list.
+#' @return A \code{data.frame} containing metadata for each analysis in the
+#' \code{accession} list. Each row represents a single analysis.
 #'
 #' @examples
 #' # Create a client object

diff --git a/R/getResult.R b/R/getResult.R
@@ -13,44 +13,51 @@
 #' specifying accession IDs to return results for.
 #'
 #' @param output A single character value specifying the format of an output.
-#' Must be one of the following options: "TreeSE", "list", or "phyloseq".
-#' (By default: \code{output = "TreeSE"})
+#' Must be one of the following options: \code{"TreeSE"}, \code{"list"}, or 
+#' \code{"phyloseq"}. (By default: \code{output = "TreeSE"})
 #'
-#' @param get.taxa A boolean value specifying whether to retrieve metagenomic
-#' data. (By default: \code{get.taxa = TRUE})
+#' @param get.taxa A boolean value specifying whether to retrieve taxonomy
+#' data (OTU table). See \code{taxa.su} for specifying taxonomy type. The
+#' data is retrieved as BIOM files which are subsequently parsed.
+#' (By default: \code{get.taxa = TRUE})
 #'
 #' @param get.func A boolean value or a single character value or a vector
 #' character values specifying functional analysis types to retrieve. If
 #' \code{get.func = TRUE}, all available functional datatypes are retrieved,
 #' and if \code{FALSE}, functional data is not retrieved. The current list of
-#' available types is "antismash-gene-clusters", "go-slim", "go-terms",
-#' "interpro-identifiers", "taxonomy", "taxonomy-itsonedb",
-#' "taxonomy-itsunite", "taxonomy-lsu", and "taxonomy-ssu". Note that
-#' depending on the particular analysis type, pipeline version etc., not all
-#' functional results will be available. (By default: \code{get.func = TRUE})
+#' available types is \code{"antismash-gene-clusters"}, \code{"go-slim"},
+#' \code{"go-terms"}, \code{"interpro-identifiers"}, \code{"taxonomy"},
+#' \code{"taxonomy-itsonedb"}, \code{"taxonomy-itsunite"}, \code{"taxonomy-lsu"},
+#' and \code{"taxonomy-ssu"}. Note that depending on the particular analysis
+#' type, pipeline version etc., not all functional results will be available.
+#' Furthermore, taxonomy is also available via \code{get.func}, and loading
+#' the data might be considerably faster if \code{bulk.dl = TRUE}. However,
+#' phylogeny is available only via \code{get.taxa}.
+#' (By default: \code{get.func = TRUE})
 #'
 #' @param ... optional arguments:
 #' \itemize{
 #'   
 #'   \item \strong{taxa.su} A single character value specifying which taxa
-#'   subunit results should be selected? Currently, taxonomy assignments in the
+#'   subunit results should be selected. Currently, taxonomy assignments in the
 #'   MGnify pipelines rely on rRNA matches to existing databases
 #'   (GreenGenes and SILVA), with later pipelines checking both the SSU and
 #'   LSU portions of the rRNA sequence. \code{taxa.su} then allows selection
-#'   of either the Small subunit (SSU) or Large subunit results
-#'   in the final \code{TreeSummarizedExperiment} object. Older pipeline
+#'   of either the Small subunit (\code{"SSU"}) or Large subunit (\code{"LSU"})
+#'   results in the final \code{TreeSummarizedExperiment} object. Older pipeline
 #'   versions do not report results for both subunits, and thus for some
 #'   accessions this value will have no effect.
 #'
 #'   \item \strong{get.tree} A single boolean value specifying whether to
 #'   include available phylogenetic trees in the \code{TreeSummarizedExperiment}
-#'   object. (By default: \code{get.tree = TRUE})
+#'   object. Available when \code{get.taxa = TRUE}.
+#'   (By default: \code{get.tree = TRUE})
 #'
 #'   \item \strong{as.df} A single boolean value enabled when
 #'   \code{output = "list"}. The argument specifies whether to return functional
 #'   data as a named list (one entry per element in the output list) of
 #'   data.frames, with each data.frame containing results for all requested
-#'   accessions. If \code{FALSE}, The function returns a list of lists, each
+#'   accessions. If \code{FALSE}, the function returns a list of lists, each
 #'   element consisting of results for a single accession. (By default:
 #'   \code{as.df = TRUE})
 #'
@@ -69,13 +76,13 @@
 #' }
 #'
 #' @return
-#' If only metagenomic data is retrieved, the result is returned in
+#' If only taxonomy data is retrieved, the result is returned in
 #' \code{TreeSummarizedExperiment} object by default. The result can also be
 #' returned as a \code{phyloseq} object or as a list of \code{data.frames}.
 #' Note that \code{phyloseq} object can include only one phylogenetic tree
 #' meaning that some taxa might be lost when data is subsetted based on tree.
 #'
-#' When functional data is retrieved in addition to metagenomic data, the result
+#' When functional data is retrieved in addition to taxonomy data, the result
 #' is returned as a \code{MultiAssayExperiment} object. Other options are a list
 #' containing \code{phyloseq} object and \code{data.frames} or just
 #' \code{data.frames}.
@@ -158,7 +165,7 @@ setMethod("getResult", signature = c(x = "MgnifyClient"), function(
     }
     # Get microbial profiling data
     if( get.taxa ){
-        # The fetched BIOM files are parsed with mia::loadFromBiom, however,
+        # The fetched BIOM files are parsed with mia::importBIOM, however,
         # mia does not import biomformat, it is only in its "suggests". This is
         # why we have to check that biomformat is available
         .require_package("biomformat")
@@ -383,7 +390,7 @@ setMethod("getResult", signature = c(x = "MgnifyClient"), function(
 
 # Get a single biom file and convert it to TreeSummarizedExperiment format
 #
-#' @importFrom mia loadFromBiom
+#' @importFrom mia importBIOM
 #' @importFrom mia checkTaxonomy
 #' @importFrom urltools parameters parameters<-
 #' @importFrom httr GET
@@ -498,9 +505,9 @@ setMethod("getResult", signature = c(x = "MgnifyClient"), function(
     }
 
     # Load in the TreeSummarizedExperiment object
-    tse <- loadFromBiom(
-        biom_path, removeTaxaPrefixes = TRUE, only.taxa.col = TRUE,
-        rankFromPrefix = TRUE, remove.artifacts = TRUE)
+    tse <- importBIOM(
+        biom_path, rank.from.prefix = TRUE, set.ranks = TRUE, verbose = FALSE,
+        prefix.rm = TRUE, artifact.rm = TRUE)
     # If the file was not in store already but fetched from database, and cache
     # storing is disabled
     if( fetched_from_url && !use.cache ){

diff --git a/R/searchAnalysis.R b/R/searchAnalysis.R
@@ -2,7 +2,12 @@
 #'
 #' @details
 #' Retrieve analysis accession IDs associated with the supplied study or
-#' sample accession.
+#' sample accession.  In MGnify, an analysis accession refers to a certain
+#' pipeline analysis, such as specific 16S rRNA or shotgun metagenomic mapping.
+#' Studies can include multiple samples, and each sample can undergo multiple
+#' analyses using these pipelines. Each analysis is identified by a unique
+#' accession ID, allowing precise tracking and retrieval of analysis results
+#' within the MGnify database.
 #'
 #' @param x A \code{MgnifyClient} object.
 #'
@@ -15,7 +20,7 @@
 #'
 #' @param ... Optional arguments; not currently used.
 #'
-#' @return vector of analysis accession IDs.
+#' @return Vector of analysis accession IDs.
 #'
 #' @examples
 #' # Create a client object

diff --git a/README.md b/README.md
@@ -24,19 +24,6 @@ multi-omics data. Its primary objective is to deepen our understanding of the
 interactions between hosts and their microbiomes. You can find more information
 on [FindingPheno website](https://findingpheno.eu/).
 
-## Requirements
-
-```
-devtools # for installation
-mia
-plyr
-dplyr
-reshape2
-
-httr
-urltools
-```
-
 ## Installation
 
 ### Bioc-release

diff --git a/man/MgnifyClient.Rd b/man/MgnifyClient.Rd
diff --git a/man/doQuery.Rd b/man/doQuery.Rd
diff --git a/man/getFile.Rd b/man/getFile.Rd