From 219eb91266d9cb3c69750470c7508483613965d2 Mon Sep 17 00:00:00 2001
From: TuomasBorman
Date: Thu, 26 Sep 2024 06:15:55 +0000
Subject: [PATCH] =?UTF-8?q?Deploying=20to=20gh-pages=20from=20@=20EBI-Meta?=
 =?UTF-8?q?genomics/MGnifyR@d5eefe9fa09d423da4471dd10a54caa27975bf92=20?=
 =?UTF-8?q?=F0=9F=9A=80?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 articles/MGnifyR.html       |  2 +-
 articles/MGnify_course.html | 10 +++++-----
 pkgdown.yml                 |  2 +-
 search.json                 |  2 +-
 4 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/articles/MGnifyR.html b/articles/MGnifyR.html
index bd5dc1c..114fe4e 100644
--- a/articles/MGnifyR.html
+++ b/articles/MGnifyR.html
@@ -188,7 +188,7 @@

Create a client#> [1] TRUE #> #> Slot "cacheDir": -#> [1] "/tmp/RtmpmzZIqo/.MGnifyR_cache" +#> [1] "/tmp/RtmpAEJ3j1/.MGnifyR_cache" #> #> Slot "showWarnings": #> [1] FALSE diff --git a/articles/MGnify_course.html b/articles/MGnify_course.html index 10ca7d5..5b16a97 100644 --- a/articles/MGnify_course.html +++ b/articles/MGnify_course.html @@ -102,19 +102,19 @@

Load packagesupdate <- FALSE # Loads BiocManager into the session. Install it if it is not already installed. -if( !require("BiocManager") ){ +if( !require("BiocManager", quietly = TRUE) ){ install.packages("BiocManager") - library("BiocManager") + library("BiocManager", quietly = TRUE) } # If there are packages that need to be installed, installs them with # BiocManager -install(packages, update = update, ask = FALSE) |> +install(packages, update = update, ask = FALSE) |> suppressWarnings() |> suppressMessages() # Load all packages into session. Stop if there are packages that were not # successfully loaded -pkgs_not_loaded <- !sapply( - packages, require, quietly = TRUE, character.only = TRUE) +pkgs_not_loaded <- !sapply(packages, require, character.only = TRUE) |> + suppressMessages() pkgs_not_loaded <- names(pkgs_not_loaded)[ pkgs_not_loaded ] if( length(pkgs_not_loaded) > 0 ){ stop("Error in loading the following packages into the session: '", diff --git a/pkgdown.yml b/pkgdown.yml index 6df7c29..8ee7970 100644 --- a/pkgdown.yml +++ b/pkgdown.yml @@ -5,4 +5,4 @@ articles: MGnify_course: MGnify_course.html MGnifyR_long: MGnifyR_long.html MGnifyR: MGnifyR.html -last_built: 2024-09-26T06:00Z +last_built: 2024-09-26T06:12Z diff --git a/search.json b/search.json index 8baead6..e1b90b9 100644 --- a/search.json +++ b/search.json @@ -1 +1 @@ -[{"path":"/articles/MGnifyR.html","id":"introduction","dir":"Articles","previous_headings":"","what":"Introduction","title":"MGnifyR","text":"MGnifyR package designed ease access EBI’s MGnify resource, allowing searching retrieval multiple datasets downstream analysis. latest version MGnifyR seamlessly integrates miaverse framework providing access cutting-edge tools microbiome -stream analytics.","code":""},{"path":"/articles/MGnifyR.html","id":"installation","dir":"Articles","previous_headings":"","what":"Installation","title":"MGnifyR","text":"MGnifyR hosted Bioconductor, can installed using via BiocManager.","code":"BiocManager::install(\"MGnifyR\")"},{"path":"/articles/MGnifyR.html","id":"load-mgnifyr-package","dir":"Articles","previous_headings":"","what":"Load MGnifyR package","title":"MGnifyR","text":"installed, MGnifyR made available usual way.","code":"library(MGnifyR) #> Loading required package: MultiAssayExperiment #> Loading required package: SummarizedExperiment #> Loading required package: MatrixGenerics #> Loading required package: matrixStats #> #> Attaching package: 'MatrixGenerics' #> The following objects are masked from 'package:matrixStats': #> #> colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse, #> colCounts, colCummaxs, colCummins, colCumprods, colCumsums, #> colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs, #> colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats, #> colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds, #> colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads, #> colWeightedMeans, colWeightedMedians, colWeightedSds, #> colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet, #> rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods, #> rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps, #> rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins, #> rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks, #> rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars, #> rowWeightedMads, rowWeightedMeans, rowWeightedMedians, #> rowWeightedSds, rowWeightedVars #> Loading required package: GenomicRanges #> Loading required package: 
stats4 #> Loading required package: BiocGenerics #> #> Attaching package: 'BiocGenerics' #> The following objects are masked from 'package:stats': #> #> IQR, mad, sd, var, xtabs #> The following objects are masked from 'package:base': #> #> anyDuplicated, aperm, append, as.data.frame, basename, cbind, #> colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find, #> get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply, #> match, mget, order, paste, pmax, pmax.int, pmin, pmin.int, #> Position, rank, rbind, Reduce, rownames, sapply, setdiff, table, #> tapply, union, unique, unsplit, which.max, which.min #> Loading required package: S4Vectors #> #> Attaching package: 'S4Vectors' #> The following object is masked from 'package:utils': #> #> findMatches #> The following objects are masked from 'package:base': #> #> expand.grid, I, unname #> Loading required package: IRanges #> Loading required package: GenomeInfoDb #> Loading required package: Biobase #> Welcome to Bioconductor #> #> Vignettes contain introductory material; view with #> 'browseVignettes()'. To cite Bioconductor, see #> 'citation(\"Biobase\")', and for packages 'citation(\"pkgname\")'. #> #> Attaching package: 'Biobase' #> The following object is masked from 'package:MatrixGenerics': #> #> rowMedians #> The following objects are masked from 'package:matrixStats': #> #> anyMissing, rowMedians #> Loading required package: TreeSummarizedExperiment #> Loading required package: SingleCellExperiment #> Loading required package: Biostrings #> Loading required package: XVector #> #> Attaching package: 'Biostrings' #> The following object is masked from 'package:base': #> #> strsplit"},{"path":"/articles/MGnifyR.html","id":"create-a-client","dir":"Articles","previous_headings":"","what":"Create a client","title":"MGnifyR","text":"functions MGnifyR make use MgnifyClient object keep track JSONAPI url, disk cache location user access tokens. Thus first thing starting analysis instantiate object. following snippet creates . MgnifyClient object contains slots previously mentioned settings.","code":"mg <- MgnifyClient(useCache = TRUE) mg #> An object of class \"MgnifyClient\" #> Slot \"databaseUrl\": #> [1] \"https://www.ebi.ac.uk/metagenomics/api/v1\" #> #> Slot \"authTok\": #> [1] NA #> #> Slot \"useCache\": #> [1] TRUE #> #> Slot \"cacheDir\": #> [1] \"/tmp/RtmpmzZIqo/.MGnifyR_cache\" #> #> Slot \"showWarnings\": #> [1] FALSE #> #> Slot \"clearCache\": #> [1] FALSE #> #> Slot \"verbose\": #> [1] TRUE"},{"path":[]},{"path":"/articles/MGnifyR.html","id":"search-data","dir":"Articles","previous_headings":"Functions for fetching the data","what":"Search data","title":"MGnifyR","text":"doQuery() function can utilized search results samples studies MGnify database. , fetch information drinking water samples. result table containing accession IDs description – case – samples.","code":"# Fetch studies samples <- doQuery( mg, type = \"samples\", biome_name = \"root:Environmental:Aquatic:Freshwater:Drinking water\", max.hits = 10) colnames(samples) |> head() #> [1] \"biosample\" \"accession\" \"sample-desc\" #> [4] \"environment-biome\" \"environment-feature\" \"environment-material\""},{"path":"/articles/MGnifyR.html","id":"find-relevent-analyses-accessions","dir":"Articles","previous_headings":"Functions for fetching the data","what":"Find relevent analyses accessions","title":"MGnifyR","text":"Now want find analysis accessions. sample might multiple analyses. 
analysis ID corresponds single run particular pipeline single sample single study. running searchAnalysis() function, get vector analysis IDs samples fed input.","code":"analyses_accessions <- searchAnalysis(mg, \"samples\", samples$accession) analyses_accessions |> head() #> [1] \"MGYA00652201\" \"MGYA00652185\" \"MGYA00643487\" \"MGYA00643486\" \"MGYA00643485\" #> [6] \"MGYA00643484\""},{"path":"/articles/MGnifyR.html","id":"fetch-metadata","dir":"Articles","previous_headings":"Functions for fetching the data","what":"Fetch metadata","title":"MGnifyR","text":"can now check metadata get hint kind data . use getMetadata() function fetch data based analysis IDs. returned value data.frame includes metadata example analysis conducted kind samples analyzed.","code":"analyses_metadata <- getMetadata(mg, analyses_accessions) colnames(analyses_metadata) |> head() #> [1] \"analysis_analysis-status\" \"analysis_pipeline-version\" #> [3] \"analysis_experiment-type\" \"analysis_accession\" #> [5] \"analysis_is-private\" \"analysis_complete-time\""},{"path":"/articles/MGnifyR.html","id":"fetch-microbiome-data","dir":"Articles","previous_headings":"Functions for fetching the data","what":"Fetch microbiome data","title":"MGnifyR","text":"selected data fetch, can use getResult() output TreeSummarizedExperiment (TreeSE) MultiAssayExperiment (MAE) depending dataset. dataset includes taxonomic profiling data, output single TreeSE. dataset includes also functional data, output multiple TreeSE objects linked together utilizing MAE. can get access individual TreeSE object MAE specifying index name. TreeSE object uniquely positioned support SummarizedExperiment-based microbiome data manipulation visualization. Moreover, enables access miaverse tools. example, can estimate diversity samples… … plot abundances abundant phyla. can also perform analyses principal component analysis microbial profiling data utilizing miaverse tools.","code":"mae <- getResult(mg, accession = analyses_accessions) mae #> A MultiAssayExperiment object of 6 listed #> experiments with user-defined names and respective classes. #> Containing an ExperimentList class object of length 6: #> [1] microbiota: TreeSummarizedExperiment with 3506 rows and 50 columns #> [2] go-slim: TreeSummarizedExperiment with 116 rows and 38 columns #> [3] go-terms: TreeSummarizedExperiment with 3133 rows and 38 columns #> [4] interpro-identifiers: TreeSummarizedExperiment with 18223 rows and 38 columns #> [5] taxonomy: TreeSummarizedExperiment with 3617 rows and 50 columns #> [6] taxonomy-lsu: TreeSummarizedExperiment with 3378 rows and 42 columns #> Functionality: #> experiments() - obtain the ExperimentList instance #> colData() - the primary/phenotype DataFrame #> sampleMap() - the sample coordination DataFrame #> `$`, `[`, `[[` - extract colData columns, subset, or experiment #> *Format() - convert into a long or wide DataFrame #> assays() - convert ExperimentList to a SimpleList of matrices #> exportClass() - save data to flat files mae[[1]] #> class: TreeSummarizedExperiment #> dim: 3506 50 #> metadata(0): #> assays(1): counts #> rownames(3506): 82608 62797 ... 5820 6794 #> rowData names(9): Kingdom Phylum ... taxonomy1 taxonomy #> colnames(50): MGYA00144458 MGYA00144419 ... MGYA00652185 MGYA00652201 #> colData names(64): analysis_analysis.status analysis_pipeline.version #> ... 
sample_geo.loc.name sample_instrument.model #> reducedDimNames(0): #> mainExpName: NULL #> altExpNames(0): #> rowLinks: NULL #> rowTree: NULL #> colLinks: NULL #> colTree: NULL library(mia) #> This is mia version 1.13.36 #> - Online documentation and vignettes: https://microbiome.github.io/mia/ #> - Online book 'Orchestrating Microbiome Analysis (OMA)': https://microbiome.github.io/OMA/docs/devel/ mae[[1]] <- estimateDiversity(mae[[1]], index = \"shannon\") #> Warning in estimateDiversity(mae[[1]], index = \"shannon\"): 'estimateDiversity' #> is deprecated. Use 'addAlpha' instead. library(scater) #> Loading required package: scuttle #> Loading required package: ggplot2 plotColData(mae[[1]], \"shannon\", x = \"sample_environment..biome.\") # Agglomerate data altExps(mae[[1]]) <- splitByRanks(mae[[1]]) library(miaViz) #> Loading required package: ggraph #> #> Attaching package: 'miaViz' #> The following object is masked from 'package:mia': #> #> plotNMDS # Plot top taxa top_taxa <- getTopFeatures(altExp(mae[[1]], \"Phylum\"), 10) #> Warning in getTopFeatures(altExp(mae[[1]], \"Phylum\"), 10): 'getTopFeatures' is #> deprecated. Use 'getTop' instead. plotAbundance( altExp(mae[[1]], \"Phylum\")[top_taxa, ], rank = \"Phylum\", as.relative = TRUE ) #> Warning: The following values are already present in `metadata` and will be #> overwritten: 'agglomerated_by_rank'. Consider using the 'name' argument to #> specify alternative names. # Apply relative transformation mae[[1]] <- transformAssay(mae[[1]], method = \"relabundance\") # Perform PCoA mae[[1]] <- runMDS( mae[[1]], assay.type = \"relabundance\", FUN = vegan::vegdist, method = \"bray\") # Plot plotReducedDim( mae[[1]], \"MDS\", colour_by = \"sample_environment..biome.\")"},{"path":"/articles/MGnifyR.html","id":"fetch-raw-files","dir":"Articles","previous_headings":"Functions for fetching the data","what":"Fetch raw files","title":"MGnifyR","text":"getResult() can utilized retrieve microbial profiling data, getData() can used flexibly retrieve kind data database. returns data simple data.frame list format. result data.frame default. case, includes information publications fetched data portal.","code":"publications <- getData(mg, type = \"publications\") colnames(publications) |> head() #> [1] \"document.id\" \"type\" #> [3] \"id\" \"attributes.pubmed-id\" #> [5] \"attributes.pubmed-central-id\" \"attributes.pub-title\""},{"path":"/articles/MGnifyR.html","id":"fetch-sequence-files","dir":"Articles","previous_headings":"Functions for fetching the data","what":"Fetch sequence files","title":"MGnifyR","text":"Finally, can use searchFile() getFile() retrieve MGnify pipeline outputs merged sequence reads, assembled contigs, details functional analyses. searchFile(), can search files database. returned table contains search results related analyses fed input. table contains information file also URL address file can loaded. Finally, can download files getFile(). function returns path file stored.","code":"dl_urls <- searchFile(mg, analyses_accessions, type = \"analyses\") target_urls <- dl_urls[ dl_urls$attributes.description.label == \"Predicted alpha tmRNA\", ] colnames(target_urls) |> head() #> [1] \"type\" \"id\" #> [3] \"attributes.alias\" \"attributes.file.format.name\" #> [5] \"attributes.file.format.extension\" \"attributes.file.format.compression\" # Just select a single file from the target_urls list for demonstration. file_url <- target_urls$download_url[[1]] cached_location <- getFile(mg, file_url) # Where are the files? 
cached_location #> [1] \"/.MGnifyR_cache/analyses/MGYA00652201/file/ERZ20300939_alpha_tmRNA.RF01849.fasta.gz\" sessionInfo() #> R version 4.4.1 (2024-06-14) #> Platform: x86_64-pc-linux-gnu #> Running under: Ubuntu 22.04.4 LTS #> #> Matrix products: default #> BLAS: /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3 #> LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.20.so; LAPACK version 3.10.0 #> #> locale: #> [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C #> [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8 #> [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8 #> [7] LC_PAPER=en_US.UTF-8 LC_NAME=C #> [9] LC_ADDRESS=C LC_TELEPHONE=C #> [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C #> #> time zone: UTC #> tzcode source: system (glibc) #> #> attached base packages: #> [1] stats4 stats graphics grDevices utils datasets methods #> [8] base #> #> other attached packages: #> [1] miaViz_1.13.10 ggraph_2.2.1 #> [3] scater_1.33.4 ggplot2_3.5.1 #> [5] scuttle_1.15.4 mia_1.13.36 #> [7] MGnifyR_0.99.30 TreeSummarizedExperiment_2.13.0 #> [9] Biostrings_2.73.1 XVector_0.45.0 #> [11] SingleCellExperiment_1.27.2 MultiAssayExperiment_1.31.5 #> [13] SummarizedExperiment_1.35.1 Biobase_2.65.1 #> [15] GenomicRanges_1.57.1 GenomeInfoDb_1.41.1 #> [17] IRanges_2.39.2 S4Vectors_0.43.2 #> [19] BiocGenerics_0.51.1 MatrixGenerics_1.17.0 #> [21] matrixStats_1.4.1 knitr_1.48 #> [23] BiocStyle_2.33.1 #> #> loaded via a namespace (and not attached): #> [1] splines_4.4.1 ggplotify_0.1.2 #> [3] urltools_1.7.3 tibble_3.2.1 #> [5] triebeard_0.4.1 polyclip_1.10-7 #> [7] rpart_4.1.23 DirichletMultinomial_1.47.0 #> [9] lifecycle_1.0.4 lattice_0.22-6 #> [11] MASS_7.3-61 SnowballC_0.7.1 #> [13] backports_1.5.0 magrittr_2.0.3 #> [15] Hmisc_5.1-3 sass_0.4.9 #> [17] rmarkdown_2.28 jquerylib_0.1.4 #> [19] yaml_2.3.10 DBI_1.2.3 #> [21] minqa_1.2.8 abind_1.4-8 #> [23] zlibbioc_1.51.1 purrr_1.0.2 #> [25] yulab.utils_0.1.7 nnet_7.3-19 #> [27] tweenr_2.0.3 sandwich_3.1-0 #> [29] GenomeInfoDbData_1.2.12 ggrepel_0.9.6 #> [31] tokenizers_0.3.0 irlba_2.3.5.1 #> [33] tidytree_0.4.6 vegan_2.6-8 #> [35] rbiom_1.0.3 tidyjson_0.3.2 #> [37] pkgdown_2.1.1 permute_0.9-7 #> [39] DelayedMatrixStats_1.27.3 codetools_0.2-20 #> [41] DelayedArray_0.31.11 ggforce_0.4.2 #> [43] tidyselect_1.2.1 aplot_0.2.3 #> [45] UCSC.utils_1.1.0 farver_2.1.2 #> [47] lme4_1.1-35.5 ScaledMatrix_1.13.0 #> [49] viridis_0.6.5 base64enc_0.1-3 #> [51] jsonlite_1.8.8 BiocNeighbors_1.99.0 #> [53] decontam_1.25.0 tidygraph_1.3.1 #> [55] Formula_1.2-5 systemfonts_1.1.0 #> [57] ggnewscale_0.5.0 tools_4.4.1 #> [59] treeio_1.29.1 ragg_1.3.3 #> [61] Rcpp_1.0.13 glue_1.7.0 #> [63] gridExtra_2.3 SparseArray_1.5.34 #> [65] BiocBaseUtils_1.7.3 xfun_0.47 #> [67] mgcv_1.9-1 dplyr_1.1.4 #> [69] withr_3.0.1 BiocManager_1.30.25 #> [71] fastmap_1.2.0 boot_1.3-31 #> [73] bluster_1.15.1 fansi_1.0.6 #> [75] digest_0.6.37 rsvd_1.0.5 #> [77] gridGraphics_0.5-1 R6_2.5.1 #> [79] textshaping_0.4.0 colorspace_2.1-1 #> [81] lpSolve_5.6.21 utf8_1.2.4 #> [83] tidyr_1.3.1 generics_0.1.3 #> [85] data.table_1.16.0 DECIPHER_3.1.4 #> [87] graphlayouts_1.1.1 httr_1.4.7 #> [89] htmlwidgets_1.6.4 S4Arrays_1.5.7 #> [91] pkgconfig_2.0.3 gtable_0.3.5 #> [93] janeaustenr_1.0.0 htmltools_0.5.8.1 #> [95] bookdown_0.40 scales_1.3.0 #> [97] ggfun_0.1.6 rstudioapi_0.16.0 #> [99] reshape2_1.4.4 checkmate_2.3.2 #> [101] nlme_3.1-166 nloptr_2.1.1 #> [103] cachem_1.1.0 zoo_1.8-12 #> [105] stringr_1.5.1 parallel_4.4.1 #> [107] vipor_0.4.7 foreign_0.8-87 #> [109] desc_1.4.3 pillar_1.9.0 #> [111] grid_4.4.1 
vctrs_0.6.5 #> [113] slam_0.1-53 BiocSingular_1.21.3 #> [115] beachmat_2.21.6 cluster_2.1.6 #> [117] beeswarm_0.4.0 htmlTable_2.4.3 #> [119] evaluate_0.24.0 mvtnorm_1.3-1 #> [121] cli_3.6.3 compiler_4.4.1 #> [123] rlang_1.1.4 crayon_1.5.3 #> [125] tidytext_0.4.2 labeling_0.4.3 #> [127] mediation_4.5.0 plyr_1.8.9 #> [129] fs_1.6.4 ggbeeswarm_0.7.2 #> [131] stringi_1.8.4 viridisLite_0.4.2 #> [133] BiocParallel_1.39.0 assertthat_0.2.1 #> [135] munsell_0.5.1 lazyeval_0.2.2 #> [137] Matrix_1.7-0 patchwork_1.2.0 #> [139] sparseMatrixStats_1.17.2 highr_0.11 #> [141] igraph_2.0.3 memoise_2.0.1 #> [143] RcppParallel_5.1.9 bslib_0.8.0 #> [145] ggtree_3.13.1 ape_5.8"},{"path":"/articles/MGnifyR_long.html","id":"introduction","dir":"Articles","previous_headings":"","what":"Introduction","title":"MGnifyR, extended vignette","text":"MGnifyR package designed ease access EBI’s MGnify resource, allowing searching retrieval multiple datasets downstream analysis. MGnify pipelines undoubtedly useful, currently implemented produce results strictly per-sample basis. whole study results available, comparisons across studies difficult. MGnifyR package designed facilitate cross-study analyses handling per-sample data retrieval merging details internally, leaving user free perform analysis see fit. latest version MGnifyR seamlessly integrates miaverse framework providing access tools microbiome -stream analytics. integration enables users leverage optimized standardized methods analyzing microbiome. Additionally, users can benefit comprehensive tutorial book offers valuable guidance support.","code":""},{"path":"/articles/MGnifyR_long.html","id":"installation","dir":"Articles","previous_headings":"","what":"Installation","title":"MGnifyR, extended vignette","text":"MGnifyR currently hosted GitHub, can installed using via devtools. MGnifyR built using following snippet.","code":"BiocManager::install(\"MGnifyR\")"},{"path":"/articles/MGnifyR_long.html","id":"load-mgnifyr-package","dir":"Articles","previous_headings":"","what":"Load MGnifyR package","title":"MGnifyR, extended vignette","text":"installed, MGnifyR made available usual way.","code":"library(MGnifyR)"},{"path":"/articles/MGnifyR_long.html","id":"create-a-client","dir":"Articles","previous_headings":"","what":"Create a client","title":"MGnifyR, extended vignette","text":"functions MGnifyR make use MgnifyClient object keep track JSONAPI url, disk cache location user access tokens. Thus first thing starting analysis instantiate object. following snippet creates . ’s recommended local caching enabled useCache = TRUE. Queries MGnify API can quite slow, particularly retrieving multipage results many analyses (many Interpro results). Using local disk cache can significantly speed subsequent work, bypassing need re-query API. Use cache entirely transparent, caching occurs raw data level. cache can persist across MGnifyR sessions, can even used multiple sessions simultaneously - provided different sets accessions queried . Optionally, username password may specified client creation, causing MGnifyR attempt retrieval authentication token API. 
gives access non-public results, currently author imposed embargo period.","code":"mg <- MgnifyClient() mg mg <- MgnifyClient( username = \"Webin-username\", password = \"your-password\", useCache = TRUE)"},{"path":[]},{"path":"/articles/MGnifyR_long.html","id":"search-data","dir":"Articles","previous_headings":"Functions for fetching the data","what":"Search data","title":"MGnifyR, extended vignette","text":"MGnifyR gives users access complete range search functionality implemented MGnify JSON API. single function doQuery() used perform searching, allowing Studies, Samples, Runs Accession interrogated common interface. MGnifyR functions first argument client must valid MgnifyClient instance. remaining required parameter qtype, specifying type data queried, may one studies, samples, runs, analyses assemblies. general parameter include max.hits. Unlike MGnifyR high level functions, caching turned default doQuery(). New data analyses added MGnify time, enabling caching default may lead --date search results long-lived sessions. However, ’s easy switch back , may useful many cases. Also, given huge ever increasing number datasets available MGnify, limit number results returned may set using max.hits. default set 200, exploratory queries sufficient. may increased decreased directly specifying max.hits, disabled completely (limit) setting max.hits=NULL. cases want specific search, also use either accession parameter, many filter options available API, discussed . Specifying accession id, case samples, runs assemblies may vector ids, returns data.frame metadata one row per matching accession. accession NULL (default) remaining parameters define filters applied API search result. Details parameters given help(doQuery). way example though, supposing interested amplicon Illumina samples arctic, might try following query: Specifying accession parameter restrict results just matching particular entry, study, sample run. example, retrieve information study “MGYS00002891”:","code":"northpolar <- doQuery( mg, \"samples\", latitude_gte=60.0, experiment_type=\"amplicon\", biome_name=\"Soil\", instrument_platform = \"Illumina\", max.hits = 10) head(northpolar) study_samples <- doQuery(mg, \"studies\", accession=\"MGYS00002891\") head(study_samples)"},{"path":"/articles/MGnifyR_long.html","id":"find-relevent-analyses-accessions","dir":"Articles","previous_headings":"Functions for fetching the data","what":"Find relevent analyses accessions","title":"MGnifyR, extended vignette","text":"obtained particular set search hits, ’s now time retrieve associated results. General automated analysis complicated MGnify database design, wherein example samples may shared multiple studies, studies analysed multiple times using different versions pipeline. Navigating “many--one” relationships can tricky, MGnifyR resorts using analyses accessions ’s canonical identifier. analysis corresponds single run particular pipeline single sample single study. downside approach queries returning studies, samples (anything analyses) accessions need converting corresponding analyses. MGnifyR therefore provides helper function handle conversion - searchAnalysis(). Following previous search, list study accessions, convert corresponding analyses use: useful side effect call attribute metadata sample now retrieved stored local cache. Thus subsequent API calls samples (occur multiple times later steps) significantly faster. ’s important aware results searchAnalysis() command necessarily one--one match input accessions. 
MGnify analysis runs sometimes performed multiple times, perhaps using different versions pipeline. Thus filtering result list may required, easily performed illustrated next section.","code":"analyses_accessions <- searchAnalysis( mg, type=\"studies\", accession = study_samples$accession) head(analyses_accessions)"},{"path":"/articles/MGnifyR_long.html","id":"fetch-metadata","dir":"Articles","previous_headings":"Functions for fetching the data","what":"Fetch metadata","title":"MGnifyR, extended vignette","text":"point long list analysis instances (potential duplicates) corresponding samples previously found. use getMetadata function download combine associated sample, run study metadata, filter required include rows want. resulting data.frame columns names prefixed source type. example, “sample_xxx” columns correspond metadata gleaned querying accession’s sample entry. MGnify allows quite flexible specification arbitray metadata submission time, many cases leading quite sparse data.frame results accession queries sourced one study. instance, one sample contains entry “sample_soil_PH”, entries rows filled NA. MGnifyR automatically clean missing values - instead opting allow user choose correct action. particular study ’re looking marine biome, suppose interested samples analyses sampling depth known. following snippet filters full data.frame selecting entries contain valid sample_depth. ’s worth noting .numeric call ensure column converted numeric type checked. sample data MGnifyR initially retrieved type character, ’s user make sure ostensibly numeric entries converted properly.","code":"analyses_metadata <- getMetadata(mg, analyses_accessions) head(analyses_metadata) known_depths <- analyses_metadata[ !is.na(as.numeric(analyses_metadata$sample_depth)), ] # How many are left? dim(known_depths)"},{"path":"/articles/MGnifyR_long.html","id":"fetch-microbiome-data","dir":"Articles","previous_headings":"Functions for fetching the data","what":"Fetch microbiome data","title":"MGnifyR, extended vignette","text":"selected analyses wish examine , getResult() used download associated OTU tables taxonomy, join results single TreeSummarizedExperiment (TreeSE) object. TreeSE becoming defacto standard taxonomic abundance munging R. TreeSE objects integrate abundance, taxonomic, phylogenetic, sample sequence data single object, powerful facilities filtering, processing plotting results. Compared phyloseq object, TreeSE scalable capable efficient data analysis. miaverse framework developed around TreeSE data container. provides tools analysis visualization. Moreover, includes comprehensive tutorial book called OMA.","code":""},{"path":"/articles/MGnifyR_long.html","id":"amplicon-sequencing","dir":"Articles","previous_headings":"Functions for fetching the data > Fetch microbiome data","what":"Amplicon sequencing","title":"MGnifyR, extended vignette","text":"dataset includes amplicon sequencing data, .e., dataset include function predictions, getResult() method returns dataset TreeSE default. See output types function documentation. TreeSE object uniquely positioned support SummarizedExperiment-based microbiome data manipulation visualization. Moreover, enables access miaverse tools. example, can estimate diversity samples. 
needed, TreeSE can converted phyloseq.","code":"tse <- getResult(mg, accession = analyses_accessions, get.func = FALSE) tse library(mia) tse <- estimateDiversity(tse, index = \"shannon\") library(scater) plotColData(tse, \"shannon\", x = \"sample_geo.loc.name\") library(miaViz) plotAbundance( tse[!is.na( rowData(tse)[[\"Kingdom\"]] ), ], rank = \"Kingdom\", as.relative = TRUE ) pseq <- makePhyloseqFromTreeSE(tse) pseq"},{"path":"/articles/MGnifyR_long.html","id":"metagenomics","dir":"Articles","previous_headings":"Functions for fetching the data > Fetch microbiome data","what":"Metagenomics","title":"MGnifyR, extended vignette","text":"Although previous queries based results doQuery(), now concentrate combining comparing results specific studies. Since newly performed analyses retrieved first doQuery() call, ’s likely time vignette read, query results different. principally due rapid increase MGnify submissions, leading potential lack consistency even closely spaced queries. mentioned previously, may best use useCache=FALSE MgnifyCLient object doQuery() calls, ensure queries actually returning latest data. remainder vignette however, ’ll comparing 3 ostensibly different studies. study saltmarsh soils York University, human faecal samples survey healthy Sardinians, set samples hydrothermal vents Mid-Cayman rise Carribbean Sea. simplify things, first 20 samples study used. Furthermore, intention demonstrate functionality MGnifyR package, rather produce scientifically rigorous results. first step new accession list , previously, retrieve associated metadata using getMetadata(), seen doQuery() results, returned data.frame contains large number columns. autogenerated flexible, column names can little difficult predict, examining colnames(full_metadata) make things clearer. full_metadata get idea type data ’re dealing , can extract useul information sequencing platform, source biome, etc. next code snippet tallies columns give idea ’s available. boxplot also indicates within study read counts similar, probably need use sort normalization procedure comparing across samples. might also want drop particularly low read coverage samples analysis. , can fetch data calling getResult(). bulk.dl=TRUE potential significantly speed data retrieval. MGnify makes functional results available two separate ways, either per-analysis basis web api, whole study level large files, tab separated (TSV), columns representing results analysis. bulk.dl FALSE, MGnifyR queries web api get results (given functional analyses results may consist thousands entries) may take significant time. Setting bulk.dl TRUE causes MGnifyR determine source study associated particular analysis instead download parse corresponding results file. Since result file contains entries analyses associated study, taking advantage MGnifyR’s local caching single download provides results many future analyses. cases affords several orders magnitude speedup api query case. Unfortunately, column entries per-study results files always directly correspond particular analysis run, causing retrieval fail. principal cause believed running multiple analyses jobs sample. Thus reliability, bulk.dl FALSE default. general recommendation though, try setting TRUE first time getResult() used set accessions. fails, setting bulk.dl FALSE enable robust approach allowing analysis continue. might take though. Hopefully future sample/analysis correspondence mismatches fixed default bulk.dl switch TRUE. 
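The fallback strategy described above can be written as a small wrapper. This is only an illustrative sketch, not part of the vignette itself: it assumes all_accessions is the combined vector of analysis accessions assembled in this section's code chunk, and simply retries with the slower per-analysis queries if the bulk download errors out.

# Try the fast per-study bulk download first; if it fails, fall back to the
# slower but more robust per-analysis API queries.
mae <- tryCatch(
    getResult(mg, all_accessions, bulk.dl = TRUE),
    error = function(e){
        message("Bulk download failed; retrying with bulk.dl = FALSE.")
        getResult(mg, all_accessions, bulk.dl = FALSE)
    }
)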
metagenomic samples, result MultiAssayExperiment (MAE) links multiple TreeSE objects one dataset. TreeSE objects include taxonomic profiling data along functional data unique objects. objects linked sample names. can get access individual object experiment specifying index name. can perform principal component analysis microbial profiling data utilizing miaverse tools.","code":"soil <- searchAnalysis(mg, \"studies\", \"MGYS00001447\") human <- searchAnalysis(mg, \"studies\", \"MGYS00001442\") marine <- searchAnalysis(mg, \"studies\", \"MGYS00001282\") # Combine analyses all_accessions <- c(soil, human, marine) head(all_accessions) full_metadata <- getMetadata(mg, all_accessions) colnames(full_metadata) head(full_metadata) # Load ggplot2 library(ggplot2) #Distribution of sample source material: table(full_metadata$`sample_environment-material`) #What sequencing machine(s) were used? table(full_metadata$`sample_instrument model`) # Boxplot of raw read counts: ggplot( full_metadata, aes(x=study_accession, y=log( as.numeric(`analysis_Submitted nucleotide sequences`)))) + geom_boxplot(aes(group=study_accession)) + theme_bw() + ylab(\"log(submitted reads)\") mae <- getResult(mg, all_accessions, bulk.dl = TRUE) mae mae[[2]] # Apply relative transformation mae[[1]] <- transformAssay(mae[[1]], method = \"relabundance\") # Perform PCoA mae[[1]] <- runMDS( mae[[1]], assay.type = \"relabundance\", FUN = vegan::vegdist, method = \"bray\") # Plot plotReducedDim(mae[[1]], \"MDS\", colour_by = \"sample_environment.feature\")"},{"path":"/articles/MGnifyR_long.html","id":"fetch-raw-files","dir":"Articles","previous_headings":"Functions for fetching the data","what":"Fetch raw files","title":"MGnifyR, extended vignette","text":"getResult() can utilized retrieve microbial profiling data, getData() can used flexibly retrieve kind data database. returns data simple data.frame list format.","code":"kegg <- getData( mg, type = \"kegg-modules\", accession = \"MGYA00642773\", accession.type = \"analyses\") head(kegg)"},{"path":"/articles/MGnifyR_long.html","id":"fetch-sequence-files","dir":"Articles","previous_headings":"Functions for fetching the data","what":"Fetch sequence files","title":"MGnifyR, extended vignette","text":"Finally, can use searchFile() getFile() retrieve MGnify pipeline outputs merged sequence reads, assembled contigs, details functional analyses. searchFile() simple wrapper function , supplied list accessions, finds urls files ’re . cases ’ll want filter returned list files interest, easily done resulting data.frame object. addition actual download location (download_url column), extra columns include file type, contents compression. ’s recommended colnames data.frame examined get grasp available metadata. demonstrate process, code retrieves data.frame containing available downloads accession ’ve examining previously. filters retain files corresponding retain annotated amino acid sequence files. list types available files, guide filtering, something like following might useful. Unlike MGnifyR functions, searchFile() limited analyses, specifying accession_type results types may found. instance, general genome functionality yet integrated MGnifyR, can retrieve associated files particular genome accession following: found set target urls, final step use getFile() actually retrieve file. Unlike functions, works single url location , entry target_urls must downloaded individually - easily done either looping applying list. 
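As a concrete illustration of the looping/applying approach mentioned above, a minimal sketch (assuming target_urls is the filtered table of download URLs built from searchFile() in this section):

# getFile() returns the local path of each downloaded (or cached) file,
# so applying it over the URL column yields a character vector of paths.
local_paths <- vapply(
    target_urls$download_url,
    function(url) getFile(mg, url),
    character(1)
)
local_paths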
files intended used external programs, might easiest provide file parameter function call, specifies local filename writing file. default MGnifyR use local cache, can make getting file afterwards awkward. Regardless, default behaviour getFile() retrieve file specified parameter url, save disk, return filepath saved . second download option available, allows built-parsing file. know ahead time processing performed, may possible integrate function, pass function getFile() read.func argument. function question take single argument (complete path name locally downloaded file) result call returned place usual output file name. Alternatively files first downloaded standard way, processed using function loop. Therefore many cases read.func parameter redundant. However, many outputs MGnify can quite large, meaning local storage many files may become issue. providing read_func parameter (necessarily setting MgnifyClient object: useCache=FALSE) analysis large number datasets may possible minimal storage requirements. illustrate, suppose interested retrieving detected sequences matching particular PFAM motif set analyses. simple function uses Biostrings package read amino acid fasta file, searches matching PFAM tag sequence name, tallies unique sequences single data.frame row. case PFAM motif identifies sequences coding amoC gene, found ammonia methane oxidizing organisms, filtering method used. defined function, just remains include call getFile().","code":"# Find list of available downloads dl_urls <- searchFile( mg, full_metadata$analysis_accession, type = \"analyses\") # Filter table target_urls <- dl_urls[ dl_urls$attributes.description.label == \"Predicted CDS with annotation\", ] head(target_urls) table(dl_urls$attributes.description.label) genome_urls <- searchFile(mg, \"MGYG000433953\", type = \"genomes\") genome_urls[ , c(\"id\", \"attributes.file.format.name\", \"download_url\")] # Just select a single file from the target_urls list for demonstration. # Default behavior - use local cache. cached_location1 = getFile(mg, target_urls$download_url[[1]]) # Specifying a file cached_location2 <- getFile( mg, target_urls$download_url[[1]]) cached_location <- c(cached_location1, cached_location2) # Where are the files? cached_location library(Biostrings) # Simple function to a count of unique sequences matching PFAM amoC/mmoC motif getAmoCseqs <- function(fname){ sequences <- readAAStringSet(fname) tgtvec <- grepl(\"PF04896\", names(sequences)) as.data.frame(as.list(table(as.character(sequences[tgtvec])))) } # Just download a single accession for demonstration, specifying a read_function amoC_seq_counts <- getFile( mg, target_urls$download_url[[1]], read_func = getAmoCseqs) amoC_seq_counts sessionInfo()"},{"path":"/articles/MGnify_course.html","id":"introduction","dir":"Articles","previous_headings":"","what":"Introduction","title":"Metagenomics bioinformatics at MGnify","text":"notebook aim demonstrate MGnifyR tool can used fetch data MGnify microbiome data resource. showcase analyze datausing advanced microbiome data science tools, including estimating alpha beta diversity, well performing differential abundance analysis. MGnifyR R/Bioconductor package provides set tools easily accessing processing MGnify data R, making queries MGnify databases MGnify API. benefit MGnifyR streamlines data access, allowing users fetch data either “raw” format directly TreeSummarizedExperiment (TreeSE) object. enables seamless integration custom workflows analysis. 
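To make the two access modes mentioned above concrete, a minimal sketch (the analysis accession below is purely illustrative, reused from the extended vignette):

mg <- MgnifyClient(useCache = TRUE)

# "Raw" access: results returned as a plain data.frame
publications <- getData(mg, type = "publications")

# Analysis-ready access: results assembled into a TreeSE object
# (get.func = FALSE restricts the fetch to taxonomic profiles)
tse <- getResult(mg, accession = "MGYA00642773", get.func = FALSE)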
Utilizing TreeSE provides access wide range tools within Bioconductor’s SummarizedExperiment (SE) ecosystem. also integrates mia package, offers microbiome-specific methods within SE framework. information microbiome data science Bioconductor, refer Orchestrating Microbiome Analysis (OMA) online book.","code":""},{"path":"/articles/MGnify_course.html","id":"load-packages","dir":"Articles","previous_headings":"","what":"Load packages","title":"Metagenomics bioinformatics at MGnify","text":"","code":"# List of packages that we need packages <- c(\"ANCOMBC\", \"MGnifyR\", \"mia\", \"miaViz\", \"scater\") update <- FALSE # Loads BiocManager into the session. Install it if it is not already installed. if( !require(\"BiocManager\") ){ install.packages(\"BiocManager\") library(\"BiocManager\") } # If there are packages that need to be installed, installs them with # BiocManager install(packages, update = update, ask = FALSE) |> suppressWarnings() |> suppressMessages() # Load all packages into session. Stop if there are packages that were not # successfully loaded pkgs_not_loaded <- !sapply( packages, require, quietly = TRUE, character.only = TRUE) pkgs_not_loaded <- names(pkgs_not_loaded)[ pkgs_not_loaded ] if( length(pkgs_not_loaded) > 0 ){ stop(\"Error in loading the following packages into the session: '\", paste0(pkgs_not_loaded, collapse = \"', '\"), \"'\") }"},{"path":"/articles/MGnify_course.html","id":"data-import","dir":"Articles","previous_headings":"","what":"Data import","title":"Metagenomics bioinformatics at MGnify","text":"example, fetch taxonomy annotations metadata specified study. dataset focuses human gut microbiome, analyzed across different geographic regions. interact MGnify database, need create MgnifyClient object. object allows us store options data fetching. instance, can configure use cache improved efficiency. can now search analyses associated certain study. analysis refers metagenomic runs performed samples. sample can multiple runs made, work analyses samples; analysis identifier points single entity. MGnify database, study unique identifier. study interested accession ID “MGYS00005154”. Now ready load metadata analyses get idea kind data dealing . currently (17 Sep 2024) almost 1,000 analyses available. Downloading whole dataset take time, use memory cache. can see analyses performed different pipelines. Let’s take analyses generated pipeline version 5.0. now analyses point unique sample. final step fetch abundance tables TreeSummarizedExperiment (TreeSE) format. fetched data TreeSE object, including taxonomy annotations. See OMA online book handle data format.","code":"# Create the MgnifyClient object with caching enabled mg <- MgnifyClient( useCache = TRUE, cacheDir = \"/home/trainers\" # Set this to your desired cache directory ) study_id <- \"MGYS00005154\" analysis_id <- searchAnalysis(mg, \"studies\", study_id) metadata <- getMetadata(mg, accession = analysis_id) metadata <- metadata[metadata[[\"analysis_pipeline-version\"]] == \"5.0\", ] tse <- getResult( mg, accession = metadata[[\"analysis_accession\"]], get.func = FALSE ) tse"},{"path":"/articles/MGnify_course.html","id":"preprocessing","dir":"Articles","previous_headings":"","what":"Preprocessing","title":"Metagenomics bioinformatics at MGnify","text":", agglomerate data Order level, meaning summarize abundances specific taxonomic rank. OMA provides detailed chapter explaining agglomeration depth. unique properties microbiome data, apply transformations. , perform relative transformation. 
can find information transformations OMA.","code":"tse_order <- agglomerateByRank(tse, rank = \"Order\") # Transform the main TreeSE tse <- transformAssay(tse, method = \"relabundance\") # Transform the agglomerated TreeSE tse_order <- transformAssay(tse_order, method = \"relabundance\")"},{"path":"/articles/MGnify_course.html","id":"alpha-diversity","dir":"Articles","previous_headings":"","what":"Alpha diversity","title":"Metagenomics bioinformatics at MGnify","text":"Alpha diversity measures community diversity within sample. Learn community diversity . can test whether diversity differences statistically significant. utilize Mann Whithney U test (Wilcoxon test). add p-values plot, see OMA.","code":"# Calculate alpha diversity tse <- addAlpha(tse) # Create a plot p <- plotColData( tse, y = \"shannon_diversity\", x = \"sample_geographic.location..country.and.or.sea.region.\", show_boxplot = TRUE ) p pairwise.wilcox.test( tse[[\"shannon_diversity\"]], tse[[\"sample_geographic.location..country.and.or.sea.region.\"]], p.adjust.method = \"fdr\" )"},{"path":"/articles/MGnify_course.html","id":"beta-diversity","dir":"Articles","previous_headings":"","what":"Beta diversity","title":"Metagenomics bioinformatics at MGnify","text":"can assess differences microbial compositions samples, aiming identify patterns data associated covariates. achieve , perform Principal Coordinate Analysis (PCoA) using Bray-Curtis dissimilarity. See community similarity chapter OMA information.","code":"# Perform PCoA tse <- runMDS( tse, FUN = getDissimilarity, method = \"bray\", assay.type = \"relabundance\" ) # Visualize PCoA p <- plotReducedDim( tse, dimred = \"MDS\", colour_by = \"sample_geographic.location..country.and.or.sea.region.\" ) p"},{"path":"/articles/MGnify_course.html","id":"differential-abundance-analysis-daa","dir":"Articles","previous_headings":"","what":"Differential abundance analysis (DAA)","title":"Metagenomics bioinformatics at MGnify","text":"DAA, analyze whether abundances certain features vary study groups. , OMA dedicated chapter also topic. Next visualize features lowest p-values.","code":"# Perform DAA res <- ancombc2( data = tse_order, assay.type = \"counts\", fix_formula = \"sample_geographic.location..country.and.or.sea.region.\", p_adj_method = \"fdr\", ) # Get the most significant features n_top <- 4 res_tab <- res[[\"res\"]] res_tab <- res_tab[order(res_tab[[\"q_(Intercept)\"]]), ] top_feat <- res_tab[seq_len(n_top), \"taxon\"] # Create a plot p <- plotExpression( tse_order, features = top_feat, assay.type = \"relabundance\", x = \"sample_geographic.location..country.and.or.sea.region.\", show_boxplot = TRUE, show_violin = FALSE, point_shape = NA ) + scale_y_log10() p"},{"path":"/articles/MGnify_course.html","id":"session-info","dir":"Articles","previous_headings":"","what":"Session info","title":"Metagenomics bioinformatics at MGnify","text":"","code":"sessionInfo()"},{"path":"/authors.html","id":null,"dir":"","previous_headings":"","what":"Authors","title":"Authors and Citation","text":"Tuomas Borman. Author, maintainer. Ben Allen. Author. Leo Lahti. Author.","code":""},{"path":"/authors.html","id":"citation","dir":"","previous_headings":"","what":"Citation","title":"Authors and Citation","text":"Borman T, Allen B, Lahti L (2024). MGnifyR: R interface EBI MGnify metagenomics resource. 
R package version 0.99.30, https://github.com/EBI-Metagenomics/MGnifyR.","code":"@Manual{, title = {MGnifyR: R interface to EBI MGnify metagenomics resource}, author = {Tuomas Borman and Ben Allen and Leo Lahti}, year = {2024}, note = {R package version 0.99.30}, url = {https://github.com/EBI-Metagenomics/MGnifyR}, }"},{"path":"/index.html","id":"mgnifyr-","dir":"","previous_headings":"","what":"R interface to EBI MGnify metagenomics resource","title":"R interface to EBI MGnify metagenomics resource","text":"R package searching retrieving data EBI Metagenomics resource. cases, MGnifyR interacts directly JSONAPI, rather relying downloading analyses outputs TSV files. Thus general - allowing example intuitive combining multiple studies analyses single workflow, cases slower afformentioned direct access. Local caching results disk implemented help counter overheads, data downloads can slow - particularly functional annotation retrieval. MGnifyR package part miaverse microbiome analysis ecosystem enabling usage mia miaverse packages. research received funding Horizon 2020 Programme European Union within FindingPheno project grant agreement 952914. FindingPheno, EU-funded project, dedicated developing computational tools methodologies integration analysis multi-omics data. primary objective deepen understanding interactions hosts microbiomes. can find information FindingPheno website.","code":""},{"path":[]},{"path":"/index.html","id":"bioc-release","dir":"","previous_headings":"Installation","what":"Bioc-release","title":"R interface to EBI MGnify metagenomics resource","text":"","code":"if (!requireNamespace(\"BiocManager\", quietly = TRUE)) install.packages(\"BiocManager\") BiocManager::install(\"MGnifyR\")"},{"path":"/index.html","id":"bioc-devel","dir":"","previous_headings":"Installation","what":"Bioc-devel","title":"R interface to EBI MGnify metagenomics resource","text":"","code":"if (!requireNamespace(\"BiocManager\", quietly = TRUE)) install.packages(\"BiocManager\") # The following initializes usage of Bioc devel BiocManager::install(version='devel') BiocManager::install(\"MGnifyR\")"},{"path":"/index.html","id":"github","dir":"","previous_headings":"Installation","what":"GitHub","title":"R interface to EBI MGnify metagenomics resource","text":"","code":"remotes::install_github(\"EBI-Metagenomics/MGnifyR\")"},{"path":"/index.html","id":"basic-usage","dir":"","previous_headings":"","what":"Basic usage","title":"R interface to EBI MGnify metagenomics resource","text":"detailed instructions read associated function help vignette (vignette(\"MGNifyR\"))","code":"library(MGnifyR) # Set up the MGnify client instance mgclnt <- MgnifyClient(usecache = TRUE, cache_dir = '/tmp/MGnify_cache') # Retrieve the list of analyses associated with a study accession_list <- searchAnalysis(mgclnt, \"studies\", \"MGYS00005058\", usecache = TRUE) # Download all associated study/sample and analysis metadata meta_dataframe <- getMetadata(mgclnt, accession_list, usecache = TRUE) # Convert analyses outputs to a single `MultiAssayExperiment` object mae <- getResult(mgclnt, meta_dataframe$analysis_accession, usecache = TRUE) mae"},{"path":"/reference/MGnifyR-package.html","id":null,"dir":"Reference","previous_headings":"","what":"MGnifyR Package. — MGnifyR-package","title":"MGnifyR Package. — MGnifyR-package","text":"MGnifyR implements interface EBI MGnify database. See vignette general introduction package. 
MGnify general MGnify information, API documentation details JSONAPI implementation.","code":""},{"path":[]},{"path":"/reference/MGnifyR-package.html","id":"author","dir":"Reference","previous_headings":"","what":"Author","title":"MGnifyR Package. — MGnifyR-package","text":"Maintainer: Tuomas Borman tuomas.v.borman@utu.fi (ORCID) Authors: Ben Allen ben.allen@ncl.ac.uk Leo Lahti leo.lahti@iki.fi (ORCID)","code":""},{"path":"/reference/MgnifyClient-accessors.html","id":null,"dir":"Reference","previous_headings":"","what":"MgnifyClient accessors and mutators — databaseUrl","title":"MgnifyClient accessors and mutators — databaseUrl","text":"MgnifyClient accessors mutators","code":""},{"path":"/reference/MgnifyClient-accessors.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"MgnifyClient accessors and mutators — databaseUrl","text":"","code":"databaseUrl(x) authTok(x) useCache(x) cacheDir(x) showWarnings(x) clearCache(x) verbose(x) databaseUrl(x) <- value authTok(x) <- value useCache(x) <- value cacheDir(x) <- value showWarnings(x) <- value clearCache(x) <- value verbose(x) <- value # S4 method for class 'MgnifyClient' databaseUrl(x) # S4 method for class 'MgnifyClient' authTok(x) # S4 method for class 'MgnifyClient' useCache(x) # S4 method for class 'MgnifyClient' cacheDir(x) # S4 method for class 'MgnifyClient' showWarnings(x) # S4 method for class 'MgnifyClient' clearCache(x) # S4 method for class 'MgnifyClient' verbose(x) # S4 method for class 'MgnifyClient' databaseUrl(x) <- value # S4 method for class 'MgnifyClient' authTok(x) <- value # S4 method for class 'MgnifyClient' useCache(x) <- value # S4 method for class 'MgnifyClient' cacheDir(x) <- value # S4 method for class 'MgnifyClient' showWarnings(x) <- value # S4 method for class 'MgnifyClient' clearCache(x) <- value # S4 method for class 'MgnifyClient' verbose(x) <- value"},{"path":"/reference/MgnifyClient-accessors.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"MgnifyClient accessors and mutators — databaseUrl","text":"x MgnifyClient object. value value added certain slot.","code":""},{"path":"/reference/MgnifyClient-accessors.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"MgnifyClient accessors and mutators — databaseUrl","text":"value MgnifyClient object nothing.","code":""},{"path":"/reference/MgnifyClient-accessors.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"MgnifyClient accessors and mutators — databaseUrl","text":"functions fetching mutating slots MgnifyClient object.","code":""},{"path":"/reference/MgnifyClient-accessors.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"MgnifyClient accessors and mutators — databaseUrl","text":"","code":"mg <- MgnifyClient() databaseUrl(mg) #> [1] \"https://www.ebi.ac.uk/metagenomics/api/v1\" showWarnings(mg) <- FALSE"},{"path":"/reference/MgnifyClient.html","id":null,"dir":"Reference","previous_headings":"","what":"Constructor for creating a MgnifyClient object to allow the access to MGnify database. — MgnifyClient","title":"Constructor for creating a MgnifyClient object to allow the access to MGnify database. — MgnifyClient","text":"Constructor creating MgnifyClient object allow access MGnify database. 
MgnifyClient object","code":""},{"path":"/reference/MgnifyClient.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Constructor for creating a MgnifyClient object to allow the access to MGnify database. — MgnifyClient","text":"","code":"MgnifyClient( username = NULL, password = NULL, useCache = FALSE, cacheDir = tempdir(), showWarnings = FALSE, verbose = TRUE, clearCache = FALSE, ... )"},{"path":"/reference/MgnifyClient.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Constructor for creating a MgnifyClient object to allow the access to MGnify database. — MgnifyClient","text":"username single character value specifying optional username authentication. (default: username = NULL) password single character value specifying optional password authentication. (default: password = NULL) useCache single boolean value specifying whether enable -disk caching results session. use cases TRUE. (default: useCache = FALSE) cacheDir single character value specifying folder contain local cache. Note cached files persistent, cache directory may reused sessions, taking advantage previously downloaded results. directory created exist already. (default: cacheDir = tempdir()) showWarnings single boolean value specifying whether print warnings invocation MGnifyR functions. (default: showWarnings = FALSE) verbose single boolean value specifying whether print extra output invocation MGnifyR functions. (default: verbose = FALSE) clearCache single boolean value specifying whether clear cache. (default: clearCache = FALSE) ... optional arguments: url single character value specifying url address database. (default: url = \"https://www.ebi.ac.uk/metagenomics/api/v1\")","code":""},{"path":"/reference/MgnifyClient.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Constructor for creating a MgnifyClient object to allow the access to MGnify database. — MgnifyClient","text":"MgnifyClient object.","code":""},{"path":"/reference/MgnifyClient.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Constructor for creating a MgnifyClient object to allow the access to MGnify database. — MgnifyClient","text":"functions MGnifyR package take MgnifyClient object first argument. object allows simple handling user authentication access private data, manages general options querying MGnify database. object required functions MGnifyR package.","code":""},{"path":"/reference/MgnifyClient.html","id":"slots","dir":"Reference","previous_headings":"","what":"Slots","title":"Constructor for creating a MgnifyClient object to allow the access to MGnify database. — MgnifyClient","text":"databaseUrl single character value specifying URL address database. authTok single character value specifying authentication token. useCache single boolean value specifying whether use cache. cacheDir single character value specifying cache directory. showWarnings single boolean value specifying whether show warnings. clearCache single boolean value specifying whether clear cache. verbose single boolean value specifying whether show messages.","code":""},{"path":"/reference/MgnifyClient.html","id":"constructor","dir":"Reference","previous_headings":"","what":"Constructor","title":"Constructor for creating a MgnifyClient object to allow the access to MGnify database. 
— MgnifyClient","text":"See MgnifyClient constructor.","code":""},{"path":"/reference/MgnifyClient.html","id":"accessor","dir":"Reference","previous_headings":"","what":"Accessor","title":"Constructor for creating a MgnifyClient object to allow the access to MGnify database. — MgnifyClient","text":"See MgnifyClient-accessors accessor functions.","code":""},{"path":"/reference/MgnifyClient.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Constructor for creating a MgnifyClient object to allow the access to MGnify database. — MgnifyClient","text":"","code":"my_client <- MgnifyClient( useCache = TRUE, cacheDir = \"/scratch/MGnify_cache_location\" ) if (FALSE) { # \\dontrun{ # Use username and password to get access to non-public data my_client <- MgnifyClient( username = \"Webin-1122334\", password = \"SecretPassword\", useCache = TRUE, cacheDir = \"/scratch/MGnify_cache_location\" ) } # }"},{"path":"/reference/deprecate.html","id":null,"dir":"Reference","previous_headings":"","what":"These functions will be deprecated. Please use other functions instead. — deprecate","title":"These functions will be deprecated. Please use other functions instead. — deprecate","text":"functions deprecated. Please use functions instead.","code":""},{"path":"/reference/deprecate.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"These functions will be deprecated. Please use other functions instead. — deprecate","text":"","code":"mgnify_client( username = NULL, password = NULL, usecache = FALSE, cache_dir = NULL, warnings = FALSE, use_memcache = FALSE, ... ) mgnify_query( client, qtype = \"samples\", accession = NULL, asDataFrame = TRUE, maxhits = 200, usecache = FALSE, ... ) mgnify_analyses_from_samples(client, accession, usecache = TRUE, ...) mgnify_analyses_from_studies(client, accession, usecache = TRUE, ...) mgnify_get_download_urls( client, accessions, accession_type, usecache = TRUE, ... ) mgnify_download( client, url, file = NULL, read_func = NULL, usecache = TRUE, Debug = FALSE, ... ) mgnify_get_analyses_results( client = NULL, accessions, retrievelist = c(), compact_results = TRUE, usecache = TRUE, bulk_dl = FALSE, ... ) mgnify_get_analyses_phyloseq( client = NULL, accessions, usecache = TRUE, returnLists = FALSE, tax_SU = \"SSU\", get_tree = FALSE, ... ) mgnify_get_analyses_metadata(client, accessions, usecache = TRUE, ...) mgnify_retrieve_json( client, path = \"biomes\", complete_url = NULL, qopts = NULL, maxhits = 200, usecache = FALSE, Debug = FALSE, ... )"},{"path":"/reference/deprecate.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"These functions will be deprecated. Please use other functions instead. — deprecate","text":"username - password - usecache - cache_dir - warnings - use_memcache - ... - client - qtype - accession - asDataFrame - maxhits - accessions - accession_type - url - file - read_func - Debug - retrievelist - compact_results - bulk_dl - returnLists - tax_SU - get_tree - path - complete_url - qopts -","code":""},{"path":"/reference/deprecate.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"These functions will be deprecated. Please use other functions instead. — deprecate","text":"-","code":""},{"path":"/reference/doQuery.html","id":null,"dir":"Reference","previous_headings":"","what":"Search MGnify database for studies, samples, runs, analyses, biomes, assemblies, and genomes. 
— doQuery","title":"Search MGnify database for studies, samples, runs, analyses, biomes, assemblies, and genomes. — doQuery","text":"Search MGnify database studies, samples, runs, analyses, biomes, assemblies, genomes.","code":""},{"path":"/reference/doQuery.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Search MGnify database for studies, samples, runs, analyses, biomes, assemblies, and genomes. — doQuery","text":"","code":"doQuery(x, ...) # S4 method for class 'MgnifyClient' doQuery( x, type = \"studies\", accession = NULL, as.df = TRUE, max.hits = 200, ... )"},{"path":"/reference/doQuery.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Search MGnify database for studies, samples, runs, analyses, biomes, assemblies, and genomes. — doQuery","text":"x MgnifyClient object. ... Remaining parameter key/value pairs may supplied filter returned values. Available options differ types. See discussion Details section details. type single character value specifying type objects query. Must one following options: studies, samples, runs, analyses, biomes, assemblies, super-studies, experiment-types, pipelines, pipeline-tools, publications, genomes, genome-search, genome-search/gather, genome-catalogues, genomeset, cogs, kegg-modules, kegg-classes, antismash-geneclusters, annotations/go-terms, annotations/interpro-identifiers, annotations/kegg-modules, annotations/pfam-entries, annotations/kegg-orthologs, annotations/genome-properties, annotations/antismash-gene-clusters, annotations/organisms, mydata. (default: type = \"studies\") accession single character value vector character values specifying MGnify accession identifiers (type type) NULL. NULL, results defined parameters retrieved. (default: accession = NULL) .df single boolean value specifying whether return results data.frame leave nested list. cases, .df = TRUE make sense. (default: .df = TRUE) max.hits single integer value specifying maximum number results return FALSE. actual number results actually higher max.hits, clipping occurs pagination page boundaries. disable limit, set max.hits = NULL. (default: max.hits = 200)","code":""},{"path":"/reference/doQuery.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Search MGnify database for studies, samples, runs, analyses, biomes, assemblies, and genomes. — doQuery","text":"nested list data.frame containing results query.","code":""},{"path":"/reference/doQuery.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Search MGnify database for studies, samples, runs, analyses, biomes, assemblies, and genomes. — doQuery","text":"doQuery flexible query function, harnessing \"full\" power JSONAPI MGnify search filters. Search results may filtered metadata value, associated study/sample/analyse etc. See Api browser information MGnify database filters. can find help customizing queries . 
example following filters available: studies: accession, biome_name, lineage, centre_name, include samples: accession, experiment_type, biome_name, lineage, geo_loc_name, latitude_gte, latitude_lte, longitude_gte, longitude_lte, species, instrument_model, instrument_platform, metadata_key, metadata_value_gte, metadata_value_lte, metadata_value, environment_material, environment_feature, study_accession, include runs: accession, experiment_type, biome_name, lineage, species, instrument_platform, instrument_model, metdata_key, metadata_value_gte, metadata_value_lte, metadata_value, sample_accession, study_accession, include analyses: biome_name, lineage, experiment_type, species, sample_accession, pipeline_version biomes: depth_gte, depth_lte assemblies: depth_gte, depth_lte Unfortunately appears cases, filters work expected, important check results returned match expected. Even unfortunately error parameter specification, query run filter parameters present . Thus result appear superficially correct infact correspond something completely different. behaviour hopefully fixed future incarnations MGnifyR JSONAPI, now users double check returned values. currently possible combine queries type single call (example search samples latitude). However, possible run multiple queries combine results using set operations R get desired behaviour.","code":""},{"path":"/reference/doQuery.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Search MGnify database for studies, samples, runs, analyses, biomes, assemblies, and genomes. — doQuery","text":"","code":"mg <- MgnifyClient(useCache = FALSE) # Get a list of studies from the Agricultural Wastewater : agwaste_studies <- doQuery( mg, \"studies\", biome_name=\"Agricultural wastewater\" ) if (FALSE) { # \\dontrun{ # Get all samples from a particular study samps <- doQuery(mg, \"samples\", accession=\"MGYS00004521\") # Search polar samples samps_np <- doQuery(mg, \"samples\", latitude_gte=66, max.hits=10) samps_sp <- doQuery(mg, \"samples\", latitude_lte=-66, max.hits=10) # Search studies that have studied drinking water tbl <- doQuery( mg, type = \"studies\", biome_name = \"root:Environmental:Aquatic:Freshwater:Drinking water\", max.hits = 10) } # }"},{"path":"/reference/getData.html","id":null,"dir":"Reference","previous_headings":"","what":"Versatile function to retrieve raw results — getData","title":"Versatile function to retrieve raw results — getData","text":"Versatile function retrieve raw results","code":""},{"path":"/reference/getData.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Versatile function to retrieve raw results — getData","text":"","code":"getData(x, ...) # S4 method for class 'MgnifyClient' getData(x, type, accession.type = NULL, accession = NULL, as.df = TRUE, ...)"},{"path":"/reference/getData.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Versatile function to retrieve raw results — getData","text":"x MgnifyClient object. ... optional arguments fed internal functions. type single character value specifying type data retrieve. 
Must one following options: studies, samples, runs, analyses, biomes, assemblies, super-studies, experiment-types, pipelines, pipeline-tools, publications, genomes, genome-search, genome-search/gather, genome-catalogues, genomeset, cogs, kegg-modules, kegg-classes, antismash-geneclusters, annotations/go-terms, annotations/interpro-identifiers, annotations/kegg-modules, annotations/pfam-entries, annotations/kegg-orthologs, annotations/genome-properties, annotations/antismash-gene-clusters, annotations/organisms, mydata. accession.type single character value specifying type accession IDs (accession). Must specified accession specified. (default: accession.type = NULL) accession single character value vector character values specifying accession IDs return results . (default: accession = NULL) .df single boolean value specifying whether return results data.frame leave nested list. (default: .df = TRUE)","code":""},{"path":"/reference/getData.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Versatile function to retrieve raw results — getData","text":"data.frame list","code":""},{"path":"/reference/getData.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Versatile function to retrieve raw results — getData","text":"function returns data MGnify database. Compared getResult, function allows flexible framework fetching data. However, drawbacks: counts data, getResult returns optimally structured data container easier downstream analysis. getData returns raw data database. However, want retrieve data pipelines publications, instance, getResult suitable , getData can utilized instead.","code":""},{"path":[]},{"path":"/reference/getData.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Versatile function to retrieve raw results — getData","text":"","code":"# Create a client object mg <- MgnifyClient(useCache = FALSE) # Find kegg modules for certain analysis df <- getData( mg, type = \"kegg-modules\", accession = \"MGYA00642773\", accession.type = \"analyses\")"},{"path":"/reference/getFile.html","id":null,"dir":"Reference","previous_headings":"","what":"Download any MGnify files, also including processed reads and identified protein sequences — getFile","title":"Download any MGnify files, also including processed reads and identified protein sequences — getFile","text":"Download MGnify files, also including processed reads identified protein sequences Listing files available download","code":""},{"path":"/reference/getFile.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Download any MGnify files, also including processed reads and identified protein sequences — getFile","text":"","code":"getFile(x, ...) searchFile(x, ...) # S4 method for class 'MgnifyClient' getFile(x, url, file = NULL, read.func = NULL, ...) # S4 method for class 'MgnifyClient' searchFile( x, accession, type = c(\"studies\", \"samples\", \"analyses\", \"assemblies\", \"genomes\", \"run\"), ... )"},{"path":"/reference/getFile.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Download any MGnify files, also including processed reads and identified protein sequences — getFile","text":"x MgnifyClient object. ... Additional arguments; used currently. url single character value specifying url address file wish download. file single character value NULL specifying optional local filename use saving file. 
NULL, MGNify local cache settings used. file intended processed separate program, may sensible provide meaningful file, rather hunt cache folders. file NULL useCache(client) FALSE, read.func parameter must supplied file downloaded deleted. (default: file = NULL) read.func function specifying optional function process downloaded file return results, rather relying post processing. primary use-case parameter local disk space limited downloaded files can quickly processed discarded. function take single parameter, downloaded filename, may return valid R object. (default: read.func = NULL) accession single character value vector character values specifying accession IDs return results . type single character value specifying type objects query. Must one following options: analysis, samples, studies, assembly, genome run. (default: type = \"samples\")","code":""},{"path":"/reference/getFile.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Download any MGnify files, also including processed reads and identified protein sequences — getFile","text":"getFile(), either local filename downloaded file, either location MGNifyR cache file. read.func used, result returned. searchFile() data.frame containing discovered downloads. multiple accessions queried, accessions column may filter results - since rownames set (make sense query return multiple items)","code":""},{"path":"/reference/getFile.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Download any MGnify files, also including processed reads and identified protein sequences — getFile","text":"getFile convenient wrapper round generic URL downloading functionality R, taking care things like local caching authentication. searchFile() function wrapper function allowing easy enumeration downloads available given accession IDs. Returns single data.frame containing available downloads associated metadata, including url location description. can filtered extract urls interest, actually retrieving files using getFile()","code":""},{"path":"/reference/getFile.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Download any MGnify files, also including processed reads and identified protein sequences — getFile","text":"","code":"# Make a client object mg <- MgnifyClient(useCache = FALSE) # Create a vector of accession ids - these happen to be \\code{analysis} # accessions accession_vect <- c(\"MGYA00563876\", \"MGYA00563877\") downloads <- searchFile(mg, accession_vect, \"analyses\") #> Searching files... #> | | | 0% | |=================================== | 50% | |======================================================================| 100% # Filter to find the urls of 16S encoding sequences url_list <- downloads[ downloads$attributes.description.label == \"Contigs encoding SSU rRNA\", \"download_url\"] # Example 1: # Download the first file supplied_filename <- getFile( mg, url_list[[1]], file=\"SSU_file.fasta.gz\") if (FALSE) { # \\dontrun{ # Example 2: # Just use local caching cached_filename <- getFile(mg, url_list[[2]]) # Example 3: # Using read.func to open the reads with readDNAStringSet from # \\code{biostrings}. 
Without retaining on disk dna_seqs <- getFile( mg, url_list[[3]], read.func = readDNAStringSet) } # } # Make a client object mg <- MgnifyClient(useCache = TRUE) # Create a vector of accession ids - these happen to be \\code{analysis} # accessions accession_vect <- c( \"MGYA00563876\", \"MGYA00563877\", \"MGYA00563878\", \"MGYA00563879\", \"MGYA00563880\" ) downloads <- searchFile(mg, accession_vect, \"analyses\") #> Searching files... #> | | | 0% | |============== | 20% | |============================ | 40% | |========================================== | 60% | |======================================================== | 80% | |======================================================================| 100%"},{"path":"/reference/getMetadata.html","id":null,"dir":"Reference","previous_headings":"","what":"Get all study, sample and analysis metadata for the supplied analysis accessions — getMetadata","title":"Get all study, sample and analysis metadata for the supplied analysis accessions — getMetadata","text":"Get study, sample analysis metadata supplied analysis accessions","code":""},{"path":"/reference/getMetadata.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Get all study, sample and analysis metadata for the supplied analysis accessions — getMetadata","text":"","code":"getMetadata(x, ...) # S4 method for class 'MgnifyClient' getMetadata(x, accession, ...)"},{"path":"/reference/getMetadata.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Get all study, sample and analysis metadata for the supplied analysis accessions — getMetadata","text":"x MgnifyClient object. ... Optional arguments; currently used. accession single character value vector analysis accession IDs specifying accessions retrieve data .","code":""},{"path":"/reference/getMetadata.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Get all study, sample and analysis metadata for the supplied analysis accessions — getMetadata","text":"data.frame containing metadata analysis accession list. row represents single analysis.","code":""},{"path":"/reference/getMetadata.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Get all study, sample and analysis metadata for the supplied analysis accessions — getMetadata","text":"function retrieves study, sample analysis metadata associated provided analysis accessions.","code":""},{"path":"/reference/getMetadata.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Get all study, sample and analysis metadata for the supplied analysis accessions — getMetadata","text":"","code":"# Create a client object mg <- MgnifyClient(useCache = FALSE) # Download all associated study/sample and analysis metadata accession_list <- c(\"MGYA00377505\") meta_dataframe <- getMetadata(mg, accession_list) #> Fetching metadata... 
#> | | | 0% | |======================================================================| 100%"},{"path":"/reference/getResult.html","id":null,"dir":"Reference","previous_headings":"","what":"Get microbial and/or functional profiling data for a list of accessions — getResult","title":"Get microbial and/or functional profiling data for a list of accessions — getResult","text":"Get microbial /functional profiling data list accessions","code":""},{"path":"/reference/getResult.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Get microbial and/or functional profiling data for a list of accessions — getResult","text":"","code":"getResult(x, ...) # S4 method for class 'MgnifyClient' getResult( x, accession, get.taxa = TRUE, get.func = TRUE, output = \"TreeSE\", ... )"},{"path":"/reference/getResult.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Get microbial and/or functional profiling data for a list of accessions — getResult","text":"x MgnifyClient object. ... optional arguments: taxa.su single character value specifying taxa subunit results selected. Currently, taxonomy assignments MGnify pipelines rely rRNA matches existing databases (GreenGenes SILVA), later pipelines checking SSU LSU portions rRNA sequence. taxa.su allows selection either Small subunit (\"SSU\") Large subunit (\"LSU\") results final TreeSummarizedExperiment object. Older pipeline versions report results subunits, thus accessions value effect. get.tree single boolean value specifying whether include available phylogenetic trees TreeSummarizedExperiment object. Available get.taxa = TRUE. (default: get.tree = TRUE) .df single boolean value enabled output = \"list\". argument specifies whether return functional data named list (one entry per element output list) data.frames, data.frame containing results requested accessions. FALSE, function returns list lists, element consisting results single accession. (default: .df = TRUE) bulk.dl single boolean value specifying MGnifyR attempt speed things downloading relevant studies TSV results extracting required columns, rather using JSONAPI interface. getting results multiple accessions share study, option may result significantly faster processing. However, appear (quite ) cases database TSV result columns match expected accession names. hopefully fixed future, now bulk.dl defaults TRUE. work, can orders magnitude efficient. (default: buld_dl = TRUE) accession single character value vector character values specifying accession IDs return results . get.taxa boolean value specifying whether retrieve taxonomy data (OTU table). See taxa.su specifying taxonomy type. data retrieved BIOM files subsequently parsed. (default: get.taxa = TRUE) get.func boolean value single character value vector character values specifying functional analysis types retrieve. get.func = TRUE, available functional datatypes retrieved, FALSE, functional data retrieved. current list available types \"antismash-gene-clusters\", \"go-slim\", \"go-terms\", \"interpro-identifiers\", \"taxonomy\", \"taxonomy-itsonedb\", \"taxonomy-itsunite\", \"taxonomy-lsu\", \"taxonomy-ssu\". Note depending particular analysis type, pipeline version etc., functional results available. Furthermore, taxonomy also available via get.func, loading data might considerable faster bulk.dl = TRUE. However, phylogeny available via get.taxa. (default: get.func = TRUE) output single character value specifying format output. 
Must one following options: \"TreeSE\", \"list\", \"phyloseq\". (default: output = \"TreeSE\")","code":""},{"path":"/reference/getResult.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Get microbial and/or functional profiling data for a list of accessions — getResult","text":"taxonomy data retrieved, result returned TreeSummarizedExperiment object default. result can also returned phyloseq object list data.frames. Note phyloseq object can include one phylogenetic tree meaning taxa might lost data subsetted based tree. functional data retrieved addition taxonomy data, result returned MultiAssayExperiment object. options list containing phyloseq object data.frames just data.frames. Functional data can returned MultiAssayExperiment object list data.frames.","code":""},{"path":"/reference/getResult.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Get microbial and/or functional profiling data for a list of accessions — getResult","text":"Given set analysis accessions collection annotation types, function queries MGNify API returns results. function convenient retrieving highly structured (analysis vs counts) data certain instances. example, BIOM files downloaded automatically. want just retrieve raw data database, see getData.","code":""},{"path":[]},{"path":"/reference/getResult.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Get microbial and/or functional profiling data for a list of accessions — getResult","text":"","code":"# Create a client object mg <- MgnifyClient(useCache = FALSE) # Get OTU tables as TreeSE accession_list <- c(\"MGYA00377505\") tse <- getResult(mg, accession_list, get.func=FALSE, get.taxa=TRUE) #> Fetching taxonomy data... #> | | | 0% | |======================================================================| 100% #> Merging with full join... #> 1/1 #> if (FALSE) { # \\dontrun{ # Get functional data along with OTU tables as MAE mae <- getResult(mg, accession_list, get.func=TRUE, get.taxa=TRUE) # Get same data as list list <- getResult( mg, accession_list, get.func=TRUE, get.taxa=TRUE, output = \"list\", as.df = TRUE, use.cache = TRUE) } # }"},{"path":"/reference/searchAnalysis.html","id":null,"dir":"Reference","previous_headings":"","what":"Look up analysis accession IDs for one or more study or sample accessions — searchAnalysis","title":"Look up analysis accession IDs for one or more study or sample accessions — searchAnalysis","text":"Look analysis accession IDs one study sample accessions","code":""},{"path":"/reference/searchAnalysis.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Look up analysis accession IDs for one or more study or sample accessions — searchAnalysis","text":"","code":"searchAnalysis(x, ...) # S4 method for class 'MgnifyClient' searchAnalysis(x, type, accession, ...)"},{"path":"/reference/searchAnalysis.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Look up analysis accession IDs for one or more study or sample accessions — searchAnalysis","text":"x MgnifyClient object. ... Optional arguments; currently used. type single character value specifying type accession IDs specified accession. Must \"studies\" \"samples\". 
accession single character value vector character values specifying study sample accession IDs used retrieve analyses IDs.","code":""},{"path":"/reference/searchAnalysis.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Look up analysis accession IDs for one or more study or sample accessions — searchAnalysis","text":"Vector analysis accession IDs.","code":""},{"path":"/reference/searchAnalysis.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Look up analysis accession IDs for one or more study or sample accessions — searchAnalysis","text":"Retrieve analysis accession IDs associated supplied study sample accession. MGnify, analysis accession refers certain pipeline analysis, specific 16S rRNA shotgun metagenomic mapping. Studies can include multiple samples, sample can undergo multiple analyses using pipelines. analysis identified unique accession ID, allowing precise tracking retrieval analysis results within MGnify database.","code":""},{"path":"/reference/searchAnalysis.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Look up analysis accession IDs for one or more study or sample accessions — searchAnalysis","text":"","code":"# Create a client object mg <- MgnifyClient(useCache = FALSE) # Retrieve analysis ids from study MGYS00005058 result <- searchAnalysis(mg, \"studies\", c(\"MGYS00005058\")) #> Fetching analyses... #> | | | 0% | |======================================================================| 100% if (FALSE) { # \\dontrun{ # Retrieve all analysis ids from samples result <- searchAnalysis( mg, \"samples\", c(\"SRS4392730\", \"SRS4392743\")) } # }"}] +[{"path":"/articles/MGnifyR.html","id":"introduction","dir":"Articles","previous_headings":"","what":"Introduction","title":"MGnifyR","text":"MGnifyR package designed ease access EBI’s MGnify resource, allowing searching retrieval multiple datasets downstream analysis. 
latest version MGnifyR seamlessly integrates miaverse framework providing access cutting-edge tools microbiome -stream analytics.","code":""},{"path":"/articles/MGnifyR.html","id":"installation","dir":"Articles","previous_headings":"","what":"Installation","title":"MGnifyR","text":"MGnifyR hosted Bioconductor, can installed using via BiocManager.","code":"BiocManager::install(\"MGnifyR\")"},{"path":"/articles/MGnifyR.html","id":"load-mgnifyr-package","dir":"Articles","previous_headings":"","what":"Load MGnifyR package","title":"MGnifyR","text":"installed, MGnifyR made available usual way.","code":"library(MGnifyR) #> Loading required package: MultiAssayExperiment #> Loading required package: SummarizedExperiment #> Loading required package: MatrixGenerics #> Loading required package: matrixStats #> #> Attaching package: 'MatrixGenerics' #> The following objects are masked from 'package:matrixStats': #> #> colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse, #> colCounts, colCummaxs, colCummins, colCumprods, colCumsums, #> colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs, #> colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats, #> colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds, #> colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads, #> colWeightedMeans, colWeightedMedians, colWeightedSds, #> colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet, #> rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods, #> rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps, #> rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins, #> rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks, #> rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars, #> rowWeightedMads, rowWeightedMeans, rowWeightedMedians, #> rowWeightedSds, rowWeightedVars #> Loading required package: GenomicRanges #> Loading required package: stats4 #> Loading required package: BiocGenerics #> #> Attaching package: 'BiocGenerics' #> The following objects are masked from 'package:stats': #> #> IQR, mad, sd, var, xtabs #> The following objects are masked from 'package:base': #> #> anyDuplicated, aperm, append, as.data.frame, basename, cbind, #> colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find, #> get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply, #> match, mget, order, paste, pmax, pmax.int, pmin, pmin.int, #> Position, rank, rbind, Reduce, rownames, sapply, setdiff, table, #> tapply, union, unique, unsplit, which.max, which.min #> Loading required package: S4Vectors #> #> Attaching package: 'S4Vectors' #> The following object is masked from 'package:utils': #> #> findMatches #> The following objects are masked from 'package:base': #> #> expand.grid, I, unname #> Loading required package: IRanges #> Loading required package: GenomeInfoDb #> Loading required package: Biobase #> Welcome to Bioconductor #> #> Vignettes contain introductory material; view with #> 'browseVignettes()'. To cite Bioconductor, see #> 'citation(\"Biobase\")', and for packages 'citation(\"pkgname\")'. 
#> #> Attaching package: 'Biobase' #> The following object is masked from 'package:MatrixGenerics': #> #> rowMedians #> The following objects are masked from 'package:matrixStats': #> #> anyMissing, rowMedians #> Loading required package: TreeSummarizedExperiment #> Loading required package: SingleCellExperiment #> Loading required package: Biostrings #> Loading required package: XVector #> #> Attaching package: 'Biostrings' #> The following object is masked from 'package:base': #> #> strsplit"},{"path":"/articles/MGnifyR.html","id":"create-a-client","dir":"Articles","previous_headings":"","what":"Create a client","title":"MGnifyR","text":"functions MGnifyR make use MgnifyClient object keep track JSONAPI url, disk cache location user access tokens. Thus first thing starting analysis instantiate object. following snippet creates . MgnifyClient object contains slots previously mentioned settings.","code":"mg <- MgnifyClient(useCache = TRUE) mg #> An object of class \"MgnifyClient\" #> Slot \"databaseUrl\": #> [1] \"https://www.ebi.ac.uk/metagenomics/api/v1\" #> #> Slot \"authTok\": #> [1] NA #> #> Slot \"useCache\": #> [1] TRUE #> #> Slot \"cacheDir\": #> [1] \"/tmp/RtmpAEJ3j1/.MGnifyR_cache\" #> #> Slot \"showWarnings\": #> [1] FALSE #> #> Slot \"clearCache\": #> [1] FALSE #> #> Slot \"verbose\": #> [1] TRUE"},{"path":[]},{"path":"/articles/MGnifyR.html","id":"search-data","dir":"Articles","previous_headings":"Functions for fetching the data","what":"Search data","title":"MGnifyR","text":"doQuery() function can utilized search results samples studies MGnify database. , fetch information drinking water samples. result table containing accession IDs description – case – samples.","code":"# Fetch studies samples <- doQuery( mg, type = \"samples\", biome_name = \"root:Environmental:Aquatic:Freshwater:Drinking water\", max.hits = 10) colnames(samples) |> head() #> [1] \"biosample\" \"accession\" \"sample-desc\" #> [4] \"environment-biome\" \"environment-feature\" \"environment-material\""},{"path":"/articles/MGnifyR.html","id":"find-relevent-analyses-accessions","dir":"Articles","previous_headings":"Functions for fetching the data","what":"Find relevent analyses accessions","title":"MGnifyR","text":"Now want find analysis accessions. sample might multiple analyses. analysis ID corresponds single run particular pipeline single sample single study. running searchAnalysis() function, get vector analysis IDs samples fed input.","code":"analyses_accessions <- searchAnalysis(mg, \"samples\", samples$accession) analyses_accessions |> head() #> [1] \"MGYA00652201\" \"MGYA00652185\" \"MGYA00643487\" \"MGYA00643486\" \"MGYA00643485\" #> [6] \"MGYA00643484\""},{"path":"/articles/MGnifyR.html","id":"fetch-metadata","dir":"Articles","previous_headings":"Functions for fetching the data","what":"Fetch metadata","title":"MGnifyR","text":"can now check metadata get hint kind data . use getMetadata() function fetch data based analysis IDs. 
returned value data.frame includes metadata example analysis conducted kind samples analyzed.","code":"analyses_metadata <- getMetadata(mg, analyses_accessions) colnames(analyses_metadata) |> head() #> [1] \"analysis_analysis-status\" \"analysis_pipeline-version\" #> [3] \"analysis_experiment-type\" \"analysis_accession\" #> [5] \"analysis_is-private\" \"analysis_complete-time\""},{"path":"/articles/MGnifyR.html","id":"fetch-microbiome-data","dir":"Articles","previous_headings":"Functions for fetching the data","what":"Fetch microbiome data","title":"MGnifyR","text":"selected data fetch, can use getResult() output TreeSummarizedExperiment (TreeSE) MultiAssayExperiment (MAE) depending dataset. dataset includes taxonomic profiling data, output single TreeSE. dataset includes also functional data, output multiple TreeSE objects linked together utilizing MAE. can get access individual TreeSE object MAE specifying index name. TreeSE object uniquely positioned support SummarizedExperiment-based microbiome data manipulation visualization. Moreover, enables access miaverse tools. example, can estimate diversity samples… … plot abundances abundant phyla. can also perform analyses principal component analysis microbial profiling data utilizing miaverse tools.","code":"mae <- getResult(mg, accession = analyses_accessions) mae #> A MultiAssayExperiment object of 6 listed #> experiments with user-defined names and respective classes. #> Containing an ExperimentList class object of length 6: #> [1] microbiota: TreeSummarizedExperiment with 3506 rows and 50 columns #> [2] go-slim: TreeSummarizedExperiment with 116 rows and 38 columns #> [3] go-terms: TreeSummarizedExperiment with 3133 rows and 38 columns #> [4] interpro-identifiers: TreeSummarizedExperiment with 18223 rows and 38 columns #> [5] taxonomy: TreeSummarizedExperiment with 3617 rows and 50 columns #> [6] taxonomy-lsu: TreeSummarizedExperiment with 3378 rows and 42 columns #> Functionality: #> experiments() - obtain the ExperimentList instance #> colData() - the primary/phenotype DataFrame #> sampleMap() - the sample coordination DataFrame #> `$`, `[`, `[[` - extract colData columns, subset, or experiment #> *Format() - convert into a long or wide DataFrame #> assays() - convert ExperimentList to a SimpleList of matrices #> exportClass() - save data to flat files mae[[1]] #> class: TreeSummarizedExperiment #> dim: 3506 50 #> metadata(0): #> assays(1): counts #> rownames(3506): 82608 62797 ... 5820 6794 #> rowData names(9): Kingdom Phylum ... taxonomy1 taxonomy #> colnames(50): MGYA00144458 MGYA00144419 ... MGYA00652185 MGYA00652201 #> colData names(64): analysis_analysis.status analysis_pipeline.version #> ... sample_geo.loc.name sample_instrument.model #> reducedDimNames(0): #> mainExpName: NULL #> altExpNames(0): #> rowLinks: NULL #> rowTree: NULL #> colLinks: NULL #> colTree: NULL library(mia) #> This is mia version 1.13.36 #> - Online documentation and vignettes: https://microbiome.github.io/mia/ #> - Online book 'Orchestrating Microbiome Analysis (OMA)': https://microbiome.github.io/OMA/docs/devel/ mae[[1]] <- estimateDiversity(mae[[1]], index = \"shannon\") #> Warning in estimateDiversity(mae[[1]], index = \"shannon\"): 'estimateDiversity' #> is deprecated. Use 'addAlpha' instead. 
library(scater) #> Loading required package: scuttle #> Loading required package: ggplot2 plotColData(mae[[1]], \"shannon\", x = \"sample_environment..biome.\") # Agglomerate data altExps(mae[[1]]) <- splitByRanks(mae[[1]]) library(miaViz) #> Loading required package: ggraph #> #> Attaching package: 'miaViz' #> The following object is masked from 'package:mia': #> #> plotNMDS # Plot top taxa top_taxa <- getTopFeatures(altExp(mae[[1]], \"Phylum\"), 10) #> Warning in getTopFeatures(altExp(mae[[1]], \"Phylum\"), 10): 'getTopFeatures' is #> deprecated. Use 'getTop' instead. plotAbundance( altExp(mae[[1]], \"Phylum\")[top_taxa, ], rank = \"Phylum\", as.relative = TRUE ) #> Warning: The following values are already present in `metadata` and will be #> overwritten: 'agglomerated_by_rank'. Consider using the 'name' argument to #> specify alternative names. # Apply relative transformation mae[[1]] <- transformAssay(mae[[1]], method = \"relabundance\") # Perform PCoA mae[[1]] <- runMDS( mae[[1]], assay.type = \"relabundance\", FUN = vegan::vegdist, method = \"bray\") # Plot plotReducedDim( mae[[1]], \"MDS\", colour_by = \"sample_environment..biome.\")"},{"path":"/articles/MGnifyR.html","id":"fetch-raw-files","dir":"Articles","previous_headings":"Functions for fetching the data","what":"Fetch raw files","title":"MGnifyR","text":"getResult() can utilized retrieve microbial profiling data, getData() can used flexibly retrieve kind data database. returns data simple data.frame list format. result data.frame default. case, includes information publications fetched data portal.","code":"publications <- getData(mg, type = \"publications\") colnames(publications) |> head() #> [1] \"document.id\" \"type\" #> [3] \"id\" \"attributes.pubmed-id\" #> [5] \"attributes.pubmed-central-id\" \"attributes.pub-title\""},{"path":"/articles/MGnifyR.html","id":"fetch-sequence-files","dir":"Articles","previous_headings":"Functions for fetching the data","what":"Fetch sequence files","title":"MGnifyR","text":"Finally, can use searchFile() getFile() retrieve MGnify pipeline outputs merged sequence reads, assembled contigs, details functional analyses. searchFile(), can search files database. returned table contains search results related analyses fed input. table contains information file also URL address file can loaded. Finally, can download files getFile(). function returns path file stored.","code":"dl_urls <- searchFile(mg, analyses_accessions, type = \"analyses\") target_urls <- dl_urls[ dl_urls$attributes.description.label == \"Predicted alpha tmRNA\", ] colnames(target_urls) |> head() #> [1] \"type\" \"id\" #> [3] \"attributes.alias\" \"attributes.file.format.name\" #> [5] \"attributes.file.format.extension\" \"attributes.file.format.compression\" # Just select a single file from the target_urls list for demonstration. file_url <- target_urls$download_url[[1]] cached_location <- getFile(mg, file_url) # Where are the files? 
cached_location #> [1] \"/.MGnifyR_cache/analyses/MGYA00652201/file/ERZ20300939_alpha_tmRNA.RF01849.fasta.gz\" sessionInfo() #> R version 4.4.1 (2024-06-14) #> Platform: x86_64-pc-linux-gnu #> Running under: Ubuntu 22.04.4 LTS #> #> Matrix products: default #> BLAS: /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3 #> LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.20.so; LAPACK version 3.10.0 #> #> locale: #> [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C #> [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8 #> [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8 #> [7] LC_PAPER=en_US.UTF-8 LC_NAME=C #> [9] LC_ADDRESS=C LC_TELEPHONE=C #> [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C #> #> time zone: UTC #> tzcode source: system (glibc) #> #> attached base packages: #> [1] stats4 stats graphics grDevices utils datasets methods #> [8] base #> #> other attached packages: #> [1] miaViz_1.13.10 ggraph_2.2.1 #> [3] scater_1.33.4 ggplot2_3.5.1 #> [5] scuttle_1.15.4 mia_1.13.36 #> [7] MGnifyR_0.99.30 TreeSummarizedExperiment_2.13.0 #> [9] Biostrings_2.73.1 XVector_0.45.0 #> [11] SingleCellExperiment_1.27.2 MultiAssayExperiment_1.31.5 #> [13] SummarizedExperiment_1.35.1 Biobase_2.65.1 #> [15] GenomicRanges_1.57.1 GenomeInfoDb_1.41.1 #> [17] IRanges_2.39.2 S4Vectors_0.43.2 #> [19] BiocGenerics_0.51.1 MatrixGenerics_1.17.0 #> [21] matrixStats_1.4.1 knitr_1.48 #> [23] BiocStyle_2.33.1 #> #> loaded via a namespace (and not attached): #> [1] splines_4.4.1 ggplotify_0.1.2 #> [3] urltools_1.7.3 tibble_3.2.1 #> [5] triebeard_0.4.1 polyclip_1.10-7 #> [7] rpart_4.1.23 DirichletMultinomial_1.47.0 #> [9] lifecycle_1.0.4 lattice_0.22-6 #> [11] MASS_7.3-61 SnowballC_0.7.1 #> [13] backports_1.5.0 magrittr_2.0.3 #> [15] Hmisc_5.1-3 sass_0.4.9 #> [17] rmarkdown_2.28 jquerylib_0.1.4 #> [19] yaml_2.3.10 DBI_1.2.3 #> [21] minqa_1.2.8 abind_1.4-8 #> [23] zlibbioc_1.51.1 purrr_1.0.2 #> [25] yulab.utils_0.1.7 nnet_7.3-19 #> [27] tweenr_2.0.3 sandwich_3.1-0 #> [29] GenomeInfoDbData_1.2.12 ggrepel_0.9.6 #> [31] tokenizers_0.3.0 irlba_2.3.5.1 #> [33] tidytree_0.4.6 vegan_2.6-8 #> [35] rbiom_1.0.3 tidyjson_0.3.2 #> [37] pkgdown_2.1.1 permute_0.9-7 #> [39] DelayedMatrixStats_1.27.3 codetools_0.2-20 #> [41] DelayedArray_0.31.11 ggforce_0.4.2 #> [43] tidyselect_1.2.1 aplot_0.2.3 #> [45] UCSC.utils_1.1.0 farver_2.1.2 #> [47] lme4_1.1-35.5 ScaledMatrix_1.13.0 #> [49] viridis_0.6.5 base64enc_0.1-3 #> [51] jsonlite_1.8.8 BiocNeighbors_1.99.0 #> [53] decontam_1.25.0 tidygraph_1.3.1 #> [55] Formula_1.2-5 systemfonts_1.1.0 #> [57] ggnewscale_0.5.0 tools_4.4.1 #> [59] treeio_1.29.1 ragg_1.3.3 #> [61] Rcpp_1.0.13 glue_1.7.0 #> [63] gridExtra_2.3 SparseArray_1.5.34 #> [65] BiocBaseUtils_1.7.3 xfun_0.47 #> [67] mgcv_1.9-1 dplyr_1.1.4 #> [69] withr_3.0.1 BiocManager_1.30.25 #> [71] fastmap_1.2.0 boot_1.3-31 #> [73] bluster_1.15.1 fansi_1.0.6 #> [75] digest_0.6.37 rsvd_1.0.5 #> [77] gridGraphics_0.5-1 R6_2.5.1 #> [79] textshaping_0.4.0 colorspace_2.1-1 #> [81] lpSolve_5.6.21 utf8_1.2.4 #> [83] tidyr_1.3.1 generics_0.1.3 #> [85] data.table_1.16.0 DECIPHER_3.1.4 #> [87] graphlayouts_1.1.1 httr_1.4.7 #> [89] htmlwidgets_1.6.4 S4Arrays_1.5.7 #> [91] pkgconfig_2.0.3 gtable_0.3.5 #> [93] janeaustenr_1.0.0 htmltools_0.5.8.1 #> [95] bookdown_0.40 scales_1.3.0 #> [97] ggfun_0.1.6 rstudioapi_0.16.0 #> [99] reshape2_1.4.4 checkmate_2.3.2 #> [101] nlme_3.1-166 nloptr_2.1.1 #> [103] cachem_1.1.0 zoo_1.8-12 #> [105] stringr_1.5.1 parallel_4.4.1 #> [107] vipor_0.4.7 foreign_0.8-87 #> [109] desc_1.4.3 pillar_1.9.0 #> [111] grid_4.4.1 
vctrs_0.6.5 #> [113] slam_0.1-53 BiocSingular_1.21.3 #> [115] beachmat_2.21.6 cluster_2.1.6 #> [117] beeswarm_0.4.0 htmlTable_2.4.3 #> [119] evaluate_0.24.0 mvtnorm_1.3-1 #> [121] cli_3.6.3 compiler_4.4.1 #> [123] rlang_1.1.4 crayon_1.5.3 #> [125] tidytext_0.4.2 labeling_0.4.3 #> [127] mediation_4.5.0 plyr_1.8.9 #> [129] fs_1.6.4 ggbeeswarm_0.7.2 #> [131] stringi_1.8.4 viridisLite_0.4.2 #> [133] BiocParallel_1.39.0 assertthat_0.2.1 #> [135] munsell_0.5.1 lazyeval_0.2.2 #> [137] Matrix_1.7-0 patchwork_1.2.0 #> [139] sparseMatrixStats_1.17.2 highr_0.11 #> [141] igraph_2.0.3 memoise_2.0.1 #> [143] RcppParallel_5.1.9 bslib_0.8.0 #> [145] ggtree_3.13.1 ape_5.8"},{"path":"/articles/MGnifyR_long.html","id":"introduction","dir":"Articles","previous_headings":"","what":"Introduction","title":"MGnifyR, extended vignette","text":"MGnifyR package designed ease access EBI’s MGnify resource, allowing searching retrieval multiple datasets downstream analysis. MGnify pipelines undoubtedly useful, currently implemented produce results strictly per-sample basis. whole study results available, comparisons across studies difficult. MGnifyR package designed facilitate cross-study analyses handling per-sample data retrieval merging details internally, leaving user free perform analysis see fit. latest version MGnifyR seamlessly integrates miaverse framework providing access tools microbiome -stream analytics. integration enables users leverage optimized standardized methods analyzing microbiome. Additionally, users can benefit comprehensive tutorial book offers valuable guidance support.","code":""},{"path":"/articles/MGnifyR_long.html","id":"installation","dir":"Articles","previous_headings":"","what":"Installation","title":"MGnifyR, extended vignette","text":"MGnifyR currently hosted GitHub, can installed using via devtools. MGnifyR built using following snippet.","code":"BiocManager::install(\"MGnifyR\")"},{"path":"/articles/MGnifyR_long.html","id":"load-mgnifyr-package","dir":"Articles","previous_headings":"","what":"Load MGnifyR package","title":"MGnifyR, extended vignette","text":"installed, MGnifyR made available usual way.","code":"library(MGnifyR)"},{"path":"/articles/MGnifyR_long.html","id":"create-a-client","dir":"Articles","previous_headings":"","what":"Create a client","title":"MGnifyR, extended vignette","text":"functions MGnifyR make use MgnifyClient object keep track JSONAPI url, disk cache location user access tokens. Thus first thing starting analysis instantiate object. following snippet creates . ’s recommended local caching enabled useCache = TRUE. Queries MGnify API can quite slow, particularly retrieving multipage results many analyses (many Interpro results). Using local disk cache can significantly speed subsequent work, bypassing need re-query API. Use cache entirely transparent, caching occurs raw data level. cache can persist across MGnifyR sessions, can even used multiple sessions simultaneously - provided different sets accessions queried . Optionally, username password may specified client creation, causing MGnifyR attempt retrieval authentication token API. 
gives access non-public results, currently author imposed embargo period.","code":"mg <- MgnifyClient() mg mg <- MgnifyClient( username = \"Webin-username\", password = \"your-password\", useCache = TRUE)"},{"path":[]},{"path":"/articles/MGnifyR_long.html","id":"search-data","dir":"Articles","previous_headings":"Functions for fetching the data","what":"Search data","title":"MGnifyR, extended vignette","text":"MGnifyR gives users access complete range search functionality implemented MGnify JSON API. single function doQuery() used perform searching, allowing Studies, Samples, Runs Accession interrogated common interface. MGnifyR functions first argument client must valid MgnifyClient instance. remaining required parameter qtype, specifying type data queried, may one studies, samples, runs, analyses assemblies. general parameter include max.hits. Unlike MGnifyR high level functions, caching turned default doQuery(). New data analyses added MGnify time, enabling caching default may lead --date search results long-lived sessions. However, ’s easy switch back , may useful many cases. Also, given huge ever increasing number datasets available MGnify, limit number results returned may set using max.hits. default set 200, exploratory queries sufficient. may increased decreased directly specifying max.hits, disabled completely (limit) setting max.hits=NULL. cases want specific search, also use either accession parameter, many filter options available API, discussed . Specifying accession id, case samples, runs assemblies may vector ids, returns data.frame metadata one row per matching accession. accession NULL (default) remaining parameters define filters applied API search result. Details parameters given help(doQuery). way example though, supposing interested amplicon Illumina samples arctic, might try following query: Specifying accession parameter restrict results just matching particular entry, study, sample run. example, retrieve information study “MGYS00002891”:","code":"northpolar <- doQuery( mg, \"samples\", latitude_gte=60.0, experiment_type=\"amplicon\", biome_name=\"Soil\", instrument_platform = \"Illumina\", max.hits = 10) head(northpolar) study_samples <- doQuery(mg, \"studies\", accession=\"MGYS00002891\") head(study_samples)"},{"path":"/articles/MGnifyR_long.html","id":"find-relevent-analyses-accessions","dir":"Articles","previous_headings":"Functions for fetching the data","what":"Find relevent analyses accessions","title":"MGnifyR, extended vignette","text":"obtained particular set search hits, ’s now time retrieve associated results. General automated analysis complicated MGnify database design, wherein example samples may shared multiple studies, studies analysed multiple times using different versions pipeline. Navigating “many--one” relationships can tricky, MGnifyR resorts using analyses accessions ’s canonical identifier. analysis corresponds single run particular pipeline single sample single study. downside approach queries returning studies, samples (anything analyses) accessions need converting corresponding analyses. MGnifyR therefore provides helper function handle conversion - searchAnalysis(). Following previous search, list study accessions, convert corresponding analyses use: useful side effect call attribute metadata sample now retrieved stored local cache. Thus subsequent API calls samples (occur multiple times later steps) significantly faster. ’s important aware results searchAnalysis() command necessarily one--one match input accessions. 
MGnify analysis runs sometimes performed multiple times, perhaps using different versions pipeline. Thus filtering result list may required, easily performed illustrated next section.","code":"analyses_accessions <- searchAnalysis( mg, type=\"studies\", accession = study_samples$accession) head(analyses_accessions)"},{"path":"/articles/MGnifyR_long.html","id":"fetch-metadata","dir":"Articles","previous_headings":"Functions for fetching the data","what":"Fetch metadata","title":"MGnifyR, extended vignette","text":"point long list analysis instances (potential duplicates) corresponding samples previously found. use getMetadata function download combine associated sample, run study metadata, filter required include rows want. resulting data.frame columns names prefixed source type. example, “sample_xxx” columns correspond metadata gleaned querying accession’s sample entry. MGnify allows quite flexible specification arbitray metadata submission time, many cases leading quite sparse data.frame results accession queries sourced one study. instance, one sample contains entry “sample_soil_PH”, entries rows filled NA. MGnifyR automatically clean missing values - instead opting allow user choose correct action. particular study ’re looking marine biome, suppose interested samples analyses sampling depth known. following snippet filters full data.frame selecting entries contain valid sample_depth. ’s worth noting .numeric call ensure column converted numeric type checked. sample data MGnifyR initially retrieved type character, ’s user make sure ostensibly numeric entries converted properly.","code":"analyses_metadata <- getMetadata(mg, analyses_accessions) head(analyses_metadata) known_depths <- analyses_metadata[ !is.na(as.numeric(analyses_metadata$sample_depth)), ] # How many are left? dim(known_depths)"},{"path":"/articles/MGnifyR_long.html","id":"fetch-microbiome-data","dir":"Articles","previous_headings":"Functions for fetching the data","what":"Fetch microbiome data","title":"MGnifyR, extended vignette","text":"selected analyses wish examine , getResult() used download associated OTU tables taxonomy, join results single TreeSummarizedExperiment (TreeSE) object. TreeSE becoming defacto standard taxonomic abundance munging R. TreeSE objects integrate abundance, taxonomic, phylogenetic, sample sequence data single object, powerful facilities filtering, processing plotting results. Compared phyloseq object, TreeSE scalable capable efficient data analysis. miaverse framework developed around TreeSE data container. provides tools analysis visualization. Moreover, includes comprehensive tutorial book called OMA.","code":""},{"path":"/articles/MGnifyR_long.html","id":"amplicon-sequencing","dir":"Articles","previous_headings":"Functions for fetching the data > Fetch microbiome data","what":"Amplicon sequencing","title":"MGnifyR, extended vignette","text":"dataset includes amplicon sequencing data, .e., dataset include function predictions, getResult() method returns dataset TreeSE default. See output types function documentation. TreeSE object uniquely positioned support SummarizedExperiment-based microbiome data manipulation visualization. Moreover, enables access miaverse tools. example, can estimate diversity samples. 
needed, TreeSE can converted phyloseq.","code":"tse <- getResult(mg, accession = analyses_accessions, get.func = FALSE) tse library(mia) tse <- estimateDiversity(tse, index = \"shannon\") library(scater) plotColData(tse, \"shannon\", x = \"sample_geo.loc.name\") library(miaViz) plotAbundance( tse[!is.na( rowData(tse)[[\"Kingdom\"]] ), ], rank = \"Kingdom\", as.relative = TRUE ) pseq <- makePhyloseqFromTreeSE(tse) pseq"},{"path":"/articles/MGnifyR_long.html","id":"metagenomics","dir":"Articles","previous_headings":"Functions for fetching the data > Fetch microbiome data","what":"Metagenomics","title":"MGnifyR, extended vignette","text":"Although previous queries based results doQuery(), now concentrate combining comparing results specific studies. Since newly performed analyses retrieved first doQuery() call, ’s likely time vignette read, query results different. principally due rapid increase MGnify submissions, leading potential lack consistency even closely spaced queries. mentioned previously, may best use useCache=FALSE MgnifyCLient object doQuery() calls, ensure queries actually returning latest data. remainder vignette however, ’ll comparing 3 ostensibly different studies. study saltmarsh soils York University, human faecal samples survey healthy Sardinians, set samples hydrothermal vents Mid-Cayman rise Carribbean Sea. simplify things, first 20 samples study used. Furthermore, intention demonstrate functionality MGnifyR package, rather produce scientifically rigorous results. first step new accession list , previously, retrieve associated metadata using getMetadata(), seen doQuery() results, returned data.frame contains large number columns. autogenerated flexible, column names can little difficult predict, examining colnames(full_metadata) make things clearer. full_metadata get idea type data ’re dealing , can extract useul information sequencing platform, source biome, etc. next code snippet tallies columns give idea ’s available. boxplot also indicates within study read counts similar, probably need use sort normalization procedure comparing across samples. might also want drop particularly low read coverage samples analysis. , can fetch data calling getResult(). bulk.dl=TRUE potential significantly speed data retrieval. MGnify makes functional results available two separate ways, either per-analysis basis web api, whole study level large files, tab separated (TSV), columns representing results analysis. bulk.dl FALSE, MGnifyR queries web api get results (given functional analyses results may consist thousands entries) may take significant time. Setting bulk.dl TRUE causes MGnifyR determine source study associated particular analysis instead download parse corresponding results file. Since result file contains entries analyses associated study, taking advantage MGnifyR’s local caching single download provides results many future analyses. cases affords several orders magnitude speedup api query case. Unfortunately, column entries per-study results files always directly correspond particular analysis run, causing retrieval fail. principal cause believed running multiple analyses jobs sample. Thus reliability, bulk.dl FALSE default. general recommendation though, try setting TRUE first time getResult() used set accessions. fails, setting bulk.dl FALSE enable robust approach allowing analysis continue. might take though. Hopefully future sample/analysis correspondence mismatches fixed default bulk.dl switch TRUE. 
metagenomic samples, result MultiAssayExperiment (MAE) links multiple TreeSE objects one dataset. TreeSE objects include taxonomic profiling data along functional data unique objects. objects linked sample names. can get access individual object experiment specifying index name. can perform principal component analysis microbial profiling data utilizing miaverse tools.","code":"soil <- searchAnalysis(mg, \"studies\", \"MGYS00001447\") human <- searchAnalysis(mg, \"studies\", \"MGYS00001442\") marine <- searchAnalysis(mg, \"studies\", \"MGYS00001282\") # Combine analyses all_accessions <- c(soil, human, marine) head(all_accessions) full_metadata <- getMetadata(mg, all_accessions) colnames(full_metadata) head(full_metadata) # Load ggplot2 library(ggplot2) #Distribution of sample source material: table(full_metadata$`sample_environment-material`) #What sequencing machine(s) were used? table(full_metadata$`sample_instrument model`) # Boxplot of raw read counts: ggplot( full_metadata, aes(x=study_accession, y=log( as.numeric(`analysis_Submitted nucleotide sequences`)))) + geom_boxplot(aes(group=study_accession)) + theme_bw() + ylab(\"log(submitted reads)\") mae <- getResult(mg, all_accessions, bulk.dl = TRUE) mae mae[[2]] # Apply relative transformation mae[[1]] <- transformAssay(mae[[1]], method = \"relabundance\") # Perform PCoA mae[[1]] <- runMDS( mae[[1]], assay.type = \"relabundance\", FUN = vegan::vegdist, method = \"bray\") # Plot plotReducedDim(mae[[1]], \"MDS\", colour_by = \"sample_environment.feature\")"},{"path":"/articles/MGnifyR_long.html","id":"fetch-raw-files","dir":"Articles","previous_headings":"Functions for fetching the data","what":"Fetch raw files","title":"MGnifyR, extended vignette","text":"getResult() can utilized retrieve microbial profiling data, getData() can used flexibly retrieve kind data database. returns data simple data.frame list format.","code":"kegg <- getData( mg, type = \"kegg-modules\", accession = \"MGYA00642773\", accession.type = \"analyses\") head(kegg)"},{"path":"/articles/MGnifyR_long.html","id":"fetch-sequence-files","dir":"Articles","previous_headings":"Functions for fetching the data","what":"Fetch sequence files","title":"MGnifyR, extended vignette","text":"Finally, can use searchFile() getFile() retrieve MGnify pipeline outputs merged sequence reads, assembled contigs, details functional analyses. searchFile() simple wrapper function , supplied list accessions, finds urls files ’re . cases ’ll want filter returned list files interest, easily done resulting data.frame object. addition actual download location (download_url column), extra columns include file type, contents compression. ’s recommended colnames data.frame examined get grasp available metadata. demonstrate process, code retrieves data.frame containing available downloads accession ’ve examining previously. filters retain files corresponding retain annotated amino acid sequence files. list types available files, guide filtering, something like following might useful. Unlike MGnifyR functions, searchFile() limited analyses, specifying accession_type results types may found. instance, general genome functionality yet integrated MGnifyR, can retrieve associated files particular genome accession following: found set target urls, final step use getFile() actually retrieve file. Unlike functions, works single url location , entry target_urls must downloaded individually - easily done either looping applying list. 
files intended used external programs, might easiest provide file parameter function call, specifies local filename writing file. default MGnifyR use local cache, can make getting file afterwards awkward. Regardless, default behaviour getFile() retrieve file specified parameter url, save disk, return filepath saved . second download option available, allows built-in parsing file. know ahead time processing performed, may possible integrate function, pass function getFile() read.func argument. function question take single argument (complete path name locally downloaded file) result call returned place usual output file name. Alternatively files first downloaded standard way, processed using function loop. Therefore many cases read.func parameter redundant. However, many outputs MGnify can quite large, meaning local storage many files may become issue. providing read.func parameter (necessarily setting MgnifyClient object: useCache=FALSE) analysis large number datasets may possible minimal storage requirements. illustrate, suppose interested retrieving detected sequences matching particular PFAM motif set analyses. simple function uses Biostrings package read amino acid fasta file, searches matching PFAM tag sequence name, tallies unique sequences single data.frame row. case PFAM motif identifies sequences coding amoC gene, found ammonia methane oxidizing organisms, filtering method used. defined function, just remains include call getFile().","code":"# Find list of available downloads dl_urls <- searchFile( mg, full_metadata$analysis_accession, type = \"analyses\") # Filter table target_urls <- dl_urls[ dl_urls$attributes.description.label == \"Predicted CDS with annotation\", ] head(target_urls) table(dl_urls$attributes.description.label) genome_urls <- searchFile(mg, \"MGYG000433953\", type = \"genomes\") genome_urls[ , c(\"id\", \"attributes.file.format.name\", \"download_url\")] # Just select a single file from the target_urls list for demonstration. # Default behavior - use local cache. cached_location1 = getFile(mg, target_urls$download_url[[1]]) # Specifying a file cached_location2 <- getFile( mg, target_urls$download_url[[1]]) cached_location <- c(cached_location1, cached_location2) # Where are the files? cached_location library(Biostrings) # Simple function to get a count of unique sequences matching PFAM amoC/mmoC motif getAmoCseqs <- function(fname){ sequences <- readAAStringSet(fname) tgtvec <- grepl(\"PF04896\", names(sequences)) as.data.frame(as.list(table(as.character(sequences[tgtvec])))) } # Just download a single accession for demonstration, specifying a read.func function amoC_seq_counts <- getFile( mg, target_urls$download_url[[1]], read.func = getAmoCseqs) amoC_seq_counts sessionInfo()"},{"path":"/articles/MGnify_course.html","id":"introduction","dir":"Articles","previous_headings":"","what":"Introduction","title":"Metagenomics bioinformatics at MGnify","text":"notebook aim demonstrate MGnifyR tool can used fetch data MGnify microbiome data resource. showcase analyze data using advanced microbiome data science tools, including estimating alpha beta diversity, well performing differential abundance analysis. MGnifyR R/Bioconductor package provides set tools easily accessing processing MGnify data R, making queries MGnify databases MGnify API. benefit MGnifyR streamlines data access, allowing users fetch data either “raw” format directly TreeSummarizedExperiment (TreeSE) object. enables seamless integration custom workflows analysis. 
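To illustrate the "raw versus TreeSE" distinction described above, a short hypothetical sketch follows; the accession IDs are example analyses reused from the reference pages later in this file, and any valid analysis accession could be substituted.
library(MGnifyR)

# Sketch only: the same client can return either raw records or an
# analysis-ready data container.
mg <- MgnifyClient(useCache = TRUE)

# "Raw" route: untouched records from the database as a data.frame/list
raw_kegg <- getData(
    mg, type = "kegg-modules", accession = "MGYA00642773",
    accession.type = "analyses"
)

# Structured route: taxonomic counts as a TreeSummarizedExperiment
tse <- getResult(mg, accession = "MGYA00377505", get.func = FALSE)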
Utilizing TreeSE provides access wide range tools within Bioconductor’s SummarizedExperiment (SE) ecosystem. also integrates mia package, offers microbiome-specific methods within SE framework. information microbiome data science Bioconductor, refer Orchestrating Microbiome Analysis (OMA) online book.","code":""},{"path":"/articles/MGnify_course.html","id":"load-packages","dir":"Articles","previous_headings":"","what":"Load packages","title":"Metagenomics bioinformatics at MGnify","text":"","code":"# List of packages that we need packages <- c(\"ANCOMBC\", \"MGnifyR\", \"mia\", \"miaViz\", \"scater\") update <- FALSE # Loads BiocManager into the session. Install it if it is not already installed. if( !require(\"BiocManager\", quietly = TRUE) ){ install.packages(\"BiocManager\") library(\"BiocManager\", quietly = TRUE) } # If there are packages that need to be installed, installs them with # BiocManager install(packages, update = update, ask = FALSE) |> suppressWarnings() |> suppressMessages() # Load all packages into session. Stop if there are packages that were not # successfully loaded pkgs_not_loaded <- !sapply(packages, require, character.only = TRUE) |> suppressMessages() pkgs_not_loaded <- names(pkgs_not_loaded)[ pkgs_not_loaded ] if( length(pkgs_not_loaded) > 0 ){ stop(\"Error in loading the following packages into the session: '\", paste0(pkgs_not_loaded, collapse = \"', '\"), \"'\") }"},{"path":"/articles/MGnify_course.html","id":"data-import","dir":"Articles","previous_headings":"","what":"Data import","title":"Metagenomics bioinformatics at MGnify","text":"example, fetch taxonomy annotations metadata specified study. dataset focuses human gut microbiome, analyzed across different geographic regions. interact MGnify database, need create MgnifyClient object. object allows us store options data fetching. instance, can configure use cache improved efficiency. can now search analyses associated certain study. analysis refers metagenomic runs performed samples. sample can multiple runs made, work analyses samples; analysis identifier points single entity. MGnify database, study unique identifier. study interested accession ID “MGYS00005154”. Now ready load metadata analyses get idea kind data dealing . currently (17 Sep 2024) almost 1,000 analyses available. Downloading whole dataset take time, use memory cache. can see analyses performed different pipelines. Let’s take analyses generated pipeline version 5.0. now analyses point unique sample. final step fetch abundance tables TreeSummarizedExperiment (TreeSE) format. fetched data TreeSE object, including taxonomy annotations. See OMA online book handle data format.","code":"# Create the MgnifyClient object with caching enabled mg <- MgnifyClient( useCache = TRUE, cacheDir = \"/home/trainers\" # Set this to your desired cache directory ) study_id <- \"MGYS00005154\" analysis_id <- searchAnalysis(mg, \"studies\", study_id) metadata <- getMetadata(mg, accession = analysis_id) metadata <- metadata[metadata[[\"analysis_pipeline-version\"]] == \"5.0\", ] tse <- getResult( mg, accession = metadata[[\"analysis_accession\"]], get.func = FALSE ) tse"},{"path":"/articles/MGnify_course.html","id":"preprocessing","dir":"Articles","previous_headings":"","what":"Preprocessing","title":"Metagenomics bioinformatics at MGnify","text":", agglomerate data Order level, meaning summarize abundances specific taxonomic rank. OMA provides detailed chapter explaining agglomeration depth. unique properties microbiome data, apply transformations. 
, perform relative transformation. can find information transformations OMA.","code":"tse_order <- agglomerateByRank(tse, rank = \"Order\") # Transform the main TreeSE tse <- transformAssay(tse, method = \"relabundance\") # Transform the agglomerated TreeSE tse_order <- transformAssay(tse_order, method = \"relabundance\")"},{"path":"/articles/MGnify_course.html","id":"alpha-diversity","dir":"Articles","previous_headings":"","what":"Alpha diversity","title":"Metagenomics bioinformatics at MGnify","text":"Alpha diversity measures community diversity within sample. Learn community diversity . can test whether diversity differences statistically significant. utilize Mann-Whitney U test (Wilcoxon test). add p-values plot, see OMA.","code":"# Calculate alpha diversity tse <- addAlpha(tse) # Create a plot p <- plotColData( tse, y = \"shannon_diversity\", x = \"sample_geographic.location..country.and.or.sea.region.\", show_boxplot = TRUE ) p pairwise.wilcox.test( tse[[\"shannon_diversity\"]], tse[[\"sample_geographic.location..country.and.or.sea.region.\"]], p.adjust.method = \"fdr\" )"},{"path":"/articles/MGnify_course.html","id":"beta-diversity","dir":"Articles","previous_headings":"","what":"Beta diversity","title":"Metagenomics bioinformatics at MGnify","text":"can assess differences microbial compositions samples, aiming identify patterns data associated covariates. achieve , perform Principal Coordinate Analysis (PCoA) using Bray-Curtis dissimilarity. See community similarity chapter OMA information.","code":"# Perform PCoA tse <- runMDS( tse, FUN = getDissimilarity, method = \"bray\", assay.type = \"relabundance\" ) # Visualize PCoA p <- plotReducedDim( tse, dimred = \"MDS\", colour_by = \"sample_geographic.location..country.and.or.sea.region.\" ) p"},{"path":"/articles/MGnify_course.html","id":"differential-abundance-analysis-daa","dir":"Articles","previous_headings":"","what":"Differential abundance analysis (DAA)","title":"Metagenomics bioinformatics at MGnify","text":"DAA, analyze whether abundances certain features vary study groups. , OMA dedicated chapter also topic. Next visualize features lowest p-values.","code":"# Perform DAA res <- ancombc2( data = tse_order, assay.type = \"counts\", fix_formula = \"sample_geographic.location..country.and.or.sea.region.\", p_adj_method = \"fdr\", ) # Get the most significant features n_top <- 4 res_tab <- res[[\"res\"]] res_tab <- res_tab[order(res_tab[[\"q_(Intercept)\"]]), ] top_feat <- res_tab[seq_len(n_top), \"taxon\"] # Create a plot p <- plotExpression( tse_order, features = top_feat, assay.type = \"relabundance\", x = \"sample_geographic.location..country.and.or.sea.region.\", show_boxplot = TRUE, show_violin = FALSE, point_shape = NA ) + scale_y_log10() p"},{"path":"/articles/MGnify_course.html","id":"session-info","dir":"Articles","previous_headings":"","what":"Session info","title":"Metagenomics bioinformatics at MGnify","text":"","code":"sessionInfo()"},{"path":"/authors.html","id":null,"dir":"","previous_headings":"","what":"Authors","title":"Authors and Citation","text":"Tuomas Borman. Author, maintainer. Ben Allen. Author. Leo Lahti. Author.","code":""},{"path":"/authors.html","id":"citation","dir":"","previous_headings":"","what":"Citation","title":"Authors and Citation","text":"Borman T, Allen B, Lahti L (2024). MGnifyR: R interface EBI MGnify metagenomics resource. 
R package version 0.99.30, https://github.com/EBI-Metagenomics/MGnifyR.","code":"@Manual{, title = {MGnifyR: R interface to EBI MGnify metagenomics resource}, author = {Tuomas Borman and Ben Allen and Leo Lahti}, year = {2024}, note = {R package version 0.99.30}, url = {https://github.com/EBI-Metagenomics/MGnifyR}, }"},{"path":"/index.html","id":"mgnifyr-","dir":"","previous_headings":"","what":"R interface to EBI MGnify metagenomics resource","title":"R interface to EBI MGnify metagenomics resource","text":"R package searching retrieving data EBI Metagenomics resource. cases, MGnifyR interacts directly JSONAPI, rather relying downloading analyses outputs TSV files. Thus general - allowing example intuitive combining multiple studies analyses single workflow, cases slower aforementioned direct access. Local caching results disk implemented help counter overheads, data downloads can slow - particularly functional annotation retrieval. MGnifyR package part miaverse microbiome analysis ecosystem enabling usage mia miaverse packages. research received funding Horizon 2020 Programme European Union within FindingPheno project grant agreement 952914. FindingPheno, EU-funded project, dedicated developing computational tools methodologies integration analysis multi-omics data. primary objective deepen understanding interactions hosts microbiomes. can find information FindingPheno website.","code":""},{"path":[]},{"path":"/index.html","id":"bioc-release","dir":"","previous_headings":"Installation","what":"Bioc-release","title":"R interface to EBI MGnify metagenomics resource","text":"","code":"if (!requireNamespace(\"BiocManager\", quietly = TRUE)) install.packages(\"BiocManager\") BiocManager::install(\"MGnifyR\")"},{"path":"/index.html","id":"bioc-devel","dir":"","previous_headings":"Installation","what":"Bioc-devel","title":"R interface to EBI MGnify metagenomics resource","text":"","code":"if (!requireNamespace(\"BiocManager\", quietly = TRUE)) install.packages(\"BiocManager\") # The following initializes usage of Bioc devel BiocManager::install(version='devel') BiocManager::install(\"MGnifyR\")"},{"path":"/index.html","id":"github","dir":"","previous_headings":"Installation","what":"GitHub","title":"R interface to EBI MGnify metagenomics resource","text":"","code":"remotes::install_github(\"EBI-Metagenomics/MGnifyR\")"},{"path":"/index.html","id":"basic-usage","dir":"","previous_headings":"","what":"Basic usage","title":"R interface to EBI MGnify metagenomics resource","text":"detailed instructions read associated function help vignette (vignette(\"MGnifyR\"))","code":"library(MGnifyR) # Set up the MGnify client instance mgclnt <- MgnifyClient(useCache = TRUE, cacheDir = '/tmp/MGnify_cache') # Retrieve the list of analyses associated with a study accession_list <- searchAnalysis(mgclnt, \"studies\", \"MGYS00005058\") # Download all associated study/sample and analysis metadata meta_dataframe <- getMetadata(mgclnt, accession_list) # Convert analyses outputs to a single `MultiAssayExperiment` object mae <- getResult(mgclnt, meta_dataframe$analysis_accession) mae"},{"path":"/reference/MGnifyR-package.html","id":null,"dir":"Reference","previous_headings":"","what":"MGnifyR Package. — MGnifyR-package","title":"MGnifyR Package. — MGnifyR-package","text":"MGnifyR implements interface EBI MGnify database. See vignette general introduction package. 
MGnify general MGnify information, API documentation details JSONAPI implementation.","code":""},{"path":[]},{"path":"/reference/MGnifyR-package.html","id":"author","dir":"Reference","previous_headings":"","what":"Author","title":"MGnifyR Package. — MGnifyR-package","text":"Maintainer: Tuomas Borman tuomas.v.borman@utu.fi (ORCID) Authors: Ben Allen ben.allen@ncl.ac.uk Leo Lahti leo.lahti@iki.fi (ORCID)","code":""},{"path":"/reference/MgnifyClient-accessors.html","id":null,"dir":"Reference","previous_headings":"","what":"MgnifyClient accessors and mutators — databaseUrl","title":"MgnifyClient accessors and mutators — databaseUrl","text":"MgnifyClient accessors mutators","code":""},{"path":"/reference/MgnifyClient-accessors.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"MgnifyClient accessors and mutators — databaseUrl","text":"","code":"databaseUrl(x) authTok(x) useCache(x) cacheDir(x) showWarnings(x) clearCache(x) verbose(x) databaseUrl(x) <- value authTok(x) <- value useCache(x) <- value cacheDir(x) <- value showWarnings(x) <- value clearCache(x) <- value verbose(x) <- value # S4 method for class 'MgnifyClient' databaseUrl(x) # S4 method for class 'MgnifyClient' authTok(x) # S4 method for class 'MgnifyClient' useCache(x) # S4 method for class 'MgnifyClient' cacheDir(x) # S4 method for class 'MgnifyClient' showWarnings(x) # S4 method for class 'MgnifyClient' clearCache(x) # S4 method for class 'MgnifyClient' verbose(x) # S4 method for class 'MgnifyClient' databaseUrl(x) <- value # S4 method for class 'MgnifyClient' authTok(x) <- value # S4 method for class 'MgnifyClient' useCache(x) <- value # S4 method for class 'MgnifyClient' cacheDir(x) <- value # S4 method for class 'MgnifyClient' showWarnings(x) <- value # S4 method for class 'MgnifyClient' clearCache(x) <- value # S4 method for class 'MgnifyClient' verbose(x) <- value"},{"path":"/reference/MgnifyClient-accessors.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"MgnifyClient accessors and mutators — databaseUrl","text":"x MgnifyClient object. value value added certain slot.","code":""},{"path":"/reference/MgnifyClient-accessors.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"MgnifyClient accessors and mutators — databaseUrl","text":"value MgnifyClient object nothing.","code":""},{"path":"/reference/MgnifyClient-accessors.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"MgnifyClient accessors and mutators — databaseUrl","text":"functions fetching mutating slots MgnifyClient object.","code":""},{"path":"/reference/MgnifyClient-accessors.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"MgnifyClient accessors and mutators — databaseUrl","text":"","code":"mg <- MgnifyClient() databaseUrl(mg) #> [1] \"https://www.ebi.ac.uk/metagenomics/api/v1\" showWarnings(mg) <- FALSE"},{"path":"/reference/MgnifyClient.html","id":null,"dir":"Reference","previous_headings":"","what":"Constructor for creating a MgnifyClient object to allow the access to MGnify database. — MgnifyClient","title":"Constructor for creating a MgnifyClient object to allow the access to MGnify database. — MgnifyClient","text":"Constructor creating MgnifyClient object allow access MGnify database. 
MgnifyClient object","code":""},{"path":"/reference/MgnifyClient.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Constructor for creating a MgnifyClient object to allow the access to MGnify database. — MgnifyClient","text":"","code":"MgnifyClient( username = NULL, password = NULL, useCache = FALSE, cacheDir = tempdir(), showWarnings = FALSE, verbose = TRUE, clearCache = FALSE, ... )"},{"path":"/reference/MgnifyClient.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Constructor for creating a MgnifyClient object to allow the access to MGnify database. — MgnifyClient","text":"username single character value specifying optional username authentication. (default: username = NULL) password single character value specifying optional password authentication. (default: password = NULL) useCache single boolean value specifying whether enable -disk caching results session. use cases TRUE. (default: useCache = FALSE) cacheDir single character value specifying folder contain local cache. Note cached files persistent, cache directory may reused sessions, taking advantage previously downloaded results. directory created exist already. (default: cacheDir = tempdir()) showWarnings single boolean value specifying whether print warnings invocation MGnifyR functions. (default: showWarnings = FALSE) verbose single boolean value specifying whether print extra output invocation MGnifyR functions. (default: verbose = FALSE) clearCache single boolean value specifying whether clear cache. (default: clearCache = FALSE) ... optional arguments: url single character value specifying url address database. (default: url = \"https://www.ebi.ac.uk/metagenomics/api/v1\")","code":""},{"path":"/reference/MgnifyClient.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Constructor for creating a MgnifyClient object to allow the access to MGnify database. — MgnifyClient","text":"MgnifyClient object.","code":""},{"path":"/reference/MgnifyClient.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Constructor for creating a MgnifyClient object to allow the access to MGnify database. — MgnifyClient","text":"functions MGnifyR package take MgnifyClient object first argument. object allows simple handling user authentication access private data, manages general options querying MGnify database. object required functions MGnifyR package.","code":""},{"path":"/reference/MgnifyClient.html","id":"slots","dir":"Reference","previous_headings":"","what":"Slots","title":"Constructor for creating a MgnifyClient object to allow the access to MGnify database. — MgnifyClient","text":"databaseUrl single character value specifying URL address database. authTok single character value specifying authentication token. useCache single boolean value specifying whether use cache. cacheDir single character value specifying cache directory. showWarnings single boolean value specifying whether show warnings. clearCache single boolean value specifying whether clear cache. verbose single boolean value specifying whether show messages.","code":""},{"path":"/reference/MgnifyClient.html","id":"constructor","dir":"Reference","previous_headings":"","what":"Constructor","title":"Constructor for creating a MgnifyClient object to allow the access to MGnify database. 
— MgnifyClient","text":"See MgnifyClient constructor.","code":""},{"path":"/reference/MgnifyClient.html","id":"accessor","dir":"Reference","previous_headings":"","what":"Accessor","title":"Constructor for creating a MgnifyClient object to allow the access to MGnify database. — MgnifyClient","text":"See MgnifyClient-accessors accessor functions.","code":""},{"path":"/reference/MgnifyClient.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Constructor for creating a MgnifyClient object to allow the access to MGnify database. — MgnifyClient","text":"","code":"my_client <- MgnifyClient( useCache = TRUE, cacheDir = \"/scratch/MGnify_cache_location\" ) if (FALSE) { # \\dontrun{ # Use username and password to get access to non-public data my_client <- MgnifyClient( username = \"Webin-1122334\", password = \"SecretPassword\", useCache = TRUE, cacheDir = \"/scratch/MGnify_cache_location\" ) } # }"},{"path":"/reference/deprecate.html","id":null,"dir":"Reference","previous_headings":"","what":"These functions will be deprecated. Please use other functions instead. — deprecate","title":"These functions will be deprecated. Please use other functions instead. — deprecate","text":"functions deprecated. Please use functions instead.","code":""},{"path":"/reference/deprecate.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"These functions will be deprecated. Please use other functions instead. — deprecate","text":"","code":"mgnify_client( username = NULL, password = NULL, usecache = FALSE, cache_dir = NULL, warnings = FALSE, use_memcache = FALSE, ... ) mgnify_query( client, qtype = \"samples\", accession = NULL, asDataFrame = TRUE, maxhits = 200, usecache = FALSE, ... ) mgnify_analyses_from_samples(client, accession, usecache = TRUE, ...) mgnify_analyses_from_studies(client, accession, usecache = TRUE, ...) mgnify_get_download_urls( client, accessions, accession_type, usecache = TRUE, ... ) mgnify_download( client, url, file = NULL, read_func = NULL, usecache = TRUE, Debug = FALSE, ... ) mgnify_get_analyses_results( client = NULL, accessions, retrievelist = c(), compact_results = TRUE, usecache = TRUE, bulk_dl = FALSE, ... ) mgnify_get_analyses_phyloseq( client = NULL, accessions, usecache = TRUE, returnLists = FALSE, tax_SU = \"SSU\", get_tree = FALSE, ... ) mgnify_get_analyses_metadata(client, accessions, usecache = TRUE, ...) mgnify_retrieve_json( client, path = \"biomes\", complete_url = NULL, qopts = NULL, maxhits = 200, usecache = FALSE, Debug = FALSE, ... )"},{"path":"/reference/deprecate.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"These functions will be deprecated. Please use other functions instead. — deprecate","text":"username - password - usecache - cache_dir - warnings - use_memcache - ... - client - qtype - accession - asDataFrame - maxhits - accessions - accession_type - url - file - read_func - Debug - retrievelist - compact_results - bulk_dl - returnLists - tax_SU - get_tree - path - complete_url - qopts -","code":""},{"path":"/reference/deprecate.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"These functions will be deprecated. Please use other functions instead. — deprecate","text":"-","code":""},{"path":"/reference/doQuery.html","id":null,"dir":"Reference","previous_headings":"","what":"Search MGnify database for studies, samples, runs, analyses, biomes, assemblies, and genomes. 
— doQuery","title":"Search MGnify database for studies, samples, runs, analyses, biomes, assemblies, and genomes. — doQuery","text":"Search MGnify database studies, samples, runs, analyses, biomes, assemblies, genomes.","code":""},{"path":"/reference/doQuery.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Search MGnify database for studies, samples, runs, analyses, biomes, assemblies, and genomes. — doQuery","text":"","code":"doQuery(x, ...) # S4 method for class 'MgnifyClient' doQuery( x, type = \"studies\", accession = NULL, as.df = TRUE, max.hits = 200, ... )"},{"path":"/reference/doQuery.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Search MGnify database for studies, samples, runs, analyses, biomes, assemblies, and genomes. — doQuery","text":"x MgnifyClient object. ... Remaining parameter key/value pairs may supplied filter returned values. Available options differ types. See discussion Details section details. type single character value specifying type objects query. Must one following options: studies, samples, runs, analyses, biomes, assemblies, super-studies, experiment-types, pipelines, pipeline-tools, publications, genomes, genome-search, genome-search/gather, genome-catalogues, genomeset, cogs, kegg-modules, kegg-classes, antismash-geneclusters, annotations/go-terms, annotations/interpro-identifiers, annotations/kegg-modules, annotations/pfam-entries, annotations/kegg-orthologs, annotations/genome-properties, annotations/antismash-gene-clusters, annotations/organisms, mydata. (default: type = \"studies\") accession single character value vector character values specifying MGnify accession identifiers (type type) NULL. NULL, results defined parameters retrieved. (default: accession = NULL) .df single boolean value specifying whether return results data.frame leave nested list. cases, .df = TRUE make sense. (default: .df = TRUE) max.hits single integer value specifying maximum number results return FALSE. actual number results actually higher max.hits, clipping occurs pagination page boundaries. disable limit, set max.hits = NULL. (default: max.hits = 200)","code":""},{"path":"/reference/doQuery.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Search MGnify database for studies, samples, runs, analyses, biomes, assemblies, and genomes. — doQuery","text":"nested list data.frame containing results query.","code":""},{"path":"/reference/doQuery.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Search MGnify database for studies, samples, runs, analyses, biomes, assemblies, and genomes. — doQuery","text":"doQuery flexible query function, harnessing \"full\" power JSONAPI MGnify search filters. Search results may filtered metadata value, associated study/sample/analyse etc. See Api browser information MGnify database filters. can find help customizing queries . 
example following filters available: studies: accession, biome_name, lineage, centre_name, include samples: accession, experiment_type, biome_name, lineage, geo_loc_name, latitude_gte, latitude_lte, longitude_gte, longitude_lte, species, instrument_model, instrument_platform, metadata_key, metadata_value_gte, metadata_value_lte, metadata_value, environment_material, environment_feature, study_accession, include runs: accession, experiment_type, biome_name, lineage, species, instrument_platform, instrument_model, metdata_key, metadata_value_gte, metadata_value_lte, metadata_value, sample_accession, study_accession, include analyses: biome_name, lineage, experiment_type, species, sample_accession, pipeline_version biomes: depth_gte, depth_lte assemblies: depth_gte, depth_lte Unfortunately appears cases, filters work expected, important check results returned match expected. Even unfortunately error parameter specification, query run filter parameters present . Thus result appear superficially correct infact correspond something completely different. behaviour hopefully fixed future incarnations MGnifyR JSONAPI, now users double check returned values. currently possible combine queries type single call (example search samples latitude). However, possible run multiple queries combine results using set operations R get desired behaviour.","code":""},{"path":"/reference/doQuery.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Search MGnify database for studies, samples, runs, analyses, biomes, assemblies, and genomes. — doQuery","text":"","code":"mg <- MgnifyClient(useCache = FALSE) # Get a list of studies from the Agricultural Wastewater : agwaste_studies <- doQuery( mg, \"studies\", biome_name=\"Agricultural wastewater\" ) if (FALSE) { # \\dontrun{ # Get all samples from a particular study samps <- doQuery(mg, \"samples\", accession=\"MGYS00004521\") # Search polar samples samps_np <- doQuery(mg, \"samples\", latitude_gte=66, max.hits=10) samps_sp <- doQuery(mg, \"samples\", latitude_lte=-66, max.hits=10) # Search studies that have studied drinking water tbl <- doQuery( mg, type = \"studies\", biome_name = \"root:Environmental:Aquatic:Freshwater:Drinking water\", max.hits = 10) } # }"},{"path":"/reference/getData.html","id":null,"dir":"Reference","previous_headings":"","what":"Versatile function to retrieve raw results — getData","title":"Versatile function to retrieve raw results — getData","text":"Versatile function retrieve raw results","code":""},{"path":"/reference/getData.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Versatile function to retrieve raw results — getData","text":"","code":"getData(x, ...) # S4 method for class 'MgnifyClient' getData(x, type, accession.type = NULL, accession = NULL, as.df = TRUE, ...)"},{"path":"/reference/getData.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Versatile function to retrieve raw results — getData","text":"x MgnifyClient object. ... optional arguments fed internal functions. type single character value specifying type data retrieve. 
Must one following options: studies, samples, runs, analyses, biomes, assemblies, super-studies, experiment-types, pipelines, pipeline-tools, publications, genomes, genome-search, genome-search/gather, genome-catalogues, genomeset, cogs, kegg-modules, kegg-classes, antismash-geneclusters, annotations/go-terms, annotations/interpro-identifiers, annotations/kegg-modules, annotations/pfam-entries, annotations/kegg-orthologs, annotations/genome-properties, annotations/antismash-gene-clusters, annotations/organisms, mydata. accession.type single character value specifying type accession IDs (accession). Must specified accession specified. (default: accession.type = NULL) accession single character value vector character values specifying accession IDs return results . (default: accession = NULL) .df single boolean value specifying whether return results data.frame leave nested list. (default: .df = TRUE)","code":""},{"path":"/reference/getData.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Versatile function to retrieve raw results — getData","text":"data.frame list","code":""},{"path":"/reference/getData.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Versatile function to retrieve raw results — getData","text":"function returns data MGnify database. Compared getResult, function allows flexible framework fetching data. However, drawbacks: counts data, getResult returns optimally structured data container easier downstream analysis. getData returns raw data database. However, want retrieve data pipelines publications, instance, getResult suitable , getData can utilized instead.","code":""},{"path":[]},{"path":"/reference/getData.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Versatile function to retrieve raw results — getData","text":"","code":"# Create a client object mg <- MgnifyClient(useCache = FALSE) # Find kegg modules for certain analysis df <- getData( mg, type = \"kegg-modules\", accession = \"MGYA00642773\", accession.type = \"analyses\")"},{"path":"/reference/getFile.html","id":null,"dir":"Reference","previous_headings":"","what":"Download any MGnify files, also including processed reads and identified protein sequences — getFile","title":"Download any MGnify files, also including processed reads and identified protein sequences — getFile","text":"Download MGnify files, also including processed reads identified protein sequences Listing files available download","code":""},{"path":"/reference/getFile.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Download any MGnify files, also including processed reads and identified protein sequences — getFile","text":"","code":"getFile(x, ...) searchFile(x, ...) # S4 method for class 'MgnifyClient' getFile(x, url, file = NULL, read.func = NULL, ...) # S4 method for class 'MgnifyClient' searchFile( x, accession, type = c(\"studies\", \"samples\", \"analyses\", \"assemblies\", \"genomes\", \"run\"), ... )"},{"path":"/reference/getFile.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Download any MGnify files, also including processed reads and identified protein sequences — getFile","text":"x MgnifyClient object. ... Additional arguments; used currently. url single character value specifying url address file wish download. file single character value NULL specifying optional local filename use saving file. 
NULL, MGNify local cache settings used. file intended processed separate program, may sensible provide meaningful file, rather hunt cache folders. file NULL useCache(client) FALSE, read.func parameter must supplied file downloaded deleted. (default: file = NULL) read.func function specifying optional function process downloaded file return results, rather relying post processing. primary use-case parameter local disk space limited downloaded files can quickly processed discarded. function take single parameter, downloaded filename, may return valid R object. (default: read.func = NULL) accession single character value vector character values specifying accession IDs return results . type single character value specifying type objects query. Must one following options: analysis, samples, studies, assembly, genome run. (default: type = \"samples\")","code":""},{"path":"/reference/getFile.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Download any MGnify files, also including processed reads and identified protein sequences — getFile","text":"getFile(), either local filename downloaded file, either location MGNifyR cache file. read.func used, result returned. searchFile() data.frame containing discovered downloads. multiple accessions queried, accessions column may filter results - since rownames set (make sense query return multiple items)","code":""},{"path":"/reference/getFile.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Download any MGnify files, also including processed reads and identified protein sequences — getFile","text":"getFile convenient wrapper round generic URL downloading functionality R, taking care things like local caching authentication. searchFile() function wrapper function allowing easy enumeration downloads available given accession IDs. Returns single data.frame containing available downloads associated metadata, including url location description. can filtered extract urls interest, actually retrieving files using getFile()","code":""},{"path":"/reference/getFile.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Download any MGnify files, also including processed reads and identified protein sequences — getFile","text":"","code":"# Make a client object mg <- MgnifyClient(useCache = FALSE) # Create a vector of accession ids - these happen to be \\code{analysis} # accessions accession_vect <- c(\"MGYA00563876\", \"MGYA00563877\") downloads <- searchFile(mg, accession_vect, \"analyses\") #> Searching files... #> | | | 0% | |=================================== | 50% | |======================================================================| 100% # Filter to find the urls of 16S encoding sequences url_list <- downloads[ downloads$attributes.description.label == \"Contigs encoding SSU rRNA\", \"download_url\"] # Example 1: # Download the first file supplied_filename <- getFile( mg, url_list[[1]], file=\"SSU_file.fasta.gz\") if (FALSE) { # \\dontrun{ # Example 2: # Just use local caching cached_filename <- getFile(mg, url_list[[2]]) # Example 3: # Using read.func to open the reads with readDNAStringSet from # \\code{biostrings}. 
Without retaining on disk dna_seqs <- getFile( mg, url_list[[3]], read.func = readDNAStringSet) } # } # Make a client object mg <- MgnifyClient(useCache = TRUE) # Create a vector of accession ids - these happen to be \\code{analysis} # accessions accession_vect <- c( \"MGYA00563876\", \"MGYA00563877\", \"MGYA00563878\", \"MGYA00563879\", \"MGYA00563880\" ) downloads <- searchFile(mg, accession_vect, \"analyses\") #> Searching files... #> | | | 0% | |============== | 20% | |============================ | 40% | |========================================== | 60% | |======================================================== | 80% | |======================================================================| 100%"},{"path":"/reference/getMetadata.html","id":null,"dir":"Reference","previous_headings":"","what":"Get all study, sample and analysis metadata for the supplied analysis accessions — getMetadata","title":"Get all study, sample and analysis metadata for the supplied analysis accessions — getMetadata","text":"Get study, sample analysis metadata supplied analysis accessions","code":""},{"path":"/reference/getMetadata.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Get all study, sample and analysis metadata for the supplied analysis accessions — getMetadata","text":"","code":"getMetadata(x, ...) # S4 method for class 'MgnifyClient' getMetadata(x, accession, ...)"},{"path":"/reference/getMetadata.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Get all study, sample and analysis metadata for the supplied analysis accessions — getMetadata","text":"x MgnifyClient object. ... Optional arguments; currently used. accession single character value vector analysis accession IDs specifying accessions retrieve data .","code":""},{"path":"/reference/getMetadata.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Get all study, sample and analysis metadata for the supplied analysis accessions — getMetadata","text":"data.frame containing metadata analysis accession list. row represents single analysis.","code":""},{"path":"/reference/getMetadata.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Get all study, sample and analysis metadata for the supplied analysis accessions — getMetadata","text":"function retrieves study, sample analysis metadata associated provided analysis accessions.","code":""},{"path":"/reference/getMetadata.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Get all study, sample and analysis metadata for the supplied analysis accessions — getMetadata","text":"","code":"# Create a client object mg <- MgnifyClient(useCache = FALSE) # Download all associated study/sample and analysis metadata accession_list <- c(\"MGYA00377505\") meta_dataframe <- getMetadata(mg, accession_list) #> Fetching metadata... 
#> | | | 0% | |======================================================================| 100%"},{"path":"/reference/getResult.html","id":null,"dir":"Reference","previous_headings":"","what":"Get microbial and/or functional profiling data for a list of accessions — getResult","title":"Get microbial and/or functional profiling data for a list of accessions — getResult","text":"Get microbial /functional profiling data list accessions","code":""},{"path":"/reference/getResult.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Get microbial and/or functional profiling data for a list of accessions — getResult","text":"","code":"getResult(x, ...) # S4 method for class 'MgnifyClient' getResult( x, accession, get.taxa = TRUE, get.func = TRUE, output = \"TreeSE\", ... )"},{"path":"/reference/getResult.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Get microbial and/or functional profiling data for a list of accessions — getResult","text":"x MgnifyClient object. ... optional arguments: taxa.su single character value specifying taxa subunit results selected. Currently, taxonomy assignments MGnify pipelines rely rRNA matches existing databases (GreenGenes SILVA), later pipelines checking SSU LSU portions rRNA sequence. taxa.su allows selection either Small subunit (\"SSU\") Large subunit (\"LSU\") results final TreeSummarizedExperiment object. Older pipeline versions report results subunits, thus accessions value effect. get.tree single boolean value specifying whether include available phylogenetic trees TreeSummarizedExperiment object. Available get.taxa = TRUE. (default: get.tree = TRUE) .df single boolean value enabled output = \"list\". argument specifies whether return functional data named list (one entry per element output list) data.frames, data.frame containing results requested accessions. FALSE, function returns list lists, element consisting results single accession. (default: .df = TRUE) bulk.dl single boolean value specifying MGnifyR attempt speed things downloading relevant studies TSV results extracting required columns, rather using JSONAPI interface. getting results multiple accessions share study, option may result significantly faster processing. However, appear (quite ) cases database TSV result columns match expected accession names. hopefully fixed future, now bulk.dl defaults TRUE. work, can orders magnitude efficient. (default: buld_dl = TRUE) accession single character value vector character values specifying accession IDs return results . get.taxa boolean value specifying whether retrieve taxonomy data (OTU table). See taxa.su specifying taxonomy type. data retrieved BIOM files subsequently parsed. (default: get.taxa = TRUE) get.func boolean value single character value vector character values specifying functional analysis types retrieve. get.func = TRUE, available functional datatypes retrieved, FALSE, functional data retrieved. current list available types \"antismash-gene-clusters\", \"go-slim\", \"go-terms\", \"interpro-identifiers\", \"taxonomy\", \"taxonomy-itsonedb\", \"taxonomy-itsunite\", \"taxonomy-lsu\", \"taxonomy-ssu\". Note depending particular analysis type, pipeline version etc., functional results available. Furthermore, taxonomy also available via get.func, loading data might considerable faster bulk.dl = TRUE. However, phylogeny available via get.taxa. (default: get.func = TRUE) output single character value specifying format output. 
Must one following options: \"TreeSE\", \"list\", \"phyloseq\". (default: output = \"TreeSE\")","code":""},{"path":"/reference/getResult.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Get microbial and/or functional profiling data for a list of accessions — getResult","text":"taxonomy data retrieved, result returned TreeSummarizedExperiment object default. result can also returned phyloseq object list data.frames. Note phyloseq object can include one phylogenetic tree meaning taxa might lost data subsetted based tree. functional data retrieved addition taxonomy data, result returned MultiAssayExperiment object. options list containing phyloseq object data.frames just data.frames. Functional data can returned MultiAssayExperiment object list data.frames.","code":""},{"path":"/reference/getResult.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Get microbial and/or functional profiling data for a list of accessions — getResult","text":"Given set analysis accessions collection annotation types, function queries MGNify API returns results. function convenient retrieving highly structured (analysis vs counts) data certain instances. example, BIOM files downloaded automatically. want just retrieve raw data database, see getData.","code":""},{"path":[]},{"path":"/reference/getResult.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Get microbial and/or functional profiling data for a list of accessions — getResult","text":"","code":"# Create a client object mg <- MgnifyClient(useCache = FALSE) # Get OTU tables as TreeSE accession_list <- c(\"MGYA00377505\") tse <- getResult(mg, accession_list, get.func=FALSE, get.taxa=TRUE) #> Fetching taxonomy data... #> | | | 0% | |======================================================================| 100% #> Merging with full join... #> 1/1 #> if (FALSE) { # \\dontrun{ # Get functional data along with OTU tables as MAE mae <- getResult(mg, accession_list, get.func=TRUE, get.taxa=TRUE) # Get same data as list list <- getResult( mg, accession_list, get.func=TRUE, get.taxa=TRUE, output = \"list\", as.df = TRUE, use.cache = TRUE) } # }"},{"path":"/reference/searchAnalysis.html","id":null,"dir":"Reference","previous_headings":"","what":"Look up analysis accession IDs for one or more study or sample accessions — searchAnalysis","title":"Look up analysis accession IDs for one or more study or sample accessions — searchAnalysis","text":"Look analysis accession IDs one study sample accessions","code":""},{"path":"/reference/searchAnalysis.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Look up analysis accession IDs for one or more study or sample accessions — searchAnalysis","text":"","code":"searchAnalysis(x, ...) # S4 method for class 'MgnifyClient' searchAnalysis(x, type, accession, ...)"},{"path":"/reference/searchAnalysis.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Look up analysis accession IDs for one or more study or sample accessions — searchAnalysis","text":"x MgnifyClient object. ... Optional arguments; currently used. type single character value specifying type accession IDs specified accession. Must \"studies\" \"samples\". 
accession single character value vector character values specifying study sample accession IDs used retrieve analyses IDs.","code":""},{"path":"/reference/searchAnalysis.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Look up analysis accession IDs for one or more study or sample accessions — searchAnalysis","text":"Vector analysis accession IDs.","code":""},{"path":"/reference/searchAnalysis.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Look up analysis accession IDs for one or more study or sample accessions — searchAnalysis","text":"Retrieve analysis accession IDs associated supplied study sample accession. MGnify, analysis accession refers certain pipeline analysis, specific 16S rRNA shotgun metagenomic mapping. Studies can include multiple samples, sample can undergo multiple analyses using pipelines. analysis identified unique accession ID, allowing precise tracking retrieval analysis results within MGnify database.","code":""},{"path":"/reference/searchAnalysis.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Look up analysis accession IDs for one or more study or sample accessions — searchAnalysis","text":"","code":"# Create a client object mg <- MgnifyClient(useCache = FALSE) # Retrieve analysis ids from study MGYS00005058 result <- searchAnalysis(mg, \"studies\", c(\"MGYS00005058\")) #> Fetching analyses... #> | | | 0% | |======================================================================| 100% if (FALSE) { # \\dontrun{ # Retrieve all analysis ids from samples result <- searchAnalysis( mg, \"samples\", c(\"SRS4392730\", \"SRS4392743\")) } # }"}]