From 5417d7ea1d04b818b73c81910a413a668e5bc7d4 Mon Sep 17 00:00:00 2001 From: TuomasBorman <60338854+TuomasBorman@users.noreply.github.com> Date: Tue, 26 Mar 2024 19:20:23 +0000 Subject: [PATCH] =?UTF-8?q?Deploying=20to=20gh-pages=20from=20@=20EBI-Meta?= =?UTF-8?q?genomics/MGnifyR@60c01d2b09cfa9ffe86228e528b1389a7cf27104=20?= =?UTF-8?q?=F0=9F=9A=80?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- articles/MGnifyR.html | 237 ++++++++---------------------------------- pkgdown.yml | 2 +- search.json | 2 +- 3 files changed, 44 insertions(+), 197 deletions(-) diff --git a/articles/MGnifyR.html b/articles/MGnifyR.html index d0d262a..60e3c5d 100644 --- a/articles/MGnifyR.html +++ b/articles/MGnifyR.html @@ -204,7 +204,7 @@
The result is a table containing accession IDs and description – in this case – on samples.
+
+colnames(samples) |> head()
+#> [1] "biosample" "accession" "sample-desc"
+#> [4] "environment-biome" "environment-feature" "environment-material"
+analyses_accessions <- searchAnalysis(mg, "samples", samples$accession)
By running the
+searchAnalysis()
function, we get a vector of analysis IDs of samples that we fed as an input.+analyses_accessions |> head() +#> [1] "MGYA00652201" "MGYA00652185" "MGYA00643487" "MGYA00643486" "MGYA00643485" +#> [6] "MGYA00643484"
getMetadata()
function to fetch data based on
analysis IDs.
-+analyses_metadata <- getMetadata(mg, analyses_accessions)
The returned value is a
+metadata for example on how analysis was conducted and what kind of +samples were analyzed. +data.frame
that includes -metadata includes for example information on how analysis was conducted -and what kind of samples were analyzed.
TreeSE
objects that are linked together
by utilizing MAE
.
-+-mae <- getResult(mg, accession = analyses_accessions)
+mae #> A MultiAssayExperiment object of 6 listed #> experiments with user-defined names and respective classes. @@ -294,7 +307,7 @@
Fetch microbiome data#> exportClass() - save data to flat files
You can get access to individual
-TreeSE
object inMAE
by specifying index or name.+mae[[1]] #> class: TreeSummarizedExperiment #> dim: 3506 50 @@ -316,7 +329,7 @@
-based microbiome data manipulation and visualization. Moreover, it enables access toFetch microbiome dataSummarizedExperiment
miaverse
tools. For example, we can estimate diversity of samples… -+mae[[1]] <- estimateDiversity(mae[[1]], index = "shannon") library(scater) @@ -326,7 +339,7 @@
Fetch microbiome dataplotColData(mae[[1]], "shannon", x = "sample_environment..biome.")
… and plot abundances of most abundant phyla.
-+# Agglomerate data altExps(mae[[1]]) <- splitByRanks(mae[[1]]) @@ -339,7 +352,7 @@
Fetch microbiome data
We can also perform other analyses such as principal component analysis to microbial profiling data by utilizing miaverse tools.
-+# Apply relative transformation mae[[1]] <- transformAssay(mae[[1]], method = "relabundance") # Perform PCoA @@ -358,108 +371,15 @@
can be used more flexibly to retrieve any kind of data from the database. It returns data as simple data.frame or list format. -Fetch raw filesgetData()
+-publications <- getData(mg, type = "publications")
+-head(publications) -#> document.id type id attributes.pubmed-id -#> 1 1 publications 36363763 36363763 -#> 2 2 publications 35614211 35614211 -#> 3 3 publications 35614182 35614182 -#> 4 4 publications 35208912 35208912 -#> 5 5 publications 35178461 35178461 -#> 6 6 publications 35154023 35154023 -#> attributes.pubmed-central-id -#> 1 NA -#> 2 NA -#> 3 NA -#> 4 NA -#> 5 NA -#> 6 NA -#> attributes.pub-title -#> 1 Agricultural Crops Grown in Laboratory Conditions on Chernevaya Taiga Soil Demonstrate Unique Composition of the Rhizosphere Microbiota. -#> 2 Discovery of bioactive microbial gene products in inflammatory bowel disease. -#> 3 Lake microbiome and trophy fluctuations of the ancient hemp rettery. -#> 4 Mycobiome-Host Coevolution? The Mycobiome of Ancestral Human Populations Seems to Be Different and Less Diverse Than Those of Extant Native and Urban-Industrialized Populations. -#> 5 Effects of triclosan on bacterial community composition and <i>Vibrio</i> populations in natural seawater microcosms. -#> 6 Field Site-Specific Effects of an <i>Azospirillum</i> Seed Inoculant on Key Microbial Functional Groups in the Rhizosphere. -#> attributes.pub-abstract -#> 1 NA -#> 2 NA -#> 3 NA -#> 4 NA -#> 5 NA -#> 6 NA -#> attributes.authors -#> 1 Kravchenko I, Rayko M, Tikhonova E, Konopkin A, Abakumov E, Lapidus A. -#> 2 Zhang Y, Bhosle A, Bae S, McIver LJ, Pishchany G, Accorsi EK, Thompson KN, Arze C, Wang Y, Subramanian A, Kearney SM, Pawluk A, Plichta DR, Rahnavard A, Shafquat A, Xavier RJ, Vlamakis H, Garrett WS, Krueger A, Huttenhower C, Franzosa EA. -#> 3 Iwańska O, Latoch P, Suchora M, Pidek IA, Huber M, Bubak I, Kopik N, Kovalenko M, Gąsiorowski M, Armache JP, Starosta AL. -#> 4 Reynoso-García J, Narganes-Storde Y, Santiago-Rodriguez TM, Toranzos GA. -#> 5 Lydon KA, Glinski DA, Westrich JR, Henderson WM, Lipp EK. -#> 6 Renoud S, Vacheron J, Abrouk D, Prigent-Combaret C, Legendre L, Muller D, Moënne-Loccoz Y. -#> attributes.doi attributes.isbn -#> 1 10.3390/microorganisms10112171 2076-2607 -#> 2 10.1038/s41586-022-04648-7 0028-0836; 1476-4687; -#> 3 10.1038/s41598-022-12761-w 2045-2322 -#> 4 10.3390/microorganisms10020459 2076-2607 -#> 5 10.1525/elementa.141 2325-1026 -#> 6 10.3389/fmicb.2021.760512 1664-302x -#> attributes.published-year attributes.pub-type attributes.issue -#> 1 2022 journal article NA -#> 2 2022 research-article; journal article NA -#> 3 2022 journal article NA -#> 4 2022 journal article NA -#> 5 2017 journal article NA -#> 6 2021 journal article NA -#> attributes.volume attributes.raw-pages attributes.iso-journal -#> 1 10 2171 Microorganisms -#> 2 606 754-760 Nature -#> 3 12 8846 Sci Rep -#> 4 10 n/a Microorganisms -#> 5 5 1-16 Elementa (Wash D C) -#> 6 12 760512 Front Microbiol -#> attributes.medline-journal attributes.pub-url attributes.studies-count -#> 1 NA NA 1 -#> 2 NA NA 1 -#> 3 NA NA 2 -#> 4 NA NA 1 -#> 5 NA NA 1 -#> 6 NA NA 3 -#> attributes.samples-count -#> 1 5 -#> 2 176 -#> 3 18 -#> 4 57 -#> 5 24 -#> 6 3 -#> links.self -#> 1 https://www.ebi.ac.uk/metagenomics/api/v1/publications/36363763?format=json -#> 2 https://www.ebi.ac.uk/metagenomics/api/v1/publications/35614211?format=json -#> 3 https://www.ebi.ac.uk/metagenomics/api/v1/publications/35614182?format=json -#> 4 https://www.ebi.ac.uk/metagenomics/api/v1/publications/35208912?format=json -#> 5 https://www.ebi.ac.uk/metagenomics/api/v1/publications/35178461?format=json -#> 6 https://www.ebi.ac.uk/metagenomics/api/v1/publications/35154023?format=json -#> relationships.studies.links.related -#> 1 https://www.ebi.ac.uk/metagenomics/api/v1/publications/36363763/studies?format=json -#> 2 https://www.ebi.ac.uk/metagenomics/api/v1/publications/35614211/studies?format=json -#> 3 https://www.ebi.ac.uk/metagenomics/api/v1/publications/35614182/studies?format=json -#> 4 https://www.ebi.ac.uk/metagenomics/api/v1/publications/35208912/studies?format=json -#> 5 https://www.ebi.ac.uk/metagenomics/api/v1/publications/35178461/studies?format=json -#> 6 https://www.ebi.ac.uk/metagenomics/api/v1/publications/35154023/studies?format=json -#> relationships.samples.links.related -#> 1 https://www.ebi.ac.uk/metagenomics/api/v1/publications/36363763/samples?format=json -#> 2 https://www.ebi.ac.uk/metagenomics/api/v1/publications/35614211/samples?format=json -#> 3 https://www.ebi.ac.uk/metagenomics/api/v1/publications/35614182/samples?format=json -#> 4 https://www.ebi.ac.uk/metagenomics/api/v1/publications/35208912/samples?format=json -#> 5 https://www.ebi.ac.uk/metagenomics/api/v1/publications/35178461/samples?format=json -#> 6 https://www.ebi.ac.uk/metagenomics/api/v1/publications/35154023/samples?format=json -#> ..JSON -#> 1 publicat.... -#> 2 publicat.... -#> 3 publicat.... -#> 4 publicat.... -#> 5 publicat.... -#> 6 publicat....
++colnames(publications) |> head() +#> [1] "document.id" "type" +#> [3] "id" "attributes.pubmed-id" +#> [5] "attributes.pubmed-central-id" "attributes.pub-title"
The result is a
data.frame
by default. In this case, it +includes information on publications fetched from the data portal.Fetch sequence files @@ -470,103 +390,30 @@
Fetch sequence filessearchFile(), we can search files from the database. -
+dl_urls <- searchFile(mg, analyses_accessions, type = "analyses")
The returned table contains search results related to analyses that we fed as an input. The table contains information on file and also URL address from where the file can be loaded.
-++ +colnames(target_urls) |> head() +#> [1] "type" "id" +#> [3] "attributes.alias" "attributes.file.format.name" +#> [5] "attributes.file.format.extension" "attributes.file.format.compression"target_urls <- dl_urls[ dl_urls$attributes.description.label == "Predicted alpha tmRNA", ] -head(target_urls) -#> type id -#> 24 analyses ERZ20300939_alpha_tmRNA.RF01849.fasta.gz -#> 75 analyses ERZ20300942_alpha_tmRNA.RF01849.fasta.gz -#> 126 analyses ERZ16299686_alpha_tmRNA.RF01849.fasta.gz -#> 177 analyses ERZ16299690_alpha_tmRNA.RF01849.fasta.gz -#> 228 analyses ERZ16299649_alpha_tmRNA.RF01849.fasta.gz -#> 279 analyses ERZ16299683_alpha_tmRNA.RF01849.fasta.gz -#> attributes.alias attributes.file.format.name -#> 24 ERZ20300939_alpha_tmRNA.RF01849.fasta.gz FASTA -#> 75 ERZ20300942_alpha_tmRNA.RF01849.fasta.gz FASTA -#> 126 ERZ16299686_alpha_tmRNA.RF01849.fasta.gz FASTA -#> 177 ERZ16299690_alpha_tmRNA.RF01849.fasta.gz FASTA -#> 228 ERZ16299649_alpha_tmRNA.RF01849.fasta.gz FASTA -#> 279 ERZ16299683_alpha_tmRNA.RF01849.fasta.gz FASTA -#> attributes.file.format.extension attributes.file.format.compression -#> 24 fasta TRUE -#> 75 fasta TRUE -#> 126 fasta TRUE -#> 177 fasta TRUE -#> 228 fasta TRUE -#> 279 fasta TRUE -#> attributes.description.label -#> 24 Predicted alpha tmRNA -#> 75 Predicted alpha tmRNA -#> 126 Predicted alpha tmRNA -#> 177 Predicted alpha tmRNA -#> 228 Predicted alpha tmRNA -#> 279 Predicted alpha tmRNA -#> attributes.description.description -#> 24 Predicted Alphaproteobacteria transfer-messenger RNA (RF01849) -#> 75 Predicted Alphaproteobacteria transfer-messenger RNA (RF01849) -#> 126 Predicted Alphaproteobacteria transfer-messenger RNA (RF01849) -#> 177 Predicted Alphaproteobacteria transfer-messenger RNA (RF01849) -#> 228 Predicted Alphaproteobacteria transfer-messenger RNA (RF01849) -#> 279 Predicted Alphaproteobacteria transfer-messenger RNA (RF01849) -#> attributes.group.type attributes.file.checksum.checksum -#> 24 non-coding RNAs -#> 75 non-coding RNAs -#> 126 non-coding RNAs -#> 177 non-coding RNAs -#> 228 non-coding RNAs -#> 279 non-coding RNAs -#> attributes.file.checksum.checksum.algorithm -#> 24 -#> 75 -#> 126 -#> 177 -#> 228 -#> 279 -#> relationships.pipeline.data.type relationships.pipeline.data.id -#> 24 pipelines 5.0 -#> 75 pipelines 5.0 -#> 126 pipelines 5.0 -#> 177 pipelines 5.0 -#> 228 pipelines 5.0 -#> 279 pipelines 5.0 -#> relationships.pipeline.related -#> 24 https://www.ebi.ac.uk/metagenomics/api/v1/pipelines/5.0?format=json -#> 75 https://www.ebi.ac.uk/metagenomics/api/v1/pipelines/5.0?format=json -#> 126 https://www.ebi.ac.uk/metagenomics/api/v1/pipelines/5.0?format=json -#> 177 https://www.ebi.ac.uk/metagenomics/api/v1/pipelines/5.0?format=json -#> 228 https://www.ebi.ac.uk/metagenomics/api/v1/pipelines/5.0?format=json -#> 279 https://www.ebi.ac.uk/metagenomics/api/v1/pipelines/5.0?format=json -#> download_url -#> 24 https://www.ebi.ac.uk/metagenomics/api/v1/analyses/MGYA00652201/file/ERZ20300939_alpha_tmRNA.RF01849.fasta.gz -#> 75 https://www.ebi.ac.uk/metagenomics/api/v1/analyses/MGYA00652185/file/ERZ20300942_alpha_tmRNA.RF01849.fasta.gz -#> 126 https://www.ebi.ac.uk/metagenomics/api/v1/analyses/MGYA00643487/file/ERZ16299686_alpha_tmRNA.RF01849.fasta.gz -#> 177 https://www.ebi.ac.uk/metagenomics/api/v1/analyses/MGYA00643486/file/ERZ16299690_alpha_tmRNA.RF01849.fasta.gz -#> 228 https://www.ebi.ac.uk/metagenomics/api/v1/analyses/MGYA00643485/file/ERZ16299649_alpha_tmRNA.RF01849.fasta.gz -#> 279 https://www.ebi.ac.uk/metagenomics/api/v1/analyses/MGYA00643484/file/ERZ16299683_alpha_tmRNA.RF01849.fasta.gz -#> accession -#> 24 MGYA00652201 -#> 75 MGYA00652185 -#> 126 MGYA00643487 -#> 177 MGYA00643486 -#> 228 MGYA00643485 -#> 279 MGYA00643484
Finally, we can download the files with
-getFile()
.+# Just select a single file from the target_urls list for demonstration. file_url <- target_urls$download_url[[1]] cached_location <- getFile(mg, file_url)
The function returns a path where the file is stored.
-+-# Where are the files? cached_location #> [1] "/.MGnifyR_cache/analyses/MGYA00652201/file/ERZ20300939_alpha_tmRNA.RF01849.fasta.gz"
+sessionInfo() #> R Under development (unstable) (2024-03-24 r86185) #> Platform: x86_64-pc-linux-gnu diff --git a/pkgdown.yml b/pkgdown.yml index 970e8d1..19802ac 100644 --- a/pkgdown.yml +++ b/pkgdown.yml @@ -4,5 +4,5 @@ pkgdown_sha: ~ articles: MGnifyR: MGnifyR.html MGnifyR_long: MGnifyR_long.html -last_built: 2024-03-26T18:55Z +last_built: 2024-03-26T19:16Z diff --git a/search.json b/search.json index d17df4a..912ab38 100644 --- a/search.json +++ b/search.json @@ -1 +1 @@ -[{"path":"/articles/MGnifyR.html","id":"introduction","dir":"Articles","previous_headings":"","what":"Introduction","title":"MGnifyR","text":"MGnifyR package designed ease access EBI’s MGnify resource, allowing searching retrieval multiple datasets downstream analysis. latest version MGnifyR seamlessly integrates miaverse framework providing access cutting-edge tools microbiome -stream analytics.","code":""},{"path":"/articles/MGnifyR.html","id":"installation","dir":"Articles","previous_headings":"","what":"Installation","title":"MGnifyR","text":"MGnifyR hosted Bioconductor, can installed using via BiocManager.","code":"BiocManager::install(\"MGnifyR\")"},{"path":"/articles/MGnifyR.html","id":"load-mgnifyr-package","dir":"Articles","previous_headings":"","what":"Load MGnifyR package","title":"MGnifyR","text":"installed, MGnifyR made available usual way.","code":"library(MGnifyR) #> Loading required package: mia #> Loading required package: SummarizedExperiment #> Loading required package: MatrixGenerics #> Loading required package: matrixStats #> #> Attaching package: 'MatrixGenerics' #> The following objects are masked from 'package:matrixStats': #> #> colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse, #> colCounts, colCummaxs, colCummins, colCumprods, colCumsums, #> colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs, #> colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats, #> colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds, #> colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads, #> colWeightedMeans, colWeightedMedians, colWeightedSds, #> colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet, #> rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods, #> rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps, #> rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins, #> rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks, #> rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars, #> rowWeightedMads, rowWeightedMeans, rowWeightedMedians, #> rowWeightedSds, rowWeightedVars #> Loading required package: GenomicRanges #> Loading required package: stats4 #> Loading required package: BiocGenerics #> #> Attaching package: 'BiocGenerics' #> The following objects are masked from 'package:stats': #> #> IQR, mad, sd, var, xtabs #> The following objects are masked from 'package:base': #> #> anyDuplicated, aperm, append, as.data.frame, basename, cbind, #> colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find, #> get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply, #> match, mget, order, paste, pmax, pmax.int, pmin, pmin.int, #> Position, rank, rbind, Reduce, rownames, sapply, setdiff, table, #> tapply, union, unique, unsplit, which.max, which.min #> Loading required package: S4Vectors #> #> Attaching package: 'S4Vectors' #> The following object is masked from 'package:utils': #> #> findMatches #> The following objects are masked from 'package:base': #> #> expand.grid, I, unname #> Loading required package: IRanges #> Loading required package: GenomeInfoDb #> Loading required package: Biobase #> Welcome to Bioconductor #> #> Vignettes contain introductory material; view with #> 'browseVignettes()'. To cite Bioconductor, see #> 'citation(\"Biobase\")', and for packages 'citation(\"pkgname\")'. #> #> Attaching package: 'Biobase' #> The following object is masked from 'package:MatrixGenerics': #> #> rowMedians #> The following objects are masked from 'package:matrixStats': #> #> anyMissing, rowMedians #> Loading required package: SingleCellExperiment #> Loading required package: TreeSummarizedExperiment #> Loading required package: Biostrings #> Loading required package: XVector #> #> Attaching package: 'Biostrings' #> The following object is masked from 'package:base': #> #> strsplit #> Loading required package: MultiAssayExperiment #> Loading required package: biomformat"},{"path":"/articles/MGnifyR.html","id":"create-a-client","dir":"Articles","previous_headings":"","what":"Create a client","title":"MGnifyR","text":"functions MGnifyR make use MgnifyClient object keep track JSONAPI url, disk cache location user access tokens. Thus first thing starting analysis instantiate object. following snippet creates . MgnifyClient object contains slots previously mentioned settings.","code":"mg <- MgnifyClient(useCache = TRUE) mg #> An object of class \"MgnifyClient\" #> Slot \"databaseUrl\": #> [1] \"https://www.ebi.ac.uk/metagenomics/api/v1\" #> #> Slot \"authTok\": #> [1] NA #> #> Slot \"useCache\": #> [1] TRUE #> #> Slot \"cacheDir\": #> [1] \"/tmp/RtmphXjp8W/.MGnifyR_cache\" #> #> Slot \"showWarnings\": #> [1] FALSE #> #> Slot \"clearCache\": #> [1] FALSE #> #> Slot \"verbose\": #> [1] TRUE"},{"path":[]},{"path":"/articles/MGnifyR.html","id":"search-data","dir":"Articles","previous_headings":"Functions for fetching the data","what":"Search data","title":"MGnifyR","text":"doQuery() function can utilized search results samples studies MGnify database. , fetch information drinking water samples. result table containing accession IDs description – case – samples.","code":"# Fetch studies samples <- doQuery( mg, type = \"samples\", biome_name = \"root:Environmental:Aquatic:Freshwater:Drinking water\", max.hits = 10)"},{"path":"/articles/MGnifyR.html","id":"find-relevent-analyses-accessions","dir":"Articles","previous_headings":"Functions for fetching the data","what":"Find relevent analyses accessions","title":"MGnifyR","text":"Now want find analysis accessions. sample might multiple analyses. analysis ID corresponds single run particular pipeline single sample single study. running searchAnalysis() function, get vector analysis IDs samples fed input.","code":"analyses_accessions <- searchAnalysis(mg, \"samples\", samples$accession)"},{"path":"/articles/MGnifyR.html","id":"fetch-metadata","dir":"Articles","previous_headings":"Functions for fetching the data","what":"Fetch metadata","title":"MGnifyR","text":"can now check metadata get hint kind data . use getMetadata() function fetch data based analysis IDs. returned value data.frame includes metadata includes example information analysis conducted kind samples analyzed.","code":"analyses_metadata <- getMetadata(mg, analyses_accessions)"},{"path":"/articles/MGnifyR.html","id":"fetch-microbiome-data","dir":"Articles","previous_headings":"Functions for fetching the data","what":"Fetch microbiome data","title":"MGnifyR","text":"selected data fetch, can use getResult() output TreeSummarizedExperiment (TreeSE) MultiAssayExperiment (MAE) depending dataset. dataset includes taxonomic profiling data, output single TreeSE. dataset includes also functional data, output multiple TreeSE objects linked together utilizing MAE. can get access individual TreeSE object MAE specifying index name. TreeSE object uniquely positioned support SummarizedExperiment-based microbiome data manipulation visualization. Moreover, enables access miaverse tools. example, can estimate diversity samples… … plot abundances abundant phyla. can also perform analyses principal component analysis microbial profiling data utilizing miaverse tools.","code":"mae <- getResult(mg, accession = analyses_accessions) mae #> A MultiAssayExperiment object of 6 listed #> experiments with user-defined names and respective classes. #> Containing an ExperimentList class object of length 6: #> [1] microbiota: TreeSummarizedExperiment with 3506 rows and 50 columns #> [2] go-slim: TreeSummarizedExperiment with 116 rows and 38 columns #> [3] go-terms: TreeSummarizedExperiment with 3133 rows and 38 columns #> [4] interpro-identifiers: TreeSummarizedExperiment with 18223 rows and 38 columns #> [5] taxonomy: TreeSummarizedExperiment with 3617 rows and 50 columns #> [6] taxonomy-lsu: TreeSummarizedExperiment with 3378 rows and 42 columns #> Functionality: #> experiments() - obtain the ExperimentList instance #> colData() - the primary/phenotype DataFrame #> sampleMap() - the sample coordination DataFrame #> `$`, `[`, `[[` - extract colData columns, subset, or experiment #> *Format() - convert into a long or wide DataFrame #> assays() - convert ExperimentList to a SimpleList of matrices #> exportClass() - save data to flat files mae[[1]] #> class: TreeSummarizedExperiment #> dim: 3506 50 #> metadata(0): #> assays(1): counts #> rownames(3506): 82608 62797 ... 5820 6794 #> rowData names(9): Kingdom Phylum ... taxonomy1 taxonomy #> colnames(50): MGYA00144458 MGYA00144419 ... MGYA00652185 MGYA00652201 #> colData names(64): analysis_analysis.status analysis_pipeline.version #> ... sample_geo.loc.name sample_instrument.model #> reducedDimNames(0): #> mainExpName: NULL #> altExpNames(0): #> rowLinks: NULL #> rowTree: NULL #> colLinks: NULL #> colTree: NULL mae[[1]] <- estimateDiversity(mae[[1]], index = \"shannon\") library(scater) #> Loading required package: scuttle #> Loading required package: ggplot2 plotColData(mae[[1]], \"shannon\", x = \"sample_environment..biome.\") # Agglomerate data altExps(mae[[1]]) <- splitByRanks(mae[[1]]) library(miaViz) #> Loading required package: ggraph # Plot top taxa top_taxa <- getTopFeatures(altExp(mae[[1]], \"Phylum\"), 10) plotAbundance(altExp(mae[[1]], \"Phylum\")[top_taxa, ], rank = \"Phylum\") # Apply relative transformation mae[[1]] <- transformAssay(mae[[1]], method = \"relabundance\") # Perform PCoA mae[[1]] <- runMDS( mae[[1]], assay.type = \"relabundance\", FUN = vegan::vegdist, method = \"bray\") # Plot plotReducedDim( mae[[1]], \"MDS\", colour_by = \"sample_environment..biome.\")"},{"path":"/articles/MGnifyR.html","id":"fetch-raw-files","dir":"Articles","previous_headings":"Functions for fetching the data","what":"Fetch raw files","title":"MGnifyR","text":"getResult() can utilized retrieve microbial profiling data, getData() can used flexibly retrieve kind data database. returns data simple data.frame list format.","code":"publications <- getData(mg, type = \"publications\") head(publications) #> document.id type id attributes.pubmed-id #> 1 1 publications 36363763 36363763 #> 2 2 publications 35614211 35614211 #> 3 3 publications 35614182 35614182 #> 4 4 publications 35208912 35208912 #> 5 5 publications 35178461 35178461 #> 6 6 publications 35154023 35154023 #> attributes.pubmed-central-id #> 1 NA #> 2 NA #> 3 NA #> 4 NA #> 5 NA #> 6 NA #> attributes.pub-title #> 1 Agricultural Crops Grown in Laboratory Conditions on Chernevaya Taiga Soil Demonstrate Unique Composition of the Rhizosphere Microbiota. #> 2 Discovery of bioactive microbial gene products in inflammatory bowel disease. #> 3 Lake microbiome and trophy fluctuations of the ancient hemp rettery. #> 4 Mycobiome-Host Coevolution? The Mycobiome of Ancestral Human Populations Seems to Be Different and Less Diverse Than Those of Extant Native and Urban-Industrialized Populations. #> 5 Effects of triclosan on bacterial community composition and Vibrio<\/i> populations in natural seawater microcosms. #> 6 Field Site-Specific Effects of an Azospirillum<\/i> Seed Inoculant on Key Microbial Functional Groups in the Rhizosphere. #> attributes.pub-abstract #> 1 NA #> 2 NA #> 3 NA #> 4 NA #> 5 NA #> 6 NA #> attributes.authors #> 1 Kravchenko I, Rayko M, Tikhonova E, Konopkin A, Abakumov E, Lapidus A. #> 2 Zhang Y, Bhosle A, Bae S, McIver LJ, Pishchany G, Accorsi EK, Thompson KN, Arze C, Wang Y, Subramanian A, Kearney SM, Pawluk A, Plichta DR, Rahnavard A, Shafquat A, Xavier RJ, Vlamakis H, Garrett WS, Krueger A, Huttenhower C, Franzosa EA. #> 3 Iwańska O, Latoch P, Suchora M, Pidek IA, Huber M, Bubak I, Kopik N, Kovalenko M, Gąsiorowski M, Armache JP, Starosta AL. #> 4 Reynoso-García J, Narganes-Storde Y, Santiago-Rodriguez TM, Toranzos GA. #> 5 Lydon KA, Glinski DA, Westrich JR, Henderson WM, Lipp EK. #> 6 Renoud S, Vacheron J, Abrouk D, Prigent-Combaret C, Legendre L, Muller D, Moënne-Loccoz Y. #> attributes.doi attributes.isbn #> 1 10.3390/microorganisms10112171 2076-2607 #> 2 10.1038/s41586-022-04648-7 0028-0836; 1476-4687; #> 3 10.1038/s41598-022-12761-w 2045-2322 #> 4 10.3390/microorganisms10020459 2076-2607 #> 5 10.1525/elementa.141 2325-1026 #> 6 10.3389/fmicb.2021.760512 1664-302x #> attributes.published-year attributes.pub-type attributes.issue #> 1 2022 journal article NA #> 2 2022 research-article; journal article NA #> 3 2022 journal article NA #> 4 2022 journal article NA #> 5 2017 journal article NA #> 6 2021 journal article NA #> attributes.volume attributes.raw-pages attributes.iso-journal #> 1 10 2171 Microorganisms #> 2 606 754-760 Nature #> 3 12 8846 Sci Rep #> 4 10 n/a Microorganisms #> 5 5 1-16 Elementa (Wash D C) #> 6 12 760512 Front Microbiol #> attributes.medline-journal attributes.pub-url attributes.studies-count #> 1 NA NA 1 #> 2 NA NA 1 #> 3 NA NA 2 #> 4 NA NA 1 #> 5 NA NA 1 #> 6 NA NA 3 #> attributes.samples-count #> 1 5 #> 2 176 #> 3 18 #> 4 57 #> 5 24 #> 6 3 #> links.self #> 1 https://www.ebi.ac.uk/metagenomics/api/v1/publications/36363763?format=json #> 2 https://www.ebi.ac.uk/metagenomics/api/v1/publications/35614211?format=json #> 3 https://www.ebi.ac.uk/metagenomics/api/v1/publications/35614182?format=json #> 4 https://www.ebi.ac.uk/metagenomics/api/v1/publications/35208912?format=json #> 5 https://www.ebi.ac.uk/metagenomics/api/v1/publications/35178461?format=json #> 6 https://www.ebi.ac.uk/metagenomics/api/v1/publications/35154023?format=json #> relationships.studies.links.related #> 1 https://www.ebi.ac.uk/metagenomics/api/v1/publications/36363763/studies?format=json #> 2 https://www.ebi.ac.uk/metagenomics/api/v1/publications/35614211/studies?format=json #> 3 https://www.ebi.ac.uk/metagenomics/api/v1/publications/35614182/studies?format=json #> 4 https://www.ebi.ac.uk/metagenomics/api/v1/publications/35208912/studies?format=json #> 5 https://www.ebi.ac.uk/metagenomics/api/v1/publications/35178461/studies?format=json #> 6 https://www.ebi.ac.uk/metagenomics/api/v1/publications/35154023/studies?format=json #> relationships.samples.links.related #> 1 https://www.ebi.ac.uk/metagenomics/api/v1/publications/36363763/samples?format=json #> 2 https://www.ebi.ac.uk/metagenomics/api/v1/publications/35614211/samples?format=json #> 3 https://www.ebi.ac.uk/metagenomics/api/v1/publications/35614182/samples?format=json #> 4 https://www.ebi.ac.uk/metagenomics/api/v1/publications/35208912/samples?format=json #> 5 https://www.ebi.ac.uk/metagenomics/api/v1/publications/35178461/samples?format=json #> 6 https://www.ebi.ac.uk/metagenomics/api/v1/publications/35154023/samples?format=json #> ..JSON #> 1 publicat.... #> 2 publicat.... #> 3 publicat.... #> 4 publicat.... #> 5 publicat.... #> 6 publicat...."},{"path":"/articles/MGnifyR.html","id":"fetch-sequence-files","dir":"Articles","previous_headings":"Functions for fetching the data","what":"Fetch sequence files","title":"MGnifyR","text":"Finally, can use searchFile() getFile() retrieve MGnify pipeline outputs merged sequence reads, assembled contigs, details functional analyses. searchFile(), can search files database. returned table contains search results related analyses fed input. table contains information file also URL address file can loaded. Finally, can download files getFile(). function returns path file stored.","code":"dl_urls <- searchFile(mg, analyses_accessions, type = \"analyses\") target_urls <- dl_urls[ dl_urls$attributes.description.label == \"Predicted alpha tmRNA\", ] head(target_urls) #> type id #> 24 analyses ERZ20300939_alpha_tmRNA.RF01849.fasta.gz #> 75 analyses ERZ20300942_alpha_tmRNA.RF01849.fasta.gz #> 126 analyses ERZ16299686_alpha_tmRNA.RF01849.fasta.gz #> 177 analyses ERZ16299690_alpha_tmRNA.RF01849.fasta.gz #> 228 analyses ERZ16299649_alpha_tmRNA.RF01849.fasta.gz #> 279 analyses ERZ16299683_alpha_tmRNA.RF01849.fasta.gz #> attributes.alias attributes.file.format.name #> 24 ERZ20300939_alpha_tmRNA.RF01849.fasta.gz FASTA #> 75 ERZ20300942_alpha_tmRNA.RF01849.fasta.gz FASTA #> 126 ERZ16299686_alpha_tmRNA.RF01849.fasta.gz FASTA #> 177 ERZ16299690_alpha_tmRNA.RF01849.fasta.gz FASTA #> 228 ERZ16299649_alpha_tmRNA.RF01849.fasta.gz FASTA #> 279 ERZ16299683_alpha_tmRNA.RF01849.fasta.gz FASTA #> attributes.file.format.extension attributes.file.format.compression #> 24 fasta TRUE #> 75 fasta TRUE #> 126 fasta TRUE #> 177 fasta TRUE #> 228 fasta TRUE #> 279 fasta TRUE #> attributes.description.label #> 24 Predicted alpha tmRNA #> 75 Predicted alpha tmRNA #> 126 Predicted alpha tmRNA #> 177 Predicted alpha tmRNA #> 228 Predicted alpha tmRNA #> 279 Predicted alpha tmRNA #> attributes.description.description #> 24 Predicted Alphaproteobacteria transfer-messenger RNA (RF01849) #> 75 Predicted Alphaproteobacteria transfer-messenger RNA (RF01849) #> 126 Predicted Alphaproteobacteria transfer-messenger RNA (RF01849) #> 177 Predicted Alphaproteobacteria transfer-messenger RNA (RF01849) #> 228 Predicted Alphaproteobacteria transfer-messenger RNA (RF01849) #> 279 Predicted Alphaproteobacteria transfer-messenger RNA (RF01849) #> attributes.group.type attributes.file.checksum.checksum #> 24 non-coding RNAs #> 75 non-coding RNAs #> 126 non-coding RNAs #> 177 non-coding RNAs #> 228 non-coding RNAs #> 279 non-coding RNAs #> attributes.file.checksum.checksum.algorithm #> 24 #> 75 #> 126 #> 177 #> 228 #> 279 #> relationships.pipeline.data.type relationships.pipeline.data.id #> 24 pipelines 5.0 #> 75 pipelines 5.0 #> 126 pipelines 5.0 #> 177 pipelines 5.0 #> 228 pipelines 5.0 #> 279 pipelines 5.0 #> relationships.pipeline.related #> 24 https://www.ebi.ac.uk/metagenomics/api/v1/pipelines/5.0?format=json #> 75 https://www.ebi.ac.uk/metagenomics/api/v1/pipelines/5.0?format=json #> 126 https://www.ebi.ac.uk/metagenomics/api/v1/pipelines/5.0?format=json #> 177 https://www.ebi.ac.uk/metagenomics/api/v1/pipelines/5.0?format=json #> 228 https://www.ebi.ac.uk/metagenomics/api/v1/pipelines/5.0?format=json #> 279 https://www.ebi.ac.uk/metagenomics/api/v1/pipelines/5.0?format=json #> download_url #> 24 https://www.ebi.ac.uk/metagenomics/api/v1/analyses/MGYA00652201/file/ERZ20300939_alpha_tmRNA.RF01849.fasta.gz #> 75 https://www.ebi.ac.uk/metagenomics/api/v1/analyses/MGYA00652185/file/ERZ20300942_alpha_tmRNA.RF01849.fasta.gz #> 126 https://www.ebi.ac.uk/metagenomics/api/v1/analyses/MGYA00643487/file/ERZ16299686_alpha_tmRNA.RF01849.fasta.gz #> 177 https://www.ebi.ac.uk/metagenomics/api/v1/analyses/MGYA00643486/file/ERZ16299690_alpha_tmRNA.RF01849.fasta.gz #> 228 https://www.ebi.ac.uk/metagenomics/api/v1/analyses/MGYA00643485/file/ERZ16299649_alpha_tmRNA.RF01849.fasta.gz #> 279 https://www.ebi.ac.uk/metagenomics/api/v1/analyses/MGYA00643484/file/ERZ16299683_alpha_tmRNA.RF01849.fasta.gz #> accession #> 24 MGYA00652201 #> 75 MGYA00652185 #> 126 MGYA00643487 #> 177 MGYA00643486 #> 228 MGYA00643485 #> 279 MGYA00643484 # Just select a single file from the target_urls list for demonstration. file_url <- target_urls$download_url[[1]] cached_location <- getFile(mg, file_url) # Where are the files? cached_location #> [1] \"/.MGnifyR_cache/analyses/MGYA00652201/file/ERZ20300939_alpha_tmRNA.RF01849.fasta.gz\" sessionInfo() #> R Under development (unstable) (2024-03-24 r86185) #> Platform: x86_64-pc-linux-gnu #> Running under: Ubuntu 22.04.4 LTS #> #> Matrix products: default #> BLAS: /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3 #> LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.20.so; LAPACK version 3.10.0 #> #> locale: #> [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C #> [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8 #> [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8 #> [7] LC_PAPER=en_US.UTF-8 LC_NAME=C #> [9] LC_ADDRESS=C LC_TELEPHONE=C #> [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C #> #> time zone: UTC #> tzcode source: system (glibc) #> #> attached base packages: #> [1] stats4 stats graphics grDevices utils datasets methods #> [8] base #> #> other attached packages: #> [1] miaViz_1.11.0 ggraph_2.2.1 #> [3] scater_1.31.2 ggplot2_3.5.0 #> [5] scuttle_1.13.1 MGnifyR_0.99.27 #> [7] biomformat_1.31.0 mia_1.11.1 #> [9] MultiAssayExperiment_1.29.1 TreeSummarizedExperiment_2.11.0 #> [11] Biostrings_2.71.5 XVector_0.43.1 #> [13] SingleCellExperiment_1.25.0 SummarizedExperiment_1.33.3 #> [15] Biobase_2.63.0 GenomicRanges_1.55.4 #> [17] GenomeInfoDb_1.39.9 IRanges_2.37.1 #> [19] S4Vectors_0.41.5 BiocGenerics_0.49.1 #> [21] MatrixGenerics_1.15.0 matrixStats_1.2.0 #> [23] knitr_1.45 BiocStyle_2.31.0 #> #> loaded via a namespace (and not attached): #> [1] jsonlite_1.8.8 tidyjson_0.3.2 #> [3] magrittr_2.0.3 ggbeeswarm_0.7.2 #> [5] farver_2.1.1 rmarkdown_2.26 #> [7] fs_1.6.3 zlibbioc_1.49.3 #> [9] ragg_1.3.0 vctrs_0.6.5 #> [11] memoise_2.0.1 DelayedMatrixStats_1.25.1 #> [13] RCurl_1.98-1.14 ggtree_3.11.1 #> [15] htmltools_0.5.8 S4Arrays_1.3.6 #> [17] BiocBaseUtils_1.5.1 BiocNeighbors_1.21.2 #> [19] Rhdf5lib_1.25.1 gridGraphics_0.5-1 #> [21] SparseArray_1.3.4 rhdf5_2.47.6 #> [23] sass_0.4.9 bslib_0.6.2 #> [25] desc_1.4.3 plyr_1.8.9 #> [27] DECIPHER_2.31.3 cachem_1.0.8 #> [29] igraph_2.0.3 lifecycle_1.0.4 #> [31] pkgconfig_2.0.3 rsvd_1.0.5 #> [33] Matrix_1.7-0 R6_2.5.1 #> [35] fastmap_1.1.1 GenomeInfoDbData_1.2.11 #> [37] aplot_0.2.2 digest_0.6.35 #> [39] ggnewscale_0.4.10 colorspace_2.1-0 #> [41] patchwork_1.2.0 irlba_2.3.5.1 #> [43] textshaping_0.3.7 vegan_2.6-4 #> [45] beachmat_2.19.2 labeling_0.4.3 #> [47] fansi_1.0.6 urltools_1.7.3 #> [49] polyclip_1.10-6 httr_1.4.7 #> [51] abind_1.4-5 mgcv_1.9-1 #> [53] compiler_4.4.0 withr_3.0.0 #> [55] BiocParallel_1.37.1 viridis_0.6.5 #> [57] DBI_1.2.2 highr_0.10 #> [59] ggforce_0.4.2 MASS_7.3-60.2 #> [61] DelayedArray_0.29.9 bluster_1.13.0 #> [63] permute_0.9-7 tools_4.4.0 #> [65] vipor_0.4.7 beeswarm_0.4.0 #> [67] ape_5.7-1 glue_1.7.0 #> [69] nlme_3.1-164 rhdf5filters_1.15.4 #> [71] grid_4.4.0 cluster_2.1.6 #> [73] reshape2_1.4.4 generics_0.1.3 #> [75] gtable_0.3.4 tidyr_1.3.1 #> [77] tidygraph_1.3.1 BiocSingular_1.19.0 #> [79] ScaledMatrix_1.11.1 utf8_1.2.4 #> [81] ggrepel_0.9.5 pillar_1.9.0 #> [83] stringr_1.5.1 yulab.utils_0.1.4 #> [85] splines_4.4.0 tweenr_2.0.3 #> [87] dplyr_1.1.4 treeio_1.27.0 #> [89] lattice_0.22-6 tidyselect_1.2.1 #> [91] DirichletMultinomial_1.45.0 gridExtra_2.3 #> [93] bookdown_0.38 xfun_0.43 #> [95] graphlayouts_1.1.1 stringi_1.8.3 #> [97] ggfun_0.1.4 lazyeval_0.2.2 #> [99] yaml_2.3.8 evaluate_0.23 #> [101] codetools_0.2-19 tibble_3.2.1 #> [103] BiocManager_1.30.22 ggplotify_0.1.2 #> [105] cli_3.6.2 systemfonts_1.0.6 #> [107] munsell_0.5.0 jquerylib_0.1.4 #> [109] Rcpp_1.0.12 triebeard_0.4.1 #> [111] parallel_4.4.0 pkgdown_2.0.7 #> [113] assertthat_0.2.1 sparseMatrixStats_1.15.0 #> [115] bitops_1.0-7 decontam_1.23.0 #> [117] viridisLite_0.4.2 tidytree_0.4.6 #> [119] scales_1.3.0 purrr_1.0.2 #> [121] crayon_1.5.2 rlang_1.1.3"},{"path":"/articles/MGnifyR_long.html","id":"introduction","dir":"Articles","previous_headings":"","what":"Introduction","title":"MGnifyR, extended vignette","text":"MGnifyR package designed ease access EBI’s MGnify resource, allowing searching retrieval multiple datasets downstream analysis. MGnify pipelines undoubtedly useful, currently implemented produce results strictly per-sample basis. whole study results available, comparisons across studies difficult. MGnifyR package designed facilitate cross-study analyses handling per-sample data retrieval merging details internally, leaving user free perform analysis see fit. latest version MGnifyR seamlessly integrates miaverse framework providing access tools microbiome -stream analytics. integration enables users leverage optimized standardized methods analyzing microbiome. Additionally, users can benefit comprehensive tutorial book offers valuable guidance support.","code":""},{"path":"/articles/MGnifyR_long.html","id":"installation","dir":"Articles","previous_headings":"","what":"Installation","title":"MGnifyR, extended vignette","text":"MGnifyR currently hosted GitHub, can installed using via devtools. MGnifyR built using following snippet.","code":"BiocManager::install(\"MGnifyR\")"},{"path":"/articles/MGnifyR_long.html","id":"load-mgnifyr-package","dir":"Articles","previous_headings":"","what":"Load MGnifyR package","title":"MGnifyR, extended vignette","text":"installed, MGnifyR made available usual way.","code":"library(MGnifyR)"},{"path":"/articles/MGnifyR_long.html","id":"create-a-client","dir":"Articles","previous_headings":"","what":"Create a client","title":"MGnifyR, extended vignette","text":"functions MGnifyR make use MgnifyClient object keep track JSONAPI url, disk cache location user access tokens. Thus first thing starting analysis instantiate object. following snippet creates . ’s recommended local caching enabled useCache = TRUE. Queries MGnify API can quite slow, particularly retrieving multipage results many analyses (many Interpro results). Using local disk cache can significantly speed subsequent work, bypassing need re-query API. Use cache entirely transparent, caching occurs raw data level. cache can persist across MGnifyR sessions, can even used multiple sessions simultaneously - provided different sets accessions queried . Optionally, username password may specified client creation, causing MGnifyR attempt retrieval authentication token API. gives access non-public results, currently author imposed embargo period.","code":"mg <- MgnifyClient() mg mg <- MgnifyClient( username = \"Webin-username\", password = \"your-password\", useCache = TRUE)"},{"path":[]},{"path":"/articles/MGnifyR_long.html","id":"search-data","dir":"Articles","previous_headings":"Functions for fetching the data","what":"Search data","title":"MGnifyR, extended vignette","text":"MGnifyR gives users access complete range search functionality implemented MGnify JSON API. single function doQuery() used perform searching, allowing Studies, Samples, Runs Accession interrogated common interface. MGnifyR functions first argument client must valid MgnifyClient instance. remaining required parameter qtype, specifying type data queried, may one studies, samples, runs, analyses assemblies. general parameter include max.hits. Unlike MGnifyR high level functions, caching turned default doQuery(). New data analyses added MGnify time, enabling caching default may lead --date search results long-lived sessions. However, ’s easy switch back , may useful many cases. Also, given huge ever increasing number datasets available MGnify, limit number results returned may set using max.hits. default set 200, exploratory queries sufficient. may increased decreased directly specifying max.hits, disabled completely (limit) setting max.hits=NULL. cases want specific search, also use either accession parameter, many filter options available API, discussed . Specifying accession id, case samples, runs assemblies may vector ids, returns data.frame metadata one row per matching accession. accession NULL (default) remaining parameters define filters applied API search result. Details parameters given help(doQuery). way example though, supposing interested amplicon Illumina samples arctic, might try following query: Specifying accession parameter restrict results just matching particular entry, study, sample run. example, retrieve information study “MGYS00002891”:","code":"northpolar <- doQuery( mg, \"samples\", latitude_gte=60.0, experiment_type=\"amplicon\", biome_name=\"Soil\", instrument_platform = \"Illumina\", max.hits = 10) head(northpolar) study_samples <- doQuery(mg, \"studies\", accession=\"MGYS00002891\") head(study_samples)"},{"path":"/articles/MGnifyR_long.html","id":"find-relevent-analyses-accessions","dir":"Articles","previous_headings":"Functions for fetching the data","what":"Find relevent analyses accessions","title":"MGnifyR, extended vignette","text":"obtained particular set search hits, ’s now time retrieve associated results. General automated analysis complicated MGnify database design, wherein example samples may shared multiple studies, studies analysed multiple times using different versions pipeline. Navigating “many--one” relationships can tricky, MGnifyR resorts using analyses accessions ’s canonical identifier. analysis corresponds single run particular pipeline single sample single study. downside approach queries returning studies, samples (anything analyses) accessions need converting corresponding analyses. MGnifyR therefore provides helper function handle conversion - searchAnalysis(). Following previous search, list study accessions, convert corresponding analyses use: useful side effect call attribute metadata sample now retrieved stored local cache. Thus subsequent API calls samples (occur multiple times later steps) significantly faster. ’s important aware results searchAnalysis() command necessarily one--one match input accessions. MGnify analysis runs sometimes performed multiple times, perhaps using different versions pipeline. Thus filtering result list may required, easily performed illustrated next section.","code":"analyses_accessions <- searchAnalysis( mg, type=\"studies\", accession = study_samples$accession) head(analyses_accessions)"},{"path":"/articles/MGnifyR_long.html","id":"fetch-metadata","dir":"Articles","previous_headings":"Functions for fetching the data","what":"Fetch metadata","title":"MGnifyR, extended vignette","text":"point long list analysis instances (potential duplicates) corresponding samples previously found. use getMetadata function download combine associated sample, run study metadata, filter required include rows want. resulting data.frame columns names prefixed source type. example, “sample_xxx” columns correspond metadata gleaned querying accession’s sample entry. MGnify allows quite flexible specification arbitray metadata submission time, many cases leading quite sparse data.frame results accession queries sourced one study. instance, one sample contains entry “sample_soil_PH”, entries rows filled NA. MGnifyR automatically clean missing values - instead opting allow user choose correct action. particular study ’re looking marine biome, suppose interested samples analyses sampling depth known. following snippet filters full data.frame selecting entries contain valid sample_depth. ’s worth noting .numeric call ensure column converted numeric type checked. sample data MGnifyR initially retrieved type character, ’s user make sure ostensibly numeric entries converted properly.","code":"analyses_metadata <- getMetadata(mg, analyses_accessions) head(analyses_metadata) known_depths <- analyses_metadata[ !is.na(as.numeric(analyses_metadata$sample_depth)), ] # How many are left? dim(known_depths)"},{"path":"/articles/MGnifyR_long.html","id":"fetch-microbiome-data","dir":"Articles","previous_headings":"Functions for fetching the data","what":"Fetch microbiome data","title":"MGnifyR, extended vignette","text":"selected analyses wish examine , getResult() used download associated OTU tables taxonomy, join results single TreeSummarizedExperiment (TreeSE) object. TreeSE becoming defacto standard taxonomic abundance munging R. TreeSE objects integrate abundance, taxonomic, phylogenetic, sample sequence data single object, powerful facilities filtering, processing plotting results. Compared phyloseq object, TreeSE scalable capable efficient data analysis. miaverse framework developed around TreeSE data container. provides tools analysis visualization. Moreover, includes comprehensive tutorial book called OMA.","code":""},{"path":"/articles/MGnifyR_long.html","id":"amplicon-sequencing","dir":"Articles","previous_headings":"Functions for fetching the data > Fetch microbiome data","what":"Amplicon sequencing","title":"MGnifyR, extended vignette","text":"dataset includes amplicon sequencing data, .e., dataset include function predictions, getResult() method returns dataset TreeSE default. See output types function documentation. TreeSE object uniquely positioned support SummarizedExperiment-based microbiome data manipulation visualization. Moreover, enables access miaverse tools. example, can estimate diversity samples. needed, TreeSE can converted phyloseq.","code":"tse <- getResult(mg, accession = analyses_accessions, get.func = FALSE) tse tse <- estimateDiversity(tse, index = \"shannon\") library(scater) plotColData(tse, \"shannon\", x = \"sample_geo.loc.name\") library(miaViz) plotAbundance(tse[!is.na( rowData(tse)[[\"Kingdom\"]] ), ], rank = \"Kingdom\") pseq <- makePhyloseqFromTreeSE(tse) pseq"},{"path":"/articles/MGnifyR_long.html","id":"metagenomics","dir":"Articles","previous_headings":"Functions for fetching the data > Fetch microbiome data","what":"Metagenomics","title":"MGnifyR, extended vignette","text":"Although previous queries based results doQuery(), now concentrate combining comparing results specific studies. Since newly performed analyses retrieved first doQuery() call, ’s likely time vignette read, query results different. principally due rapid increase MGnify submissions, leading potential lack consistency even closely spaced queries. mentioned previously, may best use useCache=FALSE MgnifyCLient object doQuery() calls, ensure queries actually returning latest data. remainder vignette however, ’ll comparing 3 ostensibly different studies. study saltmarsh soils York University, human faecal samples survey healthy Sardinians, set samples hydrothermal vents Mid-Cayman rise Carribbean Sea. simplify things, first 20 samples study used. Furthermore, intention demonstrate functionality MGnifyR package, rather produce scientifically rigorous results. first step new accession list , previously, retrieve associated metadata using getMetadata(), seen doQuery() results, returned data.frame contains large number columns. autogenerated flexible, column names can little difficult predict, examining colnames(full_metadata) make things clearer. full_metadata get idea type data ’re dealing , can extract useul information sequencing platform, source biome, etc. next code snippet tallies columns give idea ’s available. boxplot also indicates within study read counts similar, probably need use sort normalization procedure comparing across samples. might also want drop particularly low read coverage samples analysis. , can fetch data calling getResult(). bulk.dl=TRUE potential significantly speed data retrieval. MGnify makes functional results available two separate ways, either per-analysis basis web api, whole study level large files, tab separated (TSV), columns representing results analysis. bulk.dl FALSE, MGnifyR queries web api get results (given functional analyses results may consist thousands entries) may take significant time. Setting bulk.dl TRUE causes MGnifyR determine source study associated particular analysis instead download parse corresponding results file. Since result file contains entries analyses associated study, taking advantage MGnifyR’s local caching single download provides results many future analyses. cases affords several orders magnitude speedup api query case. Unfortunately, column entries per-study results files always directly correspond particular analysis run, causing retrieval fail. principal cause believed running multiple analyses jobs sample. Thus reliability, bulk.dl FALSE default. general recommendation though, try setting TRUE first time getResult() used set accessions. fails, setting bulk.dl FALSE enable robust approach allowing analysis continue. might take though. Hopefully future sample/analysis correspondence mismatches fixed default bulk.dl switch TRUE. metagenomic samples, result MultiAssayExperiment (MAE) links multiple TreeSE objects one dataset. TreeSE objects include taxonomic profiling data along functional data unique objects. objects linked sample names. can get access individual object experiment specifying index name. can perform principal component analysis microbial profiling data utilizing miaverse tools.","code":"soil <- searchAnalysis(mg, \"studies\", \"MGYS00001447\") human <- searchAnalysis(mg, \"studies\", \"MGYS00001442\") marine <- searchAnalysis(mg, \"studies\", \"MGYS00001282\") # Combine analyses all_accessions <- c(soil, human, marine) head(all_accessions) full_metadata <- getMetadata(mg, all_accessions) colnames(full_metadata) head(full_metadata) # Load ggplot2 library(ggplot2) #Distribution of sample source material: table(full_metadata$`sample_environment-material`) #What sequencing machine(s) were used? table(full_metadata$`sample_instrument model`) # Boxplot of raw read counts: ggplot( full_metadata, aes(x=study_accession, y=log( as.numeric(`analysis_Submitted nucleotide sequences`)))) + geom_boxplot(aes(group=study_accession)) + theme_bw() + ylab(\"log(submitted reads)\") mae <- getResult(mg, all_accessions, bulk.dl = TRUE) mae mae[[2]] # Apply relative transformation mae[[1]] <- transformAssay(mae[[1]], method = \"relabundance\") # Perform PCoA mae[[1]] <- runMDS( mae[[1]], assay.type = \"relabundance\", FUN = vegan::vegdist, method = \"bray\") # Plot plotReducedDim(mae[[1]], \"MDS\", colour_by = \"sample_environment.feature\")"},{"path":"/articles/MGnifyR_long.html","id":"fetch-raw-files","dir":"Articles","previous_headings":"Functions for fetching the data","what":"Fetch raw files","title":"MGnifyR, extended vignette","text":"getResult() can utilized retrieve microbial profiling data, getData() can used flexibly retrieve kind data database. returns data simple data.frame list format.","code":"kegg <- getData( mg, type = \"kegg-modules\", accession = \"MGYA00642773\", accession.type = \"analyses\") head(kegg)"},{"path":"/articles/MGnifyR_long.html","id":"fetch-sequence-files","dir":"Articles","previous_headings":"Functions for fetching the data","what":"Fetch sequence files","title":"MGnifyR, extended vignette","text":"Finally, can use searchFile() getFile() retrieve MGnify pipeline outputs merged sequence reads, assembled contigs, details functional analyses. searchFile() simple wrapper function , supplied list accessions, finds urls files ’re . cases ’ll want filter returned list files interest, easily done resulting data.frame object. addition actual download location (download_url column), extra columns include file type, contents compression. ’s recommended colnames data.frame examined get grasp available metadata. demonstrate process, code retrieves data.frame containing available downloads accession ’ve examining previously. filters retain files corresponding retain annotated amino acid sequence files. list types available files, guide filtering, something like following might useful. Unlike MGnifyR functions, searchFile() limited analyses, specifying accession_type results types may found. instance, general genome functionality yet integrated MGnifyR, can retrieve associated files particular genome accession following: found set target urls, final step use getFile() actually retrieve file. Unlike functions, works single url location , entry target_urls must downloaded individually - easily done either looping applying list. files intended used external programs, might easiest provide file parameter function call, specifies local filename writing file. default MGnifyR use local cache, can make getting file afterwards awkward. Regardless, default behaviour getFile() retrieve file specified parameter url, save disk, return filepath saved . second download option available, allows built-parsing file. know ahead time processing performed, may possible integrate function, pass function getFile() read.func argument. function question take single argument (complete path name locally downloaded file) result call returned place usual output file name. Alternatively files first downloaded standard way, processed using function loop. Therefore many cases read.func parameter redundant. However, many outputs MGnify can quite large, meaning local storage many files may become issue. providing read_func parameter (necessarily setting MgnifyClient object: useCache=FALSE) analysis large number datasets may possible minimal storage requirements. illustrate, suppose interested retrieving detected sequences matching particular PFAM motif set analyses. simple function uses Biostrings package read amino acid fasta file, searches matching PFAM tag sequence name, tallies unique sequences single data.frame row. case PFAM motif identifies sequences coding amoC gene, found ammonia methane oxidizing organisms, filtering method used. defined function, just remains include call getFile().","code":"# Find list of available downloads dl_urls <- searchFile( mg, full_metadata$analysis_accession, type = \"analyses\") # Filter table target_urls <- dl_urls[ dl_urls$attributes.description.label == \"Predicted CDS with annotation\", ] head(target_urls) table(dl_urls$attributes.description.label) genome_urls <- searchFile(mg, \"MGYG000433953\", type = \"genomes\") genome_urls[ , c(\"id\", \"attributes.file.format.name\", \"download_url\")] # Just select a single file from the target_urls list for demonstration. # Default behavior - use local cache. cached_location1 = getFile(mg, target_urls$download_url[[1]]) # Specifying a file cached_location2 <- getFile( mg, target_urls$download_url[[1]]) cached_location <- c(cached_location1, cached_location2) # Where are the files? cached_location library(Biostrings) # Simple function to a count of unique sequences matching PFAM amoC/mmoC motif getAmoCseqs <- function(fname){ sequences <- readAAStringSet(fname) tgtvec <- grepl(\"PF04896\", names(sequences)) as.data.frame(as.list(table(as.character(sequences[tgtvec])))) } # Just download a single accession for demonstration, specifying a read_function amoC_seq_counts <- getFile( mg, target_urls$download_url[[1]], read_func = getAmoCseqs) amoC_seq_counts sessionInfo()"},{"path":"/authors.html","id":null,"dir":"","previous_headings":"","what":"Authors","title":"Authors and Citation","text":"Tuomas Borman. Author, maintainer. Ben Allen. Author. Leo Lahti. Author.","code":""},{"path":"/authors.html","id":"citation","dir":"","previous_headings":"","what":"Citation","title":"Authors and Citation","text":"Borman T, Allen B, Lahti L (2024). MGnifyR: R interface EBI MGnify metagenomics resource. R package version 0.99.27, https://github.com/EBI-Metagenomics/MGnifyR.","code":"@Manual{, title = {MGnifyR: R interface to EBI MGnify metagenomics resource}, author = {Tuomas Borman and Ben Allen and Leo Lahti}, year = {2024}, note = {R package version 0.99.27}, url = {https://github.com/EBI-Metagenomics/MGnifyR}, }"},{"path":"/index.html","id":"mgnifyr-","dir":"","previous_headings":"","what":"R interface to EBI MGnify metagenomics resource","title":"R interface to EBI MGnify metagenomics resource","text":"R package searching retrieving data EBI Metagenomics resource. cases, MGnifyR interacts directly JSONAPI, rather relying downloading analyses outputs TSV files. Thus general - allowing example intuitive combining multiple studies analyses single workflow, cases slower afformentioned direct access. Local caching results disk implemented help counter overheads, data downloads can slow - particularly functional annotation retrieval. MGnifyR package part miaverse microbiome analysis ecosystem enabling usage mia miaverse packages. research received funding Horizon 2020 Programme European Union within FindingPheno project grant agreement 952914. FindingPheno, EU-funded project, dedicated developing computational tools methodologies integration analysis multi-omics data. primary objective deepen understanding interactions hosts microbiomes. can find information FindingPheno website.","code":""},{"path":"/index.html","id":"requirements","dir":"","previous_headings":"","what":"Requirements","title":"R interface to EBI MGnify metagenomics resource","text":"","code":"devtools # for installation mia plyr dplyr reshape2 httr urltools"},{"path":[]},{"path":"/index.html","id":"bioc-release","dir":"","previous_headings":"Installation","what":"Bioc-release","title":"R interface to EBI MGnify metagenomics resource","text":"","code":"if (!requireNamespace(\"BiocManager\", quietly = TRUE)) install.packages(\"BiocManager\") BiocManager::install(\"MGnifyR\")"},{"path":"/index.html","id":"bioc-devel","dir":"","previous_headings":"Installation","what":"Bioc-devel","title":"R interface to EBI MGnify metagenomics resource","text":"","code":"if (!requireNamespace(\"BiocManager\", quietly = TRUE)) install.packages(\"BiocManager\") # The following initializes usage of Bioc devel BiocManager::install(version='devel') BiocManager::install(\"MGnifyR\")"},{"path":"/index.html","id":"github","dir":"","previous_headings":"Installation","what":"GitHub","title":"R interface to EBI MGnify metagenomics resource","text":"","code":"remotes::install_github(\"EBI-Metagenomics/MGnifyR\")"},{"path":"/index.html","id":"basic-usage","dir":"","previous_headings":"","what":"Basic usage","title":"R interface to EBI MGnify metagenomics resource","text":"detailed instructions read associated function help vignette (vignette(\"MGNifyR\"))","code":"library(MGnifyR) # Set up the MGnify client instance mgclnt <- MgnifyClient(usecache = TRUE, cache_dir = '/tmp/MGnify_cache') # Retrieve the list of analyses associated with a study accession_list <- searchAnalysis(mgclnt, \"studies\", \"MGYS00005058\", usecache = TRUE) # Download all associated study/sample and analysis metadata meta_dataframe <- getMetadata(mgclnt, accession_list, usecache = TRUE) # Convert analyses outputs to a single `MultiAssayExperiment` object mae <- getResult(mgclnt, meta_dataframe$analysis_accession, usecache = TRUE) mae"},{"path":"/reference/MGnifyR-package.html","id":null,"dir":"Reference","previous_headings":"","what":"MGnifyR Package. — MGnifyR-package","title":"MGnifyR Package. — MGnifyR-package","text":"MGnifyR implements interface EBI MGnify database. See vignette general introduction package. MGnify general MGnify information, API documentation details JSONAPI implementation.","code":""},{"path":[]},{"path":"/reference/MGnifyR-package.html","id":"author","dir":"Reference","previous_headings":"","what":"Author","title":"MGnifyR Package. — MGnifyR-package","text":"Maintainer: Tuomas Borman tuomas.v.borman@utu.fi (ORCID) Authors: Ben Allen ben.allen@ncl.ac.uk Leo Lahti leo.lahti@iki.fi (ORCID)","code":""},{"path":"/reference/MgnifyClient-accessors.html","id":null,"dir":"Reference","previous_headings":"","what":"MgnifyClient accessors and mutators — databaseUrl","title":"MgnifyClient accessors and mutators — databaseUrl","text":"MgnifyClient accessors mutators","code":""},{"path":"/reference/MgnifyClient-accessors.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"MgnifyClient accessors and mutators — databaseUrl","text":"","code":"databaseUrl(x) authTok(x) useCache(x) cacheDir(x) showWarnings(x) clearCache(x) verbose(x) databaseUrl(x) <- value authTok(x) <- value useCache(x) <- value cacheDir(x) <- value showWarnings(x) <- value clearCache(x) <- value verbose(x) <- value # S4 method for MgnifyClient databaseUrl(x) # S4 method for MgnifyClient authTok(x) # S4 method for MgnifyClient useCache(x) # S4 method for MgnifyClient cacheDir(x) # S4 method for MgnifyClient showWarnings(x) # S4 method for MgnifyClient clearCache(x) # S4 method for MgnifyClient verbose(x) # S4 method for MgnifyClient databaseUrl(x) <- value # S4 method for MgnifyClient authTok(x) <- value # S4 method for MgnifyClient useCache(x) <- value # S4 method for MgnifyClient cacheDir(x) <- value # S4 method for MgnifyClient showWarnings(x) <- value # S4 method for MgnifyClient clearCache(x) <- value # S4 method for MgnifyClient verbose(x) <- value"},{"path":"/reference/MgnifyClient-accessors.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"MgnifyClient accessors and mutators — databaseUrl","text":"x MgnifyClient object. value value added certain slot.","code":""},{"path":"/reference/MgnifyClient-accessors.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"MgnifyClient accessors and mutators — databaseUrl","text":"value MgnifyClient object nothing.","code":""},{"path":"/reference/MgnifyClient-accessors.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"MgnifyClient accessors and mutators — databaseUrl","text":"functions fetching mutating slots MgnifyClient object.","code":""},{"path":"/reference/MgnifyClient-accessors.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"MgnifyClient accessors and mutators — databaseUrl","text":"","code":"mg <- MgnifyClient() databaseUrl(mg) #> [1] \"https://www.ebi.ac.uk/metagenomics/api/v1\" showWarnings(mg) <- FALSE"},{"path":"/reference/MgnifyClient.html","id":null,"dir":"Reference","previous_headings":"","what":"Constructor for creating a MgnifyClient object to allow the access to\nMGnify database. — MgnifyClient","title":"Constructor for creating a MgnifyClient object to allow the access to\nMGnify database. — MgnifyClient","text":"Constructor creating MgnifyClient object allow access MGnify database. MgnifyClient object","code":""},{"path":"/reference/MgnifyClient.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Constructor for creating a MgnifyClient object to allow the access to\nMGnify database. — MgnifyClient","text":"","code":"MgnifyClient( username = NULL, password = NULL, useCache = FALSE, cacheDir = tempdir(), showWarnings = FALSE, verbose = TRUE, clearCache = FALSE, ... )"},{"path":"/reference/MgnifyClient.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Constructor for creating a MgnifyClient object to allow the access to\nMGnify database. — MgnifyClient","text":"username single character value specifying optional username authentication. (default: username = NULL) password single character value specifying optional password authentication. (default: password = NULL) useCache single boolean value specifying whether enable -disk caching results session. use cases TRUE. (default: useCache = FALSE) cacheDir single character value specifying folder contain local cache. Note cached files persistent, cache directory may reused sessions, taking advantage previously downloaded results. directory created exist already. (default: cacheDir = tempdir()) showWarnings single boolean value specifying whether print warnings invocation MGnifyR functions. (default: showWarnings = FALSE) verbose single boolean value specifying whether print extra output invocation MGnifyR functions. (default: verbose = FALSE) clearCache single boolean value specifying whether clear cache. (default: clearCache = FALSE) ... optional arguments: url single character value specifying url address database. (default: url = \"https://www.ebi.ac.uk/metagenomics/api/v1\")","code":""},{"path":"/reference/MgnifyClient.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Constructor for creating a MgnifyClient object to allow the access to\nMGnify database. — MgnifyClient","text":"MgnifyClient object.","code":""},{"path":"/reference/MgnifyClient.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Constructor for creating a MgnifyClient object to allow the access to\nMGnify database. — MgnifyClient","text":"functions MGnifyR package take MgnifyClient object first argument. essential querying raw MGnify API (exposed relative standard JSONAPI), object allows simple handling user authentication access private data, local -disk caching results. object required functions MGnifyR package.","code":""},{"path":"/reference/MgnifyClient.html","id":"slots","dir":"Reference","previous_headings":"","what":"Slots","title":"Constructor for creating a MgnifyClient object to allow the access to\nMGnify database. — MgnifyClient","text":"databaseUrl single character value specifying URL address database. authTok single character value specifying authentication token. useCache single boolean value specifying whether use cache. cacheDir single character value specifying cache directory. showWarnings single boolean value specifying whether show warnings. clearCache single boolean value specifying whether clear cache. verbose single boolean value specifying whether show messages.","code":""},{"path":"/reference/MgnifyClient.html","id":"constructor","dir":"Reference","previous_headings":"","what":"Constructor","title":"Constructor for creating a MgnifyClient object to allow the access to\nMGnify database. — MgnifyClient","text":"See MgnifyClient constructor.","code":""},{"path":"/reference/MgnifyClient.html","id":"accessor","dir":"Reference","previous_headings":"","what":"Accessor","title":"Constructor for creating a MgnifyClient object to allow the access to\nMGnify database. — MgnifyClient","text":"See MgnifyClient-accessors accessor functions.","code":""},{"path":"/reference/MgnifyClient.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Constructor for creating a MgnifyClient object to allow the access to\nMGnify database. — MgnifyClient","text":"","code":"my_client <- MgnifyClient( useCache = TRUE, cacheDir = \"/scratch/MGnify_cache_location\" ) if (FALSE) { # Use username and password to get access to non-public data my_client <- MgnifyClient( username = \"Webin-1122334\", password = \"SecretPassword\", useCache = TRUE, cacheDir = \"/scratch/MGnify_cache_location\" ) }"},{"path":"/reference/deprecate.html","id":null,"dir":"Reference","previous_headings":"","what":"These functions will be deprecated. Please use other functions instead. — deprecate","title":"These functions will be deprecated. Please use other functions instead. — deprecate","text":"functions deprecated. Please use functions instead.","code":""},{"path":"/reference/deprecate.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"These functions will be deprecated. Please use other functions instead. — deprecate","text":"","code":"mgnify_client( username = NULL, password = NULL, usecache = FALSE, cache_dir = NULL, warnings = FALSE, use_memcache = FALSE, ... ) mgnify_query( client, qtype = \"samples\", accession = NULL, asDataFrame = TRUE, maxhits = 200, usecache = FALSE, ... ) mgnify_analyses_from_samples(client, accession, usecache = TRUE, ...) mgnify_analyses_from_studies(client, accession, usecache = TRUE, ...) mgnify_get_download_urls( client, accessions, accession_type, usecache = TRUE, ... ) mgnify_download( client, url, file = NULL, read_func = NULL, usecache = TRUE, Debug = FALSE, ... ) mgnify_get_analyses_results( client = NULL, accessions, retrievelist = c(), compact_results = TRUE, usecache = TRUE, bulk_dl = FALSE, ... ) mgnify_get_analyses_phyloseq( client = NULL, accessions, usecache = TRUE, returnLists = FALSE, tax_SU = \"SSU\", get_tree = FALSE, ... ) mgnify_get_analyses_metadata(client, accessions, usecache = TRUE, ...) mgnify_retrieve_json( client, path = \"biomes\", complete_url = NULL, qopts = NULL, maxhits = 200, usecache = FALSE, Debug = FALSE )"},{"path":"/reference/deprecate.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"These functions will be deprecated. Please use other functions instead. — deprecate","text":"username - password - usecache - cache_dir - warnings - use_memcache - ... - client - qtype - accession - asDataFrame - maxhits - accessions - accession_type - url - file - read_func - Debug - retrievelist - compact_results - bulk_dl - returnLists - tax_SU - get_tree - path - complete_url - qopts -","code":""},{"path":"/reference/doQuery.html","id":null,"dir":"Reference","previous_headings":"","what":"Search MGnify database for studies, samples, runs, analyses, biomes,\nassemblies, and genomes. — doQuery","title":"Search MGnify database for studies, samples, runs, analyses, biomes,\nassemblies, and genomes. — doQuery","text":"Search MGnify database studies, samples, runs, analyses, biomes, assemblies, genomes.","code":""},{"path":"/reference/doQuery.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Search MGnify database for studies, samples, runs, analyses, biomes,\nassemblies, and genomes. — doQuery","text":"","code":"doQuery(x, ...) # S4 method for MgnifyClient doQuery( x, type = \"studies\", accession = NULL, as.df = TRUE, max.hits = 200, ... )"},{"path":"/reference/doQuery.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Search MGnify database for studies, samples, runs, analyses, biomes,\nassemblies, and genomes. — doQuery","text":"x MgnifyClient object. ... Remaining parameter key/value pairs may supplied filter returned values. Available options differ types. See discussion details. type single character value specifying type objects query. Must one following options: studies, samples, runs, analyses, biomes, assemblies, super-studies, experiment-types, pipelines, pipeline-tools, publications, genomes, genome-search, genome-search/gather, genome-catalogues, genomeset, cogs, kegg-modules, kegg-classes, antismash-geneclusters, annotations/go-terms, annotations/interpro-identifiers, annotations/kegg-modules, annotations/pfam-entries, annotations/kegg-orthologs, annotations/genome-properties, annotations/antismash-gene-clusters, annotations/organisms, mydata. (default: type = \"studies\") accession single character value vector character values specifying MGnify accession identifiers (type type) NULL. NULL, results defined parameters retrieved. (default: accession = NULL) .df single boolean value specifying whether return results data.frame leave nested list. cases, .df = TRUE make sense. (default: .df = TRUE) max.hits single integer value specifying maximum number results return FALSE. actual number results actually higher max.hits, clipping occurs pagination page boundaries. disable limit, set max.hits = NULL. (default: max.hits = 200)","code":""},{"path":"/reference/doQuery.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Search MGnify database for studies, samples, runs, analyses, biomes,\nassemblies, and genomes. — doQuery","text":"nested list data.frame containing results query.","code":""},{"path":"/reference/doQuery.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Search MGnify database for studies, samples, runs, analyses, biomes,\nassemblies, and genomes. — doQuery","text":"doQuery flexible query function, harnessing \"full\" power JSONAPI MGnify search filters. Search results may filtered metadata value, associated study/sample/analyse etc. See [Api browser](https://www.ebi.ac.uk/metagenomics/api/v1/) information MGnify database filters. can find help customizing queries [](https://emg-docs.readthedocs.io/en/latest/api.html#customising-queries). example following filters available: studies: accession, biome_name, lineage, centre_name, include samples: accession, experiment_type, biome_name, lineage, geo_loc_name, latitude_gte, latitude_lte, longitude_gte, longitude_lte, species, instrument_model, instrument_platform, metadata_key, metadata_value_gte, metadata_value_lte, metadata_value, environment_material, environment_feature, study_accession, include runs: accession, experiment_type, biome_name, lineage, species, instrument_platform, instrument_model, metdata_key, metadata_value_gte, metadata_value_lte, metadata_value, sample_accession, study_accession, include analyses: biome_name, lineage, experiment_type, species, sample_accession, pipeline_version biomes: depth_gte, depth_lte assemblies: depth_gte, depth_lte Unfortunately appears cases, filters work expected, important check results returned match expected. Even unfortunately error parameter specification, query run filter parameters present . Thus result appear superficially correct infact correspond something completely different. behaviour hopefully fixed future incarnations MGnifyR JSONAPI, now users double check returned values. currently possible combine queries type single call (example search samples latitude). However, possible run multiple queries combine results using set operations R get desired behaviour.","code":""},{"path":"/reference/doQuery.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Search MGnify database for studies, samples, runs, analyses, biomes,\nassemblies, and genomes. — doQuery","text":"","code":"mg <- MgnifyClient(useCache = FALSE) # Get a list of studies from the Agricultural Wastewater : agwaste_studies <- doQuery( mg, \"studies\", biome_name=\"Agricultural wastewater\" ) if (FALSE) { # Get all samples from a particular study samps <- doQuery(mg, \"samples\", accession=\"MGYS00004521\") # Search polar samples samps_np <- doQuery(mg, \"samples\", latitude_gte=66, max.hits=10) samps_sp <- doQuery(mg, \"samples\", latitude_lte=-66, max.hits=10) # Search studies that have studied drinking water tbl <- doQuery( mg, type = \"studies\", biome_name = \"root:Environmental:Aquatic:Freshwater:Drinking water\", max.hits = 10) }"},{"path":"/reference/getData.html","id":null,"dir":"Reference","previous_headings":"","what":"Versatile function to retrieve raw results — getData","title":"Versatile function to retrieve raw results — getData","text":"Versatile function retrieve raw results","code":""},{"path":"/reference/getData.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Versatile function to retrieve raw results — getData","text":"","code":"getData(x, ...) # S4 method for MgnifyClient getData(x, type, accession.type = NULL, accession = NULL, as.df = TRUE, ...)"},{"path":"/reference/getData.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Versatile function to retrieve raw results — getData","text":"x MgnifyClient object. ... optional arguments fed internal functions. type single character value specifying type data retrieve. Must one following options: studies, samples, runs, analyses, biomes, assemblies, super-studies, experiment-types, pipelines, pipeline-tools, publications, genomes, genome-search, genome-search/gather, genome-catalogues, genomeset, cogs, kegg-modules, kegg-classes, antismash-geneclusters, annotations/go-terms, annotations/interpro-identifiers, annotations/kegg-modules, annotations/pfam-entries, annotations/kegg-orthologs, annotations/genome-properties, annotations/antismash-gene-clusters, annotations/organisms, mydata. accession.type single character value specifying type accession IDs (accession). Must specified accession specified. (default: accession.type = NULL) accession single character value vector character values specifying accession IDs return results . (default: accession = NULL) .df single boolean value specifying whether return results data.frame leave nested list. (default: .df = TRUE)","code":""},{"path":"/reference/getData.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Versatile function to retrieve raw results — getData","text":"data.frame list","code":""},{"path":"/reference/getData.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Versatile function to retrieve raw results — getData","text":"function returns data MGnify database. Compared getResult, function allows flexible framework fetching data. However, drawbacks: counts data, getResult returns optimally structured data container easier downstream analysis. getData returns raw data database. However, want retrieve data pipelines publications, instance, getResult suitable , getData can utilized instead.","code":""},{"path":[]},{"path":"/reference/getData.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Versatile function to retrieve raw results — getData","text":"","code":"# Create a client object mg <- MgnifyClient(useCache = FALSE) # Find kegg modules for certain analysis df <- getData( mg, type = \"kegg-modules\", accession = \"MGYA00642773\", accession.type = \"analyses\")"},{"path":"/reference/getFile.html","id":null,"dir":"Reference","previous_headings":"","what":"Download any MGnify files, also including processed reads and\nidentified protein sequences — getFile","title":"Download any MGnify files, also including processed reads and\nidentified protein sequences — getFile","text":"Download MGnify files, also including processed reads identified protein sequences Listing files available download","code":""},{"path":"/reference/getFile.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Download any MGnify files, also including processed reads and\nidentified protein sequences — getFile","text":"","code":"getFile(x, ...) searchFile(x, ...) # S4 method for MgnifyClient getFile(x, url, file = NULL, read.func = NULL, ...) # S4 method for MgnifyClient searchFile( x, accession, type = c(\"studies\", \"samples\", \"analyses\", \"assemblies\", \"genomes\", \"run\"), ... )"},{"path":"/reference/getFile.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Download any MGnify files, also including processed reads and\nidentified protein sequences — getFile","text":"x MgnifyClient object. ... Additional arguments; used currently. url single character value specifying url address file wish download. file single character value NULL specifying optional local filename use saving file. NULL (default), MGNify local cache settings used. file intended processed separate program, may sensible provide meaningful file, rather hunt cache folders. file NULL useCache(client) FALSE, read.func parameter must supplied file downloaded deleted. (default: file = NULL) read.func function specifying optional function process downloaded file return results, rather relying post processing. primary use-case parameter local disk space limited downloaded files can quickly processed discarded. function take single parameter, downloaded filename, may return valid R object. (default: read.func = NULL) accession single character value vector character values specifying accession IDs return results . type single character value specifying type objects query. Must one following options: analysis, samples, studies, assembly, genome run. (default: type = \"samples\")","code":""},{"path":"/reference/getFile.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Download any MGnify files, also including processed reads and\nidentified protein sequences — getFile","text":"Either local filename downloaded file, either location MGNifyR cache file. read.func used, result returned. data.frame containing discovered downloads. multiple accessions queried, accessions column may filter results - since rownames set (wouldn;'t make sense query return multiple items)","code":""},{"path":"/reference/getFile.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Download any MGnify files, also including processed reads and\nidentified protein sequences — getFile","text":"getFile convenient wrapper round generic URL downloading functionality R, taking care things like local caching authentication. function wrapper function allowing easy enumeration downloads available given accession (list thereof). Returns single data.frame containing available downloads associated metadata, including url location description. can filtered extract urls interest, actually retrieving files using mgnify_download","code":""},{"path":"/reference/getFile.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Download any MGnify files, also including processed reads and\nidentified protein sequences — getFile","text":"","code":"# Make a client object mg <- MgnifyClient(useCache = FALSE) # Create a vector of accession ids - these happen to be \\code{analysis} # accessions accession_vect <- c(\"MGYA00563876\", \"MGYA00563877\") downloads <- searchFile(mg, accession_vect, \"analyses\") #> Searching files... #> | | | 0% | |=================================== | 50% | |======================================================================| 100% # Filter to find the urls of 16S encoding sequences url_list <- downloads[ downloads$attributes.description.label == \"Contigs encoding SSU rRNA\", \"download_url\"] # Example 1: # Download the first file supplied_filename <- getFile( mg, url_list[[1]], file=\"SSU_file.fasta.gz\") if (FALSE) { # Example 2: # Just use local caching cached_filename <- getFile(mg, url_list[[2]]) # Example 3: # Using read.func to open the reads with readDNAStringSet from # \\code{biostrings}. Without retaining on disk dna_seqs <- getFile( mg, url_list[[3]], read.func = readDNAStringSet) } # Make a client object mg <- MgnifyClient(useCache = TRUE) # Create a vector of accession ids - these happen to be \\code{analysis} # accessions accession_vect <- c( \"MGYA00563876\", \"MGYA00563877\", \"MGYA00563878\", \"MGYA00563879\", \"MGYA00563880\" ) downloads <- searchFile(mg, accession_vect, \"analyses\") #> Searching files... #> | | | 0% | |============== | 20% | |============================ | 40% | |========================================== | 60% | |======================================================== | 80% | |======================================================================| 100%"},{"path":"/reference/getMetadata.html","id":null,"dir":"Reference","previous_headings":"","what":"Get all Study, Sample and Analysis metadata for the supplied analyses\naccessions — getMetadata","title":"Get all Study, Sample and Analysis metadata for the supplied analyses\naccessions — getMetadata","text":"Get Study, Sample Analysis metadata supplied analyses accessions","code":""},{"path":"/reference/getMetadata.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Get all Study, Sample and Analysis metadata for the supplied analyses\naccessions — getMetadata","text":"","code":"getMetadata(x, ...) # S4 method for MgnifyClient getMetadata(x, accession, ...)"},{"path":"/reference/getMetadata.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Get all Study, Sample and Analysis metadata for the supplied analyses\naccessions — getMetadata","text":"x MgnifyClient object. ... Optional arguments; currently used. accession single character value vector analysis accession IDs specifying accessions retrieve data .","code":""},{"path":"/reference/getMetadata.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Get all Study, Sample and Analysis metadata for the supplied analyses\naccessions — getMetadata","text":"data.frame metadata analysis accession list.","code":""},{"path":"/reference/getMetadata.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Get all Study, Sample and Analysis metadata for the supplied analyses\naccessions — getMetadata","text":"function retrieves associated study, sample analysis metadata attributes list analyses accessions.","code":""},{"path":"/reference/getMetadata.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Get all Study, Sample and Analysis metadata for the supplied analyses\naccessions — getMetadata","text":"","code":"# Create a client object mg <- MgnifyClient(useCache = FALSE) # Download all associated study/sample and analysis metadata accession_list <- c(\"MGYA00377505\") meta_dataframe <- getMetadata(mg, accession_list) #> Fetching metadata... #> | | | 0% | |======================================================================| 100%"},{"path":"/reference/getResult.html","id":null,"dir":"Reference","previous_headings":"","what":"Get microbial and/or functional profiling data for a list of accessions — getResult","title":"Get microbial and/or functional profiling data for a list of accessions — getResult","text":"Get microbial /functional profiling data list accessions","code":""},{"path":"/reference/getResult.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Get microbial and/or functional profiling data for a list of accessions — getResult","text":"","code":"getResult(x, ...) # S4 method for MgnifyClient getResult( x, accession, get.taxa = TRUE, get.func = TRUE, output = \"TreeSE\", ... )"},{"path":"/reference/getResult.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Get microbial and/or functional profiling data for a list of accessions — getResult","text":"x MgnifyClient object. ... optional arguments: taxa.su single character value specifying taxa subunit results selected? Currently, taxonomy assignments MGnify pipelines rely rRNA matches existing databases (GreenGenes SILVA), later pipelines checking SSU LSU portions rRNA sequence. taxa.su allows selection either Small subunit (SSU) Large subunit results final TreeSummarizedExperiment object. Older pipeline versions report results subunits, thus accessions value effect. get.tree single boolean value specifying whether include available phylogenetic trees TreeSummarizedExperiment object. (default: get.tree = TRUE) .df single boolean value enabled output = \"list\". argument specifies whether return functional data named list (one entry per element output list) data.frames, data.frame containing results requested accessions. FALSE, function returns list lists, element consisting results single accession. (default: .df = TRUE) bulk.dl single boolean value specifying MGnifyR attempt speed things downloading relevant studies TSV results extracting required columns, rather using JSONAPI interface. getting results multiple accessions share study, option may result significantly faster processing. However, appear (quite ) cases database TSV result columns match expected accession names. hopefully fixed future, now bulk.dl defaults TRUE. work, can orders magnitude efficient. (default: buld_dl = TRUE) accession single character value vector character values specifying accession IDs return results . get.taxa boolean value specifying whether retrieve metagenomic data. (default: get.taxa = TRUE) get.func boolean value single character value vector character values specifying functional analysis types retrieve. get.func = TRUE, available functional datatypes retrieved, FALSE, functional data retrieved. current list available types \"antismash-gene-clusters\", \"go-slim\", \"go-terms\", \"interpro-identifiers\", \"taxonomy\", \"taxonomy-itsonedb\", \"taxonomy-itsunite\", \"taxonomy-lsu\", \"taxonomy-ssu\". Note depending particular analysis type, pipeline version etc., functional results available. (default: get.func = TRUE) output single character value specifying format output. Must one following options: \"TreeSE\", \"list\", \"phyloseq\". (default: output = \"TreeSE\")","code":""},{"path":"/reference/getResult.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Get microbial and/or functional profiling data for a list of accessions — getResult","text":"metagenomic data retrieved, result returned TreeSummarizedExperiment object default. result can also returned phyloseq object list data.frames. Note phyloseq object can include one phylogenetic tree meaning taxa might lost data subsetted based tree. functional data retrieved addition metagenomic data, result returned MultiAssayExperiment object. options list containing phyloseq object data.frames just data.frames. Functional data can returned MultiAssayExperiment object list data.frames.","code":""},{"path":"/reference/getResult.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Get microbial and/or functional profiling data for a list of accessions — getResult","text":"Given set analysis accessions collection annotation types, function queries MGNify API returns results. function convenient retrieving highly structured (analysis vs counts) data certain instances. example, BIOM files downloaded automatically. want just retrieve raw data database, see getData.","code":""},{"path":[]},{"path":"/reference/getResult.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Get microbial and/or functional profiling data for a list of accessions — getResult","text":"","code":"# Create a client object mg <- MgnifyClient(useCache = FALSE) # Get OTU tables as TreeSE accession_list <- c(\"MGYA00377505\") tse <- getResult(mg, accession_list, get.func=FALSE, get.taxa=TRUE) #> Fetching taxonomy data... #> | | | 0% | |======================================================================| 100% #> Merging with full join... #> 1/1 #> if (FALSE) { # Get functional data along with OTU tables as MAE mae <- getResult(mg, accession_list, get.func=TRUE, get.taxa=TRUE) # Get same data as list list <- getResult( mg, accession_list, get.func=TRUE, get.taxa=TRUE, output = \"list\", as.df = TRUE, use.cache = TRUE) }"},{"path":"/reference/searchAnalysis.html","id":null,"dir":"Reference","previous_headings":"","what":"Look up analysis accession IDs for one or more study or sample accessions — searchAnalysis","title":"Look up analysis accession IDs for one or more study or sample accessions — searchAnalysis","text":"Look analysis accession IDs one study sample accessions","code":""},{"path":"/reference/searchAnalysis.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Look up analysis accession IDs for one or more study or sample accessions — searchAnalysis","text":"","code":"searchAnalysis(x, ...) # S4 method for MgnifyClient searchAnalysis(x, type, accession, ...)"},{"path":"/reference/searchAnalysis.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Look up analysis accession IDs for one or more study or sample accessions — searchAnalysis","text":"x MgnifyClient object. ... Optional arguments; currently used. type single character value specifying type accession IDs specified accession. Must \"studies\" \"samples\". accession single character value vector character values specifying study sample accession IDs used retrieve analyses IDs.","code":""},{"path":"/reference/searchAnalysis.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Look up analysis accession IDs for one or more study or sample accessions — searchAnalysis","text":"vector analysis accession IDs.","code":""},{"path":"/reference/searchAnalysis.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Look up analysis accession IDs for one or more study or sample accessions — searchAnalysis","text":"Retrieve analysis accession IDs associated supplied study sample accession.","code":""},{"path":"/reference/searchAnalysis.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Look up analysis accession IDs for one or more study or sample accessions — searchAnalysis","text":"","code":"# Create a client object mg <- MgnifyClient(useCache = FALSE) # Retrieve analysis ids from study MGYS00005058 result <- searchAnalysis(mg, \"studies\", c(\"MGYS00005058\")) #> Fetching analyses... #> | | | 0% | |======================================================================| 100% if (FALSE) { # Retrieve all analysis ids from samples result <- searchAnalysis( mg, \"samples\", c(\"SRS4392730\", \"SRS4392743\")) }"}] +[{"path":"/articles/MGnifyR.html","id":"introduction","dir":"Articles","previous_headings":"","what":"Introduction","title":"MGnifyR","text":"MGnifyR package designed ease access EBI’s MGnify resource, allowing searching retrieval multiple datasets downstream analysis. latest version MGnifyR seamlessly integrates miaverse framework providing access cutting-edge tools microbiome -stream analytics.","code":""},{"path":"/articles/MGnifyR.html","id":"installation","dir":"Articles","previous_headings":"","what":"Installation","title":"MGnifyR","text":"MGnifyR hosted Bioconductor, can installed using via BiocManager.","code":"BiocManager::install(\"MGnifyR\")"},{"path":"/articles/MGnifyR.html","id":"load-mgnifyr-package","dir":"Articles","previous_headings":"","what":"Load MGnifyR package","title":"MGnifyR","text":"installed, MGnifyR made available usual way.","code":"library(MGnifyR) #> Loading required package: mia #> Loading required package: SummarizedExperiment #> Loading required package: MatrixGenerics #> Loading required package: matrixStats #> #> Attaching package: 'MatrixGenerics' #> The following objects are masked from 'package:matrixStats': #> #> colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse, #> colCounts, colCummaxs, colCummins, colCumprods, colCumsums, #> colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs, #> colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats, #> colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds, #> colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads, #> colWeightedMeans, colWeightedMedians, colWeightedSds, #> colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet, #> rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods, #> rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps, #> rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins, #> rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks, #> rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars, #> rowWeightedMads, rowWeightedMeans, rowWeightedMedians, #> rowWeightedSds, rowWeightedVars #> Loading required package: GenomicRanges #> Loading required package: stats4 #> Loading required package: BiocGenerics #> #> Attaching package: 'BiocGenerics' #> The following objects are masked from 'package:stats': #> #> IQR, mad, sd, var, xtabs #> The following objects are masked from 'package:base': #> #> anyDuplicated, aperm, append, as.data.frame, basename, cbind, #> colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find, #> get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply, #> match, mget, order, paste, pmax, pmax.int, pmin, pmin.int, #> Position, rank, rbind, Reduce, rownames, sapply, setdiff, table, #> tapply, union, unique, unsplit, which.max, which.min #> Loading required package: S4Vectors #> #> Attaching package: 'S4Vectors' #> The following object is masked from 'package:utils': #> #> findMatches #> The following objects are masked from 'package:base': #> #> expand.grid, I, unname #> Loading required package: IRanges #> Loading required package: GenomeInfoDb #> Loading required package: Biobase #> Welcome to Bioconductor #> #> Vignettes contain introductory material; view with #> 'browseVignettes()'. To cite Bioconductor, see #> 'citation(\"Biobase\")', and for packages 'citation(\"pkgname\")'. #> #> Attaching package: 'Biobase' #> The following object is masked from 'package:MatrixGenerics': #> #> rowMedians #> The following objects are masked from 'package:matrixStats': #> #> anyMissing, rowMedians #> Loading required package: SingleCellExperiment #> Loading required package: TreeSummarizedExperiment #> Loading required package: Biostrings #> Loading required package: XVector #> #> Attaching package: 'Biostrings' #> The following object is masked from 'package:base': #> #> strsplit #> Loading required package: MultiAssayExperiment #> Loading required package: biomformat"},{"path":"/articles/MGnifyR.html","id":"create-a-client","dir":"Articles","previous_headings":"","what":"Create a client","title":"MGnifyR","text":"functions MGnifyR make use MgnifyClient object keep track JSONAPI url, disk cache location user access tokens. Thus first thing starting analysis instantiate object. following snippet creates . MgnifyClient object contains slots previously mentioned settings.","code":"mg <- MgnifyClient(useCache = TRUE) mg #> An object of class \"MgnifyClient\" #> Slot \"databaseUrl\": #> [1] \"https://www.ebi.ac.uk/metagenomics/api/v1\" #> #> Slot \"authTok\": #> [1] NA #> #> Slot \"useCache\": #> [1] TRUE #> #> Slot \"cacheDir\": #> [1] \"/tmp/RtmpeMUYT9/.MGnifyR_cache\" #> #> Slot \"showWarnings\": #> [1] FALSE #> #> Slot \"clearCache\": #> [1] FALSE #> #> Slot \"verbose\": #> [1] TRUE"},{"path":[]},{"path":"/articles/MGnifyR.html","id":"search-data","dir":"Articles","previous_headings":"Functions for fetching the data","what":"Search data","title":"MGnifyR","text":"doQuery() function can utilized search results samples studies MGnify database. , fetch information drinking water samples. result table containing accession IDs description – case – samples.","code":"# Fetch studies samples <- doQuery( mg, type = \"samples\", biome_name = \"root:Environmental:Aquatic:Freshwater:Drinking water\", max.hits = 10) colnames(samples) |> head() #> [1] \"biosample\" \"accession\" \"sample-desc\" #> [4] \"environment-biome\" \"environment-feature\" \"environment-material\""},{"path":"/articles/MGnifyR.html","id":"find-relevent-analyses-accessions","dir":"Articles","previous_headings":"Functions for fetching the data","what":"Find relevent analyses accessions","title":"MGnifyR","text":"Now want find analysis accessions. sample might multiple analyses. analysis ID corresponds single run particular pipeline single sample single study. running searchAnalysis() function, get vector analysis IDs samples fed input.","code":"analyses_accessions <- searchAnalysis(mg, \"samples\", samples$accession) analyses_accessions |> head() #> [1] \"MGYA00652201\" \"MGYA00652185\" \"MGYA00643487\" \"MGYA00643486\" \"MGYA00643485\" #> [6] \"MGYA00643484\""},{"path":"/articles/MGnifyR.html","id":"fetch-metadata","dir":"Articles","previous_headings":"Functions for fetching the data","what":"Fetch metadata","title":"MGnifyR","text":"can now check metadata get hint kind data . use getMetadata() function fetch data based analysis IDs. returned value data.frame includes metadata example analysis conducted kind samples analyzed.","code":"analyses_metadata <- getMetadata(mg, analyses_accessions) colnames(analyses_metadata) |> head() #> [1] \"analysis_analysis-status\" \"analysis_pipeline-version\" #> [3] \"analysis_experiment-type\" \"analysis_accession\" #> [5] \"analysis_is-private\" \"analysis_complete-time\""},{"path":"/articles/MGnifyR.html","id":"fetch-microbiome-data","dir":"Articles","previous_headings":"Functions for fetching the data","what":"Fetch microbiome data","title":"MGnifyR","text":"selected data fetch, can use getResult() output TreeSummarizedExperiment (TreeSE) MultiAssayExperiment (MAE) depending dataset. dataset includes taxonomic profiling data, output single TreeSE. dataset includes also functional data, output multiple TreeSE objects linked together utilizing MAE. can get access individual TreeSE object MAE specifying index name. TreeSE object uniquely positioned support SummarizedExperiment-based microbiome data manipulation visualization. Moreover, enables access miaverse tools. example, can estimate diversity samples… … plot abundances abundant phyla. can also perform analyses principal component analysis microbial profiling data utilizing miaverse tools.","code":"mae <- getResult(mg, accession = analyses_accessions) mae #> A MultiAssayExperiment object of 6 listed #> experiments with user-defined names and respective classes. #> Containing an ExperimentList class object of length 6: #> [1] microbiota: TreeSummarizedExperiment with 3506 rows and 50 columns #> [2] go-slim: TreeSummarizedExperiment with 116 rows and 38 columns #> [3] go-terms: TreeSummarizedExperiment with 3133 rows and 38 columns #> [4] interpro-identifiers: TreeSummarizedExperiment with 18223 rows and 38 columns #> [5] taxonomy: TreeSummarizedExperiment with 3617 rows and 50 columns #> [6] taxonomy-lsu: TreeSummarizedExperiment with 3378 rows and 42 columns #> Functionality: #> experiments() - obtain the ExperimentList instance #> colData() - the primary/phenotype DataFrame #> sampleMap() - the sample coordination DataFrame #> `$`, `[`, `[[` - extract colData columns, subset, or experiment #> *Format() - convert into a long or wide DataFrame #> assays() - convert ExperimentList to a SimpleList of matrices #> exportClass() - save data to flat files mae[[1]] #> class: TreeSummarizedExperiment #> dim: 3506 50 #> metadata(0): #> assays(1): counts #> rownames(3506): 82608 62797 ... 5820 6794 #> rowData names(9): Kingdom Phylum ... taxonomy1 taxonomy #> colnames(50): MGYA00144458 MGYA00144419 ... MGYA00652185 MGYA00652201 #> colData names(64): analysis_analysis.status analysis_pipeline.version #> ... sample_geo.loc.name sample_instrument.model #> reducedDimNames(0): #> mainExpName: NULL #> altExpNames(0): #> rowLinks: NULL #> rowTree: NULL #> colLinks: NULL #> colTree: NULL mae[[1]] <- estimateDiversity(mae[[1]], index = \"shannon\") library(scater) #> Loading required package: scuttle #> Loading required package: ggplot2 plotColData(mae[[1]], \"shannon\", x = \"sample_environment..biome.\") # Agglomerate data altExps(mae[[1]]) <- splitByRanks(mae[[1]]) library(miaViz) #> Loading required package: ggraph # Plot top taxa top_taxa <- getTopFeatures(altExp(mae[[1]], \"Phylum\"), 10) plotAbundance(altExp(mae[[1]], \"Phylum\")[top_taxa, ], rank = \"Phylum\") # Apply relative transformation mae[[1]] <- transformAssay(mae[[1]], method = \"relabundance\") # Perform PCoA mae[[1]] <- runMDS( mae[[1]], assay.type = \"relabundance\", FUN = vegan::vegdist, method = \"bray\") # Plot plotReducedDim( mae[[1]], \"MDS\", colour_by = \"sample_environment..biome.\")"},{"path":"/articles/MGnifyR.html","id":"fetch-raw-files","dir":"Articles","previous_headings":"Functions for fetching the data","what":"Fetch raw files","title":"MGnifyR","text":"getResult() can utilized retrieve microbial profiling data, getData() can used flexibly retrieve kind data database. returns data simple data.frame list format. result data.frame default. case, includes information publications fetched data portal.","code":"publications <- getData(mg, type = \"publications\") colnames(publications) |> head() #> [1] \"document.id\" \"type\" #> [3] \"id\" \"attributes.pubmed-id\" #> [5] \"attributes.pubmed-central-id\" \"attributes.pub-title\""},{"path":"/articles/MGnifyR.html","id":"fetch-sequence-files","dir":"Articles","previous_headings":"Functions for fetching the data","what":"Fetch sequence files","title":"MGnifyR","text":"Finally, can use searchFile() getFile() retrieve MGnify pipeline outputs merged sequence reads, assembled contigs, details functional analyses. searchFile(), can search files database. returned table contains search results related analyses fed input. table contains information file also URL address file can loaded. Finally, can download files getFile(). function returns path file stored.","code":"dl_urls <- searchFile(mg, analyses_accessions, type = \"analyses\") target_urls <- dl_urls[ dl_urls$attributes.description.label == \"Predicted alpha tmRNA\", ] colnames(target_urls) |> head() #> [1] \"type\" \"id\" #> [3] \"attributes.alias\" \"attributes.file.format.name\" #> [5] \"attributes.file.format.extension\" \"attributes.file.format.compression\" # Just select a single file from the target_urls list for demonstration. file_url <- target_urls$download_url[[1]] cached_location <- getFile(mg, file_url) # Where are the files? cached_location #> [1] \"/.MGnifyR_cache/analyses/MGYA00652201/file/ERZ20300939_alpha_tmRNA.RF01849.fasta.gz\" sessionInfo() #> R Under development (unstable) (2024-03-24 r86185) #> Platform: x86_64-pc-linux-gnu #> Running under: Ubuntu 22.04.4 LTS #> #> Matrix products: default #> BLAS: /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3 #> LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.20.so; LAPACK version 3.10.0 #> #> locale: #> [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C #> [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8 #> [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8 #> [7] LC_PAPER=en_US.UTF-8 LC_NAME=C #> [9] LC_ADDRESS=C LC_TELEPHONE=C #> [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C #> #> time zone: UTC #> tzcode source: system (glibc) #> #> attached base packages: #> [1] stats4 stats graphics grDevices utils datasets methods #> [8] base #> #> other attached packages: #> [1] miaViz_1.11.0 ggraph_2.2.1 #> [3] scater_1.31.2 ggplot2_3.5.0 #> [5] scuttle_1.13.1 MGnifyR_0.99.27 #> [7] biomformat_1.31.0 mia_1.11.1 #> [9] MultiAssayExperiment_1.29.1 TreeSummarizedExperiment_2.11.0 #> [11] Biostrings_2.71.5 XVector_0.43.1 #> [13] SingleCellExperiment_1.25.0 SummarizedExperiment_1.33.3 #> [15] Biobase_2.63.0 GenomicRanges_1.55.4 #> [17] GenomeInfoDb_1.39.9 IRanges_2.37.1 #> [19] S4Vectors_0.41.5 BiocGenerics_0.49.1 #> [21] MatrixGenerics_1.15.0 matrixStats_1.2.0 #> [23] knitr_1.45 BiocStyle_2.31.0 #> #> loaded via a namespace (and not attached): #> [1] jsonlite_1.8.8 tidyjson_0.3.2 #> [3] magrittr_2.0.3 ggbeeswarm_0.7.2 #> [5] farver_2.1.1 rmarkdown_2.26 #> [7] fs_1.6.3 zlibbioc_1.49.3 #> [9] ragg_1.3.0 vctrs_0.6.5 #> [11] memoise_2.0.1 DelayedMatrixStats_1.25.1 #> [13] RCurl_1.98-1.14 ggtree_3.11.1 #> [15] htmltools_0.5.8 S4Arrays_1.3.6 #> [17] BiocBaseUtils_1.5.1 BiocNeighbors_1.21.2 #> [19] Rhdf5lib_1.25.1 gridGraphics_0.5-1 #> [21] SparseArray_1.3.4 rhdf5_2.47.6 #> [23] sass_0.4.9 bslib_0.6.2 #> [25] desc_1.4.3 plyr_1.8.9 #> [27] DECIPHER_2.31.3 cachem_1.0.8 #> [29] igraph_2.0.3 lifecycle_1.0.4 #> [31] pkgconfig_2.0.3 rsvd_1.0.5 #> [33] Matrix_1.7-0 R6_2.5.1 #> [35] fastmap_1.1.1 GenomeInfoDbData_1.2.11 #> [37] aplot_0.2.2 digest_0.6.35 #> [39] ggnewscale_0.4.10 colorspace_2.1-0 #> [41] patchwork_1.2.0 irlba_2.3.5.1 #> [43] textshaping_0.3.7 vegan_2.6-4 #> [45] beachmat_2.19.2 labeling_0.4.3 #> [47] fansi_1.0.6 urltools_1.7.3 #> [49] polyclip_1.10-6 httr_1.4.7 #> [51] abind_1.4-5 mgcv_1.9-1 #> [53] compiler_4.4.0 withr_3.0.0 #> [55] BiocParallel_1.37.1 viridis_0.6.5 #> [57] DBI_1.2.2 highr_0.10 #> [59] ggforce_0.4.2 MASS_7.3-60.2 #> [61] DelayedArray_0.29.9 bluster_1.13.0 #> [63] permute_0.9-7 tools_4.4.0 #> [65] vipor_0.4.7 beeswarm_0.4.0 #> [67] ape_5.7-1 glue_1.7.0 #> [69] nlme_3.1-164 rhdf5filters_1.15.4 #> [71] grid_4.4.0 cluster_2.1.6 #> [73] reshape2_1.4.4 generics_0.1.3 #> [75] gtable_0.3.4 tidyr_1.3.1 #> [77] tidygraph_1.3.1 BiocSingular_1.19.0 #> [79] ScaledMatrix_1.11.1 utf8_1.2.4 #> [81] ggrepel_0.9.5 pillar_1.9.0 #> [83] stringr_1.5.1 yulab.utils_0.1.4 #> [85] splines_4.4.0 tweenr_2.0.3 #> [87] dplyr_1.1.4 treeio_1.27.0 #> [89] lattice_0.22-6 tidyselect_1.2.1 #> [91] DirichletMultinomial_1.45.0 gridExtra_2.3 #> [93] bookdown_0.38 xfun_0.43 #> [95] graphlayouts_1.1.1 stringi_1.8.3 #> [97] ggfun_0.1.4 lazyeval_0.2.2 #> [99] yaml_2.3.8 evaluate_0.23 #> [101] codetools_0.2-19 tibble_3.2.1 #> [103] BiocManager_1.30.22 ggplotify_0.1.2 #> [105] cli_3.6.2 systemfonts_1.0.6 #> [107] munsell_0.5.0 jquerylib_0.1.4 #> [109] Rcpp_1.0.12 triebeard_0.4.1 #> [111] parallel_4.4.0 pkgdown_2.0.7 #> [113] assertthat_0.2.1 sparseMatrixStats_1.15.0 #> [115] bitops_1.0-7 decontam_1.23.0 #> [117] viridisLite_0.4.2 tidytree_0.4.6 #> [119] scales_1.3.0 purrr_1.0.2 #> [121] crayon_1.5.2 rlang_1.1.3"},{"path":"/articles/MGnifyR_long.html","id":"introduction","dir":"Articles","previous_headings":"","what":"Introduction","title":"MGnifyR, extended vignette","text":"MGnifyR package designed ease access EBI’s MGnify resource, allowing searching retrieval multiple datasets downstream analysis. MGnify pipelines undoubtedly useful, currently implemented produce results strictly per-sample basis. whole study results available, comparisons across studies difficult. MGnifyR package designed facilitate cross-study analyses handling per-sample data retrieval merging details internally, leaving user free perform analysis see fit. latest version MGnifyR seamlessly integrates miaverse framework providing access tools microbiome -stream analytics. integration enables users leverage optimized standardized methods analyzing microbiome. Additionally, users can benefit comprehensive tutorial book offers valuable guidance support.","code":""},{"path":"/articles/MGnifyR_long.html","id":"installation","dir":"Articles","previous_headings":"","what":"Installation","title":"MGnifyR, extended vignette","text":"MGnifyR currently hosted GitHub, can installed using via devtools. MGnifyR built using following snippet.","code":"BiocManager::install(\"MGnifyR\")"},{"path":"/articles/MGnifyR_long.html","id":"load-mgnifyr-package","dir":"Articles","previous_headings":"","what":"Load MGnifyR package","title":"MGnifyR, extended vignette","text":"installed, MGnifyR made available usual way.","code":"library(MGnifyR)"},{"path":"/articles/MGnifyR_long.html","id":"create-a-client","dir":"Articles","previous_headings":"","what":"Create a client","title":"MGnifyR, extended vignette","text":"functions MGnifyR make use MgnifyClient object keep track JSONAPI url, disk cache location user access tokens. Thus first thing starting analysis instantiate object. following snippet creates . ’s recommended local caching enabled useCache = TRUE. Queries MGnify API can quite slow, particularly retrieving multipage results many analyses (many Interpro results). Using local disk cache can significantly speed subsequent work, bypassing need re-query API. Use cache entirely transparent, caching occurs raw data level. cache can persist across MGnifyR sessions, can even used multiple sessions simultaneously - provided different sets accessions queried . Optionally, username password may specified client creation, causing MGnifyR attempt retrieval authentication token API. gives access non-public results, currently author imposed embargo period.","code":"mg <- MgnifyClient() mg mg <- MgnifyClient( username = \"Webin-username\", password = \"your-password\", useCache = TRUE)"},{"path":[]},{"path":"/articles/MGnifyR_long.html","id":"search-data","dir":"Articles","previous_headings":"Functions for fetching the data","what":"Search data","title":"MGnifyR, extended vignette","text":"MGnifyR gives users access complete range search functionality implemented MGnify JSON API. single function doQuery() used perform searching, allowing Studies, Samples, Runs Accession interrogated common interface. MGnifyR functions first argument client must valid MgnifyClient instance. remaining required parameter qtype, specifying type data queried, may one studies, samples, runs, analyses assemblies. general parameter include max.hits. Unlike MGnifyR high level functions, caching turned default doQuery(). New data analyses added MGnify time, enabling caching default may lead --date search results long-lived sessions. However, ’s easy switch back , may useful many cases. Also, given huge ever increasing number datasets available MGnify, limit number results returned may set using max.hits. default set 200, exploratory queries sufficient. may increased decreased directly specifying max.hits, disabled completely (limit) setting max.hits=NULL. cases want specific search, also use either accession parameter, many filter options available API, discussed . Specifying accession id, case samples, runs assemblies may vector ids, returns data.frame metadata one row per matching accession. accession NULL (default) remaining parameters define filters applied API search result. Details parameters given help(doQuery). way example though, supposing interested amplicon Illumina samples arctic, might try following query: Specifying accession parameter restrict results just matching particular entry, study, sample run. example, retrieve information study “MGYS00002891”:","code":"northpolar <- doQuery( mg, \"samples\", latitude_gte=60.0, experiment_type=\"amplicon\", biome_name=\"Soil\", instrument_platform = \"Illumina\", max.hits = 10) head(northpolar) study_samples <- doQuery(mg, \"studies\", accession=\"MGYS00002891\") head(study_samples)"},{"path":"/articles/MGnifyR_long.html","id":"find-relevent-analyses-accessions","dir":"Articles","previous_headings":"Functions for fetching the data","what":"Find relevent analyses accessions","title":"MGnifyR, extended vignette","text":"obtained particular set search hits, ’s now time retrieve associated results. General automated analysis complicated MGnify database design, wherein example samples may shared multiple studies, studies analysed multiple times using different versions pipeline. Navigating “many--one” relationships can tricky, MGnifyR resorts using analyses accessions ’s canonical identifier. analysis corresponds single run particular pipeline single sample single study. downside approach queries returning studies, samples (anything analyses) accessions need converting corresponding analyses. MGnifyR therefore provides helper function handle conversion - searchAnalysis(). Following previous search, list study accessions, convert corresponding analyses use: useful side effect call attribute metadata sample now retrieved stored local cache. Thus subsequent API calls samples (occur multiple times later steps) significantly faster. ’s important aware results searchAnalysis() command necessarily one--one match input accessions. MGnify analysis runs sometimes performed multiple times, perhaps using different versions pipeline. Thus filtering result list may required, easily performed illustrated next section.","code":"analyses_accessions <- searchAnalysis( mg, type=\"studies\", accession = study_samples$accession) head(analyses_accessions)"},{"path":"/articles/MGnifyR_long.html","id":"fetch-metadata","dir":"Articles","previous_headings":"Functions for fetching the data","what":"Fetch metadata","title":"MGnifyR, extended vignette","text":"point long list analysis instances (potential duplicates) corresponding samples previously found. use getMetadata function download combine associated sample, run study metadata, filter required include rows want. resulting data.frame columns names prefixed source type. example, “sample_xxx” columns correspond metadata gleaned querying accession’s sample entry. MGnify allows quite flexible specification arbitray metadata submission time, many cases leading quite sparse data.frame results accession queries sourced one study. instance, one sample contains entry “sample_soil_PH”, entries rows filled NA. MGnifyR automatically clean missing values - instead opting allow user choose correct action. particular study ’re looking marine biome, suppose interested samples analyses sampling depth known. following snippet filters full data.frame selecting entries contain valid sample_depth. ’s worth noting .numeric call ensure column converted numeric type checked. sample data MGnifyR initially retrieved type character, ’s user make sure ostensibly numeric entries converted properly.","code":"analyses_metadata <- getMetadata(mg, analyses_accessions) head(analyses_metadata) known_depths <- analyses_metadata[ !is.na(as.numeric(analyses_metadata$sample_depth)), ] # How many are left? dim(known_depths)"},{"path":"/articles/MGnifyR_long.html","id":"fetch-microbiome-data","dir":"Articles","previous_headings":"Functions for fetching the data","what":"Fetch microbiome data","title":"MGnifyR, extended vignette","text":"selected analyses wish examine , getResult() used download associated OTU tables taxonomy, join results single TreeSummarizedExperiment (TreeSE) object. TreeSE becoming defacto standard taxonomic abundance munging R. TreeSE objects integrate abundance, taxonomic, phylogenetic, sample sequence data single object, powerful facilities filtering, processing plotting results. Compared phyloseq object, TreeSE scalable capable efficient data analysis. miaverse framework developed around TreeSE data container. provides tools analysis visualization. Moreover, includes comprehensive tutorial book called OMA.","code":""},{"path":"/articles/MGnifyR_long.html","id":"amplicon-sequencing","dir":"Articles","previous_headings":"Functions for fetching the data > Fetch microbiome data","what":"Amplicon sequencing","title":"MGnifyR, extended vignette","text":"dataset includes amplicon sequencing data, .e., dataset include function predictions, getResult() method returns dataset TreeSE default. See output types function documentation. TreeSE object uniquely positioned support SummarizedExperiment-based microbiome data manipulation visualization. Moreover, enables access miaverse tools. example, can estimate diversity samples. needed, TreeSE can converted phyloseq.","code":"tse <- getResult(mg, accession = analyses_accessions, get.func = FALSE) tse tse <- estimateDiversity(tse, index = \"shannon\") library(scater) plotColData(tse, \"shannon\", x = \"sample_geo.loc.name\") library(miaViz) plotAbundance(tse[!is.na( rowData(tse)[[\"Kingdom\"]] ), ], rank = \"Kingdom\") pseq <- makePhyloseqFromTreeSE(tse) pseq"},{"path":"/articles/MGnifyR_long.html","id":"metagenomics","dir":"Articles","previous_headings":"Functions for fetching the data > Fetch microbiome data","what":"Metagenomics","title":"MGnifyR, extended vignette","text":"Although previous queries based results doQuery(), now concentrate combining comparing results specific studies. Since newly performed analyses retrieved first doQuery() call, ’s likely time vignette read, query results different. principally due rapid increase MGnify submissions, leading potential lack consistency even closely spaced queries. mentioned previously, may best use useCache=FALSE MgnifyCLient object doQuery() calls, ensure queries actually returning latest data. remainder vignette however, ’ll comparing 3 ostensibly different studies. study saltmarsh soils York University, human faecal samples survey healthy Sardinians, set samples hydrothermal vents Mid-Cayman rise Carribbean Sea. simplify things, first 20 samples study used. Furthermore, intention demonstrate functionality MGnifyR package, rather produce scientifically rigorous results. first step new accession list , previously, retrieve associated metadata using getMetadata(), seen doQuery() results, returned data.frame contains large number columns. autogenerated flexible, column names can little difficult predict, examining colnames(full_metadata) make things clearer. full_metadata get idea type data ’re dealing , can extract useul information sequencing platform, source biome, etc. next code snippet tallies columns give idea ’s available. boxplot also indicates within study read counts similar, probably need use sort normalization procedure comparing across samples. might also want drop particularly low read coverage samples analysis. , can fetch data calling getResult(). bulk.dl=TRUE potential significantly speed data retrieval. MGnify makes functional results available two separate ways, either per-analysis basis web api, whole study level large files, tab separated (TSV), columns representing results analysis. bulk.dl FALSE, MGnifyR queries web api get results (given functional analyses results may consist thousands entries) may take significant time. Setting bulk.dl TRUE causes MGnifyR determine source study associated particular analysis instead download parse corresponding results file. Since result file contains entries analyses associated study, taking advantage MGnifyR’s local caching single download provides results many future analyses. cases affords several orders magnitude speedup api query case. Unfortunately, column entries per-study results files always directly correspond particular analysis run, causing retrieval fail. principal cause believed running multiple analyses jobs sample. Thus reliability, bulk.dl FALSE default. general recommendation though, try setting TRUE first time getResult() used set accessions. fails, setting bulk.dl FALSE enable robust approach allowing analysis continue. might take though. Hopefully future sample/analysis correspondence mismatches fixed default bulk.dl switch TRUE. metagenomic samples, result MultiAssayExperiment (MAE) links multiple TreeSE objects one dataset. TreeSE objects include taxonomic profiling data along functional data unique objects. objects linked sample names. can get access individual object experiment specifying index name. can perform principal component analysis microbial profiling data utilizing miaverse tools.","code":"soil <- searchAnalysis(mg, \"studies\", \"MGYS00001447\") human <- searchAnalysis(mg, \"studies\", \"MGYS00001442\") marine <- searchAnalysis(mg, \"studies\", \"MGYS00001282\") # Combine analyses all_accessions <- c(soil, human, marine) head(all_accessions) full_metadata <- getMetadata(mg, all_accessions) colnames(full_metadata) head(full_metadata) # Load ggplot2 library(ggplot2) #Distribution of sample source material: table(full_metadata$`sample_environment-material`) #What sequencing machine(s) were used? table(full_metadata$`sample_instrument model`) # Boxplot of raw read counts: ggplot( full_metadata, aes(x=study_accession, y=log( as.numeric(`analysis_Submitted nucleotide sequences`)))) + geom_boxplot(aes(group=study_accession)) + theme_bw() + ylab(\"log(submitted reads)\") mae <- getResult(mg, all_accessions, bulk.dl = TRUE) mae mae[[2]] # Apply relative transformation mae[[1]] <- transformAssay(mae[[1]], method = \"relabundance\") # Perform PCoA mae[[1]] <- runMDS( mae[[1]], assay.type = \"relabundance\", FUN = vegan::vegdist, method = \"bray\") # Plot plotReducedDim(mae[[1]], \"MDS\", colour_by = \"sample_environment.feature\")"},{"path":"/articles/MGnifyR_long.html","id":"fetch-raw-files","dir":"Articles","previous_headings":"Functions for fetching the data","what":"Fetch raw files","title":"MGnifyR, extended vignette","text":"getResult() can utilized retrieve microbial profiling data, getData() can used flexibly retrieve kind data database. returns data simple data.frame list format.","code":"kegg <- getData( mg, type = \"kegg-modules\", accession = \"MGYA00642773\", accession.type = \"analyses\") head(kegg)"},{"path":"/articles/MGnifyR_long.html","id":"fetch-sequence-files","dir":"Articles","previous_headings":"Functions for fetching the data","what":"Fetch sequence files","title":"MGnifyR, extended vignette","text":"Finally, can use searchFile() getFile() retrieve MGnify pipeline outputs merged sequence reads, assembled contigs, details functional analyses. searchFile() simple wrapper function , supplied list accessions, finds urls files ’re . cases ’ll want filter returned list files interest, easily done resulting data.frame object. addition actual download location (download_url column), extra columns include file type, contents compression. ’s recommended colnames data.frame examined get grasp available metadata. demonstrate process, code retrieves data.frame containing available downloads accession ’ve examining previously. filters retain files corresponding retain annotated amino acid sequence files. list types available files, guide filtering, something like following might useful. Unlike MGnifyR functions, searchFile() limited analyses, specifying accession_type results types may found. instance, general genome functionality yet integrated MGnifyR, can retrieve associated files particular genome accession following: found set target urls, final step use getFile() actually retrieve file. Unlike functions, works single url location , entry target_urls must downloaded individually - easily done either looping applying list. files intended used external programs, might easiest provide file parameter function call, specifies local filename writing file. default MGnifyR use local cache, can make getting file afterwards awkward. Regardless, default behaviour getFile() retrieve file specified parameter url, save disk, return filepath saved . second download option available, allows built-parsing file. know ahead time processing performed, may possible integrate function, pass function getFile() read.func argument. function question take single argument (complete path name locally downloaded file) result call returned place usual output file name. Alternatively files first downloaded standard way, processed using function loop. Therefore many cases read.func parameter redundant. However, many outputs MGnify can quite large, meaning local storage many files may become issue. providing read_func parameter (necessarily setting MgnifyClient object: useCache=FALSE) analysis large number datasets may possible minimal storage requirements. illustrate, suppose interested retrieving detected sequences matching particular PFAM motif set analyses. simple function uses Biostrings package read amino acid fasta file, searches matching PFAM tag sequence name, tallies unique sequences single data.frame row. case PFAM motif identifies sequences coding amoC gene, found ammonia methane oxidizing organisms, filtering method used. defined function, just remains include call getFile().","code":"# Find list of available downloads dl_urls <- searchFile( mg, full_metadata$analysis_accession, type = \"analyses\") # Filter table target_urls <- dl_urls[ dl_urls$attributes.description.label == \"Predicted CDS with annotation\", ] head(target_urls) table(dl_urls$attributes.description.label) genome_urls <- searchFile(mg, \"MGYG000433953\", type = \"genomes\") genome_urls[ , c(\"id\", \"attributes.file.format.name\", \"download_url\")] # Just select a single file from the target_urls list for demonstration. # Default behavior - use local cache. cached_location1 = getFile(mg, target_urls$download_url[[1]]) # Specifying a file cached_location2 <- getFile( mg, target_urls$download_url[[1]]) cached_location <- c(cached_location1, cached_location2) # Where are the files? cached_location library(Biostrings) # Simple function to a count of unique sequences matching PFAM amoC/mmoC motif getAmoCseqs <- function(fname){ sequences <- readAAStringSet(fname) tgtvec <- grepl(\"PF04896\", names(sequences)) as.data.frame(as.list(table(as.character(sequences[tgtvec])))) } # Just download a single accession for demonstration, specifying a read_function amoC_seq_counts <- getFile( mg, target_urls$download_url[[1]], read_func = getAmoCseqs) amoC_seq_counts sessionInfo()"},{"path":"/authors.html","id":null,"dir":"","previous_headings":"","what":"Authors","title":"Authors and Citation","text":"Tuomas Borman. Author, maintainer. Ben Allen. Author. Leo Lahti. Author.","code":""},{"path":"/authors.html","id":"citation","dir":"","previous_headings":"","what":"Citation","title":"Authors and Citation","text":"Borman T, Allen B, Lahti L (2024). MGnifyR: R interface EBI MGnify metagenomics resource. R package version 0.99.27, https://github.com/EBI-Metagenomics/MGnifyR.","code":"@Manual{, title = {MGnifyR: R interface to EBI MGnify metagenomics resource}, author = {Tuomas Borman and Ben Allen and Leo Lahti}, year = {2024}, note = {R package version 0.99.27}, url = {https://github.com/EBI-Metagenomics/MGnifyR}, }"},{"path":"/index.html","id":"mgnifyr-","dir":"","previous_headings":"","what":"R interface to EBI MGnify metagenomics resource","title":"R interface to EBI MGnify metagenomics resource","text":"R package searching retrieving data EBI Metagenomics resource. cases, MGnifyR interacts directly JSONAPI, rather relying downloading analyses outputs TSV files. Thus general - allowing example intuitive combining multiple studies analyses single workflow, cases slower afformentioned direct access. Local caching results disk implemented help counter overheads, data downloads can slow - particularly functional annotation retrieval. MGnifyR package part miaverse microbiome analysis ecosystem enabling usage mia miaverse packages. research received funding Horizon 2020 Programme European Union within FindingPheno project grant agreement 952914. FindingPheno, EU-funded project, dedicated developing computational tools methodologies integration analysis multi-omics data. primary objective deepen understanding interactions hosts microbiomes. can find information FindingPheno website.","code":""},{"path":"/index.html","id":"requirements","dir":"","previous_headings":"","what":"Requirements","title":"R interface to EBI MGnify metagenomics resource","text":"","code":"devtools # for installation mia plyr dplyr reshape2 httr urltools"},{"path":[]},{"path":"/index.html","id":"bioc-release","dir":"","previous_headings":"Installation","what":"Bioc-release","title":"R interface to EBI MGnify metagenomics resource","text":"","code":"if (!requireNamespace(\"BiocManager\", quietly = TRUE)) install.packages(\"BiocManager\") BiocManager::install(\"MGnifyR\")"},{"path":"/index.html","id":"bioc-devel","dir":"","previous_headings":"Installation","what":"Bioc-devel","title":"R interface to EBI MGnify metagenomics resource","text":"","code":"if (!requireNamespace(\"BiocManager\", quietly = TRUE)) install.packages(\"BiocManager\") # The following initializes usage of Bioc devel BiocManager::install(version='devel') BiocManager::install(\"MGnifyR\")"},{"path":"/index.html","id":"github","dir":"","previous_headings":"Installation","what":"GitHub","title":"R interface to EBI MGnify metagenomics resource","text":"","code":"remotes::install_github(\"EBI-Metagenomics/MGnifyR\")"},{"path":"/index.html","id":"basic-usage","dir":"","previous_headings":"","what":"Basic usage","title":"R interface to EBI MGnify metagenomics resource","text":"detailed instructions read associated function help vignette (vignette(\"MGNifyR\"))","code":"library(MGnifyR) # Set up the MGnify client instance mgclnt <- MgnifyClient(usecache = TRUE, cache_dir = '/tmp/MGnify_cache') # Retrieve the list of analyses associated with a study accession_list <- searchAnalysis(mgclnt, \"studies\", \"MGYS00005058\", usecache = TRUE) # Download all associated study/sample and analysis metadata meta_dataframe <- getMetadata(mgclnt, accession_list, usecache = TRUE) # Convert analyses outputs to a single `MultiAssayExperiment` object mae <- getResult(mgclnt, meta_dataframe$analysis_accession, usecache = TRUE) mae"},{"path":"/reference/MGnifyR-package.html","id":null,"dir":"Reference","previous_headings":"","what":"MGnifyR Package. — MGnifyR-package","title":"MGnifyR Package. — MGnifyR-package","text":"MGnifyR implements interface EBI MGnify database. See vignette general introduction package. MGnify general MGnify information, API documentation details JSONAPI implementation.","code":""},{"path":[]},{"path":"/reference/MGnifyR-package.html","id":"author","dir":"Reference","previous_headings":"","what":"Author","title":"MGnifyR Package. — MGnifyR-package","text":"Maintainer: Tuomas Borman tuomas.v.borman@utu.fi (ORCID) Authors: Ben Allen ben.allen@ncl.ac.uk Leo Lahti leo.lahti@iki.fi (ORCID)","code":""},{"path":"/reference/MgnifyClient-accessors.html","id":null,"dir":"Reference","previous_headings":"","what":"MgnifyClient accessors and mutators — databaseUrl","title":"MgnifyClient accessors and mutators — databaseUrl","text":"MgnifyClient accessors mutators","code":""},{"path":"/reference/MgnifyClient-accessors.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"MgnifyClient accessors and mutators — databaseUrl","text":"","code":"databaseUrl(x) authTok(x) useCache(x) cacheDir(x) showWarnings(x) clearCache(x) verbose(x) databaseUrl(x) <- value authTok(x) <- value useCache(x) <- value cacheDir(x) <- value showWarnings(x) <- value clearCache(x) <- value verbose(x) <- value # S4 method for MgnifyClient databaseUrl(x) # S4 method for MgnifyClient authTok(x) # S4 method for MgnifyClient useCache(x) # S4 method for MgnifyClient cacheDir(x) # S4 method for MgnifyClient showWarnings(x) # S4 method for MgnifyClient clearCache(x) # S4 method for MgnifyClient verbose(x) # S4 method for MgnifyClient databaseUrl(x) <- value # S4 method for MgnifyClient authTok(x) <- value # S4 method for MgnifyClient useCache(x) <- value # S4 method for MgnifyClient cacheDir(x) <- value # S4 method for MgnifyClient showWarnings(x) <- value # S4 method for MgnifyClient clearCache(x) <- value # S4 method for MgnifyClient verbose(x) <- value"},{"path":"/reference/MgnifyClient-accessors.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"MgnifyClient accessors and mutators — databaseUrl","text":"x MgnifyClient object. value value added certain slot.","code":""},{"path":"/reference/MgnifyClient-accessors.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"MgnifyClient accessors and mutators — databaseUrl","text":"value MgnifyClient object nothing.","code":""},{"path":"/reference/MgnifyClient-accessors.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"MgnifyClient accessors and mutators — databaseUrl","text":"functions fetching mutating slots MgnifyClient object.","code":""},{"path":"/reference/MgnifyClient-accessors.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"MgnifyClient accessors and mutators — databaseUrl","text":"","code":"mg <- MgnifyClient() databaseUrl(mg) #> [1] \"https://www.ebi.ac.uk/metagenomics/api/v1\" showWarnings(mg) <- FALSE"},{"path":"/reference/MgnifyClient.html","id":null,"dir":"Reference","previous_headings":"","what":"Constructor for creating a MgnifyClient object to allow the access to\nMGnify database. — MgnifyClient","title":"Constructor for creating a MgnifyClient object to allow the access to\nMGnify database. — MgnifyClient","text":"Constructor creating MgnifyClient object allow access MGnify database. MgnifyClient object","code":""},{"path":"/reference/MgnifyClient.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Constructor for creating a MgnifyClient object to allow the access to\nMGnify database. — MgnifyClient","text":"","code":"MgnifyClient( username = NULL, password = NULL, useCache = FALSE, cacheDir = tempdir(), showWarnings = FALSE, verbose = TRUE, clearCache = FALSE, ... )"},{"path":"/reference/MgnifyClient.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Constructor for creating a MgnifyClient object to allow the access to\nMGnify database. — MgnifyClient","text":"username single character value specifying optional username authentication. (default: username = NULL) password single character value specifying optional password authentication. (default: password = NULL) useCache single boolean value specifying whether enable -disk caching results session. use cases TRUE. (default: useCache = FALSE) cacheDir single character value specifying folder contain local cache. Note cached files persistent, cache directory may reused sessions, taking advantage previously downloaded results. directory created exist already. (default: cacheDir = tempdir()) showWarnings single boolean value specifying whether print warnings invocation MGnifyR functions. (default: showWarnings = FALSE) verbose single boolean value specifying whether print extra output invocation MGnifyR functions. (default: verbose = FALSE) clearCache single boolean value specifying whether clear cache. (default: clearCache = FALSE) ... optional arguments: url single character value specifying url address database. (default: url = \"https://www.ebi.ac.uk/metagenomics/api/v1\")","code":""},{"path":"/reference/MgnifyClient.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Constructor for creating a MgnifyClient object to allow the access to\nMGnify database. — MgnifyClient","text":"MgnifyClient object.","code":""},{"path":"/reference/MgnifyClient.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Constructor for creating a MgnifyClient object to allow the access to\nMGnify database. — MgnifyClient","text":"functions MGnifyR package take MgnifyClient object first argument. essential querying raw MGnify API (exposed relative standard JSONAPI), object allows simple handling user authentication access private data, local -disk caching results. object required functions MGnifyR package.","code":""},{"path":"/reference/MgnifyClient.html","id":"slots","dir":"Reference","previous_headings":"","what":"Slots","title":"Constructor for creating a MgnifyClient object to allow the access to\nMGnify database. — MgnifyClient","text":"databaseUrl single character value specifying URL address database. authTok single character value specifying authentication token. useCache single boolean value specifying whether use cache. cacheDir single character value specifying cache directory. showWarnings single boolean value specifying whether show warnings. clearCache single boolean value specifying whether clear cache. verbose single boolean value specifying whether show messages.","code":""},{"path":"/reference/MgnifyClient.html","id":"constructor","dir":"Reference","previous_headings":"","what":"Constructor","title":"Constructor for creating a MgnifyClient object to allow the access to\nMGnify database. — MgnifyClient","text":"See MgnifyClient constructor.","code":""},{"path":"/reference/MgnifyClient.html","id":"accessor","dir":"Reference","previous_headings":"","what":"Accessor","title":"Constructor for creating a MgnifyClient object to allow the access to\nMGnify database. — MgnifyClient","text":"See MgnifyClient-accessors accessor functions.","code":""},{"path":"/reference/MgnifyClient.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Constructor for creating a MgnifyClient object to allow the access to\nMGnify database. — MgnifyClient","text":"","code":"my_client <- MgnifyClient( useCache = TRUE, cacheDir = \"/scratch/MGnify_cache_location\" ) if (FALSE) { # Use username and password to get access to non-public data my_client <- MgnifyClient( username = \"Webin-1122334\", password = \"SecretPassword\", useCache = TRUE, cacheDir = \"/scratch/MGnify_cache_location\" ) }"},{"path":"/reference/deprecate.html","id":null,"dir":"Reference","previous_headings":"","what":"These functions will be deprecated. Please use other functions instead. — deprecate","title":"These functions will be deprecated. Please use other functions instead. — deprecate","text":"functions deprecated. Please use functions instead.","code":""},{"path":"/reference/deprecate.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"These functions will be deprecated. Please use other functions instead. — deprecate","text":"","code":"mgnify_client( username = NULL, password = NULL, usecache = FALSE, cache_dir = NULL, warnings = FALSE, use_memcache = FALSE, ... ) mgnify_query( client, qtype = \"samples\", accession = NULL, asDataFrame = TRUE, maxhits = 200, usecache = FALSE, ... ) mgnify_analyses_from_samples(client, accession, usecache = TRUE, ...) mgnify_analyses_from_studies(client, accession, usecache = TRUE, ...) mgnify_get_download_urls( client, accessions, accession_type, usecache = TRUE, ... ) mgnify_download( client, url, file = NULL, read_func = NULL, usecache = TRUE, Debug = FALSE, ... ) mgnify_get_analyses_results( client = NULL, accessions, retrievelist = c(), compact_results = TRUE, usecache = TRUE, bulk_dl = FALSE, ... ) mgnify_get_analyses_phyloseq( client = NULL, accessions, usecache = TRUE, returnLists = FALSE, tax_SU = \"SSU\", get_tree = FALSE, ... ) mgnify_get_analyses_metadata(client, accessions, usecache = TRUE, ...) mgnify_retrieve_json( client, path = \"biomes\", complete_url = NULL, qopts = NULL, maxhits = 200, usecache = FALSE, Debug = FALSE )"},{"path":"/reference/deprecate.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"These functions will be deprecated. Please use other functions instead. — deprecate","text":"username - password - usecache - cache_dir - warnings - use_memcache - ... - client - qtype - accession - asDataFrame - maxhits - accessions - accession_type - url - file - read_func - Debug - retrievelist - compact_results - bulk_dl - returnLists - tax_SU - get_tree - path - complete_url - qopts -","code":""},{"path":"/reference/doQuery.html","id":null,"dir":"Reference","previous_headings":"","what":"Search MGnify database for studies, samples, runs, analyses, biomes,\nassemblies, and genomes. — doQuery","title":"Search MGnify database for studies, samples, runs, analyses, biomes,\nassemblies, and genomes. — doQuery","text":"Search MGnify database studies, samples, runs, analyses, biomes, assemblies, genomes.","code":""},{"path":"/reference/doQuery.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Search MGnify database for studies, samples, runs, analyses, biomes,\nassemblies, and genomes. — doQuery","text":"","code":"doQuery(x, ...) # S4 method for MgnifyClient doQuery( x, type = \"studies\", accession = NULL, as.df = TRUE, max.hits = 200, ... )"},{"path":"/reference/doQuery.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Search MGnify database for studies, samples, runs, analyses, biomes,\nassemblies, and genomes. — doQuery","text":"x MgnifyClient object. ... Remaining parameter key/value pairs may supplied filter returned values. Available options differ types. See discussion details. type single character value specifying type objects query. Must one following options: studies, samples, runs, analyses, biomes, assemblies, super-studies, experiment-types, pipelines, pipeline-tools, publications, genomes, genome-search, genome-search/gather, genome-catalogues, genomeset, cogs, kegg-modules, kegg-classes, antismash-geneclusters, annotations/go-terms, annotations/interpro-identifiers, annotations/kegg-modules, annotations/pfam-entries, annotations/kegg-orthologs, annotations/genome-properties, annotations/antismash-gene-clusters, annotations/organisms, mydata. (default: type = \"studies\") accession single character value vector character values specifying MGnify accession identifiers (type type) NULL. NULL, results defined parameters retrieved. (default: accession = NULL) .df single boolean value specifying whether return results data.frame leave nested list. cases, .df = TRUE make sense. (default: .df = TRUE) max.hits single integer value specifying maximum number results return FALSE. actual number results actually higher max.hits, clipping occurs pagination page boundaries. disable limit, set max.hits = NULL. (default: max.hits = 200)","code":""},{"path":"/reference/doQuery.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Search MGnify database for studies, samples, runs, analyses, biomes,\nassemblies, and genomes. — doQuery","text":"nested list data.frame containing results query.","code":""},{"path":"/reference/doQuery.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Search MGnify database for studies, samples, runs, analyses, biomes,\nassemblies, and genomes. — doQuery","text":"doQuery flexible query function, harnessing \"full\" power JSONAPI MGnify search filters. Search results may filtered metadata value, associated study/sample/analyse etc. See [Api browser](https://www.ebi.ac.uk/metagenomics/api/v1/) information MGnify database filters. can find help customizing queries [](https://emg-docs.readthedocs.io/en/latest/api.html#customising-queries). example following filters available: studies: accession, biome_name, lineage, centre_name, include samples: accession, experiment_type, biome_name, lineage, geo_loc_name, latitude_gte, latitude_lte, longitude_gte, longitude_lte, species, instrument_model, instrument_platform, metadata_key, metadata_value_gte, metadata_value_lte, metadata_value, environment_material, environment_feature, study_accession, include runs: accession, experiment_type, biome_name, lineage, species, instrument_platform, instrument_model, metdata_key, metadata_value_gte, metadata_value_lte, metadata_value, sample_accession, study_accession, include analyses: biome_name, lineage, experiment_type, species, sample_accession, pipeline_version biomes: depth_gte, depth_lte assemblies: depth_gte, depth_lte Unfortunately appears cases, filters work expected, important check results returned match expected. Even unfortunately error parameter specification, query run filter parameters present . Thus result appear superficially correct infact correspond something completely different. behaviour hopefully fixed future incarnations MGnifyR JSONAPI, now users double check returned values. currently possible combine queries type single call (example search samples latitude). However, possible run multiple queries combine results using set operations R get desired behaviour.","code":""},{"path":"/reference/doQuery.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Search MGnify database for studies, samples, runs, analyses, biomes,\nassemblies, and genomes. — doQuery","text":"","code":"mg <- MgnifyClient(useCache = FALSE) # Get a list of studies from the Agricultural Wastewater : agwaste_studies <- doQuery( mg, \"studies\", biome_name=\"Agricultural wastewater\" ) if (FALSE) { # Get all samples from a particular study samps <- doQuery(mg, \"samples\", accession=\"MGYS00004521\") # Search polar samples samps_np <- doQuery(mg, \"samples\", latitude_gte=66, max.hits=10) samps_sp <- doQuery(mg, \"samples\", latitude_lte=-66, max.hits=10) # Search studies that have studied drinking water tbl <- doQuery( mg, type = \"studies\", biome_name = \"root:Environmental:Aquatic:Freshwater:Drinking water\", max.hits = 10) }"},{"path":"/reference/getData.html","id":null,"dir":"Reference","previous_headings":"","what":"Versatile function to retrieve raw results — getData","title":"Versatile function to retrieve raw results — getData","text":"Versatile function retrieve raw results","code":""},{"path":"/reference/getData.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Versatile function to retrieve raw results — getData","text":"","code":"getData(x, ...) # S4 method for MgnifyClient getData(x, type, accession.type = NULL, accession = NULL, as.df = TRUE, ...)"},{"path":"/reference/getData.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Versatile function to retrieve raw results — getData","text":"x MgnifyClient object. ... optional arguments fed internal functions. type single character value specifying type data retrieve. Must one following options: studies, samples, runs, analyses, biomes, assemblies, super-studies, experiment-types, pipelines, pipeline-tools, publications, genomes, genome-search, genome-search/gather, genome-catalogues, genomeset, cogs, kegg-modules, kegg-classes, antismash-geneclusters, annotations/go-terms, annotations/interpro-identifiers, annotations/kegg-modules, annotations/pfam-entries, annotations/kegg-orthologs, annotations/genome-properties, annotations/antismash-gene-clusters, annotations/organisms, mydata. accession.type single character value specifying type accession IDs (accession). Must specified accession specified. (default: accession.type = NULL) accession single character value vector character values specifying accession IDs return results . (default: accession = NULL) .df single boolean value specifying whether return results data.frame leave nested list. (default: .df = TRUE)","code":""},{"path":"/reference/getData.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Versatile function to retrieve raw results — getData","text":"data.frame list","code":""},{"path":"/reference/getData.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Versatile function to retrieve raw results — getData","text":"function returns data MGnify database. Compared getResult, function allows flexible framework fetching data. However, drawbacks: counts data, getResult returns optimally structured data container easier downstream analysis. getData returns raw data database. However, want retrieve data pipelines publications, instance, getResult suitable , getData can utilized instead.","code":""},{"path":[]},{"path":"/reference/getData.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Versatile function to retrieve raw results — getData","text":"","code":"# Create a client object mg <- MgnifyClient(useCache = FALSE) # Find kegg modules for certain analysis df <- getData( mg, type = \"kegg-modules\", accession = \"MGYA00642773\", accession.type = \"analyses\")"},{"path":"/reference/getFile.html","id":null,"dir":"Reference","previous_headings":"","what":"Download any MGnify files, also including processed reads and\nidentified protein sequences — getFile","title":"Download any MGnify files, also including processed reads and\nidentified protein sequences — getFile","text":"Download MGnify files, also including processed reads identified protein sequences Listing files available download","code":""},{"path":"/reference/getFile.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Download any MGnify files, also including processed reads and\nidentified protein sequences — getFile","text":"","code":"getFile(x, ...) searchFile(x, ...) # S4 method for MgnifyClient getFile(x, url, file = NULL, read.func = NULL, ...) # S4 method for MgnifyClient searchFile( x, accession, type = c(\"studies\", \"samples\", \"analyses\", \"assemblies\", \"genomes\", \"run\"), ... )"},{"path":"/reference/getFile.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Download any MGnify files, also including processed reads and\nidentified protein sequences — getFile","text":"x MgnifyClient object. ... Additional arguments; used currently. url single character value specifying url address file wish download. file single character value NULL specifying optional local filename use saving file. NULL (default), MGNify local cache settings used. file intended processed separate program, may sensible provide meaningful file, rather hunt cache folders. file NULL useCache(client) FALSE, read.func parameter must supplied file downloaded deleted. (default: file = NULL) read.func function specifying optional function process downloaded file return results, rather relying post processing. primary use-case parameter local disk space limited downloaded files can quickly processed discarded. function take single parameter, downloaded filename, may return valid R object. (default: read.func = NULL) accession single character value vector character values specifying accession IDs return results . type single character value specifying type objects query. Must one following options: analysis, samples, studies, assembly, genome run. (default: type = \"samples\")","code":""},{"path":"/reference/getFile.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Download any MGnify files, also including processed reads and\nidentified protein sequences — getFile","text":"Either local filename downloaded file, either location MGNifyR cache file. read.func used, result returned. data.frame containing discovered downloads. multiple accessions queried, accessions column may filter results - since rownames set (wouldn;'t make sense query return multiple items)","code":""},{"path":"/reference/getFile.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Download any MGnify files, also including processed reads and\nidentified protein sequences — getFile","text":"getFile convenient wrapper round generic URL downloading functionality R, taking care things like local caching authentication. function wrapper function allowing easy enumeration downloads available given accession (list thereof). Returns single data.frame containing available downloads associated metadata, including url location description. can filtered extract urls interest, actually retrieving files using mgnify_download","code":""},{"path":"/reference/getFile.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Download any MGnify files, also including processed reads and\nidentified protein sequences — getFile","text":"","code":"# Make a client object mg <- MgnifyClient(useCache = FALSE) # Create a vector of accession ids - these happen to be \\code{analysis} # accessions accession_vect <- c(\"MGYA00563876\", \"MGYA00563877\") downloads <- searchFile(mg, accession_vect, \"analyses\") #> Searching files... #> | | | 0% | |=================================== | 50% | |======================================================================| 100% # Filter to find the urls of 16S encoding sequences url_list <- downloads[ downloads$attributes.description.label == \"Contigs encoding SSU rRNA\", \"download_url\"] # Example 1: # Download the first file supplied_filename <- getFile( mg, url_list[[1]], file=\"SSU_file.fasta.gz\") if (FALSE) { # Example 2: # Just use local caching cached_filename <- getFile(mg, url_list[[2]]) # Example 3: # Using read.func to open the reads with readDNAStringSet from # \\code{biostrings}. Without retaining on disk dna_seqs <- getFile( mg, url_list[[3]], read.func = readDNAStringSet) } # Make a client object mg <- MgnifyClient(useCache = TRUE) # Create a vector of accession ids - these happen to be \\code{analysis} # accessions accession_vect <- c( \"MGYA00563876\", \"MGYA00563877\", \"MGYA00563878\", \"MGYA00563879\", \"MGYA00563880\" ) downloads <- searchFile(mg, accession_vect, \"analyses\") #> Searching files... #> | | | 0% | |============== | 20% | |============================ | 40% | |========================================== | 60% | |======================================================== | 80% | |======================================================================| 100%"},{"path":"/reference/getMetadata.html","id":null,"dir":"Reference","previous_headings":"","what":"Get all Study, Sample and Analysis metadata for the supplied analyses\naccessions — getMetadata","title":"Get all Study, Sample and Analysis metadata for the supplied analyses\naccessions — getMetadata","text":"Get Study, Sample Analysis metadata supplied analyses accessions","code":""},{"path":"/reference/getMetadata.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Get all Study, Sample and Analysis metadata for the supplied analyses\naccessions — getMetadata","text":"","code":"getMetadata(x, ...) # S4 method for MgnifyClient getMetadata(x, accession, ...)"},{"path":"/reference/getMetadata.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Get all Study, Sample and Analysis metadata for the supplied analyses\naccessions — getMetadata","text":"x MgnifyClient object. ... Optional arguments; currently used. accession single character value vector analysis accession IDs specifying accessions retrieve data .","code":""},{"path":"/reference/getMetadata.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Get all Study, Sample and Analysis metadata for the supplied analyses\naccessions — getMetadata","text":"data.frame metadata analysis accession list.","code":""},{"path":"/reference/getMetadata.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Get all Study, Sample and Analysis metadata for the supplied analyses\naccessions — getMetadata","text":"function retrieves associated study, sample analysis metadata attributes list analyses accessions.","code":""},{"path":"/reference/getMetadata.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Get all Study, Sample and Analysis metadata for the supplied analyses\naccessions — getMetadata","text":"","code":"# Create a client object mg <- MgnifyClient(useCache = FALSE) # Download all associated study/sample and analysis metadata accession_list <- c(\"MGYA00377505\") meta_dataframe <- getMetadata(mg, accession_list) #> Fetching metadata... #> | | | 0% | |======================================================================| 100%"},{"path":"/reference/getResult.html","id":null,"dir":"Reference","previous_headings":"","what":"Get microbial and/or functional profiling data for a list of accessions — getResult","title":"Get microbial and/or functional profiling data for a list of accessions — getResult","text":"Get microbial /functional profiling data list accessions","code":""},{"path":"/reference/getResult.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Get microbial and/or functional profiling data for a list of accessions — getResult","text":"","code":"getResult(x, ...) # S4 method for MgnifyClient getResult( x, accession, get.taxa = TRUE, get.func = TRUE, output = \"TreeSE\", ... )"},{"path":"/reference/getResult.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Get microbial and/or functional profiling data for a list of accessions — getResult","text":"x MgnifyClient object. ... optional arguments: taxa.su single character value specifying taxa subunit results selected? Currently, taxonomy assignments MGnify pipelines rely rRNA matches existing databases (GreenGenes SILVA), later pipelines checking SSU LSU portions rRNA sequence. taxa.su allows selection either Small subunit (SSU) Large subunit results final TreeSummarizedExperiment object. Older pipeline versions report results subunits, thus accessions value effect. get.tree single boolean value specifying whether include available phylogenetic trees TreeSummarizedExperiment object. (default: get.tree = TRUE) .df single boolean value enabled output = \"list\". argument specifies whether return functional data named list (one entry per element output list) data.frames, data.frame containing results requested accessions. FALSE, function returns list lists, element consisting results single accession. (default: .df = TRUE) bulk.dl single boolean value specifying MGnifyR attempt speed things downloading relevant studies TSV results extracting required columns, rather using JSONAPI interface. getting results multiple accessions share study, option may result significantly faster processing. However, appear (quite ) cases database TSV result columns match expected accession names. hopefully fixed future, now bulk.dl defaults TRUE. work, can orders magnitude efficient. (default: buld_dl = TRUE) accession single character value vector character values specifying accession IDs return results . get.taxa boolean value specifying whether retrieve metagenomic data. (default: get.taxa = TRUE) get.func boolean value single character value vector character values specifying functional analysis types retrieve. get.func = TRUE, available functional datatypes retrieved, FALSE, functional data retrieved. current list available types \"antismash-gene-clusters\", \"go-slim\", \"go-terms\", \"interpro-identifiers\", \"taxonomy\", \"taxonomy-itsonedb\", \"taxonomy-itsunite\", \"taxonomy-lsu\", \"taxonomy-ssu\". Note depending particular analysis type, pipeline version etc., functional results available. (default: get.func = TRUE) output single character value specifying format output. Must one following options: \"TreeSE\", \"list\", \"phyloseq\". (default: output = \"TreeSE\")","code":""},{"path":"/reference/getResult.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Get microbial and/or functional profiling data for a list of accessions — getResult","text":"metagenomic data retrieved, result returned TreeSummarizedExperiment object default. result can also returned phyloseq object list data.frames. Note phyloseq object can include one phylogenetic tree meaning taxa might lost data subsetted based tree. functional data retrieved addition metagenomic data, result returned MultiAssayExperiment object. options list containing phyloseq object data.frames just data.frames. Functional data can returned MultiAssayExperiment object list data.frames.","code":""},{"path":"/reference/getResult.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Get microbial and/or functional profiling data for a list of accessions — getResult","text":"Given set analysis accessions collection annotation types, function queries MGNify API returns results. function convenient retrieving highly structured (analysis vs counts) data certain instances. example, BIOM files downloaded automatically. want just retrieve raw data database, see getData.","code":""},{"path":[]},{"path":"/reference/getResult.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Get microbial and/or functional profiling data for a list of accessions — getResult","text":"","code":"# Create a client object mg <- MgnifyClient(useCache = FALSE) # Get OTU tables as TreeSE accession_list <- c(\"MGYA00377505\") tse <- getResult(mg, accession_list, get.func=FALSE, get.taxa=TRUE) #> Fetching taxonomy data... #> | | | 0% | |======================================================================| 100% #> Merging with full join... #> 1/1 #> if (FALSE) { # Get functional data along with OTU tables as MAE mae <- getResult(mg, accession_list, get.func=TRUE, get.taxa=TRUE) # Get same data as list list <- getResult( mg, accession_list, get.func=TRUE, get.taxa=TRUE, output = \"list\", as.df = TRUE, use.cache = TRUE) }"},{"path":"/reference/searchAnalysis.html","id":null,"dir":"Reference","previous_headings":"","what":"Look up analysis accession IDs for one or more study or sample accessions — searchAnalysis","title":"Look up analysis accession IDs for one or more study or sample accessions — searchAnalysis","text":"Look analysis accession IDs one study sample accessions","code":""},{"path":"/reference/searchAnalysis.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Look up analysis accession IDs for one or more study or sample accessions — searchAnalysis","text":"","code":"searchAnalysis(x, ...) # S4 method for MgnifyClient searchAnalysis(x, type, accession, ...)"},{"path":"/reference/searchAnalysis.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Look up analysis accession IDs for one or more study or sample accessions — searchAnalysis","text":"x MgnifyClient object. ... Optional arguments; currently used. type single character value specifying type accession IDs specified accession. Must \"studies\" \"samples\". accession single character value vector character values specifying study sample accession IDs used retrieve analyses IDs.","code":""},{"path":"/reference/searchAnalysis.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Look up analysis accession IDs for one or more study or sample accessions — searchAnalysis","text":"vector analysis accession IDs.","code":""},{"path":"/reference/searchAnalysis.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Look up analysis accession IDs for one or more study or sample accessions — searchAnalysis","text":"Retrieve analysis accession IDs associated supplied study sample accession.","code":""},{"path":"/reference/searchAnalysis.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Look up analysis accession IDs for one or more study or sample accessions — searchAnalysis","text":"","code":"# Create a client object mg <- MgnifyClient(useCache = FALSE) # Retrieve analysis ids from study MGYS00005058 result <- searchAnalysis(mg, \"studies\", c(\"MGYS00005058\")) #> Fetching analyses... #> | | | 0% | |======================================================================| 100% if (FALSE) { # Retrieve all analysis ids from samples result <- searchAnalysis( mg, \"samples\", c(\"SRS4392730\", \"SRS4392743\")) }"}]