diff --git a/articles/MGnifyR.html b/articles/MGnifyR.html index d55f0849..5f676583 100644 --- a/articles/MGnifyR.html +++ b/articles/MGnifyR.html @@ -80,7 +80,7 @@
vignettes/MGnifyR.Rmd
MGnifyR.Rmd
-mg <- MgnifyClient()
+mg <- MgnifyClient(useCache = TRUE)
mg
#> An object of class "MgnifyClient"
#> Slot "databaseUrl":
@@ -210,7 +210,7 @@ Create a client#> [1] NA
#>
#> Slot "useCache":
-#> [1] FALSE
+#> [1] TRUE
#>
#> Slot "cacheDir":
#> [1] "/__w/MGnifyR/MGnifyR/vignettes/.MGnifyR_cache"
@@ -237,103 +237,107 @@ Search datamg,
type = "samples",
biome_name = "root:Environmental:Aquatic:Freshwater:Drinking water",
- max.hits = 10)
-
-# For demonstrative purpose, take only few samples
-set.seed(595)
-samples <- samples[ sample(rownames(samples), 5), ]
-samples
-#> biosample accession
-#> ERS14399432 SAMEA112288606 ERS14399432
-#> ERS14399426 SAMEA112288600 ERS14399426
-#> ERS5222929 SAMEA7465225 ERS5222929
-#> ERS5222919 SAMEA7465215 ERS5222919
-#> ERS5222921 SAMEA7465217 ERS5222921
-#> sample-desc
-#> ERS14399432 biomass
-#> ERS14399426 biomass
-#> ERS5222929 Raw water source [Surface water], Treatment [Ferric sulfate coagulation, clarification, sand filtration, ozonisation, activated carbon filtration], Disinfection [UV-light, NH2Cl]
-#> ERS5222919 Raw water source [Artificial groundwater], Treatment [Aeration, lime stabilization, flocculation, clarification, addition of sulphuric acid, sand filtration], Disinfection [No disinfection]
-#> ERS5222921 Raw water source [Artificial groundwater], Treatment [Aeration, lime stabilization, flocculation, clarification, sand filtration], Disinfection [No disinfection]
-#> environment-biome environment-feature
-#> ERS14399432 <NA> <NA>
-#> ERS14399426 <NA> <NA>
-#> ERS5222929 Freshwater Drinking water distribution system
-#> ERS5222919 Freshwater Drinking water distribution system
-#> ERS5222921 Freshwater Drinking water distribution system
-#> environment-material sample-name sample-alias
-#> ERS14399432 <NA> GREEN64 GREEN64
-#> ERS14399426 <NA> GREEN58 GREEN58
-#> ERS5222929 water THS_D2_SW_CHM_R THS_D2_SW_CHM_R
-#> ERS5222919 water THS_A1_GW_ND_R THS_A1_GW_ND_R
-#> ERS5222921 water THS_B1_GW_ND_D THS_B1_GW_ND_D
-#> last-update
-#> ERS14399432 2023-04-23T00:43:15
-#> ERS14399426 2023-04-23T02:07:52
-#> ERS5222929 2020-11-10T18:16:23
-#> ERS5222919 2020-11-10T18:02:30
-#> ERS5222921 2021-06-02T14:21:12
-#> project name
-#> ERS14399432 ARGs study in bioelectrochemical remediation
-#> ERS14399426 ARGs study in bioelectrochemical remediation
-#> ERS5222929 DWDSOME (Microbiome Dynamics in Drinking Water Distribution System)
-#> ERS5222919 DWDSOME (Microbiome Dynamics in Drinking Water Distribution System)
-#> ERS5222921 DWDSOME (Microbiome Dynamics in Drinking Water Distribution System)
+head(samples)
+#> biosample accession sample-desc
+#> ERS14399436 SAMEA112288610 ERS14399436 biofilm
+#> ERS14399429 SAMEA112288603 ERS14399429 biofilm
+#> ERS14399431 SAMEA112288605 ERS14399431 biomass
+#> ERS14399428 SAMEA112288602 ERS14399428 biofilm
+#> ERS14399416 SAMEA112288590 ERS14399416 biofilm
+#> ERS14399423 SAMEA112288597 ERS14399423 biofilm
+#> environment-biome
+#> ERS14399436 Laboratory environment (ENVO_01001405)
+#> ERS14399429 Laboratory environment (ENVO_01001405)
+#> ERS14399431 <NA>
+#> ERS14399428 <NA>
+#> ERS14399416 <NA>
+#> ERS14399423 <NA>
+#> environment-feature
+#> ERS14399436 Laboratory environment (ENVO_01001405)
+#> ERS14399429 Laboratory environment (ENVO_01001405)
+#> ERS14399431 <NA>
+#> ERS14399428 <NA>
+#> ERS14399416 <NA>
+#> ERS14399423 <NA>
+#> environment-material sample-name sample-alias
+#> ERS14399436 Biofilm material (ENVO:01000156) GREEN68 GREEN68
+#> ERS14399429 Biofilm material (ENVO:01000156) GREEN61 GREEN61
+#> ERS14399431 <NA> GREEN63 GREEN63
+#> ERS14399428 <NA> GREEN60 GREEN60
+#> ERS14399416 <NA> GREEN48 GREEN48
+#> ERS14399423 <NA> GREEN55 GREEN55
+#> last-update project name
+#> ERS14399436 2023-08-07T11:15:24 ARGs study in bioelectrochemical remediation
+#> ERS14399429 2023-08-07T10:52:31 ARGs study in bioelectrochemical remediation
+#> ERS14399431 2023-04-23T08:53:19 ARGs study in bioelectrochemical remediation
+#> ERS14399428 2023-04-23T07:57:35 ARGs study in bioelectrochemical remediation
+#> ERS14399416 2023-04-23T06:14:08 ARGs study in bioelectrochemical remediation
+#> ERS14399423 2023-04-23T05:00:17 ARGs study in bioelectrochemical remediation
#> geographic location (country and/or sea,region) collection date
-#> ERS14399432 Spain 2022-09-14
-#> ERS14399426 Spain 2022-09-14
-#> ERS5222929 Finland 2015-09-01
-#> ERS5222919 Finland 2015-08-03
-#> ERS5222921 Finland 2015-08-03
-#> environment (biome) environment (feature)
-#> ERS14399432 <NA> <NA>
-#> ERS14399426 <NA> <NA>
-#> ERS5222929 Freshwater Drinking water distribution system
-#> ERS5222919 Freshwater Drinking water distribution system
-#> ERS5222921 Freshwater Drinking water distribution system
-#> environment (material) ENA checklist acc_type
-#> ERS14399432 <NA> ERC000023 samples
-#> ERS14399426 <NA> ERC000023 samples
-#> ERS5222929 water ERC000025 samples
-#> ERS5222919 water ERC000025 samples
-#> ERS5222921 water ERC000025 samples
-#> biome
-#> ERS14399432 root:Environmental:Aquatic:Freshwater:Drinking water
-#> ERS14399426 root:Environmental:Aquatic:Freshwater:Drinking water
-#> ERS5222929 root:Environmental:Aquatic:Freshwater:Drinking water:Delivery networks
-#> ERS5222919 root:Environmental:Aquatic:Freshwater:Drinking water:Delivery networks
-#> ERS5222921 root:Environmental:Aquatic:Freshwater:Drinking water:Delivery networks
-#> studies type collection-date latitude longitude
-#> ERS14399432 MGYS00006211 samples 2022-09-14 <NA> <NA>
-#> ERS14399426 MGYS00006211 samples 2022-09-14 <NA> <NA>
-#> ERS5222929 MGYS00005650 samples 2015-09-01 61.92 25.75
-#> ERS5222919 MGYS00005650 samples 2015-08-03 61.92 25.75
-#> ERS5222921 MGYS00005650 samples 2015-08-03 61.92 25.75
-#> investigation type geographic location (longitude)
-#> ERS14399432 <NA> <NA>
-#> ERS14399426 <NA> <NA>
-#> ERS5222929 metatranscriptome 25.75
-#> ERS5222919 metatranscriptome 25.75
-#> ERS5222921 metagenome 25.75
-#> environmental package sequencing method
-#> ERS14399432 <NA> <NA>
-#> ERS14399426 <NA> <NA>
-#> ERS5222929 miscellaneous natural or artificial environment Illumina
-#> ERS5222919 miscellaneous natural or artificial environment Illumina
-#> ERS5222921 miscellaneous natural or artificial environment Illumina
-#> geographic location (latitude) analysis-completed geo-loc-name
-#> ERS14399432 <NA> <NA> <NA>
-#> ERS14399426 <NA> <NA> <NA>
-#> ERS5222929 61.92 <NA> <NA>
-#> ERS5222919 61.92 <NA> <NA>
-#> ERS5222921 61.92 <NA> <NA>
-#> instrument model
-#> ERS14399432 <NA>
-#> ERS14399426 <NA>
-#> ERS5222929 <NA>
-#> ERS5222919 <NA>
-#> ERS5222921 <NA>
head(analyses_accessions)
-#> NULL
head(analyses_metadata)
-#> analysis_experiment-type analysis_pipeline-version
-#> MGYA00643475 assembly 5.0
-#> MGYA00643477 assembly 5.0
-#> MGYA00575721 metatranscriptomic 5.0
-#> MGYA00575717 metatranscriptomic 5.0
-#> MGYA00575713 metagenomic 5.0
-#> analysis_analysis-status analysis_accession analysis_is-private
-#> MGYA00643475 completed MGYA00643475 TRUE
-#> MGYA00643477 completed MGYA00643477 TRUE
-#> MGYA00575721 completed MGYA00575721 FALSE
-#> MGYA00575717 completed MGYA00575717 FALSE
-#> MGYA00575713 completed MGYA00575713 FALSE
+#> analysis_analysis-status analysis_pipeline-version
+#> MGYA00652201 completed 5.0
+#> MGYA00652185 completed 5.0
+#> MGYA00643487 completed 5.0
+#> MGYA00643486 completed 5.0
+#> MGYA00643485 completed 5.0
+#> MGYA00643484 completed 5.0
+#> analysis_experiment-type analysis_accession analysis_is-private
+#> MGYA00652201 assembly MGYA00652201 TRUE
+#> MGYA00652185 assembly MGYA00652185 TRUE
+#> MGYA00643487 assembly MGYA00643487 TRUE
+#> MGYA00643486 assembly MGYA00643486 TRUE
+#> MGYA00643485 assembly MGYA00643485 TRUE
+#> MGYA00643484 assembly MGYA00643484 TRUE
#> analysis_complete-time analysis_instrument-platform
-#> MGYA00643475 2023-04-23T00:43:18 ILLUMINA
-#> MGYA00643477 2023-04-23T02:07:56 ILLUMINA
-#> MGYA00575721 2020-11-10T18:16:27 ILLUMINA
-#> MGYA00575717 2020-11-10T18:02:34 ILLUMINA
-#> MGYA00575713 2020-11-10T17:47:00 ILLUMINA
+#> MGYA00652201 2023-08-07T11:15:25 ILLUMINA
+#> MGYA00652185 2023-08-07T10:52:31 ILLUMINA
+#> MGYA00643487 2023-04-23T08:53:23 ILLUMINA
+#> MGYA00643486 2023-04-23T07:57:38 ILLUMINA
+#> MGYA00643485 2023-04-23T06:14:11 ILLUMINA
+#> MGYA00643484 2023-04-23T05:00:21 ILLUMINA
#> analysis_instrument-model analysis_Submitted nucleotide sequences
-#> MGYA00643475 Illumina NovaSeq 6000 187536
-#> MGYA00643477 Illumina NovaSeq 6000 167208
-#> MGYA00575721 Illumina HiSeq 4000 1766611
-#> MGYA00575717 Illumina HiSeq 4000 5454850
-#> MGYA00575713 Illumina HiSeq 4000 17338992
+#> MGYA00652201 Illumina NovaSeq 6000 223726
+#> MGYA00652185 Illumina NovaSeq 6000 292409
+#> MGYA00643487 Illumina NovaSeq 6000 162292
+#> MGYA00643486 Illumina NovaSeq 6000 233327
+#> MGYA00643485 Illumina NovaSeq 6000 318625
+#> MGYA00643484 Illumina NovaSeq 6000 341952
#> analysis_Nucleotide sequences after format-specific filtering
-#> MGYA00643475 187536
-#> MGYA00643477 167208
-#> MGYA00575721 293112
-#> MGYA00575717 1159251
-#> MGYA00575713 2179562
+#> MGYA00652201 223726
+#> MGYA00652185 292409
+#> MGYA00643487 162292
+#> MGYA00643486 233327
+#> MGYA00643485 318625
+#> MGYA00643484 341952
#> analysis_Nucleotide sequences after length filtering
-#> MGYA00643475 187536
-#> MGYA00643477 167208
-#> MGYA00575721 293112
-#> MGYA00575717 1159251
-#> MGYA00575713 2179562
+#> MGYA00652201 223726
+#> MGYA00652185 292409
+#> MGYA00643487 162292
+#> MGYA00643486 233327
+#> MGYA00643485 318625
+#> MGYA00643484 341952
#> analysis_Nucleotide sequences after undetermined bases filtering
-#> MGYA00643475 187536
-#> MGYA00643477 167208
-#> MGYA00575721 293112
-#> MGYA00575717 1159251
-#> MGYA00575713 2179562
+#> MGYA00652201 223726
+#> MGYA00652185 292409
+#> MGYA00643487 162292
+#> MGYA00643486 233327
+#> MGYA00643485 318625
+#> MGYA00643484 341952
#> analysis_Reads with predicted CDS
-#> MGYA00643475 185060
-#> MGYA00643477 163559
-#> MGYA00575721 34145
-#> MGYA00575717 79238
-#> MGYA00575713 2132038
+#> MGYA00652201 223146
+#> MGYA00652185 291506
+#> MGYA00643487 159860
+#> MGYA00643486 232596
+#> MGYA00643485 317405
+#> MGYA00643484 341116
#> analysis_Reads with predicted RNA
-#> MGYA00643475 3748
-#> MGYA00643477 2841
-#> MGYA00575721 254446
-#> MGYA00575717 1065067
-#> MGYA00575713 6062
+#> MGYA00652201 3008
+#> MGYA00652185 4472
+#> MGYA00643487 3518
+#> MGYA00643486 3513
+#> MGYA00643485 5420
+#> MGYA00643484 4849
#> analysis_Reads with InterProScan match analysis_Predicted CDS
-#> MGYA00643475 140771 586640
-#> MGYA00643477 127017 464277
-#> MGYA00575721 7922 34824
-#> MGYA00575717 14140 80536
-#> MGYA00575713 850215 2213838
+#> MGYA00652201 182364 489141
+#> MGYA00652185 241005 674191
+#> MGYA00643487 124819 519411
+#> MGYA00643486 192842 542005
+#> MGYA00643485 259359 893435
+#> MGYA00643484 280422 826459
#> analysis_Predicted CDS with InterProScan match
-#> MGYA00643475 368387
-#> MGYA00643477 313613
-#> MGYA00575721 7947
-#> MGYA00575717 14203
-#> MGYA00575713 853169
+#> MGYA00652201 331022
+#> MGYA00652185 472354
+#> MGYA00643487 330317
+#> MGYA00643486 379304
+#> MGYA00643485 585352
+#> MGYA00643484 572175
#> analysis_Total InterProScan matches
-#> MGYA00643475 1261625
-#> MGYA00643477 1069719
-#> MGYA00575721 12971
-#> MGYA00575717 24971
-#> MGYA00575713 1513676
+#> MGYA00652201 1035846
+#> MGYA00652185 1520845
+#> MGYA00643487 1134950
+#> MGYA00643486 1221863
+#> MGYA00643485 1928199
+#> MGYA00643484 1847000
#> analysis_Predicted SSU sequences analysis_Predicted LSU sequences
-#> MGYA00643475 192 318
-#> MGYA00643477 171 278
-#> MGYA00575721 118755 143666
-#> MGYA00575717 573080 504459
-#> MGYA00575713 1593 2969
+#> MGYA00652201 141 222
+#> MGYA00652185 268 394
+#> MGYA00643487 217 330
+#> MGYA00643486 199 345
+#> MGYA00643485 273 431
+#> MGYA00643484 243 399
#> analysis_acc_type study_attributes.accession
-#> MGYA00643475 analysis-jobs MGYS00006211
-#> MGYA00643477 analysis-jobs MGYS00006211
-#> MGYA00575721 analysis-jobs MGYS00005650
-#> MGYA00575717 analysis-jobs MGYS00005650
-#> MGYA00575713 analysis-jobs MGYS00005650
+#> MGYA00652201 analysis-jobs MGYS00006211
+#> MGYA00652185 analysis-jobs MGYS00006211
+#> MGYA00643487 analysis-jobs MGYS00006211
+#> MGYA00643486 analysis-jobs MGYS00006211
+#> MGYA00643485 analysis-jobs MGYS00006211
+#> MGYA00643484 analysis-jobs MGYS00006211
#> study_attributes.bioproject study_attributes.samples-count
-#> MGYA00643475 PRJEB58755 23
-#> MGYA00643477 PRJEB58755 23
-#> MGYA00575721 PRJEB40814 15
-#> MGYA00575717 PRJEB40814 15
-#> MGYA00575713 PRJEB40814 15
+#> MGYA00652201 PRJEB58755 23
+#> MGYA00652185 PRJEB58755 23
+#> MGYA00643487 PRJEB58755 23
+#> MGYA00643486 PRJEB58755 23
+#> MGYA00643485 PRJEB58755 23
+#> MGYA00643484 PRJEB58755 23
#> study_attributes.is-private study_attributes.secondary-accession
-#> MGYA00643475 FALSE ERP143823
-#> MGYA00643477 FALSE ERP143823
-#> MGYA00575721 FALSE ERP124491
-#> MGYA00575717 FALSE ERP124491
-#> MGYA00575713 FALSE ERP124491
+#> MGYA00652201 FALSE ERP143823
+#> MGYA00652185 FALSE ERP143823
+#> MGYA00643487 FALSE ERP143823
+#> MGYA00643486 FALSE ERP143823
+#> MGYA00643485 FALSE ERP143823
+#> MGYA00643484 FALSE ERP143823
#> study_attributes.centre-name
-#> MGYA00643475 Leitat Technological Center
-#> MGYA00643477 Leitat Technological Center
-#> MGYA00575721 US EPA
-#> MGYA00575717 US EPA
-#> MGYA00575713 US EPA
-#> study_attributes.study-abstract
-#> MGYA00643475 Elimination of several antibiotics in water by bioelectrochemical cells. The main objective is study how the concentration of antibiotic resistant genes (ARG) changed depending on the voltage application.
-#> MGYA00643477 Elimination of several antibiotics in water by bioelectrochemical cells. The main objective is study how the concentration of antibiotic resistant genes (ARG) changed depending on the voltage application.
-#> MGYA00575721 The water microbiome in the drinking water distribution systems (DWDSs) of five waterworks in Finland with different raw water sources and treatment processes was explored. The sampled DWDSs were from two waterworks AB with non-disinfected, recharged groundwater as source water and from three waterworks utilizing chlorinated water (two DWDSs of surface waterworks CD and one of ground waterworks E). The water microbiome was characterized by Illumina high-throughput sequencing technology.
-#> MGYA00575717 The water microbiome in the drinking water distribution systems (DWDSs) of five waterworks in Finland with different raw water sources and treatment processes was explored. The sampled DWDSs were from two waterworks AB with non-disinfected, recharged groundwater as source water and from three waterworks utilizing chlorinated water (two DWDSs of surface waterworks CD and one of ground waterworks E). The water microbiome was characterized by Illumina high-throughput sequencing technology.
-#> MGYA00575713 The water microbiome in the drinking water distribution systems (DWDSs) of five waterworks in Finland with different raw water sources and treatment processes was explored. The sampled DWDSs were from two waterworks AB with non-disinfected, recharged groundwater as source water and from three waterworks utilizing chlorinated water (two DWDSs of surface waterworks CD and one of ground waterworks E). The water microbiome was characterized by Illumina high-throughput sequencing technology.
-#> study_attributes.study-name
-#> MGYA00643475 ARGs study in bioelectrochemical remediation
-#> MGYA00643477 ARGs study in bioelectrochemical remediation
-#> MGYA00575721 Metagenomic and metatranscriptomic analysis of the microbial community in drinking water distribution systems of ground and surface waterworks in Finland
-#> MGYA00575717 Metagenomic and metatranscriptomic analysis of the microbial community in drinking water distribution systems of ground and surface waterworks in Finland
-#> MGYA00575713 Metagenomic and metatranscriptomic analysis of the microbial community in drinking water distribution systems of ground and surface waterworks in Finland
+#> MGYA00652201 Leitat Technological Center
+#> MGYA00652185 Leitat Technological Center
+#> MGYA00643487 Leitat Technological Center
+#> MGYA00643486 Leitat Technological Center
+#> MGYA00643485 Leitat Technological Center
+#> MGYA00643484 Leitat Technological Center
+#> study_attributes.study-abstract
+#> MGYA00652201 Elimination of several antibiotics in water by bioelectrochemical cells. The main objective is study how the concentration of antibiotic resistant genes (ARG) changed depending on the voltage application.
+#> MGYA00652185 Elimination of several antibiotics in water by bioelectrochemical cells. The main objective is study how the concentration of antibiotic resistant genes (ARG) changed depending on the voltage application.
+#> MGYA00643487 Elimination of several antibiotics in water by bioelectrochemical cells. The main objective is study how the concentration of antibiotic resistant genes (ARG) changed depending on the voltage application.
+#> MGYA00643486 Elimination of several antibiotics in water by bioelectrochemical cells. The main objective is study how the concentration of antibiotic resistant genes (ARG) changed depending on the voltage application.
+#> MGYA00643485 Elimination of several antibiotics in water by bioelectrochemical cells. The main objective is study how the concentration of antibiotic resistant genes (ARG) changed depending on the voltage application.
+#> MGYA00643484 Elimination of several antibiotics in water by bioelectrochemical cells. The main objective is study how the concentration of antibiotic resistant genes (ARG) changed depending on the voltage application.
+#> study_attributes.study-name
+#> MGYA00652201 ARGs study in bioelectrochemical remediation
+#> MGYA00652185 ARGs study in bioelectrochemical remediation
+#> MGYA00643487 ARGs study in bioelectrochemical remediation
+#> MGYA00643486 ARGs study in bioelectrochemical remediation
+#> MGYA00643485 ARGs study in bioelectrochemical remediation
+#> MGYA00643484 ARGs study in bioelectrochemical remediation
#> study_attributes.data-origination study_attributes.last-update
-#> MGYA00643475 SUBMITTED 2023-08-07T11:15:24
-#> MGYA00643477 SUBMITTED 2023-08-07T11:15:24
-#> MGYA00575721 SUBMITTED 2021-06-02T16:00:57
-#> MGYA00575717 SUBMITTED 2021-06-02T16:00:57
-#> MGYA00575713 SUBMITTED 2021-06-02T16:00:57
+#> MGYA00652201 SUBMITTED 2023-08-07T11:15:24
+#> MGYA00652185 SUBMITTED 2023-08-07T11:15:24
+#> MGYA00643487 SUBMITTED 2023-08-07T11:15:24
+#> MGYA00643486 SUBMITTED 2023-08-07T11:15:24
+#> MGYA00643485 SUBMITTED 2023-08-07T11:15:24
+#> MGYA00643484 SUBMITTED 2023-08-07T11:15:24
#> study_accession study_acc_type sample_biosample sample_accession
-#> MGYA00643475 MGYS00006211 studies SAMEA112288606 ERS14399432
-#> MGYA00643477 MGYS00006211 studies SAMEA112288600 ERS14399426
-#> MGYA00575721 MGYS00005650 studies SAMEA7465225 ERS5222929
-#> MGYA00575717 MGYS00005650 studies SAMEA7465215 ERS5222919
-#> MGYA00575713 MGYS00005650 studies SAMEA7465217 ERS5222921
-#> sample_collection-date
-#> MGYA00643475 2022-09-14
-#> MGYA00643477 2022-09-14
-#> MGYA00575721 2015-09-01
-#> MGYA00575717 2015-08-03
-#> MGYA00575713 2015-08-03
-#> sample_sample-desc
-#> MGYA00643475 biomass
-#> MGYA00643477 biomass
-#> MGYA00575721 Raw water source [Surface water], Treatment [Ferric sulfate coagulation, clarification, sand filtration, ozonisation, activated carbon filtration], Disinfection [UV-light, NH2Cl]
-#> MGYA00575717 Raw water source [Artificial groundwater], Treatment [Aeration, lime stabilization, flocculation, clarification, addition of sulphuric acid, sand filtration], Disinfection [No disinfection]
-#> MGYA00575713 Raw water source [Artificial groundwater], Treatment [Aeration, lime stabilization, flocculation, clarification, sand filtration], Disinfection [No disinfection]
-#> sample_sample-name sample_sample-alias sample_last-update
-#> MGYA00643475 GREEN64 GREEN64 2023-04-23T00:43:15
-#> MGYA00643477 GREEN58 GREEN58 2023-04-23T02:07:52
-#> MGYA00575721 THS_D2_SW_CHM_R THS_D2_SW_CHM_R 2020-11-10T18:16:23
-#> MGYA00575717 THS_A1_GW_ND_R THS_A1_GW_ND_R 2020-11-10T18:02:30
-#> MGYA00575713 THS_B1_GW_ND_D THS_B1_GW_ND_D 2021-06-02T14:21:12
-#> sample_project name
-#> MGYA00643475 ARGs study in bioelectrochemical remediation
-#> MGYA00643477 ARGs study in bioelectrochemical remediation
-#> MGYA00575721 DWDSOME (Microbiome Dynamics in Drinking Water Distribution System)
-#> MGYA00575717 DWDSOME (Microbiome Dynamics in Drinking Water Distribution System)
-#> MGYA00575713 DWDSOME (Microbiome Dynamics in Drinking Water Distribution System)
+#> MGYA00652201 MGYS00006211 studies SAMEA112288610 ERS14399436
+#> MGYA00652185 MGYS00006211 studies SAMEA112288603 ERS14399429
+#> MGYA00643487 MGYS00006211 studies SAMEA112288605 ERS14399431
+#> MGYA00643486 MGYS00006211 studies SAMEA112288602 ERS14399428
+#> MGYA00643485 MGYS00006211 studies SAMEA112288590 ERS14399416
+#> MGYA00643484 MGYS00006211 studies SAMEA112288597 ERS14399423
+#> sample_sample-desc sample_environment-biome
+#> MGYA00652201 biofilm Laboratory environment (ENVO_01001405)
+#> MGYA00652185 biofilm Laboratory environment (ENVO_01001405)
+#> MGYA00643487 biomass <NA>
+#> MGYA00643486 biofilm <NA>
+#> MGYA00643485 biofilm <NA>
+#> MGYA00643484 biofilm <NA>
+#> sample_environment-feature
+#> MGYA00652201 Laboratory environment (ENVO_01001405)
+#> MGYA00652185 Laboratory environment (ENVO_01001405)
+#> MGYA00643487 <NA>
+#> MGYA00643486 <NA>
+#> MGYA00643485 <NA>
+#> MGYA00643484 <NA>
+#> sample_environment-material sample_sample-name
+#> MGYA00652201 Biofilm material (ENVO:01000156) GREEN68
+#> MGYA00652185 Biofilm material (ENVO:01000156) GREEN61
+#> MGYA00643487 <NA> GREEN63
+#> MGYA00643486 <NA> GREEN60
+#> MGYA00643485 <NA> GREEN48
+#> MGYA00643484 <NA> GREEN55
+#> sample_sample-alias sample_last-update
+#> MGYA00652201 GREEN68 2023-08-07T11:15:24
+#> MGYA00652185 GREEN61 2023-08-07T10:52:31
+#> MGYA00643487 GREEN63 2023-04-23T08:53:19
+#> MGYA00643486 GREEN60 2023-04-23T07:57:35
+#> MGYA00643485 GREEN48 2023-04-23T06:14:08
+#> MGYA00643484 GREEN55 2023-04-23T05:00:17
+#> sample_project name
+#> MGYA00652201 ARGs study in bioelectrochemical remediation
+#> MGYA00652185 ARGs study in bioelectrochemical remediation
+#> MGYA00643487 ARGs study in bioelectrochemical remediation
+#> MGYA00643486 ARGs study in bioelectrochemical remediation
+#> MGYA00643485 ARGs study in bioelectrochemical remediation
+#> MGYA00643484 ARGs study in bioelectrochemical remediation
#> sample_geographic location (country and/or sea,region)
-#> MGYA00643475 Spain
-#> MGYA00643477 Spain
-#> MGYA00575721 Finland
-#> MGYA00575717 Finland
-#> MGYA00575713 Finland
-#> sample_collection date sample_ENA checklist sample_acc_type
-#> MGYA00643475 2022-09-14 ERC000023 samples
-#> MGYA00643477 2022-09-14 ERC000023 samples
-#> MGYA00575721 2015-09-01 ERC000025 samples
-#> MGYA00575717 2015-08-03 ERC000025 samples
-#> MGYA00575713 2015-08-03 ERC000025 samples
-#> assembly_accession
-#> MGYA00643475 ERZ16299693
-#> MGYA00643477 ERZ16299677
-#> MGYA00575721 <NA>
-#> MGYA00575717 <NA>
-#> MGYA00575713 <NA>
-#> biome_string
-#> MGYA00643475 root:Environmental:Aquatic:Freshwater:Drinking water
-#> MGYA00643477 root:Environmental:Aquatic:Freshwater:Drinking water
-#> MGYA00575721 root:Environmental:Aquatic:Freshwater:Drinking water:Delivery networks
-#> MGYA00575717 root:Environmental:Aquatic:Freshwater:Drinking water:Delivery networks
-#> MGYA00575713 root:Environmental:Aquatic:Freshwater:Drinking water:Delivery networks
-#> sample_latitude sample_longitude sample_environment-biome
-#> MGYA00643475 <NA> <NA> <NA>
-#> MGYA00643477 <NA> <NA> <NA>
-#> MGYA00575721 61.92 25.75 Freshwater
-#> MGYA00575717 61.92 25.75 Freshwater
-#> MGYA00575713 61.92 25.75 Freshwater
-#> sample_environment-feature sample_environment-material
-#> MGYA00643475 <NA> <NA>
-#> MGYA00643477 <NA> <NA>
-#> MGYA00575721 Drinking water distribution system water
-#> MGYA00575717 Drinking water distribution system water
-#> MGYA00575713 Drinking water distribution system water
+#> MGYA00652201 Spain
+#> MGYA00652185 Spain
+#> MGYA00643487 Spain
+#> MGYA00643486 Spain
+#> MGYA00643485 Spain
+#> MGYA00643484 Spain
+#> sample_collection date sample_environment (biome)
+#> MGYA00652201 2022-09-14 Laboratory environment (ENVO_01001405)
+#> MGYA00652185 2022-09-14 Laboratory environment (ENVO_01001405)
+#> MGYA00643487 2022-09-14 <NA>
+#> MGYA00643486 2022-09-14 <NA>
+#> MGYA00643485 2022-09-14 <NA>
+#> MGYA00643484 2022-09-14 <NA>
+#> sample_environment (feature)
+#> MGYA00652201 Laboratory environment (ENVO_01001405)
+#> MGYA00652185 Laboratory environment (ENVO_01001405)
+#> MGYA00643487 <NA>
+#> MGYA00643486 <NA>
+#> MGYA00643485 <NA>
+#> MGYA00643484 <NA>
+#> sample_environment (material) sample_ENA checklist
+#> MGYA00652201 Biofilm material (ENVO:01000156) ERC000023
+#> MGYA00652185 Biofilm material (ENVO:01000156) ERC000023
+#> MGYA00643487 <NA> ERC000023
+#> MGYA00643486 <NA> ERC000023
+#> MGYA00643485 <NA> ERC000023
+#> MGYA00643484 <NA> ERC000023
+#> sample_acc_type assembly_accession
+#> MGYA00652201 samples ERZ20300939
+#> MGYA00652185 samples ERZ20300942
+#> MGYA00643487 samples ERZ16299686
+#> MGYA00643486 samples ERZ16299690
+#> MGYA00643485 samples ERZ16299649
+#> MGYA00643484 samples ERZ16299683
+#> biome_string
+#> MGYA00652201 root:Environmental:Aquatic:Freshwater:Drinking water
+#> MGYA00652185 root:Environmental:Aquatic:Freshwater:Drinking water
+#> MGYA00643487 root:Environmental:Aquatic:Freshwater:Drinking water
+#> MGYA00643486 root:Environmental:Aquatic:Freshwater:Drinking water
+#> MGYA00643485 root:Environmental:Aquatic:Freshwater:Drinking water
+#> MGYA00643484 root:Environmental:Aquatic:Freshwater:Drinking water
+#> sample_collection-date sample_latitude sample_longitude
+#> MGYA00652201 <NA> <NA> <NA>
+#> MGYA00652185 <NA> <NA> <NA>
+#> MGYA00643487 2022-09-14 <NA> <NA>
+#> MGYA00643486 2022-09-14 <NA> <NA>
+#> MGYA00643485 2022-09-14 <NA> <NA>
+#> MGYA00643484 2022-09-14 <NA> <NA>
#> sample_investigation type sample_geographic location (longitude)
-#> MGYA00643475 <NA> <NA>
-#> MGYA00643477 <NA> <NA>
-#> MGYA00575721 metatranscriptome 25.75
-#> MGYA00575717 metatranscriptome 25.75
-#> MGYA00575713 metagenome 25.75
-#> sample_environment (biome) sample_environment (feature)
-#> MGYA00643475 <NA> <NA>
-#> MGYA00643477 <NA> <NA>
-#> MGYA00575721 Freshwater Drinking water distribution system
-#> MGYA00575717 Freshwater Drinking water distribution system
-#> MGYA00575713 Freshwater Drinking water distribution system
-#> sample_environment (material)
-#> MGYA00643475 <NA>
-#> MGYA00643477 <NA>
-#> MGYA00575721 water
-#> MGYA00575717 water
-#> MGYA00575713 water
-#> sample_environmental package
-#> MGYA00643475 <NA>
-#> MGYA00643477 <NA>
-#> MGYA00575721 miscellaneous natural or artificial environment
-#> MGYA00575717 miscellaneous natural or artificial environment
-#> MGYA00575713 miscellaneous natural or artificial environment
-#> sample_sequencing method sample_geographic location (latitude)
-#> MGYA00643475 <NA> <NA>
-#> MGYA00643477 <NA> <NA>
-#> MGYA00575721 Illumina 61.92
-#> MGYA00575717 Illumina 61.92
-#> MGYA00575713 Illumina 61.92
-#> run_accession
-#> MGYA00643475 <NA>
-#> MGYA00643477 <NA>
-#> MGYA00575721 ERR4702562
-#> MGYA00575717 ERR4702552
-#> MGYA00575713 ERR4702554
mae[[1]]
#> class: TreeSummarizedExperiment
-#> dim: 2029 5
+#> dim: 3506 50
#> metadata(0):
#> assays(1): counts
-#> rownames(2029): 200154 3353 ... 5820 100053
-#> rowData names(8): Kingdom Phylum ... Species taxonomy
-#> colnames(5): MGYA00575713 MGYA00575717 MGYA00575721 MGYA00643475
-#> MGYA00643477
-#> colData names(61): analysis_experiment.type analysis_pipeline.version
-#> ... sample_geographic.location..latitude. run_accession
+#> rownames(3506): 82608 62797 ... 5820 6794
+#> rowData names(9): Kingdom Phylum ... taxonomy1 taxonomy
+#> colnames(50): MGYA00144458 MGYA00144419 ... MGYA00652185 MGYA00652201
+#> colData names(64): analysis_analysis.status analysis_pipeline.version
+#> ... sample_geo.loc.name sample_instrument.model
#> reducedDimNames(0):
#> mainExpName: NULL
#> altExpNames(0):
@@ -644,11 +692,8 @@ Fetch microbiome data#> Loading required package: scuttle
#> Loading required package: ggplot2
-plotColData(mae[[1]], "shannon", x = "sample_sample.desc")
-#> Warning: Groups with fewer than two data points have been dropped.
-#> Groups with fewer than two data points have been dropped.
-#> Groups with fewer than two data points have been dropped.
# Agglomerate data
altExps(mae[[1]]) <- splitByRanks(mae[[1]])
@@ -671,7 +716,7 @@ Fetch microbiome data FUN = vegan::vegdist, method = "bray")
# Plot
plotReducedDim(
- mae[[1]], "MDS", colour_by = "sample_sample.desc")
Finally, we can download the files with getFile()
.
# Just select a single file from the target_urls list for demonstration.
@@ -753,10 +820,10 @@ Fetch sequence files
# Where are the files?
cached_location
-#> [1] "/tmp/Rtmp5cE7Yz/filebb468bb04c3"
sessionInfo()
-#> R Under development (unstable) (2024-01-23 r85822)
+#> R Under development (unstable) (2024-01-31 r85845)
#> Platform: x86_64-pc-linux-gnu
#> Running under: Ubuntu 22.04.3 LTS
#>
diff --git a/articles/MGnifyR_files/figure-html/calculate_diversity-1.png b/articles/MGnifyR_files/figure-html/calculate_diversity-1.png
index ac682bb5..68030359 100644
Binary files a/articles/MGnifyR_files/figure-html/calculate_diversity-1.png and b/articles/MGnifyR_files/figure-html/calculate_diversity-1.png differ
diff --git a/articles/MGnifyR_files/figure-html/pcoa-1.png b/articles/MGnifyR_files/figure-html/pcoa-1.png
index a9716776..b7f95115 100644
Binary files a/articles/MGnifyR_files/figure-html/pcoa-1.png and b/articles/MGnifyR_files/figure-html/pcoa-1.png differ
diff --git a/articles/MGnifyR_files/figure-html/plot_abundance-1.png b/articles/MGnifyR_files/figure-html/plot_abundance-1.png
index 96d3fed0..fb0951e1 100644
Binary files a/articles/MGnifyR_files/figure-html/plot_abundance-1.png and b/articles/MGnifyR_files/figure-html/plot_abundance-1.png differ
diff --git a/articles/MGnifyR_long.html b/articles/MGnifyR_long.html
index d98c8b4d..e26aab6c 100644
--- a/articles/MGnifyR_long.html
+++ b/articles/MGnifyR_long.html
@@ -80,7 +80,7 @@
MGnifyR: interface to MGnify database
- 2024-02-02
+ 2024-02-04
Source: vignettes/MGnifyR_long.Rmd
MGnifyR_long.Rmd
@@ -126,7 +126,82 @@ Load MGnifyR
package
Once installed, MGnifyR
is made available in the usual
way.
+library(MGnifyR)
+#> Loading required package: mia
+#> Loading required package: SummarizedExperiment
+#> Loading required package: MatrixGenerics
+#> Loading required package: matrixStats
+#>
+#> Attaching package: 'MatrixGenerics'
+#> The following objects are masked from 'package:matrixStats':
+#>
+#> colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
+#> colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
+#> colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
+#> colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
+#> colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
+#> colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
+#> colWeightedMeans, colWeightedMedians, colWeightedSds,
+#> colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
+#> rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
+#> rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
+#> rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
+#> rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
+#> rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
+#> rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
+#> rowWeightedSds, rowWeightedVars
+#> Loading required package: GenomicRanges
+#> Loading required package: stats4
+#> Loading required package: BiocGenerics
+#>
+#> Attaching package: 'BiocGenerics'
+#> The following objects are masked from 'package:stats':
+#>
+#> IQR, mad, sd, var, xtabs
+#> The following objects are masked from 'package:base':
+#>
+#> anyDuplicated, aperm, append, as.data.frame, basename, cbind,
+#> colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
+#> get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
+#> match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
+#> Position, rank, rbind, Reduce, rownames, sapply, setdiff, table,
+#> tapply, union, unique, unsplit, which.max, which.min
+#> Loading required package: S4Vectors
+#>
+#> Attaching package: 'S4Vectors'
+#> The following object is masked from 'package:utils':
+#>
+#> findMatches
+#> The following objects are masked from 'package:base':
+#>
+#> expand.grid, I, unname
+#> Loading required package: IRanges
+#> Loading required package: GenomeInfoDb
+#> Loading required package: Biobase
+#> Welcome to Bioconductor
+#>
+#> Vignettes contain introductory material; view with
+#> 'browseVignettes()'. To cite Bioconductor, see
+#> 'citation("Biobase")', and for packages 'citation("pkgname")'.
+#>
+#> Attaching package: 'Biobase'
+#> The following object is masked from 'package:MatrixGenerics':
+#>
+#> rowMedians
+#> The following objects are masked from 'package:matrixStats':
+#>
+#> anyMissing, rowMedians
+#> Loading required package: SingleCellExperiment
+#> Loading required package: TreeSummarizedExperiment
+#> Loading required package: Biostrings
+#> Loading required package: XVector
+#>
+#> Attaching package: 'Biostrings'
+#> The following object is masked from 'package:base':
+#>
+#> strsplit
+#> Loading required package: MultiAssayExperiment
+#> Loading required package: biomformat
mg <- MgnifyClient()
-mg
It’s recommended that local caching is enabled with
useCache = TRUE
. Queries to the MGnify API can be quite
slow, particularly when retrieving multipage results for many analyses
@@ -204,14 +300,108 @@
-head(northpolar)
head(northpolar)
+#> latitude longitude biosample accession analysis-completed
+#> SRS518212 78.7857 -103.5513 SAMN02484608 SRS518212 2016-05-04
+#> SRS522877 78.7857 -103.5513 SAMN02484612 SRS522877 2016-05-04
+#> SRS522878 78.7849 -103.5551 SAMN02484613 SRS522878 2016-05-04
+#> SRS522883 78.7839 -103.5574 SAMN02484618 SRS522883 2016-05-04
+#> SRS522884 78.7834 -103.5482 SAMN02484619 SRS522884 2016-05-04
+#> SRS522886 78.7854 -103.5433 SAMN02484621 SRS522886 2016-05-04
+#> sample-desc environment-biome sample-name
+#> SRS518212 Keywords: GSC:MIxS MIMS:5.0 tundra ER-B1
+#> SRS522877 Keywords: GSC:MIxS MIMS:5.0 tundra ER-I1
+#> SRS522878 Keywords: GSC:MIxS MIMS:5.0 tundra ER-I2
+#> SRS522883 Keywords: GSC:MIxS MIMS:5.0 tundra ER-B7
+#> SRS522884 Keywords: GSC:MIxS MIMS:5.0 tundra ER-B8
+#> SRS522886 Keywords: GSC:MIxS MIMS:5.0 tundra ER-B10
+#> sample-alias last-update investigation type
+#> SRS518212 ER-B1 2024-01-18T21:26:45 metagenome
+#> SRS522877 ER-I1 2024-01-18T21:26:28 metagenome
+#> SRS522878 ER-I2 2024-01-18T21:26:12 metagenome
+#> SRS522883 ER-B7 2024-01-18T21:25:55 metagenome
+#> SRS522884 ER-B8 2024-01-18T21:25:39 metagenome
+#> SRS522886 ER-B10 2024-01-18T21:25:06 metagenome
+#> project name
+#> SRS518212 A community genomics investigation of fungal adaptation to cold
+#> SRS522877 A community genomics investigation of fungal adaptation to cold
+#> SRS522878 A community genomics investigation of fungal adaptation to cold
+#> SRS522883 A community genomics investigation of fungal adaptation to cold
+#> SRS522884 A community genomics investigation of fungal adaptation to cold
+#> SRS522886 A community genomics investigation of fungal adaptation to cold
+#> geographic location (longitude) geographic location (depth)
+#> SRS518212 -103.55135 0-0.1m
+#> SRS522877 -103.55135 0-0.1m
+#> SRS522878 -103.555133 0-0.1m
+#> SRS522883 -103.5574 0-0.1m
+#> SRS522884 -103.548183 0-0.1m
+#> SRS522886 -103.543267 0-0.1m
+#> geographic location (country and/or sea,region) collection date
+#> SRS518212 Canada: Isachsen, Ellef Ringnes Island 2005-08
+#> SRS522877 Canada: Isachsen, Ellef Ringnes Island 2005-08
+#> SRS522878 Canada: Isachsen, Ellef Ringnes Island 2005-08
+#> SRS522883 Canada: Isachsen, Ellef Ringnes Island 2005-08
+#> SRS522884 Canada: Isachsen, Ellef Ringnes Island 2005-08
+#> SRS522886 Canada: Isachsen, Ellef Ringnes Island 2005-08
+#> environment (biome) environment (feature) environment (material)
+#> SRS518212 tundra frost boil soil
+#> SRS522877 tundra interboil soil
+#> SRS522878 tundra interboil soil
+#> SRS522883 tundra frost boil soil
+#> SRS522884 tundra frost boil soil
+#> SRS522886 tundra frost boil soil
+#> environmental package depth elevation
+#> SRS518212 MIMS.me;MIGS/MIMS/MIMARKS.soil 0-0.1m 41
+#> SRS522877 MIMS.me;MIGS/MIMS/MIMARKS.soil 0-0.1m 41
+#> SRS522878 MIMS.me;MIGS/MIMS/MIMARKS.soil 0-0.1m 40
+#> SRS522883 MIMS.me;MIGS/MIMS/MIMARKS.soil 0-0.1m 32
+#> SRS522884 MIMS.me;MIGS/MIMS/MIMARKS.soil 0-0.1m 30
+#> SRS522886 MIMS.me;MIGS/MIMS/MIMARKS.soil 0-0.1m 40
+#> miscellaneous parameter geographic location (latitude)
+#> SRS518212 Boil 1 78.78565
+#> SRS522877 Interboil 1 78.78565
+#> SRS522878 Interboil 2 78.784917
+#> SRS522883 Boil 7 78.783933
+#> SRS522884 Boil 8 78.783433
+#> SRS522886 Boil 10 78.78535
+#> NCBI sample classification instrument model acc_type
+#> SRS518212 410658 Illumina MiSeq samples
+#> SRS522877 410658 Illumina MiSeq samples
+#> SRS522878 410658 Illumina MiSeq samples
+#> SRS522883 410658 Illumina MiSeq samples
+#> SRS522884 410658 Illumina MiSeq samples
+#> SRS522886 410658 Illumina MiSeq samples
+#> biome studies type
+#> SRS518212 root:Environmental:Terrestrial:Soil MGYS00000850 samples
+#> SRS522877 root:Environmental:Terrestrial:Soil MGYS00000850 samples
+#> SRS522878 root:Environmental:Terrestrial:Soil MGYS00000850 samples
+#> SRS522883 root:Environmental:Terrestrial:Soil MGYS00000850 samples
+#> SRS522884 root:Environmental:Terrestrial:Soil MGYS00000850 samples
+#> SRS522886 root:Environmental:Terrestrial:Soil MGYS00000850 samples
+#> collection-date
+#> SRS518212 <NA>
+#> SRS522877 <NA>
+#> SRS522878 <NA>
+#> SRS522883 <NA>
+#> SRS522884 <NA>
+#> SRS522886 2005-08-01
Specifying an accession
parameter will restrict results
to just those matching that particular entry, be it a study, sample or
run. For example, to retrieve information for study “MGYS00002891”:
study_samples <- doQuery(mg, "studies", accession="MGYS00002891")
-study_samples
study_samples
+#> accession bioproject samples-count is-private
+#> MGYS00002891 MGYS00002891 PRJNA384570 29 FALSE
+#> secondary-accession centre-name
+#> MGYS00002891 SRP105345 University of Minnesota
+#> study-abstract
+#> MGYS00002891 Characterization of bacterial communities in marine sediments from Gladstone and Heron Island
+#> study-name data-origination last-update
+#> MGYS00002891 Queensland Marine Sediment HARVESTED 2019-11-07T16:33:46
+#> acc_type biomes type
+#> MGYS00002891 studies root:Environmental:Aquatic:Marine:Sediment studies
-# For demonstrative purpose, take only few samples
-set.seed(595)
-analyses_accessions <- sample(analyses_accessions, 5)
-
-analyses_accessions
analyses_accessions
+#> [1] "MGYA00209648" "MGYA00209649" "MGYA00209650" "MGYA00209651" "MGYA00209652"
+#> [6] "MGYA00209653" "MGYA00209654" "MGYA00209655" "MGYA00209656" "MGYA00209657"
+#> [11] "MGYA00209658" "MGYA00209659" "MGYA00209660" "MGYA00209661" "MGYA00209662"
+#> [16] "MGYA00209663" "MGYA00209664" "MGYA00209665" "MGYA00209666" "MGYA00209667"
+#> [21] "MGYA00209668" "MGYA00209669" "MGYA00209670" "MGYA00209671" "MGYA00209672"
+#> [26] "MGYA00209673" "MGYA00209674" "MGYA00209675" "MGYA00209676"
A useful side effect of the above call is that some attribute metadata for each sample has now been retrieved and stored in the local cache. Thus subsequent API calls for these samples (which will occur @@ -266,7 +458,196 @@
analyses_metadata <- getMetadata(mg, analyses_accessions)
-head(analyses_metadata)
head(analyses_metadata)
+#> analysis_analysis-status analysis_pipeline-version
+#> MGYA00209648 completed 4.1
+#> MGYA00209649 completed 4.1
+#> MGYA00209650 completed 4.1
+#> MGYA00209651 completed 4.1
+#> MGYA00209652 completed 4.1
+#> MGYA00209653 completed 4.1
+#> analysis_experiment-type analysis_accession analysis_is-private
+#> MGYA00209648 amplicon MGYA00209648 FALSE
+#> MGYA00209649 amplicon MGYA00209649 FALSE
+#> MGYA00209650 amplicon MGYA00209650 FALSE
+#> MGYA00209651 amplicon MGYA00209651 FALSE
+#> MGYA00209652 amplicon MGYA00209652 FALSE
+#> MGYA00209653 amplicon MGYA00209653 FALSE
+#> analysis_complete-time analysis_instrument-platform
+#> MGYA00209648 2018-09-06T00:00:00 ILLUMINA
+#> MGYA00209649 2018-09-06T00:00:00 ILLUMINA
+#> MGYA00209650 2018-09-06T00:00:00 ILLUMINA
+#> MGYA00209651 2018-09-06T00:00:00 ILLUMINA
+#> MGYA00209652 2018-09-06T00:00:00 ILLUMINA
+#> MGYA00209653 2018-09-06T00:00:00 ILLUMINA
+#> analysis_instrument-model analysis_Submitted nucleotide sequences
+#> MGYA00209648 Illumina HiSeq 2500 1441694
+#> MGYA00209649 Illumina HiSeq 2500 650265
+#> MGYA00209650 Illumina HiSeq 2500 1207289
+#> MGYA00209651 Illumina HiSeq 2500 469703
+#> MGYA00209652 Illumina HiSeq 2500 606584
+#> MGYA00209653 Illumina HiSeq 2500 692146
+#> analysis_Nucleotide sequences after format-specific filtering
+#> MGYA00209648 1441359
+#> MGYA00209649 650108
+#> MGYA00209650 1206954
+#> MGYA00209651 469585
+#> MGYA00209652 606429
+#> MGYA00209653 691971
+#> analysis_Nucleotide sequences after length filtering
+#> MGYA00209648 1272787
+#> MGYA00209649 578060
+#> MGYA00209650 1090737
+#> MGYA00209651 419171
+#> MGYA00209652 536462
+#> MGYA00209653 623965
+#> analysis_Nucleotide sequences after undetermined bases filtering
+#> MGYA00209648 1272787
+#> MGYA00209649 578060
+#> MGYA00209650 1090737
+#> MGYA00209651 419171
+#> MGYA00209652 536462
+#> MGYA00209653 623965
+#> analysis_Reads with predicted CDS
+#> MGYA00209648 22713
+#> MGYA00209649 11079
+#> MGYA00209650 19717
+#> MGYA00209651 7586
+#> MGYA00209652 11348
+#> MGYA00209653 25746
+#> analysis_Reads with predicted RNA
+#> MGYA00209648 1243457
+#> MGYA00209649 564004
+#> MGYA00209650 1065641
+#> MGYA00209651 410161
+#> MGYA00209652 523120
+#> MGYA00209653 595537
+#> analysis_Reads with InterProScan match analysis_Predicted CDS
+#> MGYA00209648 54 22807
+#> MGYA00209649 44 11215
+#> MGYA00209650 372 19758
+#> MGYA00209651 34 7604
+#> MGYA00209652 41 11356
+#> MGYA00209653 280 25977
+#> analysis_Predicted CDS with InterProScan match
+#> MGYA00209648 55
+#> MGYA00209649 44
+#> MGYA00209650 373
+#> MGYA00209651 35
+#> MGYA00209652 41
+#> MGYA00209653 280
+#> analysis_Total InterProScan matches analysis_acc_type
+#> MGYA00209648 97 analysis-jobs
+#> MGYA00209649 58 analysis-jobs
+#> MGYA00209650 589 analysis-jobs
+#> MGYA00209651 58 analysis-jobs
+#> MGYA00209652 47 analysis-jobs
+#> MGYA00209653 485 analysis-jobs
+#> study_attributes.accession study_attributes.bioproject
+#> MGYA00209648 MGYS00002891 PRJNA384570
+#> MGYA00209649 MGYS00002891 PRJNA384570
+#> MGYA00209650 MGYS00002891 PRJNA384570
+#> MGYA00209651 MGYS00002891 PRJNA384570
+#> MGYA00209652 MGYS00002891 PRJNA384570
+#> MGYA00209653 MGYS00002891 PRJNA384570
+#> study_attributes.samples-count study_attributes.is-private
+#> MGYA00209648 29 FALSE
+#> MGYA00209649 29 FALSE
+#> MGYA00209650 29 FALSE
+#> MGYA00209651 29 FALSE
+#> MGYA00209652 29 FALSE
+#> MGYA00209653 29 FALSE
+#> study_attributes.secondary-accession study_attributes.centre-name
+#> MGYA00209648 SRP105345 University of Minnesota
+#> MGYA00209649 SRP105345 University of Minnesota
+#> MGYA00209650 SRP105345 University of Minnesota
+#> MGYA00209651 SRP105345 University of Minnesota
+#> MGYA00209652 SRP105345 University of Minnesota
+#> MGYA00209653 SRP105345 University of Minnesota
+#> study_attributes.study-abstract
+#> MGYA00209648 Characterization of bacterial communities in marine sediments from Gladstone and Heron Island
+#> MGYA00209649 Characterization of bacterial communities in marine sediments from Gladstone and Heron Island
+#> MGYA00209650 Characterization of bacterial communities in marine sediments from Gladstone and Heron Island
+#> MGYA00209651 Characterization of bacterial communities in marine sediments from Gladstone and Heron Island
+#> MGYA00209652 Characterization of bacterial communities in marine sediments from Gladstone and Heron Island
+#> MGYA00209653 Characterization of bacterial communities in marine sediments from Gladstone and Heron Island
+#> study_attributes.study-name study_attributes.data-origination
+#> MGYA00209648 Queensland Marine Sediment HARVESTED
+#> MGYA00209649 Queensland Marine Sediment HARVESTED
+#> MGYA00209650 Queensland Marine Sediment HARVESTED
+#> MGYA00209651 Queensland Marine Sediment HARVESTED
+#> MGYA00209652 Queensland Marine Sediment HARVESTED
+#> MGYA00209653 Queensland Marine Sediment HARVESTED
+#> study_attributes.last-update study_accession study_acc_type
+#> MGYA00209648 2019-11-07T16:33:46 MGYS00002891 studies
+#> MGYA00209649 2019-11-07T16:33:46 MGYS00002891 studies
+#> MGYA00209650 2019-11-07T16:33:46 MGYS00002891 studies
+#> MGYA00209651 2019-11-07T16:33:46 MGYS00002891 studies
+#> MGYA00209652 2019-11-07T16:33:46 MGYS00002891 studies
+#> MGYA00209653 2019-11-07T16:33:46 MGYS00002891 studies
+#> sample_latitude sample_biosample sample_longitude sample_accession
+#> MGYA00209648 -23.749 SAMN06842047 151.3654 SRS2151215
+#> MGYA00209649 -23.7692 SAMN06842069 151.3167 SRS2151190
+#> MGYA00209650 -23.6158 SAMN06842067 152.1597 SRS2151193
+#> MGYA00209651 -23.7692 SAMN06842071 151.3167 SRS2151189
+#> MGYA00209652 -23.4369 SAMN06842064 151.9813 SRS2151195
+#> MGYA00209653 -23.6158 SAMN06842065 152.1597 SRS2151196
+#> sample_analysis-completed
+#> MGYA00209648 2018-09-06
+#> MGYA00209649 2018-09-06
+#> MGYA00209650 2018-09-06
+#> MGYA00209651 2018-09-06
+#> MGYA00209652 2018-09-06
+#> MGYA00209653 2018-09-06
+#> sample_geo-loc-name sample_sample-desc
+#> MGYA00209648 Australia: Queensland composite
+#> MGYA00209649 Australia: Queensland core
+#> MGYA00209650 Australia: Queensland, Great Barrier Reef core
+#> MGYA00209651 Australia: Queensland core
+#> MGYA00209652 Australia: Queensland, Great Barrier Reef core
+#> MGYA00209653 Australia: Queensland, Great Barrier Reef core
+#> sample_sample-name sample_sample-alias sample_last-update
+#> MGYA00209648 Facing island box 2 Facing island box 2 2018-09-06T01:18:52
+#> MGYA00209649 Gladstone Harbour 7A Gladstone Harbour 7A 2018-09-06T01:18:52
+#> MGYA00209650 Fitzroy reef 2C Fitzroy reef 2C 2018-09-06T01:18:52
+#> MGYA00209651 Gladstone Harbour 7C Gladstone Harbour 7C 2018-09-06T01:18:52
+#> MGYA00209652 Heron Island 4D Heron Island 4D 2018-09-06T01:18:52
+#> MGYA00209653 Fitzroy reef 2A Fitzroy reef 2A 2018-09-06T01:18:52
+#> sample_geographic location (longitude)
+#> MGYA00209648 151.36536
+#> MGYA00209649 151.31674
+#> MGYA00209650 152.15974
+#> MGYA00209651 151.31674
+#> MGYA00209652 151.98132
+#> MGYA00209653 152.15974
+#> sample_geographic location (country and/or sea,region)
+#> MGYA00209648 Australia: Queensland
+#> MGYA00209649 Australia: Queensland
+#> MGYA00209650 Australia: Queensland, Great Barrier Reef
+#> MGYA00209651 Australia: Queensland
+#> MGYA00209652 Australia: Queensland, Great Barrier Reef
+#> MGYA00209653 Australia: Queensland, Great Barrier Reef
+#> sample_geographic location (latitude) sample_instrument model
+#> MGYA00209648 -23.749048 Illumina HiSeq 2500
+#> MGYA00209649 -23.769222 Illumina HiSeq 2500
+#> MGYA00209650 -23.615824 Illumina HiSeq 2500
+#> MGYA00209651 -23.769222 Illumina HiSeq 2500
+#> MGYA00209652 -23.436857 Illumina HiSeq 2500
+#> MGYA00209653 -23.615824 Illumina HiSeq 2500
+#> sample_acc_type run_accession
+#> MGYA00209648 samples SRR5483782
+#> MGYA00209649 samples SRR5483760
+#> MGYA00209650 samples SRR5483762
+#> MGYA00209651 samples SRR5483758
+#> MGYA00209652 samples SRR5483765
+#> MGYA00209653 samples SRR5483764
+#> biome_string sample_depth
+#> MGYA00209648 root:Environmental:Aquatic:Marine:Sediment <NA>
+#> MGYA00209649 root:Environmental:Aquatic:Marine:Sediment 0.0
+#> MGYA00209650 root:Environmental:Aquatic:Marine:Sediment 20.0
+#> MGYA00209651 root:Environmental:Aquatic:Marine:Sediment 20.0
+#> MGYA00209652 root:Environmental:Aquatic:Marine:Sediment 30.0
+#> MGYA00209653 root:Environmental:Aquatic:Marine:Sediment 0.0
The resulting data.frame has columns with names prefixed with their
source type. For example, “sample_xxx” columns correspond to metadata
gleaned from querying an accession’s sample
entry. MGnify
@@ -290,7 +671,8 @@
tse <- getResult(mg, accession = analyses_accessions, get.func = FALSE)
-tse
tse
+#> class: TreeSummarizedExperiment
+#> dim: 3689 29
+#> metadata(0):
+#> assays(1): counts
+#> rownames(3689): 92640 251937 ... 233398 265506
+#> rowData names(8): Kingdom Phylum ... Species taxonomy1
+#> colnames(29): MGYA00209651 MGYA00209670 ... MGYA00209657 MGYA00209667
+#> colData names(49): analysis_experiment.type analysis_pipeline.version
+#> ... biome_string sample_depth
+#> reducedDimNames(0):
+#> mainExpName: NULL
+#> altExpNames(0):
+#> rowLinks: NULL
+#> rowTree: NULL
+#> colLinks: NULL
+#> colTree: NULL
TreeSE
object is uniquely positioned to support SummarizedExperiment-based
microbiome data manipulation and visualization. Moreover, it enables
access to miaverse
tools. For example, we can estimate
@@ -328,17 +726,26 @@
library(miaViz)
+#> Loading required package: ggraph
-plotAbundance(tse)
If needed, TreeSE
can be converted to
phyloseq
.
pseq <- makePhyloseqFromTreeSE(tse)
-pseq
-# Subset the accessions by taking 5 random analyses
-set.seed(74)
-all_accessions <- sample(all_accessions, 5)
-all_accessions
head(all_accessions)
+#> [1] "MGYA00097621" "MGYA00097622" "MGYA00097623" "MGYA00097624" "MGYA00097625"
+#> [6] "MGYA00097626"
The first step with this new accession list is, as previously, to
retrieve the associated metadata using getMetadata()
, and
as seen with the doQuery()
results, the returned
@@ -385,7 +791,343 @@
From full_metadata
we get an idea of the type of data
we’re dealing with, and can extract useful information such as sequencing
platform, source biome, etc. The next code snippet tallies a few of
@@ -400,9 +1142,17 @@
Again, we can fetch the data by calling getResult()
.
bulk.dl=TRUE
has the potential to significantly speed up
data retrieval. MGnify makes its functional results available in two
@@ -444,7 +1196,22 @@
mae <- getResult(mg, all_accessions, bulk.dl = TRUE)
-mae
mae
+#> A MultiAssayExperiment object of 4 listed
+#> experiments with user-defined names and respective classes.
+#> Containing an ExperimentList class object of length 4:
+#> [1] microbiota: TreeSummarizedExperiment with 32401 rows and 487 columns
+#> [2] go-slim: TreeSummarizedExperiment with 116 rows and 487 columns
+#> [3] go-terms: TreeSummarizedExperiment with 2640 rows and 487 columns
+#> [4] interpro-identifiers: TreeSummarizedExperiment with 15818 rows and 487 columns
+#> Functionality:
+#> experiments() - obtain the ExperimentList instance
+#> colData() - the primary/phenotype DataFrame
+#> sampleMap() - the sample coordination DataFrame
+#> `$`, `[`, `[[` - extract colData columns, subset, or experiment
+#> *Format() - convert into a long or wide DataFrame
+#> assays() - convert ExperimentList to a SimpleList of matrices
+#> exportClass() - save data to flat files
For metagenomic samples, the result is a MultiAssayExperiment
(MAE
) which links multiple TreeSE
objects into
one dataset. These TreeSE
objects include taxonomic
@@ -453,7 +1220,24 @@
-mae[[2]]
+mae[[2]]
+#> class: TreeSummarizedExperiment
+#> dim: 116 487
+#> metadata(0):
+#> assays(1): counts
+#> rownames(116): GO:0000015 GO:0000150 ... GO:1902494 GO:1990204
+#> rowData names(10): description category ... Genus Species
+#> colnames(487): MGYA00083332 MGYA00083120 ... MGYA00097653 MGYA00097655
+#> colData names(70): analysis_analysis.status analysis_pipeline.version
+#> ... sample_host.scientific.name
+#> sample_human.gut.environmental.package
+#> reducedDimNames(0):
+#> mainExpName: NULL
+#> altExpNames(0):
+#> rowLinks: NULL
+#> rowTree: NULL
+#> colLinks: NULL
+#> colTree: NULL
We can perform principal coordinate analysis (PCoA) on microbial profiling data by utilizing miaverse tools.
@@ -465,6 +1249,7 @@Metagenomics= vegan::vegdist, method = "bray") # Plot plotReducedDim(mae[[1]], "MDS", colour_by = "sample_environment.feature")
To list the types of available files, and guide the filtering, something like the following might be useful.
-table(dl_urls$attributes.description.label)
table(dl_urls$attributes.description.label)
+#>
+#> Complete GO annotation GO slim annotation
+#> 5 5
+#> InterPro matches OTUs, counts and taxonomic assignments
+#> 5 15
+#> Phylogenetic tree Predicted CDS with annotation
+#> 5 5
+#> Predicted CDS without annotation Predicted ORF without annotation
+#> 5 5
+#> Predicted tRNAs Processed nucleotide reads
+#> 5 5
+#> Processed reads with annotation Processed reads with pCDS
+#> 5 5
+#> Processed reads without annotation Reads encoding 16S rRNA
+#> 5 5
+#> Reads encoding 23S rRNA Reads encoding 5S rRNA
+#> 5 5
+#> Taxa abundance distribution
+#> 5
Unlike other MGnifyR
functions,
searchFile()
is not limited to analyses
, and
by specifying accession_type
other results types may be
@@ -538,7 +1408,8 @@
# Where are the files?
-cached_location
A second download option is available, which allows built-in parsing of the file. If we know ahead of time what processing will be performed, it may be possible to integrate it into a function, pass this function @@ -579,9 +1450,114 @@
-amoC_seq_counts
amoC_seq_counts
+#> [1] "/tmp/RtmpeKYwQ1/file4c1941f3c62"
-sessionInfo()
sessionInfo()
+#> R Under development (unstable) (2024-01-31 r85845)
+#> Platform: x86_64-pc-linux-gnu
+#> Running under: Ubuntu 22.04.3 LTS
+#>
+#> Matrix products: default
+#> BLAS: /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3
+#> LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.20.so; LAPACK version 3.10.0
+#>
+#> locale:
+#> [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
+#> [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
+#> [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
+#> [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
+#> [9] LC_ADDRESS=C LC_TELEPHONE=C
+#> [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
+#>
+#> time zone: UTC
+#> tzcode source: system (glibc)
+#>
+#> attached base packages:
+#> [1] stats4 stats graphics grDevices utils datasets methods
+#> [8] base
+#>
+#> other attached packages:
+#> [1] miaViz_1.11.0 ggraph_2.1.0
+#> [3] scater_1.31.2 ggplot2_3.4.4
+#> [5] scuttle_1.13.0 MGnifyR_0.99.15
+#> [7] biomformat_1.31.0 mia_1.11.1
+#> [9] MultiAssayExperiment_1.29.0 TreeSummarizedExperiment_2.11.0
+#> [11] Biostrings_2.71.2 XVector_0.43.1
+#> [13] SingleCellExperiment_1.25.0 SummarizedExperiment_1.33.3
+#> [15] Biobase_2.63.0 GenomicRanges_1.55.2
+#> [17] GenomeInfoDb_1.39.5 IRanges_2.37.1
+#> [19] S4Vectors_0.41.3 BiocGenerics_0.49.1
+#> [21] MatrixGenerics_1.15.0 matrixStats_1.2.0
+#> [23] knitr_1.45 BiocStyle_2.31.0
+#>
+#> loaded via a namespace (and not attached):
+#> [1] jsonlite_1.8.8 magrittr_2.0.3
+#> [3] ggbeeswarm_0.7.2 farver_2.1.1
+#> [5] rmarkdown_2.25 fs_1.6.3
+#> [7] zlibbioc_1.49.0 ragg_1.2.7
+#> [9] vctrs_0.6.5 multtest_2.59.0
+#> [11] memoise_2.0.1 DelayedMatrixStats_1.25.1
+#> [13] RCurl_1.98-1.14 ggtree_3.11.0
+#> [15] BiocBaseUtils_1.5.0 htmltools_0.5.7
+#> [17] S4Arrays_1.3.3 BiocNeighbors_1.21.2
+#> [19] Rhdf5lib_1.25.1 gridGraphics_0.5-1
+#> [21] SparseArray_1.3.3 rhdf5_2.47.2
+#> [23] sass_0.4.8 bslib_0.6.1
+#> [25] desc_1.4.3 plyr_1.8.9
+#> [27] DECIPHER_2.31.1 cachem_1.0.8
+#> [29] igraph_2.0.1.1 iterators_1.0.14
+#> [31] lifecycle_1.0.4 pkgconfig_2.0.3
+#> [33] rsvd_1.0.5 Matrix_1.6-5
+#> [35] R6_2.5.1 fastmap_1.1.1
+#> [37] GenomeInfoDbData_1.2.11 aplot_0.2.2
+#> [39] digest_0.6.34 ggnewscale_0.4.9
+#> [41] colorspace_2.1-0 patchwork_1.2.0
+#> [43] irlba_2.3.5.1 textshaping_0.3.7
+#> [45] RSQLite_2.3.5 vegan_2.6-4
+#> [47] beachmat_2.19.1 labeling_0.4.3
+#> [49] fansi_1.0.6 urltools_1.7.3
+#> [51] polyclip_1.10-6 httr_1.4.7
+#> [53] abind_1.4-5 mgcv_1.9-1
+#> [55] compiler_4.4.0 bit64_4.0.5
+#> [57] withr_3.0.0 BiocParallel_1.37.0
+#> [59] viridis_0.6.5 DBI_1.2.1
+#> [61] highr_0.10 ggforce_0.4.1
+#> [63] MASS_7.3-60.2 DelayedArray_0.29.1
+#> [65] bluster_1.13.0 permute_0.9-7
+#> [67] tools_4.4.0 vipor_0.4.7
+#> [69] beeswarm_0.4.0 ape_5.7-1
+#> [71] glue_1.7.0 nlme_3.1-164
+#> [73] rhdf5filters_1.15.1 grid_4.4.0
+#> [75] ade4_1.7-22 cluster_2.1.6
+#> [77] reshape2_1.4.4 generics_0.1.3
+#> [79] gtable_0.3.4 tidyr_1.3.1
+#> [81] data.table_1.15.0 tidygraph_1.3.1
+#> [83] BiocSingular_1.19.0 ScaledMatrix_1.11.0
+#> [85] utf8_1.2.4 foreach_1.5.2
+#> [87] ggrepel_0.9.5 pillar_1.9.0
+#> [89] stringr_1.5.1 yulab.utils_0.1.4
+#> [91] splines_4.4.0 tweenr_2.0.2
+#> [93] dplyr_1.1.4 treeio_1.27.0
+#> [95] lattice_0.22-5 survival_3.5-7
+#> [97] bit_4.0.5 tidyselect_1.2.0
+#> [99] DirichletMultinomial_1.45.0 gridExtra_2.3
+#> [101] bookdown_0.37 phyloseq_1.47.0
+#> [103] xfun_0.41 graphlayouts_1.1.0
+#> [105] stringi_1.8.3 ggfun_0.1.4
+#> [107] lazyeval_0.2.2 yaml_2.3.8
+#> [109] evaluate_0.23 codetools_0.2-19
+#> [111] tibble_3.2.1 BiocManager_1.30.22
+#> [113] ggplotify_0.1.2 cli_3.6.2
+#> [115] systemfonts_1.0.5 munsell_0.5.0
+#> [117] jquerylib_0.1.4 Rcpp_1.0.12
+#> [119] triebeard_0.4.1 parallel_4.4.0
+#> [121] pkgdown_2.0.7 blob_1.2.4
+#> [123] sparseMatrixStats_1.15.0 bitops_1.0-7
+#> [125] decontam_1.23.0 viridisLite_0.4.2
+#> [127] tidytree_0.4.6 scales_1.3.0
+#> [129] purrr_1.0.2 crayon_1.5.2
+#> [131] rlang_1.1.3
diff --git a/articles/MGnifyR_long_files/figure-html/calculate_diversity-1.png b/articles/MGnifyR_long_files/figure-html/calculate_diversity-1.png
new file mode 100644
index 00000000..3db93d70
Binary files /dev/null and b/articles/MGnifyR_long_files/figure-html/calculate_diversity-1.png differ
diff --git a/articles/MGnifyR_long_files/figure-html/full_metatdata_explore-1.png b/articles/MGnifyR_long_files/figure-html/full_metatdata_explore-1.png
new file mode 100644
index 00000000..ff234c47
Binary files /dev/null and b/articles/MGnifyR_long_files/figure-html/full_metatdata_explore-1.png differ
diff --git a/articles/MGnifyR_long_files/figure-html/pcoa-1.png b/articles/MGnifyR_long_files/figure-html/pcoa-1.png
new file mode 100644
index 00000000..488c67a4
Binary files /dev/null and b/articles/MGnifyR_long_files/figure-html/pcoa-1.png differ
diff --git a/articles/MGnifyR_long_files/figure-html/plot_abundance-1.png b/articles/MGnifyR_long_files/figure-html/plot_abundance-1.png
new file mode 100644
index 00000000..783393bb
Binary files /dev/null and b/articles/MGnifyR_long_files/figure-html/plot_abundance-1.png differ
diff --git a/pkgdown.yml b/pkgdown.yml
index c000027b..a2101cd3 100644
--- a/pkgdown.yml
+++ b/pkgdown.yml
@@ -4,5 +4,5 @@ pkgdown_sha: ~
articles:
MGnifyR: MGnifyR.html
MGnifyR_long: MGnifyR_long.html
-last_built: 2024-02-02T14:21Z
+last_built: 2024-02-04T14:15Z
diff --git a/reference/.MGnifyR_cache/analyses/MGYA00377505_format_json/.RDS b/reference/.MGnifyR_cache/analyses/MGYA00377505_format_json/.RDS
index 87af23ed..f3627111 100644
Binary files a/reference/.MGnifyR_cache/analyses/MGYA00377505_format_json/.RDS and b/reference/.MGnifyR_cache/analyses/MGYA00377505_format_json/.RDS differ
diff --git a/reference/.MGnifyR_cache/samples/ERS2967391_format_json/.RDS b/reference/.MGnifyR_cache/samples/ERS2967391_format_json/.RDS
index c9c07bb1..0357d0c7 100644
Binary files a/reference/.MGnifyR_cache/samples/ERS2967391_format_json/.RDS and b/reference/.MGnifyR_cache/samples/ERS2967391_format_json/.RDS differ
diff --git a/reference/.MGnifyR_cache/studies/MGYS00005058_format_json/.RDS b/reference/.MGnifyR_cache/studies/MGYS00005058_format_json/.RDS
index eb564458..9e27b927 100644
Binary files a/reference/.MGnifyR_cache/studies/MGYS00005058_format_json/.RDS and b/reference/.MGnifyR_cache/studies/MGYS00005058_format_json/.RDS differ
diff --git a/reference/searchAnalysis.html b/reference/searchAnalysis.html
index 7b74cba5..cfd2c0a4 100644
--- a/reference/searchAnalysis.html
+++ b/reference/searchAnalysis.html
@@ -118,11 +118,7 @@