diff --git a/articles/MGnifyR.html b/articles/MGnifyR.html index d55f0849..5f676583 100644 --- a/articles/MGnifyR.html +++ b/articles/MGnifyR.html @@ -80,7 +80,7 @@
-samples
-#>                  biosample   accession
-#> ERS14399432 SAMEA112288606 ERS14399432
-#> ERS14399426 SAMEA112288600 ERS14399426
-#> ERS5222929    SAMEA7465225  ERS5222929
-#> ERS5222919    SAMEA7465215  ERS5222919
-#> ERS5222921    SAMEA7465217  ERS5222921
-#>                                                                                                                                                                                               sample-desc
-#> ERS14399432                                                                                                                                                                                       biomass
-#> ERS14399426                                                                                                                                                                                       biomass
-#> ERS5222929             Raw water source [Surface water], Treatment [Ferric sulfate coagulation, clarification, sand filtration, ozonisation, activated carbon filtration], Disinfection [UV-light, NH2Cl]
-#> ERS5222919  Raw water source [Artificial groundwater], Treatment [Aeration, lime stabilization, flocculation, clarification, addition of sulphuric acid, sand filtration], Disinfection [No disinfection]
-#> ERS5222921                              Raw water source [Artificial groundwater], Treatment [Aeration, lime stabilization, flocculation, clarification, sand filtration], Disinfection [No disinfection]
-#>             environment-biome                environment-feature
-#> ERS14399432              <NA>                               <NA>
-#> ERS14399426              <NA>                               <NA>
-#> ERS5222929         Freshwater Drinking water distribution system
-#> ERS5222919         Freshwater Drinking water distribution system
-#> ERS5222921         Freshwater Drinking water distribution system
-#>             environment-material     sample-name    sample-alias
-#> ERS14399432                 <NA>         GREEN64         GREEN64
-#> ERS14399426                 <NA>         GREEN58         GREEN58
-#> ERS5222929                 water THS_D2_SW_CHM_R THS_D2_SW_CHM_R
-#> ERS5222919                 water  THS_A1_GW_ND_R  THS_A1_GW_ND_R
-#> ERS5222921                 water  THS_B1_GW_ND_D  THS_B1_GW_ND_D
-#>                     last-update
-#> ERS14399432 2023-04-23T00:43:15
-#> ERS14399426 2023-04-23T02:07:52
-#> ERS5222929  2020-11-10T18:16:23
-#> ERS5222919  2020-11-10T18:02:30
-#> ERS5222921  2021-06-02T14:21:12
-#>                                                                    project name
-#> ERS14399432                        ARGs study in bioelectrochemical remediation
-#> ERS14399426                        ARGs study in bioelectrochemical remediation
-#> ERS5222929  DWDSOME (Microbiome Dynamics in Drinking Water Distribution System)
-#> ERS5222919  DWDSOME (Microbiome Dynamics in Drinking Water Distribution System)
-#> ERS5222921  DWDSOME (Microbiome Dynamics in Drinking Water Distribution System)
+head(samples)
+#>                  biosample   accession sample-desc
+#> ERS14399436 SAMEA112288610 ERS14399436     biofilm
+#> ERS14399429 SAMEA112288603 ERS14399429     biofilm
+#> ERS14399431 SAMEA112288605 ERS14399431     biomass
+#> ERS14399428 SAMEA112288602 ERS14399428     biofilm
+#> ERS14399416 SAMEA112288590 ERS14399416     biofilm
+#> ERS14399423 SAMEA112288597 ERS14399423     biofilm
+#>                                  environment-biome
+#> ERS14399436 Laboratory environment (ENVO_01001405)
+#> ERS14399429 Laboratory environment (ENVO_01001405)
+#> ERS14399431                                   <NA>
+#> ERS14399428                                   <NA>
+#> ERS14399416                                   <NA>
+#> ERS14399423                                   <NA>
+#>                                environment-feature
+#> ERS14399436 Laboratory environment (ENVO_01001405)
+#> ERS14399429 Laboratory environment (ENVO_01001405)
+#> ERS14399431                                   <NA>
+#> ERS14399428                                   <NA>
+#> ERS14399416                                   <NA>
+#> ERS14399423                                   <NA>
+#>                         environment-material sample-name sample-alias
+#> ERS14399436 Biofilm material (ENVO:01000156)     GREEN68      GREEN68
+#> ERS14399429 Biofilm material (ENVO:01000156)     GREEN61      GREEN61
+#> ERS14399431                             <NA>     GREEN63      GREEN63
+#> ERS14399428                             <NA>     GREEN60      GREEN60
+#> ERS14399416                             <NA>     GREEN48      GREEN48
+#> ERS14399423                             <NA>     GREEN55      GREEN55
+#>                     last-update                                 project name
+#> ERS14399436 2023-08-07T11:15:24 ARGs study in bioelectrochemical remediation
+#> ERS14399429 2023-08-07T10:52:31 ARGs study in bioelectrochemical remediation
+#> ERS14399431 2023-04-23T08:53:19 ARGs study in bioelectrochemical remediation
+#> ERS14399428 2023-04-23T07:57:35 ARGs study in bioelectrochemical remediation
+#> ERS14399416 2023-04-23T06:14:08 ARGs study in bioelectrochemical remediation
+#> ERS14399423 2023-04-23T05:00:17 ARGs study in bioelectrochemical remediation
 #>             geographic location (country and/or sea,region) collection date
-#> ERS14399432                                           Spain      2022-09-14
-#> ERS14399426                                           Spain      2022-09-14
-#> ERS5222929                                          Finland      2015-09-01
-#> ERS5222919                                          Finland      2015-08-03
-#> ERS5222921                                          Finland      2015-08-03
-#>             environment (biome)              environment (feature)
-#> ERS14399432                <NA>                               <NA>
-#> ERS14399426                <NA>                               <NA>
-#> ERS5222929           Freshwater Drinking water distribution system
-#> ERS5222919           Freshwater Drinking water distribution system
-#> ERS5222921           Freshwater Drinking water distribution system
-#>             environment (material) ENA checklist acc_type
-#> ERS14399432                   <NA>     ERC000023  samples
-#> ERS14399426                   <NA>     ERC000023  samples
-#> ERS5222929                   water     ERC000025  samples
-#> ERS5222919                   water     ERC000025  samples
-#> ERS5222921                   water     ERC000025  samples
-#>                                                                              biome
-#> ERS14399432                   root:Environmental:Aquatic:Freshwater:Drinking water
-#> ERS14399426                   root:Environmental:Aquatic:Freshwater:Drinking water
-#> ERS5222929  root:Environmental:Aquatic:Freshwater:Drinking water:Delivery networks
-#> ERS5222919  root:Environmental:Aquatic:Freshwater:Drinking water:Delivery networks
-#> ERS5222921  root:Environmental:Aquatic:Freshwater:Drinking water:Delivery networks
-#>                  studies    type collection-date latitude longitude
-#> ERS14399432 MGYS00006211 samples      2022-09-14     <NA>      <NA>
-#> ERS14399426 MGYS00006211 samples      2022-09-14     <NA>      <NA>
-#> ERS5222929  MGYS00005650 samples      2015-09-01    61.92     25.75
-#> ERS5222919  MGYS00005650 samples      2015-08-03    61.92     25.75
-#> ERS5222921  MGYS00005650 samples      2015-08-03    61.92     25.75
-#>             investigation type geographic location (longitude)
-#> ERS14399432               <NA>                            <NA>
-#> ERS14399426               <NA>                            <NA>
-#> ERS5222929   metatranscriptome                           25.75
-#> ERS5222919   metatranscriptome                           25.75
-#> ERS5222921          metagenome                           25.75
-#>                                       environmental package sequencing method
-#> ERS14399432                                            <NA>              <NA>
-#> ERS14399426                                            <NA>              <NA>
-#> ERS5222929  miscellaneous natural or artificial environment          Illumina
-#> ERS5222919  miscellaneous natural or artificial environment          Illumina
-#> ERS5222921  miscellaneous natural or artificial environment          Illumina
-#>             geographic location (latitude) analysis-completed geo-loc-name
-#> ERS14399432                           <NA>               <NA>         <NA>
-#> ERS14399426                           <NA>               <NA>         <NA>
-#> ERS5222929                           61.92               <NA>         <NA>
-#> ERS5222919                           61.92               <NA>         <NA>
-#> ERS5222921                           61.92               <NA>         <NA>
-#>             instrument model
-#> ERS14399432             <NA>
-#> ERS14399426             <NA>
-#> ERS5222929              <NA>
-#> ERS5222919              <NA>
-#> ERS5222921              <NA>
+#> ERS14399436 Spain 2022-09-14 +#> ERS14399429 Spain 2022-09-14 +#> ERS14399431 Spain 2022-09-14 +#> ERS14399428 Spain 2022-09-14 +#> ERS14399416 Spain 2022-09-14 +#> ERS14399423 Spain 2022-09-14 +#> environment (biome) +#> ERS14399436 Laboratory environment (ENVO_01001405) +#> ERS14399429 Laboratory environment (ENVO_01001405) +#> ERS14399431 <NA> +#> ERS14399428 <NA> +#> ERS14399416 <NA> +#> ERS14399423 <NA> +#> environment (feature) +#> ERS14399436 Laboratory environment (ENVO_01001405) +#> ERS14399429 Laboratory environment (ENVO_01001405) +#> ERS14399431 <NA> +#> ERS14399428 <NA> +#> ERS14399416 <NA> +#> ERS14399423 <NA> +#> environment (material) ENA checklist acc_type +#> ERS14399436 Biofilm material (ENVO:01000156) ERC000023 samples +#> ERS14399429 Biofilm material (ENVO:01000156) ERC000023 samples +#> ERS14399431 <NA> ERC000023 samples +#> ERS14399428 <NA> ERC000023 samples +#> ERS14399416 <NA> ERC000023 samples +#> ERS14399423 <NA> ERC000023 samples +#> studies biome +#> ERS14399436 MGYS00006211 root:Environmental:Aquatic:Freshwater:Drinking water +#> ERS14399429 MGYS00006211 root:Environmental:Aquatic:Freshwater:Drinking water +#> ERS14399431 MGYS00006211 root:Environmental:Aquatic:Freshwater:Drinking water +#> ERS14399428 MGYS00006211 root:Environmental:Aquatic:Freshwater:Drinking water +#> ERS14399416 MGYS00006211 root:Environmental:Aquatic:Freshwater:Drinking water +#> ERS14399423 MGYS00006211 root:Environmental:Aquatic:Freshwater:Drinking water +#> type collection-date latitude longitude investigation type +#> ERS14399436 samples <NA> <NA> <NA> <NA> +#> ERS14399429 samples <NA> <NA> <NA> <NA> +#> ERS14399431 samples 2022-09-14 <NA> <NA> <NA> +#> ERS14399428 samples 2022-09-14 <NA> <NA> <NA> +#> ERS14399416 samples 2022-09-14 <NA> <NA> <NA> +#> ERS14399423 samples 2022-09-14 <NA> <NA> <NA> +#> geographic location (longitude) environmental package +#> ERS14399436 <NA> <NA> +#> ERS14399429 <NA> <NA> +#> ERS14399431 <NA> <NA> +#> ERS14399428 <NA> <NA> 
+#> ERS14399416 <NA> <NA> +#> ERS14399423 <NA> <NA> +#> sequencing method geographic location (latitude) analysis-completed +#> ERS14399436 <NA> <NA> <NA> +#> ERS14399429 <NA> <NA> <NA> +#> ERS14399431 <NA> <NA> <NA> +#> ERS14399428 <NA> <NA> <NA> +#> ERS14399416 <NA> <NA> <NA> +#> ERS14399423 <NA> <NA> <NA> +#> geo-loc-name instrument model +#> ERS14399436 <NA> <NA> +#> ERS14399429 <NA> <NA> +#> ERS14399431 <NA> <NA> +#> ERS14399428 <NA> <NA> +#> ERS14399416 <NA> <NA> +#> ERS14399423 <NA> <NA>

Find relevant analyses accessions @@ -345,7 +349,8 @@

Find relevant analysesanalyses_accessions <- searchAnalysis(mg, "samples", samples$accession)

 head(analyses_accessions)
-#> NULL
+#> [1] "MGYA00652201" "MGYA00652185" "MGYA00643487" "MGYA00643486" "MGYA00643485" +#> [6] "MGYA00643484"

Fetch metadata @@ -356,228 +361,272 @@

Fetch metadataanalyses_metadata <- getMetadata(mg, analyses_accessions)

 head(analyses_metadata)
-#>              analysis_experiment-type analysis_pipeline-version
-#> MGYA00643475                 assembly                       5.0
-#> MGYA00643477                 assembly                       5.0
-#> MGYA00575721       metatranscriptomic                       5.0
-#> MGYA00575717       metatranscriptomic                       5.0
-#> MGYA00575713              metagenomic                       5.0
-#>              analysis_analysis-status analysis_accession analysis_is-private
-#> MGYA00643475                completed       MGYA00643475                TRUE
-#> MGYA00643477                completed       MGYA00643477                TRUE
-#> MGYA00575721                completed       MGYA00575721               FALSE
-#> MGYA00575717                completed       MGYA00575717               FALSE
-#> MGYA00575713                completed       MGYA00575713               FALSE
+#>              analysis_analysis-status analysis_pipeline-version
+#> MGYA00652201                completed                       5.0
+#> MGYA00652185                completed                       5.0
+#> MGYA00643487                completed                       5.0
+#> MGYA00643486                completed                       5.0
+#> MGYA00643485                completed                       5.0
+#> MGYA00643484                completed                       5.0
+#>              analysis_experiment-type analysis_accession analysis_is-private
+#> MGYA00652201                 assembly       MGYA00652201                TRUE
+#> MGYA00652185                 assembly       MGYA00652185                TRUE
+#> MGYA00643487                 assembly       MGYA00643487                TRUE
+#> MGYA00643486                 assembly       MGYA00643486                TRUE
+#> MGYA00643485                 assembly       MGYA00643485                TRUE
+#> MGYA00643484                 assembly       MGYA00643484                TRUE
 #>              analysis_complete-time analysis_instrument-platform
-#> MGYA00643475    2023-04-23T00:43:18                     ILLUMINA
-#> MGYA00643477    2023-04-23T02:07:56                     ILLUMINA
-#> MGYA00575721    2020-11-10T18:16:27                     ILLUMINA
-#> MGYA00575717    2020-11-10T18:02:34                     ILLUMINA
-#> MGYA00575713    2020-11-10T17:47:00                     ILLUMINA
+#> MGYA00652201    2023-08-07T11:15:25                     ILLUMINA
+#> MGYA00652185    2023-08-07T10:52:31                     ILLUMINA
+#> MGYA00643487    2023-04-23T08:53:23                     ILLUMINA
+#> MGYA00643486    2023-04-23T07:57:38                     ILLUMINA
+#> MGYA00643485    2023-04-23T06:14:11                     ILLUMINA
+#> MGYA00643484    2023-04-23T05:00:21                     ILLUMINA
 #>              analysis_instrument-model analysis_Submitted nucleotide sequences
-#> MGYA00643475     Illumina NovaSeq 6000                                  187536
-#> MGYA00643477     Illumina NovaSeq 6000                                  167208
-#> MGYA00575721       Illumina HiSeq 4000                                 1766611
-#> MGYA00575717       Illumina HiSeq 4000                                 5454850
-#> MGYA00575713       Illumina HiSeq 4000                                17338992
+#> MGYA00652201     Illumina NovaSeq 6000                                  223726
+#> MGYA00652185     Illumina NovaSeq 6000                                  292409
+#> MGYA00643487     Illumina NovaSeq 6000                                  162292
+#> MGYA00643486     Illumina NovaSeq 6000                                  233327
+#> MGYA00643485     Illumina NovaSeq 6000                                  318625
+#> MGYA00643484     Illumina NovaSeq 6000                                  341952
 #>              analysis_Nucleotide sequences after format-specific filtering
-#> MGYA00643475                                                        187536
-#> MGYA00643477                                                        167208
-#> MGYA00575721                                                        293112
-#> MGYA00575717                                                       1159251
-#> MGYA00575713                                                       2179562
+#> MGYA00652201                                                        223726
+#> MGYA00652185                                                        292409
+#> MGYA00643487                                                        162292
+#> MGYA00643486                                                        233327
+#> MGYA00643485                                                        318625
+#> MGYA00643484                                                        341952
 #>              analysis_Nucleotide sequences after length filtering
-#> MGYA00643475                                               187536
-#> MGYA00643477                                               167208
-#> MGYA00575721                                               293112
-#> MGYA00575717                                              1159251
-#> MGYA00575713                                              2179562
+#> MGYA00652201                                               223726
+#> MGYA00652185                                               292409
+#> MGYA00643487                                               162292
+#> MGYA00643486                                               233327
+#> MGYA00643485                                               318625
+#> MGYA00643484                                               341952
 #>              analysis_Nucleotide sequences after undetermined bases filtering
-#> MGYA00643475                                                           187536
-#> MGYA00643477                                                           167208
-#> MGYA00575721                                                           293112
-#> MGYA00575717                                                          1159251
-#> MGYA00575713                                                          2179562
+#> MGYA00652201                                                           223726
+#> MGYA00652185                                                           292409
+#> MGYA00643487                                                           162292
+#> MGYA00643486                                                           233327
+#> MGYA00643485                                                           318625
+#> MGYA00643484                                                           341952
 #>              analysis_Reads with predicted CDS
-#> MGYA00643475                            185060
-#> MGYA00643477                            163559
-#> MGYA00575721                             34145
-#> MGYA00575717                             79238
-#> MGYA00575713                           2132038
+#> MGYA00652201                            223146
+#> MGYA00652185                            291506
+#> MGYA00643487                            159860
+#> MGYA00643486                            232596
+#> MGYA00643485                            317405
+#> MGYA00643484                            341116
 #>              analysis_Reads with predicted RNA
-#> MGYA00643475                              3748
-#> MGYA00643477                              2841
-#> MGYA00575721                            254446
-#> MGYA00575717                           1065067
-#> MGYA00575713                              6062
+#> MGYA00652201                              3008
+#> MGYA00652185                              4472
+#> MGYA00643487                              3518
+#> MGYA00643486                              3513
+#> MGYA00643485                              5420
+#> MGYA00643484                              4849
 #>              analysis_Reads with InterProScan match analysis_Predicted CDS
-#> MGYA00643475                                 140771                 586640
-#> MGYA00643477                                 127017                 464277
-#> MGYA00575721                                   7922                  34824
-#> MGYA00575717                                  14140                  80536
-#> MGYA00575713                                 850215                2213838
+#> MGYA00652201                                 182364                 489141
+#> MGYA00652185                                 241005                 674191
+#> MGYA00643487                                 124819                 519411
+#> MGYA00643486                                 192842                 542005
+#> MGYA00643485                                 259359                 893435
+#> MGYA00643484                                 280422                 826459
 #>              analysis_Predicted CDS with InterProScan match
-#> MGYA00643475                                         368387
-#> MGYA00643477                                         313613
-#> MGYA00575721                                           7947
-#> MGYA00575717                                          14203
-#> MGYA00575713                                         853169
+#> MGYA00652201                                         331022
+#> MGYA00652185                                         472354
+#> MGYA00643487                                         330317
+#> MGYA00643486                                         379304
+#> MGYA00643485                                         585352
+#> MGYA00643484                                         572175
 #>              analysis_Total InterProScan matches
-#> MGYA00643475                             1261625
-#> MGYA00643477                             1069719
-#> MGYA00575721                               12971
-#> MGYA00575717                               24971
-#> MGYA00575713                             1513676
+#> MGYA00652201                             1035846
+#> MGYA00652185                             1520845
+#> MGYA00643487                             1134950
+#> MGYA00643486                             1221863
+#> MGYA00643485                             1928199
+#> MGYA00643484                             1847000
 #>              analysis_Predicted SSU sequences analysis_Predicted LSU sequences
-#> MGYA00643475                              192                              318
-#> MGYA00643477                              171                              278
-#> MGYA00575721                           118755                           143666
-#> MGYA00575717                           573080                           504459
-#> MGYA00575713                             1593                             2969
+#> MGYA00652201                              141                              222
+#> MGYA00652185                              268                              394
+#> MGYA00643487                              217                              330
+#> MGYA00643486                              199                              345
+#> MGYA00643485                              273                              431
+#> MGYA00643484                              243                              399
 #>              analysis_acc_type study_attributes.accession
-#> MGYA00643475     analysis-jobs               MGYS00006211
-#> MGYA00643477     analysis-jobs               MGYS00006211
-#> MGYA00575721     analysis-jobs               MGYS00005650
-#> MGYA00575717     analysis-jobs               MGYS00005650
-#> MGYA00575713     analysis-jobs               MGYS00005650
+#> MGYA00652201     analysis-jobs               MGYS00006211
+#> MGYA00652185     analysis-jobs               MGYS00006211
+#> MGYA00643487     analysis-jobs               MGYS00006211
+#> MGYA00643486     analysis-jobs               MGYS00006211
+#> MGYA00643485     analysis-jobs               MGYS00006211
+#> MGYA00643484     analysis-jobs               MGYS00006211
 #>              study_attributes.bioproject study_attributes.samples-count
-#> MGYA00643475                  PRJEB58755                             23
-#> MGYA00643477                  PRJEB58755                             23
-#> MGYA00575721                  PRJEB40814                             15
-#> MGYA00575717                  PRJEB40814                             15
-#> MGYA00575713                  PRJEB40814                             15
+#> MGYA00652201                  PRJEB58755                             23
+#> MGYA00652185                  PRJEB58755                             23
+#> MGYA00643487                  PRJEB58755                             23
+#> MGYA00643486                  PRJEB58755                             23
+#> MGYA00643485                  PRJEB58755                             23
+#> MGYA00643484                  PRJEB58755                             23
 #>              study_attributes.is-private study_attributes.secondary-accession
-#> MGYA00643475                       FALSE                            ERP143823
-#> MGYA00643477                       FALSE                            ERP143823
-#> MGYA00575721                       FALSE                            ERP124491
-#> MGYA00575717                       FALSE                            ERP124491
-#> MGYA00575713                       FALSE                            ERP124491
+#> MGYA00652201                       FALSE                            ERP143823
+#> MGYA00652185                       FALSE                            ERP143823
+#> MGYA00643487                       FALSE                            ERP143823
+#> MGYA00643486                       FALSE                            ERP143823
+#> MGYA00643485                       FALSE                            ERP143823
+#> MGYA00643484                       FALSE                            ERP143823
 #>              study_attributes.centre-name
-#> MGYA00643475  Leitat Technological Center
-#> MGYA00643477  Leitat Technological Center
-#> MGYA00575721                       US EPA
-#> MGYA00575717                       US EPA
-#> MGYA00575713                       US EPA
-#>                                                                                                                                                                                                                                                                                                                                                                                                                                                                                          study_attributes.study-abstract
-#> MGYA00643475                                                                                                                                                                                                                                                                                                Elimination of several antibiotics in water by bioelectrochemical cells. The main objective is study how the concentration of antibiotic resistant genes (ARG) changed depending on the voltage application.
-#> MGYA00643477                                                                                                                                                                                                                                                                                                Elimination of several antibiotics in water by bioelectrochemical cells. The main objective is study how the concentration of antibiotic resistant genes (ARG) changed depending on the voltage application.
-#> MGYA00575721 The water microbiome in the drinking water distribution systems (DWDSs) of five waterworks in Finland with different raw water sources and treatment processes was explored. The sampled DWDSs were from two waterworks AB with non-disinfected, recharged groundwater as source water and from three waterworks utilizing chlorinated water (two DWDSs of surface waterworks CD and one of ground waterworks E). The water microbiome was characterized by Illumina high-throughput sequencing technology.
-#> MGYA00575717 The water microbiome in the drinking water distribution systems (DWDSs) of five waterworks in Finland with different raw water sources and treatment processes was explored. The sampled DWDSs were from two waterworks AB with non-disinfected, recharged groundwater as source water and from three waterworks utilizing chlorinated water (two DWDSs of surface waterworks CD and one of ground waterworks E). The water microbiome was characterized by Illumina high-throughput sequencing technology.
-#> MGYA00575713 The water microbiome in the drinking water distribution systems (DWDSs) of five waterworks in Finland with different raw water sources and treatment processes was explored. The sampled DWDSs were from two waterworks AB with non-disinfected, recharged groundwater as source water and from three waterworks utilizing chlorinated water (two DWDSs of surface waterworks CD and one of ground waterworks E). The water microbiome was characterized by Illumina high-throughput sequencing technology.
-#>                                                                                                                                            study_attributes.study-name
-#> MGYA00643475                                                                                                              ARGs study in bioelectrochemical remediation
-#> MGYA00643477                                                                                                              ARGs study in bioelectrochemical remediation
-#> MGYA00575721 Metagenomic and metatranscriptomic analysis of the microbial community in drinking water distribution systems of ground and surface waterworks in Finland
-#> MGYA00575717 Metagenomic and metatranscriptomic analysis of the microbial community in drinking water distribution systems of ground and surface waterworks in Finland
-#> MGYA00575713 Metagenomic and metatranscriptomic analysis of the microbial community in drinking water distribution systems of ground and surface waterworks in Finland
+#> MGYA00652201  Leitat Technological Center
+#> MGYA00652185  Leitat Technological Center
+#> MGYA00643487  Leitat Technological Center
+#> MGYA00643486  Leitat Technological Center
+#> MGYA00643485  Leitat Technological Center
+#> MGYA00643484  Leitat Technological Center
+#>                                                                                                                                                                                           study_attributes.study-abstract
+#> MGYA00652201 Elimination of several antibiotics in water by bioelectrochemical cells. The main objective is study how the concentration of antibiotic resistant genes (ARG) changed depending on the voltage application.
+#> MGYA00652185 Elimination of several antibiotics in water by bioelectrochemical cells. The main objective is study how the concentration of antibiotic resistant genes (ARG) changed depending on the voltage application.
+#> MGYA00643487 Elimination of several antibiotics in water by bioelectrochemical cells. The main objective is study how the concentration of antibiotic resistant genes (ARG) changed depending on the voltage application.
+#> MGYA00643486 Elimination of several antibiotics in water by bioelectrochemical cells. The main objective is study how the concentration of antibiotic resistant genes (ARG) changed depending on the voltage application.
+#> MGYA00643485 Elimination of several antibiotics in water by bioelectrochemical cells. The main objective is study how the concentration of antibiotic resistant genes (ARG) changed depending on the voltage application.
+#> MGYA00643484 Elimination of several antibiotics in water by bioelectrochemical cells. The main objective is study how the concentration of antibiotic resistant genes (ARG) changed depending on the voltage application.
+#>                               study_attributes.study-name
+#> MGYA00652201 ARGs study in bioelectrochemical remediation
+#> MGYA00652185 ARGs study in bioelectrochemical remediation
+#> MGYA00643487 ARGs study in bioelectrochemical remediation
+#> MGYA00643486 ARGs study in bioelectrochemical remediation
+#> MGYA00643485 ARGs study in bioelectrochemical remediation
+#> MGYA00643484 ARGs study in bioelectrochemical remediation
 #>              study_attributes.data-origination study_attributes.last-update
-#> MGYA00643475                         SUBMITTED          2023-08-07T11:15:24
-#> MGYA00643477                         SUBMITTED          2023-08-07T11:15:24
-#> MGYA00575721                         SUBMITTED          2021-06-02T16:00:57
-#> MGYA00575717                         SUBMITTED          2021-06-02T16:00:57
-#> MGYA00575713                         SUBMITTED          2021-06-02T16:00:57
+#> MGYA00652201                         SUBMITTED          2023-08-07T11:15:24
+#> MGYA00652185                         SUBMITTED          2023-08-07T11:15:24
+#> MGYA00643487                         SUBMITTED          2023-08-07T11:15:24
+#> MGYA00643486                         SUBMITTED          2023-08-07T11:15:24
+#> MGYA00643485                         SUBMITTED          2023-08-07T11:15:24
+#> MGYA00643484                         SUBMITTED          2023-08-07T11:15:24
 #>              study_accession study_acc_type sample_biosample sample_accession
-#> MGYA00643475    MGYS00006211        studies   SAMEA112288606      ERS14399432
-#> MGYA00643477    MGYS00006211        studies   SAMEA112288600      ERS14399426
-#> MGYA00575721    MGYS00005650        studies     SAMEA7465225       ERS5222929
-#> MGYA00575717    MGYS00005650        studies     SAMEA7465215       ERS5222919
-#> MGYA00575713    MGYS00005650        studies     SAMEA7465217       ERS5222921
-#>              sample_collection-date
-#> MGYA00643475             2022-09-14
-#> MGYA00643477             2022-09-14
-#> MGYA00575721             2015-09-01
-#> MGYA00575717             2015-08-03
-#> MGYA00575713             2015-08-03
-#>                                                                                                                                                                                         sample_sample-desc
-#> MGYA00643475                                                                                                                                                                                       biomass
-#> MGYA00643477                                                                                                                                                                                       biomass
-#> MGYA00575721            Raw water source [Surface water], Treatment [Ferric sulfate coagulation, clarification, sand filtration, ozonisation, activated carbon filtration], Disinfection [UV-light, NH2Cl]
-#> MGYA00575717 Raw water source [Artificial groundwater], Treatment [Aeration, lime stabilization, flocculation, clarification, addition of sulphuric acid, sand filtration], Disinfection [No disinfection]
-#> MGYA00575713                             Raw water source [Artificial groundwater], Treatment [Aeration, lime stabilization, flocculation, clarification, sand filtration], Disinfection [No disinfection]
-#>              sample_sample-name sample_sample-alias  sample_last-update
-#> MGYA00643475            GREEN64             GREEN64 2023-04-23T00:43:15
-#> MGYA00643477            GREEN58             GREEN58 2023-04-23T02:07:52
-#> MGYA00575721    THS_D2_SW_CHM_R     THS_D2_SW_CHM_R 2020-11-10T18:16:23
-#> MGYA00575717     THS_A1_GW_ND_R      THS_A1_GW_ND_R 2020-11-10T18:02:30
-#> MGYA00575713     THS_B1_GW_ND_D      THS_B1_GW_ND_D 2021-06-02T14:21:12
-#>                                                              sample_project name
-#> MGYA00643475                        ARGs study in bioelectrochemical remediation
-#> MGYA00643477                        ARGs study in bioelectrochemical remediation
-#> MGYA00575721 DWDSOME (Microbiome Dynamics in Drinking Water Distribution System)
-#> MGYA00575717 DWDSOME (Microbiome Dynamics in Drinking Water Distribution System)
-#> MGYA00575713 DWDSOME (Microbiome Dynamics in Drinking Water Distribution System)
+#> MGYA00652201    MGYS00006211        studies   SAMEA112288610      ERS14399436
+#> MGYA00652185    MGYS00006211        studies   SAMEA112288603      ERS14399429
+#> MGYA00643487    MGYS00006211        studies   SAMEA112288605      ERS14399431
+#> MGYA00643486    MGYS00006211        studies   SAMEA112288602      ERS14399428
+#> MGYA00643485    MGYS00006211        studies   SAMEA112288590      ERS14399416
+#> MGYA00643484    MGYS00006211        studies   SAMEA112288597      ERS14399423
+#>              sample_sample-desc               sample_environment-biome
+#> MGYA00652201            biofilm Laboratory environment (ENVO_01001405)
+#> MGYA00652185            biofilm Laboratory environment (ENVO_01001405)
+#> MGYA00643487            biomass                                   <NA>
+#> MGYA00643486            biofilm                                   <NA>
+#> MGYA00643485            biofilm                                   <NA>
+#> MGYA00643484            biofilm                                   <NA>
+#>                          sample_environment-feature
+#> MGYA00652201 Laboratory environment (ENVO_01001405)
+#> MGYA00652185 Laboratory environment (ENVO_01001405)
+#> MGYA00643487                                   <NA>
+#> MGYA00643486                                   <NA>
+#> MGYA00643485                                   <NA>
+#> MGYA00643484                                   <NA>
+#>                   sample_environment-material sample_sample-name
+#> MGYA00652201 Biofilm material (ENVO:01000156)            GREEN68
+#> MGYA00652185 Biofilm material (ENVO:01000156)            GREEN61
+#> MGYA00643487                             <NA>            GREEN63
+#> MGYA00643486                             <NA>            GREEN60
+#> MGYA00643485                             <NA>            GREEN48
+#> MGYA00643484                             <NA>            GREEN55
+#>              sample_sample-alias  sample_last-update
+#> MGYA00652201             GREEN68 2023-08-07T11:15:24
+#> MGYA00652185             GREEN61 2023-08-07T10:52:31
+#> MGYA00643487             GREEN63 2023-04-23T08:53:19
+#> MGYA00643486             GREEN60 2023-04-23T07:57:35
+#> MGYA00643485             GREEN48 2023-04-23T06:14:08
+#> MGYA00643484             GREEN55 2023-04-23T05:00:17
+#>                                       sample_project name
+#> MGYA00652201 ARGs study in bioelectrochemical remediation
+#> MGYA00652185 ARGs study in bioelectrochemical remediation
+#> MGYA00643487 ARGs study in bioelectrochemical remediation
+#> MGYA00643486 ARGs study in bioelectrochemical remediation
+#> MGYA00643485 ARGs study in bioelectrochemical remediation
+#> MGYA00643484 ARGs study in bioelectrochemical remediation
 #>              sample_geographic location (country and/or sea,region)
-#> MGYA00643475                                                  Spain
-#> MGYA00643477                                                  Spain
-#> MGYA00575721                                                Finland
-#> MGYA00575717                                                Finland
-#> MGYA00575713                                                Finland
-#>              sample_collection date sample_ENA checklist sample_acc_type
-#> MGYA00643475             2022-09-14            ERC000023         samples
-#> MGYA00643477             2022-09-14            ERC000023         samples
-#> MGYA00575721             2015-09-01            ERC000025         samples
-#> MGYA00575717             2015-08-03            ERC000025         samples
-#> MGYA00575713             2015-08-03            ERC000025         samples
-#>              assembly_accession
-#> MGYA00643475        ERZ16299693
-#> MGYA00643477        ERZ16299677
-#> MGYA00575721               <NA>
-#> MGYA00575717               <NA>
-#> MGYA00575713               <NA>
-#>                                                                        biome_string
-#> MGYA00643475                   root:Environmental:Aquatic:Freshwater:Drinking water
-#> MGYA00643477                   root:Environmental:Aquatic:Freshwater:Drinking water
-#> MGYA00575721 root:Environmental:Aquatic:Freshwater:Drinking water:Delivery networks
-#> MGYA00575717 root:Environmental:Aquatic:Freshwater:Drinking water:Delivery networks
-#> MGYA00575713 root:Environmental:Aquatic:Freshwater:Drinking water:Delivery networks
-#>              sample_latitude sample_longitude sample_environment-biome
-#> MGYA00643475            <NA>             <NA>                     <NA>
-#> MGYA00643477            <NA>             <NA>                     <NA>
-#> MGYA00575721           61.92            25.75               Freshwater
-#> MGYA00575717           61.92            25.75               Freshwater
-#> MGYA00575713           61.92            25.75               Freshwater
-#>                      sample_environment-feature sample_environment-material
-#> MGYA00643475                               <NA>                        <NA>
-#> MGYA00643477                               <NA>                        <NA>
-#> MGYA00575721 Drinking water distribution system                       water
-#> MGYA00575717 Drinking water distribution system                       water
-#> MGYA00575713 Drinking water distribution system                       water
+#> MGYA00652201                                                  Spain
+#> MGYA00652185                                                  Spain
+#> MGYA00643487                                                  Spain
+#> MGYA00643486                                                  Spain
+#> MGYA00643485                                                  Spain
+#> MGYA00643484                                                  Spain
+#>              sample_collection date             sample_environment (biome)
+#> MGYA00652201             2022-09-14 Laboratory environment (ENVO_01001405)
+#> MGYA00652185             2022-09-14 Laboratory environment (ENVO_01001405)
+#> MGYA00643487             2022-09-14                                   <NA>
+#> MGYA00643486             2022-09-14                                   <NA>
+#> MGYA00643485             2022-09-14                                   <NA>
+#> MGYA00643484             2022-09-14                                   <NA>
+#>                        sample_environment (feature)
+#> MGYA00652201 Laboratory environment (ENVO_01001405)
+#> MGYA00652185 Laboratory environment (ENVO_01001405)
+#> MGYA00643487                                   <NA>
+#> MGYA00643486                                   <NA>
+#> MGYA00643485                                   <NA>
+#> MGYA00643484                                   <NA>
+#>                 sample_environment (material) sample_ENA checklist
+#> MGYA00652201 Biofilm material (ENVO:01000156)            ERC000023
+#> MGYA00652185 Biofilm material (ENVO:01000156)            ERC000023
+#> MGYA00643487                             <NA>            ERC000023
+#> MGYA00643486                             <NA>            ERC000023
+#> MGYA00643485                             <NA>            ERC000023
+#> MGYA00643484                             <NA>            ERC000023
+#>              sample_acc_type assembly_accession
+#> MGYA00652201         samples        ERZ20300939
+#> MGYA00652185         samples        ERZ20300942
+#> MGYA00643487         samples        ERZ16299686
+#> MGYA00643486         samples        ERZ16299690
+#> MGYA00643485         samples        ERZ16299649
+#> MGYA00643484         samples        ERZ16299683
+#>                                                      biome_string
+#> MGYA00652201 root:Environmental:Aquatic:Freshwater:Drinking water
+#> MGYA00652185 root:Environmental:Aquatic:Freshwater:Drinking water
+#> MGYA00643487 root:Environmental:Aquatic:Freshwater:Drinking water
+#> MGYA00643486 root:Environmental:Aquatic:Freshwater:Drinking water
+#> MGYA00643485 root:Environmental:Aquatic:Freshwater:Drinking water
+#> MGYA00643484 root:Environmental:Aquatic:Freshwater:Drinking water
+#>              sample_collection-date sample_latitude sample_longitude
+#> MGYA00652201                   <NA>            <NA>             <NA>
+#> MGYA00652185                   <NA>            <NA>             <NA>
+#> MGYA00643487             2022-09-14            <NA>             <NA>
+#> MGYA00643486             2022-09-14            <NA>             <NA>
+#> MGYA00643485             2022-09-14            <NA>             <NA>
+#> MGYA00643484             2022-09-14            <NA>             <NA>
 #>              sample_investigation type sample_geographic location (longitude)
-#> MGYA00643475                      <NA>                                   <NA>
-#> MGYA00643477                      <NA>                                   <NA>
-#> MGYA00575721         metatranscriptome                                  25.75
-#> MGYA00575717         metatranscriptome                                  25.75
-#> MGYA00575713                metagenome                                  25.75
-#>              sample_environment (biome)       sample_environment (feature)
-#> MGYA00643475                       <NA>                               <NA>
-#> MGYA00643477                       <NA>                               <NA>
-#> MGYA00575721                 Freshwater Drinking water distribution system
-#> MGYA00575717                 Freshwater Drinking water distribution system
-#> MGYA00575713                 Freshwater Drinking water distribution system
-#>              sample_environment (material)
-#> MGYA00643475                          <NA>
-#> MGYA00643477                          <NA>
-#> MGYA00575721                         water
-#> MGYA00575717                         water
-#> MGYA00575713                         water
-#>                                 sample_environmental package
-#> MGYA00643475                                            <NA>
-#> MGYA00643477                                            <NA>
-#> MGYA00575721 miscellaneous natural or artificial environment
-#> MGYA00575717 miscellaneous natural or artificial environment
-#> MGYA00575713 miscellaneous natural or artificial environment
-#>              sample_sequencing method sample_geographic location (latitude)
-#> MGYA00643475                     <NA>                                  <NA>
-#> MGYA00643477                     <NA>                                  <NA>
-#> MGYA00575721                 Illumina                                 61.92
-#> MGYA00575717                 Illumina                                 61.92
-#> MGYA00575713                 Illumina                                 61.92
-#>              run_accession
-#> MGYA00643475          <NA>
-#> MGYA00643477          <NA>
-#> MGYA00575721    ERR4702562
-#> MGYA00575717    ERR4702552
-#> MGYA00575713    ERR4702554
+#> MGYA00652201 <NA> <NA> +#> MGYA00652185 <NA> <NA> +#> MGYA00643487 <NA> <NA> +#> MGYA00643486 <NA> <NA> +#> MGYA00643485 <NA> <NA> +#> MGYA00643484 <NA> <NA> +#> sample_environmental package sample_sequencing method +#> MGYA00652201 <NA> <NA> +#> MGYA00652185 <NA> <NA> +#> MGYA00643487 <NA> <NA> +#> MGYA00643486 <NA> <NA> +#> MGYA00643485 <NA> <NA> +#> MGYA00643484 <NA> <NA> +#> sample_geographic location (latitude) run_accession +#> MGYA00652201 <NA> <NA> +#> MGYA00652185 <NA> <NA> +#> MGYA00643487 <NA> <NA> +#> MGYA00643486 <NA> <NA> +#> MGYA00643485 <NA> <NA> +#> MGYA00643484 <NA> <NA> +#> sample_analysis-completed sample_geo-loc-name +#> MGYA00652201 <NA> <NA> +#> MGYA00652185 <NA> <NA> +#> MGYA00643487 <NA> <NA> +#> MGYA00643486 <NA> <NA> +#> MGYA00643485 <NA> <NA> +#> MGYA00643484 <NA> <NA> +#> sample_instrument model +#> MGYA00652201 <NA> +#> MGYA00652185 <NA> +#> MGYA00643487 <NA> +#> MGYA00643486 <NA> +#> MGYA00643485 <NA> +#> MGYA00643484 <NA>

Fetch microbiome data @@ -598,12 +647,12 @@

Fetch microbiome data#> A MultiAssayExperiment object of 6 listed #> experiments with user-defined names and respective classes. #> Containing an ExperimentList class object of length 6: -#> [1] microbiota: TreeSummarizedExperiment with 2029 rows and 5 columns -#> [2] go-slim: TreeSummarizedExperiment with 116 rows and 5 columns -#> [3] go-terms: TreeSummarizedExperiment with 3133 rows and 5 columns -#> [4] interpro-identifiers: TreeSummarizedExperiment with 18223 rows and 5 columns -#> [5] taxonomy: TreeSummarizedExperiment with 3112 rows and 5 columns -#> [6] taxonomy-lsu: TreeSummarizedExperiment with 3378 rows and 5 columns +#> [1] microbiota: TreeSummarizedExperiment with 3506 rows and 50 columns +#> [2] go-slim: TreeSummarizedExperiment with 116 rows and 38 columns +#> [3] go-terms: TreeSummarizedExperiment with 3133 rows and 38 columns +#> [4] interpro-identifiers: TreeSummarizedExperiment with 18223 rows and 38 columns +#> [5] taxonomy: TreeSummarizedExperiment with 3617 rows and 50 columns +#> [6] taxonomy-lsu: TreeSummarizedExperiment with 3378 rows and 42 columns #> Functionality: #> experiments() - obtain the ExperimentList instance #> colData() - the primary/phenotype DataFrame @@ -617,15 +666,14 @@

Fetch microbiome data
 mae[[1]]
 #> class: TreeSummarizedExperiment 
-#> dim: 2029 5 
+#> dim: 3506 50 
 #> metadata(0):
 #> assays(1): counts
-#> rownames(2029): 200154 3353 ... 5820 100053
-#> rowData names(8): Kingdom Phylum ... Species taxonomy
-#> colnames(5): MGYA00575713 MGYA00575717 MGYA00575721 MGYA00643475
-#>   MGYA00643477
-#> colData names(61): analysis_experiment.type analysis_pipeline.version
-#>   ... sample_geographic.location..latitude. run_accession
+#> rownames(3506): 82608 62797 ... 5820 6794
+#> rowData names(9): Kingdom Phylum ... taxonomy1 taxonomy
+#> colnames(50): MGYA00144458 MGYA00144419 ... MGYA00652185 MGYA00652201
+#> colData names(64): analysis_analysis.status analysis_pipeline.version
+#>   ... sample_geo.loc.name sample_instrument.model
 #> reducedDimNames(0):
 #> mainExpName: NULL
 #> altExpNames(0):
@@ -644,11 +692,8 @@ 

Fetch microbiome data#> Loading required package: scuttle #> Loading required package: ggplot2 -plotColData(mae[[1]], "shannon", x = "sample_sample.desc") -#> Warning: Groups with fewer than two data points have been dropped. -#> Groups with fewer than two data points have been dropped. -#> Groups with fewer than two data points have been dropped.

-

+plotColData(mae[[1]], "shannon", x = "sample_environment..biome.") +

 # Agglomerate data
 altExps(mae[[1]]) <- splitByRanks(mae[[1]])
@@ -671,7 +716,7 @@ 

Fetch microbiome data FUN = vegan::vegdist, method = "bray") # Plot plotReducedDim( - mae[[1]], "MDS", colour_by = "sample_sample.desc")

+ mae[[1]], "MDS", colour_by = "sample_environment..biome.")

@@ -691,60 +736,82 @@

Fetch sequence filesdl_urls$attributes.description.label == "Predicted alpha tmRNA", ] head(target_urls) #> type id -#> 24 analyses ERZ16299693_alpha_tmRNA.RF01849.fasta.gz -#> 74 analyses ERZ16299677_alpha_tmRNA.RF01849.fasta.gz -#> 121 analyses ERR4702562_alpha_tmRNA.RF01849.fasta.gz -#> 174 analyses ERR4702554_alpha_tmRNA.RF01849.fasta.gz +#> 24 analyses ERZ20300939_alpha_tmRNA.RF01849.fasta.gz +#> 75 analyses ERZ20300942_alpha_tmRNA.RF01849.fasta.gz +#> 126 analyses ERZ16299686_alpha_tmRNA.RF01849.fasta.gz +#> 177 analyses ERZ16299690_alpha_tmRNA.RF01849.fasta.gz +#> 228 analyses ERZ16299649_alpha_tmRNA.RF01849.fasta.gz +#> 279 analyses ERZ16299683_alpha_tmRNA.RF01849.fasta.gz #> attributes.alias attributes.file.format.name -#> 24 ERZ16299693_alpha_tmRNA.RF01849.fasta.gz FASTA -#> 74 ERZ16299677_alpha_tmRNA.RF01849.fasta.gz FASTA -#> 121 ERR4702562_alpha_tmRNA.RF01849.fasta.gz FASTA -#> 174 ERR4702554_alpha_tmRNA.RF01849.fasta.gz FASTA +#> 24 ERZ20300939_alpha_tmRNA.RF01849.fasta.gz FASTA +#> 75 ERZ20300942_alpha_tmRNA.RF01849.fasta.gz FASTA +#> 126 ERZ16299686_alpha_tmRNA.RF01849.fasta.gz FASTA +#> 177 ERZ16299690_alpha_tmRNA.RF01849.fasta.gz FASTA +#> 228 ERZ16299649_alpha_tmRNA.RF01849.fasta.gz FASTA +#> 279 ERZ16299683_alpha_tmRNA.RF01849.fasta.gz FASTA #> attributes.file.format.extension attributes.file.format.compression #> 24 fasta TRUE -#> 74 fasta TRUE -#> 121 fasta TRUE -#> 174 fasta TRUE +#> 75 fasta TRUE +#> 126 fasta TRUE +#> 177 fasta TRUE +#> 228 fasta TRUE +#> 279 fasta TRUE #> attributes.description.label #> 24 Predicted alpha tmRNA -#> 74 Predicted alpha tmRNA -#> 121 Predicted alpha tmRNA -#> 174 Predicted alpha tmRNA +#> 75 Predicted alpha tmRNA +#> 126 Predicted alpha tmRNA +#> 177 Predicted alpha tmRNA +#> 228 Predicted alpha tmRNA +#> 279 Predicted alpha tmRNA #> attributes.description.description #> 24 Predicted Alphaproteobacteria transfer-messenger RNA (RF01849) -#> 74 Predicted Alphaproteobacteria transfer-messenger RNA (RF01849) -#> 
121 Predicted Alphaproteobacteria transfer-messenger RNA (RF01849) -#> 174 Predicted Alphaproteobacteria transfer-messenger RNA (RF01849) +#> 75 Predicted Alphaproteobacteria transfer-messenger RNA (RF01849) +#> 126 Predicted Alphaproteobacteria transfer-messenger RNA (RF01849) +#> 177 Predicted Alphaproteobacteria transfer-messenger RNA (RF01849) +#> 228 Predicted Alphaproteobacteria transfer-messenger RNA (RF01849) +#> 279 Predicted Alphaproteobacteria transfer-messenger RNA (RF01849) #> attributes.group.type attributes.file.checksum.checksum #> 24 non-coding RNAs -#> 74 non-coding RNAs -#> 121 non-coding RNAs -#> 174 non-coding RNAs +#> 75 non-coding RNAs +#> 126 non-coding RNAs +#> 177 non-coding RNAs +#> 228 non-coding RNAs +#> 279 non-coding RNAs #> attributes.file.checksum.checksum.algorithm #> 24 -#> 74 -#> 121 -#> 174 +#> 75 +#> 126 +#> 177 +#> 228 +#> 279 #> relationships.pipeline.data.type relationships.pipeline.data.id #> 24 pipelines 5.0 -#> 74 pipelines 5.0 -#> 121 pipelines 5.0 -#> 174 pipelines 5.0 +#> 75 pipelines 5.0 +#> 126 pipelines 5.0 +#> 177 pipelines 5.0 +#> 228 pipelines 5.0 +#> 279 pipelines 5.0 #> relationships.pipeline.related #> 24 https://www.ebi.ac.uk/metagenomics/api/v1/pipelines/5.0?format=json -#> 74 https://www.ebi.ac.uk/metagenomics/api/v1/pipelines/5.0?format=json -#> 121 https://www.ebi.ac.uk/metagenomics/api/v1/pipelines/5.0?format=json -#> 174 https://www.ebi.ac.uk/metagenomics/api/v1/pipelines/5.0?format=json +#> 75 https://www.ebi.ac.uk/metagenomics/api/v1/pipelines/5.0?format=json +#> 126 https://www.ebi.ac.uk/metagenomics/api/v1/pipelines/5.0?format=json +#> 177 https://www.ebi.ac.uk/metagenomics/api/v1/pipelines/5.0?format=json +#> 228 https://www.ebi.ac.uk/metagenomics/api/v1/pipelines/5.0?format=json +#> 279 https://www.ebi.ac.uk/metagenomics/api/v1/pipelines/5.0?format=json #> download_url -#> 24 https://www.ebi.ac.uk/metagenomics/api/v1/analyses/MGYA00643475/file/ERZ16299693_alpha_tmRNA.RF01849.fasta.gz -#> 74 
https://www.ebi.ac.uk/metagenomics/api/v1/analyses/MGYA00643477/file/ERZ16299677_alpha_tmRNA.RF01849.fasta.gz -#> 121 https://www.ebi.ac.uk/metagenomics/api/v1/analyses/MGYA00575721/file/ERR4702562_alpha_tmRNA.RF01849.fasta.gz -#> 174 https://www.ebi.ac.uk/metagenomics/api/v1/analyses/MGYA00575713/file/ERR4702554_alpha_tmRNA.RF01849.fasta.gz +#> 24 https://www.ebi.ac.uk/metagenomics/api/v1/analyses/MGYA00652201/file/ERZ20300939_alpha_tmRNA.RF01849.fasta.gz +#> 75 https://www.ebi.ac.uk/metagenomics/api/v1/analyses/MGYA00652185/file/ERZ20300942_alpha_tmRNA.RF01849.fasta.gz +#> 126 https://www.ebi.ac.uk/metagenomics/api/v1/analyses/MGYA00643487/file/ERZ16299686_alpha_tmRNA.RF01849.fasta.gz +#> 177 https://www.ebi.ac.uk/metagenomics/api/v1/analyses/MGYA00643486/file/ERZ16299690_alpha_tmRNA.RF01849.fasta.gz +#> 228 https://www.ebi.ac.uk/metagenomics/api/v1/analyses/MGYA00643485/file/ERZ16299649_alpha_tmRNA.RF01849.fasta.gz +#> 279 https://www.ebi.ac.uk/metagenomics/api/v1/analyses/MGYA00643484/file/ERZ16299683_alpha_tmRNA.RF01849.fasta.gz #> accession -#> 24 MGYA00643475 -#> 74 MGYA00643477 -#> 121 MGYA00575721 -#> 174 MGYA00575713

+#> 24 MGYA00652201 +#> 75 MGYA00652185 +#> 126 MGYA00643487 +#> 177 MGYA00643486 +#> 228 MGYA00643485 +#> 279 MGYA00643484

Finally, we can download the files with getFile().

 # Just select a single file from the target_urls list for demonstration.
@@ -753,10 +820,10 @@ 

Fetch sequence files
 # Where are the files?
 cached_location
-#> [1] "/tmp/Rtmp5cE7Yz/filebb468bb04c3"

+#> [1] "/.MGnifyR_cache/analyses/MGYA00652201/file/ERZ20300939_alpha_tmRNA.RF01849.fasta.gz"
 sessionInfo()
-#> R Under development (unstable) (2024-01-23 r85822)
+#> R Under development (unstable) (2024-01-31 r85845)
 #> Platform: x86_64-pc-linux-gnu
 #> Running under: Ubuntu 22.04.3 LTS
 #> 
diff --git a/articles/MGnifyR_files/figure-html/calculate_diversity-1.png b/articles/MGnifyR_files/figure-html/calculate_diversity-1.png
index ac682bb5..68030359 100644
Binary files a/articles/MGnifyR_files/figure-html/calculate_diversity-1.png and b/articles/MGnifyR_files/figure-html/calculate_diversity-1.png differ
diff --git a/articles/MGnifyR_files/figure-html/pcoa-1.png b/articles/MGnifyR_files/figure-html/pcoa-1.png
index a9716776..b7f95115 100644
Binary files a/articles/MGnifyR_files/figure-html/pcoa-1.png and b/articles/MGnifyR_files/figure-html/pcoa-1.png differ
diff --git a/articles/MGnifyR_files/figure-html/plot_abundance-1.png b/articles/MGnifyR_files/figure-html/plot_abundance-1.png
index 96d3fed0..fb0951e1 100644
Binary files a/articles/MGnifyR_files/figure-html/plot_abundance-1.png and b/articles/MGnifyR_files/figure-html/plot_abundance-1.png differ
diff --git a/articles/MGnifyR_long.html b/articles/MGnifyR_long.html
index d98c8b4d..e26aab6c 100644
--- a/articles/MGnifyR_long.html
+++ b/articles/MGnifyR_long.html
@@ -80,7 +80,7 @@
     

Create a client @@ -138,7 +213,28 @@

Create a client +mg +#> An object of class "MgnifyClient" +#> Slot "databaseUrl": +#> [1] "https://www.ebi.ac.uk/metagenomics/api/v1" +#> +#> Slot "authTok": +#> [1] NA +#> +#> Slot "useCache": +#> [1] FALSE +#> +#> Slot "cacheDir": +#> [1] "/__w/MGnifyR/MGnifyR/vignettes/.MGnifyR_cache" +#> +#> Slot "showWarnings": +#> [1] FALSE +#> +#> Slot "clearCache": +#> [1] FALSE +#> +#> Slot "verbose": +#> [1] TRUE

It’s recommended that local caching is enabled with useCache = TRUE. Queries to the MGnify API can be quite slow, particularly when retrieving multipage results for many analyses @@ -204,14 +300,108 @@

Search datamg, "samples", latitude_gte=60.0, experiment_type="amplicon", biome_name="Soil", instrument_platform = "Illumina", max.hits = 10)
-head(northpolar)
+head(northpolar) +#> latitude longitude biosample accession analysis-completed +#> SRS518212 78.7857 -103.5513 SAMN02484608 SRS518212 2016-05-04 +#> SRS522877 78.7857 -103.5513 SAMN02484612 SRS522877 2016-05-04 +#> SRS522878 78.7849 -103.5551 SAMN02484613 SRS522878 2016-05-04 +#> SRS522883 78.7839 -103.5574 SAMN02484618 SRS522883 2016-05-04 +#> SRS522884 78.7834 -103.5482 SAMN02484619 SRS522884 2016-05-04 +#> SRS522886 78.7854 -103.5433 SAMN02484621 SRS522886 2016-05-04 +#> sample-desc environment-biome sample-name +#> SRS518212 Keywords: GSC:MIxS MIMS:5.0 tundra ER-B1 +#> SRS522877 Keywords: GSC:MIxS MIMS:5.0 tundra ER-I1 +#> SRS522878 Keywords: GSC:MIxS MIMS:5.0 tundra ER-I2 +#> SRS522883 Keywords: GSC:MIxS MIMS:5.0 tundra ER-B7 +#> SRS522884 Keywords: GSC:MIxS MIMS:5.0 tundra ER-B8 +#> SRS522886 Keywords: GSC:MIxS MIMS:5.0 tundra ER-B10 +#> sample-alias last-update investigation type +#> SRS518212 ER-B1 2024-01-18T21:26:45 metagenome +#> SRS522877 ER-I1 2024-01-18T21:26:28 metagenome +#> SRS522878 ER-I2 2024-01-18T21:26:12 metagenome +#> SRS522883 ER-B7 2024-01-18T21:25:55 metagenome +#> SRS522884 ER-B8 2024-01-18T21:25:39 metagenome +#> SRS522886 ER-B10 2024-01-18T21:25:06 metagenome +#> project name +#> SRS518212 A community genomics investigation of fungal adaptation to cold +#> SRS522877 A community genomics investigation of fungal adaptation to cold +#> SRS522878 A community genomics investigation of fungal adaptation to cold +#> SRS522883 A community genomics investigation of fungal adaptation to cold +#> SRS522884 A community genomics investigation of fungal adaptation to cold +#> SRS522886 A community genomics investigation of fungal adaptation to cold +#> geographic location (longitude) geographic location (depth) +#> SRS518212 -103.55135 0-0.1m +#> SRS522877 -103.55135 0-0.1m +#> SRS522878 -103.555133 0-0.1m +#> SRS522883 -103.5574 0-0.1m +#> SRS522884 -103.548183 0-0.1m +#> SRS522886 -103.543267 0-0.1m +#> geographic location (country and/or 
sea,region) collection date +#> SRS518212 Canada: Isachsen, Ellef Ringnes Island 2005-08 +#> SRS522877 Canada: Isachsen, Ellef Ringnes Island 2005-08 +#> SRS522878 Canada: Isachsen, Ellef Ringnes Island 2005-08 +#> SRS522883 Canada: Isachsen, Ellef Ringnes Island 2005-08 +#> SRS522884 Canada: Isachsen, Ellef Ringnes Island 2005-08 +#> SRS522886 Canada: Isachsen, Ellef Ringnes Island 2005-08 +#> environment (biome) environment (feature) environment (material) +#> SRS518212 tundra frost boil soil +#> SRS522877 tundra interboil soil +#> SRS522878 tundra interboil soil +#> SRS522883 tundra frost boil soil +#> SRS522884 tundra frost boil soil +#> SRS522886 tundra frost boil soil +#> environmental package depth elevation +#> SRS518212 MIMS.me;MIGS/MIMS/MIMARKS.soil 0-0.1m 41 +#> SRS522877 MIMS.me;MIGS/MIMS/MIMARKS.soil 0-0.1m 41 +#> SRS522878 MIMS.me;MIGS/MIMS/MIMARKS.soil 0-0.1m 40 +#> SRS522883 MIMS.me;MIGS/MIMS/MIMARKS.soil 0-0.1m 32 +#> SRS522884 MIMS.me;MIGS/MIMS/MIMARKS.soil 0-0.1m 30 +#> SRS522886 MIMS.me;MIGS/MIMS/MIMARKS.soil 0-0.1m 40 +#> miscellaneous parameter geographic location (latitude) +#> SRS518212 Boil 1 78.78565 +#> SRS522877 Interboil 1 78.78565 +#> SRS522878 Interboil 2 78.784917 +#> SRS522883 Boil 7 78.783933 +#> SRS522884 Boil 8 78.783433 +#> SRS522886 Boil 10 78.78535 +#> NCBI sample classification instrument model acc_type +#> SRS518212 410658 Illumina MiSeq samples +#> SRS522877 410658 Illumina MiSeq samples +#> SRS522878 410658 Illumina MiSeq samples +#> SRS522883 410658 Illumina MiSeq samples +#> SRS522884 410658 Illumina MiSeq samples +#> SRS522886 410658 Illumina MiSeq samples +#> biome studies type +#> SRS518212 root:Environmental:Terrestrial:Soil MGYS00000850 samples +#> SRS522877 root:Environmental:Terrestrial:Soil MGYS00000850 samples +#> SRS522878 root:Environmental:Terrestrial:Soil MGYS00000850 samples +#> SRS522883 root:Environmental:Terrestrial:Soil MGYS00000850 samples +#> SRS522884 root:Environmental:Terrestrial:Soil MGYS00000850 
samples +#> SRS522886 root:Environmental:Terrestrial:Soil MGYS00000850 samples +#> collection-date +#> SRS518212 <NA> +#> SRS522877 <NA> +#> SRS522878 <NA> +#> SRS522883 <NA> +#> SRS522884 <NA> +#> SRS522886 2005-08-01

Specifying an accession parameter will restrict results to just those matching that particular entry, be it a study, sample or run. For example, to retrieve information for study “MGYS00002891”:

 study_samples <- doQuery(mg, "studies", accession="MGYS00002891")
-study_samples
+study_samples +#> accession bioproject samples-count is-private +#> MGYS00002891 MGYS00002891 PRJNA384570 29 FALSE +#> secondary-accession centre-name +#> MGYS00002891 SRP105345 University of Minnesota +#> study-abstract +#> MGYS00002891 Characterization of bacterial communities in marine sediments from Gladstone and Heron Island +#> study-name data-origination last-update +#> MGYS00002891 Queensland Marine Sediment HARVESTED 2019-11-07T16:33:46 +#> acc_type biomes type +#> MGYS00002891 studies root:Environmental:Aquatic:Marine:Sediment studies

Find relevant analyses accessions @@ -237,11 +427,13 @@

Find relevant analysesanalyses_accessions <- searchAnalysis( mg, type="studies", accession = study_samples$accession)

-# For demonstrative purpose, take only few samples
-set.seed(595)
-analyses_accessions <- sample(analyses_accessions, 5)
-
-analyses_accessions
+analyses_accessions +#> [1] "MGYA00209648" "MGYA00209649" "MGYA00209650" "MGYA00209651" "MGYA00209652" +#> [6] "MGYA00209653" "MGYA00209654" "MGYA00209655" "MGYA00209656" "MGYA00209657" +#> [11] "MGYA00209658" "MGYA00209659" "MGYA00209660" "MGYA00209661" "MGYA00209662" +#> [16] "MGYA00209663" "MGYA00209664" "MGYA00209665" "MGYA00209666" "MGYA00209667" +#> [21] "MGYA00209668" "MGYA00209669" "MGYA00209670" "MGYA00209671" "MGYA00209672" +#> [26] "MGYA00209673" "MGYA00209674" "MGYA00209675" "MGYA00209676"

A useful side effect of the above call is that some attribute metadata for each sample has now been retrieved and stored in the local cache. Thus subsequent API calls for these samples (which will occur @@ -266,7 +458,196 @@

Fetch metadata
 analyses_metadata <- getMetadata(mg, analyses_accessions)
-head(analyses_metadata)
+head(analyses_metadata) +#> analysis_analysis-status analysis_pipeline-version +#> MGYA00209648 completed 4.1 +#> MGYA00209649 completed 4.1 +#> MGYA00209650 completed 4.1 +#> MGYA00209651 completed 4.1 +#> MGYA00209652 completed 4.1 +#> MGYA00209653 completed 4.1 +#> analysis_experiment-type analysis_accession analysis_is-private +#> MGYA00209648 amplicon MGYA00209648 FALSE +#> MGYA00209649 amplicon MGYA00209649 FALSE +#> MGYA00209650 amplicon MGYA00209650 FALSE +#> MGYA00209651 amplicon MGYA00209651 FALSE +#> MGYA00209652 amplicon MGYA00209652 FALSE +#> MGYA00209653 amplicon MGYA00209653 FALSE +#> analysis_complete-time analysis_instrument-platform +#> MGYA00209648 2018-09-06T00:00:00 ILLUMINA +#> MGYA00209649 2018-09-06T00:00:00 ILLUMINA +#> MGYA00209650 2018-09-06T00:00:00 ILLUMINA +#> MGYA00209651 2018-09-06T00:00:00 ILLUMINA +#> MGYA00209652 2018-09-06T00:00:00 ILLUMINA +#> MGYA00209653 2018-09-06T00:00:00 ILLUMINA +#> analysis_instrument-model analysis_Submitted nucleotide sequences +#> MGYA00209648 Illumina HiSeq 2500 1441694 +#> MGYA00209649 Illumina HiSeq 2500 650265 +#> MGYA00209650 Illumina HiSeq 2500 1207289 +#> MGYA00209651 Illumina HiSeq 2500 469703 +#> MGYA00209652 Illumina HiSeq 2500 606584 +#> MGYA00209653 Illumina HiSeq 2500 692146 +#> analysis_Nucleotide sequences after format-specific filtering +#> MGYA00209648 1441359 +#> MGYA00209649 650108 +#> MGYA00209650 1206954 +#> MGYA00209651 469585 +#> MGYA00209652 606429 +#> MGYA00209653 691971 +#> analysis_Nucleotide sequences after length filtering +#> MGYA00209648 1272787 +#> MGYA00209649 578060 +#> MGYA00209650 1090737 +#> MGYA00209651 419171 +#> MGYA00209652 536462 +#> MGYA00209653 623965 +#> analysis_Nucleotide sequences after undetermined bases filtering +#> MGYA00209648 1272787 +#> MGYA00209649 578060 +#> MGYA00209650 1090737 +#> MGYA00209651 419171 +#> MGYA00209652 536462 +#> MGYA00209653 623965 +#> analysis_Reads with predicted CDS +#> MGYA00209648 22713 +#> MGYA00209649 11079 +#> 
MGYA00209650 19717 +#> MGYA00209651 7586 +#> MGYA00209652 11348 +#> MGYA00209653 25746 +#> analysis_Reads with predicted RNA +#> MGYA00209648 1243457 +#> MGYA00209649 564004 +#> MGYA00209650 1065641 +#> MGYA00209651 410161 +#> MGYA00209652 523120 +#> MGYA00209653 595537 +#> analysis_Reads with InterProScan match analysis_Predicted CDS +#> MGYA00209648 54 22807 +#> MGYA00209649 44 11215 +#> MGYA00209650 372 19758 +#> MGYA00209651 34 7604 +#> MGYA00209652 41 11356 +#> MGYA00209653 280 25977 +#> analysis_Predicted CDS with InterProScan match +#> MGYA00209648 55 +#> MGYA00209649 44 +#> MGYA00209650 373 +#> MGYA00209651 35 +#> MGYA00209652 41 +#> MGYA00209653 280 +#> analysis_Total InterProScan matches analysis_acc_type +#> MGYA00209648 97 analysis-jobs +#> MGYA00209649 58 analysis-jobs +#> MGYA00209650 589 analysis-jobs +#> MGYA00209651 58 analysis-jobs +#> MGYA00209652 47 analysis-jobs +#> MGYA00209653 485 analysis-jobs +#> study_attributes.accession study_attributes.bioproject +#> MGYA00209648 MGYS00002891 PRJNA384570 +#> MGYA00209649 MGYS00002891 PRJNA384570 +#> MGYA00209650 MGYS00002891 PRJNA384570 +#> MGYA00209651 MGYS00002891 PRJNA384570 +#> MGYA00209652 MGYS00002891 PRJNA384570 +#> MGYA00209653 MGYS00002891 PRJNA384570 +#> study_attributes.samples-count study_attributes.is-private +#> MGYA00209648 29 FALSE +#> MGYA00209649 29 FALSE +#> MGYA00209650 29 FALSE +#> MGYA00209651 29 FALSE +#> MGYA00209652 29 FALSE +#> MGYA00209653 29 FALSE +#> study_attributes.secondary-accession study_attributes.centre-name +#> MGYA00209648 SRP105345 University of Minnesota +#> MGYA00209649 SRP105345 University of Minnesota +#> MGYA00209650 SRP105345 University of Minnesota +#> MGYA00209651 SRP105345 University of Minnesota +#> MGYA00209652 SRP105345 University of Minnesota +#> MGYA00209653 SRP105345 University of Minnesota +#> study_attributes.study-abstract +#> MGYA00209648 Characterization of bacterial communities in marine sediments from Gladstone and Heron Island +#> 
MGYA00209649 Characterization of bacterial communities in marine sediments from Gladstone and Heron Island +#> MGYA00209650 Characterization of bacterial communities in marine sediments from Gladstone and Heron Island +#> MGYA00209651 Characterization of bacterial communities in marine sediments from Gladstone and Heron Island +#> MGYA00209652 Characterization of bacterial communities in marine sediments from Gladstone and Heron Island +#> MGYA00209653 Characterization of bacterial communities in marine sediments from Gladstone and Heron Island +#> study_attributes.study-name study_attributes.data-origination +#> MGYA00209648 Queensland Marine Sediment HARVESTED +#> MGYA00209649 Queensland Marine Sediment HARVESTED +#> MGYA00209650 Queensland Marine Sediment HARVESTED +#> MGYA00209651 Queensland Marine Sediment HARVESTED +#> MGYA00209652 Queensland Marine Sediment HARVESTED +#> MGYA00209653 Queensland Marine Sediment HARVESTED +#> study_attributes.last-update study_accession study_acc_type +#> MGYA00209648 2019-11-07T16:33:46 MGYS00002891 studies +#> MGYA00209649 2019-11-07T16:33:46 MGYS00002891 studies +#> MGYA00209650 2019-11-07T16:33:46 MGYS00002891 studies +#> MGYA00209651 2019-11-07T16:33:46 MGYS00002891 studies +#> MGYA00209652 2019-11-07T16:33:46 MGYS00002891 studies +#> MGYA00209653 2019-11-07T16:33:46 MGYS00002891 studies +#> sample_latitude sample_biosample sample_longitude sample_accession +#> MGYA00209648 -23.749 SAMN06842047 151.3654 SRS2151215 +#> MGYA00209649 -23.7692 SAMN06842069 151.3167 SRS2151190 +#> MGYA00209650 -23.6158 SAMN06842067 152.1597 SRS2151193 +#> MGYA00209651 -23.7692 SAMN06842071 151.3167 SRS2151189 +#> MGYA00209652 -23.4369 SAMN06842064 151.9813 SRS2151195 +#> MGYA00209653 -23.6158 SAMN06842065 152.1597 SRS2151196 +#> sample_analysis-completed +#> MGYA00209648 2018-09-06 +#> MGYA00209649 2018-09-06 +#> MGYA00209650 2018-09-06 +#> MGYA00209651 2018-09-06 +#> MGYA00209652 2018-09-06 +#> MGYA00209653 2018-09-06 +#> sample_geo-loc-name 
sample_sample-desc +#> MGYA00209648 Australia: Queensland composite +#> MGYA00209649 Australia: Queensland core +#> MGYA00209650 Australia: Queensland, Great Barrier Reef core +#> MGYA00209651 Australia: Queensland core +#> MGYA00209652 Australia: Queensland, Great Barrier Reef core +#> MGYA00209653 Australia: Queensland, Great Barrier Reef core +#> sample_sample-name sample_sample-alias sample_last-update +#> MGYA00209648 Facing island box 2 Facing island box 2 2018-09-06T01:18:52 +#> MGYA00209649 Gladstone Harbour 7A Gladstone Harbour 7A 2018-09-06T01:18:52 +#> MGYA00209650 Fitzroy reef 2C Fitzroy reef 2C 2018-09-06T01:18:52 +#> MGYA00209651 Gladstone Harbour 7C Gladstone Harbour 7C 2018-09-06T01:18:52 +#> MGYA00209652 Heron Island 4D Heron Island 4D 2018-09-06T01:18:52 +#> MGYA00209653 Fitzroy reef 2A Fitzroy reef 2A 2018-09-06T01:18:52 +#> sample_geographic location (longitude) +#> MGYA00209648 151.36536 +#> MGYA00209649 151.31674 +#> MGYA00209650 152.15974 +#> MGYA00209651 151.31674 +#> MGYA00209652 151.98132 +#> MGYA00209653 152.15974 +#> sample_geographic location (country and/or sea,region) +#> MGYA00209648 Australia: Queensland +#> MGYA00209649 Australia: Queensland +#> MGYA00209650 Australia: Queensland, Great Barrier Reef +#> MGYA00209651 Australia: Queensland +#> MGYA00209652 Australia: Queensland, Great Barrier Reef +#> MGYA00209653 Australia: Queensland, Great Barrier Reef +#> sample_geographic location (latitude) sample_instrument model +#> MGYA00209648 -23.749048 Illumina HiSeq 2500 +#> MGYA00209649 -23.769222 Illumina HiSeq 2500 +#> MGYA00209650 -23.615824 Illumina HiSeq 2500 +#> MGYA00209651 -23.769222 Illumina HiSeq 2500 +#> MGYA00209652 -23.436857 Illumina HiSeq 2500 +#> MGYA00209653 -23.615824 Illumina HiSeq 2500 +#> sample_acc_type run_accession +#> MGYA00209648 samples SRR5483782 +#> MGYA00209649 samples SRR5483760 +#> MGYA00209650 samples SRR5483762 +#> MGYA00209651 samples SRR5483758 +#> MGYA00209652 samples SRR5483765 +#> MGYA00209653 
samples SRR5483764 +#> biome_string sample_depth +#> MGYA00209648 root:Environmental:Aquatic:Marine:Sediment <NA> +#> MGYA00209649 root:Environmental:Aquatic:Marine:Sediment 0.0 +#> MGYA00209650 root:Environmental:Aquatic:Marine:Sediment 20.0 +#> MGYA00209651 root:Environmental:Aquatic:Marine:Sediment 20.0 +#> MGYA00209652 root:Environmental:Aquatic:Marine:Sediment 30.0 +#> MGYA00209653 root:Environmental:Aquatic:Marine:Sediment 0.0

The resulting data.frame has columns with names prefixed with their source type. For example, “sample_xxx” columns correspond to metadata gleaned from querying an accession’s sample entry. MGnify @@ -290,7 +671,8 @@

Fetch metadataknown_depths <- analyses_metadata[ !is.na(as.numeric(analyses_metadata$sample_depth)), ] # How many are left? -dim(known_depths) +dim(known_depths) +#> [1] 26 49

Fetch microbiome data @@ -319,7 +701,23 @@

Amplicon sequencing
 tse <- getResult(mg, accession = analyses_accessions, get.func = FALSE)

-tse
+tse +#> class: TreeSummarizedExperiment +#> dim: 3689 29 +#> metadata(0): +#> assays(1): counts +#> rownames(3689): 92640 251937 ... 233398 265506 +#> rowData names(8): Kingdom Phylum ... Species taxonomy1 +#> colnames(29): MGYA00209651 MGYA00209670 ... MGYA00209657 MGYA00209667 +#> colData names(49): analysis_experiment.type analysis_pipeline.version +#> ... biome_string sample_depth +#> reducedDimNames(0): +#> mainExpName: NULL +#> altExpNames(0): +#> rowLinks: NULL +#> rowTree: NULL +#> colLinks: NULL +#> colTree: NULL

TreeSE object is uniquely positioned to support SummarizedExperiment-based microbiome data manipulation and visualization. Moreover, it enables access to miaverse tools. For example, we can estimate @@ -328,17 +726,26 @@

Amplicon sequencingtse <- estimateDiversity(tse, index = "shannon") library(scater) +#> Loading required package: scuttle +#> Loading required package: ggplot2 plotColData(tse, "shannon", x = "sample_geo.loc.name") +

 library(miaViz)
+#> Loading required package: ggraph
 
-plotAbundance(tse)
+plotAbundance(tse[!is.na( rowData(tse)[["Kingdom"]] ), ], rank = "Kingdom") +

If needed, TreeSE can be converted to phyloseq.

 pseq <- makePhyloseqFromTreeSE(tse)
-pseq
+pseq +#> phyloseq-class experiment-level object +#> otu_table() OTU Table: [ 3689 taxa and 29 samples ] +#> sample_data() Sample Data: [ 29 samples by 50 sample variables ] +#> tax_table() Taxonomy Table: [ 3689 taxa by 7 taxonomic ranks ]

Metagenomics @@ -370,10 +777,9 @@

Metagenomics# Combine analyses all_accessions <- c(soil, human, marine)

-# Subset the accessions by taking 5 random analyses
-set.seed(74)
-all_accessions <- sample(all_accessions, 5)
-all_accessions
+head(all_accessions) +#> [1] "MGYA00097621" "MGYA00097622" "MGYA00097623" "MGYA00097624" "MGYA00097625" +#> [6] "MGYA00097626"

The first step with this new accession list is, as previously, to retrieve the associated metadata using getMetadata(), and as seen with the doQuery() results, the returned @@ -385,7 +791,343 @@

Metagenomicsfull_metadata <- getMetadata(mg, all_accessions)
 colnames(full_metadata)
-head(full_metadata)
+#> [1] "analysis_experiment-type" +#> [2] "analysis_pipeline-version" +#> [3] "analysis_analysis-status" +#> [4] "analysis_accession" +#> [5] "analysis_is-private" +#> [6] "analysis_complete-time" +#> [7] "analysis_instrument-platform" +#> [8] "analysis_instrument-model" +#> [9] "analysis_Submitted nucleotide sequences" +#> [10] "analysis_Nucleotide sequences after format-specific filtering" +#> [11] "analysis_Nucleotide sequences after length filtering" +#> [12] "analysis_Nucleotide sequences after undetermined bases filtering" +#> [13] "analysis_Reads with predicted CDS" +#> [14] "analysis_Reads with predicted RNA" +#> [15] "analysis_Reads with InterProScan match" +#> [16] "analysis_Predicted CDS" +#> [17] "analysis_Predicted CDS with InterProScan match" +#> [18] "analysis_Total InterProScan matches" +#> [19] "analysis_acc_type" +#> [20] "study_attributes.accession" +#> [21] "study_attributes.bioproject" +#> [22] "study_attributes.samples-count" +#> [23] "study_attributes.is-private" +#> [24] "study_attributes.secondary-accession" +#> [25] "study_attributes.centre-name" +#> [26] "study_attributes.study-abstract" +#> [27] "study_attributes.study-name" +#> [28] "study_attributes.data-origination" +#> [29] "study_attributes.last-update" +#> [30] "study_accession" +#> [31] "study_acc_type" +#> [32] "sample_latitude" +#> [33] "sample_longitude" +#> [34] "sample_biosample" +#> [35] "sample_accession" +#> [36] "sample_analysis-completed" +#> [37] "sample_geo-loc-name" +#> [38] "sample_sample-desc" +#> [39] "sample_environment-biome" +#> [40] "sample_environment-feature" +#> [41] "sample_environment-material" +#> [42] "sample_sample-name" +#> [43] "sample_sample-alias" +#> [44] "sample_last-update" +#> [45] "sample_investigation type" +#> [46] "sample_project name" +#> [47] "sample_geographic location (depth)" +#> [48] "sample_collection date" +#> [49] "sample_sequencing method" +#> [50] "sample_geographic location (elevation)" +#> [51] "sample_NCBI sample 
classification" +#> [52] "sample_instrument model" +#> [53] "sample_ENA checklist" +#> [54] "sample_sediment environmental package" +#> [55] "sample_acc_type" +#> [56] "run_accession" +#> [57] "biome_string" +#> [58] "sample_collection-date" +#> [59] "sample_host-tax-id" +#> [60] "sample_species" +#> [61] "sample_geographic location (longitude)" +#> [62] "sample_geographic location (country and/or sea,region)" +#> [63] "sample_environment (biome)" +#> [64] "sample_environment (feature)" +#> [65] "sample_environment (material)" +#> [66] "sample_environmental package" +#> [67] "sample_host taxid" +#> [68] "sample_geographic location (latitude)" +#> [69] "sample_host scientific name" +#> [70] "sample_human gut environmental package" +head(full_metadata) +#> analysis_experiment-type analysis_pipeline-version +#> MGYA00097631 metagenomic 3.0 +#> MGYA00097632 metagenomic 3.0 +#> MGYA00097633 metagenomic 3.0 +#> MGYA00097634 metagenomic 3.0 +#> MGYA00097635 metagenomic 3.0 +#> MGYA00097636 metagenomic 3.0 +#> analysis_analysis-status analysis_accession analysis_is-private +#> MGYA00097631 completed MGYA00097631 FALSE +#> MGYA00097632 completed MGYA00097632 FALSE +#> MGYA00097633 completed MGYA00097633 FALSE +#> MGYA00097634 completed MGYA00097634 FALSE +#> MGYA00097635 completed MGYA00097635 FALSE +#> MGYA00097636 completed MGYA00097636 FALSE +#> analysis_complete-time analysis_instrument-platform +#> MGYA00097631 2017-02-10T00:00:00 ILLUMINA +#> MGYA00097632 2017-02-10T00:00:00 ILLUMINA +#> MGYA00097633 2017-02-10T00:00:00 ILLUMINA +#> MGYA00097634 2017-02-10T00:00:00 ILLUMINA +#> MGYA00097635 2017-02-10T00:00:00 ILLUMINA +#> MGYA00097636 2017-02-10T00:00:00 ILLUMINA +#> analysis_instrument-model analysis_Submitted nucleotide sequences +#> MGYA00097631 Illumina HiSeq 4000 14263292 +#> MGYA00097632 Illumina HiSeq 4000 14017313 +#> MGYA00097633 Illumina HiSeq 4000 17493693 +#> MGYA00097634 Illumina HiSeq 4000 12803734 +#> MGYA00097635 Illumina HiSeq 4000 19918196 +#> 
MGYA00097636 Illumina HiSeq 4000 11768981 +#> analysis_Nucleotide sequences after format-specific filtering +#> MGYA00097631 14221062 +#> MGYA00097632 14007866 +#> MGYA00097633 17476376 +#> MGYA00097634 12794042 +#> MGYA00097635 19901713 +#> MGYA00097636 11759662 +#> analysis_Nucleotide sequences after length filtering +#> MGYA00097631 13131530 +#> MGYA00097632 13251661 +#> MGYA00097633 16306229 +#> MGYA00097634 12081026 +#> MGYA00097635 18776052 +#> MGYA00097636 11124185 +#> analysis_Nucleotide sequences after undetermined bases filtering +#> MGYA00097631 13131432 +#> MGYA00097632 13251540 +#> MGYA00097633 16306111 +#> MGYA00097634 12080915 +#> MGYA00097635 18775871 +#> MGYA00097636 11124089 +#> analysis_Reads with predicted CDS +#> MGYA00097631 12706029 +#> MGYA00097632 12854908 +#> MGYA00097633 15781126 +#> MGYA00097634 11692069 +#> MGYA00097635 18174796 +#> MGYA00097636 10767961 +#> analysis_Reads with predicted RNA +#> MGYA00097631 16190 +#> MGYA00097632 15055 +#> MGYA00097633 20933 +#> MGYA00097634 14064 +#> MGYA00097635 23868 +#> MGYA00097636 15644 +#> analysis_Reads with InterProScan match analysis_Predicted CDS +#> MGYA00097631 4564209 12799168 +#> MGYA00097632 4905147 12958349 +#> MGYA00097633 5734721 15898933 +#> MGYA00097634 4303972 11775442 +#> MGYA00097635 6944293 18316341 +#> MGYA00097636 4064336 10844621 +#> analysis_Predicted CDS with InterProScan match +#> MGYA00097631 4567359 +#> MGYA00097632 4909395 +#> MGYA00097633 5738555 +#> MGYA00097634 4306951 +#> MGYA00097635 6950012 +#> MGYA00097636 4067254 +#> analysis_Total InterProScan matches analysis_acc_type +#> MGYA00097631 7258924 analysis-jobs +#> MGYA00097632 7837311 analysis-jobs +#> MGYA00097633 9124572 analysis-jobs +#> MGYA00097634 6849131 analysis-jobs +#> MGYA00097635 11109826 analysis-jobs +#> MGYA00097636 6486426 analysis-jobs +#> study_attributes.accession study_attributes.bioproject +#> MGYA00097631 MGYS00001447 PRJEB19235 +#> MGYA00097632 MGYS00001447 PRJEB19235 +#> MGYA00097633 
MGYS00001447 PRJEB19235 +#> MGYA00097634 MGYS00001447 PRJEB19235 +#> MGYA00097635 MGYS00001447 PRJEB19235 +#> MGYA00097636 MGYS00001447 PRJEB19235 +#> study_attributes.samples-count study_attributes.is-private +#> MGYA00097631 38 FALSE +#> MGYA00097632 38 FALSE +#> MGYA00097633 38 FALSE +#> MGYA00097634 38 FALSE +#> MGYA00097635 38 FALSE +#> MGYA00097636 38 FALSE +#> study_attributes.secondary-accession study_attributes.centre-name +#> MGYA00097631 ERP021219 University of York +#> MGYA00097632 ERP021219 University of York +#> MGYA00097633 ERP021219 University of York +#> MGYA00097634 ERP021219 University of York +#> MGYA00097635 ERP021219 University of York +#> MGYA00097636 ERP021219 University of York +#> study_attributes.study-abstract +#> MGYA00097631 Samples were collected from natural and realigned sites, and during Summer and Winter. +#> MGYA00097632 Samples were collected from natural and realigned sites, and during Summer and Winter. +#> MGYA00097633 Samples were collected from natural and realigned sites, and during Summer and Winter. +#> MGYA00097634 Samples were collected from natural and realigned sites, and during Summer and Winter. +#> MGYA00097635 Samples were collected from natural and realigned sites, and during Summer and Winter. +#> MGYA00097636 Samples were collected from natural and realigned sites, and during Summer and Winter. 
+#> study_attributes.study-name +#> MGYA00097631 Samples from salt marshes in the south of England +#> MGYA00097632 Samples from salt marshes in the south of England +#> MGYA00097633 Samples from salt marshes in the south of England +#> MGYA00097634 Samples from salt marshes in the south of England +#> MGYA00097635 Samples from salt marshes in the south of England +#> MGYA00097636 Samples from salt marshes in the south of England +#> study_attributes.data-origination study_attributes.last-update +#> MGYA00097631 SUBMITTED 2017-02-10T11:32:47 +#> MGYA00097632 SUBMITTED 2017-02-10T11:32:47 +#> MGYA00097633 SUBMITTED 2017-02-10T11:32:47 +#> MGYA00097634 SUBMITTED 2017-02-10T11:32:47 +#> MGYA00097635 SUBMITTED 2017-02-10T11:32:47 +#> MGYA00097636 SUBMITTED 2017-02-10T11:32:47 +#> study_accession study_acc_type sample_latitude sample_longitude +#> MGYA00097631 MGYS00001447 studies 51.7972 0.921 +#> MGYA00097632 MGYS00001447 studies 51.7972 0.921 +#> MGYA00097633 MGYS00001447 studies 51.7865 0.8609 +#> MGYA00097634 MGYS00001447 studies 51.7865 0.8609 +#> MGYA00097635 MGYS00001447 studies <NA> <NA> +#> MGYA00097636 MGYS00001447 studies 51.7968 0.9218 +#> sample_biosample sample_accession sample_analysis-completed +#> MGYA00097631 SAMEA56090668 ERS1512902 2017-02-10 +#> MGYA00097632 SAMEA56092168 ERS1512904 2017-02-10 +#> MGYA00097633 SAMEA56099668 ERS1512914 2017-02-10 +#> MGYA00097634 SAMEA56095168 ERS1512908 2017-02-10 +#> MGYA00097635 <NA> <NA> <NA> +#> MGYA00097636 SAMEA56094418 ERS1512907 2017-02-10 +#> sample_geo-loc-name sample_sample-desc +#> MGYA00097631 United Kingdom Natural salt marsh, Mersea Island, Winter +#> MGYA00097632 United Kingdom Natural salt marsh, Mersea Island, Winter +#> MGYA00097633 United Kingdom Natural salt marsh, Abbotts Hall, Winter +#> MGYA00097634 United Kingdom Natural salt marsh, Abbotts Hall, Summer +#> MGYA00097635 <NA> <NA> +#> MGYA00097636 United Kingdom Realigned salt marsh, Mersea Island, Winter +#> sample_environment-biome 
sample_environment-feature +#> MGYA00097631 marine salt marsh biome sea shore +#> MGYA00097632 marine salt marsh biome sea shore +#> MGYA00097633 marine salt marsh biome sea shore +#> MGYA00097634 marine salt marsh biome sea shore +#> MGYA00097635 <NA> <NA> +#> MGYA00097636 marine salt marsh biome sea shore +#> sample_environment-material +#> MGYA00097631 sediment +#> MGYA00097632 sediment +#> MGYA00097633 sediment +#> MGYA00097634 sediment +#> MGYA00097635 <NA> +#> MGYA00097636 sediment +#> sample_sample-name sample_sample-alias +#> MGYA00097631 Natural salt marsh, Mersea Island, Winter W MINM 1A +#> MGYA00097632 Natural salt marsh, Mersea Island, Winter W MINM 3A +#> MGYA00097633 Natural salt marsh, Abbotts Hall, Winter W AHNM 2A +#> MGYA00097634 Natural salt marsh, Abbotts Hall, Summer S AHNM 1A +#> MGYA00097635 <NA> <NA> +#> MGYA00097636 Realigned salt marsh, Mersea Island, Winter W MIRM 3A +#> sample_last-update sample_investigation type sample_project name +#> MGYA00097631 2017-02-10T11:32:50 metagenome Southern Saltmarshes +#> MGYA00097632 2017-02-10T11:32:50 metagenome Southern Saltmarshes +#> MGYA00097633 2017-02-10T11:32:50 metagenome Southern Saltmarshes +#> MGYA00097634 2017-02-10T11:32:50 metagenome Southern Saltmarshes +#> MGYA00097635 <NA> <NA> <NA> +#> MGYA00097636 2017-02-10T11:32:50 metagenome Southern Saltmarshes +#> sample_geographic location (depth) sample_collection date +#> MGYA00097631 0.05 2015-02-24 +#> MGYA00097632 0.05 2015-02-24 +#> MGYA00097633 0.05 2015-03-28 +#> MGYA00097634 0.05 2014-07-03 +#> MGYA00097635 <NA> <NA> +#> MGYA00097636 0.05 2015-02-24 +#> sample_sequencing method sample_geographic location (elevation) +#> MGYA00097631 Illumina HiSeq 0.3 +#> MGYA00097632 Illumina HiSeq 0.3 +#> MGYA00097633 Illumina HiSeq 2 +#> MGYA00097634 Illumina HiSeq 2 +#> MGYA00097635 <NA> <NA> +#> MGYA00097636 Illumina HiSeq 1 +#> sample_NCBI sample classification sample_instrument model +#> MGYA00097631 749907 Illumina HiSeq 4000 +#> MGYA00097632 
749907 Illumina HiSeq 4000 +#> MGYA00097633 749907 Illumina HiSeq 4000 +#> MGYA00097634 749907 Illumina HiSeq 4000 +#> MGYA00097635 <NA> <NA> +#> MGYA00097636 749907 Illumina HiSeq 4000 +#> sample_ENA checklist +#> MGYA00097631 GSC MIxS sediment (ERC000021) +#> MGYA00097632 GSC MIxS sediment (ERC000021) +#> MGYA00097633 GSC MIxS sediment (ERC000021) +#> MGYA00097634 GSC MIxS sediment (ERC000021) +#> MGYA00097635 <NA> +#> MGYA00097636 GSC MIxS sediment (ERC000021) +#> sample_sediment environmental package sample_acc_type +#> MGYA00097631 sediment samples +#> MGYA00097632 sediment samples +#> MGYA00097633 sediment samples +#> MGYA00097634 sediment samples +#> MGYA00097635 <NA> <NA> +#> MGYA00097636 sediment samples +#> run_accession biome_string +#> MGYA00097631 ERR1811630 root:Environmental:Terrestrial:Soil +#> MGYA00097632 ERR1811632 root:Environmental:Terrestrial:Soil +#> MGYA00097633 ERR1811642 root:Environmental:Terrestrial:Soil +#> MGYA00097634 ERR1811636 root:Environmental:Terrestrial:Soil +#> MGYA00097635 ERR1811628 <NA> +#> MGYA00097636 ERR1811635 root:Environmental:Terrestrial:Soil +#> sample_collection-date sample_host-tax-id sample_species +#> MGYA00097631 <NA> <NA> <NA> +#> MGYA00097632 <NA> <NA> <NA> +#> MGYA00097633 <NA> <NA> <NA> +#> MGYA00097634 <NA> <NA> <NA> +#> MGYA00097635 <NA> <NA> <NA> +#> MGYA00097636 <NA> <NA> <NA> +#> sample_geographic location (longitude) +#> MGYA00097631 <NA> +#> MGYA00097632 <NA> +#> MGYA00097633 <NA> +#> MGYA00097634 <NA> +#> MGYA00097635 <NA> +#> MGYA00097636 <NA> +#> sample_geographic location (country and/or sea,region) +#> MGYA00097631 <NA> +#> MGYA00097632 <NA> +#> MGYA00097633 <NA> +#> MGYA00097634 <NA> +#> MGYA00097635 <NA> +#> MGYA00097636 <NA> +#> sample_environment (biome) sample_environment (feature) +#> MGYA00097631 <NA> <NA> +#> MGYA00097632 <NA> <NA> +#> MGYA00097633 <NA> <NA> +#> MGYA00097634 <NA> <NA> +#> MGYA00097635 <NA> <NA> +#> MGYA00097636 <NA> <NA> +#> sample_environment (material) 
sample_environmental package +#> MGYA00097631 <NA> <NA> +#> MGYA00097632 <NA> <NA> +#> MGYA00097633 <NA> <NA> +#> MGYA00097634 <NA> <NA> +#> MGYA00097635 <NA> <NA> +#> MGYA00097636 <NA> <NA> +#> sample_host taxid sample_geographic location (latitude) +#> MGYA00097631 <NA> <NA> +#> MGYA00097632 <NA> <NA> +#> MGYA00097633 <NA> <NA> +#> MGYA00097634 <NA> <NA> +#> MGYA00097635 <NA> <NA> +#> MGYA00097636 <NA> <NA> +#> sample_host scientific name sample_human gut environmental package +#> MGYA00097631 <NA> <NA> +#> MGYA00097632 <NA> <NA> +#> MGYA00097633 <NA> <NA> +#> MGYA00097634 <NA> <NA> +#> MGYA00097635 <NA> <NA> +#> MGYA00097636 <NA> <NA>

From full_metadata we get an idea of the type of data we’re dealing with, and can extract useful information such as sequencing platform, source biome, etc. The next code snippet tallies a few of @@ -400,9 +1142,17 @@

Metagenomics#Distribution of sample source material: table(full_metadata$`sample_environment-material`) +#> +#> sediment stool water +#> 27 38 412 #What sequencing machine(s) were used? table(full_metadata$`sample_instrument model`) +#> +#> Illumina HiScanSQ Illumina HiSeq 2000 Illumina HiSeq 2500 Illumina HiSeq 4000 +#> 38 352 2 25 +#> NextSeq 500 +#> 60 # Boxplot of raw read counts: ggplot( @@ -410,7 +1160,9 @@

Metagenomicsas.numeric(`analysis_Submitted nucleotide sequences`)))) + geom_boxplot(aes(group=study_accession)) + theme_bw() + - ylab("log(submitted reads)") + ylab("log(submitted reads)") +#> Warning: Removed 1 rows containing non-finite values (`stat_boxplot()`). +

Again, we can fetch the data by calling getResult(). bulk.dl=TRUE has the potential to significantly speed up data retrieval. MGnify makes its functional results available in two @@ -444,7 +1196,22 @@

Metagenomics
 mae <- getResult(mg, all_accessions, bulk.dl = TRUE)
-mae
+mae +#> A MultiAssayExperiment object of 4 listed +#> experiments with user-defined names and respective classes. +#> Containing an ExperimentList class object of length 4: +#> [1] microbiota: TreeSummarizedExperiment with 32401 rows and 487 columns +#> [2] go-slim: TreeSummarizedExperiment with 116 rows and 487 columns +#> [3] go-terms: TreeSummarizedExperiment with 2640 rows and 487 columns +#> [4] interpro-identifiers: TreeSummarizedExperiment with 15818 rows and 487 columns +#> Functionality: +#> experiments() - obtain the ExperimentList instance +#> colData() - the primary/phenotype DataFrame +#> sampleMap() - the sample coordination DataFrame +#> `$`, `[`, `[[` - extract colData columns, subset, or experiment +#> *Format() - convert into a long or wide DataFrame +#> assays() - convert ExperimentList to a SimpleList of matrices +#> exportClass() - save data to flat files

For metagenomic samples, the result is MultiAssayExperiment (MAE) which links multiple TreeSE objects into one dataset. These TreeSE objects include taxonomic @@ -453,7 +1220,24 @@

Metagenomics
-mae[[2]]
+mae[[2]] +#> class: TreeSummarizedExperiment +#> dim: 116 487 +#> metadata(0): +#> assays(1): counts +#> rownames(116): GO:0000015 GO:0000150 ... GO:1902494 GO:1990204 +#> rowData names(10): description category ... Genus Species +#> colnames(487): MGYA00083332 MGYA00083120 ... MGYA00097653 MGYA00097655 +#> colData names(70): analysis_analysis.status analysis_pipeline.version +#> ... sample_host.scientific.name +#> sample_human.gut.environmental.package +#> reducedDimNames(0): +#> mainExpName: NULL +#> altExpNames(0): +#> rowLinks: NULL +#> rowTree: NULL +#> colLinks: NULL +#> colTree: NULL

We can perform principal coordinate analysis (PCoA) on microbial profiling data by utilizing miaverse tools.

@@ -465,6 +1249,7 @@ 

Metagenomics= vegan::vegdist, method = "bray") # Plot plotReducedDim(mae[[1]], "MDS", colour_by = "sample_environment.feature")

+

+head(target_urls) +#> type id +#> 2 analyses ERR1662433_MERGED_FASTQ_CDS_annotated.faa.gz +#> 21 analyses ERR1662523_MERGED_FASTQ_CDS_annotated.faa.gz +#> 40 analyses ERR1809146_MERGED_FASTQ_CDS_annotated.faa.gz +#> 59 analyses ERR1662188_MERGED_FASTQ_CDS_annotated.faa.gz +#> 78 analyses ERR1662459_MERGED_FASTQ_CDS_annotated.faa.gz +#> attributes.alias attributes.file.format.name +#> 2 ERR1662433_MERGED_FASTQ_CDS_annotated.faa.gz FASTA +#> 21 ERR1662523_MERGED_FASTQ_CDS_annotated.faa.gz FASTA +#> 40 ERR1809146_MERGED_FASTQ_CDS_annotated.faa.gz FASTA +#> 59 ERR1662188_MERGED_FASTQ_CDS_annotated.faa.gz FASTA +#> 78 ERR1662459_MERGED_FASTQ_CDS_annotated.faa.gz FASTA +#> attributes.file.format.extension attributes.file.format.compression +#> 2 fasta TRUE +#> 21 fasta TRUE +#> 40 fasta TRUE +#> 59 fasta TRUE +#> 78 fasta TRUE +#> attributes.description.label +#> 2 Predicted CDS with annotation +#> 21 Predicted CDS with annotation +#> 40 Predicted CDS with annotation +#> 59 Predicted CDS with annotation +#> 78 Predicted CDS with annotation +#> attributes.description.description attributes.group.type +#> 2 Predicted coding sequences with InterPro match (FASTA) Sequence data +#> 21 Predicted coding sequences with InterPro match (FASTA) Sequence data +#> 40 Predicted coding sequences with InterPro match (FASTA) Sequence data +#> 59 Predicted coding sequences with InterPro match (FASTA) Sequence data +#> 78 Predicted coding sequences with InterPro match (FASTA) Sequence data +#> attributes.file.checksum.checksum +#> 2 +#> 21 +#> 40 +#> 59 +#> 78 +#> attributes.file.checksum.checksum.algorithm relationships.pipeline.data.type +#> 2 pipelines +#> 21 pipelines +#> 40 pipelines +#> 59 pipelines +#> 78 pipelines +#> relationships.pipeline.data.id +#> 2 3.0 +#> 21 3.0 +#> 40 3.0 +#> 59 3.0 +#> 78 3.0 +#> relationships.pipeline.related +#> 2 https://www.ebi.ac.uk/metagenomics/api/v1/pipelines/3.0?format=json +#> 21 
https://www.ebi.ac.uk/metagenomics/api/v1/pipelines/3.0?format=json +#> 40 https://www.ebi.ac.uk/metagenomics/api/v1/pipelines/3.0?format=json +#> 59 https://www.ebi.ac.uk/metagenomics/api/v1/pipelines/3.0?format=json +#> 78 https://www.ebi.ac.uk/metagenomics/api/v1/pipelines/3.0?format=json +#> download_url +#> 2 https://www.ebi.ac.uk/metagenomics/api/v1/analyses/MGYA00083117/file/ERR1662433_MERGED_FASTQ_CDS_annotated.faa.gz +#> 21 https://www.ebi.ac.uk/metagenomics/api/v1/analyses/MGYA00083245/file/ERR1662523_MERGED_FASTQ_CDS_annotated.faa.gz +#> 40 https://www.ebi.ac.uk/metagenomics/api/v1/analyses/MGYA00096984/file/ERR1809146_MERGED_FASTQ_CDS_annotated.faa.gz +#> 59 https://www.ebi.ac.uk/metagenomics/api/v1/analyses/MGYA00083497/file/ERR1662188_MERGED_FASTQ_CDS_annotated.faa.gz +#> 78 https://www.ebi.ac.uk/metagenomics/api/v1/analyses/MGYA00083193/file/ERR1662459_MERGED_FASTQ_CDS_annotated.faa.gz +#> accession +#> 2 MGYA00083117 +#> 21 MGYA00083245 +#> 40 MGYA00096984 +#> 59 MGYA00083497 +#> 78 MGYA00083193

To list the types of available files, and guide the filtering, something like the following might be useful.

-table(dl_urls$attributes.description.label)
+table(dl_urls$attributes.description.label) +#> +#> Complete GO annotation GO slim annotation +#> 5 5 +#> InterPro matches OTUs, counts and taxonomic assignments +#> 5 15 +#> Phylogenetic tree Predicted CDS with annotation +#> 5 5 +#> Predicted CDS without annotation Predicted ORF without annotation +#> 5 5 +#> Predicted tRNAs Processed nucleotide reads +#> 5 5 +#> Processed reads with annotation Processed reads with pCDS +#> 5 5 +#> Processed reads without annotation Reads encoding 16S rRNA +#> 5 5 +#> Reads encoding 23S rRNA Reads encoding 5S rRNA +#> 5 5 +#> Taxa abundance distribution +#> 5

Unlike other MGnifyR functions, searchFile() is not limited to analyses, and by specifying accession_type other result types may be @@ -538,7 +1408,8 @@

Fetch sequence filescached_location <- c(cached_location1, cached_location2)
 # Where are the files?
-cached_location
+cached_location +#> [1] "/tmp/RtmpeKYwQ1/file4c191e04bb83" "/tmp/RtmpeKYwQ1/file4c1921132881"

A second download option is available, which allows built-in parsing of the file. If we know ahead of time what processing will be performed, it may be possible to integrate it into a function, pass this function @@ -579,9 +1450,114 @@

Fetch sequence filesamoC_seq_counts <- getFile( mg, target_urls$download_url[[1]], read_func = getAmoCseqs)
-amoC_seq_counts
+amoC_seq_counts +#> [1] "/tmp/RtmpeKYwQ1/file4c1941f3c62" +sessionInfo() +#> R Under development (unstable) (2024-01-31 r85845) +#> Platform: x86_64-pc-linux-gnu +#> Running under: Ubuntu 22.04.3 LTS +#> +#> Matrix products: default +#> BLAS: /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3 +#> LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.20.so; LAPACK version 3.10.0 +#> +#> locale: +#> [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C +#> [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8 +#> [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8 +#> [7] LC_PAPER=en_US.UTF-8 LC_NAME=C +#> [9] LC_ADDRESS=C LC_TELEPHONE=C +#> [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C +#> +#> time zone: UTC +#> tzcode source: system (glibc) +#> +#> attached base packages: +#> [1] stats4 stats graphics grDevices utils datasets methods +#> [8] base +#> +#> other attached packages: +#> [1] miaViz_1.11.0 ggraph_2.1.0 +#> [3] scater_1.31.2 ggplot2_3.4.4 +#> [5] scuttle_1.13.0 MGnifyR_0.99.15 +#> [7] biomformat_1.31.0 mia_1.11.1 +#> [9] MultiAssayExperiment_1.29.0 TreeSummarizedExperiment_2.11.0 +#> [11] Biostrings_2.71.2 XVector_0.43.1 +#> [13] SingleCellExperiment_1.25.0 SummarizedExperiment_1.33.3 +#> [15] Biobase_2.63.0 GenomicRanges_1.55.2 +#> [17] GenomeInfoDb_1.39.5 IRanges_2.37.1 +#> [19] S4Vectors_0.41.3 BiocGenerics_0.49.1 +#> [21] MatrixGenerics_1.15.0 matrixStats_1.2.0 +#> [23] knitr_1.45 BiocStyle_2.31.0 +#> +#> loaded via a namespace (and not attached): +#> [1] jsonlite_1.8.8 magrittr_2.0.3 +#> [3] ggbeeswarm_0.7.2 farver_2.1.1 +#> [5] rmarkdown_2.25 fs_1.6.3 +#> [7] zlibbioc_1.49.0 ragg_1.2.7 +#> [9] vctrs_0.6.5 multtest_2.59.0 +#> [11] memoise_2.0.1 DelayedMatrixStats_1.25.1 +#> [13] RCurl_1.98-1.14 ggtree_3.11.0 +#> [15] BiocBaseUtils_1.5.0 htmltools_0.5.7 +#> [17] S4Arrays_1.3.3 BiocNeighbors_1.21.2 +#> [19] Rhdf5lib_1.25.1 gridGraphics_0.5-1 +#> [21] SparseArray_1.3.3 rhdf5_2.47.2 +#> [23] sass_0.4.8 bslib_0.6.1 +#> [25] desc_1.4.3 plyr_1.8.9 +#> 
[27] DECIPHER_2.31.1 cachem_1.0.8 +#> [29] igraph_2.0.1.1 iterators_1.0.14 +#> [31] lifecycle_1.0.4 pkgconfig_2.0.3 +#> [33] rsvd_1.0.5 Matrix_1.6-5 +#> [35] R6_2.5.1 fastmap_1.1.1 +#> [37] GenomeInfoDbData_1.2.11 aplot_0.2.2 +#> [39] digest_0.6.34 ggnewscale_0.4.9 +#> [41] colorspace_2.1-0 patchwork_1.2.0 +#> [43] irlba_2.3.5.1 textshaping_0.3.7 +#> [45] RSQLite_2.3.5 vegan_2.6-4 +#> [47] beachmat_2.19.1 labeling_0.4.3 +#> [49] fansi_1.0.6 urltools_1.7.3 +#> [51] polyclip_1.10-6 httr_1.4.7 +#> [53] abind_1.4-5 mgcv_1.9-1 +#> [55] compiler_4.4.0 bit64_4.0.5 +#> [57] withr_3.0.0 BiocParallel_1.37.0 +#> [59] viridis_0.6.5 DBI_1.2.1 +#> [61] highr_0.10 ggforce_0.4.1 +#> [63] MASS_7.3-60.2 DelayedArray_0.29.1 +#> [65] bluster_1.13.0 permute_0.9-7 +#> [67] tools_4.4.0 vipor_0.4.7 +#> [69] beeswarm_0.4.0 ape_5.7-1 +#> [71] glue_1.7.0 nlme_3.1-164 +#> [73] rhdf5filters_1.15.1 grid_4.4.0 +#> [75] ade4_1.7-22 cluster_2.1.6 +#> [77] reshape2_1.4.4 generics_0.1.3 +#> [79] gtable_0.3.4 tidyr_1.3.1 +#> [81] data.table_1.15.0 tidygraph_1.3.1 +#> [83] BiocSingular_1.19.0 ScaledMatrix_1.11.0 +#> [85] utf8_1.2.4 foreach_1.5.2 +#> [87] ggrepel_0.9.5 pillar_1.9.0 +#> [89] stringr_1.5.1 yulab.utils_0.1.4 +#> [91] splines_4.4.0 tweenr_2.0.2 +#> [93] dplyr_1.1.4 treeio_1.27.0 +#> [95] lattice_0.22-5 survival_3.5-7 +#> [97] bit_4.0.5 tidyselect_1.2.0 +#> [99] DirichletMultinomial_1.45.0 gridExtra_2.3 +#> [101] bookdown_0.37 phyloseq_1.47.0 +#> [103] xfun_0.41 graphlayouts_1.1.0 +#> [105] stringi_1.8.3 ggfun_0.1.4 +#> [107] lazyeval_0.2.2 yaml_2.3.8 +#> [109] evaluate_0.23 codetools_0.2-19 +#> [111] tibble_3.2.1 BiocManager_1.30.22 +#> [113] ggplotify_0.1.2 cli_3.6.2 +#> [115] systemfonts_1.0.5 munsell_0.5.0 +#> [117] jquerylib_0.1.4 Rcpp_1.0.12 +#> [119] triebeard_0.4.1 parallel_4.4.0 +#> [121] pkgdown_2.0.7 blob_1.2.4 +#> [123] sparseMatrixStats_1.15.0 bitops_1.0-7 +#> [125] decontam_1.23.0 viridisLite_0.4.2 +#> [127] tidytree_0.4.6 scales_1.3.0 +#> [129] purrr_1.0.2 crayon_1.5.2 +#> 
[131] rlang_1.1.3 diff --git a/articles/MGnifyR_long_files/figure-html/calculate_diversity-1.png b/articles/MGnifyR_long_files/figure-html/calculate_diversity-1.png new file mode 100644 index 00000000..3db93d70 Binary files /dev/null and b/articles/MGnifyR_long_files/figure-html/calculate_diversity-1.png differ diff --git a/articles/MGnifyR_long_files/figure-html/full_metatdata_explore-1.png b/articles/MGnifyR_long_files/figure-html/full_metatdata_explore-1.png new file mode 100644 index 00000000..ff234c47 Binary files /dev/null and b/articles/MGnifyR_long_files/figure-html/full_metatdata_explore-1.png differ diff --git a/articles/MGnifyR_long_files/figure-html/pcoa-1.png b/articles/MGnifyR_long_files/figure-html/pcoa-1.png new file mode 100644 index 00000000..488c67a4 Binary files /dev/null and b/articles/MGnifyR_long_files/figure-html/pcoa-1.png differ diff --git a/articles/MGnifyR_long_files/figure-html/plot_abundance-1.png b/articles/MGnifyR_long_files/figure-html/plot_abundance-1.png new file mode 100644 index 00000000..783393bb Binary files /dev/null and b/articles/MGnifyR_long_files/figure-html/plot_abundance-1.png differ diff --git a/pkgdown.yml b/pkgdown.yml index c000027b..a2101cd3 100644 --- a/pkgdown.yml +++ b/pkgdown.yml @@ -4,5 +4,5 @@ pkgdown_sha: ~ articles: MGnifyR: MGnifyR.html MGnifyR_long: MGnifyR_long.html -last_built: 2024-02-02T14:21Z +last_built: 2024-02-04T14:15Z diff --git a/reference/.MGnifyR_cache/analyses/MGYA00377505_format_json/.RDS b/reference/.MGnifyR_cache/analyses/MGYA00377505_format_json/.RDS index 87af23ed..f3627111 100644 Binary files a/reference/.MGnifyR_cache/analyses/MGYA00377505_format_json/.RDS and b/reference/.MGnifyR_cache/analyses/MGYA00377505_format_json/.RDS differ diff --git a/reference/.MGnifyR_cache/samples/ERS2967391_format_json/.RDS b/reference/.MGnifyR_cache/samples/ERS2967391_format_json/.RDS index c9c07bb1..0357d0c7 100644 Binary files a/reference/.MGnifyR_cache/samples/ERS2967391_format_json/.RDS and 
b/reference/.MGnifyR_cache/samples/ERS2967391_format_json/.RDS differ diff --git a/reference/.MGnifyR_cache/studies/MGYS00005058_format_json/.RDS b/reference/.MGnifyR_cache/studies/MGYS00005058_format_json/.RDS index eb564458..9e27b927 100644 Binary files a/reference/.MGnifyR_cache/studies/MGYS00005058_format_json/.RDS and b/reference/.MGnifyR_cache/studies/MGYS00005058_format_json/.RDS differ diff --git a/reference/searchAnalysis.html b/reference/searchAnalysis.html index 7b74cba5..cfd2c0a4 100644 --- a/reference/searchAnalysis.html +++ b/reference/searchAnalysis.html @@ -118,11 +118,7 @@

Examples

result <- searchAnalysis( mg, "samples", c("SRS4392730", "SRS4392743")) #> Fetching analyses... -#> | | | 0% -#> Warning: runs/SRR8611399/analyses (500 error): -#> Warning: runs/SRR8611399 (500 error): -#> Warning: assemblies/root:Engineered (404 error): Not found. -#> | |=================================== | 50% | |======================================================================| 100% +#> | | | 0% | |=================================== | 50% | |======================================================================| 100% # }