diff --git a/articles/MGnifyR_long.html b/articles/MGnifyR_long.html index e26aab6..33b5b37 100644 --- a/articles/MGnifyR_long.html +++ b/articles/MGnifyR_long.html @@ -1280,95 +1280,106 @@

Fetch sequence filestarget_urls <- dl_urls[ dl_urls$attributes.description.label == "Predicted CDS with annotation",] head(target_urls) -#> type id -#> 2 analyses ERR1662433_MERGED_FASTQ_CDS_annotated.faa.gz -#> 21 analyses ERR1662523_MERGED_FASTQ_CDS_annotated.faa.gz -#> 40 analyses ERR1809146_MERGED_FASTQ_CDS_annotated.faa.gz -#> 59 analyses ERR1662188_MERGED_FASTQ_CDS_annotated.faa.gz -#> 78 analyses ERR1662459_MERGED_FASTQ_CDS_annotated.faa.gz -#> attributes.alias attributes.file.format.name -#> 2 ERR1662433_MERGED_FASTQ_CDS_annotated.faa.gz FASTA -#> 21 ERR1662523_MERGED_FASTQ_CDS_annotated.faa.gz FASTA -#> 40 ERR1809146_MERGED_FASTQ_CDS_annotated.faa.gz FASTA -#> 59 ERR1662188_MERGED_FASTQ_CDS_annotated.faa.gz FASTA -#> 78 ERR1662459_MERGED_FASTQ_CDS_annotated.faa.gz FASTA -#> attributes.file.format.extension attributes.file.format.compression -#> 2 fasta TRUE -#> 21 fasta TRUE -#> 40 fasta TRUE -#> 59 fasta TRUE -#> 78 fasta TRUE -#> attributes.description.label -#> 2 Predicted CDS with annotation -#> 21 Predicted CDS with annotation -#> 40 Predicted CDS with annotation -#> 59 Predicted CDS with annotation -#> 78 Predicted CDS with annotation -#> attributes.description.description attributes.group.type -#> 2 Predicted coding sequences with InterPro match (FASTA) Sequence data -#> 21 Predicted coding sequences with InterPro match (FASTA) Sequence data -#> 40 Predicted coding sequences with InterPro match (FASTA) Sequence data -#> 59 Predicted coding sequences with InterPro match (FASTA) Sequence data -#> 78 Predicted coding sequences with InterPro match (FASTA) Sequence data -#> attributes.file.checksum.checksum -#> 2 -#> 21 -#> 40 -#> 59 -#> 78 -#> attributes.file.checksum.checksum.algorithm relationships.pipeline.data.type -#> 2 pipelines -#> 21 pipelines -#> 40 pipelines -#> 59 pipelines -#> 78 pipelines -#> relationships.pipeline.data.id -#> 2 3.0 -#> 21 3.0 -#> 40 3.0 -#> 59 3.0 -#> 78 3.0 -#> relationships.pipeline.related -#> 2 
https://www.ebi.ac.uk/metagenomics/api/v1/pipelines/3.0?format=json -#> 21 https://www.ebi.ac.uk/metagenomics/api/v1/pipelines/3.0?format=json -#> 40 https://www.ebi.ac.uk/metagenomics/api/v1/pipelines/3.0?format=json -#> 59 https://www.ebi.ac.uk/metagenomics/api/v1/pipelines/3.0?format=json -#> 78 https://www.ebi.ac.uk/metagenomics/api/v1/pipelines/3.0?format=json -#> download_url -#> 2 https://www.ebi.ac.uk/metagenomics/api/v1/analyses/MGYA00083117/file/ERR1662433_MERGED_FASTQ_CDS_annotated.faa.gz -#> 21 https://www.ebi.ac.uk/metagenomics/api/v1/analyses/MGYA00083245/file/ERR1662523_MERGED_FASTQ_CDS_annotated.faa.gz -#> 40 https://www.ebi.ac.uk/metagenomics/api/v1/analyses/MGYA00096984/file/ERR1809146_MERGED_FASTQ_CDS_annotated.faa.gz -#> 59 https://www.ebi.ac.uk/metagenomics/api/v1/analyses/MGYA00083497/file/ERR1662188_MERGED_FASTQ_CDS_annotated.faa.gz -#> 78 https://www.ebi.ac.uk/metagenomics/api/v1/analyses/MGYA00083193/file/ERR1662459_MERGED_FASTQ_CDS_annotated.faa.gz -#> accession -#> 2 MGYA00083117 -#> 21 MGYA00083245 -#> 40 MGYA00096984 -#> 59 MGYA00083497 -#> 78 MGYA00083193 +#> type id +#> 3 analyses ERR1811630_MERGED_FASTQ_CDS_annotated.faa.gz +#> 24 analyses ERR1811632_MERGED_FASTQ_CDS_annotated.faa.gz +#> 45 analyses ERR1811642_MERGED_FASTQ_CDS_annotated.faa.gz +#> 68 analyses ERR1811636_MERGED_FASTQ_CDS_annotated.faa.gz +#> 90 analyses ERR1811628_MERGED_FASTQ_CDS_annotated.faa.gz +#> 115 analyses ERR1811635_MERGED_FASTQ_CDS_annotated.faa.gz +#> attributes.alias attributes.file.format.name +#> 3 ERR1811630_MERGED_FASTQ_CDS_annotated.faa.gz FASTA +#> 24 ERR1811632_MERGED_FASTQ_CDS_annotated.faa.gz FASTA +#> 45 ERR1811642_MERGED_FASTQ_CDS_annotated.faa.gz FASTA +#> 68 ERR1811636_MERGED_FASTQ_CDS_annotated.faa.gz FASTA +#> 90 ERR1811628_MERGED_FASTQ_CDS_annotated.faa.gz FASTA +#> 115 ERR1811635_MERGED_FASTQ_CDS_annotated.faa.gz FASTA +#> attributes.file.format.extension attributes.file.format.compression +#> 3 fasta TRUE +#> 24 fasta TRUE +#> 45 fasta 
TRUE +#> 68 fasta TRUE +#> 90 fasta TRUE +#> 115 fasta TRUE +#> attributes.description.label +#> 3 Predicted CDS with annotation +#> 24 Predicted CDS with annotation +#> 45 Predicted CDS with annotation +#> 68 Predicted CDS with annotation +#> 90 Predicted CDS with annotation +#> 115 Predicted CDS with annotation +#> attributes.description.description +#> 3 Predicted coding sequences with InterPro match (FASTA) +#> 24 Predicted coding sequences with InterPro match (FASTA) +#> 45 Predicted coding sequences with InterPro match (FASTA) +#> 68 Predicted coding sequences with InterPro match (FASTA) +#> 90 Predicted coding sequences with InterPro match (FASTA) +#> 115 Predicted coding sequences with InterPro match (FASTA) +#> attributes.group.type attributes.file.checksum.checksum +#> 3 Sequence data +#> 24 Sequence data +#> 45 Sequence data +#> 68 Sequence data +#> 90 Sequence data +#> 115 Sequence data +#> attributes.file.checksum.checksum.algorithm +#> 3 +#> 24 +#> 45 +#> 68 +#> 90 +#> 115 +#> relationships.pipeline.data.type relationships.pipeline.data.id +#> 3 pipelines 3.0 +#> 24 pipelines 3.0 +#> 45 pipelines 3.0 +#> 68 pipelines 3.0 +#> 90 pipelines 3.0 +#> 115 pipelines 3.0 +#> relationships.pipeline.related +#> 3 https://www.ebi.ac.uk/metagenomics/api/v1/pipelines/3.0?format=json +#> 24 https://www.ebi.ac.uk/metagenomics/api/v1/pipelines/3.0?format=json +#> 45 https://www.ebi.ac.uk/metagenomics/api/v1/pipelines/3.0?format=json +#> 68 https://www.ebi.ac.uk/metagenomics/api/v1/pipelines/3.0?format=json +#> 90 https://www.ebi.ac.uk/metagenomics/api/v1/pipelines/3.0?format=json +#> 115 https://www.ebi.ac.uk/metagenomics/api/v1/pipelines/3.0?format=json +#> download_url +#> 3 https://www.ebi.ac.uk/metagenomics/api/v1/analyses/MGYA00097631/file/ERR1811630_MERGED_FASTQ_CDS_annotated.faa.gz +#> 24 https://www.ebi.ac.uk/metagenomics/api/v1/analyses/MGYA00097632/file/ERR1811632_MERGED_FASTQ_CDS_annotated.faa.gz +#> 45 
https://www.ebi.ac.uk/metagenomics/api/v1/analyses/MGYA00097633/file/ERR1811642_MERGED_FASTQ_CDS_annotated.faa.gz +#> 68 https://www.ebi.ac.uk/metagenomics/api/v1/analyses/MGYA00097634/file/ERR1811636_MERGED_FASTQ_CDS_annotated.faa.gz +#> 90 https://www.ebi.ac.uk/metagenomics/api/v1/analyses/MGYA00097635/file/ERR1811628_MERGED_FASTQ_CDS_annotated.faa.gz +#> 115 https://www.ebi.ac.uk/metagenomics/api/v1/analyses/MGYA00097636/file/ERR1811635_MERGED_FASTQ_CDS_annotated.faa.gz +#> accession +#> 3 MGYA00097631 +#> 24 MGYA00097632 +#> 45 MGYA00097633 +#> 68 MGYA00097634 +#> 90 MGYA00097635 +#> 115 MGYA00097636

To list the available file types and guide the filtering, something like the following might be useful.

 table(dl_urls$attributes.description.label)
 #> 
 #>                 Complete GO annotation                     GO slim annotation 
-#>                                      5                                      5 
+#>                                    478                                    478 
 #>                       InterPro matches OTUs, counts and taxonomic assignments 
-#>                                      5                                     15 
+#>                                    483                                   1434 
 #>                      Phylogenetic tree          Predicted CDS with annotation 
-#>                                      5                                      5 
+#>                                    478                                    477 
 #>       Predicted CDS without annotation       Predicted ORF without annotation 
-#>                                      5                                      5 
+#>                                    480                                    488 
 #>                        Predicted tRNAs             Processed nucleotide reads 
-#>                                      5                                      5 
+#>                                    478                                    508 
 #>        Processed reads with annotation              Processed reads with pCDS 
-#>                                      5                                      5 
+#>                                    477                                    508 
 #>     Processed reads without annotation                Reads encoding 16S rRNA 
-#>                                      5                                      5 
+#>                                    488                                    478 
 #>                Reads encoding 23S rRNA                 Reads encoding 5S rRNA 
-#>                                      5                                      5 
+#>                                    478                                    478 
 #>            Taxa abundance distribution 
-#>                                      5
+#>                                    475

Unlike other MGnifyR functions, searchFile() is not limited to analyses, and by specifying accession_type other result types may be @@ -1409,7 +1420,7 @@

Fetch sequence files
 # Where are the files?
 cached_location
-#> [1] "/tmp/RtmpeKYwQ1/file4c191e04bb83" "/tmp/RtmpeKYwQ1/file4c1921132881"
+#> [1] "/tmp/RtmpeKYwQ1/file4c1937cbdfd5" "/tmp/RtmpeKYwQ1/file4c19a3b3548"

A second download option is available, which allows built-in parsing of the file. If we know ahead of time what processing will be performed, it may be possible to integrate it into a function, pass this function diff --git a/pkgdown.yml b/pkgdown.yml index a2101cd..ab9adf7 100644 --- a/pkgdown.yml +++ b/pkgdown.yml @@ -4,5 +4,5 @@ pkgdown_sha: ~ articles: MGnifyR: MGnifyR.html MGnifyR_long: MGnifyR_long.html -last_built: 2024-02-04T14:15Z +last_built: 2024-02-04T15:13Z