# Keep only the download entries labelled "Predicted CDS with annotation".
# `%in%` never yields NA, so rows with a missing label are dropped instead of
# coming back as all-NA rows (which is what an `==` mask would produce).
is_annotated_cds <-
  dl_urls$attributes.description.label %in% "Predicted CDS with annotation"
target_urls <- dl_urls[is_annotated_cds, ]
# Preview the first few matching rows.
head(target_urls)
-#> type id
-#> 2 analyses ERR1662433_MERGED_FASTQ_CDS_annotated.faa.gz
-#> 21 analyses ERR1662523_MERGED_FASTQ_CDS_annotated.faa.gz
-#> 40 analyses ERR1809146_MERGED_FASTQ_CDS_annotated.faa.gz
-#> 59 analyses ERR1662188_MERGED_FASTQ_CDS_annotated.faa.gz
-#> 78 analyses ERR1662459_MERGED_FASTQ_CDS_annotated.faa.gz
-#> attributes.alias attributes.file.format.name
-#> 2 ERR1662433_MERGED_FASTQ_CDS_annotated.faa.gz FASTA
-#> 21 ERR1662523_MERGED_FASTQ_CDS_annotated.faa.gz FASTA
-#> 40 ERR1809146_MERGED_FASTQ_CDS_annotated.faa.gz FASTA
-#> 59 ERR1662188_MERGED_FASTQ_CDS_annotated.faa.gz FASTA
-#> 78 ERR1662459_MERGED_FASTQ_CDS_annotated.faa.gz FASTA
-#> attributes.file.format.extension attributes.file.format.compression
-#> 2 fasta TRUE
-#> 21 fasta TRUE
-#> 40 fasta TRUE
-#> 59 fasta TRUE
-#> 78 fasta TRUE
-#> attributes.description.label
-#> 2 Predicted CDS with annotation
-#> 21 Predicted CDS with annotation
-#> 40 Predicted CDS with annotation
-#> 59 Predicted CDS with annotation
-#> 78 Predicted CDS with annotation
-#> attributes.description.description attributes.group.type
-#> 2 Predicted coding sequences with InterPro match (FASTA) Sequence data
-#> 21 Predicted coding sequences with InterPro match (FASTA) Sequence data
-#> 40 Predicted coding sequences with InterPro match (FASTA) Sequence data
-#> 59 Predicted coding sequences with InterPro match (FASTA) Sequence data
-#> 78 Predicted coding sequences with InterPro match (FASTA) Sequence data
-#> attributes.file.checksum.checksum
-#> 2
-#> 21
-#> 40
-#> 59
-#> 78
-#> attributes.file.checksum.checksum.algorithm relationships.pipeline.data.type
-#> 2 pipelines
-#> 21 pipelines
-#> 40 pipelines
-#> 59 pipelines
-#> 78 pipelines
-#> relationships.pipeline.data.id
-#> 2 3.0
-#> 21 3.0
-#> 40 3.0
-#> 59 3.0
-#> 78 3.0
-#> relationships.pipeline.related
-#> 2 https://www.ebi.ac.uk/metagenomics/api/v1/pipelines/3.0?format=json
-#> 21 https://www.ebi.ac.uk/metagenomics/api/v1/pipelines/3.0?format=json
-#> 40 https://www.ebi.ac.uk/metagenomics/api/v1/pipelines/3.0?format=json
-#> 59 https://www.ebi.ac.uk/metagenomics/api/v1/pipelines/3.0?format=json
-#> 78 https://www.ebi.ac.uk/metagenomics/api/v1/pipelines/3.0?format=json
-#> download_url
-#> 2 https://www.ebi.ac.uk/metagenomics/api/v1/analyses/MGYA00083117/file/ERR1662433_MERGED_FASTQ_CDS_annotated.faa.gz
-#> 21 https://www.ebi.ac.uk/metagenomics/api/v1/analyses/MGYA00083245/file/ERR1662523_MERGED_FASTQ_CDS_annotated.faa.gz
-#> 40 https://www.ebi.ac.uk/metagenomics/api/v1/analyses/MGYA00096984/file/ERR1809146_MERGED_FASTQ_CDS_annotated.faa.gz
-#> 59 https://www.ebi.ac.uk/metagenomics/api/v1/analyses/MGYA00083497/file/ERR1662188_MERGED_FASTQ_CDS_annotated.faa.gz
-#> 78 https://www.ebi.ac.uk/metagenomics/api/v1/analyses/MGYA00083193/file/ERR1662459_MERGED_FASTQ_CDS_annotated.faa.gz
-#> accession
-#> 2 MGYA00083117
-#> 21 MGYA00083245
-#> 40 MGYA00096984
-#> 59 MGYA00083497
-#> 78 MGYA00083193
+#> type id
+#> 3 analyses ERR1811630_MERGED_FASTQ_CDS_annotated.faa.gz
+#> 24 analyses ERR1811632_MERGED_FASTQ_CDS_annotated.faa.gz
+#> 45 analyses ERR1811642_MERGED_FASTQ_CDS_annotated.faa.gz
+#> 68 analyses ERR1811636_MERGED_FASTQ_CDS_annotated.faa.gz
+#> 90 analyses ERR1811628_MERGED_FASTQ_CDS_annotated.faa.gz
+#> 115 analyses ERR1811635_MERGED_FASTQ_CDS_annotated.faa.gz
+#> attributes.alias attributes.file.format.name
+#> 3 ERR1811630_MERGED_FASTQ_CDS_annotated.faa.gz FASTA
+#> 24 ERR1811632_MERGED_FASTQ_CDS_annotated.faa.gz FASTA
+#> 45 ERR1811642_MERGED_FASTQ_CDS_annotated.faa.gz FASTA
+#> 68 ERR1811636_MERGED_FASTQ_CDS_annotated.faa.gz FASTA
+#> 90 ERR1811628_MERGED_FASTQ_CDS_annotated.faa.gz FASTA
+#> 115 ERR1811635_MERGED_FASTQ_CDS_annotated.faa.gz FASTA
+#> attributes.file.format.extension attributes.file.format.compression
+#> 3 fasta TRUE
+#> 24 fasta TRUE
+#> 45 fasta TRUE
+#> 68 fasta TRUE
+#> 90 fasta TRUE
+#> 115 fasta TRUE
+#> attributes.description.label
+#> 3 Predicted CDS with annotation
+#> 24 Predicted CDS with annotation
+#> 45 Predicted CDS with annotation
+#> 68 Predicted CDS with annotation
+#> 90 Predicted CDS with annotation
+#> 115 Predicted CDS with annotation
+#> attributes.description.description
+#> 3 Predicted coding sequences with InterPro match (FASTA)
+#> 24 Predicted coding sequences with InterPro match (FASTA)
+#> 45 Predicted coding sequences with InterPro match (FASTA)
+#> 68 Predicted coding sequences with InterPro match (FASTA)
+#> 90 Predicted coding sequences with InterPro match (FASTA)
+#> 115 Predicted coding sequences with InterPro match (FASTA)
+#> attributes.group.type attributes.file.checksum.checksum
+#> 3 Sequence data
+#> 24 Sequence data
+#> 45 Sequence data
+#> 68 Sequence data
+#> 90 Sequence data
+#> 115 Sequence data
+#> attributes.file.checksum.checksum.algorithm
+#> 3
+#> 24
+#> 45
+#> 68
+#> 90
+#> 115
+#> relationships.pipeline.data.type relationships.pipeline.data.id
+#> 3 pipelines 3.0
+#> 24 pipelines 3.0
+#> 45 pipelines 3.0
+#> 68 pipelines 3.0
+#> 90 pipelines 3.0
+#> 115 pipelines 3.0
+#> relationships.pipeline.related
+#> 3 https://www.ebi.ac.uk/metagenomics/api/v1/pipelines/3.0?format=json
+#> 24 https://www.ebi.ac.uk/metagenomics/api/v1/pipelines/3.0?format=json
+#> 45 https://www.ebi.ac.uk/metagenomics/api/v1/pipelines/3.0?format=json
+#> 68 https://www.ebi.ac.uk/metagenomics/api/v1/pipelines/3.0?format=json
+#> 90 https://www.ebi.ac.uk/metagenomics/api/v1/pipelines/3.0?format=json
+#> 115 https://www.ebi.ac.uk/metagenomics/api/v1/pipelines/3.0?format=json
+#> download_url
+#> 3 https://www.ebi.ac.uk/metagenomics/api/v1/analyses/MGYA00097631/file/ERR1811630_MERGED_FASTQ_CDS_annotated.faa.gz
+#> 24 https://www.ebi.ac.uk/metagenomics/api/v1/analyses/MGYA00097632/file/ERR1811632_MERGED_FASTQ_CDS_annotated.faa.gz
+#> 45 https://www.ebi.ac.uk/metagenomics/api/v1/analyses/MGYA00097633/file/ERR1811642_MERGED_FASTQ_CDS_annotated.faa.gz
+#> 68 https://www.ebi.ac.uk/metagenomics/api/v1/analyses/MGYA00097634/file/ERR1811636_MERGED_FASTQ_CDS_annotated.faa.gz
+#> 90 https://www.ebi.ac.uk/metagenomics/api/v1/analyses/MGYA00097635/file/ERR1811628_MERGED_FASTQ_CDS_annotated.faa.gz
+#> 115 https://www.ebi.ac.uk/metagenomics/api/v1/analyses/MGYA00097636/file/ERR1811635_MERGED_FASTQ_CDS_annotated.faa.gz
+#> accession
+#> 3 MGYA00097631
+#> 24 MGYA00097632
+#> 45 MGYA00097633
+#> 68 MGYA00097634
+#> 90 MGYA00097635
+#> 115 MGYA00097636
To list the types of available files and guide the filtering,
something like the following might be useful.
# Count how many download entries exist for each file description label.
table(dl_urls[["attributes.description.label"]])
#>
#> Complete GO annotation GO slim annotation
-#> 5 5
+#> 478 478
#> InterPro matches OTUs, counts and taxonomic assignments
-#> 5 15
+#> 483 1434
#> Phylogenetic tree Predicted CDS with annotation
-#> 5 5
+#> 478 477
#> Predicted CDS without annotation Predicted ORF without annotation
-#> 5 5
+#> 480 488
#> Predicted tRNAs Processed nucleotide reads
-#> 5 5
+#> 478 508
#> Processed reads with annotation Processed reads with pCDS
-#> 5 5
+#> 477 508
#> Processed reads without annotation Reads encoding 16S rRNA
-#> 5 5
+#> 488 478
#> Reads encoding 23S rRNA Reads encoding 5S rRNA
-#> 5 5
+#> 478 478
#> Taxa abundance distribution
-#> 5
+#> 475
Unlike other MGnifyR functions, searchFile() is not limited to analyses, and
by specifying accession_type other result types may be
@@ -1409,7 +1420,7 @@