
adds input directory option to break up long lists of data dirs
brwnj committed Jun 25, 2018
1 parent ad61564 commit 5019841
Showing 4 changed files with 41 additions and 14 deletions.
13 changes: 13 additions & 0 deletions docs/usage.rst
@@ -52,6 +52,19 @@ of paths and patterns in a comma separated list, like::
--reference-database silva \
'collection1/LM_*.fastq.gz,collection2/rawdata'

Or, if you have many data directories, you can specify ``--input-dir``
multiple times::

hundo annotate \
--filter-adapters qc_references/adapters.fa.gz \
--filter-contaminants qc_references/phix174.fa.gz \
--out-dir mothur_sop_silva \
--database-dir annotation_references \
--reference-database silva \
--input-dir collection2/rawdata \
--input-dir collection3/rawdata \
'collection1/LM_*.fastq.gz'
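
Internally, the positional pattern list and each ``--input-dir`` value are
simply merged and resolved to absolute paths before being handed to
Snakemake, mirroring the change to ``run_annotate`` in ``hundo/hundo.py``
below. A minimal sketch of that behavior (the ``combine_input_dirs`` helper
is illustrative only, not part of hundo)::

    import os

    def combine_input_dirs(fastq_dir, input_dirs):
        # split the comma-separated argument, append each --input-dir value,
        # then resolve everything to absolute paths and re-join with commas
        paths = fastq_dir.replace(" ", "").split(",") + list(input_dirs)
        return ",".join(os.path.realpath(p) for p in paths)

    combine_input_dirs(
        "collection1/LM_*.fastq.gz",
        ("collection2/rawdata", "collection3/rawdata"),
    )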

Dependencies are installed by default in the results directory defined
on the command line as ``--out-dir``. If you want to re-use dependencies
across many analyses and not have to re-install each time you update the
2 changes: 2 additions & 0 deletions hundo/Snakefile
@@ -838,6 +838,7 @@ rule report:
shadow:
"shallow"
params:
fastq_dir = config.get("fastq_dir"),
filter_adapters = "None" if not config.get("filter_adapters") else config.get("filter_adapters"),
filter_contaminants = "None" if not config.get("filter_contaminants") else config.get("filter_contaminants"),
allowable_kmer_mismatches = config.get("allowable_kmer_mismatches"),
@@ -1325,6 +1326,7 @@ rule report:
::
fastq_dir: {params.fastq_dir}
filter_adapters: {params.filter_adapters}
filter_contaminants: {params.filter_contaminants}
allowable_kmer_mismatches: {params.allowable_kmer_mismatches}
2 changes: 1 addition & 1 deletion hundo/__init__.py
@@ -1 +1 @@
__version__ = "1.1.16"
__version__ = "1.1.17"
38 changes: 25 additions & 13 deletions hundo/hundo.py
@@ -102,7 +102,9 @@ def unite_namemap(mapfile):
# species names of the taxonomy map
translated_hits = [namemap[i] for i in hits.names]
# only slightly more stringent
taxonomy = tree.lca(translated_hits, statistics.mean(hits.percent_ids))
taxonomy = tree.lca(
translated_hits, statistics.mean(hits.percent_ids)
)
print_unite(name, seq, taxonomy, outfasta, outtab)
else:
# unknown
@@ -123,11 +125,11 @@ def unite_namemap(mapfile):
otu.classification = tree.no_hits
continue

while lca_node.name in tree.assignment_min and hits.percent_ids[
-1
] < tree.assignment_min[
lca_node.name
] and lca_node is not tree.root:
while (
lca_node.name in tree.assignment_min
and hits.percent_ids[-1] < tree.assignment_min[lca_node.name]
and lca_node is not tree.root
):
lca_node = tree.get_parent(lca_node)
otu.classification = lca_node
for otu_id, otu in otus.items():
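
The reformatted condition is easier to read as: climb toward the root while
the current node defines a minimum-identity threshold and the weakest hit
falls below it. A standalone sketch of that walk with toy classes (not
hundo's actual tree structures)::

    class Node:
        def __init__(self, name, parent=None):
            self.name = name
            self.parent = parent

    root = Node("root")
    family = Node("family", parent=root)
    genus = Node("genus", parent=family)

    assignment_min = {"genus": 97.0, "family": 90.0}

    def climb(node, worst_pct_id):
        # walk up while the node demands more identity than the weakest hit has
        while (
            node.name in assignment_min
            and worst_pct_id < assignment_min[node.name]
            and node is not root
        ):
            node = node.parent
        return node

    climb(genus, 92.5).name  # -> "family"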
@@ -269,6 +271,9 @@ def run_download(database_dir, jobs, reference_database, dryrun, snakemake_args)
short_help="run annotation protocol",
)
@click.argument("fastq-dir")
@click.option(
"-i", "--input-dir", multiple=True, help="additional FASTQ input directories"
)
@click.option(
"--prefilter-file-size",
default=100000,
@@ -479,6 +484,7 @@ def run_download(database_dir, jobs, reference_database, dryrun, snakemake_args)
@click.argument("snakemake_args", nargs=-1, type=click.UNPROCESSED)
def run_annotate(
fastq_dir,
input_dir,
prefilter_file_size,
jobs,
out_dir,
@@ -513,6 +519,9 @@ def run_download(database_dir, jobs, reference_database, dryrun, snakemake_args)
Both R1 and R2 are expected to be present in the same directory and have
the same name except for the index ID (R1 and R2).
FASTQ_DIR may be a comma-separated list of directories, or additional
input directories may be added by specifying --input-dir multiple times.
By using SILVA, you agree to their license terms which are available at:
\b
@@ -524,14 +533,17 @@ def run_download(database_dir, jobs, reference_database, dryrun, snakemake_args)
https://hundo.rtfd.io
"""
fq_dir = list()
for input_dir in fastq_dir.replace(" ", "").split(","):
fq_dir.append(os.path.realpath(input_dir))
# combine single or comma separated list with input_dir
all_input_paths = fastq_dir.replace(" ", "").split(",") + list(input_dir)
for input_path in all_input_paths:
fq_dir.append(os.path.realpath(input_path))
# format the input paths in order to send to Snakemake command
fq_dir = ",".join(fq_dir)
database_dir = os.path.realpath(database_dir)
filter_adapters = os.path.realpath(filter_adapters) if filter_adapters else ""
filter_contaminants = os.path.realpath(
filter_contaminants
) if filter_contaminants else ""
filter_contaminants = (
os.path.realpath(filter_contaminants) if filter_contaminants else ""
)
no_temp_declared = False
for sa in snakemake_args:
if sa == "--nt" or sa == "--notemp":
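
Because ``--input-dir`` is declared with ``multiple=True``, click collects
every occurrence of the option into a tuple before ``run_annotate`` runs.
A minimal standalone illustration of that behavior (command and option names
here are only for the example)::

    import click

    @click.command()
    @click.argument("fastq_dir")
    @click.option("-i", "--input-dir", multiple=True)
    def demo(fastq_dir, input_dir):
        # input_dir arrives as a tuple, e.g. ("collection2/rawdata", "collection3/rawdata")
        click.echo(fastq_dir.split(",") + list(input_dir))

    if __name__ == "__main__":
        demo()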
@@ -540,7 +552,7 @@ def run_annotate(
"snakemake --snakefile {snakefile} --directory {out_dir} "
"--printshellcmds --jobs {jobs} --rerun-incomplete "
"--nolock {conda} {dryrun} "
"--config fastq_dir={fq_dir} author='{author}' threads={threads} "
"--config fastq_dir={fastq_dir} author='{author}' threads={threads} "
"database_dir={database_dir} filter_adapters={filter_adapters} "
"filter_contaminants={filter_contaminants} "
"allowable_kmer_mismatches={allowable_kmer_mismatches} "
@@ -569,7 +581,7 @@
jobs=jobs,
conda="" if no_conda else "--use-conda",
dryrun="--dryrun" if dryrun else "",
fq_dir=fq_dir,
fastq_dir=fq_dir,
author=author,
threads=threads,
database_dir=database_dir,
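
Note that the placeholder in the command template and the keyword passed to
``str.format`` are renamed together here; if only one side changed,
formatting would raise a ``KeyError``. A quick illustration::

    template = "--config fastq_dir={fastq_dir}"
    template.format(fastq_dir="collection2/rawdata")   # OK
    # template.format(fq_dir="collection2/rawdata")    # KeyError: 'fastq_dir'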
