Skip to content

Commit

Permalink
🚧 add error handling
Browse files Browse the repository at this point in the history
  • Loading branch information
victorlin committed Jan 25, 2025
1 parent 6805150 commit a367d19
Show file tree
Hide file tree
Showing 2 changed files with 66 additions and 36 deletions.
86 changes: 50 additions & 36 deletions augur/merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -390,45 +390,59 @@ def merge_sequences(args):
if args.quiet:
print_info = lambda *_: None

# FIXME: restore this error handling:
# except subprocess.CalledProcessError as err:
# raise SeqKitError(f"seqkit invocation failed") from err

with seqkit('rmdup',
stdin =subprocess.PIPE,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE) as process:
# Reversed because seqkit rmdup keeps the first entry but this command
# should keep the last entry.
# NOTE(or FIXME?): For duplicates within a file, seqkit rmdup will
# keep the first entry which is somewhat at odds with the "last one
# wins" approach used for the rest of augur merge. We could error in
# this case (which is what augur filter does), however that requires
# reading all the IDs which is unnecessary complexity for a wrapper
# around seqkit rmdup.
for s in reversed(args.sequences):
print_info(f"Reading sequences from {s!r}…")
subprocess.run([*augur, "read-file", s], stdout=process.stdin)

# Add an newline character to support FASTA files that are missing one at the end.
# Extraneous newline characters are stripped by seqkit.
print(file=process.stdin)

# Let seqkit know that there are no more sequences.
process.stdin.close()

print_info(f"Merging sequences and writing to {args.output_sequences!r}…")

for line in iter(process.stderr.readline, ''):
print_info(f"[SeqKit] {line.rstrip()}")

# Write merged sequences.
subprocess.run([*augur, "write-file", args.output_sequences], stdin=process.stdout)
try:
with seqkit('rmdup',
stdin =subprocess.PIPE,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE) as process:
# Reversed because seqkit rmdup keeps the first entry but this command
# should keep the last entry.
# NOTE(or FIXME?): For duplicates within a file, seqkit rmdup will
# keep the first entry which is somewhat at odds with the "last one
# wins" approach used for the rest of augur merge. We could error in
# this case (which is what augur filter does), however that requires
# reading all the IDs which is unnecessary complexity for a wrapper
# around seqkit rmdup.
for s in reversed(args.sequences):
print_info(f"Reading sequences from {s!r}…")
read_process = subprocess.run([*augur, "read-file", s], stdout=process.stdin)
try:
read_process.check_returncode()
except subprocess.CalledProcessError as e:
# FIXME: capture augur read-file errors but not seqkit errors
# print_info(f"Cannot read file {s}: {e}")
pass

# Add an newline character to support FASTA files that are missing one at the end.
# Extraneous newline characters are stripped by seqkit.
print(file=process.stdin)

# Let seqkit know that there are no more sequences.
try:
process.stdin.close()
except BrokenPipeError:
# Let the return code handle this
pass

print_info(f"Merging sequences and writing to {args.output_sequences!r}…")

# FIXME: add error handling?
for line in iter(process.stderr.readline, ''):
print_info(f"[SeqKit] {line.rstrip()}")

# Write merged sequences.
# FIXME: add error handling
subprocess.run([*augur, "write-file", args.output_sequences], stdin=process.stdout)

finally:
# Wait for the seqkit process to finish.
process.wait()
returncode = process.wait()
# FIXME: address potential deadlock issue?
# <https://docs.python.org/3/library/subprocess.html#subprocess.Popen.wait>
# Can't use Popen.communicate() because the data read is buffered in memory.
# <https://docs.python.org/3/library/subprocess.html#subprocess.Popen.communicate>
if returncode != 0:
raise SeqKitError(f"seqkit invocation failed, see error message above.")


def sqlite3(*args, **kwargs):
Expand Down Expand Up @@ -517,7 +531,7 @@ def seqkit(*args, **kwargs):
return subprocess.Popen(argv, encoding="utf-8", text=True, **kwargs)


class SeqKitError(Exception):
class SeqKitError(AugurError):
pass


Expand Down
16 changes: 16 additions & 0 deletions tests/functional/merge/cram/merge-sequences.t
Original file line number Diff line number Diff line change
Expand Up @@ -60,3 +60,19 @@ This behavior is a reflection of the seqkit rmdup internals.
ATCG
>seq2
GCTA

Error handling

$ cat >y.fasta <<~~
> invalid fasta file
> ~~

$ ${AUGUR} merge \
> --sequences x.fasta y.fasta \
> --output-sequences - > merged.fasta
Reading sequences from 'y.fasta'
Reading sequences from 'x.fasta'
Merging sequences and writing to '-'
[SeqKit] \x1b[31m[ERRO]\x1b[0m fastx: invalid FASTA/Q format (esc)
ERROR: seqkit invocation failed, see error message above.
[2]

0 comments on commit a367d19

Please sign in to comment.