Merge pull request #639 from czbiohub/same-order

Handle multiple file inputs in the same precedence order.
nextstrain · May 20, 2021 · 0c9aad0
2 parents ce41c17 + 050d5eb
commit 0c9aad0
Show file tree

Hide file tree

Showing 2 changed files with 7 additions and 3 deletions.
diff --git a/scripts/combine-and-dedup-fastas.py b/scripts/combine-and-dedup-fastas.py
@@ -24,8 +24,10 @@
     with open_file(args.output, "w") as output_handle:
         # Stream sequences from all input files into a single output file,
         # skipping duplicate records (same strain and sequence) and noting
-        # mismatched sequences for the same strain name.
-        for record in read_sequences(*args.input):
+        # mismatched sequences for the same strain name.  In order to
+        # prefer the latter files, we have to reverse the order of the
+        # files.
+        for record in read_sequences(*reversed(args.input)):
             counter += 1
             if counter % 10000 == 0:
                 print(f"Processed {counter} records")

diff --git a/scripts/sanitize_sequences.py b/scripts/sanitize_sequences.py
@@ -18,6 +18,8 @@
         pattern = ""
 
     with open_file(args.output, "w") as output_handle:
-        for sequence in read_sequences(*args.sequences):
+        # In order to prefer the latter files, we have to reverse the order of
+        # the files.
+        for sequence in read_sequences(*reversed(args.sequences)):
             sequence.id = re.sub(pattern, "", sequence.id)
             write_sequences(sequence, output_handle)