Skip to content

Commit

Permalink
Possibly compressed inputs for ConcatTextFiles
Browse files (browse the repository at this point in the history)
  • Loading branch information
mwalker174 committed Jan 10, 2024
1 parent ccdc2bf commit f08a683
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 3 deletions.
7 changes: 6 additions & 1 deletion wdl/ConcatTextFiles.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -7,18 +7,22 @@ workflow ConcatTextFiles {
input {
Array[File] text_files
String output_prefix
String output_suffix = ".concat.txt"
String output_suffix = "concat.txt"

Boolean gzipped = false
Boolean headered = false

String linux_docker
String sv_base_mini_docker
}

if (!headered) {
# Disable filter command since input might be compressed
call tasks.CatUncompressedFiles {
input:
shards=text_files,
outfile_name="~{output_prefix}.~{output_suffix}",
filter_command="",
sv_base_mini_docker=sv_base_mini_docker
}
}
Expand All @@ -27,6 +31,7 @@ workflow ConcatTextFiles {
call tasks.ConcatHeaderedTextFiles {
input:
text_files=text_files,
gzipped=gzipped,
output_filename="~{output_prefix}.~{output_suffix}",
linux_docker=linux_docker
}
Expand Down
8 changes: 6 additions & 2 deletions wdl/TasksMakeCohortVcf.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,7 @@ task CatUncompressedFiles {
task ConcatHeaderedTextFiles {
input {
Array[File] text_files
Boolean gzipped = false
String output_filename
String linux_docker
RuntimeAttr? runtime_attr_override
Expand All @@ -137,6 +138,9 @@ task ConcatHeaderedTextFiles {
}
RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr])
String cat_command = if gzipped then "zcat" else "cat"
String compress_command = if gzipped then "| gzip" else ""
output {
File out = "~{output_filename}"
}
Expand All @@ -147,10 +151,10 @@ task ConcatHeaderedTextFiles {
while read path; do
if [ $i == 0 ]; then
# Get header from first line of first file
awk 'NR==1' $path > $OUT_FILE
~{cat_command} $path | awk 'NR==1' ~{compress_command} > $OUT_FILE
fi
# Get data from each file, skipping header line
awk 'NR>1' $path >> $OUT_FILE
~{cat_command} $path | awk 'NR>1' ~{compress_command} >> $OUT_FILE
i=$((i+1))
done < ~{write_lines(text_files)}
>>>
Expand Down

0 comments on commit f08a683

Please sign in to comment.