From f08a683a6cd53d807838070ff5785b078128c6fc Mon Sep 17 00:00:00 2001
From: Mark Walker
Date: Wed, 10 Jan 2024 11:07:12 -0500
Subject: [PATCH] Possibly compressed inputs for ConcatTextFiles

---
Notes (not part of the commit message):

* ConcatTextFiles gains an optional `gzipped` input (default false) that is
  forwarded to ConcatHeaderedTextFiles when `headered` is true. In the
  non-headered branch it is not used; that branch now passes an empty
  `filter_command` to CatUncompressedFiles instead.
* The default `output_suffix` loses its leading dot because the workflow
  already joins prefix and suffix as "~{output_prefix}.~{output_suffix}".
* ConcatHeaderedTextFiles reads each input with `zcat` instead of `cat` and
  pipes every written segment through `gzip` when `gzipped` is true, so the
  output is a sequence of gzip members appended to the same file.
* `$path` and `$OUT_FILE` are double-quoted in the added shell lines so that
  paths containing whitespace or glob characters are not word-split.
* NOTE(review): leading whitespace of the context lines below was
  reconstructed; if it does not match the target tree, apply with
  `git apply --ignore-whitespace` -- TODO confirm against the repository.

 wdl/ConcatTextFiles.wdl    | 7 ++++++-
 wdl/TasksMakeCohortVcf.wdl | 8 ++++++--
 2 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/wdl/ConcatTextFiles.wdl b/wdl/ConcatTextFiles.wdl
index 13ca2adfb..1a8755e4c 100644
--- a/wdl/ConcatTextFiles.wdl
+++ b/wdl/ConcatTextFiles.wdl
@@ -7,7 +7,9 @@ workflow ConcatTextFiles {
   input {
     Array[File] text_files
     String output_prefix
-    String output_suffix = ".concat.txt"
+    String output_suffix = "concat.txt"
+
+    Boolean gzipped = false
     Boolean headered = false
 
     String linux_docker
@@ -15,10 +17,12 @@ workflow ConcatTextFiles {
   }
 
   if (!headered) {
+    # Disable filter command since input might be compressed
     call tasks.CatUncompressedFiles {
       input:
         shards=text_files,
         outfile_name="~{output_prefix}.~{output_suffix}",
+        filter_command="",
         sv_base_mini_docker=sv_base_mini_docker
     }
   }
@@ -27,6 +31,7 @@ workflow ConcatTextFiles {
     call tasks.ConcatHeaderedTextFiles {
       input:
         text_files=text_files,
+        gzipped=gzipped,
         output_filename="~{output_prefix}.~{output_suffix}",
         linux_docker=linux_docker
     }
diff --git a/wdl/TasksMakeCohortVcf.wdl b/wdl/TasksMakeCohortVcf.wdl
index 0c9d9e8e7..d489831e8 100644
--- a/wdl/TasksMakeCohortVcf.wdl
+++ b/wdl/TasksMakeCohortVcf.wdl
@@ -122,6 +122,7 @@ task CatUncompressedFiles {
 task ConcatHeaderedTextFiles {
   input {
     Array[File] text_files
+    Boolean gzipped = false
     String output_filename
     String linux_docker
     RuntimeAttr? runtime_attr_override
@@ -137,6 +138,9 @@ task ConcatHeaderedTextFiles {
   }
   RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr])
 
+  String cat_command = if gzipped then "zcat" else "cat"
+  String compress_command = if gzipped then "| gzip" else ""
+
   output {
     File out = "~{output_filename}"
   }
@@ -147,10 +151,10 @@ task ConcatHeaderedTextFiles {
     while read path; do
       if [ $i == 0 ]; then
         # Get header from first line of first file
-        awk 'NR==1' $path > $OUT_FILE
+        ~{cat_command} "$path" | awk 'NR==1' ~{compress_command} > "$OUT_FILE"
       fi
       # Get data from each file, skipping header line
-      awk 'NR>1' $path >> $OUT_FILE
+      ~{cat_command} "$path" | awk 'NR>1' ~{compress_command} >> "$OUT_FILE"
       i=$((i+1))
     done < ~{write_lines(text_files)}
   >>>