diff --git a/README.md b/README.md index 21f549e..eb6e87d 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,8 @@ # gatk4-genome-processing-pipeline Workflows used for germline processing in whole genome sequence data. +*- This repo will soon be archived; its new repository location will be [broadinstitute/warp](https://github.com/broadinstitute/warp/tree/develop/pipelines/broad/dna_seq/germline/single_sample/wgs)* + ### WholeGenomeGermlineSingleSample : This WDL pipeline implements data pre-processing and initial variant calling (GVCF generation) according to the GATK Best Practices (June 2016) for germline SNP and @@ -24,12 +26,12 @@ Indel discovery in human whole-genome sequencing data. ### Software version requirements : - GATK 4.0.10.1 + - The HaplotypeCaller call provides the option to use GATK 3, which uses GATK 4.beta.5 for PrintReads and GATK 3.5 for HaplotypeCaller. - Picard 2.20.0-SNAPSHOT - Samtools 1.3.1 - Python 2.7 - Cromwell version support - - Successfully tested on v51 - - Does not work on versions < v23 due to output syntax + - Successfully tested on v53 ### Important Notes : - The provided JSON is a generic ready to use example template for the workflow. It is the user’s responsibility to correctly set the reference and resource variables for their own particular test case using the [GATK Tool and Tutorial Documentations](https://gatk.broadinstitute.org/hc/en-us/categories/360002310591). @@ -37,7 +39,7 @@ Indel discovery in human whole-genome sequencing data. - For help running workflows on the Google Cloud Platform or locally please view the following tutorial [(How to) Execute Workflows from the gatk-workflows Git Organization](https://gatk.broadinstitute.org/hc/en-us/articles/360035530952). - Please visit the [User Guide](https://gatk.broadinstitute.org/hc/en-us/categories/360002310591) site for further documentation on our workflows and tools. 
-- Relevant reference and resources bundles can be accessed in [Resource Bundle](https://gatk.broadinstitute.org/hc/en-us/articles/360036212652). +- Relevant reference and resources bundles can be accessed in [Resource Bundle](https://gatk.broadinstitute.org/hc/en-us/articles/360035890811). ### Contact Us : - The following material is provided by the Data Science Platforum group at the Broad Institute. Please direct any questions or concerns to one of our forum sites : [GATK](https://gatk.broadinstitute.org/hc/en-us/community/topics) or [Terra](https://support.terra.bio/hc/en-us/community/topics/360000500432). diff --git a/WholeGenomeGermlineSingleSample.wdl b/WholeGenomeGermlineSingleSample.wdl index 14bab47..c840a72 100644 --- a/WholeGenomeGermlineSingleSample.wdl +++ b/WholeGenomeGermlineSingleSample.wdl @@ -38,7 +38,7 @@ import "./structs/DNASeqStructs.wdl" # WORKFLOW DEFINITION workflow WholeGenomeGermlineSingleSample { - String pipeline_version = "2.0" + String pipeline_version = "2.1.0" input { SampleAndUnmappedBams sample_and_unmapped_bams @@ -218,4 +218,7 @@ workflow WholeGenomeGermlineSingleSample { File output_vcf = BamToGvcf.output_vcf File output_vcf_index = BamToGvcf.output_vcf_index } + meta { + allowNestedInputs: true + } } diff --git a/tasks/AggregatedBamQC.wdl b/tasks/AggregatedBamQC.wdl index f6f30e8..3f7ca53 100644 --- a/tasks/AggregatedBamQC.wdl +++ b/tasks/AggregatedBamQC.wdl @@ -106,4 +106,8 @@ input { File? fingerprint_summary_metrics = CheckFingerprint.summary_metrics File? 
fingerprint_detail_metrics = CheckFingerprint.detail_metrics } + + meta { + allowNestedInputs: true + } } diff --git a/tasks/Alignment.wdl b/tasks/Alignment.wdl index 41a9238..5c9469a 100644 --- a/tasks/Alignment.wdl +++ b/tasks/Alignment.wdl @@ -17,30 +17,11 @@ version 1.0 import "../structs/DNASeqStructs.wdl" -# Get version of BWA -task GetBwaVersion { - command { - # not setting set -o pipefail here because /bwa has a rc=1 and we dont want to allow rc=1 to succeed because - # the sed may also fail with that error and that is something we actually want to fail on. - /usr/gitc/bwa 2>&1 | \ - grep -e '^Version' | \ - sed 's/Version: //' - } - runtime { - docker: "us.gcr.io/broad-gotc-prod/genomes-in-the-cloud:2.4.3-1564508330" - memory: "1 GiB" - } - output { - String bwa_version = read_string(stdout()) - } -} - # Read unmapped BAM, convert on-the-fly to FASTQ and stream to BWA MEM for alignment, then stream to MergeBamAlignment task SamToFastqAndBwaMemAndMba { input { File input_bam String bwa_commandline - String bwa_version String output_bam_basename # reference_fasta.ref_alt is the .alt file from bwa-kit @@ -62,9 +43,21 @@ task SamToFastqAndBwaMemAndMba { Int disk_size = ceil(unmapped_bam_size + bwa_ref_size + (disk_multiplier * unmapped_bam_size) + 20) command <<< + + + # This is done before "set -o pipefail" because "bwa" will have a rc=1 and we don't want to allow rc=1 to succeed + # because the sed may also fail with that error and that is something we actually want to fail on. 
+ BWA_VERSION=$(/usr/gitc/bwa 2>&1 | \ + grep -e '^Version' | \ + sed 's/Version: //') + set -o pipefail set -e + if [ -z "${BWA_VERSION}" ]; then + echo "Unable to determine BWA version" >&2; exit 1; + fi + # set the bash variable needed for the command-line bash_ref_fasta=~{reference_fasta.ref_fasta} # if reference_fasta.ref_alt has data in it, @@ -99,7 +92,7 @@ task SamToFastqAndBwaMemAndMba { MAX_INSERTIONS_OR_DELETIONS=-1 \ PRIMARY_ALIGNMENT_STRATEGY=MostDistant \ PROGRAM_RECORD_ID="bwamem" \ - PROGRAM_GROUP_VERSION="~{bwa_version}" \ + PROGRAM_GROUP_VERSION="${BWA_VERSION}" \ PROGRAM_GROUP_COMMAND_LINE="~{bwa_commandline}" \ PROGRAM_GROUP_NAME="bwamem" \ UNMAPPED_READ_STRATEGY=COPY_TO_TAG \ diff --git a/tasks/BamToCram.wdl b/tasks/BamToCram.wdl index cf9b1e5..df6cde3 100644 --- a/tasks/BamToCram.wdl +++ b/tasks/BamToCram.wdl @@ -63,5 +63,8 @@ workflow BamToCram { File output_cram_md5 = ConvertToCram.output_cram_md5 File validate_cram_file_report = ValidateCram.report } + meta { + allowNestedInputs: true + } } diff --git a/tasks/SplitLargeReadGroup.wdl b/tasks/SplitLargeReadGroup.wdl index 7ea5976..65abf7b 100644 --- a/tasks/SplitLargeReadGroup.wdl +++ b/tasks/SplitLargeReadGroup.wdl @@ -26,7 +26,6 @@ workflow SplitLargeReadGroup { File input_bam String bwa_commandline - String bwa_version String output_bam_basename # reference_fasta.ref_alt is the .alt file from bwa-kit @@ -58,7 +57,6 @@ workflow SplitLargeReadGroup { bwa_commandline = bwa_commandline, output_bam_basename = current_name, reference_fasta = reference_fasta, - bwa_version = bwa_version, compression_level = compression_level, preemptible_tries = preemptible_tries, hard_clip_reads = hard_clip_reads @@ -84,4 +82,7 @@ workflow SplitLargeReadGroup { output { File aligned_bam = GatherMonolithicBamFile.output_bam } + meta { + allowNestedInputs: true + } } diff --git a/tasks/UnmappedBamToAlignedBam.wdl b/tasks/UnmappedBamToAlignedBam.wdl index a854902..dbec144 100644 --- a/tasks/UnmappedBamToAlignedBam.wdl +++ b/tasks/UnmappedBamToAlignedBam.wdl @@ 
-50,10 +50,6 @@ workflow UnmappedBamToAlignedBam { Int compression_level = 2 - # Get the version of BWA to include in the PG record in the header of the BAM produced - # by MergeBamAlignment. - call Alignment.GetBwaVersion - # Get the size of the standard reference files as well as the additional reference files needed for BWA # Align flowcell-level unmapped input bams in parallel @@ -78,7 +74,6 @@ workflow UnmappedBamToAlignedBam { input: input_bam = unmapped_bam, bwa_commandline = bwa_commandline, - bwa_version = GetBwaVersion.bwa_version, output_bam_basename = unmapped_bam_basename + ".aligned.unsorted", reference_fasta = references.reference_fasta, compression_level = compression_level, @@ -95,7 +90,6 @@ workflow UnmappedBamToAlignedBam { bwa_commandline = bwa_commandline, output_bam_basename = unmapped_bam_basename + ".aligned.unsorted", reference_fasta = references.reference_fasta, - bwa_version = GetBwaVersion.bwa_version, compression_level = compression_level, preemptible_tries = papi_settings.preemptible_tries, hard_clip_reads = hard_clip_reads @@ -277,4 +271,7 @@ workflow UnmappedBamToAlignedBam { File output_bam = GatherBamFiles.output_bam File output_bam_index = GatherBamFiles.output_bam_index } + meta { + allowNestedInputs: true + } } diff --git a/tasks/VariantCalling.wdl b/tasks/VariantCalling.wdl index 5155367..2431232 100644 --- a/tasks/VariantCalling.wdl +++ b/tasks/VariantCalling.wdl @@ -153,6 +153,9 @@ workflow VariantCalling { File? bamout = MergeBamouts.output_bam File? bamout_index = MergeBamouts.output_bam_index } + meta { + allowNestedInputs: true + } } # This task is here because merging bamout files using Picard produces an error.