PIPE-49-diploidify (#145)
paul-sud authored Mar 11, 2022
1 parent 4f8ffe5 commit 3338e6e
Showing 9 changed files with 310 additions and 29 deletions.
221 changes: 221 additions & 0 deletions diploidify.wdl
@@ -0,0 +1,221 @@
version 1.0

import "./hic.wdl"

workflow diploidify {
    meta {
        version: "1.14.0"
        caper_docker: "encodedcc/hic-pipeline:1.14.0"
        caper_singularity: "docker://encodedcc/hic-pipeline:1.14.0"
    }

    input {
        Array[File] bams
        # This is from genophase, snp.out_HiC.vcf.gz
        File vcf
        File chrom_sizes

        # Parameters
        Int quality = 30
        Array[Int] create_diploid_hic_resolutions = [2500000, 1000000, 500000, 250000, 100000, 50000, 25000, 10000, 5000, 2000, 1000, 500, 200, 100, 50, 20, 10]

        # Resource params, specify to override the defaults
        Int? merge_num_cpus
        Int? merge_ram_gb
        Int? merge_disk_size_gb
        Int? prepare_bam_num_cpus
        Int? prepare_bam_ram_gb
        Int? prepare_bam_disk_size_gb
        Int? create_diploid_hic_num_cpus
        Int? create_diploid_hic_ram_gb
        Int? create_diploid_hic_disk_size_gb
        Int? create_diploid_dhs_num_cpus
        Int? create_diploid_dhs_ram_gb
        Int? create_diploid_dhs_disk_size_gb

        String docker = "encodedcc/hic-pipeline:1.14.0"
        String singularity = "docker://encodedcc/hic-pipeline:1.14.0"
    }

    RuntimeEnvironment runtime_environment = {
        "docker": docker,
        "singularity": singularity
    }

    call filter_chrom_sizes { input:
        chrom_sizes = chrom_sizes,
    }

    call hic.merge as merge { input:
        bams = bams,
        num_cpus = merge_num_cpus,
        ram_gb = merge_ram_gb,
        disk_size_gb = merge_disk_size_gb,
        runtime_environment = runtime_environment,
    }
    call prepare_bam { input:
        bam = merge.bam,
        quality = quality,
        chrom_sizes = chrom_sizes,
        num_cpus = prepare_bam_num_cpus,
        ram_gb = prepare_bam_ram_gb,
        disk_size_gb = prepare_bam_disk_size_gb,
    }

    call create_diploid_hic { input:
        bam = prepare_bam.filtered_bam,
        bam_index = prepare_bam.bam_index,
        vcf = vcf,
        chrom_sizes = filter_chrom_sizes.filtered_chrom_sizes,
        resolutions = create_diploid_hic_resolutions,
        num_cpus = create_diploid_hic_num_cpus,
        ram_gb = create_diploid_hic_ram_gb,
        disk_size_gb = create_diploid_hic_disk_size_gb,
    }

    call create_diploid_dhs { input:
        bam = prepare_bam.filtered_bam,
        bam_index = prepare_bam.bam_index,
        chrom_sizes = chrom_sizes,
        reads_to_homologs = create_diploid_hic.reads_to_homologs,
        num_cpus = create_diploid_dhs_num_cpus,
        ram_gb = create_diploid_dhs_ram_gb,
        disk_size_gb = create_diploid_dhs_disk_size_gb,
    }
}

task filter_chrom_sizes {
    input {
        File chrom_sizes
        String output_filename = "filtered.chrom.sizes"
    }

    command <<<
        python3 "$(which filter_chrom_sizes.py)" ~{chrom_sizes} ~{output_filename}
    >>>

    output {
        File filtered_chrom_sizes = output_filename
    }
}

task prepare_bam {
    input {
        File bam
        File chrom_sizes
        Int quality
        Int num_cpus = 8
        Int ram_gb = 64
        Int disk_size_gb = 2000
    }

    command <<<
        export CHROM_SIZES_FILENAME="assembly.chrom.sizes"
        mv ~{chrom_sizes} $CHROM_SIZES_FILENAME
        bash /opt/juicer/CPU/diploidify.sh \
            --from-stage prep \
            --to-stage prep \
            --chrom-sizes $CHROM_SIZES_FILENAME \
            --mapq ~{quality} \
            --juicer-dir /opt \
            --phaser-dir /opt/3d-dna \
            ~{bam}
    >>>

    output {
        File filtered_bam = "reads.sorted.bam"
        File bam_index = "reads.sorted.bam.bai"
    }

    runtime {
        cpu: num_cpus
        memory: "~{ram_gb} GB"
        disks: "local-disk ~{disk_size_gb} HDD"
    }
}


task create_diploid_hic {
    input {
        File bam
        File bam_index
        File vcf
        File chrom_sizes
        Array[Int] resolutions
        Int num_cpus = 24
        Int ram_gb = 128
        Int disk_size_gb = 2000
    }

    command <<<
        mv ~{bam} "reads.sorted.bam"
        mv ~{bam_index} "reads.sorted.bam.bai"
        export CHROM_SIZES_FILENAME="assembly.chrom.sizes"
        mv ~{chrom_sizes} $CHROM_SIZES_FILENAME
        export VCF_FILENAME="snp.vcf"
        gzip -dc ~{vcf} > $VCF_FILENAME
        bash /opt/juicer/CPU/diploidify.sh \
            --from-stage hic \
            --to-stage hic \
            --vcf $VCF_FILENAME \
            --chrom-sizes $CHROM_SIZES_FILENAME \
            --resolutions ~{sep="," resolutions} \
            --threads-hic ~{num_cpus} \
            --juicer-dir /opt \
            --phaser-dir /opt/3d-dna
    >>>

    output {
        # r = reference haplotype, a = alternate haplotype; the assignment is arbitrary
        File hic_r = "diploid_inter_r.hic"
        File hic_a = "diploid_inter_a.hic"
        File reads_to_homologs = "reads_to_homologs.txt"
    }

    runtime {
        cpu: num_cpus
        memory: "~{ram_gb} GB"
        disks: "local-disk ~{disk_size_gb} HDD"
    }
}

task create_diploid_dhs {
    input {
        File bam
        File bam_index
        File chrom_sizes
        File reads_to_homologs
        Int num_cpus = 2
        Int ram_gb = 128
        Int disk_size_gb = 1000
    }

    command <<<
        mv ~{bam} "reads.sorted.bam"
        mv ~{bam_index} "reads.sorted.bam.bai"
        export CHROM_SIZES_FILENAME="assembly.chrom.sizes"
        mv ~{chrom_sizes} $CHROM_SIZES_FILENAME
        bash /opt/juicer/CPU/diploidify.sh \
            --from-stage dhs \
            --to-stage dhs \
            --chrom-sizes $CHROM_SIZES_FILENAME \
            --reads-to-homologs ~{reads_to_homologs} \
            --juicer-dir /opt \
            --phaser-dir /opt/3d-dna
    >>>

    output {
        # r = reference haplotype, a = alternate haplotype; the assignment is arbitrary
        File bigwig_raw_r = "diploid_inter_raw_r.bw"
        File bigwig_raw_a = "diploid_inter_raw_a.bw"
        File bigwig_corrected_r = "diploid_inter_corrected_r.bw"
        File bigwig_corrected_a = "diploid_inter_corrected_a.bw"
    }

    runtime {
        cpu: num_cpus
        memory: "~{ram_gb} GB"
        disks: "local-disk ~{disk_size_gb} HDD"
    }
}
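For reference, the new workflow takes a set of Hi-C BAMs, the phased VCF produced by genophase, and a chrom.sizes file. A minimal sketch of assembling an input JSON for it in Python (only the required inputs are shown; the file paths are illustrative placeholders):

import json

# Required inputs for the diploidify workflow; paths are placeholders.
inputs = {
    "diploidify.bams": ["alignments/rep1.bam", "alignments/rep2.bam"],
    # Phased VCF from the genophase workflow (snp.out_HiC.vcf.gz)
    "diploidify.vcf": "genophase/snp.out_HiC.vcf.gz",
    "diploidify.chrom_sizes": "reference/GRCh38.chrom.sizes",
}

with open("diploidify_input.json", "w") as f:
    json.dump(inputs, f, indent=4)

Optional inputs such as diploidify.quality and the per-task resource overrides can be added to the same dictionary using the names declared in the workflow's input block above.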
12 changes: 10 additions & 2 deletions docker/hic-pipeline/Dockerfile
@@ -38,6 +38,12 @@ RUN ln -s /usr/bin/python3 /usr/bin/python
# Need to be sure we have this for stats
RUN locale-gen en_US.UTF-8

+# Fix warning for diploidify
+# https://www.educative.io/edpresso/error-mesg-ttyname-failed-inappropriate-ioctl-for-device
+# Need to escape the &s
+# https://unix.stackexchange.com/questions/32907/what-characters-do-i-need-to-escape-when-using-sed-in-a-sh-script
+RUN sed -i 's/mesg n || true/tty -s \&\& mesg n/' /root/.profile
+
WORKDIR /opt/

# Install BWA
@@ -105,7 +111,7 @@ RUN git clone https://github.com/ENCODE-DCC/kentUtils_bin_v381.git && \
# Install Juicer
RUN git clone --branch encode https://github.com/theaidenlab/juicer.git && \
    cd juicer && \
-    git checkout 7b21fd620ee1f07266206caa2a7992d08d51ba8e && \
+    git checkout 50d557f1d4725a475071fce5975839602bd311e5 && \
    chmod +x CPU/* CPU/common/* misc/* && \
    find -mindepth 1 -maxdepth 1 -type d -not -name "CPU" -not -name ".git" -not -name "misc" | xargs rm -rf

@@ -115,7 +121,9 @@ RUN curl \
    https://github.com/aidenlab/Juicebox/releases/download/v2.13.06/juicer_tools_2.13.06.jar \
    -o /opt/juicer/CPU/common/juicer_tools.jar && \
    chmod 666 /opt/juicer/CPU/common/juicer_tools.jar && \
-    ln -s juicer/CPU scripts
+    ln -s juicer/CPU scripts && \
+    ln -s /opt/juicer/CPU/common/juicer_tools /opt/juicer/CPU/juicer_tools && \
+    ln -s /opt/juicer/CPU/common/juicer_tools.jar /opt/juicer/CPU/juicer_tools.jar

RUN curl \
-LO \
10 changes: 5 additions & 5 deletions genophase.wdl
@@ -4,9 +4,9 @@ import "./hic.wdl"

workflow genophase {
    meta {
-        version: "1.13.0"
-        caper_docker: "encodedcc/hic-pipeline:1.13.0"
-        caper_singularity: "docker://encodedcc/hic-pipeline:1.13.0"
+        version: "1.14.0"
+        caper_docker: "encodedcc/hic-pipeline:1.14.0"
+        caper_singularity: "docker://encodedcc/hic-pipeline:1.14.0"
        croo_out_def: "https://raw.githubusercontent.com/ENCODE-DCC/hic-pipeline/dev/croo_out_def.json"
    }

@@ -25,8 +25,8 @@ workflow genophase {
        Int? run_3d_dna_ram_gb
        Boolean no_phasing = false

-        String docker = "encodedcc/hic-pipeline:1.13.0"
-        String singularity = "docker://encodedcc/hic-pipeline:1.13.0"
+        String docker = "encodedcc/hic-pipeline:1.14.0"
+        String singularity = "docker://encodedcc/hic-pipeline:1.14.0"
    }

    RuntimeEnvironment runtime_environment = {
20 changes: 11 additions & 9 deletions hic.wdl
@@ -19,9 +19,9 @@ struct RuntimeEnvironment {

workflow hic {
    meta {
-        version: "1.13.0"
-        caper_docker: "encodedcc/hic-pipeline:1.13.0"
-        caper_singularity: "docker://encodedcc/hic-pipeline:1.13.0"
+        version: "1.14.0"
+        caper_docker: "encodedcc/hic-pipeline:1.14.0"
+        caper_singularity: "docker://encodedcc/hic-pipeline:1.14.0"
        croo_out_def: "https://raw.githubusercontent.com/ENCODE-DCC/hic-pipeline/dev/croo_out_def.json"
        description: "ENCODE Hi-C pipeline, see https://github.com/ENCODE-DCC/hic-pipeline for details."
    }
@@ -77,10 +77,10 @@ workflow hic {
        Int? create_accessibility_track_disk_size_gb
        String assembly_name = "undefined"

-        String docker = "encodedcc/hic-pipeline:1.13.0"
-        String singularity = "docker://encodedcc/hic-pipeline:1.13.0"
-        String delta_docker = "encodedcc/hic-pipeline:1.13.0_delta"
-        String hiccups_docker = "encodedcc/hic-pipeline:1.13.0_hiccups"
+        String docker = "encodedcc/hic-pipeline:1.14.0"
+        String singularity = "docker://encodedcc/hic-pipeline:1.14.0"
+        String delta_docker = "encodedcc/hic-pipeline:1.14.0_delta"
+        String hiccups_docker = "encodedcc/hic-pipeline:1.14.0_hiccups"
    }

    RuntimeEnvironment runtime_environment = {
@@ -654,6 +654,8 @@ task merge {
    input {
        Array[File] bams
        Int num_cpus = 8
+        Int ram_gb = 16
+        Int disk_size_gb = 6000
        String output_bam_filename = "merged"
        RuntimeEnvironment runtime_environment
    }
@@ -675,8 +677,8 @@

    runtime {
        cpu : "~{num_cpus}"
-        memory: "16 GB"
-        disks: "local-disk 6000 HDD"
+        memory: "~{ram_gb} GB"
+        disks: "local-disk ~{disk_size_gb} HDD"
        docker: runtime_environment.docker
        singularity: runtime_environment.singularity
    }
2 changes: 1 addition & 1 deletion hic_pipeline/__init__.py
@@ -1,5 +1,5 @@
__title__ = "hic-pipeline"
__version__ = "1.13.0"
__version__ = "1.14.0"
__description__ = "ENCODE Hi-C uniform processing pipeline."
__url__ = "https://github.com/ENCODE-DCC/hic-pipeline"
__uri__ = __url__
29 changes: 29 additions & 0 deletions hic_pipeline/filter_chrom_sizes.py
@@ -0,0 +1,29 @@
import argparse


def main():
    parser = get_parser()
    args = parser.parse_args()
    with open(args.chrom_sizes) as chrom_sizes, open(
        args.output_file, "w"
    ) as output_file:
        filter_chrom_sizes(chrom_sizes=chrom_sizes, output_file=output_file)


def filter_chrom_sizes(chrom_sizes, output_file):
    for line in chrom_sizes:
        chrom_name = line.split()[0]
        if "_" in chrom_name or chrom_name == "chrEBV":
            continue
        output_file.write(line)


def get_parser():
    parser = argparse.ArgumentParser()
    parser.add_argument("chrom_sizes")
    parser.add_argument("output_file")
    return parser


if __name__ == "__main__":
    main()
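The filtering drops contigs whose names contain an underscore (alternate and unplaced scaffolds) as well as chrEBV. A small sketch exercising the function in memory, assuming the hic_pipeline package is importable; the chromosome names and sizes are illustrative:

import io

from hic_pipeline.filter_chrom_sizes import filter_chrom_sizes

chrom_sizes = io.StringIO(
    "chr1\t248956422\n"
    "chr1_KI270706v1_random\t175055\n"
    "chrEBV\t171823\n"
)
output = io.StringIO()
filter_chrom_sizes(chrom_sizes=chrom_sizes, output_file=output)
print(output.getvalue())  # only the chr1 line is kept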
10 changes: 5 additions & 5 deletions make_restriction_site_locations.wdl
@@ -7,9 +7,9 @@ struct RuntimeEnvironment {

workflow make_restriction_site_locations {
    meta {
-        version: "1.13.0"
-        caper_docker: "encodedcc/hic-pipeline:1.13.0"
-        caper_singularity: "docker://encodedcc/hic-pipeline:1.13.0"
+        version: "1.14.0"
+        caper_docker: "encodedcc/hic-pipeline:1.14.0"
+        caper_singularity: "docker://encodedcc/hic-pipeline:1.14.0"
    }

    parameter_meta {
@@ -22,8 +22,8 @@ workflow make_restriction_site_locations {
        File reference_fasta
        String assembly_name
        String restriction_enzyme
-        String docker = "encodedcc/hic-pipeline:1.13.0"
-        String singularity = "docker://encodedcc/hic-pipeline:1.13.0"
+        String docker = "encodedcc/hic-pipeline:1.14.0"
+        String singularity = "docker://encodedcc/hic-pipeline:1.14.0"
    }


