From d908cc993a4cc30035c5950a7366ca8a66675334 Mon Sep 17 00:00:00 2001 From: Paul Sud <41386393+paul-sud@users.noreply.github.com> Date: Thu, 2 Dec 2021 16:52:13 -0800 Subject: [PATCH] PIP-1580-slice-subcompartments (#126) --- docker/hic-pipeline/Dockerfile | 6 ++ hic.wdl | 64 +++++++++++++++++-- hic_pipeline/__init__.py | 2 +- make_restriction_site_locations.wdl | 6 +- tests/functional/json/test_hic.json | 1 + .../functional/json/test_hic_nonspecific.json | 1 + .../json/test_hic_with_read_group.json | 1 + .../json/test_multiple_libraries.json | 1 + tests/functional/json/test_no_pairs.json | 1 + tests/functional/json/test_ultima.json | 1 + 10 files changed, 76 insertions(+), 8 deletions(-) diff --git a/docker/hic-pipeline/Dockerfile b/docker/hic-pipeline/Dockerfile index 81d4869f..05982e72 100644 --- a/docker/hic-pipeline/Dockerfile +++ b/docker/hic-pipeline/Dockerfile @@ -113,6 +113,12 @@ RUN curl \ chmod 666 /opt/juicer/CPU/common/juicer_tools.jar && \ ln -s juicer/CPU scripts +RUN curl \ + -LO \ + https://github.com/sa501428/mixer-tools/releases/download/v4.07.08/MixerTools.4.7.8.jar && \ + chmod 666 /opt/MixerTools.4.7.8.jar && \ + ln -s /opt/MixerTools.4.7.8.jar /opt/MixerTools.jar + # For sorting, LC_ALL is C ENV LC_ALL C ENV PATH=/opt:/opt/scripts:/opt/scripts/common:/opt/juicer/misc:$PATH diff --git a/hic.wdl b/hic.wdl index f97a20c3..e9eef184 100644 --- a/hic.wdl +++ b/hic.wdl @@ -14,9 +14,9 @@ struct BamAndLigationCount { workflow hic { meta { - version: "1.3.1" - caper_docker: "encodedcc/hic-pipeline:1.3.1" - caper_singularity: "docker://encodedcc/hic-pipeline:1.3.1" + version: "1.4.0" + caper_docker: "encodedcc/hic-pipeline:1.4.0" + caper_singularity: "docker://encodedcc/hic-pipeline:1.4.0" croo_out_def: "https://raw.githubusercontent.com/ENCODE-DCC/hic-pipeline/dev/croo_out_def.json" } @@ -37,6 +37,7 @@ workflow hic { Boolean no_call_loops = false Boolean no_call_tads = false Boolean no_eigenvectors = false + Boolean no_slice = false Int align_num_cpus = 32 Int? create_hic_num_cpus Int? add_norm_num_cpus @@ -258,6 +259,25 @@ workflow hic { } } } + + if (!no_slice) { + File hic_file = select_first([add_norm.output_hic[1], input_hic]) + + call slice as slice_25kb { input: + hic_file = hic_file, + resolution = 25000, + } + + call slice as slice_50kb { input: + hic_file = hic_file, + resolution = 50000, + } + + call slice as slice_100kb { input: + hic_file = hic_file, + resolution = 100000, + } + } } task get_ligation_site_regex { @@ -778,7 +798,7 @@ task hiccups { cpu : "1" bootDiskSizeGb: "20" disks: "local-disk 100 HDD" - docker: "encodedcc/hic-pipeline:1.3.1_hiccups" + docker: "encodedcc/hic-pipeline:1.4.0_hiccups" gpuType: "nvidia-tesla-p100" gpuCount: 1 memory: "8 GB" @@ -828,6 +848,42 @@ task create_eigenvector { } } +task slice { + input { + File hic_file + Int resolution = 25000 + Int minimum_num_clusters = 2 + Int maximum_num_clusters = 13 + Int num_kmeans_runs = 4 + } + + command { + set -euo pipefail + java \ + -Xmx20G \ + -jar /opt/MixerTools.jar \ + slice \ + --encode-mode \ + -r ~{resolution} \ + ~{hic_file} \ + ~{minimum_num_clusters},~{maximum_num_clusters},~{num_kmeans_runs} \ + slice_results \ + cell_type + gzip -n slice_results/*.bed + mv slice_results/slice_subcompartment_clusters.bed.gz slice_subcompartment_clusters_~{resolution}.bed.gz + } + + output { + File subcompartments = "slice_subcompartment_clusters_~{resolution}.bed.gz" + } + + runtime { + cpu : "1" + disks: "local-disk 100 SSD" + memory : "24 GB" + } +} + task exit_early { input { String message diff --git a/hic_pipeline/__init__.py b/hic_pipeline/__init__.py index b702ea99..7bb35eb7 100644 --- a/hic_pipeline/__init__.py +++ b/hic_pipeline/__init__.py @@ -1,5 +1,5 @@ __title__ = "hic-pipeline" -__version__ = "1.3.1" +__version__ = "1.4.0" __description__ = "ENCODE Hi-C uniform processing pipeline." __url__ = "https://github.com/ENCODE-DCC/hic-pipeline" __uri__ = __url__ diff --git a/make_restriction_site_locations.wdl b/make_restriction_site_locations.wdl index 85acac93..65d0950b 100644 --- a/make_restriction_site_locations.wdl +++ b/make_restriction_site_locations.wdl @@ -2,9 +2,9 @@ version 1.0 workflow make_restriction_site_locations { meta { - version: "1.3.1" - caper_docker: "encodedcc/hic-pipeline:1.3.1" - caper_singularity: "docker://encodedcc/hic-pipeline:1.3.1" + version: "1.4.0" + caper_docker: "encodedcc/hic-pipeline:1.4.0" + caper_singularity: "docker://encodedcc/hic-pipeline:1.4.0" } parameter_meta { diff --git a/tests/functional/json/test_hic.json b/tests/functional/json/test_hic.json index 067814c9..ae9ddeda 100644 --- a/tests/functional/json/test_hic.json +++ b/tests/functional/json/test_hic.json @@ -11,6 +11,7 @@ ], "hic.no_call_loops": true, "hic.no_call_tads": true, + "hic.no_slice": true, "hic.reference_index": "tests/data/ce10_selected.tar.gz", "hic.restriction_enzymes": [ "MboI" diff --git a/tests/functional/json/test_hic_nonspecific.json b/tests/functional/json/test_hic_nonspecific.json index c566a83e..9475b914 100644 --- a/tests/functional/json/test_hic_nonspecific.json +++ b/tests/functional/json/test_hic_nonspecific.json @@ -11,6 +11,7 @@ ], "hic.no_call_loops": true, "hic.no_call_tads": true, + "hic.no_slice": true, "hic.reference_index": "tests/data/ce10_selected.tar.gz", "hic.restriction_enzymes": [ "none" diff --git a/tests/functional/json/test_hic_with_read_group.json b/tests/functional/json/test_hic_with_read_group.json index fadf568b..c2d65557 100644 --- a/tests/functional/json/test_hic_with_read_group.json +++ b/tests/functional/json/test_hic_with_read_group.json @@ -12,6 +12,7 @@ ], "hic.no_call_loops": true, "hic.no_call_tads": true, + "hic.no_slice": true, "hic.reference_index": "tests/data/ce10_selected.tar.gz", "hic.restriction_enzymes": [ "MboI" diff --git a/tests/functional/json/test_multiple_libraries.json b/tests/functional/json/test_multiple_libraries.json index 2a0eb474..4a50f935 100644 --- a/tests/functional/json/test_multiple_libraries.json +++ b/tests/functional/json/test_multiple_libraries.json @@ -21,6 +21,7 @@ ], "hic.no_call_loops": true, "hic.no_call_tads": true, + "hic.no_slice": true, "hic.reference_index": "tests/data/ce10_selected.tar.gz", "hic.restriction_enzymes": [ "MboI" diff --git a/tests/functional/json/test_no_pairs.json b/tests/functional/json/test_no_pairs.json index 7a13595e..8f0c79f4 100644 --- a/tests/functional/json/test_no_pairs.json +++ b/tests/functional/json/test_no_pairs.json @@ -12,6 +12,7 @@ "hic.no_call_loops": true, "hic.no_call_tads": true, "hic.no_pairs": true, + "hic.no_slice": true, "hic.reference_index": "tests/data/ce10_selected.tar.gz", "hic.restriction_enzymes": [ "MboI" diff --git a/tests/functional/json/test_ultima.json b/tests/functional/json/test_ultima.json index f01061d0..24676e5f 100644 --- a/tests/functional/json/test_ultima.json +++ b/tests/functional/json/test_ultima.json @@ -11,6 +11,7 @@ "hic.no_call_loops": true, "hic.no_call_tads": true, "hic.no_eigenvectors": true, + "hic.no_slice": true, "hic.reference_index": "tests/data/hg38_chr19_chrM_GRCh38_no_alt_analysis_set_GCA_000001405.15.chr19_chrM.tar.gz", "hic.restriction_enzymes": [ "none"