diff --git a/config/cluster.json b/config/cluster.json index 5e5e37a..9df9cbd 100644 --- a/config/cluster.json +++ b/config/cluster.json @@ -24,5 +24,10 @@ "threads": "16", "mem": "64g", "time": "0-12:00:00" + }, + "FitHiChIP": { + "threads": "10", + "mem": "64g", + "time": "3-00:00:00" } } diff --git a/config/configfile_BiasCorrection_CoverageBias b/config/configfile_BiasCorrection_CoverageBias new file mode 100644 index 0000000..2ec998e --- /dev/null +++ b/config/configfile_BiasCorrection_CoverageBias @@ -0,0 +1,115 @@ +#==================================== +# Sample configuration file for running FitHiChIP +#==================================== + +##******** +##***** parameters to provide the input HiChIP alignment files +##******** + +##============ +## option 1: provide the valid pairs from HiC-Pro pipeline - can be gzipped as well +##============ +ValidPairs=./TestData/Sample_ValidPairs.txt.gz + +##============ +## option 2: provide the bin interval and contact matrix files from HiC-Pro pipeline +##============ + +## bin interval file (of the format *_abs.bed from HiC-pro output) +Interval= + +## matrix file (of the format *.matrix from HiC-pro output) +Matrix= + +##============ +## option 3: If HiChIP was processed by aligners other than HiC-Pro +## a) provide the locus pairs as a .bed formatted file with the following format (7 fields): +## chr1 start1 end1 chr2 start2 end2 contactcounts +##============ +Bed= + +##============ +## option 4: If HiChIP data is provided in .hic format +## Make sure that the .hic file contains the target resolution which is provided in the BINSIZE parameter (below) +##============ +HIC= + +##============ +## option 5: If HiChIP data is provided in .cool / .mcool format +## Make sure that the .cool or .mcool file contains the target resolution which is provided in the BINSIZE parameter (below) +##============ +COOL= + + +##******** +## File containing chromomosome size information corresponding to the reference genome. +##******** +ChrSizeFile=./TestData/chrom_hg19.sizes + +##******** +## Mandatory parameter - Reference ChIP-seq / HiChIP peaks (in .bed format) - can be gzipped as well +## We recommend using reference ChIP-seq peaks (if available) +## Otherwise, peaks can be computed from HiChIP data. +## See the documentation: https://ay-lab.github.io/FitHiChIP/usage/Utilities.html#inferring-peaks-from-hichip-data-for-use-in-the-hichip-pipeline +##******** +PeakFile=./TestData/Sample.Peaks.gz + + +##******** +## Mandatory parameter - Output directory to contain all the results +##******** +OutDir=./TestData/results/ + + +##******** +## Mandatory parameter - Boolean variable indicating if the reference genome is circular +## 0, by default. If 1 (circular genome), calculation of genomic distance is slightly different +##******** +CircularGenome=0 + + +##******** +##***** Various FitHiChIP loop calling related parameters +##******** + +##Interaction type +## 1: peak to peak +## 2: peak to non peak +## 3: peak to all (default - both peak-to-peak and peak-to-nonpeak) +## 4: all to all (similar to Hi-C) +## 5: All of the modes 1 to 4 are computed. +IntType=1 + +## Bin size, in bases, for the interactions. Default = 5000 (5 Kb). +BINSIZE=2500 + +## Lower distance threshold of loops - default = 20000 (20 Kb) +LowDistThr=20000 + +## Upper distance threshold of loops - default = 2000000 (2 Mb) +UppDistThr=2000000 + +## Values 0/1 - Applicable if IntType = 3 (peak to all output interactions) +## 1 indicates FitHiChIP(S) model - uses only peak to peak loops for background modeling +## 0 corresponds to FitHiChIP(L) - uses both peak to peak and peak to nonpeak loops for background modeling +UseP2PBackgrnd=1 + +## type of bias - values: 1 / 2 +## 1: coverage bias regression +## 2: ICE bias regression +BiasType=1 + +## if 1 (default), merge filtering (corresponding to either FitHiChIP(L+M) or FitHiChIP(S+M) +## depending on the parameter UseP2PBackgrnd) is enabled +MergeInt=1 + +## FDR (q-value) threshold for loop significance +QVALUE=0.01 + +## prefix string of all the output files (Default = 'FitHiChIP'). +PREFIX=FitHiChIP + +## Binary variable 1/0: +## if 1, overwrites any existing output file. +## otherwise (0), does not overwrite any output file. +OverWrite=0 \ No newline at end of file diff --git a/config/modules.json b/config/modules.json index 70368f0..7f3c6c7 100644 --- a/config/modules.json +++ b/config/modules.json @@ -5,6 +5,7 @@ "pairtools": "pairtools/1.0.2", "samtools": "samtools/1.19", "bedtools": "bedtools/2.30.0", - "juicer": "juicer/1.6" + "juicer": "juicer/1.6", + "fithichip": "fithichip/11.0" } } diff --git a/workflow/Snakefile b/workflow/Snakefile index a50de62..9e6f4d8 100644 --- a/workflow/Snakefile +++ b/workflow/Snakefile @@ -44,6 +44,7 @@ rule all: expand(join(workpath,"QC","{name}.dedupStatsSimplify.txt"), name=samples), expand(join(workpath,"QC","{name}.hichip_qc_metrics.txt"), name=samples), expand(join(workpath,"Aligned","{name}.hic"), name=samples), + expand(join(workpath,"FitHiChIP_out","{name}.interactions_FitHiC_Q0.01.bed"),name=samples), rule fastqc: """ @@ -203,6 +204,65 @@ rule contactmap: juicer_tools -Xmx{params.memory}m pre --threads {threads} {input} {output} {params.reflen} """ +rule makehicpro: + input: + join(workpath,"Aligned","{name}.mapped.pairs"), + output: + join(workpath,"Aligned","{name}.hicpro_mapped.pairs.gz"), + params: + rname='makehicpro', + shell: """ + grep -v '#' {input} | awk -F"\t" '{{print $1"\t"$2"\t"$3"\t"$6"\t"$4"\t"$5"\t"$7}}' | gzip -c > {output} + """ + +rule makeFitConfig: + input: + join(workpath,"Aligned","{name}.hicpro_mapped.pairs.gz"), + output: + join(workpath,"FitHiChIP_out","{name}.config"), + params: + rname='makeFitConfig', + inConfig=join(workpath,"config","configfile_BiasCorrection_CoverageBias"), + reflen=config['references'][genome]['reflen'], + outdir=join(workpath,"FitHiChIP_out"), + bed=chip_peaks, + sample="{name}", + CTCF="true", + shell: """ + cp {params.inConfig} {output} + sed -i 's@ValidPairs=.*@ValidPairs={input}@g' {output} + sed -i 's@PeakFile=.*@PeakFile={params.bed}@g' {output} + sed -i 's@OutDir=.*@OutDir={params.outdir}@g' {output} + sed -i 's@ChrSizeFile=.*@ChrSizeFile={params.reflen}@g' {output} + sed -i 's@PREFIX=.*@PREFIX={params.sample}@g' {output} + + if ! {params.CTCF}; then + sed -i 's@IntType=.*@IntType=3@g' {output} + fi + """ + +rule FitHiChIP: + input: + pairs=join(workpath,"Aligned","{name}.hicpro_mapped.pairs.gz"), + config=join(workpath,"FitHiChIP_out","{name}.config"), + output: + join(workpath,"FitHiChIP_out","{name}.interactions_FitHiC_Q0.01.bed"), + params: + rname='FitHiChIP', + runDir=join(workpath,"FitHiChIP"), + envmodules: + config['tools']['fithichip'], + shell: """ + if [ ! -d "{params.runDir}" ]; then + mkdir {params.runDir} + cp -r $FITHICHIP_SRC/* {params.runDir} + fi + + cd {params.runDir} + ./FitHiChIP_HiCPro.sh -C {input.config} + """ + + # Import rules