Skip to content

Commit

Permalink
Adding FitHiChIP
Browse files Browse the repository at this point in the history
  • Loading branch information
tovahmarkowitz committed Feb 2, 2024
1 parent dbd6ca6 commit fc38256
Show file tree
Hide file tree
Showing 4 changed files with 182 additions and 1 deletion.
5 changes: 5 additions & 0 deletions config/cluster.json
Original file line number Diff line number Diff line change
Expand Up @@ -24,5 +24,10 @@
"threads": "16",
"mem": "64g",
"time": "0-12:00:00"
},
"FitHiChIP": {
"threads": "10",
"mem": "64g",
"time": "3-00:00:00"
}
}
115 changes: 115 additions & 0 deletions config/configfile_BiasCorrection_CoverageBias
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
#====================================
# Sample configuration file for running FitHiChIP
#====================================

##********
##***** parameters to provide the input HiChIP alignment files
##********

##============
## option 1: provide the valid pairs from HiC-Pro pipeline - can be gzipped as well
##============
ValidPairs=./TestData/Sample_ValidPairs.txt.gz

##============
## option 2: provide the bin interval and contact matrix files from HiC-Pro pipeline
##============

## bin interval file (of the format *_abs.bed from HiC-pro output)
Interval=

## matrix file (of the format *.matrix from HiC-pro output)
Matrix=

##============
## option 3: If HiChIP was processed by aligners other than HiC-Pro
## a) provide the locus pairs as a .bed formatted file with the following format (7 fields):
## chr1 start1 end1 chr2 start2 end2 contactcounts
##============
Bed=

##============
## option 4: If HiChIP data is provided in .hic format
## Make sure that the .hic file contains the target resolution which is provided in the BINSIZE parameter (below)
##============
HIC=

##============
## option 5: If HiChIP data is provided in .cool / .mcool format
## Make sure that the .cool or .mcool file contains the target resolution which is provided in the BINSIZE parameter (below)
##============
COOL=


##********
## File containing chromomosome size information corresponding to the reference genome.
##********
ChrSizeFile=./TestData/chrom_hg19.sizes

##********
## Mandatory parameter - Reference ChIP-seq / HiChIP peaks (in .bed format) - can be gzipped as well
## We recommend using reference ChIP-seq peaks (if available)
## Otherwise, peaks can be computed from HiChIP data.
## See the documentation: https://ay-lab.github.io/FitHiChIP/usage/Utilities.html#inferring-peaks-from-hichip-data-for-use-in-the-hichip-pipeline
##********
PeakFile=./TestData/Sample.Peaks.gz


##********
## Mandatory parameter - Output directory to contain all the results
##********
OutDir=./TestData/results/


##********
## Mandatory parameter - Boolean variable indicating if the reference genome is circular
## 0, by default. If 1 (circular genome), calculation of genomic distance is slightly different
##********
CircularGenome=0


##********
##***** Various FitHiChIP loop calling related parameters
##********

##Interaction type
## 1: peak to peak
## 2: peak to non peak
## 3: peak to all (default - both peak-to-peak and peak-to-nonpeak)
## 4: all to all (similar to Hi-C)
## 5: All of the modes 1 to 4 are computed.
IntType=1

## Bin size, in bases, for the interactions. Default = 5000 (5 Kb).
BINSIZE=2500

## Lower distance threshold of loops - default = 20000 (20 Kb)
LowDistThr=20000

## Upper distance threshold of loops - default = 2000000 (2 Mb)
UppDistThr=2000000

## Values 0/1 - Applicable if IntType = 3 (peak to all output interactions)
## 1 indicates FitHiChIP(S) model - uses only peak to peak loops for background modeling
## 0 corresponds to FitHiChIP(L) - uses both peak to peak and peak to nonpeak loops for background modeling
UseP2PBackgrnd=1

## type of bias - values: 1 / 2
## 1: coverage bias regression
## 2: ICE bias regression
BiasType=1

## if 1 (default), merge filtering (corresponding to either FitHiChIP(L+M) or FitHiChIP(S+M)
## depending on the parameter UseP2PBackgrnd) is enabled
MergeInt=1

## FDR (q-value) threshold for loop significance
QVALUE=0.01

## prefix string of all the output files (Default = 'FitHiChIP').
PREFIX=FitHiChIP

## Binary variable 1/0:
## if 1, overwrites any existing output file.
## otherwise (0), does not overwrite any output file.
OverWrite=0
3 changes: 2 additions & 1 deletion config/modules.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
"pairtools": "pairtools/1.0.2",
"samtools": "samtools/1.19",
"bedtools": "bedtools/2.30.0",
"juicer": "juicer/1.6"
"juicer": "juicer/1.6",
"fithichip": "fithichip/11.0"
}
}
60 changes: 60 additions & 0 deletions workflow/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ rule all:
expand(join(workpath,"QC","{name}.dedupStatsSimplify.txt"), name=samples),
expand(join(workpath,"QC","{name}.hichip_qc_metrics.txt"), name=samples),
expand(join(workpath,"Aligned","{name}.hic"), name=samples),
expand(join(workpath,"FitHiChIP_out","{name}.interactions_FitHiC_Q0.01.bed"),name=samples),

rule fastqc:
"""
Expand Down Expand Up @@ -203,6 +204,65 @@ rule contactmap:
juicer_tools -Xmx{params.memory}m pre --threads {threads} {input} {output} {params.reflen}
"""

rule makehicpro:
input:
join(workpath,"Aligned","{name}.mapped.pairs"),
output:
join(workpath,"Aligned","{name}.hicpro_mapped.pairs.gz"),
params:
rname='makehicpro',
shell: """
grep -v '#' {input} | awk -F"\t" '{{print $1"\t"$2"\t"$3"\t"$6"\t"$4"\t"$5"\t"$7}}' | gzip -c > {output}
"""

rule makeFitConfig:
input:
join(workpath,"Aligned","{name}.hicpro_mapped.pairs.gz"),
output:
join(workpath,"FitHiChIP_out","{name}.config"),
params:
rname='makeFitConfig',
inConfig=join(workpath,"config","configfile_BiasCorrection_CoverageBias"),
reflen=config['references'][genome]['reflen'],
outdir=join(workpath,"FitHiChIP_out"),
bed=chip_peaks,
sample="{name}",
CTCF="true",
shell: """
cp {params.inConfig} {output}
sed -i 's@ValidPairs=.*@ValidPairs={input}@g' {output}
sed -i 's@PeakFile=.*@PeakFile={params.bed}@g' {output}
sed -i 's@OutDir=.*@OutDir={params.outdir}@g' {output}
sed -i 's@ChrSizeFile=.*@ChrSizeFile={params.reflen}@g' {output}
sed -i 's@PREFIX=.*@PREFIX={params.sample}@g' {output}
if ! {params.CTCF}; then
sed -i 's@IntType=.*@IntType=3@g' {output}
fi
"""

rule FitHiChIP:
input:
pairs=join(workpath,"Aligned","{name}.hicpro_mapped.pairs.gz"),
config=join(workpath,"FitHiChIP_out","{name}.config"),
output:
join(workpath,"FitHiChIP_out","{name}.interactions_FitHiC_Q0.01.bed"),
params:
rname='FitHiChIP',
runDir=join(workpath,"FitHiChIP"),
envmodules:
config['tools']['fithichip'],
shell: """
if [ ! -d "{params.runDir}" ]; then
mkdir {params.runDir}
cp -r $FITHICHIP_SRC/* {params.runDir}
fi
cd {params.runDir}
./FitHiChIP_HiCPro.sh -C {input.config}
"""




# Import rules
Expand Down

0 comments on commit fc38256

Please sign in to comment.