-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathSnakefile
140 lines (121 loc) · 3.77 KB
/
Snakefile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
#! /usr/bin/env python
#
# The Parker Lab (theparkerlab.org)
# University of Michigan, Ann Arbor
#
from os.path import join
###
# HELPER FUNCTIONS
#
def get_motifs():
"""List all motifs"""
for motif in open(config['motif_file'], 'r').readlines():
yield motif.strip()
def get_samples():
"""List all samples"""
for sample in sorted(config['samples'].keys()):
yield sample
def get_peaks(sample):
"""Get peak file associated with sample from the config"""
peakfile = config["samples"][sample]["peakfile"]
return peakfile
if config["ionice"] is None:
IONICE = ""
else:
IONICE = config["ionice"]
###
# PIPELINE
#
# Desired output
#
rule all:
input:
expand(
join(config['results'], "bmo", "{sample}",
"bound", "{motif}.bound.bed"),
sample=get_samples(), motif=get_motifs()
),
if config["use_shm"] is True:
print("Motifs will be processed using /dev/shm...")
include:
join(config["bmo_dir"], "rules",
"measure_raw_signal_shm.smk")
else:
print("Motifs will not be processed using /dev/shm...")
include:
join(config["bmo_dir"], "rules", "measure_raw_signal.smk")
rule motifs_outside_peaks:
input:
motif = rules.measure_raw_signal.output,
peaks = lambda wildcards: get_peaks(wildcards.sample)
output:
join(config['results'], "raw_signals",
"{sample}", "outside_peaks", "{motif}.bed")
conda:
"envs/bmo.yml"
shell:
"{IONICE} intersectBed -v -a {input.motif} -b {input.peaks} > {output}"
rule count_overlapping_motifs:
input:
rules.measure_raw_signal.output
output:
join(config['results'], "raw_signals",
"{sample}", "co_occurring", "{motif}.bed")
params:
com_src = join(
config["bmo_dir"], "src", "count_co-occuring_motifs.sh"),
bmo_dir = config["bmo_dir"],
vicinity = 100,
conda:
"envs/bmo.yml"
shell:
"""
{IONICE} {params.com_src} {input} {params.vicinity} {params.bmo_dir}\
> {output}
"""
rule fit_nbinoms:
input:
raw_signal = rules.measure_raw_signal.output,
motif_counts = rules.count_overlapping_motifs.output,
outside_peaks = rules.motifs_outside_peaks.output
output:
join(config['results'], "negative_binomials",
"{sample}", "{motif}.bed.gz")
params:
nb_src = join(config["bmo_dir"], "src", "rawSigNBModel.R"),
in_handle = "{motif}.bed",
out_handle = config['results'] + \
"/negative_binomials/{sample}/{motif}",
# note trailing slashes on both
d1 = join(config['results'],
"raw_signals", "{sample}", "all_regions/"),
d2 = join(config['results'], "raw_signals",
"{sample}", "outside_peaks/"),
conda:
"envs/bmo.yml"
shell:
"""
{IONICE} Rscript {params.nb_src} -f {params.in_handle} \
--dir1 {params.d1} --dir2 {params.d2} \
-o {params.out_handle} -c 7 --writeBed
"""
rule BMO:
input:
atac_nb = rules.fit_nbinoms.output,
motif_counts = rules.count_overlapping_motifs.output
output:
join(config['results'], "bmo",
"{sample}", "bound", "{motif}.bound.bed")
params:
bmo_src = join(config["bmo_dir"], "src", "bmo.R"),
bmo_output_dir = join(config['results'], "bmo", "{sample}")
conda:
"envs/bmo.yml"
threads:
1
shell:
"""
{IONICE} Rscript {params.bmo_src} --f1 {input.atac_nb} \
--f2 {input.motif_counts} \
-o {params.bmo_output_dir} -n {wildcards.motif} -p {threads}
"""