-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathSnakefile
116 lines (98 loc) · 5.43 KB
/
Snakefile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
rule download_mouseanno:
output: "anno/gencode.vM25.annotation.gtf.gz"
shell: "wget -P anno ftp://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_mouse/release_M25/gencode.vM25.annotation.gtf.gz"
rule extract_mouse_anno:
input: "anno/gencode.vM25.annotation.gtf.gz"
output: "anno/gencode.vM25.annotation.gtf"
shell: "zcat {input} > {output}"
rule get_gene_names_from_mouse_anno:
input: "anno/gencode.vM25.annotation.gtf"
output: "anno/mouse_gene_names.txt"
shell: "cat {input} | cut -d ';' -f 3 | cut -d ' ' -f 3 > {output}"
rule add_gene_names_to_mouse_anno:
input: "anno/gencode.vM25.annotation.gtf",
"anno/mouse_gene_names.txt"
output: "anno/mouse_gene_anno.txt"
shell: "paste {input[0]} {input[1]} > {output}"
rule download_neutrophil_expression:
output: "expression/GSE60336_Normalized_Data.csv"
shell: "wget -P expression https://sharehost.hms.harvard.edu/immgen/GSE60336/GSE60336_Normalized_Data.csv"
rule download_t4_expression:
output: "expression/GSE60337_Normalized_Data.csv"
shell: "wget -P expression https://sharehost.hms.harvard.edu/immgen/GSE60337/GSE60337_Normalized_Data.csv"
rule run_mousefm:
input: "data/immgen_strain_mapping.txt",
"data/cis_eqtls_pmid_252679730.txt",
"anno/mouse_gene_anno.txt",
"expression/GSE60336_Normalized_Data.csv",
"expression/GSE60337_Normalized_Data.csv"
output: "results/GN/thr_{threshold}/grp_{groupsize}/other/done",
"results/T4/thr_{threshold}/grp_{groupsize}/other/done",
"results/GN/thr_{threshold}/grp_{groupsize}/validated/done",
"results/T4/thr_{threshold}/grp_{groupsize}/validated/done",
"results/GN/thr_{threshold}/grp_{groupsize}/detected/done",
"results/T4/thr_{threshold}/grp_{groupsize}/detected/done",
params: threshold="{threshold}",
groupsize="{groupsize}"
conda: "envs/mousefm.yaml"
script: "scripts/MouseFM_expression.R"
rule run_mousefm_all:
input: expand("results/GN/thr_{threshold}/grp_{groupsize}/detected/done", threshold=["0"], groupsize=["1","2","3","4","5","6","7","8","9","10"]),
expand("results/GN/thr_{threshold}/grp_{groupsize}/detected/done", threshold=["1"], groupsize=["2","3","4","5","6","7","8","9","10"])
rule number_finemapped:
input: "results/{GN_or_T4}/thr_{threshold}/grp_{groupsize}/done",
output: "results/{GN_or_T4}/thr_{threshold}/grp_{groupsize}/summary.txt"
shell: "wc -l results/{wildcards.GN_or_T4}/thr_{wildcards.threshold}/grp_{wildcards.groupsize}/*.txt > {output}"
rule number_finemapped_all:
input: expand("results/{GN_or_T4}/thr_{threshold}/grp_{groupsize}/{subset}/summary.txt", \
GN_or_T4=["GN","T4"], \
threshold=["0"], \
groupsize=["1","2","3","4","5","6","7","8","9","10"], \
subset=["detected","validated","other"]),
expand("results/{GN_or_T4}/thr_{threshold}/grp_{groupsize}/{subset}/summary.txt", \
GN_or_T4=["GN","T4"], \
threshold=["1"], \
groupsize=["2","3","4","5","6","7","8","9","10"], \
subset=["detected","validated","other"])
rule plot_num_finemapped:
input: expand("results/{GN_or_T4}/thr_{threshold}/grp_{groupsize}/{subset}/summary.txt", \
GN_or_T4=["GN","T4"], \
threshold=["0"], \
groupsize=["1","2","3","4","5","6","7","8","9","10"], \
subset=["detected","validated","other"]),
expand("results/{GN_or_T4}/thr_{threshold}/grp_{groupsize}/{subset}/summary.txt", \
GN_or_T4=["GN","T4"], \
threshold=["1"], \
groupsize=["2","3","4","5","6","7","8","9","10"], \
subset=["detected","validated","other"])
output: "results/num_finemapped.pdf"
script: "scripts/visualize_finemap_results.R"
rule extract_info_published_genes:
input: expand("results/{GN_or_T4}/thr_{threshold}/grp_{groupsize}/{subset}/summary.txt", \
GN_or_T4=["GN","T4"], \
threshold=["0"], \
groupsize=["1","2","3","4","5","6","7","8","9","10"], \
subset=["detected","validated","other"]),
expand("results/{GN_or_T4}/thr_{threshold}/grp_{groupsize}/{subset}/summary.txt", \
GN_or_T4=["GN","T4"], \
threshold=["1"], \
groupsize=["2","3","4","5","6","7","8","9","10"], \
subset=["detected","validated","other"])
output: "results/summary_published_genes/{gene}.txt"
shell: "cat {input} | grep '{wildcards.gene}_' > {output}"
GENES = []
with open("data/published_genes_pmid_252679730.txt","r") as f_in:
for line in f_in:
GENES.append(line.strip("\n"))
# Three genes had no fine-mapping result, remove them here
GENES = [ x for x in GENES if x not in ["Plekhg1","Gcn1","Arl16"]]
rule extract_info_published_genes_all:
input: expand("results/summary_published_genes/{gene}.txt", gene=GENES)
rule list_published_genes_finemapped:
input: expand("results/summary_published_genes/{gene}.txt", gene=GENES)
output: "results/summary_published_genes/finemapped_genes.txt"
shell: "cat {input} | cut -d '/' -f 6 | cut -d '_' -f 1 | sort | " + \
" uniq > {output}"
rule combinatorial_explosion:
output: "results/max_num_comb.pdf"
script: "scripts/combinatorial_explosion.R"