-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathfigure4.fdr.R
executable file
·92 lines (75 loc) · 3.43 KB
/
figure4.fdr.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
#!/usr/bin/env Rscript
argv <- commandArgs(trailingOnly = TRUE)
if(length(argv) < 2) q()
k.max <- as.integer(argv[1]) # e.g., 10
out.file <- argv[2] # e.g., 'result/fig_fdr.pdf'
library(qvalue)
library(ggplot2)
library(directlabels)
library(dplyr)
library(gridExtra)
source('Util.R')
source('figure.util.R')
options(stringsAsFactors = FALSE)
merge.tabs <- function(...) {
do.call(rbind, lapply(..., read.table))
}
## Take PIP statistics under the null and alternative and estimate FDR
## using Storey's q-value
null.tab <- merge.tabs(glue('result/perm/chr',1:22,'/',k.max,'/snp-null.txt.gz'))
alt.tab <- merge.tabs(glue('result/stat/chr',1:22,'/',k.max,'/snp-null.txt.gz'))
colnames(null.tab) <- c('k', 'lodds')
colnames(alt.tab) <- c('k', 'lodds')
pv <- empPvals(stat = alt.tab[, 2], stat0 = null.tab[, 2])
qv <- qvalue(pv)
## Fig: Show two histograms of PIP values
sigmoid <- function(x) 1/(1+exp(-x))
.df1 <- rbind(null.tab %>% mutate(pip.100 = round(100 * sigmoid(lodds)), stat = 'permuted'),
alt.tab %>% mutate(pip.100 = round(100 * sigmoid(lodds)), stat = 'observed')) %>%
group_by(pip.100, stat) %>%
summarize(count = length(pip.100))
p1 <- ggplot(.df1, aes(x = pip.100, y = count, fill = stat)) +
theme_classic() +
geom_vline(xintercept = 50, color = 'orange', lty = 'dashed') +
geom_vline(xintercept = 95, color = 'orange', lty = 'dashed') +
geom_bar(stat = 'identity') + xlab('max PIP (%)') +
coord_cartesian(ylim = c(0, 50000)) +
theme(legend.title = element_blank(),
legend.position = c(0.5, 0.6)) +
scale_x_discrete(limits = seq(0, 100, 10), expand = c(0.01, 0.01))
## Fig: Show q-values for each cutoff
.df2 <- rbind(data.frame(alt.tab, error = pv, stat = 'p-value') %>%
mutate(pip.100 = round(100 * sigmoid(lodds))),
data.frame(alt.tab, error = qv$qvalue, stat = 'q-value') %>%
mutate(pip.100 = round(100 * sigmoid(lodds)))) %>%
group_by(pip.100, stat) %>%
summarize(count = length(pip.100), error = max(error)) %>%
mutate(log.error = -log10(error))
p2 <-
ggplot(.df2, aes(x = pip.100, y = error, group = stat,
shape = stat, color = stat, lty = stat)) +
geom_vline(xintercept = 50, color = 'orange', lty = 'dashed') +
geom_hline(data = .df2 %>% filter(pip.100 == 50), aes(yintercept = error),
color = 'orange', lty = 'dashed') +
geom_vline(xintercept = 95, color = 'orange', lty = 'dashed') +
geom_hline(data = .df2 %>% filter(pip.100 == 95), aes(yintercept = error),
color = 'orange', lty = 'dashed') +
theme_classic() +
geom_point() +
geom_dl(aes(label = stat), method = 'smart.grid') +
scale_x_discrete(limits = seq(0, 100, 10), expand = c(0.01, 0.01)) +
theme(axis.title.x=element_blank(), legend.position = 'none')
p2 <- p2 +
geom_dl(data = .df2 %>% filter(pip.100 == 50),
aes(label = signif(error, 2)),
method = list(dl.trans(y = y + 1),'first.points'))+
geom_dl(data = .df2 %>% filter(pip.100 == 95),
aes(label = signif(error, 2)),
method = list(dl.trans(y = y + 1), 'first.points'))+
scale_color_manual(values = c('black', 'gray40'),
limits = c('q-value', 'p-value'))
p0 <- ggplot(data.frame(pv), aes(x=pv)) +
geom_histogram()
pdf(file = out.file, width = 4, height = 4, useDingbats = FALSE)
grid.vcat(list(p2 + scale_y_log10(), p1, p0), heights = c(2,1,1))
dev.off()