-
Notifications
You must be signed in to change notification settings - Fork 8
/
Copy pathepsa.r
94 lines (81 loc) · 3.45 KB
/
epsa.r
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
library(dplyr)
library(magrittr)
b = import('base')
io = import('io')
st = import('stats')
#' Summarize each experiment series to mean control and mean perturbed
#'
#' @param recs List of recods
#' @param exps List of gene expression matrices
#' @param field Field to subset ("control" or "perturbed")
#' @return List with expression matrix per pathway
summarize_experiments = function(recs, exps, field) {
mapply(function(r,e) {
message(r$id)
narray::map(e[,r[[field]]], along=2, mean) %catch% NULL
}, r=recs, e=exps) %>%
narray::stack(along=2)
}
#' Get ordered list of gene symbols for all EPSA pathways
#'
#' @param ctl A matrix of mean control expression per experiment
#' @param ptb A matrix of mean perturbed expression per experiment
#' @return Named (HGNC symbols) character vector of median fold changes
epsa_vectors = function(ctl, ptb, index) {
one_pathway = function(path) {
message(path)
cc = ctl[[path]]
pp = ptb[[path]]
pp = pp * narray::rrep(index$sign[match(colnames(pp), index$id)], nrow(pp))
narray::intersect(cc, pp, along=1)
paths = t(cbind(cc, pp))
type = c(rep(0, ncol(cc)), rep(1, ncol(pp)))
de_genes = st$lm(paths ~ 0 + type, hpc_args=list(n_jobs=5)) %>%
arrange(p.value) %>%
head(100) %>%
arrange(-estimate)
med_fc = pp - cc[,colnames(pp)]
med_fc = narray::map(med_fc, along=2, function(x) median(x, na.rm=TRUE))
ordered_genes = sort(med_fc, decreasing=TRUE)
ordered_genes = ordered_genes[names(med_fc) %in% de_genes$paths]
}
split_pathway = function(x)
narray::split(x, along=2, subsets=b$grep("^([^.]+)", colnames(x)))
ctl = split_pathway(ctl)
ptb = split_pathway(ptb)
signatures = b$lnapply(names(ctl), one_pathway)
}
#' Score perturbation experiments according to EPSA
#'
#' @param sigs Named (pathways) list of numeric vector (HGNC/median FC)
#' @param ctl A matrix of mean control expression per experiment
#' @param ptb A matrix of mean perturbed expression per experiment
#' @return A score matrix of experiments x pathways
score_experiments = function(sigs, ctl, ptb) {
fc = (ptb - ctl[rownames(ptb),colnames(ptb)])
cor_fun = function(s, f) {
sig = sigs[[s]]
cmp = na.omit(data.frame(sig, fc[,f][names(sig)]))
cor(cmp[,1], cmp[,2], method="spearman")
}
epsa = b$expand_grid(sig_idx=names(sigs), fc_idx=colnames(fc)) %>%
mutate(epsa = purrr::map2_dbl(sig_idx, fc_idx, cor_fun)) %>%
narray::construct(epsa ~ fc_idx + sig_idx)
}
if (is.null(module_name())) {
EXPR = commandArgs(TRUE)[1] %or% "../../data/expr.RData"
OUTFILE = commandArgs(TRUE)[2] %or% "epsa.RData"
# load zscores, model building function, and expression for each experiment
expr = io$load(EXPR)
idx_remove = c("control", "perturbed")
sign_lookup = setNames(c(1,-1), c("activating", "inhibiting"))
index = lapply(expr$records, function(x) x[setdiff(names(x), idx_remove)]) %>%
do.call(bind_rows, .) %>%
mutate(sign = sapply(effect, function(x) sign_lookup[x]))
ctl = summarize_experiments(expr$records, expr$expr, "control")
ptb = summarize_experiments(expr$records, expr$expr, "perturbed")
signatures = epsa_vectors(ctl, ptb, index)
scores = score_experiments(signatures, ctl, ptb)
index = index[match(rownames(scores), index$id),]
save(scores, index, file=OUTFILE)
}