-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathrun_mashr.sh
executable file
·114 lines (76 loc) · 2.31 KB
/
run_mashr.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
#!/bin/bash -l
if [ $# -lt 4 ]; then
cat << EOF
plink=\$1
yfile=\$2
output_snp=\$3
output_tis=\$4
tempdir=\$5
EOF
exit 1
fi
plink=$1
yfile=$2
output_snp=$3
output_tis=$4
tempdir=$5
cat << EOF
plink=$1
yfile=$2
output_snp=$3
output_tis=$4
tempdir=$5
EOF
if [ -z $tempdir ]; then
outdir=$(dirname $output_tis)
[ -d $outdir ] || mkdir -p $outdir
tempdir=$(mktemp -d "$outdir/temp.XXXXXX")
fi
[ -d $tempdir ] || mkdir -p $tempdir
# 1. caculate summary statistics
Rscript --vanilla run.matrix.qtl.R $yfile $plink $tempdir/me.txt
printf "[%s] Generated: $tempdir/me.txt\n\n\n" "$(date)"
# 2. Run MASH
R --vanilla << EOF
me.file <- '$tempdir/me.txt'
snp.file <- '$output_snp'
tis.file <- '$output_tis'
library(ashr)
library(mashr)
source('Util.R')
options(stringsAsFactors = FALSE)
me.tab <- read.table(me.file, header = TRUE)
snps <- sort(unique(me.tab[, 'SNP']))
genes <- sort(unique(me.tab[, 'gene']))
n.snps <- length(unique(snps))
n.genes <- length(unique(genes))
log.msg('Read %d x %d matrix QTL results\n\n', n.snps, n.genes)
mat.idx <- cbind(match(me.tab[, 'SNP'], snps),
match(me.tab[, 'gene'], genes))
t.mat <- matrix(NA, nrow = n.snps, ncol = n.genes)
beta.mat <- matrix(NA, nrow = n.snps, ncol = n.genes)
se.mat <- matrix(NA, nrow = n.snps, ncol = n.genes)
t.mat[mat.idx] <- me.tab[, 't.stat']
beta.mat[mat.idx] <- me.tab[, 'beta']
se.mat[mat.idx] <- me.tab[, 'beta'] / me.tab[, 't.stat']
mash.data <- set_mash_data(beta.mat, se.mat)
U.c <- cov_canonical(mash.data)
mash.out <- mash(mash.data, U.c, optmethod = 'cxxMixSquarem')
## a. extract SNP statistics
fdr.out <- get_lfdr(mash.out)
fsr.out <- get_lfsr(mash.out)
min.fdr <- apply(fdr.out, 1, min)
min.fsr <- apply(fsr.out, 1, min)
snp.out <- data.frame(snps, min.fdr, min.fsr)
write.table(snp.out, file = gzfile(snp.file),
quote = FALSE, row.names = FALSE, col.names = FALSE)
## b. choose the best SNP and crop posterior probabilities of genes
best.snp <- which.min(min.fsr)
tis.out <- data.frame(snps[best.snp],
genes,
fdr.out[best.snp, ],
fsr.out[best.snp, ])
write.table(tis.out, file = gzfile(tis.file),
quote = FALSE, row.names = FALSE, col.names = FALSE)
EOF
printf "[%s] Finished: $0 --> %s and %s\n\n" "$(date)" $output_tis $output_snp