forked from amyschmid/rosr-chip-utils
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathrosr-timecourse-analysis.Rmd
116 lines (92 loc) · 3.26 KB
/
rosr-timecourse-analysis.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
Analysis of RosR Timcourse ChIP data
===================================
```{r reset}
rm(list=ls())
```
# Load Libraries and Data
```{r load}
library(ggplot2)
source("lib/medichi.utils.R")
source("lib/chipchip.utils.R")
data( "halo.lowres", package="MeDiChI" )
operons = read.delim("data/nrc1_operons.csv")
gene.coords$Gene_Name = gene.coords$canonical_Name
data_dir = ""
experiment = "rosr/"
targets = c("noH2O2.fits.Rdata",
"wH2O2_10m.fits.Rdata",
"wH2O2_20m.fits.Rdata",
"wH2O2_60m.fits.Rdata",
"0258_11.fits.Rdata",
"0258_13.fits.Rdata",
"0258_21.fits.Rdata",
"0258_22.fits.Rdata",
"0258_32.fits.Rdata",
"0258_33.fits.Rdata")
targets = sapply(targets,function(x) paste(data_dir,experiment,x,sep=""))
datasets = c("noH2O2",
"wH2O2_10m",
"wH2O2_20m",
"wH2O2_60m",
"0258_11",
"0258_13",
"0258_21",
"0258_22",
"0258_32",
"0258_33")
conditions = c("noH2O2",
"wH2O2_10m",
"wH2O2_20m",
"wH2O2_60m",
"noH2O2",
"noH2O2",
"noH2O2",
"noH2O2",
"noH2O2",
"noH2O2")
```
```{r combine.hits}
hits = combineHits(targets,datasets,conditions,.5,250)
hits.combined = combineGeneHitsByDataset(hits,datasets)
hits.condition = combineGeneHitsByCondition(hits,conditions)
```
```{r process.hits}
# Convert NA pvalues to 10^(-.5) ~ .31
# and NA intensities to the minimum value
pval.cols = grep("pval",colnames(hits.condition))
intens.cols = grep("intens",colnames(hits.condition))
intens.avg.cols = grep("intens.avg",colnames(hits.condition))
for(col in pval.cols){
hits.condition[is.na(hits.condition[,col]),col] = 10^(-.5)
}
for(col in intens.cols){
hits.condition[is.na(hits.condition[,col]),col] = min(hits.condition[,intens.cols],na.rm=T)
}
#use rows with at least one pvalue <.05
hits.condition = hits.condition[apply(hits.condition[,pval.cols],1,function(x){any(x<.05)}),]
#add operons to conditional hits
hits.condition = duplicateOperonRows(hits.condition,operons)
#load expression
exp = read.csv("data/rosr_h2o2_exp.csv",header=T)
deg = read.table("deg.csv")
expression_data.ura = data.frame(gene=exp$gene)
for(tp in c("X.40","X.20","X0","X10","X20","X40","X60","X80"))
{
ura.ratio =
rowMeans(
exp[,grep(paste(tp,"_ura3*",sep=""),colnames(exp))]
)
expression_data.ura = cbind(expression_data.ura,data.frame(ura.ratio))
colnames(expression_data.ura) = tp
}
colnames(expression_data.ura) <- c("gene","ge-40.ura","ge-20.ura","ge0.ura","ge10.ura","ge20.ura","ge40.ura","ge60.ura","ge80.ura")
combined.data.ura = merge(expression_data.ura,hits.condition,by.x="gene",by.y="Gene")
# remove redundant orfs
redundant_orfs = read.delim("data/redundant_orfs.csv",sep="\t")
combined.data.ura = combined.data.ura[!combined.data.ura$gene %in% redundant_orfs$ORF.Name,]
#final_table = merge(exp,hits.condition,by.x="gene",by.y="Gene")
#final_table = final_table[as.character(final_table$gene) %in% deg$V1,]
#save table
#write.table(final_table,file="rosr/table_s2.csv",sep="\t",quote=F,row.names=F)
write.table(combined.data.ura,file="rosr/table_s2.csv",sep="\t",quote=F,row.names=F)
```