-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathScript_03_aVMCs_identification_dglm.Rmd
226 lines (153 loc) · 8.46 KB
/
Script_03_aVMCs_identification_dglm.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
---
title: "Script_03_aVMCs_identification_dglm"
author: "Yunfeng Liu"
date: "1/16/2023"
output: html_document
---
# Script information
DGLM(Double generalized linear model) is a full parametric method that can be used to detect variance effect and mean effects at the same time. It include two parts:One is a generalized linear model,the other is a dispersion model. Specifically,the mean effects are estimated by a linear model,and then the varaince effects are estimated by dispersion sub-model.It works iteratively between models until convergence.
Here,using DGLM,we performed EWAS(Epigenome-wide association study) to identify two different type of age-related DNA methylation changes on X chromosome: aDMCs(age-related differentially methylated CpGs) and aVMCs(age-related variable methylated CpGs).
```{r}
#set library path.
.libPaths("~/researchdrive/yliu/Rlibs")
```
```{r}
#Load data.
load(file ="./Output/Output_02/Output_02_DGLM_Xfemales_converge.RData")
load(file ="./Output/Output_02/Output_02_DGLM_Xmales_converge.RData")
```
```{r}
#Load all necessary libraries.
library(tidyverse)
library(bacon)
```
```{r}
## Correct bias and inflation for dispersion model.
# Xfemales
set.seed(1)
bc_DGLM_Xfemales<- bacon(DGLM_Xfemales_converge[,7])
# Calculate bias and inflation
bias(bc_DGLM_Xfemales) # 0.18
inflation(bc_DGLM_Xfemales) # 1.1
# Save histogram plot of test statistics
tiff("Output_03_Bacon_DGLM_Xfemales.tiff", units="in", width=8, height=6, res=600,compression = 'lzw')
fit(bc_DGLM_Xfemales,xlab="t-statistics",main="aVMCs in females",n=100)
dev.off()
#Extract the BACON-adjusted t-statistics and p-values
tstats_DGLM_Xfemales<-tstat(bc_DGLM_Xfemales)
pvals_DGLM_Xfemales <-pval(bc_DGLM_Xfemales)
table(pvals_DGLM_Xfemales<0.05) # 6015
padjs_DGLM_Xfemales <- as.matrix(p.adjust(pvals_DGLM_Xfemales, method="bonf"))
table(padjs_DGLM_Xfemales<0.05) # 1098
# Extract BACON-adjusted effectsize.
set.seed(1)
bc_DGLM_Xfemales_es<-bacon(NULL,DGLM_Xfemales_converge[,5],DGLM_Xfemales_converge[,6])
es_DGLM_Xfemales<-es(bc_DGLM_Xfemales_es)
# Extract BACON-adjusted standard error.
set.seed(1)
bc_DGLM_Xfemales_se<-bacon(NULL,DGLM_Xfemales_converge[,5],DGLM_Xfemales_converge[,6])
se_DGLM_Xfemales<-se(bc_DGLM_Xfemales_se)
# Summary Bacon statistics
dat_DGLM_Xfemales_bacon_disp<-as.data.frame(cbind(es_DGLM_Xfemales,se_DGLM_Xfemales,tstats_DGLM_Xfemales,pvals_DGLM_Xfemales,padjs_DGLM_Xfemales))
colnames(dat_DGLM_Xfemales_bacon_disp)<-c("bacon_effectsize_disp","bacon_std.error_disp","bacon_tstatistics_disp","bacon_pval_disp","bacon_p.adj_disp")
rownames(dat_DGLM_Xfemales_bacon_disp)<-make.names(rownames(DGLM_Xfemales_converge),unique = TRUE)
```
```{r}
## Correct bias and inflation for dispersion model.
# Xmales
set.seed(1)
bc_DGLM_Xmales<- bacon(DGLM_Xmales_converge[,7])
# Calculate bias and inflation
bias(bc_DGLM_Xmales) # -0.2
inflation(bc_DGLM_Xmales) # 1.2
# Save histogram plot of test statistics
tiff("Output_03_Bacon_DGLM_Xmales.tiff", units="in", width=8, height=6, res=600,compression = 'lzw')
fit(bc_DGLM_Xmales,xlab="t-statistics",main="aVMCs in males",n=100)
dev.off()
#Extract the BACON-adjusted t-statistics and p-values
tstats_DGLM_Xmales<-tstat(bc_DGLM_Xmales)
pvals_DGLM_Xmales <-pval(bc_DGLM_Xmales)
table(pvals_DGLM_Xmales<0.05) # 1176
padjs_DGLM_Xmales <- as.matrix(p.adjust(pvals_DGLM_Xmales, method="bonf"))
table(padjs_DGLM_Xmales<0.05) # 39
# Extract BACON-adjusted effectsize.
set.seed(1)
bc_DGLM_Xmales_es<-bacon(NULL,DGLM_Xmales_converge[,5],DGLM_Xmales_converge[,6])
es_DGLM_Xmales<-es(bc_DGLM_Xmales_es)
# Extract BACON-adjusted standard error.
set.seed(1)
bc_DGLM_Xmales_se<-bacon(NULL,DGLM_Xmales_converge[,5],DGLM_Xmales_converge[,6])
se_DGLM_Xmales<-se(bc_DGLM_Xmales_se)
# Summary Bacon statistics
dat_DGLM_Xmales_bacon_disp<-as.data.frame(cbind(es_DGLM_Xmales,se_DGLM_Xmales,tstats_DGLM_Xmales,pvals_DGLM_Xmales,padjs_DGLM_Xmales))
colnames(dat_DGLM_Xmales_bacon_disp)<-c("bacon_effectsize_disp","bacon_std.error_disp","bacon_tstatistics_disp","bacon_pval_disp","bacon_p.adj_disp")
rownames(dat_DGLM_Xmales_bacon_disp)<-make.names(rownames(DGLM_Xmales_converge),unique = TRUE)
```
```{r}
# Extract dispersion effect size and standard error
df_effectsize_disp<-as.data.frame(cbind(dat_DGLM_Xfemales_bacon_disp[,1],dat_DGLM_Xmales_bacon_disp[,1]))
colnames(df_effectsize_disp)<-c("females","males")
rownames(df_effectsize_disp)<-make.names(rownames(dat_DGLM_Xfemales_bacon_disp), unique=TRUE)
df_se_disp<-as.data.frame(cbind(dat_DGLM_Xfemales_bacon_disp[,2],dat_DGLM_Xmales_bacon_disp[,2]))
colnames(df_se_disp)<-c("females","males")
rownames(df_se_disp)<-make.names(rownames(dat_DGLM_Xfemales_bacon_disp), unique=TRUE)
# Keep significant aVMCs as dataframe (bacon_p.adj<0.05).
dat_DGLM_Xfemales_bacon_disp<-filter(dat_DGLM_Xfemales_bacon_disp,bacon_p.adj_disp<0.05) # 1098
dat_DGLM_Xfemales_bacon_disp_up<-filter(dat_DGLM_Xfemales_bacon_disp,bacon_effectsize_disp>0) # 1098
dat_DGLM_Xfemales_bacon_disp_down<-filter(dat_DGLM_Xfemales_bacon_disp,bacon_effectsize_disp<0) # 0
dat_DGLM_Xmales_bacon_disp<-filter(dat_DGLM_Xmales_bacon_disp,bacon_p.adj_disp<0.05) # 39
dat_DGLM_Xmales_bacon_disp_up<-filter(dat_DGLM_Xmales_bacon_disp,bacon_effectsize_disp>0) # 38
dat_DGLM_Xmales_bacon_disp_down<-filter(dat_DGLM_Xmales_bacon_disp,bacon_effectsize_disp<0) # 1
save(dat_DGLM_Xfemales_bacon_disp,dat_DGLM_Xfemales_bacon_disp_up,dat_DGLM_Xmales_bacon_disp,dat_DGLM_Xmales_bacon_disp_down,dat_DGLM_Xmales_bacon_disp_up,file = "Output_03_Catalogue_aVMCs_BIOS.RData")
```
```{r}
# Check how many aVMCs identified only in females/males/both
# both-sex CpGs:1
both_sex_aVMCs_BIOS<-intersect(rownames(dat_DGLM_Xfemales_bacon_disp),rownames(dat_DGLM_Xmales_bacon_disp))
# female-specific CpGs:1097
fs_aVMCs_BIOS<-dat_DGLM_Xfemales_bacon_disp[!rownames(dat_DGLM_Xfemales_bacon_disp)%in%both_sex_aVMCs_BIOS,]
# male-specific CpGs:38
ms_aVMCs_BIOS<-dat_DGLM_Xmales_bacon_disp[!rownames(dat_DGLM_Xmales_bacon_disp)%in%both_sex_aVMCs_BIOS,]
```
```{r}
# # Scatter plot of aVMCs effect size between men and women.
sig.aVMCs<-c(rownames(fs_aVMCs_BIOS),rownames(ms_aVMCs_BIOS),both_sex_aVMCs_BIOS)
df_aVMCs_effectsize_BIOS<-df_effectsize_disp[sig.aVMCs,]
df_aVMCs_effectsize_BIOS$aVMCs<-c(rep("female-specific",1097),rep("male-specific",38),rep("both-sex",1))
save(df_aVMCs_effectsize_BIOS,file="Output_03_aVMCs_effectsize_BIOS.RData")
# Save plot
df_aVMCs_effectsize_BIOS$aVMCs<-factor(df_aVMCs_effectsize_BIOS$aVMCs,levels = unique(df_aVMCs_effectsize_BIOS$aVMCs))
tiff("Output_03_Comparison of aVMCs size effect between sex.tiff", units="in", width=16, height=13, res=600,compression = 'lzw')
ggplot(df_aVMCs_effectsize_BIOS) +
aes(x = males,y = females,color=aVMCs) +
geom_point(size = 4, alpha = 0.5) +
geom_vline(aes(xintercept=0)) +
geom_hline(aes(yintercept=0)) +
geom_abline(slope=1, intercept = 0)+
scale_x_continuous(limits=c(-0.04, 0.04))+
scale_y_continuous(limits=c(-0.04, 0.04))+
scale_color_manual(values = c("#F08080", "#56B4E9","#32CD32"))+
theme_bw()+
theme(legend.position = "bottom",legend.text =element_text(size = 34),legend.title = element_text(size = 34),axis.title.x = element_text(size = 34),axis.title.y = element_text(size = 34),axis.text.x = element_text(size = 34),axis.text.y = element_text(size = 34))+
labs(x="Males effect ctsize",y="Females effect size")
dev.off()
```
```{r}
# Scatter plot of aVMCs standard error between men and women
df_aVMCs_se_BIOS<-df_se_disp[sig.aVMCs,]
df_aVMCs_se_BIOS$aVMCs<-c(rep("female-specific",1097),rep("male-specific",38),rep("both-sex",1))
save(df_aVMCs_se_BIOS,file ="Output_03_aVMCs_se_BIOS.RData")
df_aVMCs_se_BIOS$aVMCs<-factor(df_aVMCs_se_BIOS$aVMCs,levels = unique(df_aVMCs_se_BIOS$aVMCs))
tiff("Output_03_Comparison of aVMCs se between sex.tiff", units="in", width=16, height=13, res=600,compression = 'lzw')
ggplot(df_aVMCs_se_BIOS) +
aes(x = males,y = females,color=aVMCs) +
geom_point(size = 4, alpha = 0.5) +
geom_abline(slope=1, intercept = 0)+
scale_x_continuous(limits=c(0.001, 0.008))+
scale_y_continuous(limits=c(0.001, 0.008))+
scale_color_manual(values = c("#F08080", "#56B4E9", "#32CD32"))+
theme_bw()+
theme(legend.position = "bottom",legend.text =element_text(size = 34),legend.title = element_text(size = 34),axis.title.x = element_text(size = 34),axis.title.y = element_text(size = 34),axis.text.x = element_text(size = 34),axis.text.y = element_text(size = 34))+
labs(x="Males standard error",y="Females standard error")
dev.off()
```