-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path07-CQanalysis.R
163 lines (113 loc) · 5.65 KB
/
07-CQanalysis.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
## %######################################################%##
# #
#### Investigating sequencing depth ####
#### ratio males/females (CQ) ####
# #
## %######################################################%##
# We do it on a per-window basis
# We compute depth for the 6 F1 pools -------------------------------------
system('Rscript depthPerWindow.R 2000 500 0 10 "-Q 20" dWXA.bam sWXA.bam dZM.bam sZM.bam dBF.bam sBF.bam allContigsWindow2000.Q20.depth.txt')
# we import sequencing depth data on all F1 bams
depth <- fread("1-F1/allContigsWindow2000.Q20.depth.txt")
# Computing and plotting CQ ----------------------------------------------------------
# standardizing depths across bams
# we create a matrix to allow using colMeans() below (not possible on a data.table)
progenyDepths <- depth[, cbind(dWXA, sWXA, dZM, sZM, dBF, sBF)]
meanDepths <- colMeans(progenyDepths)
ratios <- max(meanDepths) / meanDepths
adjusted <- t(t(progenyDepths) * ratios)
depth[, c("dWXA", "sWXA", "dZM", "sZM", "dBF", "sBF") := data.table(adjusted)]
# computing chromosome quotient per F1 family
depth[, c("cqWXA", "cqZM", "cqBF") := .(sWXA / dWXA, sZM / dZM, sBF / dBF)]
# plotting chromosomes quotient and the difference (male - female) / (male + female)
# we also name thise difference with the cq prefix as we use the same code for plotting
CQs <- list(
depth[, .(cqWXA, cqZM, cqBF)],
depth[, .(
cqWXA = (sWXA - dWXA) / (sWXA + dWXA),
cqZM = (sZM - dZM) / (sZM + dZM),
cqBF = (sBF - dBF) / (sBF + dBF)
)]
)
xlim <- list(c(0, 2.2), c(-1, 1))
# the position of ther expected value of CQ using moth metrics
# (we draw a vertica line at this position)
v <- c(1, 0)
xlab <- c("Chromosome Quotient (male / female)", "Sequencing depth difference (male - female)/(male + female)")
pdf("../figureS4.pdf", 6, 8)
par(mfrow = c(2, 1))
for (i in 1:2) {
dt <- CQs[[i]]
# ploting CQ distributionq
dt[depth[, dWXA + sWXA >= 3L] & cqWXA < 3, plot(
density(cqWXA, from = xlim[[i]][1], to = xlim[[i]][2]),
main = "", xlab = xlab[i], las = 1, bty = "n", lwd = 2
)]
dt[depth[, dWXA + sWXA >= 3L] & cqZM < 3, lines(
density(cqZM, from = xlim[[i]][1], to = xlim[[i]][2]),
col = "grey", lwd = 2
)]
dt[depth[, dWXA + sWXA >= 3L] & cqBF < 3, lines(
density(cqBF, from = xlim[[i]][1], to = xlim[[i]][2]),
col = "salmon", lwd = 2
)]
abline(v = v[i], lwd = 0.5, col = "grey")
legend("topleft", col = c("black", "grey", "salmon"), legend = c("WXA", "ZM", "BF"), bty = "n", lwd = 2)
}
dev.off()
# selecting windows and contigs of low CQ --------------------------------------------------
# we add a logical column to flag windows with low CQ ratio and sufficient female depth
depth[, lowCQ := dWXA > 5L & dZM > 5L & dBF > 5L & cqWXA < 0.3 & cqZM < 0.3 & cqBF < 0.3 & N >= 1500]
# we group successive low-CQ windows into blocks
rows <- 2:nrow(depth)
blocks <- depth[c(F, lowCQ[rows] & !lowCQ[rows - 1L]), .(contig, start = window - 1000L)]
blocks$end <- depth[c(lowCQ[rows - 1L] & !lowCQ[rows], F), window + 1000L]
blocks[start < 0L, start := 0L]
# we retrieve the SNP results for these low-CQ contigs
probs <- fread("tables/countsAndPosteriors.txt")
# the number and length of contigs with low-CQ windows
probs[CHROM %chin% blocks$contig, .(.N, sum(length))]
# the length of low-CQ blocks
CQblockLenghts <- blocks[, .(len = sum(end - start)), by = contig]
sum(CQblockLenghts$len)
# the low-CQ contigs that are linked to the SDR
# actually, there is just one
probs[CHROM %in% blocks$contig & p > 0.5]
# we import gene data for figure 7
gff <- fread(
"genome.gff", # this is the A.vulgare gff (from ncbi)
skip = 1, header = F, drop = c(2, 6, 8),
col.names = c("contig", "type", "start", "end", "strand", "description")
)
# we extain gene names and compute the mid position of genes
gff[type == "gene", c("gene", "mid") :=
.(splitToColumns(description, "Name=", 2), (start + end) / 2)]
# we extract gene id
gff[, id := stri_extract_first(description, regex = "Avbf_[0-9]+")]
geneForID <- gff[type == "gene", setNames(gene, id)]
# we add the gene name to each CDS
gff[type == "CDS" | type == "exon", gene := geneForID[id]]
# the genes that intersect with low-CQ blocks
lowCQgenes <- gff[type == "gene" & intersectsRanges(data.table(contig, start, end), blocks)]
# to plot the low-CQ contigs, we also need the data on informative SNPs
SNPs <- fread("tables/SNPcategory.txt")
# we plot data for contigs having genes in low-CQ blocks
pdf("../figure7.pdf", 6, 8)
par(mfrow = c(3, 1), mai = c(0.4, 0.7, 0.3, 0.2))
l <- lapply(lowCQgenes[, unique(contig)], plotContig, depth = depth, gff = gff, SNPs = SNPs)
dev.off()
# comparing results with those from our prevous study based
# on CQ and Y genome scan
# we import the names of previous candidate contigs
CQcandidates <- readLines("CQcandidates.txt")
# assess the assignment of these contigs to sex chromosomes
tab <- probs[, table(CHROM %in% CQcandidates, WZ = nrec < q001)]
fisher.test(tab)
# compare the distance to the SDR non-assigned contigs
# that belong to our preivous selection and those that do not
probs[nrec >= q001, median(nrec / 40 * 100), by = CHROM %in% CQcandidates]
probs[nrec >= q001, wilcox.test(nrec[CHROM %in% CQcandidates], nrec[!CHROM %in% CQcandidates])]
# investigating genes that overlap with the blocks of low CQ
lowCQgenes <- gff[type == "gene" & intersectsRanges(data.table(contig, start, end), blocks), ]
lowCQgeneInfo <- gff[type == "exon" & id %in% lowCQgenes$id, .(nbExons = .N, exonLength = sum(end - start + 1L)), by = id]
lowCQgenes <- merge(lowCQgenes, lowCQgeneInfo)