-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathjulia_scrp.jl
61 lines (54 loc) · 1.81 KB
/
julia_scrp.jl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
using DataFrames
using RCall
#R模式下
parent_geno = fread("final.parent_genoPlink.ped")
offsp_geno =fread("final.off_genoPlink.ped")
#julia模式下
parent_geno = @rget(parent_geno)
offsp_geno = @rget(offsp_geno)
#SNPid用于存储哪些SNP保留
SNPId = Vector{Bool}(undef,294921)
for x=1:294921
index = []
#子代所有基因型
Fall_g = Vector{String}(undef,233)
#子代所有两个等位基因
F1 = []
for i=1:233
#子代的双亲
P1 = split(offsp_geno[i,1],"_")[1]*"_"
P2 = split(offsp_geno[i,1],"_")[2]*"_"
#亲本1基因型
P1_g1 = parent_geno[occursin.(Regex("^$P1"),parent_geno[:,1]),6+(2*x-1)][1]
P1_g2 = parent_geno[occursin.(Regex("^$P1"),parent_geno[:,1]),6+(2*x)][1]
#亲本2基因型
P2_g1 = parent_geno[occursin.(Regex("^$P2"),parent_geno[:,1]),6+(2*x-1)][1]
P2_g2 = parent_geno[occursin.(Regex("^$P2"),parent_geno[:,1]),6+(2*x)][1]
#子代所有可能基因型
Pall_g = collect(Set([P1_g1*P2_g1,P1_g1*P2_g2,P2_g1*P1_g1,P2_g1*P1_g2,P2_g2*P1_g1,P2_g2*P1_g2,P1_g2*P2_g1,P1_g2*P2_g2]))
#子代实际两个等位基因
F1_g1 = offsp_geno[i,6+(2*x-1)]
F1_g2 = offsp_geno[i,6+(2*x)]
#子代实际基因型
F1_g = F1_g1*F1_g2
push!(F1,F1_g1,F1_g2)
Fall_g[i] = F1_g
#判断子代观测基因型是否满足亲本分离规律
judge = F1_g in Pall_g
push!(index,judge)
end
#统计子代两个等位基因的个数用于计算MAF
g1 = count(F1.==collect(Set(F1))[1])
g2 = count(F1.==collect(Set(F1))[2])
if(g1<g2)
MAF = g1/length(F1)
else
MAF = g2/length(F1)
end
#如果该SNP满足亲本分离规律,基因型类型两种以上,且MAF大于0.05,则保留该SNP
if(all(index)&&(length(Set(Fall_g))!==1)&&(MAF>0.05))
SNPId[x] = true
else
SNPId[x] = false
end
end