-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathrenum_order_gen.awk
80 lines (77 loc) · 2.25 KB
/
renum_order_gen.awk
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
#!/usr/local/bin/gawk -f
#
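# This program renumbers a pedigree file so that parents precede progeny.
# No coherence checking of the pedigree is done.
# Standard output gets one line per animal: the first three columns are the renumbered
# animal, sire and dam, the next three are the original identifiers, and the last one
# is the pseudo generation number.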
# based on an idea by I Aguilar
#
# pedigree format: animal, sire, dam separated by spaces
#
# 31/12/2010
#
# modified to use and print pseudo generation numbers:
# gen(x) = max(gen(sire), gen(dam)) + 1
# this forces the coding "uncle < nephew"; see the example ped_kempthorne
# maxval(a, b): return the larger of a and b (b when they compare equal).
# Written as a single expression so that no temporary is needed: an
# undeclared temporary inside an awk function is a global variable and
# would be overwritten on every call.
function maxval (a,b) {
    return (a>b) ? a : b
}
BEGIN {
    # Code used in the input file for a missing animal/sire/dam.
    unknown_parent = "00000000000000"

    # Entry 0 represents the unknown parent. It is seeded as already
    # exported, at input position 0, output position 0 and generation 0.
    pos[0]      = 0     # position (record number) in the original data file
    posout[0]   = 0     # position in the new, renumbered file
    gen[0]      = 0     # pseudo generation number
    included[0] = 1     # already exported into the output?

    # State of the renumbering loop run in END.
    changed  = 1        # non-zero while a pass is still needed
    last     = 0        # most recently exported animal
    previous = 0        # total exported as of the previous pass

    # Tallies of records with missing parents.
    unknownb = 0        # both parents unknown
    unknowns = 0        # only the sire unknown
    unknownd = 0        # only the dam unknown
}
# Read and store one pedigree record: $1 = animal, $2 = sire, $3 = dam.
{
    # Replace the unknown-parent code by 0 in each of the three id columns.
    for (col = 1; col <= 3; col++)
        if ($col == unknown_parent) $col = 0

    # Tally records by which parent(s) are missing.
    no_sire = ($2 == 0)
    no_dam  = ($3 == 0)
    if (no_sire && no_dam)
        unknownb++
    else if (no_dam)
        unknownd++
    else if (no_sire)
        unknowns++

    # Remember where the animal was read and who its parents are. It starts
    # out as not yet exported, with a placeholder generation of 9999.
    animal = $1
    pos[animal]      = NR
    sire[animal]     = $2
    dam[animal]      = $3
    included[animal] = 0
    gen[animal]      = 9999

    # Progress report on stderr every 100000 records.
    if (NR % 100000 == 0)
        printf("%10s%10s\n",NR,"read") > "/dev/stderr"
}
# After the whole file is read: report what was read, then export the animals
# pass by pass so that every animal is written after both of its parents.
END{
    # Read summary; all diagnostics go to stderr, stdout carries only the pedigree.
    printf("\n") > "/dev/stderr"
    printf("%s\n","-----------------------------------------") > "/dev/stderr"
    printf("%s%10s\n","total read",NR) > "/dev/stderr"
    printf("%s%10s%10s%10s\n","both unknown,sire unknown,dam unknown",unknownb,unknowns,unknownd) > "/dev/stderr"
    printf("%s\n","-----------------------------------------") > "/dev/stderr"
    printf("\n") > "/dev/stderr"

    iter=1
    while (changed){
        changed=0
        for (x in pos) {
            if(included[x]) continue
            # An animal is exported in pass `iter` once both parents have a
            # generation number below `iter`. Animals read from the file start
            # at gen 9999, so they only qualify after they have been exported.
            if((gen[dam[x]]<iter) && (gen[sire[x]]<iter)){
                # the new code is actually the order animals are printed
                posout[x]=posout[last]+1
                gen[x]=maxval(gen[dam[x]],gen[sire[x]])+1
                # A parent id that never occurs as an animal has no posout
                # entry; "+0" prints it as 0 (unknown) instead of an empty
                # column, which would break the space-separated output.
                printf("%16s%16s%16s%16s%16s%16s%16s\n", posout[x],posout[sire[x]]+0,posout[dam[x]]+0,x,sire[x],dam[x],gen[x])
                included[x]=1
                changed=1
                last=x
            }
        }
        iter++
        # iter has already been incremented, so the number shown is one more
        # than the pass that just finished.
        printf("%10s%10s%10s%10s%10s\n",iter,"included",posout[last]-previous,"total",posout[last]) > "/dev/stderr"
        previous=posout[last]
    }

    # Animals still not exported can never satisfy the parent condition
    # (for instance a loop in the pedigree); report them instead of
    # dropping them silently.
    notplaced=0
    for (x in pos)
        if(!included[x]) notplaced++
    if(notplaced>0)
        printf("%s%10s\n","WARNING: animals not renumbered (pedigree loop?)",notplaced) > "/dev/stderr"
}