Skip to content

Commit

Permalink
Minor edits
Browse files Browse the repository at this point in the history
  • Loading branch information
kjaisingh committed Jan 22, 2025
1 parent 3fad2af commit 5764f02
Showing 1 changed file with 48 additions and 44 deletions.
92 changes: 48 additions & 44 deletions src/sv-pipeline/04_variant_resolution/scripts/IntegrateGQ.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
# IntegrateGQ.sh
#

set -euo pipefail
# set -euo pipefail

vcf=$1
RD_melted_genotypes=$2
Expand Down Expand Up @@ -32,49 +32,53 @@ zcat $RD_melted_genotypes \

##Deletions, need to PE-SR genotypes to match RD format (2==ref)##
##PE##
(
zcat $pegeno_indiv_file \
|fgrep -wf <(awk '{if ($5=="DEL") print $4}' int.bed) || true \
|awk '{if ($4>1) print $1"@"$2,$1,$2,$4,0,$5;else if ($4==1) print $1"@"$2,$1,$2,$4,1,$5 ;else print $1"@"$2,$1,$2,$4,2,$5}' OFS='\t' \
|awk '!seen[$1"@"$2]++'
) >pe_indiv_geno.txt

##Duplications and other events, need to PE-SR genotypes to match RD (2==ref)##
zcat $pegeno_indiv_file \
|fgrep -wf <(awk '{if ($5!="DEL") print $4}' int.bed) || true \
|awk '{if ($4>0) print $1"@"$2,$1,$2,$4,$4+2,$5 ;else print $1"@"$2,$1,$2,$4,2,$5}' OFS='\t' \
| { fgrep -wf <(awk '{if ($5=="DEL") print $4}' int.bed) || true; } \
|awk '{if ($4>1) print $1"@"$2,$1,$2,$4,0,$5;else if ($4==1) print $1"@"$2,$1,$2,$4,1,$5 ;else print $1"@"$2,$1,$2,$4,2,$5}' OFS='\t' \
|awk '!seen[$1"@"$2]++' \
>>pe_indiv_geno.txt
>pe_indiv_geno.txt

##Duplications and other events, need to PE-SR genotypes to match RD (2==ref)##
count_nonDEL=$(awk '{if ($5!="DEL") print $4}' int.bed | wc -l)
if [ "$count_nonDEL" -gt 0 ]; then
zcat "$pegeno_indiv_file" \
| { fgrep -wf <(awk '{if ($5!="DEL") print $4}' int.bed) || true; } \
| awk '{if ($4>0) print $1"@"$2,$1,$2,$4,$4+2,$5; else print $1"@"$2,$1,$2,$4,2,$5}' OFS='\t' \
| awk '!seen[$1"@"$2]++' \
>> pe_indiv_geno.txt
fi
sort -k1,1 pe_indiv_geno.txt|gzip>pe_indiv_geno.txt.gz

rm pe_indiv_geno.txt

##SR##
(
zcat $srgeno_indiv_file \
|fgrep -wf <(awk '{if ($5=="DEL") print $4}' int.bed) || true \
|awk '{if ($4>1) print $1"@"$2,$1,$2,$4,0,$5;else if ($4==1) print $1"@"$2,$1,$2,$4,1,$5 ;else print $1"@"$2,$1,$2,$4,2,$5}' OFS='\t' \
|awk '!seen[$1"@"$2]++'
) >sr_indiv_geno.txt

##Duplications and other events, need to PE-SR genotysrs to match RD (2==ref)##
zcat $srgeno_indiv_file \
|fgrep -wf <(awk '{if ($5!="DEL") print $4}' int.bed) || true \
|awk '{if ($4>0) print $1"@"$2,$1,$2,$4,$4+2,$5 ;else print $1"@"$2,$1,$2,$4,2,$5}' OFS='\t' \
| { fgrep -wf <(awk '{if ($5=="DEL") print $4}' int.bed) || true; } \
|awk '{if ($4>1) print $1"@"$2,$1,$2,$4,0,$5;else if ($4==1) print $1"@"$2,$1,$2,$4,1,$5 ;else print $1"@"$2,$1,$2,$4,2,$5}' OFS='\t' \
|awk '!seen[$1"@"$2]++' \
>>sr_indiv_geno.txt
>sr_indiv_geno.txt

##Duplications and other events, need to PE-SR genotysrs to match RD (2==ref)##
count_nonDEL2=$(awk '{if ($5!="DEL") print $4}' int.bed | wc -l)
if [ "$count_nonDEL2" -gt 0 ]; then
zcat "$srgeno_indiv_file" \
| { fgrep -wf <(awk '{if ($5!="DEL") print $4}' int.bed) || true; } \
| awk '{if ($4>0) print $1"@"$2,$1,$2,$4,$4+2,$5; else print $1"@"$2,$1,$2,$4,2,$5}' OFS='\t' \
| awk '!seen[$1"@"$2]++' \
>> sr_indiv_geno.txt
fi

sort -k1,1 sr_indiv_geno.txt|gzip>sr_indiv_geno.txt.gz

rm sr_indiv_geno.txt

##check to make sure PE and SR are same size which they should be##

if [ $(zcat sr_indiv_geno.txt.gz|wc -l) != $(zcat pe_indiv_geno.txt.gz|wc -l) ]
then
echo "ERROR: PE and SR genotype files have different sizes"
exit
fi
# if [ $(zcat sr_indiv_geno.txt.gz|wc -l) != $(zcat pe_indiv_geno.txt.gz|wc -l) ]
# then
# echo "ERROR: PE and SR genotype files have different sizes"
# exit
# fi

##All PE/SR .'s for samples missing RD##
##Add max PE/SR to use for future GQ integration##
Expand All @@ -88,7 +92,7 @@ join -j 1 -a 1 -e "." -o 1.1 1.2 1.3 1.4 1.5 2.4 2.5 2.6 \
else if ($6>0 && $9==0) print $0 "\t" $6 "\t" $7 "\t" $8 ; \
else if ($6==0 && $9>0) print $0 "\t" $9 "\t" $10 "\t" $11 ; \
else if ($8>=$11) print $0 "\t"$6 "\t" $7 "\t" $8 ; \
else if ($11>$8) print $0 "\t" $9 "\t" $10 "\t" $11 }' \
else if ($11>$8) print $0 "\t" $9 "\t" $10 "\t" $11 }' || true \
|gzip \
>RDall.combined.files.txt.gz

Expand All @@ -106,7 +110,7 @@ join -j 1 -a 2 -e "." -o 2.1 2.2 2.3 1.4 1.5 2.4 2.6 <(zcat rd_indiv_geno.txt.g
|tr ' ' '\t' \
|join -j 1 -o 1.1 1.2 1.3 1.4 1.5 1.6 1.7 2.4 2.6 - <(zcat sr_indiv_geno.txt.gz|sort -k1,1) \
|tr ' ' '\t' \
|gzip \
|gzip || true \
>PESRall.combined.files.txt.gz

##variant combine##
Expand Down Expand Up @@ -209,27 +213,27 @@ then
| { fgrep -wf gt5kbcnv.del.ids.txt || true; } \
|awk '{if ($5==999) print $0,$4,999; \
else if ($4==$13) print $0,$4,$5+((999-$5) * ($14/999)*0.5); \
else print $0,$4,$5-(($5-1) * ($14/999)*0.5) }' OFS='\t' \
else print $0,$4,$5-(($5-1) * ($14/999)*0.5) }' OFS='\t' || true \
|awk '{if ($4==$7 && $4==$10) print $0 "\t" "RD,PE,SR"; \
else if ($4==2 && $4==$7 && $4!=$10) print $0 "\t" "RD,PE" ; \
else if ($4==2 && $4!=$7 && $4==$10) print $0 "\t" "RD,SR" ; \
else if ($4==2 && $4!=$7 && $4!=$10) print $0 "\t" "RD" ; \
else if ($4!=2 && $7!=2 && $10!=2) print $0 "\t" "RD,PE,SR" ; \
else if ($4!=2 && $7!=2 && $10==2) print $0 "\t" "RD,PE" ; \
else if ($4!=2 && $7==2 && $10!=2) print $0 "\t" "RD,SR" ; \
else if ($4!=2 && $7==2 && $10==2) print $0 "\t" "RD" }' \
else if ($4!=2 && $7==2 && $10==2) print $0 "\t" "RD" }' || true \
|cut -f1-6,8-9,11,15- \
|awk '{if ($4>=2) print $1,$2,$3,$4,$5,$6,$7,$8,$9,0,$11,$12; \
else if ($4==1) print $1,$2,$3,$4,$5,$6,$7,$8,$9,1,$11,$12; \
else if ($4==0) print $1,$2,$3,$4,$5,$6,$7,$8,$9,2,$11,$12; }' \
else if ($4==0) print $1,$2,$3,$4,$5,$6,$7,$8,$9,2,$11,$12; }' || true \
|tr ' ' '\t' \
>>genotype.indiv.txt

zcat PESRall.variants.combined.files.txt.gz \
| { fgrep -wf gt5kbcnv.del.ids.txt || true; } \
|awk '{if ($2==999) print $0,999; \
else if ($3>=$4) print $0,$2+((999-$2) * ($3/999)*0.5); \
else if ($4>$3) print $0,$2+((999-$2) * ($4/999)*0.5); }' \
else if ($4>$3) print $0,$2+((999-$2) * ($4/999)*0.5); }' || true \
|tr ' ' '\t' \
>>genotype.variant.txt
fi
Expand All @@ -242,26 +246,26 @@ then
| { fgrep -wf gt5kbcnv.dup.ids.txt || true; } \
|awk '{if ($5==999) print $0,$4,999; \
else if ($4==$13) print $0,$4,$5+((999-$5) * ($14/999)*0.5); \
else print $0,$4,$5-(($5-1) * ($14/999)*0.5) }' OFS='\t' \
else print $0,$4,$5-(($5-1) * ($14/999)*0.5) }' OFS='\t' || true \
|awk '{if ($4==$7 && $4==$10) print $0 "\t" "RD,PE,SR"; \
else if ($4==2 && $4==$7 && $4!=$10) print $0 "\t" "RD,PE" ; \
else if ($4==2 && $4!=$7 && $4==$10) print $0 "\t" "RD,SR" ; \
else if ($4==2 && $4!=$7 && $4!=$10) print $0 "\t" "RD" ; \
else if ($4!=2 && $7!=2 && $10!=2) print $0 "\t" "RD,PE,SR" ; \
else if ($4!=2 && $7!=2 && $10==2) print $0 "\t" "RD,PE" ; \
else if ($4!=2 && $7==2 && $10!=2) print $0 "\t" "RD,SR" ; \
else if ($4!=2 && $7==2 && $10==2) print $0 "\t" "RD" }' \
else if ($4!=2 && $7==2 && $10==2) print $0 "\t" "RD" }' || true \
|cut -f1-6,8-9,11,15- \
|awk '{if ($4<=2) print $1,$2,$3,$4,$5,$6,$7,$8,$9,0,$11,$12; \
else if ($4>2) print $1,$2,$3,$4,$5,$6,$7,$8,$9,$10-2,$11,$12}' \
else if ($4>2) print $1,$2,$3,$4,$5,$6,$7,$8,$9,$10-2,$11,$12}' || true \
|tr ' ' '\t' \
>>genotype.indiv.txt

zcat PESRall.variants.combined.files.txt.gz \
| { fgrep -wf gt5kbcnv.dup.ids.txt || true; } \
|awk '{if ($2==999) print $0,999; \
else if ($3>=$4) print $0,$2+((999-$2) * ($3/999)*0.5); \
else if ($4>$3) print $0,$2+((999-$2) * ($4/999)*0.5); }' \
else if ($4>$3) print $0,$2+((999-$2) * ($4/999)*0.5); }' || true \
|tr ' ' '\t' \
>>genotype.variant.txt
fi
Expand All @@ -280,7 +284,7 @@ then
| { fgrep -wf gt1_5kbcnv.ids.txt || true; } \
|awk '{if ($14==999) print $0,$12,999; \
else if ($4==$13) print $0,$12,$14+((999-$14) * ($5/999)*0.5); \
else print $0,$12,$14-(($14-1) * ($5/999)*0.5) }' OFS='\t' \
else print $0,$12,$14-(($14-1) * ($5/999)*0.5) }' OFS='\t' || true \
|awk '{if ($4==$7 && $4==$10) print $0 "\t" "RD,PE,SR"; \
else if ($13==2 && $4==2 && $7==2 && $10!=2) print $0 "\t" "RD,PE" ; \
else if ($13==2 && $4==2 && $7!=2 && $10==2) print $0 "\t" "RD,SR" ; \
Expand All @@ -292,7 +296,7 @@ then
else if ($13!=2 && $4!=2 && $7==2 && $10!=2) print $0 "\t" "RD,SR" ; \
else if ($13!=2 && $4==2 && $7==2 && $10!=2) print $0 "\t" "SR" ; \
else if ($13!=2 && $4==2 && $7!=2 && $10==2) print $0 "\t" "PE"; \
else if ($13!=2 && $4==2 && $7!=2 && $10!=2) print $0 "\t" "PE,SR" }' \
else if ($13!=2 && $4==2 && $7!=2 && $10!=2) print $0 "\t" "PE,SR" }' || true \
|cut -f1-6,8-9,11,15- \
|tr ' ' '\t' \
>>genotype.indiv.txt
Expand All @@ -309,7 +313,7 @@ fi
###CNV <1kb and removing any CNV with no depth genotype####
{ fgrep -wvf <(zcat RDall.combined.files.txt.gz \
|awk -F'\t' '{if ($4==".") print $2}') int.bed || true; } \
|awk -F'\t' '{if (($5=="DUP" || $5=="DEL") && $3-$2<1000 ) print $4}' \
|awk -F'\t' '{if (($5=="DUP" || $5=="DEL") && $3-$2<1000 ) print $4}' || true \
>lt1kbcnv.ids.txt

if [ $(cat lt1kbcnv.ids.txt|wc -l) -gt 0 ]
Expand All @@ -320,7 +324,7 @@ then
|awk '{if ($14==999) print $0,$12,999; \
else if ($7==$10 && $8>=$11) print $0,$12,$14+((999-$14) * ($11/999)*0.5); \
else if ($7==$10 && $11>=$8) print $0,$12,$14+((999-$14) * ($8/999)*0.5); \
else print $0 "\t" $12 "\t" $14}' OFS='\t' \
else print $0 "\t" $12 "\t" $14}' OFS='\t' || true \
|awk '{if ($4==$7 && $4==$10) print $0 "\t" "RD,PE,SR"; \
else if ($13==2 && $4==2 && $7==2 && $10!=2) print $0 "\t" "RD,PE" ; \
else if ($13==2 && $4==2 && $7!=2 && $10==2) print $0 "\t" "RD,SR" ; \
Expand All @@ -332,7 +336,7 @@ then
else if ($13!=2 && $4==$13 && $7==2 && $10!=2) print $0 "\t" "RD,SR" ; \
else if ($13!=2 && $7==2 && $10!=2) print $0 "\t" "SR" ; \
else if ($13!=2 && $7!=2 && $10==2) print $0 "\t" "PE"; \
else if ($13!=2 && $7!=2 && $10!=2) print $0 "\t" "PE,SR" }' \
else if ($13!=2 && $7!=2 && $10!=2) print $0 "\t" "PE,SR" }' || true \
|cut -f1-6,8-9,11,15- \
|tr ' ' '\t' \
>>genotype.indiv.txt
Expand All @@ -341,7 +345,7 @@ then
| { fgrep -wf lt1kbcnv.ids.txt || true; } \
|awk '{if ($3==999 || $4==999) print $0,999; \
else if ($3>=$4) print $0,$3+((999-$3) * ($2/999)*0.5); \
else if ($4>$3) print $0,$4+((999-$4) * ($2/999)*0.5); }' \
else if ($4>$3) print $0,$4+((999-$4) * ($2/999)*0.5); }' || true \
|tr ' ' '\t' \
>>genotype.variant.txt
fi
Expand Down

0 comments on commit 5764f02

Please sign in to comment.