-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmapping_qc.txt
88 lines (64 loc) · 3.71 KB
/
mapping_qc.txt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
#### To perform some quality check of SAM files ###
## Following GATK best practices
# run the container for GATK-4
# updated docker container to run GATK-4
docker pull broadinstitute/gatk:latest
docker run -it -v $PWD:/data/ broadinstitute/gatk:latest
# create another sample SAM file (useful for multi-sample comparison)
# download tumor and normal sample bam files
samtools merge -r -o merged.bam normal.bam tumor.bam
gatk SortSam -I bam_files/merged.bam -O bam_files/merged_sorted.bam -SORT_ORDER coordinate
# mark duplicates
gatk MarkDuplicates \
I=bam_files/merged_sorted.bam \
O=bam_files/marked_duplicates.bam \
M=bam_files/marked_dup_metrics.txt
# BQSR
# download the file
wget https://storage.googleapis.com/gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dbsnp138.vcf
gatk IndexFeatureFile --input read_mapping/Homo_sapiens_assembly38.dbsnp138.vcf
# create ref genome index using samtools
samtools faidx ref_genome/chr1.fa
## add read group info if not present already
# extract records from chr1 alone using bed file
samtools view -L bam_files/bed_file.bed bam_files/merged_sorted.bam -o bam_files/chr1_bamfile.bam
#BQSR
gatk BaseRecalibrator -I bam_files/chr1_bamfile.bam -R ref_genome/chr1.fa --known-sites read_mapping/Homo_sapiens_assembly38.dbsnp138.vcf -O recal_data.table
gatk ApplyBQSR -R ref_genome/chr1.fa -I bam_files/chr1_bamfile.bam --bqsr-recal-file recal_data.table -O bam_files/chr1_bqsr.bam
samtools index bam_files/chr1_bqsr.bam
# download ref data
wget https://storage.googleapis.com/genomics-public-data/resources/broad/hg38/v0/Homo_sapiens_assembly38.dict
wget https://storage.googleapis.com/genomics-public-data/resources/broad/hg38/v0/Homo_sapiens_assembly38.fasta
wget https://storage.googleapis.com/genomics-public-data/resources/broad/hg38/v0/Homo_sapiens_assembly38.fasta.fai
wget https://storage.googleapis.com/genomics-public-data/resources/broad/hg38/v0/hapmap_3.3.hg38.vcf.gz
wget https://storage.googleapis.com/genomics-public-data/resources/broad/hg38/v0/hapmap_3.3.hg38.vcf.gz.tbi
### calling somatic variants using GATK
gatk Mutect2 -I bam_files/merged_sorted.bam -R ref_genome/Homo_sapiens_assembly38.fasta -O somatic_variants/merged_vars.vcf.gz
#### germline variants
gatk HaplotypeCaller -R ref_genome/Homo_sapiens_assembly38.fasta -I bam_files/normal.bam -L bam_files/bed_file.bed -O normal_germline.vcf.gz -ERC GVCF
gatk HaplotypeCaller -R ref_genome/Homo_sapiens_assembly38.fasta -I bam_files/tumor.bam -O tumor_germline.vcf.gz -ERC GVCF
# importing vcf to genomicDB
gatk GenomicsDBImport -V normal_germline.vcf.gz -V tumor_germline.vcf.gz --genomicsdb-workspace-path test_db --intervals chr1
# genotyping
gatk GenotypeGVCFs -R ref_genome/chr1.fa -V gendb://test_db -O combined_genotyping.vcf.gz
# VQSR
wget https://console.cloud.google.com/storage/browser/_details/genomics-public-data/resources/broad/hg38/v0/1000G_phase1.snps.high_confidence.hg38.vcf.gz
wget https://console.cloud.google.com/storage/browser/_details/genomics-public-data/resources/broad/hg38/v0/1000G_phase1.snps.high_confidence.hg38.vcf.gz.tbi
gatk VariantRecalibrator \
-R Homo_sapiens_assembly38.fasta \
-V combined_genotyping.vcf.gz \
--resource:1000G,known=false,training=true,truth=false,prior=10.0 1000G_phase1.snps.high_confidence.hg38.vcf.gz \
--an QD -an MQ -an MQRankSum -an ReadPosRankSum -an FS -an SOR \
-mode SNP \
-O output.recal \
--tranches-file output.tranches \
--rscript-file output.plots.R
gatk ApplyVQSR \
-R Homo_sapiens_assembly38.fasta \
-V combined_genotyping.vcf.gz \
-O output_filtered.vcf.gz \
--truth-sensitivity-filter-level 99.0 \
--tranches-file output.tranches \
--recal-file output.recal \
-mode SNP
## variant annotation using VEP