diff --git a/module_notebooks/05-variant-calling-with-vg.ipynb b/module_notebooks/05-variant-calling-with-vg.ipynb index 10af416..f32c464 100644 --- a/module_notebooks/05-variant-calling-with-vg.ipynb +++ b/module_notebooks/05-variant-calling-with-vg.ipynb @@ -225,6 +225,34 @@ "!vg call SK1xyprp.chrVIII.pggb.aug.xg -k SK1xyprp.chrVIII.pggb.mapped.aug.pack -t 4 > SK1xyprp.chrVIII.pggb.aug_calls.vcf" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Generate summary statistics for this VCF file with `bcftools stats`. We will use `grep` to keep only the rows that start with `SN` (the summary numbers section)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!bcftools stats SK1xyprp.chrVIII.pggb.aug_calls.vcf | grep \"^SN\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "- SNPs = single nucleotide polymorphisms (a single nucleotide change; reference and alternate alleles are all of length 1)\n", + "- MNPs = multi-nucleotide polymorphisms (reference and alternate alleles are all of the same length and that length is >1)\n", + "- indels = insertions/deletions (reference and alternate alleles are of different lengths)\n", + "- others = more complex variants\n", + "- multiallelic sites = sites with more than one alternate allele\n", + "- multiallelic SNP sites = SNP sites with more than one alternate allele" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -238,6 +266,7 @@ "
  • + Create an index (xg).
  • \n", "
  • + Compute read support.
  • \n", "
  • + Generate a VCF.
  • \n", + "
  • + Generate statistics.
  • \n", " " ] }, @@ -259,6 +288,7 @@ "\n", "!vg call SK1xyprp.fullgenome.pggb.aug.xg -k SK1xyprp.fullgenome.pggb.mapped.aug.pack -t 4 > SK1xyprp.fullgenome.pggb.aug_calls.vcf\n", "\n", + "!bcftools stats SK1xyprp.fullgenome.pggb.aug_calls.vcf | grep \"^SN\"\n", "\n", "" ] @@ -282,6 +312,12 @@ } ], "metadata": { + "environment": { + "kernel": "conda-env-nigms-pangenomics-nigms-pangenomics", + "name": "workbench-notebooks.m127", + "type": "gcloud", + "uri": "us-docker.pkg.dev/deeplearning-platform-release/gcr.io/workbench-notebooks:m127" + }, "kernelspec": { "display_name": "nigms-pangenomics", "language": "python",