From d68b484ec36851026bad5253cd8d518fbb64bc72 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hannes=20P=C3=A9tur=20Eggertsson?= Date: Mon, 19 Oct 2020 09:51:01 +0000 Subject: [PATCH] v2.6 (#6) * Added option for extra call_only iteration. Added check for duplicated sample names. * Added coverage filter in the 'genotype_sv' subcommand. It is possible to turn the filter off with a new '--no_filter_on_coverage' command. * Another fix for a very rare bug in graph construction. Only affects very large-scale genotyping runs. WIP streamlined discovery. * Write CSI instead of TBI with the --csi option. * Added several per alt QC metrics to VCF output when calling genotype * optimization for vcf_merge * Added no_variant_overlapping option in force calling mode * updated how QD and QDalt are calculated * AAScore added in SNP/indel genotyping * bugfix: In very large genotyping runs some INFO fields can get larger than INT_MAX so I needed to change them to be strings instead, otherwise bcftools is not happy with me. --- CMakeLists.txt | 4 +- include/graphtyper/constants.hpp.in | 7 + .../graphtyper/graph/absolute_position.hpp | 2 +- include/graphtyper/graph/constructor.hpp | 5 + include/graphtyper/graph/haplotype.hpp | 4 +- include/graphtyper/typer/alignment.hpp | 39 +- include/graphtyper/typer/bucket.hpp | 67 + include/graphtyper/typer/caller.hpp | 9 + include/graphtyper/typer/event.hpp | 204 ++ include/graphtyper/typer/event_call.hpp | 32 + include/graphtyper/typer/genotype_paths.hpp | 21 +- .../graphtyper/typer/logistic_constants.hpp | 72 +- include/graphtyper/typer/read.hpp | 83 + include/graphtyper/typer/sample_call.hpp | 12 +- include/graphtyper/typer/var_stats.hpp | 60 +- include/graphtyper/typer/variant.hpp | 5 +- include/graphtyper/typer/variant_call.hpp | 14 + include/graphtyper/typer/vcf_operations.hpp | 1 + include/graphtyper/utilities/genotype_sv.hpp | 5 +- .../utilities/hts_parallel_reader.hpp | 5 +- include/graphtyper/utilities/hts_reader.hpp | 16 +- include/graphtyper/utilities/options.hpp | 4 +- paw | 2 +- seqan | 2 +- src/CMakeLists.txt | 3 + src/graph/constructor.cpp | 111 +- src/graph/graph.cpp | 69 +- src/graph/haplotype.cpp | 95 +- src/graph/sv.cpp | 42 +- src/main.cpp | 49 +- src/typer/alignment.cpp | 372 +++- src/typer/bucket.cpp | 207 ++ src/typer/caller.cpp | 1657 ++++++++++++++++- src/typer/event.cpp | 410 ++++ src/typer/genotype_paths.cpp | 20 +- src/typer/read.cpp | 103 + src/typer/sample_call.cpp | 38 +- src/typer/var_stats.cpp | 217 ++- src/typer/variant.cpp | 559 ++++-- src/typer/variant_map.cpp | 4 +- src/typer/vcf.cpp | 75 +- src/typer/vcf_operations.cpp | 387 +--- src/typer/vcf_writer.cpp | 43 +- src/utilities/bamshrink.cpp | 39 +- src/utilities/genotype.cpp | 168 +- src/utilities/genotype_camou.cpp | 14 +- src/utilities/genotype_sv.cpp | 32 +- src/utilities/hts_parallel_reader.cpp | 175 +- src/utilities/hts_reader.cpp | 93 +- src/utilities/type_conversions.cpp | 139 +- 50 files changed, 4686 insertions(+), 1110 deletions(-) create mode 100644 include/graphtyper/typer/bucket.hpp create mode 100644 include/graphtyper/typer/event.hpp create mode 100644 include/graphtyper/typer/event_call.hpp create mode 100644 include/graphtyper/typer/read.hpp create mode 100644 include/graphtyper/typer/variant_call.hpp create mode 100644 src/typer/bucket.cpp create mode 100644 src/typer/event.cpp create mode 100644 src/typer/read.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index c273872..139d3b8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -5,8 +5,8 @@ include(ExternalProject) # 
The version number set (graphtyper_VERSION_MAJOR 2) -set (graphtyper_VERSION_MINOR 5) -set (graphtyper_VERSION_PATCH 1) +set (graphtyper_VERSION_MINOR 6) +set (graphtyper_VERSION_PATCH 0) set(STATIC_DIR "" CACHE STRING "If set, GraphTyper will be built as a static binary using libraries from the given STATIC_DIR.") # Get the current working branch diff --git a/include/graphtyper/constants.hpp.in b/include/graphtyper/constants.hpp.in index 73141fe..b5b7805 100644 --- a/include/graphtyper/constants.hpp.in +++ b/include/graphtyper/constants.hpp.in @@ -44,6 +44,13 @@ uint32_t constexpr MAX_NUM_LOCATIONS_PER_PATH = 256; uint16_t constexpr EPSILON_0_EXPONENT = 12; int32_t constexpr INSERT_SIZE_WHEN_NOT_PROPER_PAIR = 0x7FFFFFFFl; +// alignments scores and penalties +uint8_t constexpr SCORE_MATCH = 1; +uint8_t constexpr SCORE_MISMATCH = 4; +uint8_t constexpr SCORE_GAP_OPEN = 7; +uint8_t constexpr SCORE_GAP_EXTEND = 1; +uint8_t constexpr SCORE_CLIP = 5; + /** * Flags diff --git a/include/graphtyper/graph/absolute_position.hpp b/include/graphtyper/graph/absolute_position.hpp index 4c27fe8..67feb4d 100644 --- a/include/graphtyper/graph/absolute_position.hpp +++ b/include/graphtyper/graph/absolute_position.hpp @@ -10,7 +10,7 @@ namespace gyper { -class Contig; +struct Contig; class AbsolutePosition { diff --git a/include/graphtyper/graph/constructor.hpp b/include/graphtyper/graph/constructor.hpp index 151ebbc..efb33f1 100644 --- a/include/graphtyper/graph/constructor.hpp +++ b/include/graphtyper/graph/constructor.hpp @@ -19,6 +19,11 @@ void construct_graph(std::string const & reference_filename, bool use_absolute_positions = true, bool check_index = true); +void +open_and_read_reference_genome(std::vector & reference_sequence, + std::string const & reference_fn, + GenomicRegion const & genomic_region); + std::vector get_variants_using_tabix(std::string const & vcf, GenomicRegion const & genomic_region); diff --git a/include/graphtyper/graph/haplotype.hpp b/include/graphtyper/graph/haplotype.hpp index 8b56138..96815ea 100644 --- a/include/graphtyper/graph/haplotype.hpp +++ b/include/graphtyper/graph/haplotype.hpp @@ -115,11 +115,13 @@ class Haplotype bool is_low_qual, std::size_t mismatches); - void clipped_reads_to_stats(bool fully_aligned); + void clipped_reads_to_stats(int clipped_bp, int read_length); void mapq_to_stats(uint8_t mapq); // void realignment_to_stats(uint32_t original_pos, uint32_t new_pos); void strand_to_stats(uint16_t const flags); + void mismatches_to_stats(uint8_t mismatches, int read_length); + void score_diff_to_stats(uint8_t score_diff); void coverage_to_gts(std::size_t pn_index, bool is_proper_pair); std::bitset explain_to_path_explain(); diff --git a/include/graphtyper/typer/alignment.hpp b/include/graphtyper/typer/alignment.hpp index ed544c2..8d6e187 100644 --- a/include/graphtyper/typer/alignment.hpp +++ b/include/graphtyper/typer/alignment.hpp @@ -20,36 +20,37 @@ align_read(bam1_t * rec, seqan::IupacString const & rseq, gyper::PHIndex const & ph_index); +//#ifndef NDEBUG +//GenotypePaths * +//update_unpaired_read_paths(std::pair & geno_paths, +// seqan::IupacString const & seq, +// seqan::IupacString const & rseq, +// bam1_t * rec); +//#else GenotypePaths * update_unpaired_read_paths(std::pair & geno_paths, bam1_t * rec); -void -further_update_unpaired_read_paths_for_discovery(GenotypePaths & geno, - seqan::IupacString const & seq, - seqan::IupacString const & rseq, - bam1_t * rec - ); +//#endif -#ifndef NDEBUG void -update_paths(std::pair & geno_paths, - 
seqan::IupacString const & seq, - seqan::IupacString const & rseq, - bam1_t * rec); -#endif // NDEBUG +further_update_unpaired_read_paths_for_discovery(GenotypePaths & geno, bam1_t * rec); + +//#ifndef NDEBUG +//void +//update_paths(std::pair & geno_paths, +// seqan::IupacString const & seq, +// seqan::IupacString const & rseq, +// bam1_t * rec); +//#endif // NDEBUG -#ifdef NDEBUG +//#ifdef NDEBUG void update_paths(std::pair & geno_paths, bam1_t * rec); -#endif // DEBUG +//#endif // DEBUG void -further_update_paths_for_discovery(std::pair & geno_paths, - seqan::IupacString const & seq, - seqan::IupacString const & rseq, - bam1_t * rec - ); +further_update_paths_for_discovery(std::pair & geno_paths, bam1_t * rec); std::pair get_better_paths(std::pair & geno_paths1, diff --git a/include/graphtyper/typer/bucket.hpp b/include/graphtyper/typer/bucket.hpp new file mode 100644 index 0000000..834adf6 --- /dev/null +++ b/include/graphtyper/typer/bucket.hpp @@ -0,0 +1,67 @@ +#pragma once + +#include + +#include +#include + + +namespace gyper +{ + +class BucketFirstPass +{ +public: + int32_t global_max_pos_end{-1};// Max pos end of alignments in this bucket and all previous buckets + int32_t max_pos_end{-1}; // Max pos end of alignments in this bucket + + std::map snps; + Tindel_events indel_events; // type is std::map +}; + + +class Bucket +{ +public: + int32_t global_max_pos_end{-1};// Max pos end of alignments in this bucket and all previous buckets + int32_t max_pos_end{-1}; // Max pos end of alignments in this bucket + + Tindel_events indel_events; // type is std::map + std::vector reads; + + std::string to_string() const; +}; + + +bool +is_indel_in_bucket(std::vector const & buckets, + IndelEvent const & indel_event, + long const region_begin, + long const BUCKET_SIZE); + + +std::map::iterator +add_snp_event_to_bucket(std::vector & buckets, + SnpEvent && event, + long const region_begin, + long const BUCKET_SIZE); + + +template +Tindel_events::iterator +add_indel_event_to_bucket(std::vector & buckets, + IndelEvent && event, + long const region_begin, + long const BUCKET_SIZE, + std::vector const & reference_sequence, + long ref_offset); + +//void +//add_base_to_bucket(std::vector & buckets, +// int32_t pos, +// char seq, +// char qual, +// long const region_begin, +// long const BUCKET_SIZE); + +} // namespace gyper diff --git a/include/graphtyper/typer/caller.hpp b/include/graphtyper/typer/caller.hpp index c7484eb..96f8600 100644 --- a/include/graphtyper/typer/caller.hpp +++ b/include/graphtyper/typer/caller.hpp @@ -7,11 +7,13 @@ namespace gyper { +class Vcf; class Primers; // returns the prefix to the output files std::vector call(std::vector const & hts_path, + std::vector const & avg_cov_by_readlen, std::string const & graph_path, PHIndex const & ph_index, std::string const & output_dir, @@ -34,4 +36,11 @@ discover_directly_from_bam(std::string const & graph_path, long minimum_variant_support, double minimum_variant_support_ratio); +void +streamlined_discovery(std::vector const & hts_paths, + std::string const & reference_fn, + std::string const & region_str, + std::string const & output_dir, + gyper::Vcf & vcf); + } // namespace gyper diff --git a/include/graphtyper/typer/event.hpp b/include/graphtyper/typer/event.hpp new file mode 100644 index 0000000..cf51f7b --- /dev/null +++ b/include/graphtyper/typer/event.hpp @@ -0,0 +1,204 @@ +#pragma once + +#include +#include +#include +#include +#include +#include + + +namespace gyper +{ + +std::array constexpr index2base = {'A', 'C', 'G', 'T'}; 
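[Editor's note] The new event.hpp fixes the base encoding used by the streamlined discovery code: index2base maps indices 0..3 back to A/C/G/T, while base2index (declared just below, presumably defined in the new src/typer/event.cpp, which is not shown here) is its inverse. As a rough, hedged illustration only — the real definition is not part of this hunk — the inverse mapping is assumed to look like this, with non-ACGT characters mapped to a negative sentinel:

// Editorial sketch, not part of the patch: assumed inverse of index2base.
long base2index(char const base)
{
  switch (base)
  {
  case 'A': return 0;
  case 'C': return 1;
  case 'G': return 2;
  case 'T': return 3;
  default:  return -1; // 'N' and other ambiguous bases carry no index
  }
}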
+ +long +base2index(char const base); + +struct BaseCount +{ + std::array acgt; + std::array acgt_qualsum; + int32_t deleted{0}; + int32_t unknown{0}; + + long get_depth_without_deleted() const; + long get_depth_with_deleted() const; + std::string to_string() const; + + void add_base(char seq, char qual); + +}; + + +class IndelEvent +{ +public: + // Event type is the same as the CIGAR mappings, that is: + // I for insertion + // D for deletion + // X for mismatches + // other cigar mappings are not used in this context + uint32_t pos{0}; // event is between pos-1 and pos in a 1-based system + char type{'\0'}; + std::vector sequence{}; + + IndelEvent() = default; + + IndelEvent(uint32_t _pos, char _type) + : pos(_pos) + , type(_type) + {} + + IndelEvent(uint32_t _pos, char _type, std::vector && _sequence) + : pos{_pos} + , type{_type} + , sequence{std::forward >(_sequence)} + {} + + inline std::string + to_string() const + { + std::ostringstream ss; + ss << pos << " " << type << ' ' << std::string(sequence.begin(), sequence.end()); + return ss.str(); + } + + + /********************* + * OPERATOR OVERLOAD * + *********************/ + bool operator==(IndelEvent const & b) const; + bool operator!=(IndelEvent const & b) const; + bool operator<(IndelEvent const & b) const; +}; + + +class SnpEvent +{ +public: + uint32_t pos{0}; + char base{'N'}; + + SnpEvent() = default; + SnpEvent(uint32_t _pos, char _base) + : pos(_pos) + , base(_base) + {} + + inline + std::string + to_string() const + { + std::ostringstream ss; + ss << pos << " " << base; + return ss.str(); + } + + + /********************* + * OPERATOR OVERLOAD * + *********************/ + bool operator==(SnpEvent const & b) const; + bool operator!=(SnpEvent const & b) const; + bool operator<(SnpEvent const & b) const; +}; + + +class SnpEventInfo +{ +public: + uint16_t hq_count{0}; + uint16_t lq_count{0}; + uint16_t proper_pairs{0}; + uint16_t first_in_pairs{0}; + uint16_t sequence_reversed{0u}; + uint16_t clipped{0}; + uint8_t max_mapq{0}; + uint8_t max_distance{0}; + int32_t uniq_pos1{-1}; + int32_t uniq_pos2{-1}; + int32_t uniq_pos3{-1}; + std::map phase; + + double corrected_support() const; + bool has_good_support(long const cov) const; + std::string to_string() const; + +}; + + +std::vector get_phred_biallelic(uint32_t count, uint32_t anti_count, uint32_t eps); +uint32_t get_log_qual(uint32_t count, uint32_t anti_count, uint32_t eps = 7); +uint32_t get_log_qual_double(double count, double anti_count, double eps = 7.0); + + +class EventInfo +{ +public: + uint32_t count{0}; + uint32_t multi_count{0}; + uint32_t anti_count{0}; + uint16_t span{1}; + + // indels with realignment support are good enough to try to realign reads to them + bool has_realignment_support{false}; + bool has_good_support{false}; + + uint32_t max_log_qual{0}; + int max_log_qual_file_i{-1}; + + uint32_t log_qual(uint32_t eps = 7) const; + void clean_counts(); +}; + + +bool +apply_indel_event(std::vector & sequence, + std::vector & ref_pos, + IndelEvent const & indel_event, + long const offset, + bool const is_debug = false); + +IndelEvent +make_deletion_event(std::vector const & reference_sequence, long ref_offset, int32_t pos, long count); + +IndelEvent +make_insertion_event(int32_t pos, std::vector && event_sequence); + +} // namespace gyper + + +namespace std +{ + +template <> +struct hash +{ + size_t + operator()(gyper::IndelEvent const & event) + { + size_t h1 = std::hash()(event.pos); + size_t h2 = std::hash()(event.type); + size_t h3 = 42 ^ 
event.sequence.size(); + + for (auto const & seq : event.sequence) + h3 ^= std::hash()(seq); + + return h1 ^ (h2 << 1) ^ (h3 + 0x9e3779b9); + } + + +}; + +} // namespace std + + +namespace gyper +{ + +//using Tevents = phmap::node_hash_map; +using Tindel_events = std::map; // maps events to count + +} // namespace gyper diff --git a/include/graphtyper/typer/event_call.hpp b/include/graphtyper/typer/event_call.hpp new file mode 100644 index 0000000..e89aba7 --- /dev/null +++ b/include/graphtyper/typer/event_call.hpp @@ -0,0 +1,32 @@ +#pragma once + +#include +#include +#include + +#include +#include + + +namespace gyper +{ + +class EventCall +{ +public: + IndelEvent event; // includes pos, type and sequence + + uint32_t count{0}; + uint32_t anti_count{0}; + uint32_t multi_count{0}; + + uint16_t span{0}; +}; + +//class EventCalls +//{ +//public: +// +//}; + +} // namespace gyper diff --git a/include/graphtyper/typer/genotype_paths.hpp b/include/graphtyper/typer/genotype_paths.hpp index 943ea94..89c9ac2 100644 --- a/include/graphtyper/typer/genotype_paths.hpp +++ b/include/graphtyper/typer/genotype_paths.hpp @@ -22,7 +22,6 @@ struct GenotypePathsDetails { std::string query_name; std::string read_group; - uint32_t score_diff = 0u; }; @@ -32,12 +31,14 @@ class GenotypePaths std::vector read2; std::vector qual2; std::vector paths; - uint16_t read_length = 0; - uint16_t flags = 0; - uint32_t longest_path_length = 0; - uint32_t original_pos = 0; // 0-based position from global alignment - uint8_t mapq = 255; - int32_t ml_insert_size = INSERT_SIZE_WHEN_NOT_PROPER_PAIR; + uint16_t read_length{0}; + uint16_t flags{0}; + uint32_t longest_path_length{0}; + uint32_t original_pos{0}; // 0-based position from global alignment + uint8_t score_diff{0}; // AS-XS from bwa +// uint8_t original_clipped_bp{0}; + uint8_t mapq{255}; + int32_t ml_insert_size{INSERT_SIZE_WHEN_NOT_PROPER_PAIR}; #ifndef NDEBUG std::unique_ptr details; // Only used when statistics are kept @@ -60,14 +61,12 @@ class GenotypePaths void add_next_kmer_labels(std::vector const & ll, uint32_t start_index, uint32_t read_end_index, - int mismatches = 0 - ); + int mismatches = 0); void add_prev_kmer_labels(std::vector const & ll, uint32_t const read_start_index, uint32_t const read_end_index, - int const mismatches = 0 - ); + int const mismatches = 0); void clear_paths(); diff --git a/include/graphtyper/typer/logistic_constants.hpp b/include/graphtyper/typer/logistic_constants.hpp index e314c7d..092634e 100644 --- a/include/graphtyper/typer/logistic_constants.hpp +++ b/include/graphtyper/typer/logistic_constants.hpp @@ -6,6 +6,7 @@ namespace { +// logf double constexpr LOGF_INTERCEPT{-29.28908}; double constexpr LOGF_ABHom{23.12909}; double constexpr LOGF_CRBySeqDepth{-10.22658}; @@ -46,7 +47,7 @@ namespace gyper { inline double -get_logf(double ab_hom, +get_logf(double abhom, double cr_by_seqdepth, double mq, double pass_ratio, @@ -63,11 +64,76 @@ get_logf(double ab_hom, //BOOST_LOG_TRIVIAL(info) << __HERE__ << " " << ab_hom << " " << cr_by_seqdepth << " " << mq << " " << pass_ratio // << " " << gt_yield << " " << qd << " " << ab_het_bin << " " << sbalt_bin; - double const pwr = LOGF_INTERCEPT + ab_hom * LOGF_ABHom + cr_by_seqdepth * LOGF_CRBySeqDepth + + double const pwr = LOGF_INTERCEPT + abhom * LOGF_ABHom + cr_by_seqdepth * LOGF_CRBySeqDepth + mq * LOGF_MQ + pass_ratio * LOGF_PASS_ratio + gt_yield * LOGF_GTYield + qd * LOGF_QD + LOGF_ABHet[ab_het_bin] + LOGF_SBAlt[sbalt_bin]; //BOOST_LOG_TRIVIAL(info) << __HERE__ << " log model pwr = " << 
pwr; - return 1.0 / (1.0 + std::exp(-pwr)); + double const _exp = std::max(0.0, std::exp(-pwr)); + return 1.0 / (1.0 + _exp); +} + + +// aa score +double constexpr AA_INTERCEPT{-6.347426707}; +double constexpr AA_SB{-0.252334000}; +double constexpr AA_MM{-0.013766577 * 3.0}; +double constexpr AA_SD{0.014572295}; +double constexpr AA_QD{0.065221319}; +double constexpr AA_CR{-0.006449446 * 3.0}; +double constexpr AA_MQ{0.055973424}; + +std::array constexpr AA_ABHom{{0.0, + 1.304140117, + 1.681221065, + 2.214801195, + 3.930106559}}; + +inline double +get_aa_score(double abhom, double sb, double mm, long sd, double qd, double cr, long mq) +{ + long abhom_bin{0}; + + if (abhom <= 0.85) + abhom_bin = 0; + else if (abhom <= 0.94) + abhom_bin = 1; + else if (abhom <= 0.98) + abhom_bin = 2; + else if (abhom <= 0.99) + abhom_bin = 3; + else + abhom_bin = 4; + + if (mq > 60) + mq = 60; + + double const pwr = AA_INTERCEPT + + AA_ABHom[abhom_bin] + + sb * AA_SB + + mm * AA_MM + + sd * AA_SD + + qd * AA_QD + + cr * AA_CR + + mq * AA_MQ; + + double const _exp = std::max(0.0, std::exp(-pwr)); + + //if (sb > 0.8) + //{ + // BOOST_LOG_TRIVIAL(info) << __HERE__ + // << " abhom_bin=" << abhom_bin + // << " abhom=" << AA_ABHom[abhom_bin] + // << " sb=" << sb + // << " mm=" << mm + // << " sd=" << sd + // << " qd=" << qd + // << " cr=" << cr + // << " mq=" << mq + // << " pwr=" << pwr + // << " val=" << (1.0 / (1.0 + _exp)); + //} + + return 1.0 / (1.0 + _exp); } diff --git a/include/graphtyper/typer/read.hpp b/include/graphtyper/typer/read.hpp new file mode 100644 index 0000000..9528d2b --- /dev/null +++ b/include/graphtyper/typer/read.hpp @@ -0,0 +1,83 @@ +#pragma once + +#include +#include +#include +#include +#include + +#include + +#include + + +namespace gyper +{ + +int32_t static constexpr READ_ANTI_SUPPORT{-1}; +int32_t static constexpr READ_MULTI_SUPPORT{-2}; + +struct ReadIndelEvent +{ +public: + int32_t read_pos{0}; + Tindel_events::iterator event_it; + + ReadIndelEvent(long _read_pos, Tindel_events::iterator _event_it) + : read_pos(_read_pos) + , event_it(_event_it) + {} + +}; + + +struct ReadSnpEvent +{ +public: + int32_t read_pos{0}; + int32_t ref_pos{0}; + + ReadSnpEvent(int32_t _read_pos, int32_t ref_pos) + : read_pos(_read_pos) + , ref_pos(ref_pos) + {} +}; + + +class Alignment +{ +public: + int32_t pos{-1}; + int32_t pos_end{-1}; + int32_t score{std::numeric_limits::min()}; // Alignment score + int16_t num_clipped_begin{0}; // Number of clipped bases at the begin of the read + int16_t num_clipped_end{0}; // Number of clipped bases at the end of the read + int16_t num_ins_begin{0}; // Number of bases in insertion at the begin of the read + //int16_t num_ins_end{0}; // Number of bases in insertion at the end of the read + //std::vector snp_events; // read pos containing snp events + //std::vector::iterator> snp_events; + std::vector indel_events; // pair of read_offset and event + + bool has_indel_event(Tindel_events::iterator) const; + bool is_clipped() const; + void add_indel_event(int32_t pos, Tindel_events::iterator indel_it); + void replace_indel_events(std::vector && new_indel_events); +}; + + +class Read +{ +public: + std::string name; + int32_t mate_pos{-1}; + uint16_t flags{0}; + uint8_t mapq{255}; + + std::vector sequence{}; + std::vector qual{}; + Alignment alignment{}; + + std::string to_string() const; +}; + +} // namespace gyper diff --git a/include/graphtyper/typer/sample_call.hpp b/include/graphtyper/typer/sample_call.hpp index 42ad3da..c43a774 100644 --- 
a/include/graphtyper/typer/sample_call.hpp +++ b/include/graphtyper/typer/sample_call.hpp @@ -28,17 +28,19 @@ class SampleCall // If R is the number of alleles, then phred should be of size R * (R + 1) / 2 and coverage of size R. std::vector phred; // GT and PL std::vector coverage; // AD and DP - uint16_t ref_total_depth = 0u; // Total reads that support the reference allele - uint16_t alt_total_depth = 0u; // Total reads that support alternative allele(s) - uint8_t ambiguous_depth = 0u; // - uint8_t alt_proper_pair_depth = 0u; // Total reads in proper pairs that support the alternative allele(s) + uint16_t ref_total_depth{0u}; // Total reads that support the reference allele + uint16_t alt_total_depth{0u}; // Total reads that support alternative allele(s) + uint8_t ambiguous_depth{0u}; // + uint8_t alt_proper_pair_depth{0u}; // Total reads in proper pairs that support the alternative allele(s) - mutable int8_t filter = -1; // -1 is unknown, 0 is PASS, 1 is GQ filter + mutable int8_t filter{-1}; // -1 is unknown, 0 is PASS, 1 is GQ filter uint32_t get_depth() const; uint32_t get_unique_depth() const; + uint32_t get_alt_depth() const; std::pair get_gt_call() const; uint8_t get_gq() const; + uint8_t get_lowest_phred_not_with(uint16_t allele) const; int8_t check_filter(long gq) const; private: diff --git a/include/graphtyper/typer/var_stats.hpp b/include/graphtyper/typer/var_stats.hpp index 06ba311..feec245 100644 --- a/include/graphtyper/typer/var_stats.hpp +++ b/include/graphtyper/typer/var_stats.hpp @@ -5,46 +5,74 @@ #include // std::string #include // std::vector +#include + + #include namespace gyper { +struct VarStatsPerAllele +{ + uint64_t clipped_bp{0u}; + uint64_t mapq_squared{0u}; + uint32_t score_diff{0u}; + uint32_t mismatches{0u}; + + template + void + serialize(Archive & ar, unsigned int /*version*/) + { + ar & clipped_bp; + ar & mapq_squared; + ar & score_diff; + ar & mismatches; + } + + +}; + + class VarStats { + friend class boost::serialization::access; + public: - /** Clipped reads */ - uint32_t clipped_reads = 0u; + /** Stats per allele (excluding ReadStrand) */ + std::vector per_allele{}; /** Strand bias per allele */ std::vector read_strand{}; + /** Clipped reads */ + uint32_t clipped_reads{0u}; + /** MapQ statistics */ - uint64_t mapq_squared = 0u; + uint64_t mapq_squared{0u}; /** * CONSTRUCTORS */ - VarStats(uint16_t allele_count) noexcept; - - /** - * MODIFIERS - */ - void add_mapq(uint8_t const new_mapq); + VarStats(std::size_t allele_count) noexcept; /** * CLASS INFORMATION */ - std::string get_forward_strand_bias() const; - std::string get_reverse_strand_bias() const; - // Read pair specfici biases - std::string get_r1_forward_strand_bias() const; - std::string get_r2_forward_strand_bias() const; - std::string get_r1_reverse_strand_bias() const; - std::string get_r2_reverse_strand_bias() const; + void write_stats(std::map & infos) const; + void write_per_allele_stats(std::map & infos) const; + void write_read_strand_stats(std::map & infos) const; + /** + * CLASS MODIFIERS + */ + void add_stats(VarStats const & stats); + //void read_stats(std::map const & infos); +private: + template + void serialize(Archive & ar, unsigned int version); }; template diff --git a/include/graphtyper/typer/variant.hpp b/include/graphtyper/typer/variant.hpp index a7b9a95..dfa825e 100644 --- a/include/graphtyper/typer/variant.hpp +++ b/include/graphtyper/typer/variant.hpp @@ -9,6 +9,7 @@ #include // gyper::Genotype #include // gyper::Haplotype #include // gyper::SampleCall +#include 
#include namespace gyper @@ -24,9 +25,11 @@ class Variant uint32_t abs_pos; std::vector > seqs; std::vector calls; + VarStats stats; std::map infos; std::string suffix_id; bool is_info_generated{false}; + char type{'.'}; Variant() noexcept; @@ -67,7 +70,7 @@ class Variant std::vector get_seq_depth_of_all_alleles() const; uint64_t get_qual() const; // Gets quality of a record double get_qual_by_depth() const; // Gets total quality by depth (QD) of a record - + std::vector get_qual_by_depth_per_alt_allele() const; /********************* * OPERATOR OVERLOAD * diff --git a/include/graphtyper/typer/variant_call.hpp b/include/graphtyper/typer/variant_call.hpp new file mode 100644 index 0000000..7ba8833 --- /dev/null +++ b/include/graphtyper/typer/variant_call.hpp @@ -0,0 +1,14 @@ +#pragma once + +#include + +#include + + +class VariantCall +{ +public: + uint32_t pos{0}; + char type{'\0'}; + +}; diff --git a/include/graphtyper/typer/vcf_operations.hpp b/include/graphtyper/typer/vcf_operations.hpp index 1843947..524ea19 100644 --- a/include/graphtyper/typer/vcf_operations.hpp +++ b/include/graphtyper/typer/vcf_operations.hpp @@ -13,6 +13,7 @@ void vcf_concatenate(std::vector const & vcfs, std::string const & output, bool const SKIP_SORT, bool const SITES_ONLY, + bool const WRITE_TBI, std::string const & region); void vcf_break_down(std::string const & vcf, std::string const & output, std::string const & region); diff --git a/include/graphtyper/utilities/genotype_sv.hpp b/include/graphtyper/utilities/genotype_sv.hpp index 8dfa9d5..e01a5aa 100644 --- a/include/graphtyper/utilities/genotype_sv.hpp +++ b/include/graphtyper/utilities/genotype_sv.hpp @@ -13,14 +13,15 @@ void genotype_sv(std::string const & interval_fn, std::string const & ref_path, std::vector const & sams, + std::vector const & avg_cov_by_readlen, gyper::GenomicRegion const & genomic_region, - std::string const & output_path, - std::vector const & avg_cov_by_readlen); + std::string const & output_path); void genotype_sv_regions(std::string ref_path, std::string const & sv_vcf, std::vector const & sams, + std::vector const & avg_cov_by_readlen, std::vector const & regions, std::string const & output_path, bool const is_copy_reference); diff --git a/include/graphtyper/utilities/hts_parallel_reader.hpp b/include/graphtyper/utilities/hts_parallel_reader.hpp index 2a69dcd..1aa753b 100644 --- a/include/graphtyper/utilities/hts_parallel_reader.hpp +++ b/include/graphtyper/utilities/hts_parallel_reader.hpp @@ -25,8 +25,8 @@ class HtsParallelReader std::vector hts_files; std::vector heap; HtsStore store; - std::vector samples; - long num_rg = 0; // Number of read groups + std::vector samples; // List of sample names + long num_rg{0}; // Number of read groups public: HtsParallelReader() = default; @@ -68,6 +68,7 @@ class HtsParallelReader void parallel_reader_genotype_only(std::string * out_path, std::vector const * hts_paths_ptr, + std::vector const * avg_cov_ptr, std::string const * output_dir_ptr, std::string const * reference_fn_ptr, std::string const * region_ptr, diff --git a/include/graphtyper/utilities/hts_reader.hpp b/include/graphtyper/utilities/hts_reader.hpp index 67aefed..58a673c 100644 --- a/include/graphtyper/utilities/hts_reader.hpp +++ b/include/graphtyper/utilities/hts_reader.hpp @@ -22,8 +22,8 @@ class HtsReader hts_idx_t * hts_index{nullptr}; hts_itr_t * hts_iter{nullptr}; std::vector samples; - int sample_index_offset = 0; - int rg_index_offset = 0; + int sample_index_offset{0}; + int rg_index_offset{0}; private: int ret{0}; 
// return value of the most recently read record @@ -34,19 +34,23 @@ class HtsReader std::unordered_map rg2index; // associates read groups with indices of that rg std::vector rg2sample_i; // uses the rg index to determine the sample index + void set_reference(std::string const & reference_path); + public: HtsReader(HtsStore & _store); - void open(std::string const & path, std::string const & region); + void open(std::string const & path, std::string const & region, std::string const & reference); void close(); - int set_reference(std::string const & reference_path); void set_sample_index_offset(int new_sample_index_offset); void set_rg_index_offset(int new_rg_index_offset); - bam1_t * get_next_read(bam1_t * old_record); + // Get the read when reading a HTS file in order. It is probably never a good idea to call both get_next_read_in_order + // and get_next_read(). + bam1_t * get_next_read_in_order(bam1_t * old_record); + bam1_t * get_next_read_in_order(); bam1_t * get_next_read(); - bam1_t * get_next_pos(); + bam1_t * get_next_read(bam1_t * old_record); void get_sample_and_rg_index(long & sample_i, long & rg_i, bam1_t * rec) const; long get_num_rg() const; diff --git a/include/graphtyper/utilities/options.hpp b/include/graphtyper/utilities/options.hpp index ed1437a..567793d 100644 --- a/include/graphtyper/utilities/options.hpp +++ b/include/graphtyper/utilities/options.hpp @@ -80,14 +80,16 @@ class Options * CALLING OPTIONS * *******************/ bool hq_reads{false}; + bool is_csi{false}; int sam_flag_filter{3840}; - long max_files_open{1000}; // Maximum amount of SAM/BAM/CRAM files can be opened at the same time + long max_files_open{512}; // Maximum amount of SAM/BAM/CRAM files can be opened at the same time long soft_cap_of_variants_in_100_bp_window{22}; bool get_sample_names_from_filename{false}; bool output_all_variants{false}; bool is_one_genotype_per_haplotype{false}; std::string variant_suffix_id{}; std::string primer_bedpe{}; + bool is_extra_call_only_iteration{false}; // 7 and 0.26 for >= 1000 samples long genotype_aln_min_support{4}; diff --git a/paw b/paw index 2de577d..7b86062 160000 --- a/paw +++ b/paw @@ -1 +1 @@ -Subproject commit 2de577dc561d22af642aa3958ab0b8ebcaa9c018 +Subproject commit 7b860629eeee0cadd4fb17715503be9273ba8175 diff --git a/seqan b/seqan index 515b276..496ed88 160000 --- a/seqan +++ b/seqan @@ -1 +1 @@ -Subproject commit 515b2769ed6c6538a8b5512e16008ed71b5a0126 +Subproject commit 496ed88cc8af0c9426feb69f480ecab432fed1e9 diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 4399e29..bc1fccb 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -22,10 +22,13 @@ set(graphtyper_SOURCE_FILES index/kmer_label.cpp index/ph_index.cpp typer/alignment.cpp + typer/bucket.cpp typer/caller.cpp + typer/event.cpp typer/genotype_paths.cpp typer/path.cpp typer/primers.cpp + typer/read.cpp typer/sample_call.cpp typer/segment.cpp typer/var_stats.cpp diff --git a/src/graph/constructor.cpp b/src/graph/constructor.cpp index bc071a0..46242e0 100644 --- a/src/graph/constructor.cpp +++ b/src/graph/constructor.cpp @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -178,11 +179,11 @@ append_sv_tag_to_node(std::vector & alt) void open_tabix(seqan::Tabix & tabix_file, - std::string const & tabix_filename, - gyper::GenomicRegion const & genomic_region - ) + std::string const & vcf_filename, + gyper::GenomicRegion const & genomic_region) { - seqan::open(tabix_file, tabix_filename.c_str()); + std::string const ext = gyper::is_file(vcf_filename + 
".csi") ? ".csi" : ".tbi"; + seqan::open(tabix_file, vcf_filename.c_str(), "r", ext.c_str()); seqan::String header; seqan::getHeader(header, tabix_file); @@ -215,6 +216,8 @@ open_reference_genome(seqan::FaiIndex & fasta_index, std::string const & fasta_f ss >> new_contig.length; gyper::graph.contigs.push_back(std::move(new_contig)); } + + f.close(); } // Check if the total length of contigs is too large @@ -239,7 +242,8 @@ open_reference_genome(seqan::FaiIndex & fasta_index, std::string const & fasta_f BOOST_LOG_TRIVIAL(error) << "[" << __HERE__ << "] FASTA index could not be loaded or built."; std::exit(31); } - else if (!seqan::save(fasta_index)) + + if (!seqan::save(fasta_index)) { BOOST_LOG_TRIVIAL(error) << "[" << __HERE__ << "] FASTA index could not be saved to disk."; std::exit(32); @@ -271,8 +275,7 @@ read_reference_seq(std::vector & reference_sequence, seqan::FaiIndex const & fasta_index, unsigned const chrom_idx, uint32_t const begin, - uint32_t const length - ) + uint32_t const length) { seqan::Dna5String ref_seq; @@ -296,8 +299,7 @@ read_reference_genome(std::vector & reference_sequence, fasta_index, chrom_idx, genomic_region.begin, - genomic_region.end - genomic_region.begin - ); + genomic_region.end - genomic_region.begin); } @@ -331,14 +333,36 @@ read_reference_genome_ends(seqan::FaiIndex const & fasta_index, namespace gyper { +void +open_and_read_reference_genome(std::vector & reference_sequence, + std::string const & reference_fn, + GenomicRegion const & genomic_region) +{ + // Load the reference genome + seqan::FaiIndex fai_index; + + if (!seqan::open(fai_index, reference_fn.c_str())) + { + BOOST_LOG_TRIVIAL(error) << "[" << __HERE__ << "] FASTA index could not be loaded for " + << reference_fn; + std::exit(31); + } + + // Read the reference sequence + read_reference_genome(reference_sequence, fai_index, genomic_region); + + // Close the file + seqan::clear(fai_index); +} + + void add_sv_breakend(SV & sv, VarRecord & var, seqan::VcfRecord const & vcf_record, seqan::FaiIndex const & fasta_index, unsigned const chrom_idx, - uint32_t const EXTRA_SEQUENCE_LENGTH - ) + uint32_t const EXTRA_SEQUENCE_LENGTH) { // Read the first matching reference base read_reference_seq(var.ref, fasta_index, chrom_idx, var.pos, 1); @@ -358,7 +382,16 @@ add_sv_breakend(SV & sv, auto parse_chromosome_name = [&](std::vector const & seq, char const c) -> std::string { auto find_it = std::find(seq.begin(), seq.end(), c); - auto find_colon_it = std::find(find_it + 1, seq.end(), ':'); + auto find_last_colon_it = std::find(seq.rbegin(), seq.rend(), ':'); + + if (find_last_colon_it == seq.rend()) + invalid_bnd_alt(); + + long const colon_pos = seq.size() - + std::distance(seq.rbegin(), find_last_colon_it) - 1; + assert(colon_pos < static_cast(seq.size())); + assert(colon_pos >= 0); + auto find_colon_it = seq.begin() + colon_pos; if (find_colon_it == seq.end()) invalid_bnd_alt(); @@ -368,13 +401,23 @@ add_sv_breakend(SV & sv, auto parse_position = [&](std::vector const & seq, char const c) -> long { - auto find_colon_it = std::find(seq.begin() + 1, seq.end(), ':'); + auto find_last_colon_it = std::find(seq.rbegin(), seq.rend(), ':'); + + if (find_last_colon_it == seq.rend()) + invalid_bnd_alt(); + + long const colon_pos = seq.size() - + std::distance(seq.rbegin(), find_last_colon_it) - 1; + assert(colon_pos < static_cast(seq.size())); + assert(colon_pos >= 0); + auto find_colon_it = seq.begin() + colon_pos; + auto find_end_it = std::find(find_colon_it + 1, seq.end(), c); if (find_end_it == seq.end()) 
invalid_bnd_alt(); - long pos = 0; + long pos{0}; std::istringstream ss { std::string(find_colon_it + 1, find_end_it) }; @@ -419,9 +462,9 @@ add_sv_breakend(SV & sv, if (find2_it == alt.end()) invalid_bnd_alt(); - auto const len = EXTRA_SEQUENCE_LENGTH - std::distance(find2_it, alt.end()) - 1; + auto const len = EXTRA_SEQUENCE_LENGTH - std::distance(find2_it, alt.end()); std::vector seq; - read_reference_seq(seq, fasta_index, chrom_idx_2, pos, len); + read_reference_seq(seq, fasta_index, chrom_idx_2, pos - 1, len); // Complement the sequence std::transform(seq.begin(), seq.end(), seq.begin(), complement); @@ -453,7 +496,7 @@ add_sv_breakend(SV & sv, invalid_bnd_alt(); auto const len = EXTRA_SEQUENCE_LENGTH - std::distance(find2_it, alt.end()) - 1; - read_reference_seq(bnd, fasta_index, chrom_idx_2, pos - len - 1, len); + read_reference_seq(bnd, fasta_index, chrom_idx_2, pos - len, len); std::copy(find2_it + 1, alt.end(), std::back_inserter(bnd)); } else @@ -465,7 +508,7 @@ add_sv_breakend(SV & sv, std::copy(alt.begin() + 1, find_it, std::back_inserter(bnd)); // Extra insertion auto const len = EXTRA_SEQUENCE_LENGTH - bnd.size() + 1; std::vector seq; - read_reference_seq(seq, fasta_index, chrom_idx_2, pos - len - 1, len); + read_reference_seq(seq, fasta_index, chrom_idx_2, pos - len, len); // Copy the reverse complement std::transform(seq.begin(), seq.end(), seq.begin(), complement); @@ -487,8 +530,7 @@ add_sv_deletion(SV & sv, VarRecord & var, seqan::FaiIndex const & fasta_index, unsigned const chrom_idx, - uint32_t const EXTRA_SEQUENCE_LENGTH - ) + uint32_t const EXTRA_SEQUENCE_LENGTH) { // Read the first matching reference base read_reference_seq(var.ref, fasta_index, chrom_idx, var.pos, 1); @@ -529,8 +571,7 @@ add_sv_insertion(SV & sv, seqan::VcfRecord const & vcf_record, seqan::FaiIndex const & fasta_index, unsigned const chrom_idx, - uint32_t const EXTRA_SEQUENCE_LENGTH - ) + uint32_t const EXTRA_SEQUENCE_LENGTH) { assert(seqan::length(vcf_record.ref) >= 1); @@ -557,8 +598,7 @@ add_sv_insertion(SV & sv, { std::copy(sv.seq.begin(), sv.seq.begin() + EXTRA_SEQUENCE_LENGTH, - std::back_inserter(alt1) - ); + std::back_inserter(alt1)); append_sv_tag_to_node(alt1); sv.related_sv = static_cast(graph.SVs.size()) + 1; // Next SV is related @@ -568,8 +608,7 @@ add_sv_insertion(SV & sv, std::copy(sv.seq.end() - EXTRA_SEQUENCE_LENGTH, sv.seq.end(), - std::back_inserter(alt2) - ); + std::back_inserter(alt2)); sv.related_sv = static_cast(graph.SVs.size()) - 1; // Previous SV is related sv.model = "BREAKPOINT2"; @@ -1398,13 +1437,13 @@ add_var_record(std::vector & var_records, sv.length = (int)sv.ins_seq.size(); } - // If length is less than 50 then it is not an SV - if (sv.length < 50 && ((int)sv.ins_seq_left.size() + (int)sv.ins_seq_right.size()) < 20) - { - BOOST_LOG_TRIVIAL(warning) << "[" << __HERE__ << "] Ignored SV at " << var.pos - << " because it was less than 50 bp."; - return; - } + //// If length is less than 50 then it is not an SV + //if (sv.length < 50 && ((int)sv.ins_seq_left.size() + (int)sv.ins_seq_right.size()) < 20) + //{ + // BOOST_LOG_TRIVIAL(warning) << "[" << __HERE__ << "] Ignored SV at " << var.pos + // << " because it was less than 50 bp."; + // return; + //} } // Make sure sv.size is set @@ -1744,8 +1783,7 @@ construct_graph(std::string const & reference_filename, for (auto & var_record : var_records) { genomic_region.add_reference_to_record_if_they_have_a_matching_prefix(var_record, - reference_sequence - ); + reference_sequence); } #ifndef NDEBUG @@ -1765,8 +1803,7 @@ 
construct_graph(std::string const & reference_filename, graph.add_genomic_region(std::move(reference_sequence), std::move(var_records), - std::move(genomic_region) - ); + std::move(genomic_region)); #ifndef NDEBUG if (!graph.check()) diff --git a/src/graph/graph.cpp b/src/graph/graph.cpp index adc197d..afb5ea6 100644 --- a/src/graph/graph.cpp +++ b/src/graph/graph.cpp @@ -208,60 +208,7 @@ Graph::add_genomic_region(std::vector && reference_sequence, continue; } - std::vector suffix = var_records[i].get_common_suffix(); - - //if (var_records[i + 1].pos >= var_records[i].pos + var_records[i].ref.size() - suffix.size()) - //{ - // long const suffix_to_remove = var_records[i].pos + var_records[i].ref.size() - var_records[i + 1].pos; - // - // // Check if it is possible to remove enough suffix so we can safely join the two records - // if (suffix_to_remove <= 0 || suffix_to_remove > static_cast(suffix.size())) - // { - // var_records[i + 1].merge_one_path(std::move(var_records[i])); - // } - // else - // { - // // Erase from previous record so that it ends where the next record starts - // assert(static_cast(var_records[i].ref.size()) > suffix_to_remove); - // var_records[i].ref.erase(var_records[i].ref.end() - suffix_to_remove, var_records[i].ref.end()); - // - // for (auto & alt : var_records[i].alts) - // { - // assert(static_cast(alt.size()) > suffix_to_remove); - // alt.erase(alt.end() - suffix_to_remove, alt.end()); - // } - // - // assert(var_records[i + 1].pos == var_records[i].pos + var_records[i].ref.size()); - // - // // Merge the two records - // long const suffix_to_add = suffix_to_remove - static_cast(var_records[i + 1].ref.size()); - // - // var_records[i + 1].merge(std::move(var_records[i])); - // - // if (suffix_to_add > 0) - // { - // var_records[i + 1].ref.insert(var_records[i + 1].ref.end(), suffix.end() - suffix_to_add, suffix.end()); - // - // for (auto & alt : var_records[i + 1].alts) - // alt.insert(alt.end(), suffix.end() - suffix_to_add, suffix.end()); - // } - // } - //} - //else - { - // In a few extreme scenarios we cannot merge only one path here. 
- //BOOST_LOG_TRIVIAL(fatal) << "i : " << var_records[i].to_string(); - //BOOST_LOG_TRIVIAL(fatal) << "i+1 : " << var_records[i + 1].to_string(); - var_records[i + 1].merge(std::move(var_records[i]), 4); // last parameter is EXTRA_SUFFIX - //BOOST_LOG_TRIVIAL(fatal) << "after: " << var_records[i + 1].to_string(); - } - - if (var_records[i + 1].alts.size() >= (MAX_NUMBER_OF_HAPLOTYPES - 1)) - { - BOOST_LOG_TRIVIAL(warning) << "[" << __HERE__ << "] Found a variant with too many alleles."; - var_records[i + 1].alts.resize(MAX_NUMBER_OF_HAPLOTYPES - 2); - } - + var_records[i + 1].merge(std::move(var_records[i]), 4); var_records[i].clear(); // Clear variants that have been merged into others ++i; } // while @@ -290,6 +237,16 @@ Graph::add_genomic_region(std::vector && reference_sequence, } ), var_records.end()); + // Remove var_records with too many alternatives + for (auto & var_record : var_records) + { + if (var_record.alts.size() >= (MAX_NUMBER_OF_HAPLOTYPES - 1)) + { + BOOST_LOG_TRIVIAL(warning) << __HERE__ << " Found a variant with too many alleles."; + var_record.alts.resize(MAX_NUMBER_OF_HAPLOTYPES - 2); + } + } + // Remove common suffix for (auto & var_record : var_records) { @@ -1013,12 +970,12 @@ Graph::get_locations_of_an_actual_position(uint32_t pos, Path const & path, bool // Only add this node if the path has it auto find_it = std::find(path.var_order.cbegin(), path.var_order.cend(), - var_nodes[v].get_label().order - ); + var_nodes[v].get_label().order); long const j = std::distance(path.var_order.cbegin(), find_it); assert(j >= 0); assert(i == this->get_variant_num(v)); + assert(i < static_cast(MAX_NUMBER_OF_HAPLOTYPES)); if (path.is_empty() || (j < static_cast(path.nums.size()) && path.nums[j].test(i))) { diff --git a/src/graph/haplotype.cpp b/src/graph/haplotype.cpp index 3517432..7f940f7 100644 --- a/src/graph/haplotype.cpp +++ b/src/graph/haplotype.cpp @@ -273,17 +273,28 @@ Haplotype::add_coverage(uint32_t const i, uint16_t const c) void -Haplotype::clipped_reads_to_stats(bool const fully_aligned) +Haplotype::clipped_reads_to_stats(int const clipped_bp, int const read_length) { - if (!fully_aligned) + if (clipped_bp == 0) + return; + + assert(var_stats.size() == gts.size()); + assert(var_stats.size() == coverage.size()); + long const num_allele = var_stats.size(); + long const scaled_clipped_bp = (clipped_bp * 1000l) / read_length; + + for (long i = 0; i < num_allele; ++i) { - assert(var_stats.size() == gts.size()); - assert(var_stats.size() == coverage.size()); + auto & var_stat = var_stats[i]; + auto const & cov = coverage[i]; + + if (cov != NO_COVERAGE) + ++var_stats[i].clipped_reads; - for (long i = 0; i < static_cast(var_stats.size()); ++i) + if (cov < MULTI_REF_COVERAGE) // true when the coverage is unique to a particular allele { - if (coverage[i] != NO_COVERAGE) - ++var_stats[i].clipped_reads; + assert(cov < var_stat.per_allele.size()); + var_stat.per_allele[cov].clipped_bp += scaled_clipped_bp; } } } @@ -292,13 +303,27 @@ Haplotype::clipped_reads_to_stats(bool const fully_aligned) void Haplotype::mapq_to_stats(uint8_t const new_mapq) { + if (new_mapq == 255) + return; // means that MQ is unavailable + assert(var_stats.size() == gts.size()); assert(var_stats.size() == coverage.size()); + uint64_t const mapq_squared = static_cast(new_mapq) * static_cast(new_mapq); + long const num_allele = var_stats.size(); - for (long i = 0; i < static_cast(var_stats.size()); ++i) + for (long i = 0; i < num_allele; ++i) { - if (coverage[i] != NO_COVERAGE) - 
var_stats[i].add_mapq(new_mapq); + auto & var_stat = var_stats[i]; + auto const & cov = coverage[i]; + + if (cov != NO_COVERAGE) + var_stat.mapq_squared += mapq_squared; + + if (cov < MULTI_REF_COVERAGE) // true when the coverage is unique to a particular allele + { + assert(cov < var_stat.per_allele.size()); + var_stat.per_allele[cov].mapq_squared += mapq_squared; + } } } @@ -310,13 +335,14 @@ Haplotype::strand_to_stats(uint16_t const flags) bool const is_first_in_pair = (flags & IS_FIRST_IN_PAIR) != 0; assert(var_stats.size() == gts.size()); assert(var_stats.size() == coverage.size()); + long const num_allele = var_stats.size(); - for (long i = 0; i < static_cast(var_stats.size()); ++i) + for (long i = 0; i < num_allele; ++i) { auto & var_stat = var_stats[i]; auto const & cov = coverage[i]; - if (coverage[i] < MULTI_REF_COVERAGE) // true when the coverage is unique to a particular allele + if (cov < MULTI_REF_COVERAGE) // true when the coverage is unique to a particular allele { assert(cov < var_stat.read_strand.size()); @@ -339,6 +365,51 @@ Haplotype::strand_to_stats(uint16_t const flags) } +void +Haplotype::mismatches_to_stats(uint8_t const mismatches, int const read_length) +{ + if (mismatches == 0) + return; + + long const scaled_mismatches = (mismatches * 1000l) / read_length; + long const num_allele = var_stats.size(); + + for (long i = 0; i < num_allele; ++i) + { + auto & var_stat = var_stats[i]; + auto const & cov = coverage[i]; + + if (cov < MULTI_REF_COVERAGE) // true when the coverage is unique to a particular allele + { + assert(cov < var_stat.per_allele.size()); + var_stat.per_allele[cov].mismatches += scaled_mismatches; + } + } +} + + +void +Haplotype::score_diff_to_stats(uint8_t const score_diff) +{ + if (score_diff == 0) + return; + + long const num_allele = var_stats.size(); + + for (long i = 0; i < num_allele; ++i) + { + auto & var_stat = var_stats[i]; + auto const & cov = coverage[i]; + + if (cov < MULTI_REF_COVERAGE) // true when the coverage is unique to a particular allele + { + assert(cov < var_stat.per_allele.size()); + var_stat.per_allele[cov].score_diff += score_diff; + } + } +} + + void Haplotype::coverage_to_gts(std::size_t const pn_index, bool const is_proper_pair) { diff --git a/src/graph/sv.cpp b/src/graph/sv.cpp index 350ea84..d3cb85a 100644 --- a/src/graph/sv.cpp +++ b/src/graph/sv.cpp @@ -173,6 +173,7 @@ reformat_sv_vcf_records(std::vector & variants, ReferenceDepth const & continue; // Nothing to do, there are no SVs here } + /* auto merge_alt_info_lambda = [](Variant & new_var, std::string const & id, long const aa) { @@ -190,6 +191,7 @@ reformat_sv_vcf_records(std::vector & variants, ReferenceDepth const & values.resize(2); new_var.infos[id] = join_strand_bias(values); }; + */ auto make_new_sv_var = [&](Variant const & old_var, long const aa) -> Variant @@ -200,15 +202,16 @@ reformat_sv_vcf_records(std::vector & variants, ReferenceDepth const & new_var.seqs.push_back(old_var.seqs[0]); new_var.seqs.push_back(old_var.seqs[aa + 1]); new_var.infos = old_var.infos; + new_var.stats = old_var.stats; - merge_alt_info_lambda(new_var, "SBF", aa); - merge_alt_info_lambda(new_var, "SBR", aa); - merge_alt_info_lambda(new_var, "SBF1", aa); - merge_alt_info_lambda(new_var, "SBR1", aa); - merge_alt_info_lambda(new_var, "SBF2", aa); - merge_alt_info_lambda(new_var, "SBR2", aa); - //merge_alt_info_lambda(new_var, "RACount", aa); - //merge_alt_info_lambda(new_var, "RADist", aa); + // Set value at index 1 to the alternative allele stats and then shrink the vector with 
.resize() + new_var.stats.per_allele[1] = old_var.stats.per_allele[aa + 1]; + new_var.stats.read_strand[1] = old_var.stats.read_strand[aa + 1]; + + new_var.stats.per_allele.resize(2); + new_var.stats.read_strand.resize(2); + + assert(new_var.stats.per_allele.size() == new_var.seqs.size()); new_var.calls.reserve(old_var.calls.size()); @@ -413,6 +416,8 @@ reformat_sv_vcf_records(std::vector & variants, ReferenceDepth const & // Put first SV allele in new VCF records Variant new_sv_var = make_new_sv_var(var, aa); assert(new_sv_var.seqs.size() == 2); + assert(new_sv_var.stats.per_allele.size() == 2); + assert(new_sv_var.stats.read_strand.size() == 2); SV const & sv = graph.SVs[sv_ids[aa]]; if (sv.type != BND) @@ -431,8 +436,7 @@ reformat_sv_vcf_records(std::vector & variants, ReferenceDepth const & // Fix genotype likelihood for duplication if (sv.type == DUP && - (sv.model == "BREAKPOINT1" || sv.model == "BREAKPOINT2") - ) + (sv.model == "BREAKPOINT1" || sv.model == "BREAKPOINT2")) { for (auto & call : new_sv_var.calls) { @@ -445,8 +449,7 @@ reformat_sv_vcf_records(std::vector & variants, ReferenceDepth const & minus_10log10_one_third * static_cast(call.coverage[1]) + minus_10log10_two_third * - static_cast(call.coverage[0]) - ); + static_cast(call.coverage[0])); uint64_t gt_11 = 3ul * (call.coverage[0] + static_cast(call.coverage[1])); uint64_t min_gt = std::min(gt_00, std::min(gt_01, gt_11)); @@ -464,12 +467,13 @@ reformat_sv_vcf_records(std::vector & variants, ReferenceDepth const & // If the new variant is an insertion and this is the second breakpoint, make a combined // variant. Also do the same if it is a duplication and we have coverage turned off if ((sv.type == INS && related_svs_map.count(sv_ids[aa]) == 1) || - (sv.type == INV && related_svs_map.count(sv_ids[aa]) == 1) - ) + (sv.type == INV && related_svs_map.count(sv_ids[aa]) == 1)) { assert(new_vars.size() > 0); Variant const & var_bp1 = new_vars[related_svs_map.at(sv_ids[aa])]; + assert(var_bp1.seqs.size() == var_bp1.stats.per_allele.size()); Variant combined_var = make_variant_with_combined_calls(new_sv_var, var_bp1); + assert(combined_var.seqs.size() == combined_var.stats.per_allele.size()); add_sv_to_new_vars_vector(std::move(combined_var), sv, "AGGREGATED"); } @@ -479,14 +483,18 @@ reformat_sv_vcf_records(std::vector & variants, ReferenceDepth const & if ((sv.type == DEL || sv.type == DEL_ALU)) { Variant cov_var(new_sv_var); - assert(cov_var.seqs.size() == 2ul); + assert(cov_var.seqs.size() == 2); + assert(cov_var.stats.per_allele.size() == 2); + assert(cov_var.stats.read_strand.size() == 2); for (long pn_index = 0; pn_index < static_cast(cov_var.calls.size()); ++pn_index) cov_var.calls[pn_index] = make_call_based_on_coverage(pn_index, sv, reference_depth); // Make a combined variant with both breakpoint and coverage Variant combined_var = make_variant_with_combined_calls(new_sv_var, cov_var); - assert(combined_var.seqs.size() >= 2); + assert(combined_var.seqs.size() == 2); + assert(combined_var.stats.per_allele.size() == 2); + assert(combined_var.stats.read_strand.size() == 2); add_sv_to_new_vars_vector(std::move(combined_var), sv, "AGGREGATED"); assert(cov_var.seqs.size() >= 2); add_sv_to_new_vars_vector(std::move(cov_var), sv, "COVERAGE"); @@ -496,6 +504,7 @@ reformat_sv_vcf_records(std::vector & variants, ReferenceDepth const & // Second breakpoint of a duplication Variant cov_var(new_sv_var); assert(cov_var.seqs.size() == 2ul); + assert(cov_var.seqs.size() == cov_var.stats.per_allele.size()); for (long pn_index = 0; 
pn_index < static_cast(cov_var.calls.size()); ++pn_index) cov_var.calls[pn_index] = make_call_based_on_coverage(pn_index, sv, reference_depth); @@ -544,6 +553,7 @@ reformat_sv_vcf_records(std::vector & variants, ReferenceDepth const & continue; // Not SV } + assert(var.seqs.size() == var.stats.per_allele.size()); add_sv_variant(var, aa, related_svs); } diff --git a/src/main.cpp b/src/main.cpp index 455f9b6..3b1e88c 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -680,6 +680,11 @@ subcmd_genotype(paw::Parser & parser) "genotype_dis_min_support_ratio", "(advanced) Minimum graph discovery support ratio for a variant so it can be added to the graph."); + parser.parse_option(opts.is_csi, + 'C', + "csi", + "(advanced) If set, graphtyper will make csi indices instead of tbi."); + parser.parse_option(opts.is_only_cigar_discovery, ' ', "is_only_cigar_discovery", @@ -724,6 +729,11 @@ subcmd_genotype(paw::Parser & parser) "(advanced) How many alleles each batch of internal VCFs has. Increasing this number inceases " "memory used, while slightly decreases compute time."); + parser.parse_option(opts.is_extra_call_only_iteration, + ' ', + "is_extra_call_only_iteration", + "(advanced) Set to add an extra call only iteration."); + #ifndef NDEBUG parser.parse_option(opts.stats, ' ', "stats", "(advanced) Directory for statistics files."); #endif // ifndef NDEBUG @@ -777,7 +787,7 @@ subcmd_genotype_sv(paw::Parser & parser) gyper::Options & opts = *(gyper::Options::instance()); std::string avg_cov_by_readlen_fn; - std::string output_dir = "sv_results"; + std::string output_dir{"sv_results"}; std::string opts_region{}; std::string opts_region_file{}; std::string ref_fn{}; @@ -787,17 +797,33 @@ subcmd_genotype_sv(paw::Parser & parser) bool force_copy_reference{false}; bool force_no_copy_reference{false}; + // Change the default values + opts.max_files_open = 128; + // Parse options + parser.parse_option(avg_cov_by_readlen_fn, + ' ', + "avg_cov_by_readlen", + "File with average coverage by read length (one value per line). " + "The values are used for subsampling regions with extremely high coverage and should be in the same " + "order as the BAM/CRAM list."); + parser.parse_option(force_copy_reference, ' ', "force_copy_reference", + "Force copy of the reference FASTA to temporary folder."); + parser.parse_option(force_no_copy_reference, ' ', "force_no_copy_reference", + "Force that the reference FASTA is NOT copied to temporary folder."); + parser.parse_option(opts.force_use_input_ref_for_cram_reading, ' ', "force_use_input_ref_for_cram_reading", + "Force using the input reference FASTA file when reading CRAMs."); + parser.parse_option(opts.is_csi, + 'C', + "csi", + "(advanced) If set, graphtyper will make csi indices instead of tbi."); + parser.parse_option(opts.max_files_open, ' ', "max_files_open", "Select how many files can be open at the same time."); parser.parse_option(opts.no_cleanup, ' ', "no_cleanup", "Set to skip removing temporary files. 
Useful for debugging."); - parser.parse_option(force_copy_reference, ' ', "force_copy_reference", - "Force copy of the reference FASTA to temporary folder."); - parser.parse_option(force_no_copy_reference, ' ', "force_no_copy_reference", - "Force that the reference FASTA is NOT copied to temporary folder."); parser.parse_option(output_dir, 'O', "output", "Output directory."); parser.parse_option(opts_region, 'r', "region", "Genomic region to genotype."); parser.parse_option(opts_region_file, 'R', "region_file", "File with genomic regions to genotype."); @@ -807,6 +833,10 @@ subcmd_genotype_sv(paw::Parser & parser) #ifndef NDEBUG parser.parse_option(opts.stats, ' ', "stats", "Directory for statistics files."); #endif // NDEBUG + parser.parse_option(opts.no_filter_on_coverage, + ' ', + "no_filter_on_coverage", + "(advanced) Set to disable filter on coverage."); //parser.parse_option(opts.vcf, ' ', "vcf", "Input VCF file with SNP/indel variant sites."); parser.parse_positional_argument(ref_fn, "REF.FA", "Reference genome in FASTA format."); @@ -826,8 +856,9 @@ subcmd_genotype_sv(paw::Parser & parser) // Get the genomic regions to process from the --region and --region_file options std::vector regions = get_regions(ref_fn, opts_region, opts_region_file, 1000000); - // Get the SAM/BAM/CRAM file names + // Get the BAM/CRAM file names std::vector sams_fn = get_sams(sam, sams); + std::vector avg_cov_by_readlen = get_avg_cov_by_readlen(avg_cov_by_readlen_fn, sams_fn.size()); // If neither force copy reference or force no copy reference we determine it from number of SAMs bool is_copy_reference = force_copy_reference || (!force_no_copy_reference && sams_fn.size() >= 100); @@ -835,9 +866,11 @@ subcmd_genotype_sv(paw::Parser & parser) gyper::genotype_sv_regions(ref_fn, sv_vcf, sams_fn, + avg_cov_by_readlen, regions, output_dir, is_copy_reference); + return 0; } @@ -965,18 +998,20 @@ subcmd_vcf_concatenate(paw::Parser & parser) std::string output_fn = "-"; bool sites_only{false}; std::string region; + bool write_tbi{false}; parser.parse_option(no_sort, ' ', "no_sort", "Set to skip sorting the variants."); parser.parse_option(output_fn, 'o', "output", "Output VCF file name."); parser.parse_option(sites_only, ' ', "sites_only", "Set to write only variant site information."); parser.parse_option(region, 'r', "region", "Region to print variant in."); + parser.parse_option(write_tbi, 't', "write_tbi", "Set to write TBI index."); parser.parse_remaining_positional_arguments(vcfs, "vcfs...", "VCFs to concatenate"); parser.finalize(); setup_logger(); - gyper::vcf_concatenate(vcfs, output_fn, no_sort, sites_only, region); + gyper::vcf_concatenate(vcfs, output_fn, no_sort, sites_only, write_tbi, region); return 0; } diff --git a/src/typer/alignment.cpp b/src/typer/alignment.cpp index 0746d0d..4837e55 100644 --- a/src/typer/alignment.cpp +++ b/src/typer/alignment.cpp @@ -27,8 +27,7 @@ void find_genotype_paths_of_one_of_the_sequences(seqan::IupacString const & read, gyper::GenotypePaths & geno, gyper::PHIndex const & ph_index, - gyper::Graph const & graph = gyper::graph - ) + gyper::Graph const & graph = gyper::graph) { using namespace gyper; @@ -54,7 +53,7 @@ find_genotype_paths_of_one_of_the_sequences(seqan::IupacString const & read, } { - uint32_t read_start_index = 0; + uint32_t read_start_index{0}; { assert(r_hamming0.size() == r_hamming1.size()); @@ -64,14 +63,12 @@ find_genotype_paths_of_one_of_the_sequences(seqan::IupacString const & read, geno.add_next_kmer_labels(r_hamming0[i], read_start_index, 
read_start_index + (K - 1), - 0 /*mismatches*/ - ); + 0 /*mismatches*/); geno.add_next_kmer_labels(r_hamming1[i], read_start_index, read_start_index + (K - 1), - 1 /*mismatches*/ - ); + 1 /*mismatches*/); read_start_index += (K - 1); } @@ -93,13 +90,18 @@ find_genotype_paths_of_one_of_the_sequences(seqan::IupacString const & read, geno.remove_fully_special_paths(); geno.remove_non_ref_paths_when_read_matches_ref(); // Should be the last check - geno.update_longest_path_size(); geno.remove_short_paths(); if (graph.is_sv_graph) geno.remove_support_from_read_ends(); + // store read sequence + geno.read2 = std::vector(seqan::begin(read), seqan::end(read)); + + // store score diff + + #ifndef NDEBUG /* if (!geno.check_no_variant_is_missing()) @@ -112,6 +114,7 @@ find_genotype_paths_of_one_of_the_sequences(seqan::IupacString const & read, } +/* bool is_clipped(bam1_t const & b) { @@ -141,6 +144,233 @@ is_clipped(bam1_t const & b) return false; } +*/ + +long +clipped_count(bam1_t const & b) +{ + long count{0}; + + if (b.core.n_cigar > 0) + { + auto it = b.data + b.core.l_qname; + + // Check first + uint32_t opAndCnt; + memcpy(&opAndCnt, it, sizeof(uint32_t)); + + if ((opAndCnt & 15) == 4) + { + uint32_t const cigar_count = opAndCnt >> 4; + count += cigar_count; + //std::cerr << "MIDNSHP=X*******"[opAndCnt & 15] << " first "; + return true; + } + + // Check last + memcpy(&opAndCnt, it + sizeof(uint32_t) * (b.core.n_cigar - 1), sizeof(uint32_t)); + + if ((opAndCnt & 15) == 4) + { + //std::cerr << "MIDNSHP=X*******"[opAndCnt & 15] << " last "; + uint32_t const cigar_count = opAndCnt >> 4; + count += cigar_count; + return true; + } + } + + return count; +} + + +uint8_t +get_score_diff(bam1_t * rec) +{ + uint8_t * it = bam_get_aux(rec); + auto const l_aux = bam_get_l_aux(rec); + unsigned i{0}; + int64_t as{-1}; + int64_t xs{-1}; + + while (i < l_aux) + { + i += 3; + char type = *(it + i - 1); + bool is_as = false; + bool is_xs = false; + + // Check for AS and XS + if (*(it + i - 2) == 'S') + { + if (*(it + i - 3) == 'A') + is_as = true; + else if (*(it + i - 3) == 'X') + is_xs = true; + } + + switch (type) + { + case 'A': + { + // A printable character + ++i; + break; + } + + case 'Z': + { + // A string! 
Let's loop it until qNULL + // Check for RG + while (*(it + i) != '\0' && *(it + i) != '\n') + ++i; + + ++i; + break; + } + + case 'c': + { + if (is_as) + { + int8_t num; + memcpy(&num, it + i, sizeof(int8_t)); + as = num; + } + else if (is_xs) + { + int8_t num; + memcpy(&num, it + i, sizeof(int8_t)); + xs = num; + } + + i += sizeof(int8_t); + break; + } + + case 'C': + { + if (is_as) + { + uint8_t num; + memcpy(&num, it + i, sizeof(uint8_t)); + as = num; + } + else if (is_xs) + { + uint8_t num; + memcpy(&num, it + i, sizeof(uint8_t)); + xs = num; + } + + i += sizeof(uint8_t); + break; + } + + case 's': + { + if (is_as) + { + int16_t num; + memcpy(&num, it + i, sizeof(int16_t)); + as = num; + } + else if (is_xs) + { + int16_t num; + memcpy(&num, it + i, sizeof(int16_t)); + xs = num; + } + + i += sizeof(int16_t); + break; + } + + case 'S': + { + if (is_as) + { + uint16_t num; + memcpy(&num, it + i, sizeof(uint16_t)); + as = num; + } + else if (is_xs) + { + uint16_t num; + memcpy(&num, it + i, sizeof(uint16_t)); + xs = num; + } + + i += sizeof(uint16_t); + break; + } + + case 'i': + { + if (is_as) + { + int32_t num; + memcpy(&num, it + i, sizeof(int32_t)); + as = num; + } + else if (is_xs) + { + int32_t num; + memcpy(&num, it + i, sizeof(int32_t)); + xs = num; + } + + i += sizeof(int32_t); + break; + } + + case 'I': + { + if (is_as) + { + uint32_t num; + memcpy(&num, it + i, sizeof(uint32_t)); + as = num; + } + else if (is_xs) + { + uint32_t num; + memcpy(&num, it + i, sizeof(uint32_t)); + xs = num; + } + + i += sizeof(uint32_t); + break; + } + + case 'f': + { + i += sizeof(float); + break; + } + + default: + { + i = l_aux; // Unkown tag, stop + break; + } + } + } + + + if (as == -1 || as < xs) + { + return 0; + } + else + { + if (xs == -1) + xs = 0; + + // check for overflow + long const diff = as - xs; + return diff < std::numeric_limits::max() ? 
diff : std::numeric_limits::max(); + } +} } // anon namespace @@ -164,12 +394,14 @@ align_read(bam1_t * rec, GenotypePaths(core.flag, core.l_qseq)); // Hard restriction on read length is 63 bp (2*32 - 1) - if (seqan::length(seq) >= (2 * K - 1)) - { - find_genotype_paths_of_one_of_the_sequences(seq, geno_paths.first, ph_index); - find_genotype_paths_of_one_of_the_sequences(rseq, geno_paths.second, ph_index); - } - + if (seqan::length(seq) < (2 * K - 1)) + return geno_paths; + + find_genotype_paths_of_one_of_the_sequences(seq, geno_paths.first, ph_index); + find_genotype_paths_of_one_of_the_sequences(rseq, geno_paths.second, ph_index); + //uint8_t const score_diff = get_score_diff(rec); + //geno_paths.first.score_diff = score_diff; + //geno_paths.second.score_diff = score_diff; return geno_paths; } @@ -178,55 +410,80 @@ GenotypePaths * update_unpaired_read_paths(std::pair & geno_paths, bam1_t * rec) { assert(rec); - auto const & core = rec->core; switch (compare_pair_of_genotype_paths(geno_paths.first, geno_paths.second)) { case 1: { - GenotypePaths * geno = &geno_paths.first; - geno->flags = core.flag & ~IS_PROPER_PAIR; // clear proper pair bit - geno->mapq = core.qual; + GenotypePaths & geno = geno_paths.first; + geno.flags = core.flag & ~IS_PROPER_PAIR; // clear proper pair bit + geno.mapq = core.qual; if (!(core.flag & IS_UNMAPPED)) // True if read was mapped - geno->original_pos = core.pos; + geno.original_pos = core.pos; if (core.qual < 25) // True if MQ is below 25 - set_bit(geno->flags, IS_MAPQ_BAD); + set_bit(geno.flags, IS_MAPQ_BAD); + + { + long const clipped_count_bp = clipped_count(*rec); - if (is_clipped(*rec)) - set_bit(geno->flags, IS_CLIPPED); + if (clipped_count_bp > 3) + set_bit(geno.flags, IS_CLIPPED); + } + + geno.score_diff = get_score_diff(rec); #ifndef NDEBUG if (Options::instance()->stats.size() > 0) - geno->details->query_name = std::string(reinterpret_cast(rec->data)); + { + geno.details->query_name = std::string(reinterpret_cast(rec->data)); + geno.qual2.resize(core.l_qseq); + auto it = bam_get_qual(rec); + + for (int i = 0; i < core.l_qseq; ++it, ++i) + geno.qual2[i] = static_cast(*it + 33); + } #endif // NDEBUG - return geno; + return &geno; } case 2: { - GenotypePaths * geno = &geno_paths.second; - geno->flags = (core.flag ^ IS_SEQ_REVERSED) & ~IS_PROPER_PAIR; // clear proper pair bit - geno->mapq = core.qual; + GenotypePaths & geno = geno_paths.second; + geno.flags = (core.flag ^ IS_SEQ_REVERSED) & ~IS_PROPER_PAIR; // clear proper pair bit + geno.mapq = core.qual; if (!(core.flag & IS_UNMAPPED)) // True if read was mapped - geno->original_pos = core.pos; + geno.original_pos = core.pos; if (core.qual < 25) // True if MQ is below 25 - set_bit(geno->flags, IS_MAPQ_BAD); + set_bit(geno.flags, IS_MAPQ_BAD); + + { + long const clipped_count_bp = clipped_count(*rec); + + if (clipped_count_bp > 3) + set_bit(geno.flags, IS_CLIPPED); + } - if (is_clipped(*rec)) - set_bit(geno->flags, IS_CLIPPED); + geno.score_diff = get_score_diff(rec); #ifndef NDEBUG if (Options::instance()->stats.size() > 0) - geno->details->query_name = std::string(reinterpret_cast(rec->data)); + { + geno.details->query_name = std::string(reinterpret_cast(rec->data)); + geno.qual2.resize(core.l_qseq); + auto it = bam_get_qual(rec); + + for (int i = (core.l_qseq - 1); i >= 0; ++it, --i) + geno.qual2[i] = static_cast(*it + 33); + } #endif // NDEBUG - return geno; + return &geno; } default: break; @@ -237,11 +494,7 @@ update_unpaired_read_paths(std::pair & geno_paths, void 
-further_update_unpaired_read_paths_for_discovery(GenotypePaths & geno, - seqan::IupacString const & seq, - seqan::IupacString const & rseq, - bam1_t * rec - ) +further_update_unpaired_read_paths_for_discovery(GenotypePaths & geno, bam1_t * rec) { assert(rec); auto const & core = rec->core; @@ -249,7 +502,6 @@ further_update_unpaired_read_paths_for_discovery(GenotypePaths & geno, if ((geno.flags & IS_SEQ_REVERSED) == (core.flag & IS_SEQ_REVERSED)) { // seq reversed bit has not been flipped - geno.read2 = std::vector(seqan::begin(seq), seqan::end(seq)); geno.qual2.resize(core.l_qseq); auto it = bam_get_qual(rec); @@ -261,7 +513,6 @@ further_update_unpaired_read_paths_for_discovery(GenotypePaths & geno, } else { - geno.read2 = std::vector(seqan::begin(rseq), seqan::end(rseq)); geno.qual2.resize(core.l_qseq); auto it = bam_get_qual(rec); @@ -275,16 +526,8 @@ further_update_unpaired_read_paths_for_discovery(GenotypePaths & geno, } -#ifndef NDEBUG -void -update_paths(std::pair & geno_paths, - seqan::IupacString const & seq, - seqan::IupacString const & rseq, - bam1_t * rec) -#else void update_paths(std::pair & geno_paths, bam1_t * rec) -#endif { assert(rec); auto const & core = rec->core; @@ -305,10 +548,20 @@ update_paths(std::pair & geno_paths, bam1_t * rec) if (core.qual < 25) set_bit(geno1.flags, IS_MAPQ_BAD); - if (is_clipped(*rec)) { - set_bit(geno1.flags, IS_CLIPPED); - set_bit(geno2.flags, IS_CLIPPED); + long const clipped_count_bp = clipped_count(*rec); + + if (clipped_count_bp > 3) + { + set_bit(geno1.flags, IS_CLIPPED); + set_bit(geno2.flags, IS_CLIPPED); + } + } + + { + auto const score_diff = get_score_diff(rec); + geno1.score_diff = score_diff; + geno2.score_diff = score_diff; } geno2.flags = (core.flag ^ IS_SEQ_REVERSED) & ~IS_PROPER_PAIR; @@ -321,14 +574,12 @@ update_paths(std::pair & geno_paths, bam1_t * rec) geno1.details->query_name = std::string(reinterpret_cast(rec->data)); geno2.details->query_name = geno1.details->query_name; - geno1.read2 = std::vector(seqan::begin(seq), seqan::end(seq)); geno1.qual2.resize(core.l_qseq); auto it = bam_get_qual(rec); for (int i = 0; i < core.l_qseq; ++it, ++i) geno1.qual2[i] = static_cast(*it + 33); - geno2.read2 = std::vector(seqan::begin(rseq), seqan::end(rseq)); geno2.qual2 = std::vector(geno1.qual2.rbegin(), geno1.qual2.rend()); } #endif // NDEBUG @@ -336,11 +587,7 @@ update_paths(std::pair & geno_paths, bam1_t * rec) void -further_update_paths_for_discovery(std::pair & geno_paths, - seqan::IupacString const & seq, - seqan::IupacString const & rseq, - bam1_t * rec - ) +further_update_paths_for_discovery(std::pair & geno_paths, bam1_t * rec) { assert(rec); @@ -348,21 +595,12 @@ further_update_paths_for_discovery(std::pair & gen GenotypePaths & geno1 = geno_paths.first; GenotypePaths & geno2 = geno_paths.second; - assert(seqan::length(seq) > 0); - assert(static_cast(seqan::length(seq)) == core.l_qseq); - assert(seqan::length(seq) == seqan::length(rseq)); - assert(seqan::length(seq) == seqan::length(rseq)); - assert(seqan::length(seq) == geno1.read_length); - assert(seqan::length(seq) == geno2.read_length); - - geno1.read2 = std::vector(seqan::begin(seq), seqan::end(seq)); geno1.qual2.resize(core.l_qseq); auto it = bam_get_qual(rec); for (int i = 0; i < core.l_qseq; ++it, ++i) geno1.qual2[i] = static_cast(*it + 33); - geno2.read2 = std::vector(seqan::begin(rseq), seqan::end(rseq)); geno2.qual2 = std::vector(geno1.qual2.rbegin(), geno1.qual2.rend()); } diff --git a/src/typer/bucket.cpp b/src/typer/bucket.cpp new file mode 100644 index 
0000000..5851536 --- /dev/null +++ b/src/typer/bucket.cpp @@ -0,0 +1,207 @@ +#include +#include +#include +#include + +#include + +#include + +#include +#include +#include +#include + + +namespace gyper +{ + +std::string +Bucket::to_string() const +{ + std::ostringstream ss; + ss << " indel events " << "," << indel_events.size(); + ss << " n_reads=" << reads.size(); + + for (auto const & read : reads) + ss << "\n" << read.to_string(); + + return ss.str(); +} + + +bool +is_indel_in_bucket(std::vector const & buckets, + IndelEvent const & indel_event, + long const region_begin, + long const BUCKET_SIZE) +{ + long const event_bucket_index = (indel_event.pos - region_begin) / BUCKET_SIZE; + assert(event_bucket_index < static_cast(buckets.size())); + auto & indel_events = buckets[event_bucket_index].indel_events; + auto find_it = indel_events.find(indel_event); + return find_it != indel_events.end(); +} + + +template +Tindel_events::iterator +add_indel_event_to_bucket(std::vector & buckets, + IndelEvent && new_indel_event, + long const region_begin, + long const BUCKET_SIZE, + std::vector const & reference_sequence, + long ref_offset) +{ + long const REF_SIZE = reference_sequence.size(); + + assert(new_indel_event.pos >= region_begin); + long const event_bucket_index = (new_indel_event.pos - region_begin) / BUCKET_SIZE; + assert(event_bucket_index >= 0); + + if (event_bucket_index >= static_cast(buckets.size())) + buckets.resize(event_bucket_index + 1); + + assert(event_bucket_index < static_cast(buckets.size())); + + EventInfo new_info; + std::pair it_pair = + buckets[event_bucket_index].indel_events.insert({std::move(new_indel_event), std::move(new_info)}); + + assert(it_pair.first != buckets[event_bucket_index].indel_events.end()); + + if (it_pair.second) + { + IndelEvent const & new_event = it_pair.first->first; + EventInfo & new_event_info = it_pair.first->second; + + long span{0}; + long const count{static_cast(new_event.sequence.size())}; + + // Calculate span + if (new_event.type == 'I' || new_event.type == 'i') + { + // Check insterted sequence + while (span < count) + { + if ((ref_offset + span) >= REF_SIZE || + new_event.sequence[span] != reference_sequence[ref_offset + span]) + { + break; + } + + ++span; + } + + // Check further if we have matched the entire inserted sequence + if (span == count) + { + while ((ref_offset + span) < REF_SIZE) + { + if (reference_sequence[ref_offset + span - count] != reference_sequence[ref_offset + span]) + break; + + ++span; + } + } + + // Check for overflow + if ((span + 1) >= std::numeric_limits::max()) + span = std::numeric_limits::max() - 1; + + new_event_info.span = span + 1; // to 1-based system + } + else + { + assert(new_event.type == 'D' || new_event.type == 'd'); + + // Check after deleted sequence + while ((ref_offset + span) < REF_SIZE) + { + if (reference_sequence[ref_offset + span] != reference_sequence[ref_offset + span + count]) + break; + + ++span; + } + + // Check for overflow + if ((span + 1) >= std::numeric_limits::max()) + span = std::numeric_limits::max() - 1; + + new_event_info.span = span + 1; // to 1-based system + } + } + + return it_pair.first; +} + + +std::map::iterator +add_snp_event_to_bucket(std::vector & buckets, + SnpEvent && event, + long const region_begin, + long const BUCKET_SIZE) +{ + assert(event.pos >= region_begin); + long const event_bucket_index = (event.pos - region_begin) / BUCKET_SIZE; + assert(event_bucket_index >= 0); + + if (event_bucket_index >= static_cast(buckets.size())) + 
buckets.resize(event_bucket_index + 1); + + assert(event_bucket_index < static_cast(buckets.size())); + + SnpEventInfo new_info; + std::pair::iterator, bool> it_pair = + buckets[event_bucket_index].snps.insert({std::move(event), std::move(new_info)}); + + return it_pair.first; +} + + +/* +void +add_base_to_bucket(std::vector & buckets, + int32_t pos, + char seq, + char qual, + long const region_begin, + long const BUCKET_SIZE) +{ + // Any sensible compiler will optimize this + long const event_bucket_index = (pos - region_begin) / BUCKET_SIZE; + long const local_pos = (pos - region_begin) % BUCKET_SIZE; + + if (event_bucket_index >= static_cast(buckets.size())) + buckets.resize(event_bucket_index + 1); + + auto & bucket = buckets[event_bucket_index]; + + if (bucket.pileup.size() == 0) + bucket.pileup.resize(BUCKET_SIZE); + + assert(local_pos < static_cast(bucket.pileup.size())); + bucket.pileup[local_pos].add_base(seq, qual); +} +*/ + + +// explicit instantiation +template Tindel_events::iterator +add_indel_event_to_bucket(std::vector & buckets, + IndelEvent && new_indel_event, + long const region_begin, + long const BUCKET_SIZE, + std::vector const & reference_sequence, + long ref_offset); + +template Tindel_events::iterator +add_indel_event_to_bucket(std::vector & buckets, + IndelEvent && new_indel_event, + long const region_begin, + long const BUCKET_SIZE, + std::vector const & reference_sequence, + long ref_offset); + + +} // namespace gyper diff --git a/src/typer/caller.cpp b/src/typer/caller.cpp index 8215cfa..7e89ad8 100644 --- a/src/typer/caller.cpp +++ b/src/typer/caller.cpp @@ -1,10 +1,12 @@ #include // assert +#include // std::numeric_limits #include // std::unique_ptr #include // std::ostringstream #include // std::string #include // std::unordered_map #include // std::vector +#include #include #include @@ -14,8 +16,11 @@ #include // BOOST_LOG_TRIVIAL +#include + #include #include // gyper::absolute_pos +#include #include // gyper::load_graph #include #include @@ -23,10 +28,15 @@ #include // gyper::index (global) #include #include +#include #include +#include +#include #include // gyper::Primers +#include #include #include +#include #include // gyper::HtsParallelReader #include // gyper::HtsReader #include @@ -36,6 +46,40 @@ namespace { +std::string const debug_read_name = "simulated.452/2"; + + +bool +is_clipped(bam1_t const & b) +{ + if (b.core.n_cigar == 0) + return false; + + auto it = b.data + b.core.l_qname; + + // Check first + uint32_t opAndCnt; + memcpy(&opAndCnt, it, sizeof(uint32_t)); + + if ((opAndCnt & 15) == 4) + { + //std::cerr << "MIDNSHP=X*******"[opAndCnt & 15] << " first "; + return true; + } + + // Check last + memcpy(&opAndCnt, it + sizeof(uint32_t) * (b.core.n_cigar - 1), sizeof(uint32_t)); + + if ((opAndCnt & 15) == 4) + { + //std::cerr << "MIDNSHP=X*******"[opAndCnt & 15] << " last "; + return true; + } + + return false; +} + + void _read_rg_and_samples(std::vector & samples, std::vector > & vec_rg2sample_i, @@ -75,11 +119,12 @@ _determine_num_jobs_and_num_parts(long & jobs, { using namespace gyper; - jobs = Options::const_instance()->threads; + gyper::Options const & copts = *(Options::const_instance()); + jobs = copts.threads; num_parts = jobs; - long const MAX_FILES_OPEN = Options::const_instance()->max_files_open; + long const MAX_FILES_OPEN = copts.max_files_open > jobs ? 
copts.max_files_open : jobs; - if (jobs >= NUM_SAMPLES || jobs >= MAX_FILES_OPEN) + if (jobs >= NUM_SAMPLES) { // Special case where there are more threads than samples OR more threads than max files open. POOL_SIZE is 1 num_parts = std::min(NUM_SAMPLES, MAX_FILES_OPEN); @@ -105,6 +150,7 @@ namespace gyper std::vector call(std::vector const & hts_paths, + std::vector const & avg_cov_by_readlen, std::string const & graph_path, PHIndex const & ph_index, std::string const & output_dir, @@ -130,27 +176,35 @@ call(std::vector const & hts_paths, // Split hts_paths std::vector > > spl_hts_paths; + std::vector > > spl_avg_cov; assert(Options::const_instance()->max_files_open > 0); - long jobs = 1; // Running jobs - long num_parts = 1; // Number of parts to split the work into - long const NUM_SAMPLES = hts_paths.size(); + long jobs{1}; // Running jobs + long num_parts{1}; // Number of parts to split the work into + long const NUM_SAMPLES{static_cast(hts_paths.size())}; _determine_num_jobs_and_num_parts(jobs, num_parts, NUM_SAMPLES); { auto it = hts_paths.begin(); + auto cov_it = avg_cov_by_readlen.begin(); for (long i = 0; i < num_parts; ++i) { - auto end_it = it + NUM_SAMPLES / num_parts + (i < (NUM_SAMPLES % num_parts)); + long const advance = NUM_SAMPLES / num_parts + (i < (NUM_SAMPLES % num_parts)); + + auto end_it = it + advance; assert(std::distance(hts_paths.begin(), end_it) <= NUM_SAMPLES); spl_hts_paths.emplace_back(new std::vector(it, end_it)); it = end_it; + + auto cov_end_it = cov_it + advance; + spl_avg_cov.emplace_back(new std::vector(cov_it, cov_end_it)); + cov_it = cov_end_it; } } - BOOST_LOG_TRIVIAL(debug) << "[" << __HERE__ << "] Number of pools = " << spl_hts_paths.size(); + BOOST_LOG_TRIVIAL(debug) << __HERE__ << " Number of pools = " << spl_hts_paths.size(); long const NUM_POOLS = spl_hts_paths.size(); paths.resize(NUM_POOLS); @@ -166,6 +220,7 @@ call(std::vector const & hts_paths, call_station.add_work(parallel_reader_genotype_only, &paths[i], spl_hts_paths[i].get(), + spl_avg_cov[i].get(), &output_dir, &reference_fn, ®ion, @@ -180,6 +235,7 @@ call(std::vector const & hts_paths, parallel_reader_genotype_only, &paths[NUM_POOLS - 1], spl_hts_paths[NUM_POOLS - 1].get(), + spl_avg_cov[NUM_POOLS - 1].get(), &output_dir, &reference_fn, ®ion, @@ -243,10 +299,10 @@ find_variants_in_cigar(seqan::BamAlignmentRecord const & record, graph.ref_nodes[0].get_label().order : 0; - long read_pos = 0; + long read_pos{0}; std::string read(seqan::begin(record.seq), seqan::end(record.seq)); - long begin_clipping_size = 0; - bool is_clipped = false; + long begin_clipping_size{0}; + bool is_clipped{false}; std::string reference_seq; std::string read_seq; @@ -473,7 +529,7 @@ parallel_discover_from_cigar(std::string * output_ptr, if (!rg_tag) { - BOOST_LOG_TRIVIAL(error) << "[graphtyper::typer::caller] Unable to find RG tag in read."; + BOOST_LOG_TRIVIAL(error) << __HERE__ << " Unable to find RG tag in read."; std::exit(1); } @@ -483,7 +539,7 @@ parallel_discover_from_cigar(std::string * output_ptr, if (find_rg_it == rg2sample_i.end()) { - BOOST_LOG_TRIVIAL(error) << "[graphtyper::typer::caller] Unable to find read group. " << read_group; + BOOST_LOG_TRIVIAL(error) << __HERE__ << " Unable to find read group. 
" << read_group; std::exit(1); } @@ -514,9 +570,7 @@ parallel_discover_from_cigar(std::string * output_ptr, std::ostringstream variant_map_path; variant_map_path << output_dir << "/" << samples[0] << "_variant_map"; - BOOST_LOG_TRIVIAL(debug) << "[graphtyper::caller] Writing variant map to '" - << variant_map_path.str(); - + BOOST_LOG_TRIVIAL(debug) << __HERE__ << " Writing variant map to '" << variant_map_path.str(); varmap.create_varmap_for_all(reference_depth); #ifndef NDEBUG @@ -570,7 +624,7 @@ discover_directly_from_bam(std::string const & graph_path, } long const NUM_POOLS = spl_hts_paths.size(); - BOOST_LOG_TRIVIAL(debug) << "[graphtyper::caller] Number of pools = " << NUM_POOLS; + BOOST_LOG_TRIVIAL(debug) << __HERE__ << " Number of pools = " << NUM_POOLS; output_file_paths.resize(NUM_POOLS); // Run in parallel @@ -609,4 +663,1573 @@ discover_directly_from_bam(std::string const & graph_path, } +void +run_first_pass(bam1_t * hts_rec, + HtsReader & hts_reader, + long file_i, + std::vector & buckets, + std::map > & pool_snp_haplotypes, + long const BUCKET_SIZE, + long const region_begin, + std::vector const & reference_sequence) +{ + long const REF_SIZE{static_cast(reference_sequence.size())}; + int32_t global_max_pos_end{0}; + std::vector cov_up(REF_SIZE); + std::vector cov_down(REF_SIZE); + + while (true) + { + assert(hts_rec); + auto const & core = hts_rec->core; + + if (core.n_cigar == 0 || core.pos < region_begin) + { + hts_rec = hts_reader.get_next_read(hts_rec); + + if (!hts_rec) + break; + } + + std::array static constexpr CIGAR_MAP = {{ + 'M', 'I', 'D', 'N', 'S', 'H', 'P', '=', 'X', 'B', '*', '*', '*', '*', '*', '*' + }}; + + auto it = hts_rec->data; + auto cigar_it = it + core.l_qname; + auto seq_it = cigar_it + (core.n_cigar << 2); + + // Index of first aligned read base in read sequence + long read_offset{0}; + + // Index of first aligned read base in reference_sequence + long ref_offset{static_cast(core.pos) - region_begin}; + assert(ref_offset >= 0); + + // Get reference to bucket + long const bucket_id{ref_offset / BUCKET_SIZE}; + + // While experimenting say that id is also index, then go back to: + // long const bucket_index = bucket_id % NUM_BUCKETS; + long const bucket_index{bucket_id}; + + if (bucket_index >= static_cast(buckets.size())) + buckets.resize(bucket_index + 1); + + assert(bucket_index >= 0); + assert(bucket_index < static_cast(buckets.size())); + long const N_CIGAR = core.n_cigar; + + if (ref_offset >= REF_SIZE) + { + BOOST_LOG_TRIVIAL(error) << __HERE__ << " Unexpected ref_offset = " << ref_offset; + std::exit(1); + } + + Read read; + read.name = reinterpret_cast(it); + //read.mate_pos = static_cast(core.mpos); + + if ((core.flag & IS_FIRST_IN_PAIR) != 0u) + read.name.append("/1"); + else + read.name.append("/2"); + + read.flags = core.flag; + read.sequence.resize(core.l_qseq); + + for (int i = 0; i < core.l_qseq; ++i) + read.sequence[i] = seq_nt16_str[bam_seqi(seq_it, i)]; + + + auto const qual_it = bam_get_qual(hts_rec); + std::vector::iterator> snp_events; + bool const is_read_clipped = is_clipped(*hts_rec); + + for (long i{0}; i < N_CIGAR; ++i) + { + uint32_t opAndCnt; + memcpy(&opAndCnt, cigar_it, sizeof(uint32_t)); + cigar_it += sizeof(uint32_t); + + char const cigar_operation = CIGAR_MAP[opAndCnt & 15]; + uint32_t const cigar_count = opAndCnt >> 4; + assert(cigar_count > 0); + + switch (cigar_operation) + { + case 'M': + case '=': // '=' and 'X' are typically not used by aligners (at least not by default), + case 'X': // but we keep it here just 
in case + { + assert((ref_offset + cigar_count - 1l) < REF_SIZE); + //auto ref_it = reference_sequence.begin() + ref_offset; + + for (long r{0}; r < cigar_count; ++r) + { + long const ref_pos = ref_offset + r; + char const ref = reference_sequence[ref_pos]; + long const read_pos = read_offset + r; + char const read_base = read.sequence[read_pos]; + + if (read_base == ref || + (ref != 'A' && ref != 'C' && ref != 'G' && ref != 'T') || + (read_base != 'A' && read_base != 'C' && read_base != 'G' && read_base != 'T')) + { + continue; + } + + SnpEvent new_snp_event(ref_pos, read_base); + + std::map::iterator snp_it = + add_snp_event_to_bucket(buckets, std::move(new_snp_event), region_begin, BUCKET_SIZE); + + SnpEventInfo & info = snp_it->second; + uint8_t const qual = *(qual_it + read_pos); + + if (qual >= 25) + ++info.hq_count; + else + ++info.lq_count; + + //if (qual > info.max_qual) + // info.max_qual = qual; + + if (core.qual != 255 && core.qual > info.max_mapq) + info.max_mapq = core.qual; + + info.proper_pairs += ((read.flags & IS_PROPER_PAIR) != 0); + info.first_in_pairs += ((read.flags & IS_FIRST_IN_PAIR) != 0); + info.sequence_reversed += ((read.flags & IS_SEQ_REVERSED) != 0); + info.clipped += is_read_clipped; + + if (info.uniq_pos1 == -1) + { + info.uniq_pos1 = core.pos; + } + else if (info.uniq_pos2 == -1) + { + if (info.uniq_pos1 != core.pos) + { + info.uniq_pos2 = core.pos; + } + } + else if (info.uniq_pos3 == -1 && info.uniq_pos2 != core.pos) + { + assert(info.uniq_pos1 != core.pos); // should not happend if input is sorted + info.uniq_pos3 = core.pos; + } + + long const max_distance = std::min(read_pos, core.l_qseq - 1 - read_pos); + assert(max_distance >= 0); + + if (max_distance > info.max_distance) + info.max_distance = max_distance; + + snp_events.push_back(snp_it); + } + + read_offset += cigar_count; + ref_offset += cigar_count; + break; + } + + case 'I': + { + assert(cigar_count > 0); + + auto end_it = (read_offset + cigar_count) < static_cast(read.sequence.size()) ? 
+ read.sequence.begin() + read_offset + cigar_count : + read.sequence.end(); + + std::vector event_sequence(read.sequence.begin() + read_offset, end_it); + IndelEvent new_event = make_insertion_event(region_begin + ref_offset, + std::move(event_sequence)); + + auto indel_event_it = add_indel_event_to_bucket(buckets, + std::move(new_event), + region_begin, + BUCKET_SIZE, + reference_sequence, + ref_offset); + + if (read.name == debug_read_name) + BOOST_LOG_TRIVIAL(debug) << __HERE__ << " new ins=" << indel_event_it->first.to_string(); + + ++indel_event_it->second.count; + read_offset += cigar_count; + break; + } + + case 'D': + { + assert(cigar_count > 0); + + IndelEvent new_event = + make_deletion_event(reference_sequence, ref_offset, region_begin + ref_offset, cigar_count); + + auto indel_event_it = add_indel_event_to_bucket(buckets, + std::move(new_event), + region_begin, + BUCKET_SIZE, + reference_sequence, + ref_offset); + + if (read.name == debug_read_name) + BOOST_LOG_TRIVIAL(debug) << __HERE__ << " new del=" << indel_event_it->first.to_string(); + + ++indel_event_it->second.count; + ref_offset += cigar_count; + break; + } + + case 'S': + { + read_offset += cigar_count; + break; + } // else 'H' or 'P' which move neither reference nor read + } + } + + for (long e{1}; e < static_cast(snp_events.size()); ++e) + { + auto const & event = snp_events[e]->first; + + // Add event to previous snp events + for (long prev{0}; prev < e; ++prev) + { + auto & prev_info = snp_events[prev]->second; + auto insert_it = prev_info.phase.insert({event, 0}); + ++insert_it.first->second; + } + } + + read.alignment.pos = static_cast(core.pos); + read.alignment.pos_end = region_begin + std::min(ref_offset, REF_SIZE - 1); + + assert((read.alignment.pos - region_begin) < static_cast(cov_up.size())); + ++cov_up[read.alignment.pos - region_begin]; + + assert((read.alignment.pos_end - region_begin) < static_cast(cov_down.size())); + ++cov_down[read.alignment.pos_end - region_begin]; + + auto & bucket = buckets[bucket_index]; + int32_t const end_with_clip = read.alignment.pos_end + read.alignment.num_clipped_end; + + if (end_with_clip > bucket.max_pos_end) + { + bucket.max_pos_end = end_with_clip; + global_max_pos_end = std::max(global_max_pos_end, end_with_clip); + } + + bucket.global_max_pos_end = global_max_pos_end; + hts_rec = hts_reader.get_next_read(hts_rec); + + if (!hts_rec) + break; + } + + long const NUM_BUCKETS{static_cast(buckets.size())}; + + auto update_coverage = + [&cov_up, &cov_down](long & cov, long const pos, long const b, long const BUCKET_SIZE) -> void + { + long offset{pos + 1}; + + if (offset > b * BUCKET_SIZE) + { + offset = b * BUCKET_SIZE; + + // Add coverage until naive begin + while (offset <= pos) + { + cov += (static_cast(cov_up[offset]) - static_cast(cov_down[offset])); + ++offset; + } + } + }; + + // Remove SNPs with low support + { + long depth{0}; + + for (long b{0}; b < NUM_BUCKETS; ++b) + { + auto & bucket = buckets[b]; + + for (auto snp_it = bucket.snps.begin(); snp_it != bucket.snps.end(); /*++it*/) + { + SnpEvent const & snp = snp_it->first; + SnpEventInfo const & info = snp_it->second; + + long cov{depth}; + long const begin = std::max(0l, snp.pos - region_begin); + update_coverage(cov, begin, b, BUCKET_SIZE); + + if (info.has_good_support(cov)) + ++snp_it; + else + snp_it = bucket.snps.erase(snp_it); + } + + // Add coverage from bucket b + assert(b * BUCKET_SIZE < REF_SIZE); + long offset{b * BUCKET_SIZE}; + long const end_offset = std::min(REF_SIZE, (b + 1) * BUCKET_SIZE); 
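The cov_up/cov_down pair used throughout run_first_pass is a start/end counter sweep: each read increments cov_up at its start offset and cov_down at its end offset, and the per-bucket loops recover depth by accumulating the difference from the left (the update_coverage lambda does the same thing incrementally up to a single position). A minimal standalone sketch of that idea, with hypothetical names not taken from the patch:

#include <cstddef>
#include <vector>

// Recover per-position depth from start/end counters, assuming each read has
// incremented starts[read_begin] and ends[read_end] exactly once.
std::vector<long> depth_from_counters(std::vector<int> const & starts,
                                      std::vector<int> const & ends)
{
  std::vector<long> depth(starts.size(), 0);
  long running{0};

  for (std::size_t pos = 0; pos < starts.size(); ++pos)
  {
    running += static_cast<long>(starts[pos]) - static_cast<long>(ends[pos]);
    depth[pos] = running; // reads that begin at or before pos and end after pos
  }

  return depth;
}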
+ + while (offset < end_offset) + { + depth += static_cast(cov_up[offset]) - static_cast(cov_down[offset]); + ++offset; + } + } + } + + long depth{0}; + + // Find HQ SNPs and indels with good enough support to allow realignments + for (long b{0}; b < NUM_BUCKETS; ++b) + { + auto & bucket = buckets[b]; + + // Check SNP phase + for (auto snp_it = bucket.snps.begin(); snp_it != bucket.snps.end(); ++snp_it) + { + SnpEvent const & snp = snp_it->first; + SnpEventInfo const & info = snp_it->second; + + long const begin = std::max(0l, snp.pos - region_begin); + long cov{depth}; // starting coverage depth + + auto it_bool = pool_snp_haplotypes.insert({snp, std::set()}); + std::set & snp_haplotypes = it_bool.first->second; + + assert(begin >= b * BUCKET_SIZE); + update_coverage(cov, begin, b, BUCKET_SIZE); + + //BOOST_LOG_TRIVIAL(info) << __HERE__ + // << " snp has good support @ " << snp.pos + // << " base=" << snp.base + // << " cov=" << cov + // << " INFO=" << info.to_string(); + + auto is_good_support = + [&cov_down, ®ion_begin](long local_cov, + long local_offset, + std::map::const_iterator find_it) -> bool + { + long const end = std::max(0l, static_cast(find_it->first.pos) - region_begin); + + // reduce coverage by reads who do not overlap the whole interval + while (local_offset <= end) + { + local_cov -= static_cast(cov_down[local_offset]); + ++local_offset; + } + + //BOOST_LOG_TRIVIAL(info) << __HERE__ << " pos=" + // << find_it->first.pos << " base=" + // << find_it->first.base << " support=" + // << find_it->second << "/" << local_cov; + + // Make sure there is some coverage + if (local_cov <= 0) + local_cov = 1; + + if ((static_cast(find_it->second) / static_cast(local_cov)) < 0.22) + { + return false; + } + + return true; + }; + + // check this bucket + for (auto it2 = std::next(snp_it); it2 != bucket.snps.end(); ++it2) + { + auto find_it = info.phase.find(it2->first); + + if (find_it != info.phase.end()) + { + assert(find_it->first.pos > snp.pos); + bool const is_good = is_good_support(cov, begin + 1, find_it); + + if (is_good) + { + //BOOST_LOG_TRIVIAL(info) << __HERE__ << " GOOD SUPPORT"; + snp_haplotypes.insert(find_it->first); + } + //else + //{ + // BOOST_LOG_TRIVIAL(info) << __HERE__ << " BAD SUPPORT"; + //} + } + } + + // check next bucket + if (b + 1 < NUM_BUCKETS) + { + auto const & next_bucket = buckets[b + 1]; + + for (auto it2 = next_bucket.snps.begin(); it2 != next_bucket.snps.end(); ++it2) + { + auto find_it = info.phase.find(it2->first); + + if (find_it == info.phase.end()) + continue; + + assert(find_it->first.pos > snp.pos); + long const MAX_PHASE_DISTANCE{BUCKET_SIZE}; + + if (find_it->first.pos > (snp.pos + MAX_PHASE_DISTANCE)) + { + //BOOST_LOG_TRIVIAL(info) << __HERE__ << " Too far away " + // << snp.pos << " + " << BUCKET_SIZE << " < " << find_it->first.pos; + continue; // I can break here? 
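The phase check above reduces to a support-ratio test: a later SNP is kept as phased with the current one only if the reads carrying both alleles make up at least roughly 22% of the coverage spanning the whole interval between them, and only within one bucket's distance (MAX_PHASE_DISTANCE). A compact restatement of that test as a free function; the name is hypothetical and the 0.22 threshold is copied from the is_good_support lambda above:

// pair_support: reads observed carrying both SNP alleles (the phase map count).
// spanning_cov: reads overlapping the whole interval between the two SNPs.
bool is_phase_supported(long pair_support, long spanning_cov)
{
  if (spanning_cov <= 0)
    spanning_cov = 1; // make sure there is some coverage, as in the lambda above

  return (static_cast<double>(pair_support) / static_cast<double>(spanning_cov)) >= 0.22;
}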
+ } + + bool const is_good = is_good_support(cov, begin + 1, find_it); + + if (is_good) + { + //BOOST_LOG_TRIVIAL(info) << __HERE__ << " GOOD SUPPORT"; + snp_haplotypes.insert(find_it->first); + //snp_haplotypes.push_back(find_it); + } + //else + //{ + // BOOST_LOG_TRIVIAL(info) << __HERE__ << " BAD SUPPORT"; + //} + } + } + } + + // INDELS + for (Tindel_events::iterator it = bucket.indel_events.begin(); it != bucket.indel_events.end(); /*++it*/) + { + assert(!it->second.has_realignment_support); + + IndelEvent const & indel = it->first; + EventInfo const & indel_info = it->second; + + long const naive_pad = 4.0 + static_cast(indel.sequence.size()) / 3.0; + long const naive_begin = std::max(0l, indel.pos - naive_pad - region_begin); + long const naive_end = std::min(REF_SIZE, indel.pos + indel_info.span + naive_pad - region_begin); + + double const correction = indel.type == 'I' ? + static_cast(indel.sequence.size() / 2.0 + 8.0) / 8.0 : + static_cast(indel.sequence.size() / 3.0 + 10.0) / 10.0; + + double const count = correction * indel_info.count; + + //BOOST_LOG_TRIVIAL(info) << "Indel [pos,pos+span], size [begin,end]: [" << region_begin << " " + // << indel.pos << "," << (indel.pos + indel_info.span) + // << "] " << indel.sequence.size() << " " << correction + // << " [" << naive_begin << "," << naive_end << "]" + // << " count=" << count << " depth=" << depth; + + long cov{depth}; // starting coverage depth + long offset{naive_begin}; + + // Fix coverage starting before here + if (offset <= b * BUCKET_SIZE) + { + while (offset < b * BUCKET_SIZE) + { + cov -= (static_cast(cov_up[offset]) - static_cast(cov_down[offset])); + ++offset; + } + } + else + { + offset = b * BUCKET_SIZE; + + // Add coverage until naive begin + while (offset < naive_begin) + { + cov += (static_cast(cov_up[offset]) - static_cast(cov_down[offset])); + ++offset; + } + } + + // reduce coverage by reads who do not overlap the whole naive interval + while (offset <= naive_end) + { + cov -= static_cast(cov_down[offset]); + ++offset; + } + + uint32_t log_qual{0}; + + { + double const corrected_cov{std::max(static_cast(cov), count)}; + double const anti_count_d{corrected_cov - count}; + log_qual = get_log_qual_double(count, anti_count_d, 10.0); + } + + if (indel_info.count >= 2 && count >= 4.0 && log_qual >= 30) + { + //BOOST_LOG_TRIVIAL(info) << __HERE__ << " Indel has good support."; + it->second.has_good_support = true; + it->second.has_realignment_support = true; + it->second.max_log_qual = log_qual; + it->second.max_log_qual_file_i = file_i; + + ++it; + } + else if (log_qual > 0) + { + // Realignment support, meaning low support but needs realignment to confirm/deny + //BOOST_LOG_TRIVIAL(info) << __HERE__ << " Indel has realignment support."; + assert(it->second.has_good_support == false); + it->second.has_realignment_support = true; + it->second.max_log_qual = log_qual; + it->second.max_log_qual_file_i = file_i; + ++it; + } + else + { + // erase indel if it is not good enough + //BOOST_LOG_TRIVIAL(info) << __HERE__ << " erasing indel=" << indel.to_string(); + it = bucket.indel_events.erase(it); + } + } + + // Add coverage from bucket b + assert(b * BUCKET_SIZE < REF_SIZE); + long offset{b * BUCKET_SIZE}; + long const end_offset = std::min(REF_SIZE, (b + 1) * BUCKET_SIZE); + + while (offset < end_offset) + { + depth += static_cast(cov_up[offset]) - static_cast(cov_down[offset]); + ++offset; + } + } // for (long b{0}; b < static_cast(buckets.size()); ++b) +} + + +void +realign_to_indels(std::vector const & 
realignment_indels, + std::vector & buckets, + long const BUCKET_SIZE, + long const region_begin, + std::vector const & reference_sequence) +{ + long const REF_SIZE = reference_sequence.size(); + long const PAD{50}; // TODO make an option + long const MAX_READ_SIZE{151}; + using Tuint = uint16_t; + paw::AlignmentOptions opts; + opts.set_match(SCORE_MATCH).set_mismatch(SCORE_MISMATCH); + opts.set_gap_open(SCORE_GAP_OPEN).set_gap_extend(SCORE_GAP_EXTEND); + opts.left_column_free = true; + opts.right_column_free = true; + long _alignment_counter{0}; + + // Realign reads and add reference support + for (Tindel_events::iterator indel_it : realignment_indels) + { + IndelEvent const & indel = indel_it->first; + EventInfo const & indel_info = indel_it->second; + long const indel_span = indel.pos + indel_info.span; + + BOOST_LOG_TRIVIAL(debug) << __HERE__ << " Realignment to indel=" << indel.to_string() << " span=" + << indel_info.span; + + //create reference sequence with the indel + long const begin_padded = std::max(0l, indel.pos - MAX_READ_SIZE - 2 * PAD - region_begin); + assert(begin_padded < REF_SIZE); + long const end_padded = indel.pos + MAX_READ_SIZE + 2 * PAD - region_begin; + + auto end_it = end_padded >= REF_SIZE ? + reference_sequence.end() : + reference_sequence.begin() + end_padded; + + std::vector new_ref(reference_sequence.begin() + begin_padded, end_it); + std::vector ref_pos(new_ref.size()); + std::iota(ref_pos.begin(), ref_pos.end(), 0); + //BOOST_LOG_TRIVIAL(info) << "New ref before:\n" << std::string(new_ref.begin(), new_ref.end()); + + { + bool const is_applied = apply_indel_event(new_ref, ref_pos, indel, begin_padded + region_begin); + assert(is_applied); + + if (!is_applied) + continue; + } + + //BOOST_LOG_TRIVIAL(info) << "New ref after:\n" << std::string(new_ref.begin(), new_ref.end()); + assert(buckets.size() > 0); + long b = begin_padded / BUCKET_SIZE; + long b_end = std::min(static_cast(buckets.size()) - 1, end_padded / BUCKET_SIZE); + assert(b < static_cast(buckets.size())); + + std::vector const new_ref_stable(new_ref); + std::vector const ref_pos_stable(ref_pos); + + // Find the smallest bucket that might contain reads that are overlapping the indel + while (b > 0 && buckets[b].global_max_pos_end > (indel.pos - PAD)) + --b; + + // Check any reads that might overlap the indel + for ( ; b <= b_end; ++b) + { + assert(b >= 0); + assert(b < static_cast(buckets.size())); + auto & bucket = buckets[b]; + + if (bucket.max_pos_end <= (indel.pos - PAD)) + continue; + + for (auto & read : bucket.reads) + { + if (read.alignment.pos < 0 || read.sequence.size() == 0) + continue; + + // If the variant already has the indel then we can just update the score and skip the realignment + if (read.alignment.has_indel_event(indel_it)) + continue; + + if ((read.alignment.num_clipped_end == 0 && read.alignment.pos_end < static_cast(indel.pos)) || + (read.alignment.pos_end + read.alignment.num_clipped_end + + std::min(static_cast(read.alignment.num_clipped_end), PAD) < indel.pos) || + (read.alignment.num_clipped_begin == 0 && read.alignment.pos > indel_span) || + (read.alignment.pos - read.alignment.num_clipped_begin - + std::min(static_cast(read.alignment.num_clipped_begin), PAD) > indel_span)) + { + continue; + } + + //if (read.alignment.score == static_cast(read.sequence.size())) // Cannot improve score + // continue; // I cannot make this optimization unless I also add anti-support + assert(new_ref == new_ref_stable); + assert(ref_pos == ref_pos_stable); + bool reset_new_ref{false}; + 
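The decision logic a little further down compares the read's previous alignment score with its score against the indel-modified reference: a strictly better score re-assigns the read to the indel (support), a strictly worse score is recorded as anti-support, and an equal score is recorded as multi-support only when the alignment still spans the indel position. A minimal restatement of that classification; the enum and function name are hypothetical stand-ins for the patch's READ_ANTI_SUPPORT/READ_MULTI_SUPPORT bookkeeping:

enum class RealignOutcome { support, anti_support, multi_support, unchanged };

RealignOutcome classify_realignment(long old_score, long new_score, bool alignment_spans_indel)
{
  if (new_score > old_score)
    return RealignOutcome::support;      // read is moved onto the indel haplotype

  if (new_score < old_score)
    return RealignOutcome::anti_support; // read prefers the haplotype without this indel

  return alignment_spans_indel ? RealignOutcome::multi_support // same score either way
                               : RealignOutcome::unchanged;
}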
std::vector applied_events{}; + applied_events.push_back({0, indel_it}); + + for (auto it = read.alignment.indel_events.cbegin(); it != read.alignment.indel_events.cend(); ++it) + { + IndelEvent const & indel_event = it->event_it->first; + EventInfo const & event_info = it->event_it->second; + + if (event_info.has_realignment_support) + { + bool ret = apply_indel_event(new_ref, ref_pos, indel_event, begin_padded + region_begin, false); + + if (ret) + { + if (read.name == debug_read_name) + { + BOOST_LOG_TRIVIAL(debug) << __HERE__ << " Applied event " << it->event_it->first.to_string(); + } + + applied_events.push_back({0, it->event_it}); + reset_new_ref = true; + } + else + { + if (read.name == debug_read_name) + { + apply_indel_event(new_ref, ref_pos, indel_event, begin_padded + region_begin, true); + BOOST_LOG_TRIVIAL(debug) << __HERE__ << " Could not apply event " << it->event_it->first.to_string(); + } + + applied_events.push_back({READ_ANTI_SUPPORT, it->event_it}); + } + } + } + + long const old_score = read.alignment.score; + + // TODO write greedy alignment + paw::global_alignment(read.sequence, new_ref, opts); + ++_alignment_counter; + assert(opts.get_alignment_results()); + auto const clip = opts.get_alignment_results()->apply_clipping(read.sequence, + new_ref, + opts.get_match(), + opts.get_mismatch(), + opts.get_gap_open(), + opts.get_gap_extend(), + opts.get_clip()); + + paw::AlignmentResults const & ar = *opts.get_alignment_results(); + auto p = ar.get_database_begin_end(read.sequence, new_ref); + + if (p.first == 0 || p.second == ar.database_end) + { + auto aligned_strings = ar.get_aligned_strings(read.sequence, new_ref); + + BOOST_LOG_TRIVIAL(warning) << __HERE__ << " Insufficient padding in read alignment, skipping read. " + << read.to_string() << "\n" + << aligned_strings.first << '\n' + << aligned_strings.second; + + if (reset_new_ref) + { + new_ref = new_ref_stable; + ref_pos = ref_pos_stable; + } + + continue; + } + + if (read.name == debug_read_name) + { + //if (clip.first > 0 || clip.second < static_cast(read.sequence.size())) + { + BOOST_LOG_TRIVIAL(debug) << __HERE__ << " name=" << read.name << " pos=" << read.alignment.pos + << " begin,end_clip=" << clip.first << "," << clip.second + << " old_score=" << old_score << " new_score=" << ar.score; + } + } + + if (ar.score <= old_score) + { + // If the score is worse and the sequence is not clipped it is anti-support + if (ar.score < old_score) + { + if (read.name == debug_read_name) + { + auto aligned_strings = ar.get_aligned_strings(read.sequence, new_ref); + + BOOST_LOG_TRIVIAL(debug) << __HERE__ << " ANTI old_score, new_score, read = " << old_score << ", " + << ar.score << ", " + << read.name << '\n' << aligned_strings.first << '\n' + << aligned_strings.second; + } + + read.alignment.add_indel_event(READ_ANTI_SUPPORT, indel_it); + } + else if (indel_it->first.pos >= (ref_pos[p.first] + begin_padded) && + indel_it->first.pos <= (ref_pos[p.second] + begin_padded)) + { + assert(ar.score == old_score); + + if (read.name == debug_read_name) + { + BOOST_LOG_TRIVIAL(debug) << __HERE__ << " SAME SCORE " << old_score + << " read=" << read.to_string(); + } + + read.alignment.add_indel_event(READ_MULTI_SUPPORT, indel_it); + } + + if (reset_new_ref) + { + new_ref = new_ref_stable; + ref_pos = ref_pos_stable; + } + + continue; + } + + // We got a better score + read.alignment.replace_indel_events(std::move(applied_events)); + + // Update alignment + read.alignment.pos = ref_pos[p.first] + begin_padded; + read.alignment.pos_end = 
ref_pos[p.second] + begin_padded; + read.alignment.score = ar.score; + read.alignment.num_clipped_begin = clip.first; + read.alignment.num_clipped_end = static_cast(read.sequence.size()) - clip.second; + + // count number of inserted bases at the beginning of the read + { + long num_ins{0}; + + while (p.first + num_ins + 1 < static_cast(ref_pos.size()) && + ref_pos[p.first + num_ins] == ref_pos[p.first + num_ins + 1]) + { + ++num_ins; + } + + read.alignment.num_ins_begin = num_ins; + } + + if (read.name == debug_read_name) + { + auto aligned_strings = ar.get_aligned_strings(read.sequence, new_ref); + BOOST_LOG_TRIVIAL(info) << __HERE__ << " score=" << ar.score << "\n" + << aligned_strings.first << "\n" << aligned_strings.second; + } + + // Generate and edit script. No normalize and ignore indels at ends + /* + std::set edits = paw::get_edit_script(aligned_strings, false, true); + long const num_edits = edits.size(); + + auto it = edits.begin(); + long gaps{0}; + + for (long e{0}; e < num_edits; ++e, ++it) + { + // Skip first and last edit (they are padding events) + assert(it != edits.end()); + paw::Event2 const & edit = *it; + + if (read.name == debug_read_name) + { + BOOST_LOG_TRIVIAL(debug) << __HERE__ << " e=" << e << " pos=" << edit.pos << " pos_q=" + << edit.pos_q << " ref=" + << edit.ref << " alt=" << edit.alt << " ref_pos=" + << (begin_padded + gaps + edit.pos); + } + + // Skip SNPs + if (edit.alt.size() == edit.ref.size()) + continue; + + IndelEvent new_indel; + assert((gaps + edit.pos) < static_cast(ref_pos.size())); + + if (edit.alt.size() > edit.ref.size()) + { + assert(edit.ref.size() == 0); + + new_indel.pos = (begin_padded + ref_pos[edit.pos]); + new_indel.type = 'i'; // second level insertion + new_indel.sequence = std::vector(edit.alt.begin(), edit.alt.end()); + } + else + { + assert(edit.alt.size() == 0); + + new_indel.pos = (begin_padded + ref_pos[edit.pos]); + new_indel.type = 'd'; + new_indel.sequence = std::vector(edit.ref.begin(), edit.ref.end()); + } + + // the new indel needs to be in the region + if (new_indel.pos >= region_begin) + { + long const read_offset{edit.pos}; + long const ref_offset{new_indel.pos - region_begin}; + + auto indel_event_it = + add_indel_event_to_bucket(buckets, + std::move(new_indel), + region_begin, + BUCKET_SIZE, + reference_sequence, + ref_offset); + + read.alignment.add_indel_event(read_offset, indel_event_it); + } + + gaps += edit.alt.size() - edit.ref.size(); + } + //*/ + + if (reset_new_ref) + { + new_ref = new_ref_stable; + ref_pos = ref_pos_stable; + } + } + } + } + + BOOST_LOG_TRIVIAL(debug) << __HERE__ << " alignment counter = " << _alignment_counter; + + // Print counts etc + for (Tindel_events::iterator indel_it : realignment_indels) + { + IndelEvent const & indel = indel_it->first; + EventInfo & indel_info = indel_it->second; + + if (indel_info.has_good_support) + continue; + + BOOST_LOG_TRIVIAL(debug) << __HERE__ << " Realignment results for indel=" << indel.to_string() << " " + << indel_info.count << "," << indel_info.anti_count << " MD=" << indel_info.multi_count + << " log_qual=" << indel_info.log_qual(10); + + // Check if it is good after realignment + if (indel_info.log_qual(10) > 30) + indel_info.has_good_support = true; + } +} + + +void +read_hts_and_return_realignment_indels(bam1_t * hts_rec, + HtsReader & hts_reader, + std::vector & buckets, + long const BUCKET_SIZE, + long const region_begin, + std::vector const & reference_sequence) +{ + long const REF_SIZE{static_cast(reference_sequence.size())}; + int32_t 
global_max_pos_end{0}; + + while (true) + { + assert(hts_rec); + auto const & core = hts_rec->core; + + if (core.n_cigar == 0 || core.pos < region_begin) + { + hts_rec = hts_reader.get_next_read(hts_rec); + + if (!hts_rec) + break; + } + + std::array static constexpr CIGAR_MAP = {{ + 'M', 'I', 'D', 'N', 'S', 'H', 'P', '=', 'X', 'B', '*', '*', '*', '*', '*', '*' + }}; + + auto it = hts_rec->data; + auto cigar_it = it + core.l_qname; + auto seq_it = cigar_it + (core.n_cigar << 2); + + // Index of first aligned read base in read sequence + long read_offset{0}; + + // Index of first aligned read base in reference_sequence + long ref_offset{static_cast(core.pos) - region_begin}; + + // Get reference to bucket + long const bucket_id{ref_offset / BUCKET_SIZE}; + + // While experimenting say that id is also index, then go back to: + // long const bucket_index = bucket_id % NUM_BUCKETS; + long const bucket_index{bucket_id}; + + assert(bucket_index < static_cast(buckets.size())); + //if (bucket_index >= static_cast(buckets.size())) + // buckets.resize(bucket_index + 1); + + long const N_CIGAR = core.n_cigar; + + if (ref_offset < 0 || ref_offset >= REF_SIZE) + { + BOOST_LOG_TRIVIAL(error) << __HERE__ << " Unexpected ref_offset = " << ref_offset; + std::exit(1); + } + + Read read; + read.name = reinterpret_cast(it); + + if ((core.flag & IS_FIRST_IN_PAIR) != 0u) + read.name.append("/1"); + else + read.name.append("/2"); + + read.mate_pos = static_cast(core.mpos); + read.flags = core.flag; + read.mapq = core.qual; + + read.sequence.resize(core.l_qseq); + + for (int i = 0; i < core.l_qseq; ++i) + read.sequence[i] = seq_nt16_str[bam_seqi(seq_it, i)]; + + read.qual.resize(core.l_qseq); + + { + auto qual_it = bam_get_qual(hts_rec); + + for (int i = 0; i < core.l_qseq; ++qual_it, ++i) + read.qual[i] = static_cast(*qual_it); + } + + read.alignment.score = 0; + + for (long i{0}; i < N_CIGAR; ++i) + { + uint32_t opAndCnt; + memcpy(&opAndCnt, cigar_it, sizeof(uint32_t)); + cigar_it += sizeof(uint32_t); + + char const cigar_operation = CIGAR_MAP[opAndCnt & 15]; + uint32_t const count = opAndCnt >> 4; + assert(count > 0); + + switch (cigar_operation) + { + case 'M': + case '=': // '=' and 'X' are typically not used by aligners (at least not by default), + case 'X': // but we keep it here just in case + { + assert(ref_offset < REF_SIZE); + auto ref_it = reference_sequence.begin() + ref_offset; + + for (long r{0}; r < count; ++r, ++ref_it) + { + char const alt = read.sequence[r + read_offset]; + + if (alt != *ref_it && alt != 'N' && *ref_it != 'N') + { + read.alignment.score -= SCORE_MISMATCH; + } + else + { + read.alignment.score += SCORE_MATCH; + } + } + + read_offset += count; + ref_offset += count; + break; + } + + case 'I': + { + assert(count > 0); + + auto end_it = (read_offset + count) < static_cast(read.sequence.size()) ? 
+ read.sequence.begin() + read_offset + count : + read.sequence.end(); + + std::vector event_sequence(read.sequence.begin() + read_offset, end_it); + IndelEvent new_event = make_insertion_event(region_begin + ref_offset, + std::move(event_sequence)); + + auto indel_event_it = + add_indel_event_to_bucket(buckets, + std::move(new_event), + region_begin, + BUCKET_SIZE, + reference_sequence, + ref_offset); + + //indel_event_it->second.has_support_in_file = true; + + if (!indel_event_it->second.has_realignment_support) + read.alignment.score -= (SCORE_GAP_OPEN + (count - 1) * SCORE_GAP_EXTEND); + else + read.alignment.score += SCORE_MATCH * count; + + read.alignment.add_indel_event(read_offset, indel_event_it); + read_offset += count; + break; + } + + case 'D': + { + assert(count > 0); + + IndelEvent new_event = + make_deletion_event(reference_sequence, ref_offset, region_begin + ref_offset, count); + + auto indel_event_it = + add_indel_event_to_bucket(buckets, + std::move(new_event), + region_begin, + BUCKET_SIZE, + reference_sequence, + ref_offset); + + //indel_event_it->second.has_support_in_file = true; + + if (!indel_event_it->second.has_realignment_support) + read.alignment.score -= (SCORE_GAP_OPEN + (count - 1) * SCORE_GAP_EXTEND); + + read.alignment.add_indel_event(read_offset, indel_event_it); + ref_offset += count; + break; + } + + case 'S': + { + read_offset += count; + set_bit(read.flags, IS_CLIPPED); + read.alignment.score -= SCORE_CLIP; + + if (i == 0) + { + read.alignment.num_clipped_begin = count; + } + else + { + assert(i + 1 == N_CIGAR); + read.alignment.num_clipped_end = count; + } + + break; + } // else 'H' or 'P' which move neither reference nor read + } + } + + read.alignment.pos = static_cast(core.pos); + read.alignment.pos_end = region_begin + ref_offset; + auto & bucket = buckets[bucket_index]; + int32_t const end_with_clip = read.alignment.pos_end + read.alignment.num_clipped_end; + + if (end_with_clip > bucket.max_pos_end) + { + bucket.max_pos_end = end_with_clip; + global_max_pos_end = std::max(global_max_pos_end, end_with_clip); + } + + bucket.global_max_pos_end = global_max_pos_end; + bucket.reads.push_back(std::move(read)); + + hts_rec = hts_reader.get_next_read(hts_rec); + + if (!hts_rec) + break; + } + + // realign +} + + +void +parallel_first_pass(std::vector * hts_paths_ptr, + std::map > * pool_snp_haplotypes_ptr, + std::vector > * file_buckets_first_pass_ptr, + std::vector * sample_names_ptr, + std::vector * reference_sequence_ptr, + long const BUCKET_SIZE, + long const region_begin, + long const lowest_file_i) +{ + assert(hts_paths_ptr); + assert(pool_snp_haplotypes_ptr); + + std::vector const & hts_paths = *hts_paths_ptr; + std::map > & pool_snp_haplotypes = *pool_snp_haplotypes_ptr; + std::vector const & reference_sequence = *reference_sequence_ptr; + + for (long i{0}; i < static_cast(hts_paths.size()); ++i) + { + // BAM/CRAM record storage + HtsStore store; + + // Open BAM/CRAM file + HtsReader hts_reader(store); + hts_reader.open(hts_paths[i], std::string("."), ""); + + // Get the first read + bam1_t * hts_rec = hts_reader.get_next_read(); + + // Check if there were any reads + if (!hts_rec) + { + hts_reader.close(); + continue; + } + + run_first_pass(hts_rec, + hts_reader, + lowest_file_i + i, + (*file_buckets_first_pass_ptr)[lowest_file_i + i], + pool_snp_haplotypes, + BUCKET_SIZE, + region_begin, + reference_sequence); + + // Add sample + if (hts_reader.samples.size() > 1) + { + BOOST_LOG_TRIVIAL(error) << __HERE__ << " We found file with multiple 
samples, sorry, " + << "this is currently not supported."; + std::exit(1); + } + + assert(hts_reader.samples.size() == 1); + (*sample_names_ptr)[lowest_file_i + i] = hts_reader.samples[0]; + + // Close BAM/CRAM file + hts_reader.close(); + } // for (long file_i{0}; file_i < static_cast(hts_paths.size()); ++file_i) +} + + +void +parallel_second_pass(std::string const * hts_path_ptr, + std::vector const * reference_sequence_ptr, + Tindel_events * indel_events_ptr, + std::vector * indel_to_realign_ptr, + long const BUCKET_SIZE, + long const NUM_BUCKETS, + long const region_begin, + long const file_i) +{ + if (indel_to_realign_ptr->size() == 0) + return; + + std::string const & hts_path = *hts_path_ptr; + std::vector const & reference_sequence = *reference_sequence_ptr; + Tindel_events const & indel_events = *indel_events_ptr; + std::vector & indel_to_realign = *indel_to_realign_ptr; + + std::vector buckets; + buckets.resize(NUM_BUCKETS); + + // I/O + { + // BAM/CRAM record storage + HtsStore store; + + // Open BAM/CRAM file + HtsReader hts_reader(store); + hts_reader.open(hts_path, std::string("."), ""); + + // Get the first read + bam1_t * hts_rec = hts_reader.get_next_read(); + + // Check if there were any reads + if (!hts_rec) + { + hts_reader.close(); + return; + } + + read_hts_and_return_realignment_indels(hts_rec, + hts_reader, + buckets, + BUCKET_SIZE, + region_begin, + reference_sequence); + + // Close BAM/CRAM file + hts_reader.close(); + } // I/O ends + + BOOST_LOG_TRIVIAL(debug) << __HERE__ << " I/O DONE."; + std::vector nearby_good_events; + long constexpr NEARBY_BP = 20; + + for (Tindel_events::iterator indel : indel_to_realign) + { + Tindel_events::iterator indel_it = indel; + + // Check before the indel + while (indel_it != indel_events.begin()) + { + --indel_it; + + if (indel_it->second.has_good_support && indel_it->first.pos + NEARBY_BP >= indel->first.pos) + { + // It also needs to be found in the file + if (is_indel_in_bucket(buckets, indel_it->first, region_begin, BUCKET_SIZE)) + nearby_good_events.push_back(indel_it); + } + } + + // Check after the indel + indel_it = std::next(indel); + + while (indel_it != indel_events.end()) + { + if (indel_it->second.has_good_support && indel_it->first.pos - NEARBY_BP <= indel->first.pos) + { + // It also needs to be found in the file + if (is_indel_in_bucket(buckets, indel_it->first, region_begin, BUCKET_SIZE)) + nearby_good_events.push_back(indel_it); + } + + ++indel_it; + } + } + + std::sort(nearby_good_events.begin(), nearby_good_events.end(), + [](Tindel_events::iterator const & a, + Tindel_events::iterator const & b) -> bool + { + return a->first.pos < b->first.pos; + }); + + auto end_unique_it = std::unique(nearby_good_events.begin(), nearby_good_events.end()); + std::move(nearby_good_events.begin(), end_unique_it, std::back_inserter(indel_to_realign)); + + std::sort(indel_to_realign.begin(), indel_to_realign.end(), + [](Tindel_events::iterator const & a, + Tindel_events::iterator const & b) -> bool + { + //return a->second.max_log_qual > b->second.max_log_qual || + // (a->second.max_log_qual == b->second.max_log_qual && a->first.pos < b->first.pos); + return a->second.has_good_support > b->second.has_good_support || + (a->second.has_good_support == b->second.has_good_support && a->first.pos < b->first.pos); + }); + + BOOST_LOG_TRIVIAL(debug) << __HERE__ << " Number of realignment indels,file_i=" + << indel_to_realign.size() << "," << file_i; + BOOST_LOG_TRIVIAL(debug) << __HERE__ << " Nearby realignment indels=" + << 
nearby_good_events.size(); + + realign_to_indels(indel_to_realign, //realignment_indels, + buckets, + BUCKET_SIZE, + region_begin, + reference_sequence); + + BOOST_LOG_TRIVIAL(debug) << __HERE__ << " Realignment DONE."; +} + + +void +streamlined_discovery(std::vector const & hts_paths, + std::string const & ref_path, + std::string const & region_str, + std::string const & output_dir, + gyper::Vcf & vcf) +{ + BOOST_LOG_TRIVIAL(info) << "Start WIP on region " << region_str; + + long const NUM_FILES = hts_paths.size(); + GenomicRegion genomic_region(region_str); + long const region_begin{genomic_region.begin}; + std::vector reference_sequence; + open_and_read_reference_genome(reference_sequence, ref_path, genomic_region); + long constexpr BUCKET_SIZE{50}; // TODO make an option + std::vector sample_names; + sample_names.resize(NUM_FILES); + + std::vector > > spl_hts_paths{}; + std::vector > > > spl_snp_hq_haplotypes{}; + long jobs{1}; + + { + long num_parts{1}; + _determine_num_jobs_and_num_parts(jobs, num_parts, NUM_FILES); + + { + auto it = hts_paths.cbegin(); + spl_hts_paths.reserve(num_parts); + spl_snp_hq_haplotypes.reserve(num_parts); + + for (long i = 0; i < num_parts; ++i) + { + auto end_it = it + NUM_FILES / num_parts + (i < (NUM_FILES % num_parts)); + assert(std::distance(hts_paths.cbegin(), end_it) <= NUM_FILES); + spl_hts_paths.emplace_back(new std::vector(it, end_it)); + spl_snp_hq_haplotypes.emplace_back(new std::map >()); + it = end_it; + } + } + } + + // New pipeline which splits into two passes: (1) discover potential indels (2) discover SNPs+genotypes all variants + BOOST_LOG_TRIVIAL(debug) << __HERE__ << " First pass starting."; + long const NUM_POOLS{static_cast(spl_hts_paths.size())}; + BOOST_LOG_TRIVIAL(debug) << __HERE__ << " Number of pools = " << NUM_POOLS; + Tindel_events indel_events; + long NUM_BUCKETS{0}; + + // FIRST PASS + { + std::vector > file_buckets_first_pass(NUM_FILES); + + // Parallel first pass + { + paw::Station first_station(jobs); + long file_i{0}; + + for (long i{0}; i < NUM_POOLS - 1; ++i) + { + first_station.add_work(parallel_first_pass, + &(*spl_hts_paths[i]), + &(*spl_snp_hq_haplotypes[i]), + &file_buckets_first_pass, + &sample_names, + &reference_sequence, + BUCKET_SIZE, + region_begin, + file_i); + + file_i += spl_hts_paths[i]->size(); + } + + // Do the last pool on the current thread + first_station.add_to_thread(jobs - 1, + parallel_first_pass, + &(*spl_hts_paths[NUM_POOLS - 1]), + &(*spl_snp_hq_haplotypes[NUM_POOLS - 1]), + &file_buckets_first_pass, + &sample_names, + &reference_sequence, + BUCKET_SIZE, + region_begin, + file_i); + + first_station.join(); + } + + // Find SNPs and their SNP haplotypes + std::map > snp_haplotypes = std::move(*spl_snp_hq_haplotypes[0]); + + for (long i{1}; i < NUM_POOLS; ++i) + { + std::map > const & pool_snp_haplotypes = *spl_snp_hq_haplotypes[i]; + + for (auto it = pool_snp_haplotypes.begin(); it != pool_snp_haplotypes.end(); ++it) + { + auto insert_it = snp_haplotypes.insert(*it); + + if (!insert_it.second) + { + std::copy(it->second.begin(), + it->second.end(), + std::inserter(insert_it.first->second, insert_it.first->second.begin())); + } + } + } + + for (auto const & snp_haplotype : snp_haplotypes) + { + BOOST_LOG_TRIVIAL(info) << __HERE__ << " SNP=" << snp_haplotype.first.to_string(); + + for (auto const & hap : snp_haplotype.second) + { + BOOST_LOG_TRIVIAL(info) << __HERE__ << " phased with=" << hap.to_string(); + } + } + + // Create buckets + for (auto && buckets_first : file_buckets_first_pass) + { + 
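// Merge each file's per-bucket indel events into the global indel_events map:
// events not seen before are inserted as-is, while for events that are already
// present only the highest max_log_qual (and the index of the file it came
// from) is kept, so the second pass can realign reads from the single
// best-supporting file.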
if (static_cast(buckets_first.size()) > NUM_BUCKETS) + NUM_BUCKETS = buckets_first.size(); + + for (long b{0}; b < static_cast(buckets_first.size()); ++b) + { + auto && bucket_first = buckets_first[b]; + + for (auto && indel_event : bucket_first.indel_events) + { + auto find_it = indel_events.find(indel_event.first); + + if (find_it == indel_events.end()) + { + // Not found - Insert it + indel_events.insert(std::move(indel_event)); + } + else + { + // Found - Check if support is better + if (indel_event.second.max_log_qual > find_it->second.max_log_qual) + { + find_it->second.max_log_qual = indel_event.second.max_log_qual; + find_it->second.max_log_qual_file_i = indel_event.second.max_log_qual_file_i; + } + } + } + } + } + } + + // FIRST PASS ENDS + BOOST_LOG_TRIVIAL(info) << __HERE__ << " First pass DONE. Starting second pass."; + + + // SECOND PASS BEGINS + { + std::vector > indel_to_realign; + indel_to_realign.resize(NUM_FILES); + + // Add indels to events + for (auto it = indel_events.begin(); it != indel_events.end(); ++it) + { + it->second.count = 0; + assert(it->second.anti_count == 0); + assert(it->second.multi_count == 0); + assert(it->second.has_realignment_support); + + if (!it->second.has_good_support) + { + BOOST_LOG_TRIVIAL(info) << __HERE__ << " realignment indel=" << it->first.to_string() << " qual,file_i=" + << it->second.max_log_qual << " " << it->second.max_log_qual_file_i; + + assert(it->second.max_log_qual_file_i < static_cast(indel_to_realign.size())); + indel_to_realign[it->second.max_log_qual_file_i].push_back(it); + } + } + + { + paw::Station second_station(jobs); + + for (long file_i{0}; file_i < (NUM_FILES - 1); ++file_i) + { + if (indel_to_realign[file_i].size() > 0) + { + second_station.add_work(parallel_second_pass, + &(hts_paths[file_i]), + &reference_sequence, + &indel_events, + &(indel_to_realign[file_i]), + BUCKET_SIZE, + NUM_BUCKETS, + region_begin, + file_i); + } + } + + // Last file is run on current thread + long const file_i{(NUM_FILES - 1)}; + + if (indel_to_realign[file_i].size() > 0) + { + second_station.add_to_thread(jobs - 1, + parallel_second_pass, + &(hts_paths[file_i]), + &reference_sequence, + &indel_events, + &(indel_to_realign[file_i]), + BUCKET_SIZE, + NUM_BUCKETS, + region_begin, + file_i); + } + + second_station.join(); + } + + vcf.open(WRITE_BGZF_MODE, output_dir + "/indel_sites.vcf.gz"); + uint64_t const chromosome_offset = genomic_region.get_absolute_position(1); + + for (auto it = indel_events.begin(); it != indel_events.end();) // No increment + { + if (it->second.has_good_support) + { + IndelEvent const & indel_event = it->first; + + Variant variant{}; + uint32_t const abs_pos = indel_event.pos + chromosome_offset; + + if (indel_event.type == 'D') + { + variant.abs_pos = abs_pos; + variant.seqs.push_back({indel_event.sequence}); + variant.seqs.push_back({}); + variant.add_base_in_front(true); + variant.type = 'D'; + } + else if (indel_event.type == 'I') + { + variant.abs_pos = abs_pos; + variant.seqs.push_back({}); + variant.seqs.push_back({indel_event.sequence}); + variant.add_base_in_front(true); + variant.type = 'I'; + } + + vcf.variants.push_back(std::move(variant)); + + // clean indel info + it->second.clean_counts(); + ++it; + } + else + { + BOOST_LOG_TRIVIAL(debug) << __HERE__ << " Removing indel=" << it->first.to_string(); + it = indel_events.erase(it); + } + } + +#ifndef NDEBUG + vcf.write(); +#endif // NDEBUG + } + + BOOST_LOG_TRIVIAL(info) << __HERE__ << " WIP done"; +} + + } // namespace gyper diff --git 
a/src/typer/event.cpp b/src/typer/event.cpp new file mode 100644 index 0000000..6a80902 --- /dev/null +++ b/src/typer/event.cpp @@ -0,0 +1,410 @@ +#include +#include + +#include + +#include +#include +#include + + +namespace gyper +{ + +long +base2index(char const base) +{ + switch (base) + { + case 'A': + { + return 0; + } + + case 'C': + { + return 1; + } + + case 'G': + { + return 2; + } + + case 'T': + { + return 3; + } + + case 'N': + { + return -1; + } + + case '*': + { + return -2; + } + + default: + { + BOOST_LOG_TRIVIAL(error) << __HERE__ << " Unexpected base '" << base << "'"; + std::exit(1); + } + } // switch ends +} + + +long +BaseCount::get_depth_without_deleted() const +{ + return acgt[0] + acgt[1] + acgt[2] + acgt[3]; +} + + +long +BaseCount::get_depth_with_deleted() const +{ + return acgt[0] + acgt[1] + acgt[2] + acgt[3] + deleted; +} + + +std::string +BaseCount::to_string() const +{ + std::ostringstream ss; + ss << acgt[0] << "," << acgt[1] << "," << acgt[2] << "," << acgt[3] << ":" + << acgt_qualsum[0] << "," << acgt_qualsum[1] << "," << acgt_qualsum[2] + << "," << acgt_qualsum[3] << " " << deleted; + return ss.str(); +} + + +void +BaseCount::add_base(char seq, char qual) +{ + long i = base2index(seq); + + if (i < 0) + { + if (i == -1) + { + ++unknown; + return; + } + else + { + assert(i == -2); + // -2 is deleted base + ++deleted; + return; + } + } + + assert(i < 4); + ++(acgt[i]); + acgt_qualsum[i] += static_cast(qual); +} + + +bool +IndelEvent::operator==(IndelEvent const & b) const +{ + return (pos == b.pos) && + (type == b.type) && + (sequence == b.sequence); +} + + +bool +IndelEvent::operator!=(IndelEvent const & b) const +{ + return !(*this == b); +} + + +bool +IndelEvent::operator<(IndelEvent const & b) const +{ + return pos < b.pos || (pos == b.pos && type < b.type) || (pos == b.pos && type == b.type && sequence < b.sequence); +} + + +bool +SnpEvent::operator==(SnpEvent const & b) const +{ + return (pos == b.pos) && (base == b.base); +} + + +bool +SnpEvent::operator!=(SnpEvent const & b) const +{ + return !(*this == b); +} + + +bool +SnpEvent::operator<(SnpEvent const & b) const +{ + return pos < b.pos || (pos == b.pos && base < b.base); +} + + +double +SnpEventInfo::corrected_support() const +{ + return static_cast(hq_count) + static_cast(lq_count) / 2.0; +} + + +bool +SnpEventInfo::has_good_support(long const cov) const +{ + int const _depth = hq_count + lq_count; + bool const is_promising = uniq_pos3 != -1 && hq_count >= 4 && proper_pairs >= 3; + gyper::Options const & copts = *(Options::const_instance()); + + return (copts.no_filter_on_begin_pos || uniq_pos2 != -1) + && + (!copts.filter_on_proper_pairs || (proper_pairs >= 2)) + && + (hq_count >= 3) + && + (!copts.filter_on_read_bias || + is_promising || + (first_in_pairs > 0 && first_in_pairs < _depth)) + && + (!copts.filter_on_strand_bias || + (is_promising && sequence_reversed > 0 && sequence_reversed < _depth) || + (sequence_reversed > 1 && sequence_reversed < (_depth - 1))) + && + ((clipped + 3) <= _depth) + && + ((is_promising && hq_count >= 7) || max_distance > 10) + && + (corrected_support() >= 3.9) + && + (cov <= 0 || (static_cast(_depth) / static_cast(cov) > 0.26)); +} + + +std::string +SnpEventInfo::to_string() const +{ + std::ostringstream ss; + + ss << "hq=" << hq_count + << ",lq=" << lq_count + << ",pp=" << proper_pairs + << ",first=" << first_in_pairs + << ",reversed=" << sequence_reversed + << ",clipped=" << clipped + << ",max_mapq=" << static_cast(max_mapq) + << ",max_distance=" << 
static_cast(max_distance) + << ",uniq_pos1=" << uniq_pos1 + << ",uniq_pos2=" << uniq_pos2 + << ",uniq_pos3=" << uniq_pos3 + << ",phase_num=" << phase.size(); + + return ss.str(); +} + + +std::vector +get_phred_biallelic(uint32_t count, uint32_t anti_count, uint32_t eps) +{ + std::vector phred(3); + + uint64_t gt00 = count * eps; + uint64_t gt01 = count + anti_count; + uint64_t gt11 = anti_count * eps; + uint64_t const min_gt = std::min({gt00, gt01, gt11}); + gt00 = (gt00 - min_gt) * 3ul; + gt01 = (gt01 - min_gt) * 3ul; + gt11 = (gt11 - min_gt) * 3ul; + + uint64_t constexpr MAX = std::numeric_limits::max(); + phred[0] = std::min(gt00, MAX); + phred[1] = std::min(gt01, MAX); + phred[2] = std::min(gt11, MAX); + return phred; +} + + +uint32_t +get_log_qual(uint32_t count, uint32_t anti_count, uint32_t eps) +{ + uint32_t const gt00 = count * eps; + uint32_t const gt01 = count + anti_count; + uint32_t const gt11 = anti_count * eps; + uint32_t const gt_alt = std::min(gt01, gt11); + + if (gt00 > gt_alt) + return gt00 - gt_alt; + else + return 0u; +} + + +uint32_t +get_log_qual_double(double count, double anti_count, double eps) +{ + double const gt00 = count * eps; + double const gt01 = count + anti_count; + double const gt11 = anti_count * eps; + double const gt_alt = std::min(gt01, gt11); + + if (gt00 > gt_alt) + return static_cast(gt00 - gt_alt + 0.5); + else + return 0u; +} + + +uint32_t +EventInfo::log_qual(uint32_t eps) const +{ + return get_log_qual(count, anti_count, eps); +} + + +void +EventInfo::clean_counts() +{ + count = 0; + anti_count = 0; + multi_count = 0; +} + + +bool +apply_indel_event(std::vector & sequence, + std::vector & ref_positions, + IndelEvent const & indel_event, + long const offset, + bool const is_debug) +{ + assert(sequence.size() == ref_positions.size()); + long const ref_pos{indel_event.pos - offset}; + + if (ref_pos <= 0) + return false; + + long pos{ref_pos}; // Start the search for the reference position here + long const event_size{static_cast(indel_event.sequence.size())}; + long const seq_size{static_cast(sequence.size())}; + assert(pos < seq_size); + + if (ref_positions[pos] != ref_pos) + { + while ((pos + 1) < seq_size && ref_positions[pos] < ref_pos) + ++pos; + + while (pos > 0 && ref_positions[pos] > ref_pos) + --pos; + + // Check again + if (ref_positions[pos] != ref_pos) + { + if (is_debug) + BOOST_LOG_TRIVIAL(warning) << __HERE__ << " Couldn't find pos for event " << indel_event.to_string(); + + return false; + } + } + + // Check purity + { + long constexpr PURITY_PAD{3}; + long const begin = std::max(0l, pos - PURITY_PAD); + long const end = std::min(static_cast(ref_positions.size()), pos + PURITY_PAD); + + assert(begin >= 0); + assert(begin <= end); + assert(end <= static_cast(ref_positions.size())); + + long prev_ref_pos = ref_positions[begin]; + + for (long p{begin + 1}; p < end; ++p) + { + if (ref_positions[p] == prev_ref_pos + 1) + { + ++prev_ref_pos; + } + else + { + if (is_debug) + BOOST_LOG_TRIVIAL(debug) << __HERE__ << " Not pure " << indel_event.to_string(); + + return false; // Not pure, skip indel + } + } + } + + if (indel_event.type == 'D') + { + if ((pos + event_size) >= static_cast(ref_positions.size()) || + ref_positions[pos + event_size] != (ref_pos + event_size)) + { + if (is_debug) + BOOST_LOG_TRIVIAL(debug) << __HERE__ << " Cannot apply deletion"; + + return false; + } + + std::vector::iterator end_it = sequence.end(); + std::vector::iterator end_pos_it = ref_positions.end(); + + if ((pos + event_size) < seq_size) + { + end_it = 
sequence.begin() + pos + event_size; + end_pos_it = ref_positions.begin() + pos + event_size; + } + + sequence.erase(sequence.begin() + pos, end_it); + ref_positions.erase(ref_positions.begin() + pos, end_pos_it); + } + else if (indel_event.type == 'I') + { + sequence.insert(sequence.begin() + pos, indel_event.sequence.begin(), indel_event.sequence.end()); + + std::vector new_pos(event_size, pos + 1); + ref_positions.insert(ref_positions.begin() + pos + 1, new_pos.begin(), new_pos.end()); + } + else + { + BOOST_LOG_TRIVIAL(error) << "Unknown type: " << indel_event.type; + return false; + } + + assert(sequence.size() == ref_positions.size()); + return true; +} + + +IndelEvent +make_deletion_event(std::vector const & reference_sequence, long ref_offset, int32_t pos, long count) +{ + IndelEvent new_event(pos, 'D'); + + assert(ref_offset >= 0); + assert(ref_offset + count < static_cast(reference_sequence.size())); + + new_event.sequence = std::vector(reference_sequence.begin() + ref_offset, + reference_sequence.begin() + ref_offset + count); + + return new_event; +} + + +IndelEvent +make_insertion_event(int32_t pos, std::vector && event_sequence) +{ + IndelEvent new_event(pos, 'I'); + new_event.sequence = std::move(event_sequence); + return new_event; +} + + +} // namespace gyper diff --git a/src/typer/genotype_paths.cpp b/src/typer/genotype_paths.cpp index f434555..d882134 100644 --- a/src/typer/genotype_paths.cpp +++ b/src/typer/genotype_paths.cpp @@ -88,6 +88,8 @@ GenotypePaths::GenotypePaths() , flags(0) , longest_path_length(0) , original_pos(0) + , score_diff(0) +// , original_clipped_bp(0) , mapq(255) , ml_insert_size(INSERT_SIZE_WHEN_NOT_PROPER_PAIR) { @@ -108,6 +110,8 @@ GenotypePaths::GenotypePaths(GenotypePaths const & b) , flags(b.flags) , longest_path_length(b.longest_path_length) , original_pos(b.original_pos) + , score_diff(b.score_diff) +// , original_clipped_bp(b.original_clipped_bp) , mapq(b.mapq) , ml_insert_size(b.ml_insert_size) { @@ -130,6 +134,8 @@ GenotypePaths::GenotypePaths(GenotypePaths && b) , flags(b.flags) , longest_path_length(b.longest_path_length) , original_pos(b.original_pos) + , score_diff(b.score_diff) +// , original_clipped_bp(b.original_clipped_bp) , mapq(b.mapq) , ml_insert_size(b.ml_insert_size) { @@ -152,6 +158,8 @@ GenotypePaths::operator=(GenotypePaths const & b) flags = b.flags; longest_path_length = b.longest_path_length; original_pos = b.original_pos; + score_diff = b.score_diff; +// original_clipped_bp = b.original_clipped_bp; mapq = b.mapq; ml_insert_size = b.ml_insert_size; @@ -185,6 +193,8 @@ GenotypePaths::operator=(GenotypePaths && b) flags = b.flags; longest_path_length = b.longest_path_length; original_pos = b.original_pos; + score_diff = b.score_diff; +// original_clipped_bp = b.original_clipped_bp; mapq = b.mapq; ml_insert_size = b.ml_insert_size; @@ -207,6 +217,8 @@ GenotypePaths::GenotypePaths(int16_t _flags, std::size_t _read_length) , flags(_flags) , longest_path_length(0) , original_pos(0) + , score_diff(0) +// , original_clipped_bp(0) , mapq(255) , ml_insert_size(INSERT_SIZE_WHEN_NOT_PROPER_PAIR) { @@ -700,13 +712,13 @@ GenotypePaths::find_new_variants() const assert(reference.size() == read2.size()); { - int matches = -1; // -1 means we have not found a mismatch yet + int matches{-1}; // -1 means we have not found a mismatch yet std::vector ref; std::vector alt; - int64_t const MIN_VAR_THRESHOLD = 5; - uint32_t var_pos = 0; + int64_t constexpr MIN_VAR_THRESHOLD{5}; + uint32_t var_pos{0}; - for (unsigned i = 0; i < 
reference.size(); ++i) + for (unsigned i{0}; i < reference.size(); ++i) { assert(i < read2.size()); assert(i < qual2.size()); diff --git a/src/typer/read.cpp b/src/typer/read.cpp new file mode 100644 index 0000000..c119e91 --- /dev/null +++ b/src/typer/read.cpp @@ -0,0 +1,103 @@ +#include +#include +#include + +#include + +#include + +#include +#include + + +namespace gyper +{ + +bool +Alignment::has_indel_event(Tindel_events::iterator indel_event) const +{ + for (auto const & e : indel_events) + { + if (e.event_it == indel_event) + return e.read_pos != READ_ANTI_SUPPORT; + } + + return false; +} + + +bool +Alignment::is_clipped() const +{ + return num_clipped_begin != 0 || num_clipped_end != 0; +} + + +void +Alignment::add_indel_event(int32_t pos, Tindel_events::iterator indel_it) +{ + if (pos == READ_ANTI_SUPPORT) + ++indel_it->second.anti_count; + else if (pos == READ_MULTI_SUPPORT) + ++indel_it->second.multi_count; + else + ++indel_it->second.count; + + indel_events.push_back({pos, indel_it}); +} + + +void +Alignment::replace_indel_events(std::vector && new_indel_events) +{ + // Lower count on old events + for (auto & event_and_info : indel_events) + { + if (event_and_info.read_pos == READ_ANTI_SUPPORT) + --event_and_info.event_it->second.anti_count; + else if (event_and_info.read_pos == READ_MULTI_SUPPORT) + --event_and_info.event_it->second.multi_count; + else + --event_and_info.event_it->second.count; + } + + // Increase count on new events + for (auto & event_and_info : new_indel_events) + { + if (event_and_info.read_pos == READ_ANTI_SUPPORT) + ++event_and_info.event_it->second.anti_count; + else if (event_and_info.read_pos == READ_MULTI_SUPPORT) + ++event_and_info.event_it->second.multi_count; + else + ++event_and_info.event_it->second.count; + } + + indel_events = std::move(new_indel_events); +} + + +std::string +Read::to_string() const +{ + std::ostringstream ss; + ss << name << " " << alignment.pos << " " << flags << " " << static_cast(mapq) << "\n"; + ss << std::string(sequence.begin(), sequence.end()); + + //for (auto const & q : qual) + // ss << static_cast(q + 33); + + { + ss << "\nAlignment: score=" << alignment.score << " num_clipped_begin,_end=" << alignment.num_clipped_begin + << "," << alignment.num_clipped_end << " num_ins_begin=" << alignment.num_ins_begin; + + for (auto const & support_event : alignment.indel_events) + ss << " pos=" << static_cast(support_event.read_pos) << " " << &(*(support_event.event_it)); + + ss << " "; + } + + return ss.str(); +} + + +} // namespace gyper diff --git a/src/typer/sample_call.cpp b/src/typer/sample_call.cpp index 80b78e9..6cffe45 100644 --- a/src/typer/sample_call.cpp +++ b/src/typer/sample_call.cpp @@ -77,8 +77,7 @@ SampleCall::SampleCall(std::vector && _phred, alt_total_depth = static_cast( std::min(static_cast(0xFFFFu), - static_cast(alt_depth) - ) + static_cast(alt_depth)) ); assert(alt_total_depth >= ambiguous_depth); @@ -100,6 +99,13 @@ SampleCall::get_unique_depth() const } +uint32_t +SampleCall::get_alt_depth() const +{ + return std::accumulate(coverage.begin() + 1, coverage.end(), static_cast(ambiguous_depth)); +} + + std::pair SampleCall::get_gt_call() const { @@ -148,6 +154,34 @@ SampleCall::get_gq() const } +uint8_t +SampleCall::get_lowest_phred_not_with(uint16_t allele) const +{ + long i{0}; + uint8_t min_phred{255}; + + for (long y{0}; y < static_cast(coverage.size()); ++y) + { + if (y == allele) + { + i += y + 1; + continue; + } + + for (long x = 0; x <= y; ++x, ++i) + { + if (x == allele) + continue; + + if 
(phred[i] < min_phred) + min_phred = phred[i]; + } + } + + return min_phred; +} + + int8_t SampleCall::check_filter(long gq) const { diff --git a/src/typer/var_stats.cpp b/src/typer/var_stats.cpp index 1d90b87..a2b6ed7 100644 --- a/src/typer/var_stats.cpp +++ b/src/typer/var_stats.cpp @@ -9,6 +9,9 @@ #include // std::vector #include // boost::split +#include +#include +#include #include // gyper::absolute_pos #include // gyper::VarStats @@ -19,14 +22,11 @@ namespace gyper { -VarStats::VarStats(uint16_t allele_count) noexcept +VarStats::VarStats(std::size_t const allele_count) noexcept // : realignment_distance(allele_count) // , realignment_count(allele_count) - : read_strand(allele_count) -// , r1_strand_forward(allele_count) -// , r1_strand_reverse(allele_count) -// , r2_strand_forward(allele_count) -// , r2_strand_reverse(allele_count) + : per_allele(allele_count) + , read_strand(allele_count) {} @@ -53,122 +53,126 @@ VarStats::add_realignment_distance(uint8_t const allele_id, * CLASS INFORMATION */ -//std::string -//VarStats::get_realignment_count() const -//{ -// return join_strand_bias(realignment_count); // TODO: Rename join_strand_bias() -//} -// -// -//std::string -//VarStats::get_realignment_distance() const -//{ -// return join_strand_bias(realignment_distance); // TODO: Rename join_strand_bias() -//} - - -std::string -VarStats::get_forward_strand_bias() const +void +VarStats::write_stats(std::map & infos) const { - if (read_strand.size() == 0) - return "."; + assert(per_allele.size() == read_strand.size()); + long const num_allele = per_allele.size(); - std::stringstream ss; - ss << (read_strand[0].r1_forward + read_strand[0].r2_forward); - - for (long i = 1; i < static_cast(read_strand.size()); ++i) - ss << ',' << (read_strand[i].r1_forward + read_strand[i].r2_forward); + if (num_allele <= 1) + return; - return ss.str(); + infos["CR"] = std::to_string(clipped_reads); + infos["MQsquared"] = std::to_string(mapq_squared); + write_read_strand_stats(infos); + write_per_allele_stats(infos); } -std::string -VarStats::get_reverse_strand_bias() const +void +VarStats::write_per_allele_stats(std::map & infos) const { - if (read_strand.size() == 0) - return "."; - - std::stringstream ss; - ss << (read_strand[0].r1_reverse + read_strand[0].r2_reverse); - - for (long i = 1; i < static_cast(read_strand.size()); ++i) - ss << ',' << (read_strand[i].r1_reverse + read_strand[i].r2_reverse); - - return ss.str(); -} + long const num_allele = per_allele.size(); + assert(num_allele > 1); + std::ostringstream cr_ss; + std::ostringstream mq_ss; + std::ostringstream sd_ss; + std::ostringstream mm_ss; -std::string -VarStats::get_r1_forward_strand_bias() const -{ - if (read_strand.size() == 0) - return "."; - - std::stringstream ss; - ss << read_strand[0].r1_forward; + { + VarStatsPerAllele const & stats_per_al = per_allele[0]; + cr_ss << stats_per_al.clipped_bp; + mq_ss << stats_per_al.mapq_squared; + sd_ss << stats_per_al.score_diff; + mm_ss << stats_per_al.mismatches; + } - for (long i = 1; i < static_cast(read_strand.size()); ++i) - ss << ',' << read_strand[i].r1_forward; + for (long i{1}; i < num_allele; ++i) + { + VarStatsPerAllele const & stats_per_al = per_allele[i]; + cr_ss << ',' << stats_per_al.clipped_bp; + mq_ss << ',' << stats_per_al.mapq_squared; + sd_ss << ',' << stats_per_al.score_diff; + mm_ss << ',' << stats_per_al.mismatches; + } - return ss.str(); + infos["CRal"] = cr_ss.str(); + infos["MQSal"] = mq_ss.str(); + infos["SDal"] = sd_ss.str(); + infos["MMal"] = mm_ss.str(); } 
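// The per-allele INFO fields written above (CRal, MQSal, SDal, MMal) and the
// read-strand fields written below (SBF, SBR, SBF1, ...) share one layout: a
// single value per allele, comma-joined into the INFO string. A minimal,
// self-contained sketch of that layout follows; join_per_allele is a
// hypothetical helper used only for illustration and is not part of this patch.

#include <sstream>
#include <string>
#include <vector>

// Illustrative only: comma-join one value per allele, no trailing separator.
template <typename T>
std::string
join_per_allele(std::vector<T> const & values)
{
  std::ostringstream ss;

  for (std::size_t i = 0; i < values.size(); ++i)
  {
    if (i > 0)
      ss << ',';

    ss << values[i];
  }

  return ss.str();
}

// Usage sketch (field name and values are illustrative):
//   infos["CRal"] = join_per_allele(std::vector<long>{12, 3, 0});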
-std::string -VarStats::get_r2_forward_strand_bias() const +void +VarStats::write_read_strand_stats(std::map & infos) const { - if (read_strand.size() == 0) - return "."; - - std::stringstream ss; - ss << read_strand[0].r2_forward; - - for (long i = 1; i < static_cast(read_strand.size()); ++i) - ss << ',' << read_strand[i].r2_forward; - - return ss.str(); -} + long const num_allele = read_strand.size(); + assert(num_allele > 1); + std::ostringstream f_ss; + std::ostringstream r_ss; + std::ostringstream f1_ss; + std::ostringstream f2_ss; + std::ostringstream r1_ss; + std::ostringstream r2_ss; -std::string -VarStats::get_r1_reverse_strand_bias() const -{ - if (read_strand.size() == 0) - return "."; - - std::stringstream ss; - ss << read_strand[0].r1_reverse; + { + auto const & stats = read_strand[0]; + f_ss << (stats.r1_forward + stats.r2_forward); + r_ss << (stats.r1_reverse + stats.r2_reverse); + f1_ss << stats.r1_forward; + f2_ss << stats.r2_forward; + r1_ss << stats.r1_reverse; + r2_ss << stats.r2_reverse; + } - for (long i = 1; i < static_cast(read_strand.size()); ++i) - ss << ',' << read_strand[i].r1_reverse; + for (long i{1}; i < num_allele; ++i) + { + auto const & stats = read_strand[i]; + f_ss << ',' << (stats.r1_forward + stats.r2_forward); + r_ss << ',' << (stats.r1_reverse + stats.r2_reverse); + f1_ss << ',' << stats.r1_forward; + f2_ss << ',' << stats.r2_forward; + r1_ss << ',' << stats.r1_reverse; + r2_ss << ',' << stats.r2_reverse; + } - return ss.str(); + infos["SBF"] = f_ss.str(); + infos["SBR"] = r_ss.str(); + infos["SBF1"] = f1_ss.str(); + infos["SBF2"] = f2_ss.str(); + infos["SBR1"] = r1_ss.str(); + infos["SBR2"] = r2_ss.str(); } -std::string -VarStats::get_r2_reverse_strand_bias() const +void +VarStats::add_stats(VarStats const & stats) { - if (read_strand.size() == 0) - return "."; - - std::stringstream ss; - ss << read_strand[0].r2_reverse; + assert(per_allele.size() == stats.per_allele.size()); + assert(read_strand.size() == stats.read_strand.size()); - for (long i = 1; i < static_cast(read_strand.size()); ++i) - ss << ',' << read_strand[i].r2_reverse; + clipped_reads += stats.clipped_reads; + mapq_squared += stats.mapq_squared; - return ss.str(); -} - - -void -VarStats::add_mapq(uint8_t const new_mapq) -{ - // Ignore MapQ == 255, that means MapQ is unavailable - if (new_mapq < 255u) - mapq_squared += static_cast(new_mapq) * static_cast(new_mapq); + for (long i{0}; i < static_cast(per_allele.size()); ++i) + { + auto & new_a = per_allele[i]; + auto & old_a = stats.per_allele[i]; + auto & new_rs = read_strand[i]; + auto & old_rs = stats.read_strand[i]; + + new_a.clipped_bp += old_a.clipped_bp; + new_a.mapq_squared += old_a.mapq_squared; + new_a.score_diff += old_a.score_diff; + new_a.mismatches += old_a.mismatches; + + new_rs.r1_forward += old_rs.r1_forward; + new_rs.r1_reverse += old_rs.r1_reverse; + new_rs.r2_forward += old_rs.r2_forward; + new_rs.r2_reverse += old_rs.r2_reverse; + } } @@ -183,7 +187,7 @@ join_strand_bias(std::vector const & bias) std::stringstream ss; ss << bias[0]; - for (std::size_t i = 1; i < bias.size(); ++i) + for (long i = 1; i < static_cast(bias.size()); ++i) ss << ',' << bias[i]; return ss.str(); @@ -296,4 +300,21 @@ get_list_of_uncalled_alleles(std::string const & ac) } +template +void +VarStats::serialize(Archive & ar, unsigned const int /*version*/) +{ + ar & per_allele; + ar & read_strand; + ar & clipped_reads; + ar & mapq_squared; +} + + +template void VarStats::serialize(boost::archive::binary_iarchive &, + const unsigned int); 
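// Explicit instantiations let the serialize() template definition stay in this
// .cpp file: the binary input archive is instantiated above, and the matching
// output archive instantiation follows below.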
+template void VarStats::serialize(boost::archive::binary_oarchive &, + const unsigned int); + + } // namespace gyper diff --git a/src/typer/variant.cpp b/src/typer/variant.cpp index f0c3d59..01b01e9 100644 --- a/src/typer/variant.cpp +++ b/src/typer/variant.cpp @@ -4,6 +4,8 @@ #include // std::stringstream #include // std::vector +#include +#include #include #include // boost::hash_range #include @@ -30,82 +32,53 @@ namespace { void -update_strand_bias(std::size_t const num_seqs, - std::size_t new_num_seqs, - std::vector const & old_phred_to_new_phred, - gyper::Variant const & var, - gyper::Variant & new_var) +update_per_allele_stats(std::size_t const num_seqs, + std::size_t new_num_seqs, + std::vector const & old_phred_to_new_phred, + gyper::Variant const & var, + gyper::Variant & new_var) { - using gyper::get_strand_bias; - using gyper::join_strand_bias; - - std::vector seq_depths = var.get_seq_depth_of_all_alleles(); + //assert(var.infos.count("SBF1") > 0); - std::vector sbf = get_strand_bias(var.infos, "SBF"); - std::vector sbr = get_strand_bias(var.infos, "SBR"); + // Check if any infos to update + if (var.stats.per_allele.size() == 0) + return; - std::vector sbf1 = get_strand_bias(var.infos, "SBF1"); - std::vector sbf2 = get_strand_bias(var.infos, "SBF2"); - std::vector sbr1 = get_strand_bias(var.infos, "SBR1"); - std::vector sbr2 = get_strand_bias(var.infos, "SBR2"); + gyper::VarStats const & old_stats = var.stats;//(num_seqs); + //old_stats.read_stats(var.infos); - assert(sbf.size() > 0); - assert(sbf.size() == num_seqs); - assert(sbr.size() == num_seqs); - assert(sbf1.size() == num_seqs); - assert(sbf2.size() == num_seqs); - assert(sbr1.size() == num_seqs); - assert(sbr2.size() == num_seqs); + new_var.stats = gyper::VarStats(new_num_seqs); + gyper::VarStats & new_stats = new_var.stats; + new_stats.clipped_reads = old_stats.clipped_reads; + new_stats.mapq_squared = old_stats.mapq_squared; - if (sbf.size() > 0 && sbr.size() > 0) + for (uint32_t y = 0; y < num_seqs; ++y) { - std::vector new_sbf(new_num_seqs, 0u); - std::vector new_sbr(new_num_seqs, 0u); - - std::vector new_sbf1(new_num_seqs, 0u); - std::vector new_sbf2(new_num_seqs, 0u); - std::vector new_sbr1(new_num_seqs, 0u); - std::vector new_sbr2(new_num_seqs, 0u); - -// std::vector new_ra_count(new_num_seqs, 0u); -// std::vector new_ra_dist(new_num_seqs, 0u); - - for (uint32_t y = 0; y < num_seqs; ++y) - { - uint32_t new_y = old_phred_to_new_phred[y]; - - assert(new_y < new_num_seqs); - assert(y < seq_depths.size()); - assert(y < sbf.size()); - assert(y < sbr.size()); - assert(y < sbf1.size()); - assert(y < sbf2.size()); - assert(y < sbr1.size()); - assert(y < sbr2.size()); - - new_sbf[new_y] += sbf[y]; - new_sbr[new_y] += sbr[y]; - - new_sbf1[new_y] += sbf1[y]; - new_sbf2[new_y] += sbf2[y]; - new_sbr1[new_y] += sbr1[y]; - new_sbr2[new_y] += sbr2[y]; - -// new_ra_count[new_y] += ra_count[y]; -// new_ra_dist[new_y] += ra_dist[y]; - } - - new_var.infos["SBF"] = join_strand_bias(new_sbf); - new_var.infos["SBR"] = join_strand_bias(new_sbr); - - new_var.infos["SBF1"] = join_strand_bias(new_sbf1); - new_var.infos["SBF2"] = join_strand_bias(new_sbf2); - new_var.infos["SBR1"] = join_strand_bias(new_sbr1); - new_var.infos["SBR2"] = join_strand_bias(new_sbr2); - -// new_var.infos["RACount"] = join_strand_bias(new_ra_count); -// new_var.infos["RADist"] = join_strand_bias(new_ra_dist); + uint32_t new_y = old_phred_to_new_phred[y]; + + assert(new_y < new_num_seqs); + assert(new_y < new_stats.per_allele.size()); + assert(new_y < 
new_stats.read_strand.size()); + assert(y < old_stats.per_allele.size()); + assert(y < old_stats.read_strand.size()); + + auto const & old_a = old_stats.per_allele[y]; + auto & new_a = new_stats.per_allele[new_y]; + auto const & old_rs = old_stats.read_strand[y]; + auto & new_rs = new_stats.read_strand[new_y]; + + new_a.clipped_bp += old_a.clipped_bp; + new_a.mapq_squared += old_a.mapq_squared; + new_a.score_diff += old_a.score_diff; + new_a.mismatches += old_a.mismatches; + + new_rs.r1_forward += old_rs.r1_forward; + new_rs.r2_forward += old_rs.r2_forward; + new_rs.r1_reverse += old_rs.r1_reverse; + new_rs.r2_reverse += old_rs.r2_reverse; } + + //new_stats.add_stats(new_var.infos); } @@ -117,24 +90,29 @@ namespace gyper Variant::Variant() noexcept : abs_pos(0) + , stats(0) {} Variant::Variant(Variant const & var) noexcept : abs_pos(var.abs_pos) , seqs(var.seqs) , calls(var.calls) + , stats(var.stats) , infos(var.infos) , suffix_id(var.suffix_id) , is_info_generated(var.is_info_generated) + , type(var.type) {} Variant::Variant(Variant && var) noexcept : abs_pos(std::forward(var.abs_pos)) , seqs(std::forward > >(var.seqs)) , calls(std::forward >(var.calls)) + , stats(std::forward(var.stats)) , infos(std::forward >(var.infos)) , suffix_id(std::forward(var.suffix_id)) , is_info_generated(var.is_info_generated) + , type(var.type) {} @@ -144,9 +122,11 @@ Variant::operator=(Variant const & o) noexcept abs_pos = o.abs_pos; seqs = o.seqs; calls = o.calls; + stats = o.stats; infos = o.infos; suffix_id = o.suffix_id; is_info_generated = o.is_info_generated; + type = o.type; return *this; } @@ -157,27 +137,33 @@ Variant::operator=(Variant && o) noexcept abs_pos = o.abs_pos; seqs = std::move(o.seqs); calls = std::move(o.calls); + stats = std::move(o.stats); infos = std::move(o.infos); suffix_id = std::move(o.suffix_id); is_info_generated = o.is_info_generated; + type = o.type; return *this; } Variant::Variant(Genotype const & gt) + : stats(0) { seqs = graph.get_all_sequences_of_a_genotype(gt); + stats = VarStats(seqs.size()); abs_pos = gt.id - 1; // -1 cause we always fetch one position back as well } Variant::Variant(std::vector const & gts, std::vector const & hap_calls) + : stats(0) { assert(gts.size() > 0); for (long i = 0; i < static_cast(hap_calls.size()); ++i) seqs.push_back(graph.get_sequence_of_a_haplotype_call(gts, hap_calls[i])); + stats = VarStats(seqs.size()); abs_pos = gts[0].id - 1; // -1 cause we always fetch one position back as well } @@ -185,6 +171,7 @@ Variant::Variant(std::vector const & gts, std::vector const Variant::Variant(VariantCandidate const & var_candidate) noexcept : abs_pos(var_candidate.abs_pos) , seqs(var_candidate.seqs) + , stats(0) {} @@ -226,7 +213,7 @@ Variant::update_camou_phred(long const ploidy) { assert(y < static_cast(normalized_cov.size())); - long constexpr ERROR = 4; + long constexpr ERROR{4}; long const norm_cov = normalized_cov[y]; long phred00 = norm_cov * ERROR; long phred01_or_11 = cov[0]; @@ -262,12 +249,15 @@ void Variant::generate_infos() { // First check if INFO had already been generated - if (is_info_generated) - return; + //if (is_info_generated) + // return; assert(seqs.size() >= 2); infos["RefLen"] = std::to_string(seqs[0].size()); + // Write stats from VarStat object + stats.write_stats(infos); + // Calculate AC, AN, SeqDepth, MaxVS, ABHom, ABHet, ABHomMulti, ABHetMulti std::vector ac(seqs.size() - 1, 0u); std::vector pass_ac(seqs.size() - 1, 0u); @@ -276,10 +266,11 @@ Variant::generate_infos() long pass_an{0}; long genotyped{0}; uint64_t 
seqdepth{0}; + std::vector seqdepth_alt(seqs.size() - 1, 0u); std::pair het_allele_depth = {0ul, 0ul}; // First is the first call, second is the second call std::pair hom_allele_depth = {0ul, 0ul}; // First is the called allele, second is not the called one - std::vector > het_multi_allele_depth(seqs.size() - 1, {0u, 0u}); + std::vector > het_multi_allele_depth(seqs.size(), {0u, 0u}); std::vector > hom_multi_allele_depth(seqs.size(), {0u, 0u}); // Max variant support and variant support ratio @@ -292,10 +283,10 @@ Variant::generate_infos() uint64_t n_alt_alt{0u}; // Maximum number of alt proper pairs (MaxAltPP) - uint8_t n_max_alt_proper_pairs = 0u; + uint8_t n_max_alt_proper_pairs{0u}; // For calculation how many calls passed - long n_passed_calls = 0; + long n_passed_calls{0}; for (auto const & sample_call : calls) { @@ -312,15 +303,13 @@ Variant::generate_infos() n_max_alt_proper_pairs = std::max(n_max_alt_proper_pairs, sample_call.alt_proper_pair_depth); uint32_t const total_depth = std::accumulate(sample_call.coverage.cbegin(), sample_call.coverage.cend(), - 0u - ); + 0u); // Skip zero (the reference) for (uint16_t c = 1; c < sample_call.coverage.size(); ++c) { maximum_variant_support[c - 1] = std::max(maximum_variant_support[c - 1], - sample_call.coverage[c] - ); + sample_call.coverage[c]); if (total_depth > 0) { @@ -328,8 +317,7 @@ Variant::generate_infos() static_cast(total_depth); maximum_alternative_support_ratio[c - 1] = std::max(maximum_alternative_support_ratio[c - 1], - current_ratio - ); + current_ratio); } } @@ -369,36 +357,47 @@ Variant::generate_infos() } } - // ABHetMulti and ABHomMulti is calculated on multiallelic sites only - if (seqs.size() > 2) + // ABHetMulti and ABHomMulti is calculated on multiallelic sites only - NO LONGER + //if (seqs.size() > 2) { + uint32_t const call_depth = sample_call.get_unique_depth(); + // Check if heterozygous if (call.first != call.second) { // Only consider heterozygous reference calls - if (call.first == 0u) + //if (call.first == 0u) { - auto const & c = call.second; - assert((c - 1) < static_cast(het_multi_allele_depth.size())); - het_multi_allele_depth[c - 1].first += sample_call.coverage[0]; - het_multi_allele_depth[c - 1].second += sample_call.coverage[c]; + auto const & c1 = call.first; + auto const & c2 = call.second; + assert(c1 < static_cast(het_multi_allele_depth.size())); + assert(c2 < static_cast(het_multi_allele_depth.size())); + + het_multi_allele_depth[c1].first += sample_call.coverage[c1]; + het_multi_allele_depth[c1].second += call_depth - sample_call.coverage[c1]; + + het_multi_allele_depth[c2].first += sample_call.coverage[c2]; + het_multi_allele_depth[c2].second += call_depth - sample_call.coverage[c2]; } } else { auto const & c = call.first; - assert(c < hom_multi_allele_depth.size()); + assert(static_cast(c) < static_cast(hom_multi_allele_depth.size())); hom_multi_allele_depth[c].first += sample_call.coverage[c]; - hom_multi_allele_depth[c].second += std::accumulate(sample_call.coverage.cbegin(), - sample_call.coverage.cend(), - 0u - ) - sample_call.coverage[c]; + hom_multi_allele_depth[c].second += call_depth - sample_call.coverage[c]; } } // SeqDepth if (sample_call.coverage.size() > 0) + { seqdepth += sample_call.get_depth(); + assert(sample_call.coverage.size() == seqdepth_alt.size() + 1); + + for (long c{1}; c < static_cast(sample_call.coverage.size()); ++c) + seqdepth_alt[c - 1] += sample_call.coverage[c]; + } // AN { @@ -435,7 +434,7 @@ Variant::generate_infos() else ++n_alt_alt; } - } // for sample_call 
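// The per-sample loop above only accumulates raw counters: allele counts for
// AC/AN, total and per-alt sequencing depth, allele-balance depth pairs,
// proper-pair support and pass/fail tallies. The blocks below derive the
// aggregate INFO fields (MaxAAS, AF, SeqDepth, ABHet/ABHom, QD/QDalt, ...)
// from those counters.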
+ } // LOOP over sample_call // Write MaxAAS { @@ -486,7 +485,6 @@ Variant::generate_infos() // Write AN infos["AN"] = std::to_string(an); - // Write AF { std::ostringstream af_ss; @@ -573,7 +571,7 @@ Variant::generate_infos() infos["ABHet"] = ss_abhet.str(); } - double info_abhom{0.0}; + double info_abhom{0.985}; // Write ABHom { @@ -633,78 +631,62 @@ Variant::generate_infos() infos["SBAlt"] = ss_sb.str(); } - if (seqs.size() > 2) + // Write ABHetMulti { - // Write ABHetMulti - { - assert(het_multi_allele_depth.size() > 0); - assert(het_multi_allele_depth.size() == seqs.size() - 1); - std::stringstream ss_abhet; - ss_abhet.precision(4); + assert(het_multi_allele_depth.size() > 0); + assert(het_multi_allele_depth.size() == seqs.size()); + std::stringstream ss_abhet; + ss_abhet.precision(4); - for (unsigned i = 0; i < het_multi_allele_depth.size(); ++i) - { - if (i > 0) - ss_abhet << ","; + for (unsigned i = 0; i < het_multi_allele_depth.size(); ++i) + { + if (i > 0) + ss_abhet << ","; - auto const & het_depth_pair = het_multi_allele_depth[i]; - uint32_t const total_het_depth = het_depth_pair.first + het_depth_pair.second; + auto const & het_depth_pair = het_multi_allele_depth[i]; + uint32_t const total_het_depth = het_depth_pair.first + het_depth_pair.second; - if (total_het_depth > 0) - { - ss_abhet << (static_cast(het_depth_pair.second) / - static_cast(total_het_depth)); - } - else - { - ss_abhet << "-1"; - } + if (total_het_depth > 0) + { + ss_abhet << (static_cast(het_depth_pair.second) / + static_cast(total_het_depth)); } - - infos["ABHetMulti"] = ss_abhet.str(); - } - - // Write ABHomMulti - { - assert(hom_multi_allele_depth.size() > 0); - assert(hom_multi_allele_depth.size() == seqs.size()); - std::stringstream ss_abhom; - ss_abhom.precision(4); - - for (unsigned i = 0; i < hom_multi_allele_depth.size(); ++i) + else { - if (i > 0) - ss_abhom << ","; - - auto const & hom_depth_pair = hom_multi_allele_depth[i]; - uint32_t const total_hom_depth = hom_depth_pair.first + hom_depth_pair.second; - - if (total_hom_depth > 0) - { - ss_abhom << (static_cast(hom_depth_pair.first) / - static_cast(total_hom_depth)); - } - else - { - ss_abhom << "-1"; - } + ss_abhet << "-1"; } - - infos["ABHomMulti"] = ss_abhom.str(); } + + infos["ABHetMulti"] = ss_abhet.str(); } - else + + // Write ABHomMulti { - // Erase ABH[et|om]Multi if it is there - auto find_it = infos.find("ABHetMulti"); + assert(hom_multi_allele_depth.size() > 0); + assert(hom_multi_allele_depth.size() == seqs.size()); + std::stringstream ss_abhom; + ss_abhom.precision(4); + + for (unsigned i = 0; i < hom_multi_allele_depth.size(); ++i) + { + if (i > 0) + ss_abhom << ","; - if (find_it != infos.end()) - infos.erase(find_it); + auto const & hom_depth_pair = hom_multi_allele_depth[i]; + uint32_t const total_hom_depth = hom_depth_pair.first + hom_depth_pair.second; - find_it = infos.find("ABHomMulti"); + if (total_hom_depth > 0) + { + ss_abhom << (static_cast(hom_depth_pair.first) / + static_cast(total_hom_depth)); + } + else + { + ss_abhom << "-1"; + } + } - if (find_it != infos.end()) - infos.erase(find_it); + infos["ABHomMulti"] = ss_abhom.str(); } // Write VarType @@ -723,6 +705,23 @@ Variant::generate_infos() infos["QD"] = ss.str(); } + std::vector qd_alt = get_qual_by_depth_per_alt_allele(); + + // Calculate QDalt + { + std::stringstream ss; + ss.precision(4); + ss << qd_alt[0]; + + for (long q{1}; q < static_cast(qd_alt.size()); ++q) + { + ss.precision(4); + ss << ',' << qd_alt[q]; + } + + infos["QDalt"] = ss.str(); + } + long 
info_mq{60}; // Calculate MQ @@ -736,43 +735,140 @@ Variant::generate_infos() if (seqdepth > 0) { double mapq = std::sqrt(mapq_squared / static_cast(seqdepth)); - info_mq = static_cast(mapq + 0.499999); + info_mq = static_cast(mapq + 0.5); infos["MQ"] = std::to_string(info_mq); } else { - infos["MQ"] = "255"; + infos["MQ"] = "0"; } } } - // Logistic regression quality filter (LOGF). Work in progress + assert(stats.read_strand.size() == seqs.size()); + assert(stats.per_allele.size() == seqs.size()); + assert(seqdepth_alt.size() + 1 == seqs.size()); + + // Calculate SDalt, MMalt, CRalt and MQalt { - long const info_cr = infos.count("CR") ? std::stol(infos.at("CR")) : 0; - long const ab_het_bin = static_cast(info_ab_het * 10.0 + 0.00001); - long const sbalt_bin = static_cast(info_sbalt * 10.0 + 0.00001); - double const cr_by_seqdepth = static_cast(info_cr) / static_cast(seqdepth); - double const gt_yield = static_cast(genotyped) / static_cast(an); - - // variables: - // crBySeqdepth=info_cr/seqdepth - // info_qd - // info_abhom - // info_mq - // info_pass_ratio - // gtYield=genotyped/an - assert(ab_het_bin >= 0l); - assert(ab_het_bin <= 10l); - assert(sbalt_bin >= 0l); - assert(sbalt_bin <= 10l); - - double const logf = get_logf(info_abhom, cr_by_seqdepth, info_mq, - info_pass_ratio, gt_yield, info_qd, ab_het_bin, sbalt_bin); - - std::ostringstream ss; - ss.precision(4); - ss << logf; - infos["LOGF"] = ss.str(); + std::ostringstream sd_ss; + std::ostringstream mm_ss; + std::ostringstream cr_ss; + std::ostringstream mq_ss; + + for (long s{0}; s < static_cast(seqdepth_alt.size()); ++s) + { + uint64_t const alt_depth{seqdepth_alt[s]}; + + if (s > 0) + { + sd_ss << ','; + mm_ss << ','; + cr_ss << ','; + mq_ss << ','; + } + + if (alt_depth > 0) + { + auto const & per_al = stats.per_allele[s + 1]; + + sd_ss << (static_cast(per_al.score_diff) / static_cast(alt_depth)); + mm_ss << (static_cast(per_al.mismatches) / static_cast(alt_depth) / 10.0); + cr_ss << (static_cast(per_al.clipped_bp) / static_cast(alt_depth) / 10.0); + mq_ss << static_cast(std::sqrt(static_cast(per_al.mapq_squared) / + static_cast(alt_depth)) + 0.5); + } + else + { + sd_ss << "0.0"; + mm_ss << "0.0"; + cr_ss << "0.0"; + mq_ss << "0"; + } + } + + infos["SDalt"] = sd_ss.str(); + infos["MMalt"] = mm_ss.str(); + infos["CRalt"] = cr_ss.str(); + infos["MQalt"] = mq_ss.str(); + } + + std::vector sb_alt(seqs.size() - 1, 0); + + for (long s{0}; s < static_cast(sb_alt.size()); ++s) + sb_alt[s] = stats.read_strand[s + 1].get_reverse_count(); + + // Alternative allele score (AAScore) + if (!graph.is_sv_graph) + { + { + std::vector aa_score(seqdepth_alt.size(), 0); + + for (long s{0}; s < static_cast(seqdepth_alt.size()); ++s) + { + if (seqdepth_alt[s] > 0) + { + auto const & per_al = stats.per_allele[s + 1]; + + assert(s < static_cast(qd_alt.size())); + double const alt_seq_depth = seqdepth_alt[s]; + double const _sb = 2.0 * ((static_cast(sb_alt[s]) / alt_seq_depth) - 0.5); + double const sb = _sb >= 0.0 ? 
_sb : -_sb; + assert(sb >= 0.0); + double const mm = static_cast(per_al.mismatches) / alt_seq_depth / 10.0; + long const sd = static_cast(per_al.score_diff) / alt_seq_depth + 0.5; + double const cr = static_cast(per_al.clipped_bp) / alt_seq_depth / 10.0; + long const mq = std::sqrt(static_cast(per_al.mapq_squared) / alt_seq_depth) + 0.5; + + aa_score[s] = get_aa_score(info_abhom, sb, mm, sd, qd_alt[s], cr, mq); + } + else + { + aa_score[s] = 0.0; + } + } + + std::ostringstream ss; + ss.precision(4); + ss << aa_score[0]; + + for (long s{1}; s < static_cast(aa_score.size()); ++s) + { + ss.precision(4); + ss << "," << aa_score[s]; + } + + infos["AAScore"] = ss.str(); + } + + // Logistic regression quality filter (LOGF). + { + long const info_cr = infos.count("CR") ? std::stol(infos.at("CR")) : 0; + long const ab_het_bin = static_cast(info_ab_het * 10.0 + 0.00001); + long const sbalt_bin = static_cast(info_sbalt * 10.0 + 0.00001); + double const cr_by_seqdepth = static_cast(info_cr) / static_cast(seqdepth); + double const gt_yield = static_cast(genotyped) / static_cast(an); + + // variables: + // crBySeqdepth=info_cr/seqdepth + // info_qd + // info_abhom + // info_mq + // info_pass_ratio + // gtYield=genotyped/an + assert(ab_het_bin >= 0l); + assert(ab_het_bin <= 10l); + assert(sbalt_bin >= 0l); + assert(sbalt_bin <= 10l); + + double const logf = get_logf(info_abhom, cr_by_seqdepth, info_mq, + info_pass_ratio, gt_yield, info_qd, ab_het_bin, sbalt_bin); + + std::ostringstream ss; + ss.precision(4); + ss << logf; + infos["LOGF"] = ss.str(); + } } is_info_generated = true; @@ -1220,18 +1316,21 @@ Variant::get_qual() const double Variant::get_qual_by_depth() const { - long total_qual = 0; - long total_depth = 0; + long total_qual{0}; + long total_depth{0}; for (auto const & sample_call : calls) { // Skip homozygous ref calls if (sample_call.phred[0] > 0) { - auto const & cov = sample_call.coverage; - total_qual += static_cast(sample_call.phred[0]); - long const depth = std::min(10l, std::accumulate(cov.begin() + 1, cov.end(), 0l)); - total_depth += depth; + long const depth = std::min(10l, static_cast(sample_call.get_alt_depth())); + + if (depth > 0) + { + total_qual += std::min(25l * depth, static_cast(sample_call.phred[0])); + total_depth += depth; + } } } @@ -1242,6 +1341,64 @@ Variant::get_qual_by_depth() const } +std::vector +Variant::get_qual_by_depth_per_alt_allele() const +{ + assert(seqs.size() > 0); + long const num_alts = seqs.size() - 1; + std::vector qd(num_alts, 0.0); + std::vector total_qual(num_alts, 0); + std::vector total_depth(num_alts, 0); + + for (auto const & sample_call : calls) + { + // Skip homozygous ref calls + if (sample_call.phred[0] > 0) + { + std::pair gt_call = sample_call.get_gt_call(); + auto const & cov = sample_call.coverage; + assert(gt_call.first < cov.size()); + assert(gt_call.second < cov.size()); + + if (gt_call.first > 0) + { + long const depth = std::min(10l, static_cast(cov[gt_call.first] + sample_call.ambiguous_depth)); + + if (depth > 0) + { + total_qual[gt_call.first - 1] += + std::min(25l * depth, static_cast(sample_call.get_lowest_phred_not_with(gt_call.first))); + + total_depth[gt_call.first - 1] += depth; + } + } + + if (gt_call.first != gt_call.second) + { + assert(gt_call.second > 0); + long const depth = std::min(10l, static_cast(cov[gt_call.second] + sample_call.ambiguous_depth)); + + if (depth > 0) + { + total_qual[gt_call.second - 1] += + std::min(25l * depth, static_cast(sample_call.get_lowest_phred_not_with(gt_call.second))); + + 
total_depth[gt_call.second - 1] += depth; + } + } + } + } + + for (long s{0}; s < num_alts; ++s) + { + if (total_depth[s] > 0) + qd[s] = static_cast(total_qual[s]) / static_cast(total_depth[s]); + } + + return qd; +} + + std::vector make_biallelic(Variant && var) { @@ -1310,7 +1467,7 @@ make_biallelic(Variant && var) new_var.calls.push_back(std::move(new_sample_call)); } - update_strand_bias(var.seqs.size(), new_var.seqs.size(), old_phred_to_new_phred, var, new_var); + update_per_allele_stats(var.seqs.size(), new_var.seqs.size(), old_phred_to_new_phred, var, new_var); out.push_back(std::move(new_var)); } @@ -1326,6 +1483,7 @@ break_down_variant(Variant && var, bool const is_all_biallelic) { std::vector broken_down_vars; + //assert(var.stats.per_allele.size() == var.seqs.size()); if (Options::const_instance()->no_decompose || (var.seqs.size() == 2 && @@ -1624,11 +1782,11 @@ find_variant_sequences(gyper::Variant & new_var, gyper::Variant const & old_var) } // Update strand bias - update_strand_bias(old_var.seqs.size(), - new_seqs.size(), - old_phred_to_new_phred, - old_var, - new_var); + update_per_allele_stats(old_var.seqs.size(), + new_seqs.size(), + old_phred_to_new_phred, + old_var, + new_var); new_var.calls.push_back(std::move(new_call)); } @@ -1643,6 +1801,7 @@ break_multi_snps(Variant const && var) uint32_t const pos = var.abs_pos; std::vector > const & seqs = var.seqs; std::vector new_vars; + //assert(var.infos.count("SBF1") == 1); for (long j = 0; j < static_cast(seqs[0].size()); ++j) { @@ -1726,8 +1885,9 @@ break_multi_snps(Variant const && var) new_var.calls.push_back(std::move(new_sample_call)); } - if (var.infos.count("SBF1") == 1) - update_strand_bias(seqs.size(), new_var.seqs.size(), old_phred_to_new_phred, var, new_var); + //if (var.infos.count("SBF1") == 1) + //assert(var.infos.count("SBF1") == 1); + update_per_allele_stats(seqs.size(), new_var.seqs.size(), old_phred_to_new_phred, var, new_var); new_vars.push_back(std::move(new_var)); } @@ -1748,7 +1908,7 @@ break_down_skyr(Variant && var, long const reach) std::min(OPTIMAL_EXTRA, static_cast(var.abs_pos) - reach - 1l) : OPTIMAL_EXTRA; - long constexpr extra_bases_after = OPTIMAL_EXTRA; + long constexpr extra_bases_after{OPTIMAL_EXTRA}; for (long i = 0; i < extra_bases_before && var.add_base_in_front(false); ++i) {} @@ -1825,7 +1985,7 @@ break_down_skyr(Variant && var, long const reach) new_var.calls.push_back(std::move(new_sample_call)); } - update_strand_bias(seqs.size(), new_var.seqs.size(), old_phred_to_new_phred, var, new_var); + update_per_allele_stats(seqs.size(), new_var.seqs.size(), old_phred_to_new_phred, var, new_var); new_vars.push_back(std::move(new_var)); } @@ -1866,6 +2026,7 @@ Variant::serialize(Archive & ar, unsigned const int /*version*/) ar & abs_pos; ar & seqs; ar & calls; + ar & stats; ar & infos; ar & suffix_id; } diff --git a/src/typer/variant_map.cpp b/src/typer/variant_map.cpp index 7bfdbd4..afe03c2 100644 --- a/src/typer/variant_map.cpp +++ b/src/typer/variant_map.cpp @@ -415,9 +415,11 @@ VariantMap::filter_varmap_for_all() long const reach{-1}; bool const is_no_variant_overlapping{true}; // Force it to be true so we wont use skyr bool const is_all_biallelic{false}; + Variant new_var(var_cp); + new_var.infos["SBF1"] = "."; std::vector new_broken_down_vars = - break_down_variant(Variant(var_cp), reach, is_no_variant_overlapping, is_all_biallelic); + break_down_variant(std::move(new_var), reach, is_no_variant_overlapping, is_all_biallelic); assert(new_broken_down_vars.size() != 0); diff --git 
a/src/typer/vcf.cpp b/src/typer/vcf.cpp index d6a5e5f..503a79f 100644 --- a/src/typer/vcf.cpp +++ b/src/typer/vcf.cpp @@ -25,6 +25,7 @@ #include "tbx.h" + namespace { @@ -293,19 +294,17 @@ Vcf::read_record(bool const SITES_ONLY) std::unordered_set const keys_to_parse( { -// "AC", - "CR", + "CR", "CRal", "CRalt", "END", "HOMSEQ" // "INV3", "INV5", "LEFT_SVINSSEQ", "NCLUSTERS", "NUM_MERGED_SVS", - "MQsquared", + "MMal", "MMalt", "MQ", "MQSal", "MQsquared", "MQal", "OLD_VARIANT_ID", "OREND", "ORSTART", - "PS", "RELATED_SV_ID", "RIGHT_SVINSSEQ", "SBF", "SBF1", "SBF2", - "SBR", "SBR1", "SBR2", + "SBR", "SBR1", "SBR2", "SDal", "SDalt", "SVLEN", "SVTYPE", "SVSIZE", "SVMODEL", "SEQ", "SVINSSEQ", "SV_ID" } ); @@ -589,23 +588,26 @@ Vcf::write_header() // INFO definitions { bgzf_stream.ss + << "##INFO=\n" << "##INFO=\n" << "##INFO=\n" - << "##INFO=\n" + << "##INFO=\n" << "##INFO=\n" + " homozygous calls (A/(A+0)) where A is the called allele and O is anything " + "else. -1 if not available.\">\n" << "##INFO=\n" << "##INFO=\n" << "##INFO=\n" - << "##INFO=\n" + << "##INFO=\n" + << "##INFO=\n" + << "##INFO=\n" << "##INFO=\n" << "##INFO=\n" @@ -620,11 +622,16 @@ Vcf::write_header() "ratio per alt. allele.\">\n" << "##INFO=\n" + << "##INFO=\n" + << "##INFO=\n" << "##INFO=\n" - << "##INFO=\n" + << "##INFO=\n" + + << "##INFO=\n" << "##INFO=\n" + "cluster.\">\n" << "##INFO=\n" << "##INFO=\n" << "##INFO=\n" << "##INFO=\n" + << "##INFO=\n" << "##INFO=\n" << "##INFO=\n" << "##INFO=\n" - << "##INFO=\n" +// << "##INFO=\n" << "##INFO=\n" << "##INFO=\n" << "##INFO=\n" + << "##INFO=\n" + << "##INFO=\n" << "##INFO=\n" << "##INFO=\n" - << "##INFO=\n" - << "##INFO=\n" + << "##INFO=\n" + << "##INFO=\n" << "##INFO=\n" << "##INFO=\n" @@ -1185,7 +1197,11 @@ Vcf::close_vcf_file() void Vcf::write_tbi_index() const { - int ret = tbx_index_build(filename.c_str(), 0, &tbx_conf_vcf); + bool const is_csi = Options::const_instance()->is_csi; + + int ret = is_csi ? 
+ tbx_index_build(filename.c_str(), 14, &tbx_conf_vcf) : + tbx_index_build(filename.c_str(), 0, &tbx_conf_vcf); if (ret < 0) BOOST_LOG_TRIVIAL(warning) << __HERE__ << " Could not build VCF index"; @@ -1219,7 +1235,7 @@ Vcf::add_segment(Segment && segment) void -Vcf::add_haplotype(Haplotype & haplotype, bool const clear_haplotypes, uint32_t const phase_set) +Vcf::add_haplotype(Haplotype & haplotype, bool const clear_haplotypes, uint32_t const /*phase_set*/) { assert(haplotype.gts.size() > 0); @@ -1236,18 +1252,12 @@ Vcf::add_haplotype(Haplotype & haplotype, bool const clear_haplotypes, uint32_t // Add variant stats for (long i = 0; i < static_cast(new_vars.size()); ++i) { - auto const & stat = haplotype.var_stats[i]; + VarStats const & stats = haplotype.var_stats[i]; auto & new_var = new_vars[i]; - new_var.infos["CR"] = std::to_string(stat.clipped_reads); - new_var.infos["PS"] = std::to_string(phase_set); - new_var.infos["MQsquared"] = std::to_string(stat.mapq_squared); - new_var.infos["SBF"] = stat.get_forward_strand_bias(); - new_var.infos["SBR"] = stat.get_reverse_strand_bias(); - new_var.infos["SBF1"] = stat.get_r1_forward_strand_bias(); - new_var.infos["SBF2"] = stat.get_r2_forward_strand_bias(); - new_var.infos["SBR1"] = stat.get_r1_reverse_strand_bias(); - new_var.infos["SBR2"] = stat.get_r2_reverse_strand_bias(); + //new_var.infos["PS"] = std::to_string(phase_set); + //stats.add_stats(new_var.infos); + new_var.stats = stats; } //long const ploidy = Options::const_instance()->ploidy; @@ -1653,8 +1663,7 @@ save_vcf(Vcf const & vcf, std::string const & filename) if (!ofs.is_open()) { BOOST_LOG_TRIVIAL(error) << __HERE__ << " Could not save VCF to location '" - << filename - << "'"; + << filename << "'"; std::exit(1); } diff --git a/src/typer/vcf_operations.cpp b/src/typer/vcf_operations.cpp index b101fc1..b76fd2f 100644 --- a/src/typer/vcf_operations.cpp +++ b/src/typer/vcf_operations.cpp @@ -73,82 +73,8 @@ vcf_merge(std::vector & vcfs, std::string const & output) // For each variant in the first VCF, add the calls from the other VCFs for (auto & var : vcf.variants) { - // Only output variant if it is in the genotyping region - // CR - uint32_t number_of_clipped_reads = 0; - - { - auto find_it = var.infos.find("CR"); - - if (find_it != var.infos.end()) - number_of_clipped_reads = std::strtoull(find_it->second.c_str(), NULL, 10); - } - - // MQ. 
Keep track of the total mapping quality rooted/squared - uint64_t mapq_squared = 0; - - { - auto find_it = var.infos.find("MQsquared"); - - if (find_it != var.infos.end()) - mapq_squared = std::strtoull(find_it->second.c_str(), NULL, 10); - } - - // SBF and SBR - std::vector strand_forward; - std::vector strand_reverse; - - std::vector r1_strand_forward; - std::vector r1_strand_reverse; - std::vector r2_strand_forward; - std::vector r2_strand_reverse; - -// std::vector realignment_distance; -// std::vector realignment_count; - - { - auto find_it = var.infos.find("SBF"); - - if (find_it != var.infos.end()) - strand_forward = split_bias_to_numbers(find_it->second); - - find_it = var.infos.find("SBR"); - - if (find_it != var.infos.end()) - strand_reverse = split_bias_to_numbers(find_it->second); - - // Read specific strand bias - find_it = var.infos.find("SBF1"); - - if (find_it != var.infos.end()) - r1_strand_forward = split_bias_to_numbers(find_it->second); - - find_it = var.infos.find("SBF2"); - - if (find_it != var.infos.end()) - r2_strand_forward = split_bias_to_numbers(find_it->second); - - find_it = var.infos.find("SBR1"); - - if (find_it != var.infos.end()) - r1_strand_reverse = split_bias_to_numbers(find_it->second); - - find_it = var.infos.find("SBR2"); - - if (find_it != var.infos.end()) - r2_strand_reverse = split_bias_to_numbers(find_it->second); - -// // Realignment count and distance -// find_it = var.infos.find("RACount"); -// -// if (find_it != var.infos.end()) -// realignment_count = split_bias_to_numbers(find_it->second); -// -// find_it = var.infos.find("RADist"); -// -// if (find_it != var.infos.end()) -// realignment_distance = split_bias_to_numbers(find_it->second); - } + //VarStats stats(var.seqs.size()); + //stats.read_stats(var.infos); for (auto & next_vcf : next_vcfs) { @@ -166,54 +92,7 @@ vcf_merge(std::vector & vcfs, std::string const & output) } assert(next_vcf.variants.size() == 1); - - // Get CR - { - auto find_it = next_vcf.variants[0].infos.find("CR"); - - if (find_it != next_vcf.variants[0].infos.end()) - number_of_clipped_reads += std::strtoull(find_it->second.c_str(), NULL, 10); - } - - // Get MQ - { - auto find_it = next_vcf.variants[0].infos.find("MQsquared"); - - if (find_it != next_vcf.variants[0].infos.end()) - mapq_squared += std::strtoull(find_it->second.c_str(), NULL, 10); - } - - - // Get SBF and SBR - { - auto add_to_bias_lambda = [&](std::string id, std::vector & bias) - { - auto find_it = next_vcf.variants[0].infos.find(id); - - if (find_it != next_vcf.variants[0].infos.end()) - { - std::vector split_nums = - split_bias_to_numbers(find_it->second); - - for (std::size_t i = 0; i < split_nums.size(); ++i) - { - if (i < strand_forward.size()) - bias[i] += split_nums[i]; - else - bias.push_back(split_nums[i]); - } - } - }; - - add_to_bias_lambda("SBF", strand_forward); - add_to_bias_lambda("SBR", strand_reverse); - add_to_bias_lambda("SBF1", r1_strand_forward); - add_to_bias_lambda("SBF2", r2_strand_forward); - add_to_bias_lambda("SBR1", r1_strand_reverse); - add_to_bias_lambda("SBR2", r2_strand_reverse); -// add_to_bias_lambda("RACount", realignment_count); -// add_to_bias_lambda("RADist", realignment_distance); - } + var.stats.add_stats(next_vcf.variants[0].stats); std::move(next_vcf.variants[0].calls.begin(), next_vcf.variants[0].calls.end(), @@ -222,23 +101,7 @@ vcf_merge(std::vector & vcfs, std::string const & output) next_vcf.variants.clear(); } - // Add strand bias, this must happend before the INFO is generated - var.infos["SBF"] = 
join_strand_bias(strand_forward); - var.infos["SBR"] = join_strand_bias(strand_reverse); - - var.infos["SBF1"] = join_strand_bias(r1_strand_forward); - var.infos["SBF2"] = join_strand_bias(r2_strand_forward); - var.infos["SBR1"] = join_strand_bias(r1_strand_reverse); - var.infos["SBR2"] = join_strand_bias(r2_strand_reverse); - -// var.infos["RACount"] = join_strand_bias(realignment_count); -// var.infos["RADist"] = join_strand_bias(realignment_distance); - - // Add MQsquared - var.infos["MQsquared"] = std::to_string(mapq_squared); - - // Add CR - var.infos["CR"] = std::to_string(number_of_clipped_reads); + //stats.add_stats(var.infos); // generate the rest of the INFOs var.generate_infos(); @@ -298,8 +161,7 @@ vcf_merge_and_break(std::vector const & vcfs, long const ploidy = copts.ploidy; GenomicRegion genomic_region(region); uint32_t const region_begin = 1 + absolute_pos.get_absolute_position(genomic_region.chr, - genomic_region.begin - ); + genomic_region.begin); uint32_t const region_end = absolute_pos.get_absolute_position(genomic_region.chr, genomic_region.end); @@ -334,8 +196,19 @@ vcf_merge_and_break(std::vector const & vcfs, vcf.sample_names.insert(vcf.sample_names.end(), next_vcf.sample_names.begin(), next_vcf.sample_names.end()); + } + // Check whether the sample names are unique + { + std::vector sample_names_cp(vcf.sample_names); + std::sort(sample_names_cp.begin(), sample_names_cp.end()); + auto uniq_it = std::unique(sample_names_cp.begin(), sample_names_cp.end()); + if (uniq_it != sample_names_cp.end()) + { + BOOST_LOG_TRIVIAL(warning) << __HERE__ << " Sample names are not unique. " + << "The output VCF file will contain duplicated sample names."; + } } // We have all the samples, write the header now @@ -361,68 +234,8 @@ vcf_merge_and_break(std::vector const & vcfs, for (long v = 0; v < static_cast(vcf.variants.size()); ++v) { auto & var = vcf.variants[v]; - - // CR - uint32_t number_of_clipped_reads = 0; - - { - auto find_it = var.infos.find("CR"); - - if (find_it != var.infos.end()) - number_of_clipped_reads = std::strtoull(find_it->second.c_str(), NULL, 10); - } - - // MQ. 
Keep track of the total mapping quality rooted/squared - uint64_t mapq_squared = 0; - - { - auto find_it = var.infos.find("MQsquared"); - - if (find_it != var.infos.end()) - mapq_squared = std::strtoull(find_it->second.c_str(), NULL, 10); - } - - // SBF and SBR - std::vector strand_forward; - std::vector strand_reverse; - - std::vector r1_strand_forward; - std::vector r1_strand_reverse; - std::vector r2_strand_forward; - std::vector r2_strand_reverse; - - { - auto find_it = var.infos.find("SBF"); - - if (find_it != var.infos.end()) - strand_forward = split_bias_to_numbers(find_it->second); - - find_it = var.infos.find("SBR"); - - if (find_it != var.infos.end()) - strand_reverse = split_bias_to_numbers(find_it->second); - - // Read specific strand bias - find_it = var.infos.find("SBF1"); - - if (find_it != var.infos.end()) - r1_strand_forward = split_bias_to_numbers(find_it->second); - - find_it = var.infos.find("SBF2"); - - if (find_it != var.infos.end()) - r2_strand_forward = split_bias_to_numbers(find_it->second); - - find_it = var.infos.find("SBR1"); - - if (find_it != var.infos.end()) - r1_strand_reverse = split_bias_to_numbers(find_it->second); - - find_it = var.infos.find("SBR2"); - - if (find_it != var.infos.end()) - r2_strand_reverse = split_bias_to_numbers(find_it->second); - } + var.is_info_generated = false; + //VarStats & stats = var.stats; // Trigger read next batch if (next_vcfs.size() > 0 && (v - v_next) == static_cast(next_vcfs[0].variants.size())) @@ -453,52 +266,7 @@ vcf_merge_and_break(std::vector const & vcfs, assert((v - v_next) >= 0); assert((v - v_next) < static_cast(next_vcf.variants.size())); auto & next_vcf_var = next_vcf.variants[v - v_next]; - - // Get CR - { - auto find_it = next_vcf_var.infos.find("CR"); - - if (find_it != next_vcf_var.infos.end()) - number_of_clipped_reads += std::strtoull(find_it->second.c_str(), NULL, 10); - } - - // Get MQ - { - auto find_it = next_vcf_var.infos.find("MQsquared"); - - if (find_it != next_vcf_var.infos.end()) - mapq_squared += std::strtoull(find_it->second.c_str(), NULL, 10); - } - - - // Get SBF and SBR - { - auto add_to_bias_lambda = - [&](std::string id, std::vector & bias) - { - auto find_it = next_vcf_var.infos.find(id); - - if (find_it != next_vcf_var.infos.end()) - { - std::vector split_nums = split_bias_to_numbers(find_it->second); - - for (long i = 0; i < static_cast(split_nums.size()); ++i) - { - if (i < static_cast(strand_forward.size())) - bias[i] += split_nums[i]; - else - bias.push_back(split_nums[i]); - } - } - }; - - add_to_bias_lambda("SBF", strand_forward); - add_to_bias_lambda("SBR", strand_reverse); - add_to_bias_lambda("SBF1", r1_strand_forward); - add_to_bias_lambda("SBF2", r2_strand_forward); - add_to_bias_lambda("SBR1", r1_strand_reverse); - add_to_bias_lambda("SBR2", r2_strand_reverse); - } + var.stats.add_stats(next_vcf_var.stats); std::move(next_vcf_var.calls.begin(), next_vcf_var.calls.end(), @@ -506,19 +274,7 @@ vcf_merge_and_break(std::vector const & vcfs, } // Add strand bias, this must happend before the INFO is generated - var.infos["SBF"] = join_strand_bias(strand_forward); - var.infos["SBR"] = join_strand_bias(strand_reverse); - - var.infos["SBF1"] = join_strand_bias(r1_strand_forward); - var.infos["SBF2"] = join_strand_bias(r2_strand_forward); - var.infos["SBR1"] = join_strand_bias(r1_strand_reverse); - var.infos["SBR2"] = join_strand_bias(r2_strand_reverse); - - // Add MQsquared - var.infos["MQsquared"] = std::to_string(mapq_squared); - - // Add CR - var.infos["CR"] = 
std::to_string(number_of_clipped_reads); + //stats.add_stats(var.infos); if (var.calls.size() != vcf.sample_names.size()) { @@ -527,6 +283,9 @@ vcf_merge_and_break(std::vector const & vcfs, std::exit(1); } + //assert(var.infos.count("SBF1") == 1); + assert(var.stats.per_allele.size() == var.seqs.size()); + // break down the merged variants bool const is_no_variant_overlapping{copts.no_variant_overlapping || force_no_variant_overlapping}; @@ -546,7 +305,7 @@ vcf_merge_and_break(std::vector const & vcfs, for (auto & new_var : new_variants) { // If we have not processed this variant before, do so now - if (!new_var.is_info_generated) + //if (!new_var.is_info_generated) { new_var.normalize(); @@ -555,38 +314,58 @@ vcf_merge_and_break(std::vector const & vcfs, new_var.generate_infos(); - // Remove MQsquared - new_var.infos.erase("MQsquared"); - new_var.infos.erase("PS"); + // Remove uneeded infos + //new_var.infos.erase("CRal"); + //new_var.infos.erase("MMal"); + //new_var.infos.erase("MQal"); + //new_var.infos.erase("MQsquared"); + //new_var.infos.erase("MQSal"); + //new_var.infos.erase("SDal"); } } update_reach(new_variants); std::move(new_variants.begin(), new_variants.end(), std::back_inserter(broken_vars)); - long constexpr W = 500; // Print variants that are more than W bp before the newest one + long constexpr W{700}; // Print variants that are more than W bp before the newest one - if ((broken_vars[0].abs_pos + 2 * W) < broken_vars[broken_vars.size() - 1].abs_pos) + auto min_max_it_pair = + std::minmax_element(broken_vars.begin(), + broken_vars.end(), + [](Variant const & a, Variant const & b) + { + return a.abs_pos < b.abs_pos; + }); + + assert(min_max_it_pair.first != broken_vars.end()); + assert(min_max_it_pair.second != broken_vars.end()); + long const min_abs_pos = min_max_it_pair.first->abs_pos; + long const max_abs_pos = min_max_it_pair.second->abs_pos; + + if ((min_abs_pos + 2l * W) < max_abs_pos) { // Make sure we do no print outside of the region - uint32_t const reg_end = - std::min(region_end, static_cast(broken_vars[broken_vars.size() - 1].abs_pos - W)); - - vcf.write_records(region_begin, - reg_end, - FILTER_ZERO_QUAL, - broken_vars); + long const reg_end = + std::min(static_cast(region_end), max_abs_pos - static_cast(W)); - // Remove variants that were written (or before the region, since they will never be printed) - broken_vars.erase( - std::remove_if(broken_vars.begin(), - broken_vars.end(), - [&](Variant const & v) - { - return v.abs_pos <= reg_end || v.abs_pos < region_begin; - }), - broken_vars.end() - ); + if (reg_end >= static_cast(region_begin)) + { + vcf.write_records(region_begin, + static_cast(reg_end), + FILTER_ZERO_QUAL, + broken_vars); + + // Remove variants that were written (or before the region, since they will never be printed) + broken_vars.erase( + std::remove_if(broken_vars.begin(), + broken_vars.end(), + [&](Variant const & v) + { + return static_cast(v.abs_pos) <= reg_end; + }), + broken_vars.end() + ); + } } var = Variant(); // Free memory @@ -608,6 +387,7 @@ vcf_concatenate(std::vector const & vcfs, std::string const & output, bool const SKIP_SORT, bool const SITES_ONLY, + bool const WRITE_TBI, std::string const & region) { gyper::Vcf vcf; @@ -701,10 +481,9 @@ vcf_concatenate(std::vector const & vcfs, if (SITES_ONLY) vcf.sample_names.clear(); - BOOST_LOG_TRIVIAL(debug) << "[graphtyper::vcf_operations] Total number of samples read is " - << vcf.sample_names.size(); + BOOST_LOG_TRIVIAL(debug) << __HERE__ << " Total number of samples read is " << 
vcf.sample_names.size(); - for (std::size_t i = 0; i < vcfs.size(); ++i) + for (long i{0}; i < static_cast(vcfs.size()); ++i) { // Skip if the filename contains '*' if (std::count(vcfs[i].begin(), vcfs[i].end(), '*') > 0) @@ -730,9 +509,9 @@ vcf_concatenate(std::vector const & vcfs, // Merge with the other VCF if (next_vcf.sample_names.size() != vcf.sample_names.size()) { - BOOST_LOG_TRIVIAL(error) << "[graphtyper::vcf_operations] The VCF file " + BOOST_LOG_TRIVIAL(error) << __HERE__ << " The VCF file " << vcfs[i] - << " has different amount of samples! (" + << " has unexpected number of sample names! (" << next_vcf.sample_names.size() << " but not " << vcf.sample_names.size() << ")"; std::exit(1); @@ -753,6 +532,9 @@ vcf_concatenate(std::vector const & vcfs, vcf.write(region); } + + if (WRITE_TBI) + vcf.write_tbi_index(); } @@ -771,33 +553,27 @@ vcf_break_down(std::string const & vcf, std::string const & output, std::string // Read the sample names and add them vcf_in.read_samples(); - BOOST_LOG_TRIVIAL(debug) << "[graphtyper::vcf_break_down] " - << "Total number of samples read is " + BOOST_LOG_TRIVIAL(debug) << __HERE__ << " Total number of samples read is " << vcf_in.sample_names.size(); // Copy sample names std::copy(vcf_in.sample_names.begin(), vcf_in.sample_names.end(), - std::back_inserter(vcf_out.sample_names) - ); + std::back_inserter(vcf_out.sample_names)); GenomicRegion genomic_region(region); uint32_t const region_begin = 1 + absolute_pos.get_absolute_position(genomic_region.chr, - genomic_region.begin - ); + genomic_region.begin); uint32_t const region_end = absolute_pos.get_absolute_position(genomic_region.chr, - genomic_region.end - ); + genomic_region.end); // Read first record bool not_at_end = vcf_in.read_record(); // Write header to output vcf_out.write_header(); - BOOST_LOG_TRIVIAL(debug) << "[graphtyper::vcf_break_down] VCF header written"; - - long reach = -1; // Indicates how long the previous variants reached + long reach{-1}; // Indicates how long the previous variants reached auto update_reach = [&reach](std::vector const & new_variants) @@ -820,8 +596,8 @@ vcf_break_down(std::string const & vcf, std::string const & output, std::string // Make sure the number of calls matches the number of samples if (vcf_in.variants[0].calls.size() != vcf_in.sample_names.size()) { - BOOST_LOG_TRIVIAL(error) << "[graphtyper::vcf_operations::vcf_break_down]" - << "The number of calls, " + BOOST_LOG_TRIVIAL(error) << __HERE__ + << " The number of calls, " << vcf_in.variants[0].calls.size() << " did not match the number of samples, " << vcf_in.sample_names.size(); @@ -831,6 +607,7 @@ vcf_break_down(std::string const & vcf, std::string const & output, std::string bool const is_no_variant_overlapping{Options::const_instance()->no_variant_overlapping}; bool const is_all_biallelic{Options::const_instance()->is_all_biallelic}; + assert(vcf_in.variants[0].infos.count("SBF1") == 1); std::vector new_variants = break_down_variant(std::move(vcf_in.variants[0]), reach, is_no_variant_overlapping, diff --git a/src/typer/vcf_writer.cpp b/src/typer/vcf_writer.cpp index 55c56f6..521c733 100644 --- a/src/typer/vcf_writer.cpp +++ b/src/typer/vcf_writer.cpp @@ -73,11 +73,8 @@ namespace gyper VcfWriter::VcfWriter(uint32_t variant_distance) { haplotypes = gyper::graph.get_all_haplotypes(variant_distance); - BOOST_LOG_TRIVIAL(debug) << "[graphtyper::vcf_writer] Number of variant nodes in graph " - << graph.var_nodes.size(); - BOOST_LOG_TRIVIAL(debug) << "[graphtyper::vcf_writer] Got " - << 
haplotypes.size() - << " haplotypes."; + BOOST_LOG_TRIVIAL(debug) << __HERE__ << " Number of variant nodes in graph " << graph.var_nodes.size(); + BOOST_LOG_TRIVIAL(debug) << __HERE__ << " Got " << haplotypes.size() << " haplotypes."; } @@ -107,20 +104,25 @@ VcfWriter::set_samples(std::vector const & samples) void VcfWriter::update_haplotype_scores_geno(GenotypePaths & geno, long const pn_index, Primers const * primers) { +#ifndef NDEBUG + if (Options::instance()->stats.size() > 0) + update_statistics(geno, pn_index); +#endif + if (are_genotype_paths_good(geno)) { if (primers) primers->check(geno); push_to_haplotype_scores(geno, pn_index); - -#ifndef NDEBUG - if (Options::instance()->stats.size() > 0) - update_statistics(geno, pn_index); - } -#else } -#endif //NDEBUG +//#ifndef NDEBUG +// if (Options::instance()->stats.size() > 0) +// update_statistics(geno, pn_index); +// } +//#else +// } +//#endif //NDEBUG } @@ -156,7 +158,6 @@ VcfWriter::print_variant_group_details() const } } - //std::lock_guard lock(io_mutex); write_gzipped_to_file(hap_file, hap_stats_fn); } @@ -209,7 +210,6 @@ VcfWriter::print_variant_details() const variant_file << '\n'; } - //std::lock_guard lock(io_mutex); write_gzipped_to_file(variant_file, variant_details_fn); } @@ -411,10 +411,14 @@ VcfWriter::push_to_haplotype_scores(GenotypePaths & geno, long const pn_index) assert(are_genotype_paths_good(geno)); // Quality metrics - bool const fully_aligned = geno.all_paths_fully_aligned(); + assert(geno.longest_path_length <= geno.read_length); + int const clipped_bp = geno.read_length - geno.longest_path_length; + bool const fully_aligned = clipped_bp == 0; + assert(fully_aligned == geno.all_paths_fully_aligned()); + bool const non_unique_paths = !geno.all_paths_unique(); std::size_t const mismatches = geno.paths[0].mismatches; - bool has_low_quality_snp = false; + bool has_low_quality_snp{false}; std::unordered_map recent_ids; // maps to is_overlapping @@ -434,7 +438,7 @@ VcfWriter::push_to_haplotype_scores(GenotypePaths & geno, long const pn_index) auto & hap = haplotypes[type_ids.first]; auto & num = p_it->nums[i]; - long constexpr MIN_OFFSET = 3; + long constexpr MIN_OFFSET{3}; bool is_overlapping = (p_it->start_ref_reach_pos() + MIN_OFFSET) <= p_it->var_order[i] && (p_it->end_ref_reach_pos() - MIN_OFFSET) > p_it->var_order[i]; recent_ids[type_ids.first] |= is_overlapping; @@ -487,9 +491,12 @@ VcfWriter::push_to_haplotype_scores(GenotypePaths & geno, long const pn_index) // Move INFO to variant statistics. // This needs to called before 'coverage_to_gts', because it uses the coverage of each variant. 
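// [editor's sketch] The calls just below replace the old boolean fully_aligned flag with
// explicit per-read metrics (clipped bases, mismatch count, alignment score difference)
// that are folded into the haplotype's variant statistics. The struct here is a small,
// hypothetical illustration of that accumulation pattern only; the names do not claim to
// match graphtyper's actual Haplotype/VarStats layout. Assumes nothing beyond standard C++.
struct AlleleQCSketch
{
  long clipped_bp{0};  // sum of soft-clipped bases over reads supporting the allele
  long mismatches{0};  // sum of mismatching bases over those reads
  long total_bp{0};    // sum of their read lengths

  void add_read(long clipped, long mism, long read_length)
  {
    clipped_bp += clipped;
    mismatches += mism;
    total_bp += read_length;
  }

  // e.g. a per-allele mismatch rate; -1 when no read supports the allele
  double mismatch_rate() const
  {
    return total_bp == 0 ? -1.0 : static_cast<double>(mismatches) / static_cast<double>(total_bp);
  }
};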
- haplotype.clipped_reads_to_stats(fully_aligned); + haplotype.clipped_reads_to_stats(clipped_bp, geno.read_length); haplotype.mapq_to_stats(geno.mapq); haplotype.strand_to_stats(geno.flags); + haplotype.mismatches_to_stats(mismatches, geno.read_length); + haplotype.score_diff_to_stats(geno.score_diff); + // TODO display these new stats // Update the likelihood scores haplotype.explain_to_score(pn_index, diff --git a/src/utilities/bamshrink.cpp b/src/utilities/bamshrink.cpp index ceffec8..16f758d 100644 --- a/src/utilities/bamshrink.cpp +++ b/src/utilities/bamshrink.cpp @@ -83,14 +83,17 @@ is_one_end_clipped(seqan::String > const & cigar, long con bool process_tags(seqan::BamAlignmentRecord & record, seqan::CharString & new_tags, bamshrink::Options const & opts) { - unsigned i = 0; - int64_t as = -1; - int64_t xs = -1; + unsigned i{0}; + int64_t as{-1}; + int64_t xs{-1}; + auto const tags_length = seqan::length(record.tags); - while (i < seqan::length(record.tags)) + while (i < tags_length) { //std::string curr_tag(2, record.tags[i]); //curr_tag[1] = record.tags[i+1]; + auto begin_it = begin(record.tags) + i; + auto end_it = begin_it; i += 3; char type = record.tags[i - 1]; bool is_as = false; @@ -120,16 +123,14 @@ process_tags(seqan::BamAlignmentRecord & record, seqan::CharString & new_tags, b // Check for RG if (record.tags[i - 3] == 'R' && record.tags[i - 2] == 'G') { - auto begin_it = begin(record.tags) + i - 3; - while (record.tags[i] != '\0' && record.tags[i] != '\n') ++i; ++i; - auto end_it = begin(record.tags) + i; - //unsigned const old_length = length(new_tags); - resize(new_tags, std::distance(begin_it, end_it), seqan::Exact()); - seqan::arrayCopyForward(begin_it, end_it, begin(new_tags)); + end_it = begin(record.tags) + i; + unsigned const old_length = length(new_tags); + resize(new_tags, old_length + std::distance(begin_it, end_it), seqan::Exact()); + seqan::arrayCopyForward(begin_it, end_it, begin(new_tags) + old_length); } else { @@ -158,6 +159,7 @@ process_tags(seqan::BamAlignmentRecord & record, seqan::CharString & new_tags, b } i += sizeof(int8_t); + end_it = begin(record.tags) + i; break; } @@ -177,6 +179,7 @@ process_tags(seqan::BamAlignmentRecord & record, seqan::CharString & new_tags, b } i += sizeof(uint8_t); + end_it = begin(record.tags) + i; break; } @@ -196,6 +199,7 @@ process_tags(seqan::BamAlignmentRecord & record, seqan::CharString & new_tags, b } i += sizeof(int16_t); + end_it = begin(record.tags) + i; break; } @@ -215,6 +219,7 @@ process_tags(seqan::BamAlignmentRecord & record, seqan::CharString & new_tags, b } i += sizeof(uint16_t); + end_it = begin(record.tags) + i; break; } @@ -234,6 +239,7 @@ process_tags(seqan::BamAlignmentRecord & record, seqan::CharString & new_tags, b } i += sizeof(int32_t); + end_it = begin(record.tags) + i; break; } @@ -253,21 +259,30 @@ process_tags(seqan::BamAlignmentRecord & record, seqan::CharString & new_tags, b } i += sizeof(uint32_t); + end_it = begin(record.tags) + i; break; } case 'f': { i += sizeof(float); + end_it = begin(record.tags) + i; break; } default: { - i = length(record.tags); // Unkown tag, stop + i = tags_length; // Unkown tag, stop break; } } + + if (is_as || is_xs) + { + unsigned const old_length = length(new_tags); + resize(new_tags, old_length + std::distance(begin_it, end_it), seqan::Exact()); + seqan::arrayCopyForward(begin_it, end_it, begin(new_tags) + old_length); + } } if (as != -1 && xs != -1 && (!seqan::hasFlagMultiple(record) || seqan::hasFlagNextUnmapped(record))) @@ -818,7 +833,9 @@ 
qualityFilterSlice2(Options const & opts, if (!removeNsAtEnds(rec, opts)) return; + // replace tags rec.tags = std::move(new_tags); + long const bin = (rec.beginPos - first_pos) / 50l; if (bin >= static_cast(bin_counts.size())) diff --git a/src/utilities/genotype.cpp b/src/utilities/genotype.cpp index fc8c0dc..834887c 100644 --- a/src/utilities/genotype.cpp +++ b/src/utilities/genotype.cpp @@ -28,6 +28,7 @@ #include #include +//#define GT_DEV 1 namespace { @@ -276,7 +277,9 @@ genotype_only_with_a_vcf(std::string const & ref_path, BOOST_LOG_TRIVIAL(info) << "Genotyping using an input VCF."; std::string const output_vcf = tmp + "/it1/final.vcf.gz"; std::string const out_dir = tmp + "/it1"; + gyper::Options const & copts = *(Options::const_instance()); mkdir(out_dir.c_str(), 0755); + bool const is_sv_graph{false}; bool const use_absolute_positions{true}; bool const check_index{true}; @@ -285,7 +288,7 @@ genotype_only_with_a_vcf(std::string const & ref_path, bool const is_writing_hap{false}; gyper::construct_graph(ref_path, - Options::const_instance()->vcf, + copts.vcf, padded_region.to_string(), is_sv_graph, use_absolute_positions, @@ -302,16 +305,18 @@ genotype_only_with_a_vcf(std::string const & ref_path, { PHIndex ph_index = index_graph(gyper::graph); + std::vector avg_cov_by_readlen(shrinked_sams.size(), -1.0); paths = gyper::call(shrinked_sams, - "", // graph_path + avg_cov_by_readlen, + "", // graph_path ph_index, out_dir, - "", // reference - ".", // region + "", // reference + ".", // region primers, - 5, //minimum_variant_support, - 0.25, //minimum_variant_support_ratio, + 5, // minimum_variant_support, + 0.25, // minimum_variant_support_ratio, is_writing_calls_vcf, is_discovery, is_writing_hap); @@ -323,9 +328,21 @@ genotype_only_with_a_vcf(std::string const & ref_path, // Append _calls.vcf.gz //for (auto & path : paths) // path += "_calls.vcf.gz"; + bool constexpr is_filter_zero_qual{false}; //> FILTER_ZERO_QUAL, force_no_variant_overlapping, force_no_break_down - vcf_merge_and_break(paths, tmp + "/graphtyper.vcf.gz", region.to_string(), true, false, false); + vcf_merge_and_break(paths, tmp + "/graphtyper.vcf.gz", region.to_string(), is_filter_zero_qual, false, false); + + if (copts.normal_and_no_variant_overlapping) + { + //> FILTER_ZERO_QUAL, force_no_variant_overlapping + vcf_merge_and_break(paths, + tmp + "/graphtyper.no_variant_overlapping.vcf.gz", + region.to_string(), + is_filter_zero_qual, + true, + false); + } // free memory graph = Graph(); @@ -442,6 +459,7 @@ genotype(std::string ref_path, minimum_variant_support = copts.genotype_aln_min_support; minimum_variant_support_ratio = copts.genotype_aln_min_support_ratio; is_writing_calls_vcf = false; // Skip writing calls vcf in release mode in all iterations except the last one + std::vector avg_cov_by_readlen(shrinked_sams.size(), -1.0); // Iteration 1 { @@ -449,11 +467,88 @@ genotype(std::string ref_path, std::string const output_vcf = tmp + "/it1/final.vcf.gz"; std::string const out_dir = tmp + "/it1"; mkdir(out_dir.c_str(), 0755); + +#ifdef GT_DEV gyper::construct_graph(ref_path, "", padded_region.to_string(), false, true, false); + + + Vcf indel_sites; + gyper::streamlined_discovery(shrinked_sams, + ref_path, + padded_region.to_string(), + tmp, + indel_sites); +#endif // GT_DEV + +#ifndef NDEBUG + /* + BOOST_LOG_TRIVIAL(info) << __HERE__ << " WIP Third pass starting."; + + // THIRD PASS + { + bool const is_sv_graph{false}; + bool const use_absolute_positions{true}; + bool const check_index{false}; + bool const 
is_writing_calls_vcf{true}; + bool const is_discovery{false}; + bool const is_writing_hap{false}; + + gyper::construct_graph(ref_path, + tmp + "/indel_sites.vcf.gz", + padded_region.to_string(), + is_sv_graph, + use_absolute_positions, + check_index); + + absolute_pos.calculate_offsets(gyper::graph.contigs); + +//#ifndef NDEBUG + // Save graph in debug mode + save_graph(tmp + "/graph_indels"); +//#endif // NDEBUG + + std::vector paths; + + { + PHIndex ph_index = index_graph(gyper::graph); + std::vector avg_cov_by_readlen(shrinked_sams.size(), -1.0); + + paths = gyper::call(shrinked_sams, + avg_cov_by_readlen, + "", // graph_path + ph_index, + tmp, + "", // reference + ".", // region + primers.get(), + 5, // minimum_variant_support, + 0.25, // minimum_variant_support_ratio, + is_writing_calls_vcf, + is_discovery, + is_writing_hap); + } + + vcf_merge_and_break(paths, + tmp + "/graphtyper.test.vcf.gz", + region.to_string(), + true, + false, + false); + + graph = Graph(); // free memory reserved for graph + Options::instance()->stats.clear(); + } + //*/ + ///// WIP ends +#endif // NDEBUG + + gyper::construct_graph(ref_path, "", padded_region.to_string(), false, true, false); + #ifndef NDEBUG // Save graph in debug mode save_graph(out_dir + "/graph"); -#endif // NDEBUG +#endif + absolute_pos.calculate_offsets(gyper::graph.contigs); auto output_paths = gyper::discover_directly_from_bam("", shrinked_sams, @@ -467,6 +562,11 @@ genotype(std::string ref_path, Vcf final_vcf; varmap.get_vcf(final_vcf, output_vcf); +#ifdef GT_DEV + // Add indel sites + std::move(indel_sites.variants.begin(), indel_sites.variants.end(), std::back_inserter(final_vcf.variants)); +#endif // GT_DEV + if (copts.prior_vcf.size() > 0) { BOOST_LOG_TRIVIAL(info) << "Inserting prior variant sites."; @@ -495,8 +595,11 @@ genotype(std::string ref_path, } #endif // NDEBUG - long FIRST_CALLONLY_ITERATION = 3; - long LAST_ITERATION = 4; + long FIRST_CALLONLY_ITERATION{3}; + long LAST_ITERATION{4}; + + if (copts.is_extra_call_only_iteration) + ++LAST_ITERATION; // Iteration 2 if (copts.is_only_cigar_discovery) @@ -528,6 +631,7 @@ genotype(std::string ref_path, minimum_variant_support_ratio = copts.genotype_dis_min_support_ratio; paths = gyper::call(shrinked_sams, + avg_cov_by_readlen, "", // graph_path ph_index, out_dir, @@ -608,6 +712,7 @@ genotype(std::string ref_path, PHIndex ph_index = index_graph(gyper::graph); paths = gyper::call(shrinked_sams, + avg_cov_by_readlen, "", // graph_path ph_index, out_dir, @@ -672,7 +777,7 @@ genotype(std::string ref_path, // Copy sites to system { std::ostringstream ss_cmd; - ss_cmd << "cp -p " << tmp << "/it" << (LAST_ITERATION - 1) << "/final.vcf.gz" << " " // Change to (LAST_ITERATION - 1) + ss_cmd << "cp -p " << tmp << "/it" << (LAST_ITERATION - 1) << "/final.vcf.gz" << " " << output_path << "/input_sites/" << region.chr << "/" << std::setw(9) << std::setfill('0') << (region.begin + 1) << '-' @@ -689,6 +794,8 @@ genotype(std::string ref_path, } } + std::string const index_ext = copts.is_csi ? 
".vcf.gz.csi" : ".vcf.gz.tbi"; + // Copy final VCFs auto copy_to_results = [&](std::string const & basename_no_ext, std::string const & extension, std::string const & id) @@ -704,7 +811,7 @@ genotype(std::string ref_path, int ret = system(ss_cmd.str().c_str()); - if (extension != ".vcf.gz.tbi" && ret != 0) + if (extension != index_ext && ret != 0) { BOOST_LOG_TRIVIAL(error) << __HERE__ << " This command failed '" << ss_cmd.str() << "'"; std::exit(ret); @@ -716,15 +823,46 @@ genotype(std::string ref_path, } }; - copy_to_results("graphtyper", ".vcf.gz", ""); // Copy final VCF - copy_to_results("graphtyper", ".vcf.gz.tbi", ""); // Copy tabix index for final VCF + std::string basename_no_ext{"graphtyper"}; + + // Check if tabix file exists + if (!is_file(tmp + "/graphtyper.vcf.gz.tbi") && !is_file(tmp + "/graphtyper.vcf.gz.csi")) + { + BOOST_LOG_TRIVIAL(warning) << "Tabix creation appears to have failed, " + << "I will retry sorting the VCF by reading it in whole."; + + bool const no_sort{false}; + bool const sites_only{false}; + bool const write_tbi{true}; + + std::string region{"."}; // already extracted region + + vcf_concatenate({tmp + "/graphtyper.vcf.gz"}, + tmp + "/graphtyper.sorted.vcf.gz", + no_sort, + sites_only, + write_tbi, + region); + + basename_no_ext = "graphtyper.sorted"; + } + + copy_to_results(basename_no_ext, ".vcf.gz", ""); // Copy final VCF + copy_to_results(basename_no_ext, index_ext, ""); // Copy tabix index for final VCF if (copts.normal_and_no_variant_overlapping) { copy_to_results("graphtyper.no_variant_overlapping", ".vcf.gz", ".no_variant_overlapping"); - copy_to_results("graphtyper.no_variant_overlapping", ".vcf.gz.tbi", ".no_variant_overlapping"); + copy_to_results("graphtyper.no_variant_overlapping", index_ext, ".no_variant_overlapping"); } +#ifndef NDEBUG + /*copy_to_results("indel_sites", ".vcf.gz", ".indel_sites");*/ + //copy_to_results("indel_sites", index_ext, ".indel_sites"); + //copy_to_results("graphtyper.test", ".vcf.gz", ".test"); + //copy_to_results("graphtyper.test", index_ext, ".test"); +#endif // NDEBUG + if (!copts.no_cleanup) { BOOST_LOG_TRIVIAL(info) << "Cleaning up temporary files."; diff --git a/src/utilities/genotype_camou.cpp b/src/utilities/genotype_camou.cpp index 289eac3..dedd79c 100644 --- a/src/utilities/genotype_camou.cpp +++ b/src/utilities/genotype_camou.cpp @@ -129,18 +129,12 @@ genotype_camou(std::string const & interval_fn, run_samtools_merge(shrinked_sams, tmp_bams); } - bool constexpr is_writing_hap { - false - }; + bool constexpr is_writing_hap{false}; for (auto const & interval : intervals) { - bool is_writing_calls_vcf { - false - }; - bool is_discovery { - true - }; + bool is_writing_calls_vcf{false}; + bool is_discovery{true}; BOOST_LOG_TRIVIAL(info) << "Genotyping interval " << interval; GenomicRegion genomic_region(interval); @@ -180,6 +174,7 @@ genotype_camou(std::string const & interval_fn, double minimum_variant_support_ratio = 0.35 / static_cast(num_intervals); paths = gyper::call(shrinked_sams, + avg_cov_by_readlen, "", // graph_path ph_index, out_dir, @@ -239,6 +234,7 @@ genotype_camou(std::string const & interval_fn, PHIndex ph_index = index_graph(gyper::graph); paths = gyper::call(shrinked_sams, + avg_cov_by_readlen, "", // graph_path ph_index, out_dir, diff --git a/src/utilities/genotype_sv.cpp b/src/utilities/genotype_sv.cpp index 78d8b57..e1c0b1c 100644 --- a/src/utilities/genotype_sv.cpp +++ b/src/utilities/genotype_sv.cpp @@ -28,27 +28,23 @@ void genotype_sv(std::string ref_path, std::string const & sv_vcf, 
std::vector const & sams, + std::vector const & avg_cov_by_readlen, GenomicRegion const & genomic_region, std::string const & output_path, bool const is_copy_reference) { // TODO: If the reference is only Ns then output an empty vcf with the sample names // TODO: Extract the reference sequence and use that to discover directly from BAM - bool constexpr is_writing_calls_vcf { - true - }; - bool constexpr is_writing_hap { - false - }; - bool constexpr is_discovery { - false - }; + gyper::Options const & copts = *(Options::const_instance()); + bool constexpr is_writing_calls_vcf{true}; + bool constexpr is_writing_hap{false}; + bool constexpr is_discovery{false}; long const NUM_SAMPLES = sams.size(); BOOST_LOG_TRIVIAL(info) << "SV genotyping region " << genomic_region.to_string(); BOOST_LOG_TRIVIAL(info) << "Path to genome is '" << ref_path << "'"; - BOOST_LOG_TRIVIAL(info) << "Running with up to " << Options::const_instance()->threads << " threads."; + BOOST_LOG_TRIVIAL(info) << "Running with up to " << copts.threads << " threads."; BOOST_LOG_TRIVIAL(info) << "Copying data from " << NUM_SAMPLES << " input SAM/BAM/CRAMs to local disk."; std::string tmp = create_temp_dir(genomic_region); @@ -132,13 +128,18 @@ genotype_sv(std::string ref_path, #endif // NDEBUG PHIndex ph_index = index_graph(gyper::graph); + std::string reference_fn{}; // empty by default + + if (Options::const_instance()->force_use_input_ref_for_cram_reading) + reference_fn = ref_path; std::vector paths = gyper::call(sams, + avg_cov_by_readlen, "", // graph_path ph_index, out_dir, - "", // reference + reference_fn, // reference unpadded_region.to_string(), // region nullptr, 5,//minimum_variant_support, @@ -190,10 +191,12 @@ genotype_sv(std::string ref_path, } }; + std::string const index_ext = copts.is_csi ? ".csi" : ".tbi"; + copy_vcf_to_system(""); // Copy final VCF - copy_vcf_to_system(".tbi"); // Copy tabix index for final VCF + copy_vcf_to_system(index_ext); // Copy tabix index for final VCF - if (!Options::instance()->no_cleanup) + if (!copts.no_cleanup) { BOOST_LOG_TRIVIAL(info) << "Cleaning up temporary files."; remove_file_tree(tmp.c_str()); @@ -224,13 +227,14 @@ void genotype_sv_regions(std::string ref_path, std::string const & sv_vcf, std::vector const & sams, + std::vector const & avg_cov_by_readlen, std::vector const & regions, std::string const & output_path, bool const is_copy_reference) { for (auto const & region : regions) { - genotype_sv(ref_path, sv_vcf, sams, region, output_path, is_copy_reference); + genotype_sv(ref_path, sv_vcf, sams, avg_cov_by_readlen, region, output_path, is_copy_reference); } } diff --git a/src/utilities/hts_parallel_reader.cpp b/src/utilities/hts_parallel_reader.cpp index e45115e..c584c42 100644 --- a/src/utilities/hts_parallel_reader.cpp +++ b/src/utilities/hts_parallel_reader.cpp @@ -42,13 +42,13 @@ check_if_maps_are_empty(std::vector const & maps) if (map.size() > 0) { - BOOST_LOG_TRIVIAL(warning) + BOOST_LOG_TRIVIAL(info) << __HERE__ << " Found a non-empty read map for with size " << map.size() << "! 
This likely means these reads have the BAM_FPAIRED flag set but have no mate read."; - for (auto it = map.begin(); it != map.end(); ++it) - BOOST_LOG_TRIVIAL(debug) << "Leftover read name: " << it->first; + //for (auto it = map.begin(); it != map.end(); ++it) + // BOOST_LOG_TRIVIAL(debug) << "Leftover read name: " << it->first; } } } @@ -76,11 +76,7 @@ HtsParallelReader::open(std::vector const & hts_file_paths, for (auto const & bam : hts_file_paths) { HtsReader f(store); - f.open(bam, region); - - if (!reference.empty()) - f.set_reference(reference); - + f.open(bam, region, reference); f.set_sample_index_offset(samples.size()); std::copy(f.samples.begin(), f.samples.end(), std::back_inserter(samples)); f.set_rg_index_offset(num_rg); @@ -94,7 +90,7 @@ HtsParallelReader::open(std::vector const & hts_file_paths, // Read the first record of each hts file for (long i = 0; i < n; ++i) { - bam1_t * hts_rec = hts_files[i].get_next_read(); + bam1_t * hts_rec = hts_files[i].get_next_read_in_order(); if (hts_rec) { @@ -127,9 +123,9 @@ HtsParallelReader::read_record(HtsRecord & hts_record) // reuse an old bam1_t record if it is available if (old_record) - hts_rec = hts_files[i].get_next_read(old_record); + hts_rec = hts_files[i].get_next_read_in_order(old_record); else - hts_rec = hts_files[i].get_next_read(); + hts_rec = hts_files[i].get_next_read_in_order(); std::pop_heap(heap.begin(), heap.end(), Cmp_gt_pair_bam1_t_fun); assert(hts_record.record == heap.back().record); @@ -283,8 +279,8 @@ genotype_only(HtsParallelReader const & hts_preader, assert(hts_rec.file_index >= 0); assert(hts_rec.file_index < static_cast(maps.size())); - long rg_i = 0; - long sample_i = 0; + long rg_i{0}; + long sample_i{0}; hts_preader.get_sample_and_rg_index(sample_i, rg_i, hts_rec); assert(maps.size() > 0); @@ -301,7 +297,6 @@ genotype_only(HtsParallelReader const & hts_preader, } std::pair geno_paths(prev_paths); - auto find_it = map_gpaths.find(read_name); if (find_it == map_gpaths.end()) @@ -309,17 +304,21 @@ genotype_only(HtsParallelReader const & hts_preader, if (hts_rec.record->core.flag & IS_PAIRED) { // Read is in a pair and we need to wait for its mate -#ifndef NDEBUG - update_paths(geno_paths, seq, rseq, hts_rec.record); -#else +//#ifndef NDEBUG +// update_paths(geno_paths, seq, rseq, hts_rec.record); +//#else update_paths(geno_paths, hts_rec.record); -#endif +//#endif map_gpaths[read_name] = std::move(geno_paths); // Did not find the read name } else { +//#ifndef NDEBUG +// GenotypePaths * selected = update_unpaired_read_paths(geno_paths, seq, rseq, hts_rec.record); +//#else GenotypePaths * selected = update_unpaired_read_paths(geno_paths, hts_rec.record); +//#endif if (selected) { @@ -336,11 +335,11 @@ genotype_only(HtsParallelReader const & hts_preader, else { // Do stuff with paired reads.... 
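// [editor's sketch] genotype_only() parks the first mate of a read pair in map_gpaths,
// keyed on the read name, and only processes the pair once the second mate arrives, as in
// the branches around this point. The helper below is a self-contained illustration of that
// buffering pattern with hypothetical names; it assumes <string>, <unordered_map> and
// <utility> are included and is not part of graphtyper's API.
template <typename TPaths, typename OnPair>
void process_mate_sketch(std::unordered_map<std::string, TPaths> & pending,
                         std::string const & read_name,
                         TPaths paths,
                         OnPair on_pair)
{
  auto find_it = pending.find(read_name);

  if (find_it == pending.end())
  {
    pending.emplace(read_name, std::move(paths)); // first mate seen: park it and wait
  }
  else
  {
    on_pair(find_it->second, paths); // both mates available: handle the completed pair
    pending.erase(find_it);          // only reads still missing a mate stay buffered
  }
}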
-#ifndef NDEBUG - update_paths(geno_paths, seq, rseq, hts_rec.record); -#else +//#ifndef NDEBUG +// update_paths(geno_paths, seq, rseq, hts_rec.record); +//#else update_paths(geno_paths, hts_rec.record); -#endif +//#endif if ((geno_paths.first.flags & IS_FIRST_IN_PAIR) == (find_it->second.first.flags & IS_FIRST_IN_PAIR)) { @@ -355,6 +354,8 @@ genotype_only(HtsParallelReader const & hts_preader, if (better_paths.first) { + assert(better_paths.second); + if (graph.is_sv_graph) { // Add reference depth @@ -413,13 +414,9 @@ genotype_and_discover(HtsParallelReader const & hts_preader, if (hts_rec.record->core.flag & IS_PAIRED) { // Read is in a pair and we need to wait for its mate -#ifndef NDEBUG - update_paths(geno_paths, seq, rseq, hts_rec.record); -#else update_paths(geno_paths, hts_rec.record); -#endif - further_update_paths_for_discovery(geno_paths, seq, rseq, hts_rec.record); + further_update_paths_for_discovery(geno_paths, hts_rec.record); map_gpaths[read_name] = std::move(geno_paths); // Did not find the read name } else @@ -428,7 +425,7 @@ genotype_and_discover(HtsParallelReader const & hts_preader, if (selected) { - further_update_unpaired_read_paths_for_discovery(*selected, seq, rseq, hts_rec.record); + further_update_unpaired_read_paths_for_discovery(*selected, hts_rec.record); // Discover new variants { @@ -449,11 +446,7 @@ genotype_and_discover(HtsParallelReader const & hts_preader, else { // Do stuff with paired reads.... -#ifndef NDEBUG - update_paths(geno_paths, seq, rseq, hts_rec.record); -#else update_paths(geno_paths, hts_rec.record); -#endif if ((geno_paths.first.flags & IS_FIRST_IN_PAIR) == (find_it->second.first.flags & IS_FIRST_IN_PAIR)) { @@ -462,7 +455,7 @@ genotype_and_discover(HtsParallelReader const & hts_preader, std::exit(1); } - further_update_paths_for_discovery(geno_paths, seq, rseq, hts_rec.record); + further_update_paths_for_discovery(geno_paths, hts_rec.record); // Find the better pair of geno paths std::pair better_paths = @@ -499,6 +492,7 @@ genotype_and_discover(HtsParallelReader const & hts_preader, void parallel_reader_genotype_only(std::string * out_path, std::vector const * hts_paths_ptr, + std::vector const * avg_cov_ptr, std::string const * output_dir_ptr, std::string const * reference_fn_ptr, std::string const * region_ptr, @@ -508,12 +502,14 @@ parallel_reader_genotype_only(std::string * out_path, bool const is_writing_hap) { assert(hts_paths_ptr); + assert(avg_cov_ptr); assert(ph_index_ptr); assert(output_dir_ptr); assert(reference_fn_ptr); assert(region_ptr); auto const & hts_paths = *hts_paths_ptr; + auto const & avg_cov_by_readlen = *avg_cov_ptr; auto const & ph_index = *ph_index_ptr; auto const & output_dir = *output_dir_ptr; auto const & reference = *reference_fn_ptr; @@ -537,13 +533,9 @@ parallel_reader_genotype_only(std::string * out_path, // Set up reference depth tracks and bin counts if we are SV calling ReferenceDepth reference_depth; - //std::vector > bin_counts; if (graph.is_sv_graph) - { reference_depth.set_depth_sizes(writer.pns.size()); - // bin_counts.resize(writer.pns.size()); - } std::vector maps; // One map for each file. 
Each map relates read names to their graph alignments maps.resize(hts_preader.get_num_rg()); @@ -562,11 +554,58 @@ parallel_reader_genotype_only(std::string * out_path, std::pair prev_paths; HtsRecord prev; + auto is_good_read = + [](bam1_t * record) -> bool + { + auto const & core = record->core; + + if ((core.flag & IS_UNMAPPED) != 0u) + return false; + + std::array static constexpr CIGAR_MAP = {{ + 'M', 'I', 'D', 'N', 'S', 'H', 'P', '=', 'X', 'B', '*', '*', '*', '*', '*', '*' + }}; + + auto cigar_it = record->data + core.l_qname; + long const N_CIGAR = core.n_cigar; + + // Check if both ends are clipped + if (N_CIGAR >= 2) + { + uint32_t opAndCnt; + memcpy(&opAndCnt, cigar_it, sizeof(uint32_t)); + char cigar_operation_front = CIGAR_MAP[opAndCnt & 15]; + uint32_t count_front = opAndCnt >> 4; + + cigar_it += sizeof(uint32_t) * (N_CIGAR - 1); + memcpy(&opAndCnt, cigar_it, sizeof(uint32_t)); + char cigar_operation_back = CIGAR_MAP[opAndCnt & 15]; + uint32_t count_back = opAndCnt >> 4; + + bool const is_one_clipped = (cigar_operation_front == 'S' && count_front >= 12) || + (cigar_operation_back == 'S' && count_back >= 12); + + bool const is_mate_far_away = core.tid != core.mtid || std::abs(core.pos - core.mpos) > 200000; + + if ((cigar_operation_front == 'S' && cigar_operation_back == 'S') || + (is_mate_far_away && is_one_clipped) || + (core.qual <= 15 && is_one_clipped) || + (core.qual <= 15 && is_mate_far_away)) + { + return false; + } + } + + return true; + }; + // Read the first record bool is_done = !hts_preader.read_record(prev); assert(is_done || prev.record); + bool const IS_SV_CALLING{graph.is_sv_graph}; - while (!is_done && (prev.record->core.flag & Options::const_instance()->sam_flag_filter) != 0u) + while (!is_done && ((prev.record->core.flag & Options::const_instance()->sam_flag_filter) != 0u || + (IS_SV_CALLING && !is_good_read(prev.record)))) { BOOST_LOG_TRIVIAL(debug) << __HERE__ << " Reread first record"; is_done = !hts_preader.read_record(prev); @@ -578,6 +617,54 @@ parallel_reader_genotype_only(std::string * out_path, } else { + // bin counts for the (extreme) coverage filter + long const first_pos{prev.record->core.pos}; + + std::vector > bin_counts; + bool const IS_COVERAGE_FILTER{IS_SV_CALLING && !Options::const_instance()->no_filter_on_coverage}; + + if (IS_COVERAGE_FILTER) + bin_counts.resize(hts_preader.get_samples().size()); + + // Lambda function to update bin counts for the first record. 
Returns false when there are too many reads in bin + auto update_bin_count = + [&](HtsRecord const & hts_rec) -> bool + { + if (!IS_COVERAGE_FILTER) + return true; + + long sample_i{0}; + long rg_i{0}; + hts_preader.get_sample_and_rg_index(sample_i, rg_i, hts_rec); + + if (sample_i >= static_cast(avg_cov_by_readlen.size()) || avg_cov_by_readlen[sample_i] <= 0.0) + return true; + + uint16_t max_bin_count = std::min(static_cast(std::numeric_limits::max()), + static_cast(avg_cov_by_readlen[sample_i] * 50.0 * 3.0 + 0.5)); + + assert(sample_i < static_cast(bin_counts.size())); + auto & sample_bin_counts = bin_counts[sample_i]; + long const bin = (hts_rec.record->core.pos - first_pos) / 50l; + + if (bin >= static_cast(sample_bin_counts.size())) + { + sample_bin_counts.resize(bin + 1, 0u); + ++sample_bin_counts[bin]; // increment read count in new bin + return true; + } + else if (sample_bin_counts[bin] > max_bin_count) + { + return false; + } + else + { + ++sample_bin_counts[bin]; // increment read count in bin + return true; + } + }; + + update_bin_count(prev); ++num_records; seqan::IupacString seq; seqan::IupacString rseq; @@ -589,20 +676,30 @@ parallel_reader_genotype_only(std::string * out_path, while (hts_preader.read_record(curr)) { // Ignore reads with the following bits set - if ((curr.record->core.flag & Options::const_instance()->sam_flag_filter) != 0u) + if ((curr.record->core.flag & Options::const_instance()->sam_flag_filter) != 0u || + (IS_SV_CALLING && !is_good_read(curr.record))) + { continue; + } ++num_records; if (equal_pos_seq(prev.record, curr.record)) { // The two records are equal + update_bin_count(curr); ++num_duplicated_records; genotype_only(hts_preader, writer, reference_depth, maps, prev_paths, ph_index, primers, curr, seq, rseq, false /*update prev_geno_paths*/); } else { + if (!update_bin_count(curr)) + { + --num_records; // Skip record + continue; + } + genotype_only(hts_preader, writer, reference_depth, maps, prev_paths, ph_index, primers, curr, seq, rseq, true /*update prev_geno_paths*/); hts_preader.move_record(prev, curr); // move curr to prev diff --git a/src/utilities/hts_reader.cpp b/src/utilities/hts_reader.cpp index 01f98be..9badcd3 100644 --- a/src/utilities/hts_reader.cpp +++ b/src/utilities/hts_reader.cpp @@ -19,20 +19,21 @@ HtsReader::HtsReader(HtsStore & _store) void -HtsReader::open(std::string const & path, std::string const & region) +HtsReader::open(std::string const & path, std::string const & region, std::string const & reference) { fp = hts_open(path.c_str(), "r"); if (!fp) { - std::cerr << "ERROR: Could not open BAM file " << path << std::endl; + BOOST_LOG_TRIVIAL(error) << __HERE__ << " Could not open BAM/CRAM file " << path; std::exit(1); } + set_reference(reference); fp->bam_header = sam_hdr_read(fp); // Read sample from header - if (!Options::instance()->get_sample_names_from_filename) + if (!Options::const_instance()->get_sample_names_from_filename) { std::string const header_text(fp->bam_header->text, fp->bam_header->l_text); std::vector header_lines; @@ -49,8 +50,7 @@ HtsReader::open(std::string const & path, std::string const & region) if (pos_samp == std::string::npos || pos_id == std::string::npos) { - std::cerr << "[graphtyper::utilities::hts_reader] ERROR: Could not parse RG and sample from header line:" - << line_it << std::endl; + BOOST_LOG_TRIVIAL(error) << __HERE__ << " Could not parse RG and sample from header line: " << line_it; std::exit(1); } @@ -68,10 +68,7 @@ HtsReader::open(std::string const & path, std::string const & 
region) std::string new_id = line_it.substr(pos_id + 4, pos_id_ends - pos_id - 4); std::string new_sample = line_it.substr(pos_samp + 4, pos_samp_ends - pos_samp - 4); - - BOOST_LOG_TRIVIAL(debug) << "[graphtyper::utilities::hts_reader] Added RG: '" - << new_id << "' => '" << new_sample << "'"; - + BOOST_LOG_TRIVIAL(debug) << __HERE__ << "Added RG: '" << new_id << "' => '" << new_sample << "'"; rg2index[new_id] = rg2sample_i.size(); auto find_it = std::find(samples.begin(), samples.end(), new_sample); @@ -98,15 +95,11 @@ HtsReader::open(std::string const & path, std::string const & region) sample = sample.substr(0, sample.find('.')); samples.push_back(std::move(sample)); - - // No read group - //rg2sample_i.push_back(0); - //rg2index[""] = 0; } rec = store.get(); - if (region == ".") + if (region.size() <= 1) { ret = sam_read1(fp, fp->bam_header, rec); } @@ -142,19 +135,19 @@ HtsReader::close() } -int +void HtsReader::set_reference(std::string const & reference_path) { + if ((reference_path.size() == 0) || (reference_path.size() == 1 && reference_path[0] == '.')) + return; + int ret2 = hts_set_fai_filename(fp, reference_path.c_str()); - if (ret2 < 0) + if (ret2 != 0) { - BOOST_LOG_TRIVIAL(error) << "[graphtyper::utilities::hts_reader] ERROR: " - << "Could not open reference FASTA file with filename " << reference_path; + BOOST_LOG_TRIVIAL(error) << __HERE__ << " Could not open reference FASTA file with filename " << reference_path; std::exit(1); } - - return ret2; } @@ -173,7 +166,7 @@ HtsReader::set_rg_index_offset(int new_rg_index_offset) bam1_t * -HtsReader::get_next_read(bam1_t * old_record) +HtsReader::get_next_read_in_order(bam1_t * old_record) { assert(old_record); @@ -189,6 +182,12 @@ HtsReader::get_next_read(bam1_t * old_record) // We do not have any records ready and we have reached the end of the file, stop now if (ret < 0) { + if (ret < -1) + { + BOOST_LOG_TRIVIAL(error) << __HERE__ << " htslib failed BAM/CRAM reading and returned " << ret; + std::exit(1); + } + // Deallocate memory when we reach the end of the file if (rec) store.push(rec); @@ -240,7 +239,7 @@ HtsReader::get_next_read(bam1_t * old_record) bam1_t * -HtsReader::get_next_read() +HtsReader::get_next_read_in_order() { // If we have some records ready in the vector, return those first if (records.size() > 0) @@ -253,6 +252,12 @@ HtsReader::get_next_read() // We do not have any records ready and we have reached the end of the file, stop now if (ret < 0) { + if (ret < -1) + { + BOOST_LOG_TRIVIAL(error) << __HERE__ << " htslib failed BAM/CRAM reading and returned " << ret; + std::exit(1); + } + // Deallocate memory when we reach the end of the file if (rec) store.push(rec); @@ -304,12 +309,20 @@ HtsReader::get_next_read() bam1_t * -HtsReader::get_next_pos() +HtsReader::get_next_read() { if (ret < 0) { + if (ret < -1) + { + BOOST_LOG_TRIVIAL(error) << __HERE__ << " htslib failed BAM/CRAM reading and returned " << ret; + std::exit(1); + } + // Deallocate memory when we reach the end of the file - bam_destroy1(rec); + if (rec) + store.push(rec); + return nullptr; } @@ -320,6 +333,34 @@ HtsReader::get_next_pos() } +bam1_t * +HtsReader::get_next_read(bam1_t * old_record) +{ + assert(old_record); + + if (ret < 0) + { + if (ret < -1) + { + BOOST_LOG_TRIVIAL(error) << __HERE__ << " htslib failed BAM/CRAM reading and returned " << ret; + std::exit(1); + } + + // Deallocate memory when we reach the end of the file + if (rec) + store.push(rec); + + store.push(old_record); + return nullptr; + } + + bam1_t * record = rec; + rec = 
old_record; + ret = hts_iter ? sam_itr_next(fp, hts_iter, rec) : sam_read1(fp, fp->bam_header, rec); + return record; +} + + void HtsReader::get_sample_and_rg_index(long & sample_i, long & rg_i, bam1_t * rec) const { @@ -337,7 +378,7 @@ HtsReader::get_sample_and_rg_index(long & sample_i, long & rg_i, bam1_t * rec) c if (!rg_tag) { - std::cerr << "[graphtyper::utilities::hts_reader] ERROR: Unable to find RG tag in read." << std::endl; + BOOST_LOG_TRIVIAL(error) << __HERE__ << " Unable to find RG tag in read."; std::exit(1); } @@ -346,7 +387,7 @@ HtsReader::get_sample_and_rg_index(long & sample_i, long & rg_i, bam1_t * rec) c if (find_rg_it == rg2index.end()) { - std::cerr << "[graphtyper::utilities::hts_reader] ERROR: Unable to find read group. " << read_group << std::endl; + BOOST_LOG_TRIVIAL(error) << __HERE__ << " Unable to find read group. " << read_group; std::exit(1); } diff --git a/src/utilities/type_conversions.cpp b/src/utilities/type_conversions.cpp index 81de8e3..69d4147 100644 --- a/src/utilities/type_conversions.cpp +++ b/src/utilities/type_conversions.cpp @@ -91,22 +91,31 @@ to_uint64(std::string const & s, int i) } +/** WIP uint16_t to_uint16(char const c) { switch (c) { + case 'a': case 'A': return 0u; + case 'c': case 'C': return 1u; + case 'g': case 'G': return 2u; + case 't': + case 'T': + return 3u; + default: - return 3u; // 'T' or anything else + BOOST_LOG_TRIVIAL(warning) << "[graphtyper::type_conversions] Invalid character " << c; + return 4ul; } } @@ -127,6 +136,83 @@ to_uint16(std::vector const & s, std::size_t i) } + +int32_t +to_uint16_with_check(std::vector const & s, long i) +{ + assert(static_cast(s.size()) >= 8 + i); + int32_t d{0}; + + for (long const j = i + 8; i < j; ++i) + { + d <<= 2; + uint16_t ret = to_uint16(s[i]); + + if (ret == 4) + return -1; + else + d += ret; + } + + return d; +} + + +std::vector +to_uint16_vec(std::string const & s) +{ + assert(s.size() >= 8); // Otherwise its impossible to read 8 bases from read! + uint16_t current{0u} + std::vector uints; + long const sequence_size = s.size(); + long const max_8mers{sequence_size - 7}; + uints.reserve(max_8mers); + int bp_since_N{0}; + + for (long i = 0; i < max_8mers; ++i) + { + char const c = s[i]; + ++bp_since_N; + + switch (c) + { + case 'a': + case 'A': + current *= 4; + //current += 0; + break; + + case 'c': + case 'C': + current *= 4; + current += 1; + + case 'g': + case 'G': + current *= 4; + current += 2; + break; + + case 't': + case 'T': + current *= 4; + current += 3; + break; + + default: + bp_since_N = 0; + break; + } + + if (bp_since_N >= 8) + uints.push_back(current); + } + + return uints; +} +*/ + + template std::vector to_uint64_vec(TSeq const & s, std::size_t i) @@ -190,60 +276,9 @@ to_uint64_vec(TSeq const & s, std::size_t i) } -template <> -std::vector -to_uint64_vec(std::string const & s, std::size_t i) -{ - assert(s.size() >= 32 + i); // Otherwise its impossible to read 32 bases from read! 
-  std::vector<uint64_t> uints(1, 0u);
-
-  for (unsigned const j = i + 32; i < j; ++i)
-  {
-    long const origin_size = uints.size();
-
-    if (origin_size > 97)
-      return std::vector<uint64_t>();
-
-    char const c = s[i];
-
-    for (long u = 0; u < origin_size; ++u)
-    {
-      switch (c)
-      {
-      case 'A':
-        uints[u] += 4 + 0;
-        break;
-
-      case 'C':
-        uints[u] += 4 + 1;
-        break;
-
-      case 'G':
-        uints[u] += 4 + 2;
-        break;
-
-      case 'T':
-        uints[u] += 4 + 3;
-        break;
-
-      default:
-        uints.push_back(uints[u] * 4 + 0); // A
-        uints.push_back(uints[u] * 4 + 1); // C
-        uints.push_back(uints[u] * 4 + 2); // G
-        uints[u] <<= 2; uints[u] += 3; // T
-        break;
-      }
-    }
-  }
-
-  return uints;
-}
-
-
 // Explicit instantiation
 template std::vector<uint64_t> to_uint64_vec(seqan::Dna5String const & s, std::size_t i);
 template std::vector<uint64_t> to_uint64_vec(seqan::IupacString const & s, std::size_t i);
-template std::vector<uint64_t> to_uint64_vec(std::string const & s, std::size_t i);
 
 std::array
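For reference, the commented-out to_uint16_vec() above is aiming at a rolling 2-bit 8-mer encoding of the read sequence, but it is left disabled in this patch (as committed it still has a missing semicolon after uint16_t current{0u} and a fall-through after the 'C' case). Below is a minimal, self-contained sketch of the same idea; the function name and interface are the editor's own and not part of graphtyper.

#include <cstdint>
#include <string>
#include <vector>

// Pack every window of 8 consecutive A/C/G/T bases into a 16-bit integer (2 bits per base);
// restart the window after any ambiguous base such as 'N'.
std::vector<uint16_t> encode_8mers(std::string const & s)
{
  std::vector<uint16_t> kmers;

  if (s.size() < 8)
    return kmers; // too short to contain a full 8-mer

  kmers.reserve(s.size() - 7);
  uint16_t current{0};
  int bases_since_reset{0};

  for (char const c : s)
  {
    int code;

    switch (c)
    {
    case 'a': case 'A': code = 0; break;
    case 'c': case 'C': code = 1; break;
    case 'g': case 'G': code = 2; break;
    case 't': case 'T': code = 3; break;
    default: code = -1; break; // ambiguous base
    }

    if (code < 0)
    {
      bases_since_reset = 0; // restart the rolling window after an ambiguous base
      continue;
    }

    // shift the window left by one base; the oldest base falls off the top two bits
    current = static_cast<uint16_t>((current << 2) | code);

    if (++bases_since_reset >= 8)
      kmers.push_back(current);
  }

  return kmers;
}

Packing 8 bases into 16 bits keeps any downstream lookup table small (65536 entries), and the reset counter guarantees that no emitted 8-mer spans an ambiguous base.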