diff --git a/src/asmode.h b/src/asmode.h index 37b2a5a..b54a256 100644 --- a/src/asmode.h +++ b/src/asmode.h @@ -36,6 +36,7 @@ namespace torali { struct AsmConfig { bool hasVcfFile; + bool skipGenotyping; uint16_t minMapQual; uint32_t minClip; uint32_t minCliqueSize; diff --git a/src/assemble.h b/src/assemble.h index 8efd5f2..ddfffdb 100644 --- a/src/assemble.h +++ b/src/assemble.h @@ -855,7 +855,7 @@ namespace torali tmpCons = svs[svid].consensus; svs[svid].consensus = svs[svid].consensus.substr(offsetTmpCons, svSize); } - if (alignConsensus(c, hdr, seq, NULL, svs[svid], true)) msaSuccess = true; + if ((c.skipGenotyping) || (alignConsensus(c, hdr, seq, NULL, svs[svid], true))) msaSuccess = true; if (!tmpCons.empty()) { svs[svid].consensus = tmpCons; svs[svid].consBp += offsetTmpCons; @@ -865,7 +865,7 @@ namespace torali std::string suffix = boost::to_upper_copy(std::string(seq + svs[svid].svStart, seq + std::min(seqlen, svs[svid].svStart + c.minConsWindow))); msaWfa(c, seqStore[svid], svs[svid].consensus, prefix, suffix); if ((int32_t) svs[svid].consensus.size() < svs[svid].insLen + 4 * c.minConsWindow) { - if (alignConsensus(c, hdr, seq, NULL, svs[svid], false)) msaSuccess = true; + if ((c.skipGenotyping) || (alignConsensus(c, hdr, seq, NULL, svs[svid], false))) msaSuccess = true; } } //std::cerr << msaSuccess << std::endl; @@ -921,7 +921,7 @@ namespace torali tmpCons = svs[svid].consensus; svs[svid].consensus = svs[svid].consensus.substr(offsetTmpCons, svSize); } - if (alignConsensus(c, hdr, seq, sndSeq, svs[svid], true)) msaSuccess = true; + if ((c.skipGenotyping) || (alignConsensus(c, hdr, seq, sndSeq, svs[svid], true))) msaSuccess = true; if (!tmpCons.empty()) { svs[svid].consensus = tmpCons; svs[svid].consBp += offsetTmpCons; @@ -931,7 +931,7 @@ namespace torali std::string suffix = boost::to_upper_copy(std::string(seq + svs[svid].svStart, seq + std::min(seqlen, svs[svid].svStart + c.minConsWindow))); msaWfa(c, seqStore[svid], svs[svid].consensus, prefix, suffix); if ((int32_t) svs[svid].consensus.size() < svs[svid].insLen + 4 * c.minConsWindow) { - if (alignConsensus(c, hdr, seq, NULL, svs[svid], false)) msaSuccess = true; + if ((c.skipGenotyping) || (alignConsensus(c, hdr, seq, NULL, svs[svid], false))) msaSuccess = true; } } //std::cerr << msaSuccess << std::endl; diff --git a/src/delly.h b/src/delly.h index f138953..a7524c1 100644 --- a/src/delly.h +++ b/src/delly.h @@ -65,6 +65,7 @@ namespace torali bool hasExcludeFile; bool hasVcfFile; bool hasDumpFile; + bool skipGenotyping; std::set svtset; DnaScore aliscore; boost::filesystem::path outfile; diff --git a/src/modvcf.h b/src/modvcf.h index d696f66..bce49fe 100644 --- a/src/modvcf.h +++ b/src/modvcf.h @@ -319,7 +319,7 @@ vcfParse(TConfig const& c, bam_hdr_t* hd, std::vector& template inline void -vcfOutput(TConfig const& c, std::vector const& svs, TJunctionCountMap const& jctCountMap, TReadCountMap const& readCountMap, TCountMap const& spanCountMap) +vcfOutput(TConfig const& c, std::vector& svs, TJunctionCountMap const& jctCountMap, TReadCountMap const& readCountMap, TCountMap const& spanCountMap) { // BoLog class BoLog bl; @@ -416,10 +416,10 @@ vcfOutput(TConfig const& c, std::vector const& svs, TJ now = boost::posix_time::second_clock::local_time(); std::cerr << '[' << boost::posix_time::to_simple_string(now) << "] " << "Genotyping" << std::endl; bcf1_t *rec = bcf_init(); - for(typename TSVs::const_iterator svIter = svs.begin(); svIter!=svs.end(); ++svIter) { + for(typename TSVs::iterator svIter = svs.begin(); svIter!=svs.end(); ++svIter) { if ((svIter->srSupport == 0) && (svIter->peSupport == 0)) continue; // In discovery mode, skip SVs that have less than 2 reads support after genotyping - if (!c.hasVcfFile) { + if ((!c.hasVcfFile) && (!c.skipGenotyping)) { uint32_t totalGtSup = 0; for(unsigned int file_c = 0; file_c < c.files.size(); ++file_c) { totalGtSup += spanCountMap[file_c][svIter->id].alt.size() + jctCountMap[file_c][svIter->id].alt.size(); @@ -449,6 +449,7 @@ vcfOutput(TConfig const& c, std::vector const& svs, TJ padNumber.insert(padNumber.begin(), 8 - padNumber.length(), '0'); id += padNumber; bcf_update_id(hdr, rec, id.c_str()); + if (c.skipGenotyping) svIter->alleles = _addAlleles("N", std::string(bamhd->target_name[svIter->chr2]), *svIter, svIter->svt); std::string alleles = _replaceIUPAC(svIter->alleles); bcf_update_alleles_str(hdr, rec, alleles.c_str()); bcf_update_filter(hdr, rec, &tmpi, 1); @@ -461,6 +462,7 @@ vcfOutput(TConfig const& c, std::vector const& svs, TJ dellyVersion += dellyVersionNumber; bcf_update_info_string(hdr,rec, "SVMETHOD", dellyVersion.c_str()); if (svIter->svt < DELLY_SVT_TRANS) { + if (svEndPos <= svStartPos) svEndPos = svStartPos + 1; tmpi = svEndPos; bcf_update_info_int32(hdr, rec, "END", &tmpi, 1); } else { diff --git a/src/tegua.h b/src/tegua.h index 8d4e08c..9452f72 100644 --- a/src/tegua.h +++ b/src/tegua.h @@ -39,6 +39,7 @@ namespace torali { bool hasExcludeFile; bool hasVcfFile; bool hasAltFile; + bool skipGenotyping; uint16_t minMapQual; uint16_t minGenoQual; uint32_t minClip; @@ -169,7 +170,7 @@ namespace torali { } // SV Genotyping - genotypeLR(c, svs, jctMap, rcMap); + if (!c.skipGenotyping) genotypeLR(c, svs, jctMap, rcMap); // VCF Output vcfOutput(c, svs, jctMap, rcMap, spanMap); @@ -235,6 +236,7 @@ namespace torali { ("pruning,j", boost::program_options::value(&c.graphPruning)->default_value(1000), "graph pruning cutoff") ("extension,e", boost::program_options::value(&c.indelExtension)->default_value(0.5), "enforce indel extension") ("scoring,s", boost::program_options::value(&scoring)->default_value("3,-2,-3,-1"), "alignment scoring") + ("skipGT,k", "skip consensus realignment and genotyping (useful for complex SVs)") ; boost::program_options::positional_options_description pos_args; @@ -382,6 +384,10 @@ namespace torali { } } + // Consensus alignment? + if (vm.count("skipGT")) c.skipGenotyping = true; + else c.skipGenotyping = false; + // Show cmd boost::posix_time::ptime now = boost::posix_time::second_clock::local_time(); std::cerr << '[' << boost::posix_time::to_simple_string(now) << "] ";