Skip to content

Commit

Permalink
global head/tail patching works with quirks
Browse files Browse the repository at this point in the history
  • Loading branch information
ekg committed Aug 25, 2024
1 parent 4d41477 commit 5659380
Show file tree
Hide file tree
Showing 3 changed files with 95 additions and 25 deletions.
109 changes: 88 additions & 21 deletions src/common/wflign/src/wflign_patch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -721,6 +721,68 @@ std::vector<alignment_t> do_progressive_wfa_patch_alignment(
return alignments;
}

void erode_head(std::vector<char>& unpatched, uint64_t& query_pos, uint64_t& target_pos, int erode_k) {
int match_count = 0;
auto it = unpatched.begin();
uint64_t query_erased = 0, target_erased = 0;

while (it != unpatched.end()) {
if (*it == 'M' || *it == 'X') {
match_count++;
if (match_count >= erode_k) {
break;
}
query_pos++;
target_pos++;
query_erased++;
target_erased++;
} else {
//match_count = 0;
if (*it == 'I') {
query_pos++;
query_erased++;
}
if (*it == 'D') {
target_pos++;
target_erased++;
}
}
it++;
}

std::cerr << "erode_head: eroded " << query_erased << " query and " << target_erased << " target" << std::endl;
// Erase the eroded part
unpatched.erase(unpatched.begin(), it);
}

void erode_tail(std::vector<char>& unpatched, uint64_t& query_end, uint64_t& target_end, int erode_k) {
int match_count = 0;
auto it = unpatched.rbegin();
uint64_t q_offset = 0, t_offset = 0;

while (it != unpatched.rend()) {
if (*it == 'M' || *it == 'X') {
match_count++;
if (match_count >= erode_k) {
break;
}
q_offset++;
t_offset++;
} else {
//match_count = 0;
if (*it == 'I') q_offset++;
if (*it == 'D') t_offset++;
}
it++;
}

// Erase the eroded part
std::cerr << "erode_tail: eroded " << q_offset << " query and " << t_offset << " target" << std::endl;
unpatched.erase(it.base(), unpatched.end());
query_end -= q_offset;
target_end -= t_offset;
}

void write_merged_alignment(
std::ostream &out,
const std::vector<alignment_t *> &trace,
Expand Down Expand Up @@ -859,20 +921,20 @@ void write_merged_alignment(
};

auto patching = [&query, &query_name, &query_length, &query_start,
&query_offset, &target, &target_name,
&target_length, &target_start, &target_offset,
&target_total_length, &target_end,
&target_pointer_shift,
&wflign_max_len_major,
&wflign_max_len_minor,
&distance_close_big_enough_indels, &min_wf_length,
&max_dist_threshold, &wf_aligner,
&multi_patch_alns,
&convex_penalties,
&chain_gap, &max_patching_score, &min_inversion_length, &erode_k
&query_offset, &query_end, &target, &target_name,
&target_length, &target_start, &target_offset,
&target_total_length, &target_end,
&target_pointer_shift,
&wflign_max_len_major,
&wflign_max_len_minor,
&distance_close_big_enough_indels, &min_wf_length,
&max_dist_threshold, &wf_aligner,
&multi_patch_alns,
&convex_penalties,
&chain_gap, &max_patching_score, &min_inversion_length, &erode_k
#ifdef WFA_PNG_TSV_TIMING
,&emit_patching_tsv,
&out_patching_tsv
,&emit_patching_tsv,
&out_patching_tsv
#endif
](std::vector<char> &unpatched,
std::vector<char> &patched,
Expand All @@ -883,14 +945,20 @@ void write_merged_alignment(

auto q = unpatched.begin();

uint64_t query_pos = query_start;
uint64_t target_pos = target_start;

uint64_t query_delta = 0;
uint64_t target_delta = 0;

bool got_alignment = false;

// trim back small matches at the start
erode_head(unpatched, query_start, target_start, 7);

uint64_t query_pos = query_start;
uint64_t target_pos = target_start;

// Trim spurious matches off the tail of the alignment
erode_tail(unpatched, query_end, target_end, 7);

// Head patching
if (query_start > 0 || target_start > 0) {
alignment_t head_aln;
Expand Down Expand Up @@ -1262,7 +1330,7 @@ void write_merged_alignment(
query_length - query_pos,
target,
target_pos,
target_length - target_pos,
std::min(target_length, target_length - target_pos + (query_length - query_pos)),
wf_aligner,
convex_penalties,
tail_aln,
Expand All @@ -1281,7 +1349,6 @@ void write_merged_alignment(
}
}


#ifdef WFLIGN_DEBUG
std::cerr << "[wflign::wflign_affine_wavefront] got unsorted "
"patched traceback: ";
Expand Down Expand Up @@ -1486,15 +1553,15 @@ void write_merged_alignment(
std::cerr << std::endl;
#endif

#ifdef VALIDATE_WFA_WFLIGN
//#ifdef VALIDATE_WFA_WFLIGN
// std::cerr << "query_length: " << query_length << std::endl;
// std::cerr << "target_length: " << target_length <<
// std::endl; std::cerr << "query_start: " << query_start <<
// std::endl; std::cerr << "target_start: " << target_start <<
// std::endl;

if (!validate_trace(tracev, query, target - target_pointer_shift,
query_length, target_length, query_start,
query_length, target_length + 2 * wflign_max_len_minor, query_start,
target_start)) {
std::cerr
<< "cigar failure at alignment (before head/tail del trimming) "
Expand All @@ -1510,7 +1577,7 @@ void write_merged_alignment(
<< target_offset + target_end << std::endl;
exit(1);
}
#endif
//#endif

// trim deletions at start and end of tracev
uint64_t begin_offset;
Expand Down
3 changes: 2 additions & 1 deletion src/common/wflign/src/wflign_patch.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,8 @@ namespace wflign {
alignment_t& rev_aln,
const int64_t& chain_gap,
const int& max_patching_score,
const uint64_t& min_inversion_length);
const uint64_t& min_inversion_length,
bool ends_free);
void trim_alignment(alignment_t& aln);
std::vector<alignment_t> do_progressive_wfa_patch_alignment(
const char* query,
Expand Down
8 changes: 5 additions & 3 deletions src/map/include/computeMap.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1698,7 +1698,7 @@ namespace skch
// tweak start and end positions of consecutive mappings
// TODO: XXX double check that the consecutive mappings are not overlapping!!!
// extra: force global alignment by patching head and tail to mapping start and end coordinates
adjustConsecutiveMappings(it, it_end, param.segLength);
//adjustConsecutiveMappings(it, it_end, param.segLength);

// First pass: Mark cuttable positions
const int consecutive_mappings_window = 4; // Configurable parameter
Expand All @@ -1716,13 +1716,15 @@ namespace skch
if (current == it || current == std::prev(it_end)) {
continue;
}
if (current->queryStartPos - std::prev(current)->queryEndPos > param.segLength / 2
|| current->refStartPos - std::prev(current)->refEndPos > param.segLength / 2) {
if (current->queryStartPos - std::prev(current)->queryEndPos > param.segLength / 5
|| current->refStartPos - std::prev(current)->refEndPos > param.segLength / 5) {
is_cuttable[std::distance(it, current) - 1] = false;
is_cuttable[std::distance(it, current)] = false;
}
}

adjustConsecutiveMappings(it, it_end, param.segLength);

auto fragment_start = it;
auto current = it;
offset_t accumulate_length = 0;
Expand Down

0 comments on commit 5659380

Please sign in to comment.