Skip to content

Commit

Permalink
Report link graph lengths and split graph transition length limit to …
Browse files Browse the repository at this point in the history
…add limit for read length
  • Loading branch information
adamnovak committed Feb 7, 2025
1 parent be112fc commit 976236f
Show file tree
Hide file tree
Showing 6 changed files with 137 additions and 77 deletions.
23 changes: 18 additions & 5 deletions src/algorithms/chain_items.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -191,7 +191,11 @@ transition_iterator lookback_transition_iterator(size_t max_lookback_bases,
return iterator;
}

transition_iterator zip_tree_transition_iterator(const std::vector<SnarlDistanceIndexClusterer::Seed>& seeds, const ZipCodeTree& zip_code_tree, size_t max_lookback_bases) {
transition_iterator zip_tree_transition_iterator(const std::vector<SnarlDistanceIndexClusterer::Seed>& seeds,
const ZipCodeTree& zip_code_tree,
size_t max_graph_lookback_bases,
size_t max_read_lookback_bases) {

// TODO: Remove seeds because we only bring it here for debugging and it complicates the dependency relationships
return [&seeds, &zip_code_tree, max_lookback_bases](const VectorView<Anchor>& to_chain,
const SnarlDistanceIndex& distance_index,
Expand Down Expand Up @@ -236,6 +240,14 @@ transition_iterator zip_tree_transition_iterator(const std::vector<SnarlDistance
return;
}

if (read_distance > max_read_lookback_bases) {
// Too far in read to consider
#ifdef debug_transition
std::cerr << "\tToo far apart in read (" << read_distance << "/" << max_read_lookback_bases << ")." << std::endl;
#endif
return;
}

if (source_anchor.read_exclusion_end() > dest_anchor.read_exclusion_start()) {
// The actual core anchor part is reachable in the read, but we cut these down from overlapping minimizers.
#ifdef debug_transition
Expand Down Expand Up @@ -313,12 +325,12 @@ transition_iterator zip_tree_transition_iterator(const std::vector<SnarlDistance
std::cerr << "Destination seed S" << dest_seed.seed << " " << seeds[dest_seed.seed].pos << (dest_seed.is_reverse ? "rev" : "") << " is anchor #" << found_dest_anchor->second << std::endl;
#endif

for (ZipCodeTree::reverse_iterator source = zip_code_tree.look_back(dest, max_lookback_bases); source != zip_code_tree.rend(); ++source) {
for (ZipCodeTree::reverse_iterator source = zip_code_tree.look_back(dest, max_graph_lookback_bases); source != zip_code_tree.rend(); ++source) {
// For each source seed right to left
ZipCodeTree::seed_result_t source_seed = *source;

#ifdef debug_transition
std::cerr << "\tSource seed S" << source_seed.seed << " " << seeds[source_seed.seed].pos << (source_seed.is_reverse ? "rev" : "") << " at distance " << source_seed.distance << "/" << max_lookback_bases;
std::cerr << "\tSource seed S" << source_seed.seed << " " << seeds[source_seed.seed].pos << (source_seed.is_reverse ? "rev" : "") << " at distance " << source_seed.distance << "/" << max_graph_lookback_bases;
#endif

if (!source_seed.is_reverse && !dest_seed.is_reverse) {
Expand All @@ -343,7 +355,7 @@ transition_iterator zip_tree_transition_iterator(const std::vector<SnarlDistance
}
} else if (source_seed.is_reverse && dest_seed.is_reverse) {
// Both of these are in the same orientation but it is opposite to the read.
// We need to find source as an anchor *started*, and thensave them flipped for later.
// We need to find source as an anchor *started*, and then save them flipped for later.
auto found_source_anchor = seed_to_starting.find(source_seed.seed);
if (found_source_anchor != seed_to_starting.end()) {
// We can transition between these seeds without jumping to/from the middle of an anchor.
Expand All @@ -367,7 +379,8 @@ transition_iterator zip_tree_transition_iterator(const std::vector<SnarlDistance
}
}

// Sort the transitions so we handle them in akl allowed order for dynamic programming.
// Sort the transitions so we handle them in an allowed order for dynamic programming.
// TODO: Should we drop things obviously not reachable in the read before sorting to save time?
std::sort(all_transitions.begin(), all_transitions.end(), [&](const std::tuple<size_t, size_t, size_t>& a, const std::tuple<size_t, size_t, size_t>& b) {
// Return true if a's destination seed is before b's in the read, and false otherwise.
return to_chain[get<1>(a)].read_start() < to_chain[get<1>(b)].read_start();
Expand Down
19 changes: 13 additions & 6 deletions src/algorithms/chain_items.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -308,9 +308,16 @@ transition_iterator lookback_transition_iterator(size_t max_lookback_bases,
size_t lookback_item_hard_cap);

/**
* Return a transition iterator that uses zip code tree iteration to select traversals.
* Return a transition iterator that uses zip code tree iteration to select
* traversals.
*
* Enumerates transitions under the max graph lookback bases, and filters them
* by the max read lookback bases.
*/
transition_iterator zip_tree_transition_iterator(const std::vector<SnarlDistanceIndexClusterer::Seed>& seeds, const ZipCodeTree& zip_code_tree, size_t max_lookback_bases);
transition_iterator zip_tree_transition_iterator(const std::vector<SnarlDistanceIndexClusterer::Seed>& seeds,
const ZipCodeTree& zip_code_tree,
size_t max_graph_lookback_bases,
size_t max_read_lookback_bases);

/**
* Fill in the given DP table for the explored chain scores ending with each
Expand All @@ -323,11 +330,11 @@ transition_iterator zip_tree_transition_iterator(const std::vector<SnarlDistance
*
* Input items must be sorted by start position in the read.
*
* Takes the given per-item bonus for each item collected, and scales item scores by the given scale.
* Takes the given per-item bonus for each item collected, and scales item
* scores by the given scale.
*
* Uses a finite lookback in items and in read bases when checking where we can
* come from to reach an item. Also, once a given number of good-looking
* predecessor items have been found, stop looking back.
* Uses a transition iterator to enumerate where we can come from to reach an
* item.
*
* Limits transitions to those involving indels of the given size or less, to
* avoid very bad transitions.
Expand Down
36 changes: 24 additions & 12 deletions src/minimizer_mapper.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -254,12 +254,18 @@ class MinimizerMapper : public AlignerClient {
static constexpr size_t default_gapless_extension_limit = 0;
size_t gapless_extension_limit = default_gapless_extension_limit;

/// How many bases should we look back when making fragments?
static constexpr size_t default_fragment_max_lookback_bases = 300;
size_t fragment_max_lookback_bases = default_fragment_max_lookback_bases;
/// How many bases should we look back when making fragments, per base of read length?
static constexpr double default_fragment_max_lookback_bases_per_base = 0.03;
double fragment_max_lookback_bases_per_base = default_fragment_max_lookback_bases_per_base;
/// How many bases should we look back in the graph when making fragments?
static constexpr size_t default_fragment_max_graph_lookback_bases = 300;
size_t fragment_max_graph_lookback_bases = default_fragment_max_graph_lookback_bases;
/// How many bases should we look back in the graph when making fragments, per base of read length?
static constexpr double default_fragment_max_graph_lookback_bases_per_base = 0.03;
double fragment_max_graph_lookback_bases_per_base = default_fragment_max_graph_lookback_bases_per_base;
/// How many bases should we look back in the read when making fragments?
static constexpr size_t default_fragment_max_read_lookback_bases = std::numeric_limits<size_t>::max();
size_t fragment_max_read_lookback_bases = default_fragment_max_read_lookback_bases;
/// How many bases should we look back in the read when making fragments, per base of read length?
static constexpr double default_fragment_max_read_lookback_bases_per_base = 1.0;
double fragment_max_read_lookback_bases_per_base = default_fragment_max_read_lookback_bases_per_base;
/// How many fragments should we try and make when fragmenting something?
static constexpr size_t default_max_fragments = std::numeric_limits<size_t>::max();
size_t max_fragments = default_max_fragments;
Expand Down Expand Up @@ -322,12 +328,18 @@ class MinimizerMapper : public AlignerClient {
static constexpr size_t default_max_direct_to_chain = 0;
size_t max_direct_to_chain = default_max_direct_to_chain;

/// How many bases should we look back when chaining?
static constexpr size_t default_max_lookback_bases = 3000;
size_t max_lookback_bases = default_max_lookback_bases;
/// How many bases should we look back when chaining, per base of read length?
static constexpr double default_max_lookback_bases_per_base = 0.3;
double max_lookback_bases_per_base = default_max_lookback_bases_per_base;
/// How many bases should we look back in the graph when chaining?
static constexpr size_t default_max_graph_lookback_bases = 3000;
size_t max_graph_lookback_bases = default_max_graph_lookback_bases;
/// How many bases should we look back in the graph when chaining, per base of read length?
static constexpr double default_max_graph_lookback_bases_per_base = 0.3;
double max_graph_lookback_bases_per_base = default_max_graph_lookback_bases_per_base;
/// How many bases should we look back in the read when chaining?
static constexpr size_t default_max_read_lookback_bases = std::numeric_limits<size_t>::max();
size_t max_read_lookback_bases = default_max_read_lookback_bases;
/// How many bases should we look back in the read when chaining, per base of read length?
static constexpr double default_max_read_lookback_bases_per_base = 1.0;
double max_read_lookback_bases_per_base = default_max_read_lookback_bases_per_base;

/// How much of a bonus should we give to each item in
/// fragmenting/chaining?
Expand Down
Loading

0 comments on commit 976236f

Please sign in to comment.