Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Better metadata for reversion push events #396

Merged
merged 2 commits into from
Nov 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 30 additions & 10 deletions sc2ts/tree_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -368,14 +368,17 @@ def coalesce_mutations(ts, samples=None):
group_parent_time = max_sib_time + diff / 2
assert group_parent_time < parent_time

md_overlap = [(x.site, x.inherited_state, x.derived_state) for x in overlap]
md_overlap = [
f"{x.inherited_state}{int(ts.sites_position[x.site])}{x.derived_state}"
for x in overlap
]
md_sibs = [int(sib) for sib in sibs]
tables.nodes.add_row(
flags=core.NODE_IS_MUTATION_OVERLAP,
time=group_parent_time,
metadata={
"sc2ts": {
"overlap": md_overlap,
"mutations": md_overlap,
"sibs": md_sibs,
}
},
Expand Down Expand Up @@ -443,7 +446,11 @@ def push_up_reversions(ts, samples, date="1999-01-01"):
for site in child_muts:
if site in parent_muts:
if child_muts[site].is_reversion_of(parent_muts[site]):
reversions.append((site, child))
reversions.append(
ReversionDescriptor(
site, child, child_muts[site], parent_muts[site]
)
)
# Pick the maximum set of reversions per sib group so that we're not
# trying to resolve incompatible reversion sets.
if len(reversions) > len(sib_groups[parent]):
Expand All @@ -456,9 +463,9 @@ def push_up_reversions(ts, samples, date="1999-01-01"):
if len(reversions) == 0:
continue

sample = reversions[0][1]
assert all(x[1] == sample for x in reversions)
sites = [x[0] for x in reversions]
sample = reversions[0].child_node
assert all(x.child_node == sample for x in reversions)
sites = [x.site for x in reversions]
# Remove the edges above the sample and its parent
edges_to_delete.extend([tree.edge(sample), tree.edge(parent)])
# Create new node that is fractionally older than the current
Expand All @@ -468,16 +475,21 @@ def push_up_reversions(ts, samples, date="1999-01-01"):
# make it proportional to the number of mutations or something.
eps = tree.branch_length(parent) * 0.125
w_time = tree.time(parent) + eps
parent_summary = []
for r in reversions:
position = int(ts.sites_position[r.site])
parent_summary.append(
f"{r.parent_mutation.inherited_state}{position}{r.parent_mutation.derived_state}"
)
w = tables.nodes.add_row(
flags=core.NODE_IS_REVERSION_PUSH,
time=w_time,
metadata={
"sc2ts": {
# FIXME it's not clear how helpful the metadata is here
# If we had separate pass for each group, it would probably
# be easier to reason about.
"sites": [int(x) for x in sites],
"date_added": date,
# Store the parent mutation, that is the mutations we were trying
# to revert
"mutations": parent_summary,
}
},
)
Expand Down Expand Up @@ -524,6 +536,14 @@ def is_reversion_of(self, other) -> bool:
return self.derived_state == other.inherited_state


@dataclasses.dataclass
class ReversionDescriptor:
site: int
child_node: int
child_mutation: MutationDescriptor
parent_mutation: MutationDescriptor


def node_mutation_descriptors(ts, u):
"""
Return a mapping of unique mutations
Expand Down
19 changes: 18 additions & 1 deletion tests/test_inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -395,6 +395,22 @@ def test_2020_02_02_include_samples(
assert np.sum(ts.nodes_time[ts.samples()] == 0) == 5
assert ts.num_samples == 23

def test_2020_02_02_mutation_overlap(
self, tmp_path, fx_ts_map, fx_alignment_store, fx_metadata_db
):
base_ts = fx_ts_map["2020-02-01"]
ts = sc2ts.extend(
alignment_store=fx_alignment_store,
metadata_db=fx_metadata_db,
base_ts=fx_ts_map["2020-02-01"],
date="2020-02-02",
match_db=sc2ts.MatchDb.initialise(tmp_path / "match.db"),
)
assert ts.num_samples == 22
node = ts.node(27)
assert node.flags == sc2ts.NODE_IS_MUTATION_OVERLAP
assert node.metadata == {"sc2ts": {"mutations": ["C17373T"], "sibs": [11, 23]}}

@pytest.mark.parametrize("max_samples", range(1, 6))
def test_2020_02_02_max_samples(
self, tmp_path, fx_ts_map, fx_alignment_store, fx_metadata_db, max_samples
Expand Down Expand Up @@ -567,7 +583,8 @@ def test_2020_02_08(self, tmp_path, fx_ts_map, fx_alignment_store, fx_metadata_d
assert rp_node.flags == sc2ts.NODE_IS_REVERSION_PUSH
assert rp_node.metadata["sc2ts"] == {
"date_added": "2020-02-08",
"sites": [5019],
# The mutation we tried to revert is the inverse
"mutations": ["C5025T"],
}
ts.tables.assert_equals(fx_ts_map["2020-02-08"].tables, ignore_provenance=True)

Expand Down