Skip to content

Commit

Permalink
Edge array Python API
Browse files Browse the repository at this point in the history
  • Loading branch information
GertjanBisschop authored and mergify[bot] committed Jun 22, 2022
1 parent fb67c48 commit aa38338
Show file tree
Hide file tree
Showing 8 changed files with 87 additions and 5 deletions.
4 changes: 2 additions & 2 deletions docs/data-model.md
Original file line number Diff line number Diff line change
Expand Up @@ -906,7 +906,7 @@ from IPython.display import HTML
def html_quintuple_table(ts, show_virtual_root=False, show_convenience_arrays=False):
tree = ts.first()
columns = ["node", "parent", "left_child", "right_child", "left_sib", "right_sib"]
convenience_arrays = ["num_children"]
convenience_arrays = ["num_children", "edge"]
if show_convenience_arrays:
columns += convenience_arrays
data = {k:[] for k in columns}
Expand Down Expand Up @@ -965,7 +965,7 @@ information on each node in the tree. These arrays are not essential to
represent the trees within a treesequence. However, they can be useful for
specific algorithms (e.g. when computing tree (im)balance metrics). The
convience arrays that have been implemented are:
{attr}`Tree.num_children_array`.
{attr}`Tree.num_children_array`, {attr}`Tree.edge_array`.

Adding convenience arrays to the example above results in this table:

Expand Down
2 changes: 2 additions & 0 deletions docs/python-api.md
Original file line number Diff line number Diff line change
Expand Up @@ -445,6 +445,7 @@ Node information
Tree.right_child
Tree.left_child
Tree.children
Tree.edge
Descendant nodes
.. autosummary::
Expand Down Expand Up @@ -479,6 +480,7 @@ high performance interface which can be used in conjunction with the equivalent
Tree.left_sib_array
Tree.right_sib_array
Tree.num_children_array
Tree.edge_array
```


Expand Down
4 changes: 4 additions & 0 deletions python/CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,10 @@
and maintainability. Please open an issue if this affects your application.
(:user:`jeromekelleher`, :user:`benjeffery`, :pr:`2120`).

- Add ``Tree.edge_array`` and ``Tree.edge``. Returns the edge id of the edge encoding
the relationship of each node with its parent.
(:user:`GertjanBisschop`, :issue:`2361`, :pr:`2357`)

**Breaking Changes**

- The JSON metadata codec now interprets the empty string as an empty object. This means
Expand Down
36 changes: 36 additions & 0 deletions python/_tskitmodule.c
Original file line number Diff line number Diff line change
Expand Up @@ -10311,6 +10311,22 @@ Tree_get_right_sib(Tree *self, PyObject *args)
return ret;
}

static PyObject *
Tree_get_edge(Tree *self, PyObject *args)
{
PyObject *ret = NULL;
tsk_id_t edge_id;
int node;

if (Tree_get_node_argument(self, args, &node) != 0) {
goto out;
}
edge_id = self->tree->edge[node];
ret = Py_BuildValue("i", (int) edge_id);
out:
return ret;
}

static PyObject *
Tree_get_children(Tree *self, PyObject *args)
{
Expand Down Expand Up @@ -11052,6 +11068,19 @@ Tree_get_num_children_array(Tree *self, void *closure)
return ret;
}

static PyObject *
Tree_get_edge_array(Tree *self, void *closure)
{
PyObject *ret = NULL;

if (Tree_check_state(self) != 0) {
goto out;
}
ret = Tree_make_array(self, NPY_INT32, self->tree->edge);
out:
return ret;
}

static PyGetSetDef Tree_getsetters[]
= { { .name = "parent_array",
.get = (getter) Tree_get_parent_array,
Expand All @@ -11071,6 +11100,9 @@ static PyGetSetDef Tree_getsetters[]
{ .name = "num_children_array",
.get = (getter) Tree_get_num_children_array,
.doc = "The num_children array in the quintuply linked tree." },
{ .name = "edge_array",
.get = (getter) Tree_get_edge_array,
.doc = "The edge array in the quintuply linked tree." },
{ NULL } };

static PyMethodDef Tree_methods[] = {
Expand Down Expand Up @@ -11182,6 +11214,10 @@ static PyMethodDef Tree_methods[] = {
.ml_meth = (PyCFunction) Tree_get_right_sib,
.ml_flags = METH_VARARGS,
.ml_doc = "Returns the right-most sib of node u" },
{ .ml_name = "get_edge",
.ml_meth = (PyCFunction) Tree_get_edge,
.ml_flags = METH_VARARGS,
.ml_doc = "Returns the edge id connecting node u to its parent" },
{ .ml_name = "get_children",
.ml_meth = (PyCFunction) Tree_get_children,
.ml_flags = METH_VARARGS,
Expand Down
12 changes: 10 additions & 2 deletions python/tests/test_highlevel.py
Original file line number Diff line number Diff line change
Expand Up @@ -2364,7 +2364,8 @@ def modify(ts, func):
def test_tree_node_edges(self):
for ts in get_example_tree_sequences():
edge_visited = np.zeros(ts.num_edges, dtype=bool)
for mapping, tree in zip(ts._tree_node_edges(), ts.trees()):
for tree in ts.trees():
mapping = tree.edge_array
node_mapped = mapping >= 0
edge_visited[mapping[node_mapped]] = True
# Note that tree.nodes() does not necessarily list all the nodes
Expand Down Expand Up @@ -3711,13 +3712,15 @@ def verify_tree_arrays(self, tree):
assert tree.left_sib_array.shape == (N,)
assert tree.right_sib_array.shape == (N,)
assert tree.num_children_array.shape == (N,)
assert tree.edge_array.shape == (N,)
for u in range(N):
assert tree.parent(u) == tree.parent_array[u]
assert tree.left_child(u) == tree.left_child_array[u]
assert tree.right_child(u) == tree.right_child_array[u]
assert tree.left_sib(u) == tree.left_sib_array[u]
assert tree.right_sib(u) == tree.right_sib_array[u]
assert tree.num_children(u) == tree.num_children_array[u]
assert tree.edge(u) == tree.edge_array[u]

def verify_tree_arrays_python_ts(self, ts):
pts = tests.PythonTreeSequence(ts)
Expand All @@ -3730,6 +3733,7 @@ def verify_tree_arrays_python_ts(self, ts):
assert np.all(st1.left_sib_array == st2.left_sib)
assert np.all(st1.right_sib_array == st2.right_sib)
assert np.all(st1.num_children_array == st2.num_children)
assert np.all(st1.edge_array == st2.edge)

def test_tree_arrays(self):
ts = msprime.simulate(10, recombination_rate=1, random_seed=1)
Expand All @@ -3747,6 +3751,7 @@ def test_tree_arrays(self):
"left_sib",
"right_sib",
"num_children",
"edge",
],
)
def test_tree_array_properties(self, array):
Expand All @@ -3770,6 +3775,7 @@ def verify_empty_tree(self, tree):
assert tree.left_child(u) == tskit.NULL
assert tree.right_child(u) == tskit.NULL
assert tree.num_children(u) == 0
assert tree.edge(u) == tskit.NULL
if not ts.node(u).is_sample():
assert tree.left_sib(u) == tskit.NULL
assert tree.right_sib(u) == tskit.NULL
Expand Down Expand Up @@ -3817,6 +3823,7 @@ def verify_trees_identical(self, t1, t2):
assert np.all(t1.left_sib_array == t2.left_sib_array)
assert np.all(t1.right_sib_array == t2.right_sib_array)
assert np.all(t1.num_children_array == t2.num_children_array)
assert np.all(t1.edge_array == t2.edge_array)
assert list(t1.sites()) == list(t2.sites())

def test_copy_seek(self):
Expand Down Expand Up @@ -3924,7 +3931,8 @@ def test_node_edges(self):
for tree in ts.trees():
nodes = set(tree.nodes())
midpoint = sum(tree.interval) / 2
mapping = tree._node_edges()
# mapping = tree._node_edges()
mapping = tree.edge_array
for node, edge in enumerate(mapping):
if node in nodes and tree.parent(node) != tskit.NULL:
edge_above_node = np.where(
Expand Down
2 changes: 2 additions & 0 deletions python/tests/test_lowlevel.py
Original file line number Diff line number Diff line change
Expand Up @@ -2684,6 +2684,7 @@ class TestTree(LowLevelTestCase):
"left_sib",
"right_sib",
"num_children",
"edge",
]

def test_options(self):
Expand Down Expand Up @@ -3036,6 +3037,7 @@ def check_tree(tree):
assert tree.get_left_child(u) == _tskit.NULL
assert tree.get_right_child(u) == _tskit.NULL
assert tree.get_num_children(u) == 0
assert tree.get_edge(u) == _tskit.NULL

tree = _tskit.Tree(ts)
check_tree(tree)
Expand Down
2 changes: 1 addition & 1 deletion python/tskit/drawing.py
Original file line number Diff line number Diff line change
Expand Up @@ -1174,7 +1174,7 @@ def __init__(
tree_right = tree.interval.right
edge_left = ts.tables.edges.left
edge_right = ts.tables.edges.right
node_edges = tree._node_edges()
node_edges = tree.edge_array
# whittle mutations down so we only need look at those above the tree nodes
mut_t = ts.tables.mutations
focal_mutations = np.isin(mut_t.node, np.fromiter(nodes, mut_t.node.dtype))
Expand Down
30 changes: 30 additions & 0 deletions python/tskit/trees.py
Original file line number Diff line number Diff line change
Expand Up @@ -689,6 +689,7 @@ def _make_arrays(self):
self._left_sib_array = self._ll_tree.left_sib_array
self._right_sib_array = self._ll_tree.right_sib_array
self._num_children_array = self._ll_tree.num_children_array
self._edge_array = self._ll_tree.edge_array

@property
def tree_sequence(self):
Expand Down Expand Up @@ -1199,6 +1200,35 @@ def num_children_array(self):
"""
return self._num_children_array

def edge(self, u):
"""
Returns the id of the edge encoding the relationship between ``u``
and its parent, or :data:`tskit.NULL` if ``u`` is a root, virtual root
or is not a node in the current tree.
:param int u: The node of interest.
:return: Id of edge connecting u to its parent.
:rtype: int
"""
return self._ll_tree.get_edge(u)

@property
def edge_array(self):
"""
A numpy array (dtype=np.int32) of edge ids encoding the relationship
between the child node ``u`` and its parent, such that
``tree.edge_array[u] == tree.edge(u)`` for all
``0 <= u <= ts.num_nodes``. See the :meth:`~.edge`
method for details on the semantics of tree edge and the
:ref:`sec_data_model_tree_structure` section for information on the
quintuply linked tree encoding.
.. include:: substitutions/virtual_root_array_note.rst
.. include:: substitutions/tree_array_warning.rst
"""
return self._edge_array

# Sample list.

def left_sample(self, u):
Expand Down

0 comments on commit aa38338

Please sign in to comment.