Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor skeleton.to_json to serialize skeleton object without jsonpickle #1934

Closed
wants to merge 20 commits into from
Closed
Changes from 9 commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
157 changes: 134 additions & 23 deletions sleap/skeleton.py
Original file line number Diff line number Diff line change
Expand Up @@ -987,7 +987,7 @@ def to_json(self, node_to_idx: Optional[Dict[Node, int]] = None) -> str:
"""Convert the :class:`Skeleton` to a JSON representation.

Args:
node_to_idx: optional dict which maps :class:`Node`sto index
node_to_idx: optional dict which maps :class:`Nodes` to index
in some list. This is used when saving
:class:`Labels` where we want to serialize the
:class:`Nodes` outside the :class:`Skeleton` object.
Expand All @@ -999,33 +999,144 @@ def to_json(self, node_to_idx: Optional[Dict[Node, int]] = None) -> str:
Returns:
A string containing the JSON representation of the skeleton.
"""
jsonpickle.set_encoder_options("simplejson", sort_keys=True, indent=4)
if node_to_idx is not None:
indexed_node_graph = nx.relabel_nodes(
G=self._graph, mapping=node_to_idx
) # map nodes to int
else:
indexed_node_graph = self._graph

# Encode to JSON
graph = json_graph.node_link_data(indexed_node_graph)
# Create global list of nodes with all nodes from all skeletons.
nodes_dicts = []
node_to_id = {}
for node in self.nodes:
if node not in node_to_id:
print(f'node: {node}')
# Note: This ID is not the same as the node index in the skeleton in
# legacy SLEAP, but we do not retain this information in the labels, so
# IDs will be different.
#
# The weight is also kept fixed here, but technically this is not
# modified or used in legacy SLEAP either.
#
# TODO: Store legacy metadata in labels to get byte-level compatibility?
node_to_id[node] = len(node_to_id)
talmo marked this conversation as resolved.
Show resolved Hide resolved
Comment on lines +1015 to +1016
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Acknowledged the TODO for storing legacy metadata.

The TODO comment is a good reminder for a future enhancement to improve compatibility with legacy data.

Let me know if you need any help with implementing the storage of legacy metadata in labels to achieve byte-level compatibility. I can assist with the implementation or open a GitHub issue to track this task.

print(f'node_to_id: {node_to_id}')
nodes_dicts.append({"name": node.name, "weight": 1.0})
print(f'nodes_dicts: {nodes_dicts}')

# Build links dicts for normal edges.
edges_dicts = []
for edge_ind, edge in enumerate(self.edges):
print(f'edge_ind: {edge_ind}')
print(f'edge: {edge}')
if edge_ind == 0:
edge_type = {
"py/reduce": [
{"py/type": "sleap.skeleton.EdgeType"},
{"py/tuple": [1]}, # 1 = real edge, 2 = symmetry edge
]
}
print(f'edge_type: {edge_type}')
else:
edge_type = {"py/id": 1}
print(f'edge_type: {edge_type}')

# Edges are stored as a list of tuples of nodes.
# The source and target are the first and second nodes in the tuple (edge),
# respectively.
source = edge[0]
print(f'source: {source}')
print(f'node_to_id[source]: {node_to_id[source]}')
target = edge[1]
print(f'target: {target}')
print(f'node_to_id[target]: {node_to_id[target]}')
edges_dicts.append(
{
# Note: Insert idx is not the same as the edge index in the skeleton
# in legacy SLEAP.
"edge_insert_idx": edge_ind,
"key": 0, # Always 0.
"source": {"py/id": node_to_id[source]},
"target": {"py/id": node_to_id[target]},
"type": edge_type,
}
)
print(f'edges_dicts: {edges_dicts}')

# Build links dicts for symmetry edges.
for symmetry_ind, symmetry in enumerate(self.symmetries):
print(f'symmetry_ind: {symmetry_ind}')
print(f'symmetry: {symmetry}')
if symmetry_ind == 0:
edge_type = {
"py/reduce": [
{"py/type": "sleap.skeleton.EdgeType"},
{"py/tuple": [2]}, # 1 = real edge, 2 = symmetry edge
]
}
else:
edge_type = {"py/id": 2}

src, dst = tuple(symmetry.nodes)
print(f'src: {src}')
print(f'dst: {dst}')
edges_dicts.append(
{
"key": 0,
"source": {"py/id": node_to_id[src]},
"target": {"py/id": node_to_id[dst]},
"type": edge_type,
}
)

# SLEAP v1.3.0 added `description` and `preview_image` to `Skeleton`, but saving
# these fields breaks data format compatibility. Currently, these are only
# added in our custom template skeletons. To ensure backwards data format
# compatibility of user data, we only save these fields if they are not None.
# Create skeleton dict.
if self.is_template:
data = {
"nx_graph": graph,
skeleton_dict = {
"directed": True,
"graph": {
"name": self.name,
"num_edges_inserted": len(self.edges),
},
"links": edges_dicts,
"multigraph": True,
# In the order in Skeleton.nodes and must match up with nodes_dicts.
"nodes": [{"id": {"py/id": node_to_id[node]}} for node in self.nodes],
Copy link
Collaborator

@roomrys roomrys Sep 12, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Assuming jsonpickle is decoding by reading things in order, we expect that all Nodes will only appear in the "nodes" list once. Therefore, we can (hopefully) move the "nodes" list before the "links" dictionary. The "nodes" list would need to define the nodes in the {py/object: ..., py/state: ...} dictionary. Then, the "links" dictionary should just reference nodes by "py/id" (referencing the Node's index in the "nodes" list).

Suggested change
"links": edges_dicts,
"multigraph": True,
# In the order in Skeleton.nodes and must match up with nodes_dicts.
"nodes": [{"id": {"py/id": node_to_id[node]}} for node in self.nodes],
"nodes": [{"id": {"py/object": "sleap.skeleton.Node", "py/state": {"name": node.name, "weight": node.weight}}} for node in self.nodes],
"links": edges_dicts,
"multigraph": True,

"description": self.description,
"preview_image": self.preview_image,
}
else:
data = graph

json_str = jsonpickle.encode(data)

return json_str
skeleton_dict ={
"directed": True,
"graph": {
"name": self.name,
"num_edges_inserted": len(self.edges),
},
"links": edges_dicts,
"multigraph": True,
# In the order in Skeleton.nodes and must match up with nodes_dicts.
"nodes": [{"id": {"py/id": node_to_id[node]}} for node in self.nodes],}
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Assuming jsonpickle is decoding by reading things in order, we can assume that all Nodes will only appear in the "nodes" list once. Therefore, we can (hopefully) move the "nodes" list before the "links" dictionary. The "nodes" list would need to define the nodes in the {py/object: ..., py/state: ...} dictionary. Then, the "links" dictionary should just reference nodes by "py/id" (referencing the Node's index in the "nodes" list).

Suggested change
"links": edges_dicts,
"multigraph": True,
# In the order in Skeleton.nodes and must match up with nodes_dicts.
"nodes": [{"id": {"py/id": node_to_id[node]}} for node in self.nodes],}
"nodes": [{"id": {"py/object": "sleap.skeleton.Node", "py/state": {"name": node.name, "weight": node.weight}}} for node in self.nodes],
"links": edges_dicts,
"multigraph": True,


Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Address the unused variable skeleton_dict.

The static analysis tool Ruff has correctly identified that the skeleton_dict variable is assigned but never used. This is a valid concern that should be addressed.

Consider the following options to address the unused variable:

  1. If the skeleton_dict is intended to be used later in the code, ensure that it is actually used. If not, remove the assignment.

  2. If the skeleton_dict is not needed, remove the entire code block that creates it.

Please let me know if you need any assistance with addressing this issue.

Tools
Ruff

1102-1102: Local variable skeleton_dict is assigned to but never used

Remove assignment to unused variable skeleton_dict

(F841)

# jsonpickle.set_encoder_options("simplejson", sort_keys=True, indent=4)
# if node_to_idx is not None:
# indexed_node_graph = nx.relabel_nodes(
# G=self._graph, mapping=node_to_idx
# ) # map nodes to int
# else:
# indexed_node_graph = self._graph

# # Encode to JSON
# graph = json_graph.node_link_data(indexed_node_graph)

# # SLEAP v1.3.0 added `description` and `preview_image` to `Skeleton`, but saving
# # these fields breaks data format compatibility. Currently, these are only
# # added in our custom template skeletons. To ensure backwards data format
# # compatibility of user data, we only save these fields if they are not None.
# if self.is_template:
# data = {
# "nx_graph": graph,
# "description": self.description,
# "preview_image": self.preview_image,
# }
# else:
# data = graph

# json_str = jsonpickle.encode(data)

# return json_str

def save_json(self, filename: str, node_to_idx: Optional[Dict[Node, int]] = None):
"""
Expand Down Expand Up @@ -1280,4 +1391,4 @@ def __hash__(self):


cattr.register_unstructure_hook(Skeleton, lambda skeleton: Skeleton.to_dict(skeleton))
cattr.register_structure_hook(Skeleton, lambda dicts, cls: Skeleton.from_dict(dicts))
cattr.register_structure_hook(Skeleton, lambda dicts, cls: Skeleton.from_dict(dicts))
Loading