From c753dc4803e7dd67746b4439d1032d0c6d5bbc11 Mon Sep 17 00:00:00 2001 From: fgrunewald Date: Mon, 18 Oct 2021 16:14:17 +0200 Subject: [PATCH 1/6] custom tree function --- polyply/src/graph_utils.py | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/polyply/src/graph_utils.py b/polyply/src/graph_utils.py index 04f753ce..ec2ff470 100644 --- a/polyply/src/graph_utils.py +++ b/polyply/src/graph_utils.py @@ -183,3 +183,39 @@ def get_all_predecessors(graph, node, start_node=0): break predecessors.reverse() return predecessors + +def _find_longest_path(graph, source, target): + all_paths = list(nx.all_simple_edge_paths(graph, source, target=target)) + max_path_length = 0 + idx = 0 + for jdx, path in enumerate(all_paths): + if len(path) > max_path_length: + max_path_length = len(path) + idx = jdx + return all_paths[idx] + +def polyply_custom_tree(graph, source, target): + # find all simple paths from source to target + longest_path = _find_longest_path(graph, source, target) + path_nodes = { u for edge in longest_path for u in edge } + # substract path and extract the remaining graphs of the branches + graph_copy = graph.copy() + graph_copy.remove_edges_from(longest_path) + components = list(nx.connected_components(graph_copy)) + # loop over the edges in the longest path and + # see if they are part of a connected component + # then add the bfs tree edges of that component to the + # graph before the edge + tree_graph = nx.DiGraph() + for from_node, to_node in longest_path: + for c in components: + if from_node in c: + break + + subgraph = graph_copy.subgraph(c) + for u, v in nx.bfs_tree(subgraph, source=from_node).edges: + if u not in path_nodes or v not in path_nodes: + tree_graph.add_edge(u, v) + tree_graph.add_edge(from_node, to_node) + + return tree_graph From ce3c23e979b68b179bdcb64670a18de2daf20b98 Mon Sep 17 00:00:00 2001 From: fgrunewald Date: Fri, 22 Oct 2021 16:00:07 +0200 Subject: [PATCH 2/6] draft of generalized restraint algorithm --- polyply/src/restraints.py | 133 +++++++++++++++++++++++++++++--------- 1 file changed, 103 insertions(+), 30 deletions(-) diff --git a/polyply/src/restraints.py b/polyply/src/restraints.py index ca0024b8..aea599d0 100644 --- a/polyply/src/restraints.py +++ b/polyply/src/restraints.py @@ -12,7 +12,52 @@ # See the License for the specific language governing permissions and # limitations under the License. import networkx as nx -from .graph_utils import compute_avg_step_length, get_all_predecessors +from polyply.src.graph_utils import compute_avg_step_length, get_all_predecessors + +def upper_bound(avg_step_length, distance, graph_dist, tolerance=0): + bound = graph_dist * avg_step_length + distance + tolerance + return bound + + +def lower_bound(distance, graph_dist_ref, graph_dist_target, tolerance=0): + avg_needed_step_length = distance / graph_dist_target + bound = avg_needed_step_length * graph_dist_ref - tolerance + return bound + + +def _set_restraint_on_path(graph, + path, + avg_step_length, + distance, + tolerance, + ref_node=None, + target_node=None): + + # graph distances can be computed from the path positions + graph_distances_ref = {node: path.index(node) for node in path} + graph_distances_target = {node: len(path) - 1 - graph_distances_ref[node] for node in path} + + if not target_node: + target_node = path[-1] + + if not ref_node: + ref_node = path[0] + + for node in path[1:]: + if node == target_node: + graph_dist_ref = 1.0 + else: + graph_dist_ref = graph_distances_target[node] + + graph_dist_target = graph_distances_target[node] + up_bound = upper_bound(avg_step_length, distance, tolerance, graph_dist_ref) + low_bound = lower_bound(distance, graph_dist_ref, graph_dist_target, tolerance) + + current_restraints = graph.nodes[node].get('distance_restraints', []) + graph.nodes[node]['distance_restraints'] = current_restraints + [(ref_node, + up_bound, + low_bound)] + def set_distance_restraint(molecule, target_node, @@ -50,37 +95,63 @@ def set_distance_restraint(molecule, tolerance: float absolute tolerance (nm) """ - # if the target node is a predecssor to the ref node the order - # needs to be reveresed because the target node will be placed - # before the ref node - # we get the path lying between target and reference node - try: - path = get_all_predecessors(molecule.search_tree, node=target_node, start_node=ref_node) - except IndexError: - ref_node, target_node = target_node, ref_node - path = get_all_predecessors(molecule.search_tree, node=target_node, start_node=ref_node) - - # graph distances can be computed from the path positions - graph_distances_ref = {node: path.index(node) for node in path} - graph_distances_target = {node: len(path) - 1 - graph_distances_ref[node] for node in path} + # First we need to figure out if the two nodes to be restrained are + # each others common ancestor. This breaks on cyclic graphs + ancestor = nx.find_lowest_common_ancestor( + molecule.search_tree, ref_node, target_node) + # if the ancestor is equal to the target node we have to switch the + # reference and target node + paths = [] + if ancestor == target_node: + ref_nodes = [target_node] + target_nodes = [ref_node] + distances = [distance] + # if ancestor is neither target nor ref_node, there is no continues path + # between the two. In this case we have to split the distance restraint + # into two parts + elif ancestor != ref_node: + # !!!!!!!!! THIS NEEDS TO HAVE THE APPROPIATE ORDER !!!!!!!!!!!!!!! + ref_nodes = [ancestor, ref_node] + target_nodes = [ref_node, target_node] - for node in path: - if node == target_node: - graph_distance = 1.0 - elif node == ref_node: - continue - else: - graph_distance = graph_distances_target[node] + # the first part is the average expected distance between the target node and the + # common ancestor + path = get_all_predecessors(molecule.search_tree, + node=target_node, + start_node=ancestor) - upper_bound = graph_distance * avg_step_length + distance + tolerance + graph_dist_ref_target = len(path) - 1 + up_bound = upper_bound(avg_step_length, distance, tolerance, graph_dist_ref_target) + low_bound = lower_bound(distance, graph_dist_ref_target, graph_dist_ref_target, tolerance) + partial_distance = (up_bound + low_bound) / 2. + paths.append(path) + distances = [partial_distance] + # then we put the other distance restraint that acts like the full one but + # on a partial path between ancestor and target node + path = get_all_predecessors(molecule.search_tree, + node=target_node, + start_node=ref_node) + paths.append(path) + distances.append(distance) + # if ancestor is equal to ref node all order is full-filled and we just proceed + else: + ref_nodes = [ref_node] + target_nodes = [target_node] + distances = [distance] - avg_needed_step_length = distance / graph_distances_target[ref_node] - lower_bound = avg_needed_step_length * graph_distances_ref[node] - tolerance + if not paths: + paths = [get_all_predecessors(molecule.search_tree, + node=target_node, + start_node=ref_node)] - current_restraints = molecule.nodes[node].get('distance_restraints', []) - molecule.nodes[node]['distance_restraints'] = current_restraints + [(ref_node, - upper_bound, - lower_bound)] + for ref_node, target_node, dist, path in zip(ref_nodes, target_nodes, distances, paths): + _set_restraint_on_path(molecule, + path, + avg_step_length, + dist, + tolerance, + ref_node, + target_node) def set_restraints(topology, nonbond_matrix): """ @@ -97,7 +168,8 @@ def set_restraints(topology, nonbond_matrix): mol = topology.molecules[mol_idx] if set(dict(nx.degree(mol)).values()) != {1, 2}: - raise IOError("Distance restraints currently can only be applied to linear molecules.") + raise IOError( + "Distance restraints currently can only be applied to linear molecules.") for ref_node, target_node in distance_restraints: path = list(mol.search_tree.edges) @@ -107,4 +179,5 @@ def set_restraints(topology, nonbond_matrix): path) distance, tolerance = distance_restraints[(ref_node, target_node)] - set_distance_restraint(mol, target_node, ref_node, distance, avg_step_length, tolerance) + set_distance_restraint( + mol, target_node, ref_node, distance, avg_step_length, tolerance) From 9c011f1409d35a915e0e6eb5148c3333bf6cb296 Mon Sep 17 00:00:00 2001 From: fgrunewald Date: Mon, 8 Nov 2021 12:00:42 +0100 Subject: [PATCH 3/6] integrate and test framework for abitrary distance restraitns --- polyply/src/gen_coords.py | 4 +-- polyply/src/graph_utils.py | 8 ++++++ polyply/src/meta_molecule.py | 3 ++- polyply/src/restraints.py | 42 +++++++++++++++++--------------- polyply/tests/test_restraints.py | 31 +++++++++++++++++++++++ 5 files changed, 66 insertions(+), 22 deletions(-) diff --git a/polyply/src/gen_coords.py b/polyply/src/gen_coords.py index ee6abdbf..0a79dc99 100644 --- a/polyply/src/gen_coords.py +++ b/polyply/src/gen_coords.py @@ -74,8 +74,8 @@ def _initialize_cylces(topology, cycles, tolerance): for mol_idx in topology.mol_idx_by_name[mol_name]: # initalize as dfs tree molecule = topology.molecules[mol_idx] - if set(dict(nx.degree(molecule)).values()) != {1, 2}: - raise IOError("Only linear cyclic molecules are allowed at the moment.") + #if set(dict(nx.degree(molecule)).values()) != {1, 2}: + # raise IOError("Only linear cyclic molecules are allowed at the moment.") molecule.dfs=True nodes = (list(molecule.search_tree.edges)[0][0], list(molecule.search_tree.edges)[-1][1]) diff --git a/polyply/src/graph_utils.py b/polyply/src/graph_utils.py index ec2ff470..cb579b81 100644 --- a/polyply/src/graph_utils.py +++ b/polyply/src/graph_utils.py @@ -181,6 +181,7 @@ def get_all_predecessors(graph, node, start_node=0): predecessors.append(pre_node) if pre_node == start_node: break + predecessors.reverse() return predecessors @@ -219,3 +220,10 @@ def polyply_custom_tree(graph, source, target): tree_graph.add_edge(from_node, to_node) return tree_graph + +def annotate_hierarchy(graph): + hierarchy= {} + for idx, ndx in enumerate(graph.nodes()): + hierarchy[ndx] = idx + nx.set_node_attributes(graph, hierarchy, "hierarchy") + return graph diff --git a/polyply/src/meta_molecule.py b/polyply/src/meta_molecule.py index 889d7ebd..c6d3ae8e 100644 --- a/polyply/src/meta_molecule.py +++ b/polyply/src/meta_molecule.py @@ -18,7 +18,7 @@ from vermouth.graph_utils import make_residue_graph from vermouth.log_helpers import StyleAdapter, get_logger from .polyply_parser import read_polyply -from .graph_utils import find_nodes_with_attributes +from .graph_utils import find_nodes_with_attributes, annotate_hierarchy Monomer = namedtuple('Monomer', 'resname, n_blocks') LOGGER = StyleAdapter(get_logger(__name__)) @@ -196,6 +196,7 @@ def search_tree(self): else: self.__search_tree = nx.dfs_tree(self, source=self.root) + annotate_hierarchy(self.__search_tree) return self.__search_tree @staticmethod diff --git a/polyply/src/restraints.py b/polyply/src/restraints.py index aea599d0..fbc18338 100644 --- a/polyply/src/restraints.py +++ b/polyply/src/restraints.py @@ -18,13 +18,10 @@ def upper_bound(avg_step_length, distance, graph_dist, tolerance=0): bound = graph_dist * avg_step_length + distance + tolerance return bound - -def lower_bound(distance, graph_dist_ref, graph_dist_target, tolerance=0): - avg_needed_step_length = distance / graph_dist_target +def lower_bound(distance, graph_dist_ref, avg_needed_step_length, tolerance=0): bound = avg_needed_step_length * graph_dist_ref - tolerance return bound - def _set_restraint_on_path(graph, path, avg_step_length, @@ -43,6 +40,8 @@ def _set_restraint_on_path(graph, if not ref_node: ref_node = path[0] + avg_needed_step_length = distance / graph_distances_target[ref_node] + for node in path[1:]: if node == target_node: graph_dist_ref = 1.0 @@ -50,8 +49,8 @@ def _set_restraint_on_path(graph, graph_dist_ref = graph_distances_target[node] graph_dist_target = graph_distances_target[node] - up_bound = upper_bound(avg_step_length, distance, tolerance, graph_dist_ref) - low_bound = lower_bound(distance, graph_dist_ref, graph_dist_target, tolerance) + up_bound = upper_bound(avg_step_length, distance, graph_dist_ref, tolerance) + low_bound = lower_bound(distance, graph_dist_ref, avg_needed_step_length, tolerance) current_restraints = graph.nodes[node].get('distance_restraints', []) graph.nodes[node]['distance_restraints'] = current_restraints + [(ref_node, @@ -97,8 +96,7 @@ def set_distance_restraint(molecule, """ # First we need to figure out if the two nodes to be restrained are # each others common ancestor. This breaks on cyclic graphs - ancestor = nx.find_lowest_common_ancestor( - molecule.search_tree, ref_node, target_node) + ancestor = nx.algorithms.lowest_common_ancestor(molecule.search_tree, ref_node, target_node) # if the ancestor is equal to the target node we have to switch the # reference and target node paths = [] @@ -106,15 +104,21 @@ def set_distance_restraint(molecule, ref_nodes = [target_node] target_nodes = [ref_node] distances = [distance] - # if ancestor is neither target nor ref_node, there is no continues path + # if ancestor is neither target nor ref_node, there is no continous path # between the two. In this case we have to split the distance restraint # into two parts elif ancestor != ref_node: - # !!!!!!!!! THIS NEEDS TO HAVE THE APPROPIATE ORDER !!!!!!!!!!!!!!! + print("go here") + # if target node is to be placed before ref node we need to switch them around + # otherwise the designations are fine + if molecule.search_tree[ref_node]["hierarchy"] > molecule.search_tree[target_node]["hierarchy"]: + ref_node, target_node = (target_node, ref_node) + ref_nodes = [ancestor, ref_node] target_nodes = [ref_node, target_node] - # the first part is the average expected distance between the target node and the + # The first part of the split distance restraint is a new restraint + # that is the average expected distance between the target node and the # common ancestor path = get_all_predecessors(molecule.search_tree, node=target_node, @@ -141,8 +145,8 @@ def set_distance_restraint(molecule, if not paths: paths = [get_all_predecessors(molecule.search_tree, - node=target_node, - start_node=ref_node)] + node=target_nodes[0], + start_node=ref_nodes[0])] for ref_node, target_node, dist, path in zip(ref_nodes, target_nodes, distances, paths): _set_restraint_on_path(molecule, @@ -167,10 +171,6 @@ def set_restraints(topology, nonbond_matrix): distance_restraints = topology.distance_restraints[(mol_name, mol_idx)] mol = topology.molecules[mol_idx] - if set(dict(nx.degree(mol)).values()) != {1, 2}: - raise IOError( - "Distance restraints currently can only be applied to linear molecules.") - for ref_node, target_node in distance_restraints: path = list(mol.search_tree.edges) avg_step_length, _ = compute_avg_step_length(mol, @@ -179,5 +179,9 @@ def set_restraints(topology, nonbond_matrix): path) distance, tolerance = distance_restraints[(ref_node, target_node)] - set_distance_restraint( - mol, target_node, ref_node, distance, avg_step_length, tolerance) + set_distance_restraint(mol, + target_node, + ref_node, + distance, + avg_step_length, + tolerance) diff --git a/polyply/tests/test_restraints.py b/polyply/tests/test_restraints.py index a19247cc..82cf1169 100644 --- a/polyply/tests/test_restraints.py +++ b/polyply/tests/test_restraints.py @@ -17,8 +17,12 @@ import pytest import numpy as np import networkx as nx +from hypothesis import given +from hypothesis_networkx import graph_builder +from hypothesis import strategies as st import polyply from polyply.tests.test_build_file_parser import test_molecule +from polyply.src.meta_molecule import MetaMolecule @pytest.mark.parametrize('target_node, ref_node, distance, avg_step_length, tolerance, expected',( # test simple revers @@ -67,3 +71,30 @@ def test_set_distance_restraint(test_molecule, assert pytest.approx(ref_restr[1], new_restr[1]) assert pytest.approx(ref_restr[2], new_restr[2]) + + + +builder = graph_builder(graph_type=nx.Graph, + node_keys=st.integers(), + min_nodes=4, max_nodes=10, + min_edges=3, max_edges=None, + self_loops=True, + connected=True) +@given(graph=builder) +def test_restraints_on_abitr_topology(graph): + test_molecule = MetaMolecule() + test_molecule.add_nodes_from(graph.nodes) + test_molecule.add_edges_from(graph.edges) + + target_node = list(graph.nodes)[0] + ref_node = list(graph.nodes)[1] + distance = 4 + avg_step_length = 0.47 + tolerance = 0 + polyply.src.restraints.set_distance_restraint(test_molecule, + target_node, + ref_node, + distance, + avg_step_length, + tolerance) + assert len(nx.get_node_attributes(test_molecule, "distance_restraints")) != 0 From 9a73050e8fcdfb56c28ec45e75344dc7c60cb52d Mon Sep 17 00:00:00 2001 From: fgrunewald Date: Mon, 8 Nov 2021 12:35:34 +0100 Subject: [PATCH 4/6] edit hypothesis test --- polyply/tests/test_restraints.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/polyply/tests/test_restraints.py b/polyply/tests/test_restraints.py index 82cf1169..a8a1681d 100644 --- a/polyply/tests/test_restraints.py +++ b/polyply/tests/test_restraints.py @@ -87,7 +87,7 @@ def test_restraints_on_abitr_topology(graph): test_molecule.add_edges_from(graph.edges) target_node = list(graph.nodes)[0] - ref_node = list(graph.nodes)[1] + ref_node = list(graph.nodes)[-1] distance = 4 avg_step_length = 0.47 tolerance = 0 From b74206e546ab10f89e7f8e11c2763475d4f1b3d3 Mon Sep 17 00:00:00 2001 From: fgrunewald Date: Mon, 8 Nov 2021 12:47:29 +0100 Subject: [PATCH 5/6] add hypothesis to requirements for tests --- requirements-tests.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements-tests.txt b/requirements-tests.txt index 595a4902..3b33a67e 100644 --- a/requirements-tests.txt +++ b/requirements-tests.txt @@ -4,3 +4,4 @@ pytest-cov pylint codecov tqdm +hypothesis-networkx From 06b1db3c49512ce328cef09d66a9cc88f1931600 Mon Sep 17 00:00:00 2001 From: fgrunewald Date: Mon, 8 Nov 2021 13:30:46 +0100 Subject: [PATCH 6/6] debug hypothesis test and clean common ancestor algo --- polyply/src/restraints.py | 12 +++++++----- polyply/tests/test_restraints.py | 11 ++++++++--- 2 files changed, 15 insertions(+), 8 deletions(-) diff --git a/polyply/src/restraints.py b/polyply/src/restraints.py index fbc18338..735000b6 100644 --- a/polyply/src/restraints.py +++ b/polyply/src/restraints.py @@ -111,17 +111,18 @@ def set_distance_restraint(molecule, print("go here") # if target node is to be placed before ref node we need to switch them around # otherwise the designations are fine - if molecule.search_tree[ref_node]["hierarchy"] > molecule.search_tree[target_node]["hierarchy"]: + if molecule.search_tree.nodes[ref_node]["hierarchy"] >\ + molecule.search_tree.nodes[target_node]["hierarchy"]: ref_node, target_node = (target_node, ref_node) - ref_nodes = [ancestor, ref_node] + ref_nodes = [ancestor, ancestor] target_nodes = [ref_node, target_node] # The first part of the split distance restraint is a new restraint - # that is the average expected distance between the target node and the + # that is the average expected distance between the ref node and the # common ancestor path = get_all_predecessors(molecule.search_tree, - node=target_node, + node=ref_node, start_node=ancestor) graph_dist_ref_target = len(path) - 1 @@ -134,7 +135,7 @@ def set_distance_restraint(molecule, # on a partial path between ancestor and target node path = get_all_predecessors(molecule.search_tree, node=target_node, - start_node=ref_node) + start_node=ancestor) paths.append(path) distances.append(distance) # if ancestor is equal to ref node all order is full-filled and we just proceed @@ -149,6 +150,7 @@ def set_distance_restraint(molecule, start_node=ref_nodes[0])] for ref_node, target_node, dist, path in zip(ref_nodes, target_nodes, distances, paths): + print(ref_node, target_node, path) _set_restraint_on_path(molecule, path, avg_step_length, diff --git a/polyply/tests/test_restraints.py b/polyply/tests/test_restraints.py index a8a1681d..86a35445 100644 --- a/polyply/tests/test_restraints.py +++ b/polyply/tests/test_restraints.py @@ -76,7 +76,7 @@ def test_set_distance_restraint(test_molecule, builder = graph_builder(graph_type=nx.Graph, node_keys=st.integers(), - min_nodes=4, max_nodes=10, + min_nodes=4, max_nodes=60, min_edges=3, max_edges=None, self_loops=True, connected=True) @@ -86,8 +86,13 @@ def test_restraints_on_abitr_topology(graph): test_molecule.add_nodes_from(graph.nodes) test_molecule.add_edges_from(graph.edges) - target_node = list(graph.nodes)[0] - ref_node = list(graph.nodes)[-1] + ref_node, target_node = np.random.choice(list(graph.nodes), replace=False, size=2) + + # if set(dict(nx.degree(test_molecule)).values()) != {1, 2}: + # ancestor = nx.algorithms.lowest_common_ancestor(test_molecule.search_tree, ref_node, target_node) + # if ancestor != ref_node and ancestor != target_node: + # assert False + distance = 4 avg_step_length = 0.47 tolerance = 0