Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix/157 #161

Draft
wants to merge 7 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 36 additions & 0 deletions polyply/src/graph_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -183,3 +183,39 @@ def get_all_predecessors(graph, node, start_node=0):
break
predecessors.reverse()
return predecessors

def _find_longest_path(graph, source, target):
all_paths = list(nx.all_simple_edge_paths(graph, source, target=target))
max_path_length = 0
idx = 0
for jdx, path in enumerate(all_paths):
if len(path) > max_path_length:
max_path_length = len(path)
idx = jdx
return all_paths[idx]
Comment on lines +189 to +196
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
all_paths = list(nx.all_simple_edge_paths(graph, source, target=target))
max_path_length = 0
idx = 0
for jdx, path in enumerate(all_paths):
if len(path) > max_path_length:
max_path_length = len(path)
idx = jdx
return all_paths[idx]
all_paths = list(nx.all_simple_edge_paths(graph, source, target=target))
return max(all_paths, key=len)

Unless you want to deal with multiple longest paths

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I want to deal with it in the sense that there can be more than one with equivalent path length. But the smaller code should account for that I guess?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes. Then you should get the one with the lowest index, but don't pin me on that.


def polyply_custom_tree(graph, source, target):
# find all simple paths from source to target
longest_path = _find_longest_path(graph, source, target)
path_nodes = { u for edge in longest_path for u in edge }
# substract path and extract the remaining graphs of the branches
graph_copy = graph.copy()
graph_copy.remove_edges_from(longest_path)
components = list(nx.connected_components(graph_copy))
# loop over the edges in the longest path and
# see if they are part of a connected component
# then add the bfs tree edges of that component to the
# graph before the edge
tree_graph = nx.DiGraph()
for from_node, to_node in longest_path:
for c in components:
if from_node in c:
break

subgraph = graph_copy.subgraph(c)
for u, v in nx.bfs_tree(subgraph, source=from_node).edges:
if u not in path_nodes or v not in path_nodes:
tree_graph.add_edge(u, v)
tree_graph.add_edge(from_node, to_node)

return tree_graph
Comment on lines +198 to +222
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yes but that is intended. The polyply search tree should be a directed graph without cycles. The tree should have been a separate PR but since it made it in. Basically the idea is to get rid of the dfs vs bfs thing for cycles. To do that I need to make a tree which essentially is a DFS tree along the backbone (i.e. longest path) but the branches are placed bfs like with cycles being replaced by distance restraints.

133 changes: 103 additions & 30 deletions polyply/src/restraints.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,52 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import networkx as nx
from .graph_utils import compute_avg_step_length, get_all_predecessors
from polyply.src.graph_utils import compute_avg_step_length, get_all_predecessors

def upper_bound(avg_step_length, distance, graph_dist, tolerance=0):
bound = graph_dist * avg_step_length + distance + tolerance
return bound


def lower_bound(distance, graph_dist_ref, graph_dist_target, tolerance=0):
avg_needed_step_length = distance / graph_dist_target
bound = avg_needed_step_length * graph_dist_ref - tolerance
return bound


def _set_restraint_on_path(graph,
path,
avg_step_length,
distance,
tolerance,
ref_node=None,
target_node=None):

# graph distances can be computed from the path positions
graph_distances_ref = {node: path.index(node) for node in path}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Expensive! path.index costs N for the N=len(path)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

but its cheaper than computing the disjstra path length no?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure. Consider however, the following:
{node: idx for (idx, node) in enumerate(path)}

graph_distances_target = {node: len(path) - 1 - graph_distances_ref[node] for node in path}

if not target_node:
target_node = path[-1]

if not ref_node:
ref_node = path[0]

for node in path[1:]:
if node == target_node:
graph_dist_ref = 1.0
else:
graph_dist_ref = graph_distances_target[node]

graph_dist_target = graph_distances_target[node]
up_bound = upper_bound(avg_step_length, distance, tolerance, graph_dist_ref)
low_bound = lower_bound(distance, graph_dist_ref, graph_dist_target, tolerance)

current_restraints = graph.nodes[node].get('distance_restraints', [])
graph.nodes[node]['distance_restraints'] = current_restraints + [(ref_node,
up_bound,
low_bound)]


def set_distance_restraint(molecule,
target_node,
Expand Down Expand Up @@ -50,37 +95,63 @@ def set_distance_restraint(molecule,
tolerance: float
absolute tolerance (nm)
"""
# if the target node is a predecssor to the ref node the order
# needs to be reveresed because the target node will be placed
# before the ref node
# we get the path lying between target and reference node
try:
path = get_all_predecessors(molecule.search_tree, node=target_node, start_node=ref_node)
except IndexError:
ref_node, target_node = target_node, ref_node
path = get_all_predecessors(molecule.search_tree, node=target_node, start_node=ref_node)

# graph distances can be computed from the path positions
graph_distances_ref = {node: path.index(node) for node in path}
graph_distances_target = {node: len(path) - 1 - graph_distances_ref[node] for node in path}
# First we need to figure out if the two nodes to be restrained are
# each others common ancestor. This breaks on cyclic graphs
ancestor = nx.find_lowest_common_ancestor(
molecule.search_tree, ref_node, target_node)
# if the ancestor is equal to the target node we have to switch the
# reference and target node
paths = []
if ancestor == target_node:
ref_nodes = [target_node]
target_nodes = [ref_node]
distances = [distance]
# if ancestor is neither target nor ref_node, there is no continues path
# between the two. In this case we have to split the distance restraint
# into two parts
elif ancestor != ref_node:
# !!!!!!!!! THIS NEEDS TO HAVE THE APPROPIATE ORDER !!!!!!!!!!!!!!!
ref_nodes = [ancestor, ref_node]
target_nodes = [ref_node, target_node]

for node in path:
if node == target_node:
graph_distance = 1.0
elif node == ref_node:
continue
else:
graph_distance = graph_distances_target[node]
# the first part is the average expected distance between the target node and the
# common ancestor
path = get_all_predecessors(molecule.search_tree,
node=target_node,
start_node=ancestor)

upper_bound = graph_distance * avg_step_length + distance + tolerance
graph_dist_ref_target = len(path) - 1
up_bound = upper_bound(avg_step_length, distance, tolerance, graph_dist_ref_target)
low_bound = lower_bound(distance, graph_dist_ref_target, graph_dist_ref_target, tolerance)
partial_distance = (up_bound + low_bound) / 2.
paths.append(path)
distances = [partial_distance]
# then we put the other distance restraint that acts like the full one but
# on a partial path between ancestor and target node
path = get_all_predecessors(molecule.search_tree,
node=target_node,
start_node=ref_node)
paths.append(path)
distances.append(distance)
# if ancestor is equal to ref node all order is full-filled and we just proceed
else:
ref_nodes = [ref_node]
target_nodes = [target_node]
distances = [distance]

avg_needed_step_length = distance / graph_distances_target[ref_node]
lower_bound = avg_needed_step_length * graph_distances_ref[node] - tolerance
if not paths:
paths = [get_all_predecessors(molecule.search_tree,
node=target_node,
start_node=ref_node)]

current_restraints = molecule.nodes[node].get('distance_restraints', [])
molecule.nodes[node]['distance_restraints'] = current_restraints + [(ref_node,
upper_bound,
lower_bound)]
for ref_node, target_node, dist, path in zip(ref_nodes, target_nodes, distances, paths):
_set_restraint_on_path(molecule,
path,
avg_step_length,
dist,
tolerance,
ref_node,
target_node)

def set_restraints(topology, nonbond_matrix):
"""
Expand All @@ -97,7 +168,8 @@ def set_restraints(topology, nonbond_matrix):
mol = topology.molecules[mol_idx]

if set(dict(nx.degree(mol)).values()) != {1, 2}:
raise IOError("Distance restraints currently can only be applied to linear molecules.")
raise IOError(
"Distance restraints currently can only be applied to linear molecules.")

for ref_node, target_node in distance_restraints:
path = list(mol.search_tree.edges)
Expand All @@ -107,4 +179,5 @@ def set_restraints(topology, nonbond_matrix):
path)

distance, tolerance = distance_restraints[(ref_node, target_node)]
set_distance_restraint(mol, target_node, ref_node, distance, avg_step_length, tolerance)
set_distance_restraint(
mol, target_node, ref_node, distance, avg_step_length, tolerance)