FUll bending forces: fixed pi bond generation and bending bugs

willwng · Oct 20, 2024 · 8144f1b · 8144f1b
1 parent 3bc73ce
commit 8144f1b
Show file tree

Hide file tree

Showing 13 changed files with 206 additions and 185 deletions.
diff --git a/energyminimization/energies/bend_full.py b/energyminimization/energies/bend_full.py
@@ -22,16 +22,23 @@ def compute_angles(
     pos_matrix = pos_matrix.reshape(-1, 2)
     j, i, k = active_pi_indices[:, 0], active_pi_indices[:, 1], active_pi_indices[:, 2]
     bond1_idx, bond2_idx, pi_idx = active_pi_indices[:, 3], active_pi_indices[:, 4], active_pi_indices[:, 5]
-    hor_pbc1, top_pbc1 = active_pi_indices[:, 7], active_pi_indices[:, 8]
-    hor_pbc2, top_pbc2 = active_pi_indices[:, 9], active_pi_indices[:, 10]
+    hor_pbc1, top_pbc1 = active_pi_indices[:, 8], active_pi_indices[:, 9]
+    hor_pbc2, top_pbc2 = active_pi_indices[:, 10], active_pi_indices[:, 11]
+    sign_i, sign_j = active_pi_indices[:, 6], active_pi_indices[:, 7]
+
     # Vectors out of the vertex node
-    c_ji = pos_matrix[j] - pos_matrix[i]
-    c_jk = pos_matrix[j] - pos_matrix[k]
+    c_ji = pos_matrix[i] - pos_matrix[j]
+    c_jk = pos_matrix[k] - pos_matrix[j]
     # Handle periodic boundary conditions
-    c_ji[np.where(hor_pbc1 == 1), 0] += corrections[0]
-    c_ji[np.where(top_pbc1 == 1), 1] += corrections[1]
-    c_jk[np.where(hor_pbc2 == 1), 0] += corrections[0]
-    c_jk[np.where(top_pbc2 == 1), 1] += corrections[1]
+    idx_hor_i = np.where(hor_pbc1 == 1)
+    idx_top_i = np.where(top_pbc1 == 1)
+    idx_hor_j = np.where(hor_pbc2 == 1)
+    idx_top_j = np.where(top_pbc2 == 1)
+    # The PBC correction depends on which node is the vertex (determined by sign)
+    c_ji[idx_hor_i, 0] += sign_i[idx_hor_i] * corrections[0]
+    c_ji[idx_top_i, 1] += sign_i[idx_top_i] * corrections[1]
+    c_jk[idx_hor_j, 0] += sign_j[idx_hor_j] * corrections[0]
+    c_jk[idx_top_j, 1] += sign_j[idx_top_j] * corrections[1]
 
     theta_jik = np.arctan2(np.cross(c_ji, c_jk), np.einsum('ij,ij->i', c_ji, c_jk))
     theta_jik[theta_jik < 0] += 2 * np.pi
@@ -63,8 +70,8 @@ def get_bend_hessian(
     # We first compute the helper values needed for d_theta/d_dof
     length_ji = np.linalg.norm(c_ji, axis=1)
     length_jk = np.linalg.norm(c_jk, axis=1)
-    t_ji = np.divide(c_ji, np.square(length_ji))
-    t_jk = np.divide(c_jk, np.square(length_jk))
+    t_ji = np.divide(c_ji, np.square(length_ji)[: , np.newaxis])
+    t_jk = np.divide(c_jk, np.square(length_jk)[: , np.newaxis])
     # Compute d_theta/d_dof
     d_theta_djx = t_ji[:, 1] - t_jk[:, 1]
     d_theta_dix = -t_ji[:, 1]
@@ -97,7 +104,7 @@ def get_bend_hessian(
     cjk_x_minus_y = c_jk[:, 0] - c_jk[:, 1]
     # We're ready!
     d2_theta_djx_jx = 2 * (cjk_prod / l_jk_fourth - cji_prod / l_ji_fourth)
-    d2_theta_djx_jy = (1 / l_ji_sq) - (1 / l_jk_sq) + (2 * cjk_sq / l_jk_fourth) - (2 * cji_sq / l_ji_fourth)
+    d2_theta_djx_jy = (1 / l_ji_sq) - (1 / l_jk_sq) + (2 * cjk_sq[:, 1] / l_jk_fourth) - (2 * cji_sq[:, 1] / l_ji_fourth)
     d2_theta_djx_ix = 2 * (cji_prod / l_ji_fourth)
     d2_theta_djx_iy = -(cji_x_plus_y * cji_x_minus_y) / l_ji_fourth
     d2_theta_djx_kx = -2 * (cjk_prod / l_jk_fourth)
@@ -212,7 +219,8 @@ def get_bend_hessian(
     add_entry(rows, cols, data, entry_idx + 34, k_y, k_x, d2e_dky_kx)
     add_entry(rows, cols, data, entry_idx + 35, k_y, k_y, d2e_dky_ky)
 
-    return csr_matrix((data, (rows, cols)), shape=(2 * n, 2 * n))
+    n_dof = pos_matrix.size
+    return csr_matrix((data, (rows, cols)), shape=(n_dof, n_dof))
 
 
 def get_bend_jacobian(
@@ -225,21 +233,24 @@ def get_bend_jacobian(
     gradient = np.zeros(pos_matrix.size)
     theta, diff_theta, c_ji, c_jk = compute_angles(pos_matrix=pos_matrix, corrections=corrections,
                                                    active_pi_indices=active_pi_indices, orig_pi_angles=orig_pi_angles)
-    # Gradient is equal to -(kappa) * (theta - theta_0) * d_theta/d_dof
-    grad_factor = -bend_mod * diff_theta
+    # Gradient is equal to kappa * (theta - theta_0) * d_theta/d_dof
+    grad_factor = (bend_mod * diff_theta)[: , np.newaxis]
     # Temporary variables useful for the gradient
     length_ji = np.linalg.norm(c_ji, axis=1)
     length_jk = np.linalg.norm(c_jk, axis=1)
-    t_ji = grad_factor * np.divide(c_ji, np.square(length_ji))
-    t_jk = grad_factor * np.divide(c_jk, np.square(length_jk))
+    t_ji = np.divide(c_ji, np.square(length_ji)[:, np.newaxis])
+    t_jk = np.divide(c_jk, np.square(length_jk)[: , np.newaxis])
+    # Multiply be the gradient factor so we don't have to later (save some redundant computation)
+    g_ji = grad_factor * t_ji
+    g_jk = grad_factor * t_jk
     # Partials of theta with respect to x (times the gradient factor gives the correct partials of energy)
-    de_djx = t_ji[:, 1] - t_jk[:, 1]
-    de_dix = -t_ji[:, 1]
-    de_dkx = t_jk[:, 1]
+    de_djx = -g_ji[:, 1] + g_jk[:, 1]
+    de_dix = g_ji[:, 1]
+    de_dkx = -g_jk[:, 1]
     # Partials of theta with respect to y
-    de_djy = -t_ji[:, 0] + t_jk[:, 0]
-    de_diy = t_ji[:, 0]
-    de_dky = -t_jk[:, 0]
+    de_djy = g_ji[:, 0] - g_jk[:, 0]
+    de_diy = -g_ji[:, 0]
+    de_dky = g_jk[:, 0]
 
     # Update the gradient
     j, i, k = active_pi_indices[:, 0], active_pi_indices[:, 1], active_pi_indices[:, 2]

diff --git a/energyminimization/energies/stretch_full.py b/energyminimization/energies/stretch_full.py
@@ -16,23 +16,25 @@ def compute_lengths(
     """
     Computes the vector between each node (c_ij), the length of each bond, and the difference between the
         current length and the rest length
-    :return: c_ji (contains vectors from node j to i for each bond), length_ji (length of each bond),
-        d_ji (difference between current length and rest length)
+    :return:
+        - c_ij: vector between nodes j and i for each bond
+        - length_ij: lengths of c_ij
+        - d_ij: difference between current length and rest length
     """
     pos_matrix = pos_matrix.reshape(-1, 2)
     # Indices of the nodes for each bond
     i, j = active_bond_indices[:, 0], active_bond_indices[:, 1]
-    # c_ij points from j to i based on their current positions
-    c_ji = pos_matrix[j] - pos_matrix[i]
+    # c_ij points from i to j based on their current positions
+    c_ij = pos_matrix[j] - pos_matrix[i]
     # Handle periodic boundary conditions
     hor_pbc, top_pbc = active_bond_indices[:, 2], active_bond_indices[:, 3]
-    c_ji[np.where(hor_pbc == 1), 0] += corrections[0]
-    c_ji[np.where(top_pbc == 1), 1] += corrections[1]
+    c_ij[np.where(hor_pbc == 1), 0] += corrections[0]
+    c_ij[np.where(top_pbc == 1), 1] += corrections[1]
     # Lengths and differences
     idx = active_bond_indices[:, -1]
-    length_ji = np.linalg.norm(c_ji, axis=1)
-    d_ji = length_ji - active_bond_lengths[idx]
-    return c_ji, length_ji, d_ji
+    length_ij = np.linalg.norm(c_ij, axis=1)
+    d_ij = length_ij - active_bond_lengths[idx]
+    return c_ij, length_ij, d_ij
 
 
 def get_nonlinear_stretch_hessian(
@@ -43,21 +45,21 @@ def get_nonlinear_stretch_hessian(
         active_bond_lengths: np.ndarray
 ) -> csr_matrix:
     # Compute the current vectors associated with the position
-    c_ji, l_ji, d_matrix = compute_lengths(pos_matrix=pos_matrix, active_bond_indices=active_bond_indices,
+    c_ij, l_ij, d_ij = compute_lengths(pos_matrix=pos_matrix, active_bond_indices=active_bond_indices,
                                            corrections=corrections, active_bond_lengths=active_bond_lengths)
 
-    i, j, idx = active_bond_indices[:, 0], active_bond_indices[:, 1], active_bond_indices[:, -1]
-    # Second derivative of energy with respect to same variable
-    c_ij_sq = np.square(c_ji)
-    init_bond_lengths = active_bond_lengths[idx].reshape(-1, 1)
-    curr_bond_lengths = l_ji.reshape(-1, 1)
-    # Formula is a * (1 - ((l0 * (c2_ij[!i]) / l^3)), where !i is y if i is x and vice versa
-    dd = stretch_mod * (1 - np.divide(np.multiply(init_bond_lengths, c_ij_sq), np.power(curr_bond_lengths, 3)))
+    idx = active_bond_indices[:, -1]
+    l0 = active_bond_lengths[idx]
+    # Helpful values for computing second derivatives
+    c_ij_sq = np.square(c_ij)
+    c_ij_prod = np.prod(c_ij, axis=1)  # c_ij[0] * c_ij[1]
+    l0_div_l3 = np.divide(l0, np.power(l_ij, 3)) # l0 / l^3
+
+    # Formula for d^2E/d(x,y)^2 and alpha * (1 - ((l0 * (c2_ij[!i]) / l^3)), where !i is y if i is x and vice versa
+    dd = stretch_mod * (1 - np.multiply(l0_div_l3[:, np.newaxis], c_ij_sq))  # need new axis since l0_div_l3 is 1D
     dxx, dyy = dd[:, 1], dd[:, 0]
-    # Formula is a * (l0 * (c_ij[0] * c_ij[1]) / l^3)
-    c_ij_prod = np.prod(c_ji, axis=1).flatten()
-    dxy = stretch_mod * np.divide(np.multiply(init_bond_lengths.flatten(), c_ij_prod),
-                                  np.power(curr_bond_lengths.flatten(), 3))
+    # Formula for d^2E/dxdy is alpha * (l0 * (c_ij[0] * c_ij[1]) / l^3)
+    dxy = stretch_mod * np.multiply(l0_div_l3, c_ij_prod)
 
     n_dof = pos_matrix.size
     return sh.generate_hessian(active_bond_indices=active_bond_indices, dxx=dxx, dyy=dyy, dxy=dxy, n=n_dof)
@@ -70,14 +72,14 @@ def get_nonlinear_stretch_jacobian(
         active_bond_indices: np.ndarray,
         active_bond_lengths: np.ndarray
 ) -> np.ndarray:
-    c_ji, l_ji, d_matrix = compute_lengths(pos_matrix=pos_matrix, active_bond_indices=active_bond_indices,
+    c_ij, l_ij, d_ij = compute_lengths(pos_matrix=pos_matrix, active_bond_indices=active_bond_indices,
                                            corrections=corrections, active_bond_lengths=active_bond_lengths)
 
-    # Gradient is equal to -alpha * ((l - l_0) / l) * c_ij. Compute the pre-factor
-    grad_factor = -stretch_mod * np.divide(d_matrix, l_ji)
+    # Gradient is equal to -alpha * ((l - l_0) / l) * c_ij. First compute the pre-factor
+    grad_factor = -stretch_mod * np.divide(d_ij, l_ij)[:, np.newaxis]
     # Then multiply the pre-factor by the c_ij vectors
-    grad = np.multiply(grad_factor.reshape(-1, 1), c_ji)
-    # The derivative with respect to x has only the x component. Same for y
+    grad = np.multiply(grad_factor, c_ij)
+    # The derivative with respect to x and y
     grad_x, grad_y = grad[:, 0], grad[:, 1]
 
     n_dof = pos_matrix.size
@@ -100,7 +102,8 @@ def get_full_stretch_energy(
     :param active_bond_lengths: ORIGINAL lengths of active bonds
     """
 
-    _, _, d_ji = compute_lengths(pos_matrix=pos_matrix, active_bond_indices=active_bond_indices,
+    _, _, d_ij = compute_lengths(pos_matrix=pos_matrix, active_bond_indices=active_bond_indices,
                                  corrections=corrections, active_bond_lengths=active_bond_lengths)
-    energy = 0.5 * stretch_mod * np.square(d_ji)
+    # E = 1/2 * alpha * (l - l_0)^2
+    energy = 0.5 * stretch_mod * np.square(d_ij)
     return float(np.sum(energy))
diff --git a/energyminimization/matrix_helper.py b/energyminimization/matrix_helper.py
@@ -124,26 +124,34 @@ def verify_r_matrix(
 def create_angle_matrix(
         pos_vector: np.ndarray,
         active_pi_indices: np.ndarray,
-        corrections: np.ndarray,
+        lattice: AbstractLattice,
 ) -> np.ndarray:
     """
-    Matrix size (# pi bonds, 1) where value at index i is the angle for pi bond index i
+    Matrix size (# pi bonds, 1) containing the *initial* angles for each pi bond
     """
     pos_matrix = pos_vector.reshape(-1, 2)
 
     j, i, k = active_pi_indices[:, 0], active_pi_indices[:, 1], active_pi_indices[:, 2]
-    hor_pbc1, top_pbc1 = active_pi_indices[:, 7], active_pi_indices[:, 8]
-    hor_pbc2, top_pbc2 = active_pi_indices[:, 9], active_pi_indices[:, 10]
+    hor_pbc1, top_pbc1 = active_pi_indices[:, 8], active_pi_indices[:, 9]
+    hor_pbc2, top_pbc2 = active_pi_indices[:, 10], active_pi_indices[:, 11]
+    sign_i, sign_j = active_pi_indices[:, 6], active_pi_indices[:, 7]
 
-    # Vectors from vertex node j to the edge nodes i and k
-    c_ji = pos_matrix[j] - pos_matrix[i]
-    c_jk = pos_matrix[j] - pos_matrix[k]
+    # Vectors from vertex node j to edge nodes i and k
+    c_ji = pos_matrix[i] - pos_matrix[j]
+    c_jk = pos_matrix[k] - pos_matrix[j]
     # Handle periodic boundary conditions
-    c_ji[np.where(hor_pbc1 == 1), 0] += corrections[0]
-    c_ji[np.where(top_pbc1 == 1), 1] += corrections[1]
-    c_jk[np.where(hor_pbc2 == 1), 0] += corrections[0]
-    c_jk[np.where(top_pbc2 == 1), 1] += corrections[1]
-
+    corrections = np.array([lattice.get_length(), lattice.get_height() + lattice.height_increment])
+    idx_hor_i = np.where(hor_pbc1 == 1)
+    idx_top_i = np.where(top_pbc1 == 1)
+    idx_hor_j = np.where(hor_pbc2 == 1)
+    idx_top_j = np.where(top_pbc2 == 1)
+    # The PBC correction depends on which node is the vertex (determined by sign)
+    c_ji[idx_hor_i, 0] += sign_i[idx_hor_i] * corrections[0]
+    c_ji[idx_top_i, 1] += sign_i[idx_top_i] * corrections[1]
+    c_jk[idx_hor_j, 0] += sign_j[idx_hor_j] * corrections[0]
+    c_jk[idx_top_j, 1] += sign_j[idx_top_j] * corrections[1]
+
+    # Compute the angle between the two vectors
     angle_matrix = np.arctan2(np.cross(c_ji, c_jk), np.einsum('ij,ij->i', c_ji, c_jk))
     angle_matrix[angle_matrix < 0] += 2 * np.pi
     return angle_matrix

diff --git a/energyminimization/minimize.py b/energyminimization/minimize.py
@@ -105,21 +105,22 @@ def minimize(
 
     # [vertex id, edge node 1 id, edge node 2 id, bond 1 id, bond 2 id, pi bond id, sign for r_matrix[bond id], horizontal PBC, top PBC]
     active_pi_bonds = lattice.get_active_pi_bonds()
-    active_pi_indices = np.zeros((len(active_pi_bonds), 11), dtype=np.int32)
+    active_pi_indices = np.zeros((len(active_pi_bonds), 12), dtype=np.int32)
     for i, pi_bond in enumerate(active_pi_bonds):
         assert (pi_bond.exists())
-        active_pi_indices[i][0] = pi_bond.get_vertex_node().get_id()
-        active_pi_indices[i][1] = pi_bond.get_edge_nodes()[0].get_id()
-        active_pi_indices[i][2] = pi_bond.get_edge_nodes()[1].get_id()
+        active_pi_indices[i][0] = pi_bond.get_vertex_node().get_id() # j
+        active_pi_indices[i][1] = pi_bond.get_edge_nodes()[0].get_id() # i
+        active_pi_indices[i][2] = pi_bond.get_edge_nodes()[1].get_id() # k
         active_pi_indices[i][3] = bond_to_idx[pi_bond.get_bond1()]
         active_pi_indices[i][4] = bond_to_idx[pi_bond.get_bond1()]
         active_pi_indices[i][5] = i
-        active_pi_indices[i][6] = 1 if pi_bond.get_bond1().get_node1() == pi_bond.get_vertex_node() else -1
+        active_pi_indices[i][6] = 1 if pi_bond.get_bond1().get_node1() == pi_bond.get_vertex_node() else -1 # sign 1
+        active_pi_indices[i][7] = 1 if pi_bond.get_bond2().get_node1() == pi_bond.get_vertex_node() else -1 # sign 2
         # PBC
-        active_pi_indices[i][7] = pi_bond.get_bond1().is_hor_pbc()
-        active_pi_indices[i][8] = pi_bond.get_bond1().is_top_pbc()
-        active_pi_indices[i][9] = pi_bond.get_bond2().is_hor_pbc()
-        active_pi_indices[i][10] = pi_bond.get_bond2().is_top_pbc()
+        active_pi_indices[i][8] = pi_bond.get_bond1().is_hor_pbc()
+        active_pi_indices[i][9] = pi_bond.get_bond1().is_top_pbc()
+        active_pi_indices[i][10] = pi_bond.get_bond2().is_hor_pbc()
+        active_pi_indices[i][11] = pi_bond.get_bond2().is_top_pbc()
 
     # 2. Convert lattice positions to matrices
     # Initial position of nodes, initial unit vectors for bonds, initial energy

diff --git a/energyminimization/solvers/newton.py b/energyminimization/solvers/newton.py
@@ -162,7 +162,7 @@ def tr_solve(f0, x, g, A, max_iter, tol, trust_radius):
 
 def trust_region_newton_cg(fun, x0, jac, hess, g_tol=1e-8):
     # --- Initialize trust-region ---
-    trust_radius = 1.0
+    trust_radius = 10.0
     max_trust_radius = 1e3
     eta = 0.15
 

diff --git a/energyminimization/solvers/solver.py b/energyminimization/solvers/solver.py
@@ -5,13 +5,15 @@
 from scipy.sparse import spmatrix, csr_matrix
 
 import energyminimization.energies.stretch_full as snl
+import energyminimization.energies.bend_full as bnl
 import energyminimization.matrix_helper as pos
 from energyminimization.matrix_helper import KMatrixResult
 from energyminimization.solvers.conjugate_gradient import conjugate_gradient
 from energyminimization.solvers.minimization_type import MinimizationType
 from energyminimization.solvers.newton import trust_region_newton_cg
 from energyminimization.transformations import Strain
 from lattice.abstract_lattice import AbstractLattice
+from tests.matrix_tests import test_gradient_hessian, plot_sparsity
 
 
 @dataclass
@@ -162,7 +164,11 @@ def compute_total_energy(pos_matrix: np.ndarray) -> float:
                                                      corrections=correction,
                                                      active_bond_indices=params.active_bond_indices,
                                                      active_bond_lengths=params.length_matrix)
-        bend_energy = 0
+        bend_energy = bnl.get_bend_energy(bend_mod=params.bend_mod,
+                                          pos_matrix=pos_matrix,
+                                          corrections=correction,
+                                          active_pi_indices=params.active_pi_indices,
+                                          orig_pi_angles=params.angle_matrix)
         return stretch_energy + bend_energy
 
     def compute_total_gradient(pos_matrix: np.ndarray) -> np.ndarray:
@@ -172,16 +178,28 @@ def compute_total_gradient(pos_matrix: np.ndarray) -> np.ndarray:
             corrections=correction,
             active_bond_indices=params.active_bond_indices,
             active_bond_lengths=params.length_matrix)
-        bend_grad = np.zeros_like(pos_matrix)
-        return stretch_grad.ravel()
+        bend_grad = bnl.get_bend_jacobian(
+            bend_mod=params.bend_mod,
+            pos_matrix=pos_matrix,
+            corrections=correction,
+            active_pi_indices=params.active_pi_indices,
+            orig_pi_angles=params.angle_matrix)
+        return stretch_grad.ravel() + bend_grad.ravel()
 
     def compute_total_hessian(pos_matrix: np.ndarray) -> csr_matrix:
-        return snl.get_nonlinear_stretch_hessian(
+        stretch_hess = snl.get_nonlinear_stretch_hessian(
             stretch_mod=params.stretch_mod,
             pos_matrix=pos_matrix,
             corrections=correction,
             active_bond_indices=params.active_bond_indices,
             active_bond_lengths=params.length_matrix)
+        bend_hess = bnl.get_bend_hessian(
+            bend_mod=params.bend_mod,
+            pos_matrix=pos_matrix,
+            corrections=correction,
+            active_pi_indices=params.active_pi_indices,
+            orig_pi_angles=params.angle_matrix)
+        return stretch_hess + bend_hess
 
     init_energy = compute_total_energy(pos_matrix=params.strained_pos)
 

diff --git a/energyminimization/transformations.py b/energyminimization/transformations.py
@@ -42,6 +42,11 @@ def __init__(self, gamma: float):
         super().__init__(gamma, transformation)
         self.name = "shear"
 
+class Random(Strain):
+    def __init__(self, gamma: float):
+        transformation = np.array([[1 + gamma * np.random.uniform(), gamma * np.random.uniform()], [gamma * np.random.uniform(), 1 + gamma * np.random.uniform()]])
+        super().__init__(gamma, transformation)
+        self.name = "random"
 
 class Dilate(Strain):
     def __init__(self, gamma: float):