zhejiangzhuque · oddoking · Feb 27, 2024 · Mar 7, 2024 · Mar 23, 2024 · Mar 25, 2024
diff --git a/.idea/.gitignore b/.idea/.gitignore
diff --git a/aMat.py b/aMat.py
@@ -0,0 +1,44 @@
+import time
+
+import numpy as np
+
+# NumPy implementation of the additive genetic relationship matrix.
+def add_mat_numpy(mark):
+    """Compute the additive genetic relationship matrix from a marker matrix.
+
+    With ``p`` the per-column mean of ``0.5 * mark`` and ``M = mark - 2p``
+    (every column centred), the result is ``M @ M.T / sum(2 * p * (1 - p))``.
+
+    Args:
+        mark: 2-D array-like with one row per individual and one column per
+            marker (the sample data in this file uses 0/1/2 codes). It is
+            converted to a float64 array before use.
+
+    Returns:
+        A square ``ndarray`` with one row and one column per row of ``mark``.
+        When every column has ``p`` equal to 0 or 1 the denominator is zero
+        and the entries are non-finite.
+
+    Raises:
+        ValueError: If ``mark`` is not two-dimensional.
+    """
+    mark = np.asarray(mark, dtype=np.float64)
+    if mark.ndim != 2:
+        raise ValueError(f"mark must be 2-D, got {mark.ndim} dimension(s)")
+
+    p = np.mean(0.5 * mark, axis=0)
+    # Broadcasting subtracts 2p from every row directly; multiplying a column
+    # of ones by p would only build the same (n, m) array the slow way.
+    m = mark - 2 * p
+    rel = np.dot(m, m.T)
+    sum_val = np.sum(2 * p * (1 - p))
+    return rel / sum_val
+
+
+if __name__ == '__main__':
+    # Small fixed example: four individuals with ten markers each.
+    mark_numpy = np.array([
+        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],  # First individual with all 0s
+        [1, 1, 1, 1, 1, 1, 1, 1, 1, 1],  # Second individual with all 1s
+        [2, 2, 2, 2, 2, 2, 2, 2, 2, 2],  # Third individual with all 2s
+        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],  # Fourth individual with all 0s
+    ])
+
+    # perf_counter is monotonic; stop the clock before printing so console
+    # I/O is not counted as computation time.
+    start_time = time.perf_counter()
+    relf_numpy = add_mat_numpy(mark_numpy)
+    end_time = time.perf_counter()
+    print("Additive genetic relationship matrix:\n", relf_numpy)
+    print("NumPy version execution time:", end_time - start_time, "seconds")
+
+    # Large benchmark input: every row repeats the pattern 0, 1, 2.
+    # NOTE(review): the JAX benchmark in a_mat_jax.py uses 40000 markers;
+    # align the two sizes before comparing timings.
+    num_individuals = 20000
+    num_markers = 32000
+    mark_numpy_large = np.tile(np.array([0, 1, 2]), (num_individuals, num_markers // 3 + 1))[:, :num_markers]
+
+    # Time the execution with large data for NumPy version
+    start_time = time.perf_counter()
+    relf_numpy_large = add_mat_numpy(mark_numpy_large)
+    end_time = time.perf_counter()
+    print("NumPy version execution time:", end_time - start_time, "seconds")
diff --git a/a_mat_jax.py b/a_mat_jax.py
@@ -0,0 +1,72 @@
+import jax.numpy as jnp
+from jax import jit
+import time
+from sklearn.decomposition import PCA
+import matplotlib.pyplot as plt
+@jit
+def add_mat_jax(mark):
+    """Compute the additive genetic relationship matrix with JAX.
+
+    Args:
+        mark: Array of shape ``(n, m)`` using 0/1/2 encoding.
+
+    Returns:
+        An ``(n, n)`` array equal to ``M @ M.T / sum(2 * p * (1 - p))``, where
+        ``p`` is the per-column mean of ``0.5 * mark`` and ``M = mark - 2p``.
+    """
+    p = jnp.mean(0.5 * mark, axis=0)
+    # Broadcasting subtracts 2p from every row directly, so no (n, 1) x (1, m)
+    # product of ones has to be formed just to replicate p.
+    m = mark - 2 * p
+    rel = jnp.dot(m, m.T)
+    sum_val = jnp.sum(2 * p * (1 - p))
+    return rel / sum_val
+
+
+def small_test():
+    """Run add_mat_jax on a fixed 4x10 example, time it, and plot a PCA.
+
+    Prints the relationship matrix and the post-compilation run time, then
+    shows a scatter plot of the first two principal components of the matrix.
+    """
+    # Fixed fake data for JAX
+    mark_jax = jnp.array([
+        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
+        [1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
+        [2, 2, 2, 2, 2, 2, 2, 2, 2, 2],
+        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
+    ])
+    # Warm-up call so JIT compilation is not part of the measured run.
+    add_mat_jax(mark_jax).block_until_ready()
+
+    # Stop the clock before printing so console I/O is not counted.
+    start_time = time.perf_counter()
+    relf_jax = add_mat_jax(mark_jax).block_until_ready()
+    end_time = time.perf_counter()
+    print("Additive genetic relationship matrix:\n", relf_jax)
+    print("JAX version execution time (after JIT compilation):", end_time - start_time, "seconds")
+
+    # Perform PCA
+    pca = PCA(n_components=3)  # Adjust n_components as needed
+    principal_components = pca.fit_transform(relf_jax)
+
+    # Plot the first two principal components
+    plt.figure(figsize=(8, 6))
+    plt.scatter(principal_components[:, 0], principal_components[:, 1])
+    plt.xlabel('Principal Component 1')
+    plt.ylabel('Principal Component 2')
+    plt.title('PCA of Relationship Matrix')
+    plt.show()
+
+
+def large_test(num_individuals=20000, num_markers=40000):
+    """Benchmark add_mat_jax on a large synthetic matrix, then fit a PCA.
+
+    Args:
+        num_individuals: Number of rows in the synthetic marker matrix.
+        num_markers: Number of columns in the synthetic marker matrix.
+
+    Prints the run time of the second (post-compilation) call. The PCA fit
+    on the resulting matrix is run but is not part of the timed section.
+    """
+    # Every row repeats the pattern 0, 1, 2, trimmed to num_markers columns.
+    mark_large = jnp.tile(jnp.array([0, 1, 2]), (num_individuals, num_markers // 3 + 1))[:, :num_markers]
+    # Warm-up JIT compilation with large data
+    add_mat_jax(mark_large).block_until_ready()
+    # Time the execution with large data
+    start_time = time.perf_counter()
+    relf_large = add_mat_jax(mark_large).block_until_ready()
+    end_time = time.perf_counter()
+    print("JAX version execution time (after JIT compilation):", end_time - start_time, "seconds")
+
+    pca = PCA(n_components=3)  # Adjust n_components as needed
+    # The transformed data is not used further, so it is not bound to a name.
+    pca.fit_transform(relf_large)
+
+