From 619ff585efc08320c31797c33425edab36d03d7c Mon Sep 17 00:00:00 2001
From: zhanglei <min.oddo@gmail.com>
Date: Tue, 27 Feb 2024 14:02:28 +0800
Subject: [PATCH 1/6] removing R code, sort eigen values and vectors before
 optimising

---
 rrBLUP.py | 85 ++++++++++++++++++-------------------------------------
 1 file changed, 28 insertions(+), 57 deletions(-)

diff --git a/rrBLUP.py b/rrBLUP.py
index f2fcadd..b34a113 100644
--- a/rrBLUP.py
+++ b/rrBLUP.py
@@ -376,12 +376,11 @@ def tcrossprod(A, B):
         try:
             svd_SZBt_u, svd_SZBt_d, v = np.linalg.svd(SZBt, full_matrices = 0)
         except:
-            svd_SZBt_u, svd_SZBt_d, v = np.linalg.svd(SZBt + 1e-10, full_matrices = 0)
-        QR = robjects.r['qr'](np.hstack((X, svd_SZBt_u)))
-        q = robjects.r['qr.Q'](QR, complete=True)
-        r = robjects.r['qr.R'](QR)
-        Q = q[:, p:n]
-        R = r[p:m, p:m]
+            svd_SZBt_u, svd_SZBt_d, v = np.linalg.svd(SZBt + 1e-10, full_matrices=0)
+        matrix_to_decompose = np.hstack((X, svd_SZBt_u))
+        Q_numpy, R_numpy = np.linalg.qr(matrix_to_decompose, mode='complete')
+        Q = Q_numpy[:, p:n]
+        R = R_numpy[p:m, p:m]
         try:
             ans = np.linalg.solve(np.transpose(R * R), svd_SZBt_d * svd_SZBt_d)
             theta = np.append(ans, np.zeros(n - p - m))
@@ -393,72 +392,44 @@ def tcrossprod(A, B):
             Hb =  tcrossprod(Z, Z) + offset * np.eye(n)
         else:
             Hb = tcrossprod(np.dot(Z, K), Z) + offset * np.eye(n)
-        tmp = robjects.r['eigen'](Hb)
-        Hb_system_values = list(tmp)[0]
-        Hb_system_vectors = list(tmp)[1]
+
+        eigenvalues, eigenvectors = np.linalg.eig(Hb)
+
+        # Sort the eigenvalues in descending order, and sort the eigenvectors to match
+        idx = eigenvalues.argsort()[::-1]
+        Hb_system_values = eigenvalues[idx]
+        Hb_system_vectors = eigenvectors[:, idx]
+
         phi = Hb_system_values - offset
         if np.nanmin(phi) < -1e-06:
             print("K not positive semi-definite.")
             return 
         U = Hb_system_vectors
         SHbS = np.dot(np.dot(S, Hb), S)
-        tmp = robjects.r['eigen'](SHbS)
-        SHbS_system_values = list(tmp)[0]
-        SHbS_system_vectors = list(tmp)[1]
+        eigenvalues, eigenvectors = np.linalg.eig(SHbS)
+
+        # Sort the eigenvalues in descending order, and sort the eigenvectors to match
+        idx = eigenvalues.argsort()[::-1]
+        SHbS_system_values = eigenvalues[idx]
+        SHbS_system_vectors = eigenvectors[:, idx]
+
         theta = SHbS_system_values[0:(n - p)] - offset
         Q = SHbS_system_vectors[:, 0:(n - p)]
     omega = crossprod(Q, y)
     omega_sq = omega * omega
     if method == 'ML':
-        #def f_ML(Lambda):
-        #    return n * np.log(np.nansum(omega_sq.reshape(-1) / (theta + Lambda))) + np.nansum(np.log(phi + Lambda))
-        #lambda_opt = fminbound(f_ML, bounds[0], bounds[1])
-        #objective = f_ML(lambda_opt)
         df = n
-        pd.DataFrame(np.array(bounds)).to_csv('bounds.csv')
-        pd.DataFrame(np.array([n])).to_csv('df.csv')
-        pd.DataFrame(phi).to_csv('phi.csv')
-        pd.DataFrame(theta).to_csv('theta.csv')
-        pd.DataFrame(omega_sq).to_csv('omega_sq.csv')
-        rcode = """
-        bounds <- c(read.csv('bounds.csv',row.names=1)[1,1], read.csv('bounds.csv',row.names=1)[2,1])
-        df <- read.csv('df.csv',row.names=1)[1,1]
-        phi <- read.csv('phi.csv',row.names=1)
-        theta <- read.csv('theta.csv',row.names=1)
-        omega_sq <- read.csv('omega_sq.csv',row.names=1)
-        f_ML <- function(Lambda) {
-            df * log(sum(omega.sq/(theta + lambda))) + sum(log(phi + lambda))
-        }
-        optimize(f_ML, interval = bounds)
-        """
-        tmp = robjects.r(rcode)
-        lambda_opt = list(tmp)[0]
-        objective = list(tmp)[1]
+        def f_ML(Lambda):
+            return n * np.log(np.nansum(omega_sq.reshape(-1) / (theta + Lambda))) + np.nansum(np.log(phi + Lambda))
+        lambda_opt = fminbound(f_ML, bounds[0], bounds[1])
+        objective = f_ML(lambda_opt)
         os.system('rm bounds.csv df.csv phi.csv theta.csv omega_sq.csv')
     else:
-        #def f_REML(Lambda):
-        #    return (n - p) * np.log(np.nansum(omega_sq.reshape(-1) / (theta + Lambda))) + np.nansum(np.log(theta + Lambda))
-        #lambda_opt = fminbound(f_REML, bounds[0], bounds[1])
-        #objective = f_REML(lambda_opt)
         df = n - p
-        pd.DataFrame(np.array(bounds)).to_csv('bounds.csv')
-        pd.DataFrame(np.array([n-p])).to_csv('df.csv')
-        pd.DataFrame(omega_sq).to_csv('omega_sq.csv')
-        pd.DataFrame(theta).to_csv('theta.csv')
-        rcode = """
-        bounds <- c(read.csv('bounds.csv',row.names=1)[1,1], read.csv('bounds.csv',row.names=1)[2,1])
-        df <- read.csv('df.csv',row.names=1)[1,1]
-        theta <- read.csv('theta.csv',row.names=1)
-        omega_sq <- read.csv('omega_sq.csv',row.names=1)
-        f_REML <- function(Lambda) {
-            df * log(sum(omega_sq/(theta + Lambda))) + sum(log(theta + Lambda))
-        }
-        optimize(f_REML, interval = bounds)
-        """
-        tmp = robjects.r(rcode)
-        lambda_opt = list(tmp)[0]
-        objective = list(tmp)[1]
-        os.system('rm bounds.csv df.csv omega_sq.csv theta.csv')
+        def f_REML(Lambda):
+            return (n - p) * np.log(np.nansum(omega_sq.reshape(-1) / (theta + Lambda))) + np.nansum(np.log(theta + Lambda))
+        lambda_opt = fminbound(f_REML, bounds[0], bounds[1])
+        objective = f_REML(lambda_opt)
     Vu_opt = np.nansum(omega_sq.reshape(-1) / (theta + lambda_opt)) / df
     Ve_opt = lambda_opt * Vu_opt
     Hinv = np.dot(U, (np.transpose(U) / (phi + lambda_opt).reshape(-1,1)))

From 3b162513ca520df4301fe66f498e4740637057c2 Mon Sep 17 00:00:00 2001
From: zhanglei <min.oddo@gmail.com>
Date: Thu, 7 Mar 2024 16:34:47 +0800
Subject: [PATCH 2/6] removing R code, sort eigen values and vectors before
 optimising

---
 aMat.py      | 43 +++++++++++++++++++++++++++++++++
 a_mat_jax.py | 67 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 110 insertions(+)
 create mode 100644 aMat.py
 create mode 100644 a_mat_jax.py

diff --git a/aMat.py b/aMat.py
new file mode 100644
index 0000000..753b5a6
--- /dev/null
+++ b/aMat.py
@@ -0,0 +1,43 @@
+import time
+
+import numpy as np
+
+# Define the add_mat function as previously described with corrections
+def add_mat_numpy(mark):
+    p = np.mean(0.5 * mark, axis=0)  # Ensure p is an ndarray for element-wise operations
+    t = np.ones((mark.shape[0], 1))
+    p_reshape = p.reshape(1,-1)
+    x = np.dot(t, p.reshape(1, -1))
+    m = mark - 2 * np.dot(t, p.reshape(1, -1))  # Corrected matrix multiplication
+    rel = np.dot(m, m.T)
+    q = 1 - p
+    sum_val = np.sum(2 * p * q)  # Element-wise multiplication
+    relf = rel / sum_val  # Element-wise division
+    return relf
+
+# Create fixed fake data as specified
+mark_numpy = np.array([
+    [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],  # First individual with all 0s
+    [1, 1, 1, 1, 1, 1, 1, 1, 1, 1],  # Second individual with all 1s
+    [2, 2, 2, 2, 2, 2, 2, 2, 2, 2],  # Third individual with all 2s
+    [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]   # Fourth individual with all 0s
+])
+
+start_time = time.time()
+relf_numpy = add_mat_numpy(mark_numpy)
+print("Additive genetic relationship matrix:\n", relf_numpy)
+end_time = time.time()
+print("NumPy version execution time:", end_time - start_time, "seconds")
+
+
+# Create large test data for NumPy version
+num_individuals = 20000
+num_markers = 32000
+# Each row alternates between 0, 1, and 2
+mark_numpy_large = np.tile(np.array([0, 1, 2]), (num_individuals, num_markers // 3 + 1))[:, :num_markers]
+
+# Time the execution with large data for NumPy version
+start_time = time.time()
+relf_numpy_large = add_mat_numpy(mark_numpy_large)
+end_time = time.time()
+print("NumPy version execution time:", end_time - start_time, "seconds")
diff --git a/a_mat_jax.py b/a_mat_jax.py
new file mode 100644
index 0000000..e9b29f4
--- /dev/null
+++ b/a_mat_jax.py
@@ -0,0 +1,67 @@
+import jax.numpy as jnp
+from jax import jit
+import time
+from sklearn.decomposition import PCA
+import matplotlib.pyplot as plt
+@jit
+def add_mat_jax(mark):
+    p = jnp.mean(0.5 * mark, axis=0)
+    t = jnp.ones((mark.shape[0], 1))
+    m = mark - 2 * jnp.dot(t, p.reshape(1, -1))
+    rel = jnp.dot(m, m.T)
+    q = 1 - p
+    sum_val = jnp.sum(2 * p * q)
+    relf = rel / sum_val
+    return relf
+
+
+def small_test():
+    global _, start_time, end_time
+    # Fixed fake data for JAX
+    mark_jax = jnp.array([
+        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
+        [1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
+        [2, 2, 2, 2, 2, 2, 2, 2, 2, 2],
+        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
+    ])
+    # Warm-up call to include JIT compilation time separately
+    _ = add_mat_jax(mark_jax).block_until_ready()
+    # Measure execution time after compilation
+    start_time = time.time()
+    relf_jax = add_mat_jax(mark_jax).block_until_ready()
+    print("Additive genetic relationship matrix:\n", relf_jax)
+    end_time = time.time()
+    print("JAX version execution time (after JIT compilation):", end_time - start_time, "seconds")
+    # Perform PCA
+    pca = PCA(n_components=3)  # Adjust n_components as needed
+    principal_components = pca.fit_transform(relf_jax)
+
+    # Plot the first two principal components
+    plt.figure(figsize=(8, 6))
+    plt.scatter(principal_components[:, 0], principal_components[:, 1])
+    plt.xlabel('Principal Component 1')
+    plt.ylabel('Principal Component 2')
+    plt.title('PCA of Relationship Matrix')
+    plt.show()
+
+
+def large_test():
+    global _, start_time, end_time
+    num_individuals = 20000
+    num_markers = 40000
+    mark_large = jnp.tile(jnp.array([0, 1, 2]), (num_individuals, num_markers // 3 + 1))[:, :num_markers]
+    # Warm-up JIT compilation with large data
+    _ = add_mat_jax(mark_large).block_until_ready()
+    # Time the execution with large data
+    start_time = time.time()
+    relf_large = add_mat_jax(mark_large).block_until_ready()
+    end_time = time.time()
+    print("JAX version execution time (after JIT compilation):", end_time - start_time, "seconds")
+
+    pca = PCA(n_components=3)  # Adjust n_components as needed
+    principal_components = pca.fit_transform(relf_large)
+
+#small_test()
+
+large_test()
+

From fe235324b80e5d5b94456f0314d62e7156c63be2 Mon Sep 17 00:00:00 2001
From: zhanglei <min.oddo@gmail.com>
Date: Sat, 23 Mar 2024 09:17:47 +0800
Subject: [PATCH 3/6] update a kinship

---
 aMat.py                     |  45 ++---
 a_mat_jax.py                |  11 +-
 geonomic_kinship_test.ipynb | 356 ++++++++++++++++++++++++++++++++++++
 3 files changed, 387 insertions(+), 25 deletions(-)
 create mode 100644 geonomic_kinship_test.ipynb

diff --git a/aMat.py b/aMat.py
index 753b5a6..d63a8af 100644
--- a/aMat.py
+++ b/aMat.py
@@ -15,29 +15,30 @@ def add_mat_numpy(mark):
     relf = rel / sum_val  # Element-wise division
     return relf
 
-# Create fixed fake data as specified
-mark_numpy = np.array([
-    [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],  # First individual with all 0s
-    [1, 1, 1, 1, 1, 1, 1, 1, 1, 1],  # Second individual with all 1s
-    [2, 2, 2, 2, 2, 2, 2, 2, 2, 2],  # Third individual with all 2s
-    [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]   # Fourth individual with all 0s
-])
 
-start_time = time.time()
-relf_numpy = add_mat_numpy(mark_numpy)
-print("Additive genetic relationship matrix:\n", relf_numpy)
-end_time = time.time()
-print("NumPy version execution time:", end_time - start_time, "seconds")
+if __name__ == '__main__':
+    # Create fixed fake data as specified
+    mark_numpy = np.array([
+        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],  # First individual with all 0s
+        [1, 1, 1, 1, 1, 1, 1, 1, 1, 1],  # Second individual with all 1s
+        [2, 2, 2, 2, 2, 2, 2, 2, 2, 2],  # Third individual with all 2s
+        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]  # Fourth individual with all 0s
+    ])
 
+    start_time = time.time()
+    relf_numpy = add_mat_numpy(mark_numpy)
+    print("Additive genetic relationship matrix:\n", relf_numpy)
+    end_time = time.time()
+    print("NumPy version execution time:", end_time - start_time, "seconds")
 
-# Create large test data for NumPy version
-num_individuals = 20000
-num_markers = 32000
-# Each row alternates between 0, 1, and 2
-mark_numpy_large = np.tile(np.array([0, 1, 2]), (num_individuals, num_markers // 3 + 1))[:, :num_markers]
+    # Create large test data for NumPy version
+    num_individuals = 20000
+    num_markers = 32000
+    # Each row alternates between 0, 1, and 2
+    mark_numpy_large = np.tile(np.array([0, 1, 2]), (num_individuals, num_markers // 3 + 1))[:, :num_markers]
 
-# Time the execution with large data for NumPy version
-start_time = time.time()
-relf_numpy_large = add_mat_numpy(mark_numpy_large)
-end_time = time.time()
-print("NumPy version execution time:", end_time - start_time, "seconds")
+    # Time the execution with large data for NumPy version
+    start_time = time.time()
+    relf_numpy_large = add_mat_numpy(mark_numpy_large)
+    end_time = time.time()
+    print("NumPy version execution time:", end_time - start_time, "seconds")
diff --git a/a_mat_jax.py b/a_mat_jax.py
index e9b29f4..3b86dfe 100644
--- a/a_mat_jax.py
+++ b/a_mat_jax.py
@@ -5,6 +5,14 @@
 import matplotlib.pyplot as plt
 @jit
 def add_mat_jax(mark):
+    """
+        mark use 0,1,2 encoding
+    Args:
+        mark:  shape is n,m
+
+    Returns:
+    """
+
     p = jnp.mean(0.5 * mark, axis=0)
     t = jnp.ones((mark.shape[0], 1))
     m = mark - 2 * jnp.dot(t, p.reshape(1, -1))
@@ -61,7 +69,4 @@ def large_test():
     pca = PCA(n_components=3)  # Adjust n_components as needed
     principal_components = pca.fit_transform(relf_large)
 
-#small_test()
-
-large_test()
 
diff --git a/geonomic_kinship_test.ipynb b/geonomic_kinship_test.ipynb
new file mode 100644
index 0000000..7dc6cfb
--- /dev/null
+++ b/geonomic_kinship_test.ipynb
@@ -0,0 +1,356 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "initial_id",
+   "metadata": {
+    "collapsed": true,
+    "ExecuteTime": {
+     "end_time": "2024-03-23T00:51:49.603854400Z",
+     "start_time": "2024-03-23T00:51:45.861543100Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "<class 'pandas.core.frame.DataFrame'>\n",
+      "Index: 2763 entries, 112 to 2396\n",
+      "Columns: 9642 entries, Genotype to 10_149535062_G\n",
+      "dtypes: int64(9641), object(1)\n",
+      "memory usage: 203.3+ MB\n",
+      "None\n"
+     ]
+    }
+   ],
+   "source": [
+    "import pandas as pd\n",
+    "import numpy as np\n",
+    "import time\n",
+    "import rrBLUP\n",
+    "df_geno = pd.read_csv('data/23TC1_YLD14_SU_geno_final.txt', sep='\\t')\n",
+    "df_geno = df_geno.sort_values(by=['Genotype'])\n",
+    "print(df_geno.info())\n",
+    "df_pheno = pd.read_csv(\"data/23TC1_YLD14_SU_pheno_final.txt\", sep='\\t')\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "<class 'pandas.core.frame.DataFrame'>\n",
+      "Index: 2763 entries, 1929 to 179\n",
+      "Data columns (total 2 columns):\n",
+      " #   Column    Non-Null Count  Dtype  \n",
+      "---  ------    --------------  -----  \n",
+      " 0   Genotype  2763 non-null   object \n",
+      " 1   YLD14_SU  2763 non-null   float64\n",
+      "dtypes: float64(1), object(1)\n",
+      "memory usage: 64.8+ KB\n",
+      "None\n",
+      "(2763, 9641)\n",
+      "(2763, 1)\n"
+     ]
+    }
+   ],
+   "source": [
+    "\n",
+    "df_pheno_ordered = df_pheno.sort_values(by=['Genotype'])\n",
+    "print(df_pheno_ordered.info())\n",
+    "train_x = np.array(df_geno.drop('Genotype', axis=1))\n",
+    "train_y = np.array(df_pheno_ordered['YLD14_SU']).reshape(-1, 1)\n",
+    "train_x = train_x - 1\n",
+    "print(train_x.shape)\n",
+    "print(train_y.shape)"
+   ],
+   "metadata": {
+    "collapsed": false,
+    "ExecuteTime": {
+     "end_time": "2024-03-23T00:52:01.713868700Z",
+     "start_time": "2024-03-23T00:52:01.567439200Z"
+    }
+   },
+   "id": "4ea00660ee17a7da",
+   "execution_count": 2
+  },
+  {
+   "cell_type": "code",
+   "outputs": [],
+   "source": [
+    "a_1 = rrBLUP.A_mat(train_x)\n",
+    "a_1"
+   ],
+   "metadata": {
+    "collapsed": false,
+    "ExecuteTime": {
+     "end_time": "2024-03-23T00:53:45.474818Z",
+     "start_time": "2024-03-23T00:52:44.054145500Z"
+    }
+   },
+   "id": "1f778cf653c205a5",
+   "execution_count": 3
+  },
+  {
+   "cell_type": "code",
+   "outputs": [
+    {
+     "data": {
+      "text/plain": "numpy.ndarray"
+     },
+     "execution_count": 22,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "type(a_1)"
+   ],
+   "metadata": {
+    "collapsed": false,
+    "ExecuteTime": {
+     "end_time": "2024-03-23T01:04:41.642221Z",
+     "start_time": "2024-03-23T01:04:41.624443300Z"
+    }
+   },
+   "id": "eb6e2174ae759be4",
+   "execution_count": 22
+  },
+  {
+   "cell_type": "code",
+   "outputs": [],
+   "source": [
+    "import aMat\n",
+    "a_2 = aMat.add_mat_numpy(train_x+1)"
+   ],
+   "metadata": {
+    "collapsed": false,
+    "ExecuteTime": {
+     "end_time": "2024-03-23T01:10:23.791477300Z",
+     "start_time": "2024-03-23T01:10:22.853119Z"
+    }
+   },
+   "id": "22084ea87f9364b1",
+   "execution_count": 27
+  },
+  {
+   "cell_type": "code",
+   "outputs": [
+    {
+     "data": {
+      "text/plain": "numpy.ndarray"
+     },
+     "execution_count": 38,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "import a_mat_jax\n",
+    "import jax\n",
+    "a_3 = a_mat_jax.add_mat_jax(train_x+1)\n",
+    "a_3 = np.asarray(a_3)\n",
+    "type(a_3)"
+   ],
+   "metadata": {
+    "collapsed": false,
+    "ExecuteTime": {
+     "end_time": "2024-03-23T01:16:10.802205200Z",
+     "start_time": "2024-03-23T01:16:10.233960Z"
+    }
+   },
+   "id": "1ed66c383a884e0e",
+   "execution_count": 38
+  },
+  {
+   "cell_type": "code",
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Arrays are close: True\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Using allclose to compare\n",
+    "close = np.allclose(a_1, a_2, rtol=0.0001, atol=0.0001)\n",
+    "\n",
+    "print(f\"Arrays are close: {close}\")"
+   ],
+   "metadata": {
+    "collapsed": false,
+    "ExecuteTime": {
+     "end_time": "2024-03-23T01:10:45.248073700Z",
+     "start_time": "2024-03-23T01:10:45.146571300Z"
+    }
+   },
+   "id": "bdbcc53bb039085a",
+   "execution_count": 30
+  },
+  {
+   "cell_type": "code",
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Running Time: 17.74 s\n"
+     ]
+    }
+   ],
+   "source": [
+    "start_time = time.time()\n",
+    "g_result = rrBLUP.mixed_solve(y = train_y, K = a_1)\n",
+    "end_time = time.time()\n",
+    "#print(g_result)\n",
+    "print('Running Time: '+str(round(end_time-start_time, 2))+' s')\n",
+    "train_pred = g_result['u'] "
+   ],
+   "metadata": {
+    "collapsed": false,
+    "ExecuteTime": {
+     "end_time": "2024-03-23T01:01:35.992522700Z",
+     "start_time": "2024-03-23T01:01:18.250576300Z"
+    }
+   },
+   "id": "d32dd35631566b26",
+   "execution_count": 15
+  },
+  {
+   "cell_type": "code",
+   "outputs": [
+    {
+     "data": {
+      "text/plain": "array([[-17.59836984],\n       [-20.95136377],\n       [ -0.13633499],\n       ...,\n       [ -4.65200707],\n       [-20.01554517],\n       [-20.1281834 ]])"
+     },
+     "execution_count": 16,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "train_pred"
+   ],
+   "metadata": {
+    "collapsed": false,
+    "ExecuteTime": {
+     "end_time": "2024-03-23T01:01:45.967475Z",
+     "start_time": "2024-03-23T01:01:45.954682900Z"
+    }
+   },
+   "id": "3d0c303c9230d183",
+   "execution_count": 16
+  },
+  {
+   "cell_type": "code",
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "K not positive semi-definite.\n",
+      "Running Time: 8.41 s\n"
+     ]
+    },
+    {
+     "ename": "TypeError",
+     "evalue": "'NoneType' object is not subscriptable",
+     "output_type": "error",
+     "traceback": [
+      "\u001B[1;31m---------------------------------------------------------------------------\u001B[0m",
+      "\u001B[1;31mTypeError\u001B[0m                                 Traceback (most recent call last)",
+      "Cell \u001B[1;32mIn[39], line 6\u001B[0m\n\u001B[0;32m      4\u001B[0m \u001B[38;5;66;03m#print(g_result)\u001B[39;00m\n\u001B[0;32m      5\u001B[0m \u001B[38;5;28mprint\u001B[39m(\u001B[38;5;124m'\u001B[39m\u001B[38;5;124mRunning Time: \u001B[39m\u001B[38;5;124m'\u001B[39m\u001B[38;5;241m+\u001B[39m\u001B[38;5;28mstr\u001B[39m(\u001B[38;5;28mround\u001B[39m(end_time\u001B[38;5;241m-\u001B[39mstart_time, \u001B[38;5;241m2\u001B[39m))\u001B[38;5;241m+\u001B[39m\u001B[38;5;124m'\u001B[39m\u001B[38;5;124m s\u001B[39m\u001B[38;5;124m'\u001B[39m)\n\u001B[1;32m----> 6\u001B[0m train_pred_new \u001B[38;5;241m=\u001B[39m \u001B[43mg_result_new\u001B[49m\u001B[43m[\u001B[49m\u001B[38;5;124;43m'\u001B[39;49m\u001B[38;5;124;43mu\u001B[39;49m\u001B[38;5;124;43m'\u001B[39;49m\u001B[43m]\u001B[49m \n\u001B[0;32m      7\u001B[0m train_pred_new\n",
+      "\u001B[1;31mTypeError\u001B[0m: 'NoneType' object is not subscriptable"
+     ]
+    }
+   ],
+   "source": [
+    "start_time = time.time()\n",
+    "g_result_new = rrBLUP.mixed_solve(y = train_y, K = a_3)\n",
+    "end_time = time.time()\n",
+    "#print(g_result)\n",
+    "print('Running Time: '+str(round(end_time-start_time, 2))+' s')\n",
+    "train_pred_new = g_result_new['u'] \n",
+    "train_pred_new"
+   ],
+   "metadata": {
+    "collapsed": false,
+    "ExecuteTime": {
+     "end_time": "2024-03-23T01:16:26.084069700Z",
+     "start_time": "2024-03-23T01:16:17.660955900Z"
+    }
+   },
+   "id": "557f3d43c301ad45",
+   "execution_count": 39
+  },
+  {
+   "cell_type": "code",
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "<class 'numpy.ndarray'>\n"
+     ]
+    }
+   ],
+   "source": [
+    "import jax.numpy as jnp\n",
+    "import numpy as np\n",
+    "\n",
+    "# Create a JAX array\n",
+    "jax_array = jnp.array([1, 2, 3])\n",
+    "\n",
+    "# Convert to a NumPy array\n",
+    "numpy_array = np.asarray(jax_array)\n",
+    "\n",
+    "print(type(numpy_array))  # <class 'numpy.ndarray'>\n"
+   ],
+   "metadata": {
+    "collapsed": false,
+    "ExecuteTime": {
+     "end_time": "2024-03-23T01:15:38.449024800Z",
+     "start_time": "2024-03-23T01:15:38.443083500Z"
+    }
+   },
+   "id": "2fdea088f03fd77",
+   "execution_count": 37
+  },
+  {
+   "cell_type": "code",
+   "outputs": [],
+   "source": [],
+   "metadata": {
+    "collapsed": false
+   },
+   "id": "79a0fb4f44e8b0df"
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 2
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython2",
+   "version": "2.7.6"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}

From 19eb1917ff9a8bfaf012b0592fdc4c9f15eead39 Mon Sep 17 00:00:00 2001
From: zhanglei <min.oddo@gmail.com>
Date: Mon, 25 Mar 2024 17:22:21 +0800
Subject: [PATCH 4/6] gblup and arseml test using gblup

---
 arseml_geonomic_kinship_test.ipynb | 608 +++++++++++++++++++++++++++++
 blup_animal.ipynb                  | 289 ++++++++++++++
 gblup.py                           |  36 ++
 geonomic_kinship_test.ipynb        | 356 -----------------
 4 files changed, 933 insertions(+), 356 deletions(-)
 create mode 100644 arseml_geonomic_kinship_test.ipynb
 create mode 100644 blup_animal.ipynb
 create mode 100644 gblup.py
 delete mode 100644 geonomic_kinship_test.ipynb

diff --git a/arseml_geonomic_kinship_test.ipynb b/arseml_geonomic_kinship_test.ipynb
new file mode 100644
index 0000000..b62700a
--- /dev/null
+++ b/arseml_geonomic_kinship_test.ipynb
@@ -0,0 +1,608 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "initial_id",
+   "metadata": {
+    "collapsed": true,
+    "ExecuteTime": {
+     "end_time": "2024-03-25T00:43:12.613827100Z",
+     "start_time": "2024-03-25T00:43:00.090048600Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "<class 'pandas.core.frame.DataFrame'>\n",
+      "Index: 2763 entries, 112 to 2396\n",
+      "Columns: 9642 entries, Genotype to 10_149535062_G\n",
+      "dtypes: int64(9641), object(1)\n",
+      "memory usage: 203.3+ MB\n",
+      "None\n"
+     ]
+    }
+   ],
+   "source": [
+    "import pandas as pd\n",
+    "import numpy as np\n",
+    "import time\n",
+    "import rrBLUP as p_rrBlup\n",
+    "from rpy2.robjects.packages import importr\n",
+    "from rpy2.robjects import Formula as f\n",
+    "from rpy2.robjects import pandas2ri\n",
+    "import rpy2.robjects as ro\n",
+    "pandas2ri.activate()\n",
+    "rrBLUP = importr('rrBLUP')\n",
+    "df_geno = pd.read_csv('data/23TC1_YLD14_SU_geno_final.txt', sep='\\t')\n",
+    "df_geno = df_geno.sort_values(by=['Genotype'])\n",
+    "print(df_geno.info())\n",
+    "df_pheno = pd.read_csv(\"data/23TC1_YLD14_SU_pheno_final.txt\", sep='\\t')\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "<class 'pandas.core.frame.DataFrame'>\n",
+      "Index: 2763 entries, 1929 to 179\n",
+      "Data columns (total 2 columns):\n",
+      " #   Column    Non-Null Count  Dtype  \n",
+      "---  ------    --------------  -----  \n",
+      " 0   Genotype  2763 non-null   object \n",
+      " 1   YLD14_SU  2763 non-null   float64\n",
+      "dtypes: float64(1), object(1)\n",
+      "memory usage: 64.8+ KB\n",
+      "None\n",
+      "(2763, 9641)\n",
+      "(2763, 1)\n"
+     ]
+    }
+   ],
+   "source": [
+    "\n",
+    "df_pheno_ordered = df_pheno.sort_values(by=['Genotype'])\n",
+    "print(df_pheno_ordered.info())\n",
+    "train_x = np.array(df_geno.drop('Genotype', axis=1))\n",
+    "train_y = np.array(df_pheno_ordered['YLD14_SU']).reshape(-1, 1)\n",
+    "train_x = train_x - 1\n",
+    "print(train_x.shape)\n",
+    "print(train_y.shape)"
+   ],
+   "metadata": {
+    "collapsed": false,
+    "ExecuteTime": {
+     "end_time": "2024-03-25T00:43:14.833783200Z",
+     "start_time": "2024-03-25T00:43:14.642438500Z"
+    }
+   },
+   "id": "4ea00660ee17a7da",
+   "execution_count": 2
+  },
+  {
+   "cell_type": "code",
+   "outputs": [
+    {
+     "data": {
+      "text/plain": "array([[ 0.96460593,  0.40092433,  0.25768902, ..., -0.30201703,\n        -0.23842051, -0.23847136],\n       [ 0.40092433,  0.81633067,  0.42894227, ..., -0.22019376,\n        -0.27356791, -0.27299987],\n       [ 0.25768902,  0.42894227,  0.75699359, ..., -0.24150724,\n        -0.2534156 , -0.25408534],\n       ...,\n       [-0.30201703, -0.22019376, -0.24150724, ...,  0.70114823,\n         0.31388157,  0.31383073],\n       [-0.23842051, -0.27356791, -0.2534156 , ...,  0.31388157,\n         0.89487222,  0.89420248],\n       [-0.23847136, -0.27299987, -0.25408534, ...,  0.31383073,\n         0.89420248,  0.89569887]])"
+     },
+     "execution_count": 3,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "a_1 = rrBLUP.A_mat(train_x)\n",
+    "a_1"
+   ],
+   "metadata": {
+    "collapsed": false,
+    "ExecuteTime": {
+     "end_time": "2024-03-25T00:43:53.731595900Z",
+     "start_time": "2024-03-25T00:43:16.866956900Z"
+    }
+   },
+   "id": "1f778cf653c205a5",
+   "execution_count": 3
+  },
+  {
+   "cell_type": "code",
+   "outputs": [
+    {
+     "data": {
+      "text/plain": "numpy.ndarray"
+     },
+     "execution_count": 52,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "type(a_1)"
+   ],
+   "metadata": {
+    "collapsed": false,
+    "ExecuteTime": {
+     "end_time": "2024-03-23T01:35:43.596344300Z",
+     "start_time": "2024-03-23T01:35:43.592192800Z"
+    }
+   },
+   "id": "eb6e2174ae759be4",
+   "execution_count": 52
+  },
+  {
+   "cell_type": "code",
+   "outputs": [],
+   "source": [
+    "import aMat\n",
+    "a_2 = aMat.add_mat_numpy(train_x+1)"
+   ],
+   "metadata": {
+    "collapsed": false,
+    "ExecuteTime": {
+     "end_time": "2024-03-25T00:44:38.861429500Z",
+     "start_time": "2024-03-25T00:44:37.795651Z"
+    }
+   },
+   "id": "22084ea87f9364b1",
+   "execution_count": 5
+  },
+  {
+   "cell_type": "code",
+   "outputs": [
+    {
+     "data": {
+      "text/plain": "numpy.ndarray"
+     },
+     "execution_count": 53,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "import a_mat_jax\n",
+    "import jax\n",
+    "a_3 = a_mat_jax.add_mat_jax(train_x+1)\n",
+    "a_3 = np.asarray(a_3)\n",
+    "type(a_3)"
+   ],
+   "metadata": {
+    "collapsed": false,
+    "ExecuteTime": {
+     "end_time": "2024-03-23T01:35:48.457885400Z",
+     "start_time": "2024-03-23T01:35:47.864412600Z"
+    }
+   },
+   "id": "1ed66c383a884e0e",
+   "execution_count": 53
+  },
+  {
+   "cell_type": "code",
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Arrays are close: True\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Using allclose to compare\n",
+    "close = np.allclose(a_2, a_3, rtol=0.00001, atol=0.00001)\n",
+    "\n",
+    "print(f\"Arrays are close: {close}\")"
+   ],
+   "metadata": {
+    "collapsed": false,
+    "ExecuteTime": {
+     "end_time": "2024-03-23T01:35:50.365137700Z",
+     "start_time": "2024-03-23T01:35:50.287741600Z"
+    }
+   },
+   "id": "bdbcc53bb039085a",
+   "execution_count": 54
+  },
+  {
+   "cell_type": "code",
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Running Time: 20.03 s\n"
+     ]
+    },
+    {
+     "ename": "AttributeError",
+     "evalue": "'dict' object has no attribute 'rx2'",
+     "output_type": "error",
+     "traceback": [
+      "\u001B[1;31m---------------------------------------------------------------------------\u001B[0m",
+      "\u001B[1;31mAttributeError\u001B[0m                            Traceback (most recent call last)",
+      "Cell \u001B[1;32mIn[4], line 7\u001B[0m\n\u001B[0;32m      5\u001B[0m \u001B[38;5;28mprint\u001B[39m(\u001B[38;5;124m'\u001B[39m\u001B[38;5;124mRunning Time: \u001B[39m\u001B[38;5;124m'\u001B[39m\u001B[38;5;241m+\u001B[39m\u001B[38;5;28mstr\u001B[39m(\u001B[38;5;28mround\u001B[39m(end_time\u001B[38;5;241m-\u001B[39mstart_time, \u001B[38;5;241m2\u001B[39m))\u001B[38;5;241m+\u001B[39m\u001B[38;5;124m'\u001B[39m\u001B[38;5;124m s\u001B[39m\u001B[38;5;124m'\u001B[39m)\n\u001B[0;32m      6\u001B[0m \u001B[38;5;66;03m#train_pred = g_result['u'] \u001B[39;00m\n\u001B[1;32m----> 7\u001B[0m train_pred \u001B[38;5;241m=\u001B[39m np\u001B[38;5;241m.\u001B[39marray(\u001B[43mg_result\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mrx2\u001B[49m(\u001B[38;5;124m'\u001B[39m\u001B[38;5;124mu\u001B[39m\u001B[38;5;124m'\u001B[39m))\n",
+      "\u001B[1;31mAttributeError\u001B[0m: 'dict' object has no attribute 'rx2'"
+     ]
+    }
+   ],
+   "source": [
+    "start_time = time.time()\n",
+    "g_result = rrBLUP.mixed_solve(y = train_y, K = a_1)\n",
+    "end_time = time.time()\n",
+    "#print(g_result)\n",
+    "print('Running Time: '+str(round(end_time-start_time, 2))+' s')\n",
+    "#train_pred = g_result['u'] \n",
+    "train_pred = np.array(g_result.rx2('u'))"
+   ],
+   "metadata": {
+    "collapsed": false,
+    "ExecuteTime": {
+     "end_time": "2024-03-25T00:44:26.308267100Z",
+     "start_time": "2024-03-25T00:44:05.897407300Z"
+    }
+   },
+   "id": "d32dd35631566b26",
+   "execution_count": 4
+  },
+  {
+   "cell_type": "code",
+   "outputs": [
+    {
+     "data": {
+      "text/plain": "array([[689.5701105],\n       [615.9386355],\n       [661.3197776],\n       ...,\n       [692.8649345],\n       [709.3628695],\n       [684.4678623]])"
+     },
+     "execution_count": 39,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "train_y"
+   ],
+   "metadata": {
+    "collapsed": false,
+    "ExecuteTime": {
+     "end_time": "2024-03-25T06:34:24.962192500Z",
+     "start_time": "2024-03-25T06:34:24.951436400Z"
+    }
+   },
+   "id": "3d0c303c9230d183",
+   "execution_count": 39
+  },
+  {
+   "cell_type": "code",
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "{'Vu': 462.6130366195777, 'Ve': 969.4607183018168, 'beta': array([[703.72274814]]), 'u': array([[-17.58200532],\n",
+      "       [-21.2687656 ],\n",
+      "       [ -0.33706766],\n",
+      "       ...,\n",
+      "       [ -4.68147966],\n",
+      "       [-19.89370123],\n",
+      "       [-20.12403582]]), 'LL': -13572.19170915865}\n",
+      "Running Time: 17.25 s\n"
+     ]
+    }
+   ],
+   "source": [
+    "start_time = time.time()\n",
+    "g_result = p_rrBlup.mixed_solve(y = train_y, K = a_2)\n",
+    "end_time = time.time()\n",
+    "print(g_result)\n",
+    "print('Running Time: '+str(round(end_time-start_time, 2))+' s')\n",
+    "#train_pred = g_result['u'] \n",
+    "rrblup_u = g_result['u']"
+   ],
+   "metadata": {
+    "collapsed": false,
+    "ExecuteTime": {
+     "end_time": "2024-03-25T06:52:04.948507200Z",
+     "start_time": "2024-03-25T06:51:47.691501200Z"
+    }
+   },
+   "id": "557f3d43c301ad45",
+   "execution_count": 64
+  },
+  {
+   "cell_type": "code",
+   "outputs": [
+    {
+     "data": {
+      "text/plain": "array([[-17.59836984],\n       [-20.95136378],\n       [ -0.13633498],\n       ...,\n       [ -4.65200707],\n       [-20.01554518],\n       [-20.12818341]])"
+     },
+     "execution_count": 7,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "rrblup_u"
+   ],
+   "metadata": {
+    "collapsed": false,
+    "ExecuteTime": {
+     "end_time": "2024-03-25T00:45:22.463225400Z",
+     "start_time": "2024-03-25T00:45:22.454325500Z"
+    }
+   },
+   "id": "2fdea088f03fd77",
+   "execution_count": 7
+  },
+  {
+   "cell_type": "code",
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Correlation Coefficient (相关系数): 0.9999505295951803\n"
+     ]
+    }
+   ],
+   "source": [
+    "import gblup\n",
+    "g_result = gblup.solve_mixed_gblup_model(n_obs=len(train_y), obs_ids=[i for i in range(len(train_y))], y_values=train_y, ainv=np.linalg.inv(a_2))\n",
+    "gblue_values = g_result[1:,]\n",
+    "from scipy.stats import pearsonr\n",
+    "\n",
+    "rrblup_u_1d = rrblup_u.flatten()\n",
+    "gblue_values_1d = gblue_values.flatten()\n",
+    "\n",
+    "# Now you can calculate the correlation coefficient\n",
+    "correlation_coef = np.corrcoef(rrblup_u_1d, gblue_values_1d)[0, 1]\n",
+    "print(\"Correlation Coefficient (相关系数):\", correlation_coef)\n"
+   ],
+   "metadata": {
+    "collapsed": false,
+    "ExecuteTime": {
+     "end_time": "2024-03-25T08:34:01.640160900Z",
+     "start_time": "2024-03-25T08:34:00.704205500Z"
+    }
+   },
+   "id": "173e3d457a409f8e",
+   "execution_count": 76
+  },
+  {
+   "cell_type": "code",
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Online License checked out Mon Mar 25 13:40:58 2024\n"
+     ]
+    }
+   ],
+   "source": [
+    "asreml = importr('asreml')"
+   ],
+   "metadata": {
+    "collapsed": false,
+    "ExecuteTime": {
+     "end_time": "2024-03-25T05:40:58.749306200Z",
+     "start_time": "2024-03-25T05:40:56.371246400Z"
+    }
+   },
+   "id": "79a0fb4f44e8b0df",
+   "execution_count": 8
+  },
+  {
+   "cell_type": "code",
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "R[write to console]: Reciprocal conditional number for original matrix is: 1.59179018378963e-06\n",
+      "\n",
+      "R[write to console]: Reciprocal conditional number for inverted matrix is: 1.24079893155397e-06\n",
+      "\n",
+      "R[write to console]: Inverse of matrix G does not appear to be ill-conditioned.\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": "array([[ 1.00000000e+00,  1.00000000e+00,  1.04168312e+01],\n       [ 2.00000000e+00,  1.00000000e+00,  5.02454510e-01],\n       [ 2.00000000e+00,  2.00000000e+00,  2.62023822e+01],\n       ...,\n       [ 2.76300000e+03,  2.76100000e+03, -2.16311000e-03],\n       [ 2.76300000e+03,  2.76200000e+03, -2.17669057e+01],\n       [ 2.76300000e+03,  2.76300000e+03,  2.63256497e+01]])"
+     },
+     "execution_count": 55,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "from rpy2.robjects import r, pandas2ri\n",
+    "# Transfer G_matrix to R\n",
+    "r.assign('ID', df_pheno_ordered['Genotype'])\n",
+    "r.assign('Gmat', a_2)\n",
+    "r('''\n",
+    "# Adjust G_matrix and invert\n",
+    "library(ASRgenomics)\n",
+    "rownames(Gmat) = ID\n",
+    "colnames(Gmat) = ID\n",
+    "diag(Gmat) <- diag(Gmat) + 0.01\n",
+    "ginv = G.inverse(Gmat,sparseform = T)$Ginv.sparse''')\n"
+   ],
+   "metadata": {
+    "collapsed": false,
+    "ExecuteTime": {
+     "end_time": "2024-03-25T06:48:06.249000300Z",
+     "start_time": "2024-03-25T06:47:55.518842300Z"
+    }
+   },
+   "id": "b3e7ef8025b116",
+   "execution_count": 55
+  },
+  {
+   "cell_type": "code",
+   "outputs": [],
+   "source": [
+    "\n",
+    "df_pheno_ordered['Genotype'] = df_pheno_ordered['Genotype'].astype('category')"
+   ],
+   "metadata": {
+    "collapsed": false,
+    "ExecuteTime": {
+     "end_time": "2024-03-25T06:17:55.988217Z",
+     "start_time": "2024-03-25T06:17:55.974047700Z"
+    }
+   },
+   "id": "acabf46be808b8ab",
+   "execution_count": 18
+  },
+  {
+   "cell_type": "code",
+   "outputs": [],
+   "source": [
+    "import rpy2.robjects as robjects\n",
+    "\n",
+    "modg = asreml.asreml(\n",
+    "fixed = f('YLD14_SU ~ 1'),\n",
+    "random = f('~vm(Genotype,ginv)'),\n",
+    "dense = f('~vm(Genotype,ginv)'),\n",
+    "residual =f('~idv(units)'), \n",
+    "workspace = \"2Gb\",\n",
+    "data = df_pheno_ordered)"
+   ],
+   "metadata": {
+    "collapsed": false,
+    "ExecuteTime": {
+     "end_time": "2024-03-25T06:49:06.121751300Z",
+     "start_time": "2024-03-25T06:48:43.097036700Z"
+    }
+   },
+   "id": "5e0bb540a4a4c72c",
+   "execution_count": 56
+  },
+  {
+   "cell_type": "code",
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "                              component std.error  z.ratio bound %ch\n",
+      "vm(Genotype, G_matrix_inv) 5.972789e-06        NA       NA     B  NA\n",
+      "units!units                1.170023e+03  31.48448 37.16189     P   0\n",
+      "units!R                    1.000000e+00        NA       NA     F   0\n"
+     ]
+    }
+   ],
+   "source": [
+    "summ = asreml.summary_asreml(modg)\n",
+    "print(summ.rx2('varcomp'))"
+   ],
+   "metadata": {
+    "collapsed": false,
+    "ExecuteTime": {
+     "end_time": "2024-03-25T06:20:01.616208300Z",
+     "start_time": "2024-03-25T06:20:01.609791200Z"
+    }
+   },
+   "id": "305ddc385d71eed4",
+   "execution_count": 21
+  },
+  {
+   "cell_type": "code",
+   "outputs": [],
+   "source": [
+    "asreml_u = (modg.rx2['coefficients'].rx2['random'])\n"
+   ],
+   "metadata": {
+    "collapsed": false,
+    "ExecuteTime": {
+     "end_time": "2024-03-25T06:50:25.449294Z",
+     "start_time": "2024-03-25T06:50:25.444424600Z"
+    }
+   },
+   "id": "5b5a60ff985757ff",
+   "execution_count": 58
+  },
+  {
+   "cell_type": "code",
+   "outputs": [
+    {
+     "data": {
+      "text/plain": "array([[-17.5657198 ],\n       [-21.58628211],\n       [ -0.53769809],\n       ...,\n       [ -4.71096706],\n       [-19.77202265],\n       [-20.12006455]])"
+     },
+     "execution_count": 63,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "close = np.allclose(rrblup_u, asreml_u, rtol=0.00001, atol=0.00001)\n",
+    "\n"
+   ],
+   "metadata": {
+    "collapsed": false,
+    "ExecuteTime": {
+     "end_time": "2024-03-25T06:51:13.081435600Z",
+     "start_time": "2024-03-25T06:51:13.077258400Z"
+    }
+   },
+   "id": "e2d6659412f49f53",
+   "execution_count": 63
+  },
+  {
+   "cell_type": "code",
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Correlation Coefficient (相关系数): 0.9999217812834612\n"
+     ]
+    }
+   ],
+   "source": [
+    "from scipy.stats import pearsonr\n",
+    "\n",
+    "rrblup_u_1d = rrblup_u.flatten()\n",
+    "asreml_u_1d = asreml_u.flatten()\n",
+    "\n",
+    "# Now you can calculate the correlation coefficient\n",
+    "correlation_coef = np.corrcoef(rrblup_u_1d, asreml_u_1d)[0, 1]\n",
+    "print(\"Correlation Coefficient (相关系数):\", correlation_coef)\n"
+   ],
+   "metadata": {
+    "collapsed": false,
+    "ExecuteTime": {
+     "end_time": "2024-03-25T06:55:01.965913100Z",
+     "start_time": "2024-03-25T06:55:01.955398600Z"
+    }
+   },
+   "id": "d0553370feb1b95f",
+   "execution_count": 70
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 2
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython2",
+   "version": "2.7.6"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/blup_animal.ipynb b/blup_animal.ipynb
new file mode 100644
index 0000000..680e41c
--- /dev/null
+++ b/blup_animal.ipynb
@@ -0,0 +1,289 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 35,
+   "id": "initial_id",
+   "metadata": {
+    "collapsed": true,
+    "ExecuteTime": {
+     "end_time": "2024-03-25T09:19:22.281546600Z",
+     "start_time": "2024-03-25T09:19:22.075735900Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[[1.    0.    0.    0.5   0.    0.5   0.25  0.25 ]\n",
+      " [0.    1.    0.    0.    0.5   0.5   0.25  0.25 ]\n",
+      " [0.    0.    1.    0.    0.5   0.    0.25  0.5  ]\n",
+      " [0.5   0.    0.    1.    0.    0.25  0.5   0.125]\n",
+      " [0.    0.5   0.5   0.    1.    0.25  0.5   0.375]\n",
+      " [0.5   0.5   0.    0.25  0.25  1.    0.25  0.5  ]\n",
+      " [0.25  0.25  0.25  0.5   0.5   0.25  1.    0.25 ]\n",
+      " [0.25  0.25  0.5   0.125 0.375 0.5   0.25  1.   ]]\n"
+     ]
+    }
+   ],
+   "source": [
+    "import numpy as np\n",
+    "from rpy2.robjects import pandas2ri, r\n",
+    "from rpy2.robjects.packages import importr\n",
+    "pandas2ri.activate()\n",
+    "import pandas as pd\n",
+    "# Load qgenet package in R\n",
+    "qgenet = importr('qgenet')\n",
+    "\n",
+    "# Define your pedigree data\n",
+    "ped_data = {\n",
+    "    'ID': [1, 2, 3, 4, 5, 6, 7, 8],\n",
+    "    'SIRE': [0, 0, 0, 1, 3, 1, 4, 3],\n",
+    "    'DAM': [0, 0, 0, 0, 2, 2, 5, 6]\n",
+    "}\n",
+    "\n",
+    "# Convert your pedigree data to an R dataframe\n",
+    "ped_df = pandas2ri.py2rpy_pandasdataframe(pd.DataFrame(ped_data))\n",
+    "\n",
+    "# Compute the A matrix using qgenet package in R\n",
+    "A = qgenet.amatrix(ped_df)\n",
+    "\n",
+    "# Invert the A matrix\n",
+    "#ainv = np.array(r.solve(A))\n",
+    "ainv = np.linalg.inv(A)\n",
+    "print(np.array(A))\n",
+    "#np.allclose(ainv,e)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Solution: [[ 3.97256482]\n",
+      " [ 0.12596923]\n",
+      " [-0.15757814]\n",
+      " [-0.024685  ]\n",
+      " [ 0.14742549]\n",
+      " [-0.31838503]\n",
+      " [ 0.05387085]\n",
+      " [-0.16289678]\n",
+      " [ 0.21716138]]\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Example fixed effects (mean) and random effects (animal genetic effects) in Python\n",
+    "X = np.ones((5,1))  # Assuming 5 observations and only a mean effect\n",
+    "Z = np.array([\n",
+    "    [0, 0, 0, 1, 0, 0, 0, 0],  # Observation for individual 4\n",
+    "    [0, 0, 0, 0, 1, 0, 0, 0],  # Observation for individual 5\n",
+    "    [0, 0, 0, 0, 0, 1, 0, 0],  # Observation for individual 6\n",
+    "    [0, 0, 0, 0, 0, 0, 1, 0],  # Observation for individual 7\n",
+    "    [0, 0, 0, 0, 0, 0, 0, 1]   # Observation for individual 8\n",
+    "])\n",
+    "# Observations\n",
+    "y = np.array([4.5, 2.9, 3.9, 3.5, 5.0]).reshape(-1, 1)\n",
+    "\n",
+    "# Matrix operations\n",
+    "XX = X.T @ X\n",
+    "XZ = X.T @ Z\n",
+    "ZX = Z.T @ X\n",
+    "ZZ = Z.T @ Z\n",
+    "ZZA = ZZ + ainv * 2  # Adjust based on your specific model\n",
+    "Xy = X.T @ y\n",
+    "Zy = Z.T @ y\n",
+    "\n",
+    "# Assemble mixed model equations\n",
+    "mme = np.vstack([\n",
+    "    np.hstack([XX, XZ]),\n",
+    "    np.hstack([ZX, ZZA])\n",
+    "])\n",
+    "\n",
+    "# Solution vector\n",
+    "sol = np.linalg.solve(mme, np.vstack([Xy, Zy]))\n",
+    "\n",
+    "print(\"Solution:\", sol)\n"
+   ],
+   "metadata": {
+    "collapsed": false,
+    "ExecuteTime": {
+     "end_time": "2024-03-25T08:25:27.920019400Z",
+     "start_time": "2024-03-25T08:25:27.910221100Z"
+    }
+   },
+   "id": "2d0bde54f8d10808",
+   "execution_count": 25
+  },
+  {
+   "cell_type": "code",
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Solution vector: [[ 3.97256482]\n",
+      " [ 0.12596923]\n",
+      " [-0.15757814]\n",
+      " [-0.024685  ]\n",
+      " [ 0.14742549]\n",
+      " [-0.31838503]\n",
+      " [ 0.05387085]\n",
+      " [-0.16289678]\n",
+      " [ 0.21716138]]\n"
+     ]
+    }
+   ],
+   "source": [
+    "import gblup\n",
+    "# Number of observations\n",
+    "n_obs = 5\n",
+    "\n",
+    "# IDs of individuals with observations (assuming these are 1-indexed and correspond to the order in 'ainv')\n",
+    "obs_ids = [3,4,5,6,7]\n",
+    "\n",
+    "# Phenotype values for these observations\n",
+    "y_values = [4.5, 2.9, 3.9, 3.5, 5.0]\n",
+    "\n",
+    "\n",
+    "# Solve the mixed model\n",
+    "solution_vector = gblup.solve_mixed_gblup_model(n_obs, obs_ids, y_values, A)\n",
+    "\n",
+    "print(\"Solution vector:\", solution_vector)"
+   ],
+   "metadata": {
+    "collapsed": false,
+    "ExecuteTime": {
+     "end_time": "2024-03-25T08:35:28.902084100Z",
+     "start_time": "2024-03-25T08:35:28.849465700Z"
+    }
+   },
+   "id": "b2cc4cc1ae75c9e2",
+   "execution_count": 30
+  },
+  {
+   "cell_type": "code",
+   "outputs": [
+    {
+     "data": {
+      "text/plain": "True"
+     },
+     "execution_count": 31,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "np.allclose(solution_vector, sol)"
+   ],
+   "metadata": {
+    "collapsed": false,
+    "ExecuteTime": {
+     "end_time": "2024-03-25T08:35:34.146131Z",
+     "start_time": "2024-03-25T08:35:34.139378800Z"
+    }
+   },
+   "id": "a70e3dd6866c165d",
+   "execution_count": 31
+  },
+  {
+   "cell_type": "code",
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "\n",
+    "def generate_synthetic_data(n_individuals, n_observations):\n",
+    "    \"\"\"\n",
+    "    Generate synthetic pedigree, observation IDs, and phenotype values.\n",
+    "    \n",
+    "    Parameters:\n",
+    "    - n_individuals: Number of individuals in the pedigree.\n",
+    "    - n_observations: Number of observations (must be <= n_individuals).\n",
+    "    \n",
+    "    Returns:\n",
+    "    - ainv: Synthetic additive relationship matrix inverse for the pedigree.\n",
+    "    - obs_ids: IDs of individuals with observations.\n",
+    "    - y_values: Synthetic phenotype values for observations.\n",
+    "    \"\"\"\n",
+    "    # Generate a synthetic additive relationship matrix (A matrix) and invert it\n",
+    "    # Here, we'll use an identity matrix for simplicity; in practice, A would be based on pedigree\n",
+    "    A = np.eye(n_individuals)\n",
+    "    ainv = np.linalg.inv(A + np.random.normal(0, 0.05, A.shape))  # Adding noise for a more realistic scenario\n",
+    "    \n",
+    "    # Generate observation IDs (random sample of individuals)\n",
+    "    obs_ids = np.random.choice(range(0, n_individuals), n_observations, replace=False)\n",
+    "    \n",
+    "    # Generate synthetic phenotype values (normal distribution)\n",
+    "    y_values = np.random.normal(10, 2, n_observations)\n",
+    "    \n",
+    "    return ainv, obs_ids, y_values\n",
+    "\n",
+    "# Parameters for synthetic data generation\n",
+    "n_individuals = 6000  # Number of individuals in the pedigree\n",
+    "n_observations = 3000  # Number of observations\n",
+    "\n",
+    "# Generate synthetic data\n",
+    "ainv, obs_ids, y_values = generate_synthetic_data(n_individuals, n_observations)\n"
+   ],
+   "metadata": {
+    "collapsed": false,
+    "ExecuteTime": {
+     "end_time": "2024-03-25T08:41:54.236514800Z",
+     "start_time": "2024-03-25T08:41:49.193187800Z"
+    }
+   },
+   "id": "640368833e3eb822",
+   "execution_count": 32
+  },
+  {
+   "cell_type": "code",
+   "outputs": [
+    {
+     "data": {
+      "text/plain": "array([[  9.65855164],\n       [ 26.53936216],\n       [ -3.45564058],\n       ...,\n       [ -1.37455656],\n       [ 14.0913003 ],\n       [-18.79276052]])"
+     },
+     "execution_count": 33,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "solution_vector = gblup.solve_mixed_gblup_model(n_observations, obs_ids, y_values, ainv)\n",
+    "solution_vector"
+   ],
+   "metadata": {
+    "collapsed": false,
+    "ExecuteTime": {
+     "end_time": "2024-03-25T08:42:03.490882200Z",
+     "start_time": "2024-03-25T08:41:55.848715200Z"
+    }
+   },
+   "id": "17d9a37deccf8a55",
+   "execution_count": 33
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 2
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython2",
+   "version": "2.7.6"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/gblup.py b/gblup.py
new file mode 100644
index 0000000..0c67316
--- /dev/null
+++ b/gblup.py
@@ -0,0 +1,36 @@
+def solve_mixed_gblup_model(n_obs, obs_ids, y_values, a):
+    import numpy as np
+    ainv = np.linalg.inv(np.array(a))
+
+
+    # Fixed effects: Assuming only a mean effect for simplicity
+    X = np.ones((n_obs, 1))
+
+    # Random effects design matrix Z based on observation IDs
+    Z = np.zeros(
+        (n_obs, ainv.shape[0]))  # Z should have a row for each observation and a column for each individual in ainv
+    for i, obs_id in enumerate(obs_ids):
+        Z[i, obs_id] = 1  # Assuming obs_ids are 0-indexed and match the order in ainv
+
+    # Observations
+    y = np.array(y_values).reshape(-1, 1)
+
+    # Matrix operations
+    XX = X.T @ X
+    XZ = X.T @ Z
+    ZX = Z.T @ X
+    ZZ = Z.T @ Z
+    ZZA = ZZ + ainv * 2  # Adjust based on your specific model
+    Xy = X.T @ y
+    Zy = Z.T @ y
+
+    # Assemble mixed model equations (MME)
+    mme = np.vstack([
+        np.hstack([XX, XZ]),
+        np.hstack([ZX, ZZA])
+    ])
+
+    # Solve the MME for solution vector
+    sol = np.linalg.solve(mme, np.vstack([Xy, Zy]))
+
+    return sol
\ No newline at end of file
diff --git a/geonomic_kinship_test.ipynb b/geonomic_kinship_test.ipynb
deleted file mode 100644
index 7dc6cfb..0000000
--- a/geonomic_kinship_test.ipynb
+++ /dev/null
@@ -1,356 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "id": "initial_id",
-   "metadata": {
-    "collapsed": true,
-    "ExecuteTime": {
-     "end_time": "2024-03-23T00:51:49.603854400Z",
-     "start_time": "2024-03-23T00:51:45.861543100Z"
-    }
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "<class 'pandas.core.frame.DataFrame'>\n",
-      "Index: 2763 entries, 112 to 2396\n",
-      "Columns: 9642 entries, Genotype to 10_149535062_G\n",
-      "dtypes: int64(9641), object(1)\n",
-      "memory usage: 203.3+ MB\n",
-      "None\n"
-     ]
-    }
-   ],
-   "source": [
-    "import pandas as pd\n",
-    "import numpy as np\n",
-    "import time\n",
-    "import rrBLUP\n",
-    "df_geno = pd.read_csv('data/23TC1_YLD14_SU_geno_final.txt', sep='\\t')\n",
-    "df_geno = df_geno.sort_values(by=['Genotype'])\n",
-    "print(df_geno.info())\n",
-    "df_pheno = pd.read_csv(\"data/23TC1_YLD14_SU_pheno_final.txt\", sep='\\t')\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "<class 'pandas.core.frame.DataFrame'>\n",
-      "Index: 2763 entries, 1929 to 179\n",
-      "Data columns (total 2 columns):\n",
-      " #   Column    Non-Null Count  Dtype  \n",
-      "---  ------    --------------  -----  \n",
-      " 0   Genotype  2763 non-null   object \n",
-      " 1   YLD14_SU  2763 non-null   float64\n",
-      "dtypes: float64(1), object(1)\n",
-      "memory usage: 64.8+ KB\n",
-      "None\n",
-      "(2763, 9641)\n",
-      "(2763, 1)\n"
-     ]
-    }
-   ],
-   "source": [
-    "\n",
-    "df_pheno_ordered = df_pheno.sort_values(by=['Genotype'])\n",
-    "print(df_pheno_ordered.info())\n",
-    "train_x = np.array(df_geno.drop('Genotype', axis=1))\n",
-    "train_y = np.array(df_pheno_ordered['YLD14_SU']).reshape(-1, 1)\n",
-    "train_x = train_x - 1\n",
-    "print(train_x.shape)\n",
-    "print(train_y.shape)"
-   ],
-   "metadata": {
-    "collapsed": false,
-    "ExecuteTime": {
-     "end_time": "2024-03-23T00:52:01.713868700Z",
-     "start_time": "2024-03-23T00:52:01.567439200Z"
-    }
-   },
-   "id": "4ea00660ee17a7da",
-   "execution_count": 2
-  },
-  {
-   "cell_type": "code",
-   "outputs": [],
-   "source": [
-    "a_1 = rrBLUP.A_mat(train_x)\n",
-    "a_1"
-   ],
-   "metadata": {
-    "collapsed": false,
-    "ExecuteTime": {
-     "end_time": "2024-03-23T00:53:45.474818Z",
-     "start_time": "2024-03-23T00:52:44.054145500Z"
-    }
-   },
-   "id": "1f778cf653c205a5",
-   "execution_count": 3
-  },
-  {
-   "cell_type": "code",
-   "outputs": [
-    {
-     "data": {
-      "text/plain": "numpy.ndarray"
-     },
-     "execution_count": 22,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "type(a_1)"
-   ],
-   "metadata": {
-    "collapsed": false,
-    "ExecuteTime": {
-     "end_time": "2024-03-23T01:04:41.642221Z",
-     "start_time": "2024-03-23T01:04:41.624443300Z"
-    }
-   },
-   "id": "eb6e2174ae759be4",
-   "execution_count": 22
-  },
-  {
-   "cell_type": "code",
-   "outputs": [],
-   "source": [
-    "import aMat\n",
-    "a_2 = aMat.add_mat_numpy(train_x+1)"
-   ],
-   "metadata": {
-    "collapsed": false,
-    "ExecuteTime": {
-     "end_time": "2024-03-23T01:10:23.791477300Z",
-     "start_time": "2024-03-23T01:10:22.853119Z"
-    }
-   },
-   "id": "22084ea87f9364b1",
-   "execution_count": 27
-  },
-  {
-   "cell_type": "code",
-   "outputs": [
-    {
-     "data": {
-      "text/plain": "numpy.ndarray"
-     },
-     "execution_count": 38,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "import a_mat_jax\n",
-    "import jax\n",
-    "a_3 = a_mat_jax.add_mat_jax(train_x+1)\n",
-    "a_3 = np.asarray(a_3)\n",
-    "type(a_3)"
-   ],
-   "metadata": {
-    "collapsed": false,
-    "ExecuteTime": {
-     "end_time": "2024-03-23T01:16:10.802205200Z",
-     "start_time": "2024-03-23T01:16:10.233960Z"
-    }
-   },
-   "id": "1ed66c383a884e0e",
-   "execution_count": 38
-  },
-  {
-   "cell_type": "code",
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Arrays are close: True\n"
-     ]
-    }
-   ],
-   "source": [
-    "# Using allclose to compare\n",
-    "close = np.allclose(a_1, a_2, rtol=0.0001, atol=0.0001)\n",
-    "\n",
-    "print(f\"Arrays are close: {close}\")"
-   ],
-   "metadata": {
-    "collapsed": false,
-    "ExecuteTime": {
-     "end_time": "2024-03-23T01:10:45.248073700Z",
-     "start_time": "2024-03-23T01:10:45.146571300Z"
-    }
-   },
-   "id": "bdbcc53bb039085a",
-   "execution_count": 30
-  },
-  {
-   "cell_type": "code",
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Running Time: 17.74 s\n"
-     ]
-    }
-   ],
-   "source": [
-    "start_time = time.time()\n",
-    "g_result = rrBLUP.mixed_solve(y = train_y, K = a_1)\n",
-    "end_time = time.time()\n",
-    "#print(g_result)\n",
-    "print('Running Time: '+str(round(end_time-start_time, 2))+' s')\n",
-    "train_pred = g_result['u'] "
-   ],
-   "metadata": {
-    "collapsed": false,
-    "ExecuteTime": {
-     "end_time": "2024-03-23T01:01:35.992522700Z",
-     "start_time": "2024-03-23T01:01:18.250576300Z"
-    }
-   },
-   "id": "d32dd35631566b26",
-   "execution_count": 15
-  },
-  {
-   "cell_type": "code",
-   "outputs": [
-    {
-     "data": {
-      "text/plain": "array([[-17.59836984],\n       [-20.95136377],\n       [ -0.13633499],\n       ...,\n       [ -4.65200707],\n       [-20.01554517],\n       [-20.1281834 ]])"
-     },
-     "execution_count": 16,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "train_pred"
-   ],
-   "metadata": {
-    "collapsed": false,
-    "ExecuteTime": {
-     "end_time": "2024-03-23T01:01:45.967475Z",
-     "start_time": "2024-03-23T01:01:45.954682900Z"
-    }
-   },
-   "id": "3d0c303c9230d183",
-   "execution_count": 16
-  },
-  {
-   "cell_type": "code",
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "K not positive semi-definite.\n",
-      "Running Time: 8.41 s\n"
-     ]
-    },
-    {
-     "ename": "TypeError",
-     "evalue": "'NoneType' object is not subscriptable",
-     "output_type": "error",
-     "traceback": [
-      "\u001B[1;31m---------------------------------------------------------------------------\u001B[0m",
-      "\u001B[1;31mTypeError\u001B[0m                                 Traceback (most recent call last)",
-      "Cell \u001B[1;32mIn[39], line 6\u001B[0m\n\u001B[0;32m      4\u001B[0m \u001B[38;5;66;03m#print(g_result)\u001B[39;00m\n\u001B[0;32m      5\u001B[0m \u001B[38;5;28mprint\u001B[39m(\u001B[38;5;124m'\u001B[39m\u001B[38;5;124mRunning Time: \u001B[39m\u001B[38;5;124m'\u001B[39m\u001B[38;5;241m+\u001B[39m\u001B[38;5;28mstr\u001B[39m(\u001B[38;5;28mround\u001B[39m(end_time\u001B[38;5;241m-\u001B[39mstart_time, \u001B[38;5;241m2\u001B[39m))\u001B[38;5;241m+\u001B[39m\u001B[38;5;124m'\u001B[39m\u001B[38;5;124m s\u001B[39m\u001B[38;5;124m'\u001B[39m)\n\u001B[1;32m----> 6\u001B[0m train_pred_new \u001B[38;5;241m=\u001B[39m \u001B[43mg_result_new\u001B[49m\u001B[43m[\u001B[49m\u001B[38;5;124;43m'\u001B[39;49m\u001B[38;5;124;43mu\u001B[39;49m\u001B[38;5;124;43m'\u001B[39;49m\u001B[43m]\u001B[49m \n\u001B[0;32m      7\u001B[0m train_pred_new\n",
-      "\u001B[1;31mTypeError\u001B[0m: 'NoneType' object is not subscriptable"
-     ]
-    }
-   ],
-   "source": [
-    "start_time = time.time()\n",
-    "g_result_new = rrBLUP.mixed_solve(y = train_y, K = a_3)\n",
-    "end_time = time.time()\n",
-    "#print(g_result)\n",
-    "print('Running Time: '+str(round(end_time-start_time, 2))+' s')\n",
-    "train_pred_new = g_result_new['u'] \n",
-    "train_pred_new"
-   ],
-   "metadata": {
-    "collapsed": false,
-    "ExecuteTime": {
-     "end_time": "2024-03-23T01:16:26.084069700Z",
-     "start_time": "2024-03-23T01:16:17.660955900Z"
-    }
-   },
-   "id": "557f3d43c301ad45",
-   "execution_count": 39
-  },
-  {
-   "cell_type": "code",
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "<class 'numpy.ndarray'>\n"
-     ]
-    }
-   ],
-   "source": [
-    "import jax.numpy as jnp\n",
-    "import numpy as np\n",
-    "\n",
-    "# Create a JAX array\n",
-    "jax_array = jnp.array([1, 2, 3])\n",
-    "\n",
-    "# Convert to a NumPy array\n",
-    "numpy_array = np.asarray(jax_array)\n",
-    "\n",
-    "print(type(numpy_array))  # <class 'numpy.ndarray'>\n"
-   ],
-   "metadata": {
-    "collapsed": false,
-    "ExecuteTime": {
-     "end_time": "2024-03-23T01:15:38.449024800Z",
-     "start_time": "2024-03-23T01:15:38.443083500Z"
-    }
-   },
-   "id": "2fdea088f03fd77",
-   "execution_count": 37
-  },
-  {
-   "cell_type": "code",
-   "outputs": [],
-   "source": [],
-   "metadata": {
-    "collapsed": false
-   },
-   "id": "79a0fb4f44e8b0df"
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 2
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython2",
-   "version": "2.7.6"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 5
-}

From 1a48a4acc4c115212e90bf9853a75629d025a9e8 Mon Sep 17 00:00:00 2001
From: zhanglei <min.oddo@gmail.com>
Date: Tue, 26 Mar 2024 16:21:04 +0800
Subject: [PATCH 5/6] gblup and arseml test using gblup

---
 arseml_geonomic_kinship_test.ipynb | 230 +++++----------
 asreml_gblup.ipynb                 | 455 +++++++++++++++++++++++++++++
 blup_animal.ipynb                  |  28 +-
 gblup.py                           | 106 +++++++
 4 files changed, 664 insertions(+), 155 deletions(-)
 create mode 100644 asreml_gblup.ipynb

diff --git a/arseml_geonomic_kinship_test.ipynb b/arseml_geonomic_kinship_test.ipynb
index b62700a..cde0c6d 100644
--- a/arseml_geonomic_kinship_test.ipynb
+++ b/arseml_geonomic_kinship_test.ipynb
@@ -2,13 +2,13 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": 5,
    "id": "initial_id",
    "metadata": {
     "collapsed": true,
     "ExecuteTime": {
-     "end_time": "2024-03-25T00:43:12.613827100Z",
-     "start_time": "2024-03-25T00:43:00.090048600Z"
+     "end_time": "2024-03-26T01:25:28.301180100Z",
+     "start_time": "2024-03-26T01:25:25.491502Z"
     }
    },
    "outputs": [
@@ -77,12 +77,12 @@
    "metadata": {
     "collapsed": false,
     "ExecuteTime": {
-     "end_time": "2024-03-25T00:43:14.833783200Z",
-     "start_time": "2024-03-25T00:43:14.642438500Z"
+     "end_time": "2024-03-26T01:25:28.446814900Z",
+     "start_time": "2024-03-26T01:25:28.298414Z"
     }
    },
    "id": "4ea00660ee17a7da",
-   "execution_count": 2
+   "execution_count": 6
   },
   {
    "cell_type": "code",
@@ -91,7 +91,7 @@
      "data": {
       "text/plain": "array([[ 0.96460593,  0.40092433,  0.25768902, ..., -0.30201703,\n        -0.23842051, -0.23847136],\n       [ 0.40092433,  0.81633067,  0.42894227, ..., -0.22019376,\n        -0.27356791, -0.27299987],\n       [ 0.25768902,  0.42894227,  0.75699359, ..., -0.24150724,\n        -0.2534156 , -0.25408534],\n       ...,\n       [-0.30201703, -0.22019376, -0.24150724, ...,  0.70114823,\n         0.31388157,  0.31383073],\n       [-0.23842051, -0.27356791, -0.2534156 , ...,  0.31388157,\n         0.89487222,  0.89420248],\n       [-0.23847136, -0.27299987, -0.25408534, ...,  0.31383073,\n         0.89420248,  0.89569887]])"
      },
-     "execution_count": 3,
+     "execution_count": 7,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -103,12 +103,12 @@
    "metadata": {
     "collapsed": false,
     "ExecuteTime": {
-     "end_time": "2024-03-25T00:43:53.731595900Z",
-     "start_time": "2024-03-25T00:43:16.866956900Z"
+     "end_time": "2024-03-26T01:26:14.165137100Z",
+     "start_time": "2024-03-26T01:25:28.446814900Z"
     }
    },
    "id": "1f778cf653c205a5",
-   "execution_count": 3
+   "execution_count": 7
   },
   {
    "cell_type": "code",
@@ -117,7 +117,7 @@
      "data": {
       "text/plain": "numpy.ndarray"
      },
-     "execution_count": 52,
+     "execution_count": 8,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -128,12 +128,12 @@
    "metadata": {
     "collapsed": false,
     "ExecuteTime": {
-     "end_time": "2024-03-23T01:35:43.596344300Z",
-     "start_time": "2024-03-23T01:35:43.592192800Z"
+     "end_time": "2024-03-26T01:26:14.178057500Z",
+     "start_time": "2024-03-26T01:26:14.165137100Z"
     }
    },
    "id": "eb6e2174ae759be4",
-   "execution_count": 52
+   "execution_count": 8
   },
   {
    "cell_type": "code",
@@ -145,12 +145,12 @@
    "metadata": {
     "collapsed": false,
     "ExecuteTime": {
-     "end_time": "2024-03-25T00:44:38.861429500Z",
-     "start_time": "2024-03-25T00:44:37.795651Z"
+     "end_time": "2024-03-26T01:26:15.714776700Z",
+     "start_time": "2024-03-26T01:26:14.178057500Z"
     }
    },
    "id": "22084ea87f9364b1",
-   "execution_count": 5
+   "execution_count": 9
   },
   {
    "cell_type": "code",
@@ -159,7 +159,7 @@
      "data": {
       "text/plain": "numpy.ndarray"
      },
-     "execution_count": 53,
+     "execution_count": 10,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -174,12 +174,12 @@
    "metadata": {
     "collapsed": false,
     "ExecuteTime": {
-     "end_time": "2024-03-23T01:35:48.457885400Z",
-     "start_time": "2024-03-23T01:35:47.864412600Z"
+     "end_time": "2024-03-26T01:26:23.285849200Z",
+     "start_time": "2024-03-26T01:26:15.721070900Z"
     }
    },
    "id": "1ed66c383a884e0e",
-   "execution_count": 53
+   "execution_count": 10
   },
   {
    "cell_type": "code",
@@ -201,12 +201,12 @@
    "metadata": {
     "collapsed": false,
     "ExecuteTime": {
-     "end_time": "2024-03-23T01:35:50.365137700Z",
-     "start_time": "2024-03-23T01:35:50.287741600Z"
+     "end_time": "2024-03-26T01:26:23.456729100Z",
+     "start_time": "2024-03-26T01:26:23.285849200Z"
     }
    },
    "id": "bdbcc53bb039085a",
-   "execution_count": 54
+   "execution_count": 11
   },
   {
    "cell_type": "code",
@@ -215,18 +215,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Running Time: 20.03 s\n"
-     ]
-    },
-    {
-     "ename": "AttributeError",
-     "evalue": "'dict' object has no attribute 'rx2'",
-     "output_type": "error",
-     "traceback": [
-      "\u001B[1;31m---------------------------------------------------------------------------\u001B[0m",
-      "\u001B[1;31mAttributeError\u001B[0m                            Traceback (most recent call last)",
-      "Cell \u001B[1;32mIn[4], line 7\u001B[0m\n\u001B[0;32m      5\u001B[0m \u001B[38;5;28mprint\u001B[39m(\u001B[38;5;124m'\u001B[39m\u001B[38;5;124mRunning Time: \u001B[39m\u001B[38;5;124m'\u001B[39m\u001B[38;5;241m+\u001B[39m\u001B[38;5;28mstr\u001B[39m(\u001B[38;5;28mround\u001B[39m(end_time\u001B[38;5;241m-\u001B[39mstart_time, \u001B[38;5;241m2\u001B[39m))\u001B[38;5;241m+\u001B[39m\u001B[38;5;124m'\u001B[39m\u001B[38;5;124m s\u001B[39m\u001B[38;5;124m'\u001B[39m)\n\u001B[0;32m      6\u001B[0m \u001B[38;5;66;03m#train_pred = g_result['u'] \u001B[39;00m\n\u001B[1;32m----> 7\u001B[0m train_pred \u001B[38;5;241m=\u001B[39m np\u001B[38;5;241m.\u001B[39marray(\u001B[43mg_result\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mrx2\u001B[49m(\u001B[38;5;124m'\u001B[39m\u001B[38;5;124mu\u001B[39m\u001B[38;5;124m'\u001B[39m))\n",
-      "\u001B[1;31mAttributeError\u001B[0m: 'dict' object has no attribute 'rx2'"
+      "Running Time: 232.65 s\n"
      ]
     }
    ],
@@ -242,12 +231,12 @@
    "metadata": {
     "collapsed": false,
     "ExecuteTime": {
-     "end_time": "2024-03-25T00:44:26.308267100Z",
-     "start_time": "2024-03-25T00:44:05.897407300Z"
+     "end_time": "2024-03-26T01:30:16.134754800Z",
+     "start_time": "2024-03-26T01:26:23.456729100Z"
     }
    },
    "id": "d32dd35631566b26",
-   "execution_count": 4
+   "execution_count": 12
   },
   {
    "cell_type": "code",
@@ -256,7 +245,7 @@
      "data": {
       "text/plain": "array([[689.5701105],\n       [615.9386355],\n       [661.3197776],\n       ...,\n       [692.8649345],\n       [709.3628695],\n       [684.4678623]])"
      },
-     "execution_count": 39,
+     "execution_count": 13,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -267,12 +256,12 @@
    "metadata": {
     "collapsed": false,
     "ExecuteTime": {
-     "end_time": "2024-03-25T06:34:24.962192500Z",
-     "start_time": "2024-03-25T06:34:24.951436400Z"
+     "end_time": "2024-03-26T01:30:16.135764Z",
+     "start_time": "2024-03-26T01:30:16.125907400Z"
     }
    },
    "id": "3d0c303c9230d183",
-   "execution_count": 39
+   "execution_count": 13
   },
   {
    "cell_type": "code",
@@ -281,14 +270,14 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "{'Vu': 462.6130366195777, 'Ve': 969.4607183018168, 'beta': array([[703.72274814]]), 'u': array([[-17.58200532],\n",
-      "       [-21.2687656 ],\n",
-      "       [ -0.33706766],\n",
+      "{'Vu': 462.61305045468987, 'Ve': 974.0868459500658, 'beta': array([[703.72274814]]), 'u': array([[-17.59836984],\n",
+      "       [-20.95136378],\n",
+      "       [ -0.13633498],\n",
       "       ...,\n",
-      "       [ -4.68147966],\n",
-      "       [-19.89370123],\n",
-      "       [-20.12403582]]), 'LL': -13572.19170915865}\n",
-      "Running Time: 17.25 s\n"
+      "       [ -4.65200707],\n",
+      "       [-20.01554518],\n",
+      "       [-20.12818341]]), 'LL': -13572.19170879463}\n",
+      "Running Time: 37.24 s\n"
      ]
     }
    ],
@@ -304,12 +293,12 @@
    "metadata": {
     "collapsed": false,
     "ExecuteTime": {
-     "end_time": "2024-03-25T06:52:04.948507200Z",
-     "start_time": "2024-03-25T06:51:47.691501200Z"
+     "end_time": "2024-03-26T01:30:53.386672Z",
+     "start_time": "2024-03-26T01:30:16.135764Z"
     }
    },
    "id": "557f3d43c301ad45",
-   "execution_count": 64
+   "execution_count": 14
   },
   {
    "cell_type": "code",
@@ -318,7 +307,7 @@
      "data": {
       "text/plain": "array([[-17.59836984],\n       [-20.95136378],\n       [ -0.13633498],\n       ...,\n       [ -4.65200707],\n       [-20.01554518],\n       [-20.12818341]])"
      },
-     "execution_count": 7,
+     "execution_count": 15,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -329,21 +318,25 @@
    "metadata": {
     "collapsed": false,
     "ExecuteTime": {
-     "end_time": "2024-03-25T00:45:22.463225400Z",
-     "start_time": "2024-03-25T00:45:22.454325500Z"
+     "end_time": "2024-03-26T01:30:53.404147700Z",
+     "start_time": "2024-03-26T01:30:53.386672Z"
     }
    },
    "id": "2fdea088f03fd77",
-   "execution_count": 7
+   "execution_count": 15
   },
   {
    "cell_type": "code",
    "outputs": [
     {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Correlation Coefficient (相关系数): 0.9999505295951803\n"
+     "ename": "TypeError",
+     "evalue": "solve_mixed_gblup_model() got an unexpected keyword argument 'ainv'",
+     "output_type": "error",
+     "traceback": [
+      "\u001B[1;31m---------------------------------------------------------------------------\u001B[0m",
+      "\u001B[1;31mTypeError\u001B[0m                                 Traceback (most recent call last)",
+      "Cell \u001B[1;32mIn[16], line 2\u001B[0m\n\u001B[0;32m      1\u001B[0m \u001B[38;5;28;01mimport\u001B[39;00m \u001B[38;5;21;01mgblup\u001B[39;00m\n\u001B[1;32m----> 2\u001B[0m g_result \u001B[38;5;241m=\u001B[39m \u001B[43mgblup\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43msolve_mixed_gblup_model\u001B[49m\u001B[43m(\u001B[49m\u001B[43mn_obs\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[38;5;28;43mlen\u001B[39;49m\u001B[43m(\u001B[49m\u001B[43mtrain_y\u001B[49m\u001B[43m)\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mobs_ids\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43m[\u001B[49m\u001B[43mi\u001B[49m\u001B[43m \u001B[49m\u001B[38;5;28;43;01mfor\u001B[39;49;00m\u001B[43m \u001B[49m\u001B[43mi\u001B[49m\u001B[43m \u001B[49m\u001B[38;5;129;43;01min\u001B[39;49;00m\u001B[43m \u001B[49m\u001B[38;5;28;43mrange\u001B[39;49m\u001B[43m(\u001B[49m\u001B[38;5;28;43mlen\u001B[39;49m\u001B[43m(\u001B[49m\u001B[43mtrain_y\u001B[49m\u001B[43m)\u001B[49m\u001B[43m)\u001B[49m\u001B[43m]\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43my_values\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mtrain_y\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mainv\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mnp\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mlinalg\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43minv\u001B[49m\u001B[43m(\u001B[49m\u001B[43ma_2\u001B[49m\u001B[43m)\u001B[49m\u001B[43m)\u001B[49m\n\u001B[0;32m      3\u001B[0m gblue_values \u001B[38;5;241m=\u001B[39m g_result[\u001B[38;5;241m1\u001B[39m:,]\n\u001B[0;32m      4\u001B[0m \u001B[38;5;28;01mfrom\u001B[39;00m \u001B[38;5;21;01mscipy\u001B[39;00m\u001B[38;5;21;01m.\u001B[39;00m\u001B[38;5;21;01mstats\u001B[39;00m \u001B[38;5;28;01mimport\u001B[39;00m pearsonr\n",
+      "\u001B[1;31mTypeError\u001B[0m: solve_mixed_gblup_model() got an unexpected keyword argument 'ainv'"
      ]
     }
    ],
@@ -363,61 +356,32 @@
    "metadata": {
     "collapsed": false,
     "ExecuteTime": {
-     "end_time": "2024-03-25T08:34:01.640160900Z",
-     "start_time": "2024-03-25T08:34:00.704205500Z"
+     "end_time": "2024-03-26T01:31:01.081182100Z",
+     "start_time": "2024-03-26T01:30:53.400772100Z"
     }
    },
    "id": "173e3d457a409f8e",
-   "execution_count": 76
+   "execution_count": 16
   },
   {
    "cell_type": "code",
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Online License checked out Mon Mar 25 13:40:58 2024\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "asreml = importr('asreml')"
    ],
    "metadata": {
     "collapsed": false,
     "ExecuteTime": {
-     "end_time": "2024-03-25T05:40:58.749306200Z",
-     "start_time": "2024-03-25T05:40:56.371246400Z"
+     "end_time": "2024-03-26T01:31:01.091405100Z",
+     "start_time": "2024-03-26T01:31:01.086189800Z"
     }
    },
    "id": "79a0fb4f44e8b0df",
-   "execution_count": 8
+   "execution_count": null
   },
   {
    "cell_type": "code",
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "R[write to console]: Reciprocal conditional number for original matrix is: 1.59179018378963e-06\n",
-      "\n",
-      "R[write to console]: Reciprocal conditional number for inverted matrix is: 1.24079893155397e-06\n",
-      "\n",
-      "R[write to console]: Inverse of matrix G does not appear to be ill-conditioned.\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": "array([[ 1.00000000e+00,  1.00000000e+00,  1.04168312e+01],\n       [ 2.00000000e+00,  1.00000000e+00,  5.02454510e-01],\n       [ 2.00000000e+00,  2.00000000e+00,  2.62023822e+01],\n       ...,\n       [ 2.76300000e+03,  2.76100000e+03, -2.16311000e-03],\n       [ 2.76300000e+03,  2.76200000e+03, -2.17669057e+01],\n       [ 2.76300000e+03,  2.76300000e+03,  2.63256497e+01]])"
-     },
-     "execution_count": 55,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "outputs": [],
    "source": [
     "from rpy2.robjects import r, pandas2ri\n",
     "# Transfer G_matrix to R\n",
@@ -434,12 +398,11 @@
    "metadata": {
     "collapsed": false,
     "ExecuteTime": {
-     "end_time": "2024-03-25T06:48:06.249000300Z",
-     "start_time": "2024-03-25T06:47:55.518842300Z"
+     "start_time": "2024-03-26T01:31:01.089831300Z"
     }
    },
    "id": "b3e7ef8025b116",
-   "execution_count": 55
+   "execution_count": null
   },
   {
    "cell_type": "code",
@@ -451,12 +414,12 @@
    "metadata": {
     "collapsed": false,
     "ExecuteTime": {
-     "end_time": "2024-03-25T06:17:55.988217Z",
-     "start_time": "2024-03-25T06:17:55.974047700Z"
+     "end_time": "2024-03-26T01:31:01.091405100Z",
+     "start_time": "2024-03-26T01:31:01.091405100Z"
     }
    },
    "id": "acabf46be808b8ab",
-   "execution_count": 18
+   "execution_count": null
   },
   {
    "cell_type": "code",
@@ -475,27 +438,15 @@
    "metadata": {
     "collapsed": false,
     "ExecuteTime": {
-     "end_time": "2024-03-25T06:49:06.121751300Z",
-     "start_time": "2024-03-25T06:48:43.097036700Z"
+     "start_time": "2024-03-26T01:31:01.091405100Z"
     }
    },
    "id": "5e0bb540a4a4c72c",
-   "execution_count": 56
+   "execution_count": null
   },
   {
    "cell_type": "code",
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "                              component std.error  z.ratio bound %ch\n",
-      "vm(Genotype, G_matrix_inv) 5.972789e-06        NA       NA     B  NA\n",
-      "units!units                1.170023e+03  31.48448 37.16189     P   0\n",
-      "units!R                    1.000000e+00        NA       NA     F   0\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "summ = asreml.summary_asreml(modg)\n",
     "print(summ.rx2('varcomp'))"
@@ -503,12 +454,11 @@
    "metadata": {
     "collapsed": false,
     "ExecuteTime": {
-     "end_time": "2024-03-25T06:20:01.616208300Z",
-     "start_time": "2024-03-25T06:20:01.609791200Z"
+     "start_time": "2024-03-26T01:31:01.091405100Z"
     }
    },
    "id": "305ddc385d71eed4",
-   "execution_count": 21
+   "execution_count": null
   },
   {
    "cell_type": "code",
@@ -519,25 +469,15 @@
    "metadata": {
     "collapsed": false,
     "ExecuteTime": {
-     "end_time": "2024-03-25T06:50:25.449294Z",
-     "start_time": "2024-03-25T06:50:25.444424600Z"
+     "start_time": "2024-03-26T01:31:01.101914600Z"
     }
    },
    "id": "5b5a60ff985757ff",
-   "execution_count": 58
+   "execution_count": null
   },
   {
    "cell_type": "code",
-   "outputs": [
-    {
-     "data": {
-      "text/plain": "array([[-17.5657198 ],\n       [-21.58628211],\n       [ -0.53769809],\n       ...,\n       [ -4.71096706],\n       [-19.77202265],\n       [-20.12006455]])"
-     },
-     "execution_count": 63,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "outputs": [],
    "source": [
     "close = np.allclose(rrblup_u, asreml_u, rtol=0.00001, atol=0.00001)\n",
     "\n"
@@ -545,24 +485,15 @@
    "metadata": {
     "collapsed": false,
     "ExecuteTime": {
-     "end_time": "2024-03-25T06:51:13.081435600Z",
-     "start_time": "2024-03-25T06:51:13.077258400Z"
+     "start_time": "2024-03-26T01:31:01.101914600Z"
     }
    },
    "id": "e2d6659412f49f53",
-   "execution_count": 63
+   "execution_count": null
   },
   {
    "cell_type": "code",
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Correlation Coefficient (相关系数): 0.9999217812834612\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "from scipy.stats import pearsonr\n",
     "\n",
@@ -576,12 +507,11 @@
    "metadata": {
     "collapsed": false,
     "ExecuteTime": {
-     "end_time": "2024-03-25T06:55:01.965913100Z",
-     "start_time": "2024-03-25T06:55:01.955398600Z"
+     "start_time": "2024-03-26T01:31:01.101914600Z"
     }
    },
    "id": "d0553370feb1b95f",
-   "execution_count": 70
+   "execution_count": null
   }
  ],
  "metadata": {
diff --git a/asreml_gblup.ipynb b/asreml_gblup.ipynb
new file mode 100644
index 0000000..065261e
--- /dev/null
+++ b/asreml_gblup.ipynb
@@ -0,0 +1,455 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 29,
+   "id": "initial_id",
+   "metadata": {
+    "collapsed": true,
+    "ExecuteTime": {
+     "end_time": "2024-03-26T06:41:29.984856Z",
+     "start_time": "2024-03-26T06:41:28.523504300Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "import pyreadr\n",
+    "import rpy2.robjects as ro\n",
+    "from rpy2.robjects import pandas2ri\n",
+    "from rpy2.robjects.packages import importr\n",
+    "\n",
+    "# Enable automatic conversion between pandas dataframes and R data frames\n",
+    "pandas2ri.activate()\n",
+    "\n",
+    "# Import the R packages needed\n",
+    "base = importr('base')\n",
+    "asreml = importr('asreml')\n",
+    "\n",
+    "# Load your data from .rds files using pyreadr\n",
+    "ped_result = pyreadr.read_r('data/pedigree_19_23P_1031.rds')\n",
+    "blue_result = pyreadr.read_r('data/blue_yield.rds')\n",
+    "\n",
+    "# Extracting the data frames from the results\n",
+    "ped_df = ped_result[None]  # Assuming it's the first and only item in the result\n",
+    "blue_df = blue_result[None]  # Assuming it's the first and only item in the result\n",
+    "\n",
+    "# Convert pandas dataframes to R data frames and put them into the R environment\n",
+    "ro.globalenv['ped'] = pandas2ri.py2rpy(ped_df)\n",
+    "ro.globalenv['blue'] = pandas2ri.py2rpy(blue_df)\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "outputs": [
+    {
+     "data": {
+      "text/plain": "             Name  predicted.value  std.error     status Block       DAM  \\\n0     23TC2XM3066       817.157834  55.459393  Estimable     2  ZMN00545   \n1     23TC2XM4004       736.076043  55.698624  Estimable     2    20W041   \n2        D178070A       369.039247  46.982307  Estimable     6  ZMN00824   \n3        D178070A       358.845544  39.343070  Estimable     3  ZMN00824   \n4        D178070A       525.068916  54.857062  Estimable     2  ZMN00824   \n...           ...              ...        ...        ...   ...       ...   \n2779        ZD958       268.765362  15.973387  Estimable     3  ZMN00113   \n2780        ZD958       510.696306  21.263064  Estimable     1  ZMN00113   \n2781        ZD958       314.059170  13.043818  Estimable     4  ZMN00113   \n2782        ZD958       333.884811  20.252772  Estimable     7  ZMN00113   \n2783        ZD958       300.026914  21.196859  Estimable     5  ZMN00113   \n\n             SIRE                 Fam  \n0        ZMN00392   ZMN00392_ZMN00545  \n1     20W065-43-2  20W065-43-2_20W041  \n2        ZMN00547   ZMN00547_ZMN00824  \n3        ZMN00547   ZMN00547_ZMN00824  \n4        ZMN00547   ZMN00547_ZMN00824  \n...           ...                 ...  \n2779     ZMN00337   ZMN00337_ZMN00113  \n2780     ZMN00337   ZMN00337_ZMN00113  \n2781     ZMN00337   ZMN00337_ZMN00113  \n2782     ZMN00337   ZMN00337_ZMN00113  \n2783     ZMN00337   ZMN00337_ZMN00113  \n\n[2784 rows x 8 columns]",
+      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>Name</th>\n      <th>predicted.value</th>\n      <th>std.error</th>\n      <th>status</th>\n      <th>Block</th>\n      <th>DAM</th>\n      <th>SIRE</th>\n      <th>Fam</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>23TC2XM3066</td>\n      <td>817.157834</td>\n      <td>55.459393</td>\n      <td>Estimable</td>\n      <td>2</td>\n      <td>ZMN00545</td>\n      <td>ZMN00392</td>\n      <td>ZMN00392_ZMN00545</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>23TC2XM4004</td>\n      <td>736.076043</td>\n      <td>55.698624</td>\n      <td>Estimable</td>\n      <td>2</td>\n      <td>20W041</td>\n      <td>20W065-43-2</td>\n      <td>20W065-43-2_20W041</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>D178070A</td>\n      <td>369.039247</td>\n      <td>46.982307</td>\n      <td>Estimable</td>\n      <td>6</td>\n      <td>ZMN00824</td>\n      <td>ZMN00547</td>\n      <td>ZMN00547_ZMN00824</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>D178070A</td>\n      <td>358.845544</td>\n      <td>39.343070</td>\n      <td>Estimable</td>\n      <td>3</td>\n      <td>ZMN00824</td>\n      <td>ZMN00547</td>\n      <td>ZMN00547_ZMN00824</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>D178070A</td>\n      <td>525.068916</td>\n      <td>54.857062</td>\n      <td>Estimable</td>\n      <td>2</td>\n      <td>ZMN00824</td>\n      <td>ZMN00547</td>\n      <td>ZMN00547_ZMN00824</td>\n    </tr>\n    <tr>\n      <th>...</th>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n    </tr>\n    <tr>\n      <th>2779</th>\n      <td>ZD958</td>\n      <td>268.765362</td>\n      <td>15.973387</td>\n      <td>Estimable</td>\n      <td>3</td>\n      <td>ZMN00113</td>\n      <td>ZMN00337</td>\n      <td>ZMN00337_ZMN00113</td>\n    </tr>\n    <tr>\n      <th>2780</th>\n      <td>ZD958</td>\n      <td>510.696306</td>\n      <td>21.263064</td>\n      <td>Estimable</td>\n      <td>1</td>\n      <td>ZMN00113</td>\n      <td>ZMN00337</td>\n      <td>ZMN00337_ZMN00113</td>\n    </tr>\n    <tr>\n      <th>2781</th>\n      <td>ZD958</td>\n      <td>314.059170</td>\n      <td>13.043818</td>\n      <td>Estimable</td>\n      <td>4</td>\n      <td>ZMN00113</td>\n      <td>ZMN00337</td>\n      <td>ZMN00337_ZMN00113</td>\n    </tr>\n    <tr>\n      <th>2782</th>\n      <td>ZD958</td>\n      <td>333.884811</td>\n      <td>20.252772</td>\n      <td>Estimable</td>\n      <td>7</td>\n      <td>ZMN00113</td>\n      <td>ZMN00337</td>\n      <td>ZMN00337_ZMN00113</td>\n    </tr>\n    <tr>\n      <th>2783</th>\n      <td>ZD958</td>\n      <td>300.026914</td>\n      <td>21.196859</td>\n      <td>Estimable</td>\n      <td>5</td>\n      <td>ZMN00113</td>\n      <td>ZMN00337</td>\n      <td>ZMN00337_ZMN00113</td>\n    </tr>\n  </tbody>\n</table>\n<p>2784 rows × 8 columns</p>\n</div>"
+     },
+     "execution_count": 30,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "blue_df"
+   ],
+   "metadata": {
+    "collapsed": false,
+    "ExecuteTime": {
+     "end_time": "2024-03-26T06:41:29.996886300Z",
+     "start_time": "2024-03-26T06:41:29.985856400Z"
+    }
+   },
+   "id": "fe6c64dc27d80b60",
+   "execution_count": 30
+  },
+  {
+   "cell_type": "code",
+   "outputs": [
+    {
+     "data": {
+      "text/plain": "              Name   FGenoID   MGenoID\nrownames                              \n2            DMY1F         0         0\n3           DH605F         0         0\n4            PHHJC         0         0\n5            PH4CV         0         0\n6         ZMN01635         0         0\n...            ...       ...       ...\n2537      D2284159  ZMN01648  ZMN00735\n2559      D2284296  ZMN01705  ZMN00735\n2680      D2284308  ZMN01707  ZMN00735\n27721     D2284109  ZMN01638  ZMN00735\n35432     D2284180  ZMN01655  ZMN00735\n\n[10310 rows x 3 columns]",
+      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>Name</th>\n      <th>FGenoID</th>\n      <th>MGenoID</th>\n    </tr>\n    <tr>\n      <th>rownames</th>\n      <th></th>\n      <th></th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>2</th>\n      <td>DMY1F</td>\n      <td>0</td>\n      <td>0</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>DH605F</td>\n      <td>0</td>\n      <td>0</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>PHHJC</td>\n      <td>0</td>\n      <td>0</td>\n    </tr>\n    <tr>\n      <th>5</th>\n      <td>PH4CV</td>\n      <td>0</td>\n      <td>0</td>\n    </tr>\n    <tr>\n      <th>6</th>\n      <td>ZMN01635</td>\n      <td>0</td>\n      <td>0</td>\n    </tr>\n    <tr>\n      <th>...</th>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n    </tr>\n    <tr>\n      <th>2537</th>\n      <td>D2284159</td>\n      <td>ZMN01648</td>\n      <td>ZMN00735</td>\n    </tr>\n    <tr>\n      <th>2559</th>\n      <td>D2284296</td>\n      <td>ZMN01705</td>\n      <td>ZMN00735</td>\n    </tr>\n    <tr>\n      <th>2680</th>\n      <td>D2284308</td>\n      <td>ZMN01707</td>\n      <td>ZMN00735</td>\n    </tr>\n    <tr>\n      <th>27721</th>\n      <td>D2284109</td>\n      <td>ZMN01638</td>\n      <td>ZMN00735</td>\n    </tr>\n    <tr>\n      <th>35432</th>\n      <td>D2284180</td>\n      <td>ZMN01655</td>\n      <td>ZMN00735</td>\n    </tr>\n  </tbody>\n</table>\n<p>10310 rows × 3 columns</p>\n</div>"
+     },
+     "execution_count": 31,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "ped_df"
+   ],
+   "metadata": {
+    "collapsed": false,
+    "ExecuteTime": {
+     "end_time": "2024-03-26T06:41:30.007435900Z",
+     "start_time": "2024-03-26T06:41:29.995886600Z"
+    }
+   },
+   "id": "3300b0df5fd114d0",
+   "execution_count": 31
+  },
+  {
+   "cell_type": "code",
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Pedigree  order (P2): Individual \"WG603\" moved to record 1\n",
+      "Pedigree  order (P1): Individual \"PH4CV\" moved to record 1\n",
+      "Pedigree  order (P2): Individual \"WG5603\" moved to record 2\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Convert factors as in your R code\n",
+    "ro.r('''\n",
+    "ainv <- asreml::ainverse(ped)\n",
+    "blue$Name <- as.factor(blue$Name)\n",
+    "blue <- merge(blue, ped, by = \"Name\")\n",
+    "asr2 <- asreml::asreml(predicted.value ~ 1,\n",
+    "                       random = ~ vm(Name, ainv),\n",
+    "                       residual = ~ idv(units),\n",
+    "                       data = blue)\n",
+    "''')\n",
+    "\n",
+    "# To extract results from the asr2 model in R to Python\n",
+    "varcomp = ro.r('summary(asr2)$varcomp')\n",
+    "g = ro.r('coef(asr2)$random')\n",
+    "\n"
+   ],
+   "metadata": {
+    "collapsed": false,
+    "ExecuteTime": {
+     "end_time": "2024-03-26T06:41:30.341690500Z",
+     "start_time": "2024-03-26T06:41:30.005436100Z"
+    }
+   },
+   "id": "47f01cf2234a3eb8",
+   "execution_count": 32
+  },
+  {
+   "cell_type": "code",
+   "outputs": [
+    {
+     "data": {
+      "text/plain": "                             Name      Value\nvm(Name, ainv)_PH4CV        PH4CV   0.000000\nvm(Name, ainv)_WG5603      WG5603   0.000000\nvm(Name, ainv)_WG603        WG603   0.000000\nvm(Name, ainv)_DMY1F        DMY1F   0.000000\nvm(Name, ainv)_DH605F      DH605F -23.800054\n...                           ...        ...\nvm(Name, ainv)_D2284159  D2284159  16.686925\nvm(Name, ainv)_D2284296  D2284296  14.130782\nvm(Name, ainv)_D2284308  D2284308  16.686925\nvm(Name, ainv)_D2284109  D2284109  16.201840\nvm(Name, ainv)_D2284180  D2284180  16.686925\n\n[10310 rows x 2 columns]",
+      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>Name</th>\n      <th>Value</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>vm(Name, ainv)_PH4CV</th>\n      <td>PH4CV</td>\n      <td>0.000000</td>\n    </tr>\n    <tr>\n      <th>vm(Name, ainv)_WG5603</th>\n      <td>WG5603</td>\n      <td>0.000000</td>\n    </tr>\n    <tr>\n      <th>vm(Name, ainv)_WG603</th>\n      <td>WG603</td>\n      <td>0.000000</td>\n    </tr>\n    <tr>\n      <th>vm(Name, ainv)_DMY1F</th>\n      <td>DMY1F</td>\n      <td>0.000000</td>\n    </tr>\n    <tr>\n      <th>vm(Name, ainv)_DH605F</th>\n      <td>DH605F</td>\n      <td>-23.800054</td>\n    </tr>\n    <tr>\n      <th>...</th>\n      <td>...</td>\n      <td>...</td>\n    </tr>\n    <tr>\n      <th>vm(Name, ainv)_D2284159</th>\n      <td>D2284159</td>\n      <td>16.686925</td>\n    </tr>\n    <tr>\n      <th>vm(Name, ainv)_D2284296</th>\n      <td>D2284296</td>\n      <td>14.130782</td>\n    </tr>\n    <tr>\n      <th>vm(Name, ainv)_D2284308</th>\n      <td>D2284308</td>\n      <td>16.686925</td>\n    </tr>\n    <tr>\n      <th>vm(Name, ainv)_D2284109</th>\n      <td>D2284109</td>\n      <td>16.201840</td>\n    </tr>\n    <tr>\n      <th>vm(Name, ainv)_D2284180</th>\n      <td>D2284180</td>\n      <td>16.686925</td>\n    </tr>\n  </tbody>\n</table>\n<p>10310 rows × 2 columns</p>\n</div>"
+     },
+     "execution_count": 33,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Converting R objects back to pandas DataFrames or numpy arrays if needed\n",
+    "import pandas as pd\n",
+    "# Extract row names from the R object\n",
+    "g_row_names = ro.r('rownames(coef(asr2)$random)')\n",
+    "\n",
+    "\n",
+    "g_df = pd.DataFrame(g)\n",
+    "\n",
+    "# Convert the row names to a Python list\n",
+    "g_row_names_py = list(g_row_names)\n",
+    "\n",
+    "# Assign the row names to the pandas DataFrame index\n",
+    "g_df.index = g_row_names_py\n",
+    "g_df['Name'] = [name.split('_')[1] for name in g_df.index]\n",
+    "\n",
+    "g_df.columns = ['Value', 'Name']\n",
+    "g_df = g_df[['Name', 'Value']]\n",
+    "g_df"
+   ],
+   "metadata": {
+    "collapsed": false,
+    "ExecuteTime": {
+     "end_time": "2024-03-26T06:41:30.398371500Z",
+     "start_time": "2024-03-26T06:41:30.340685100Z"
+    }
+   },
+   "id": "386282876cd5ef83",
+   "execution_count": 33
+  },
+  {
+   "cell_type": "code",
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Pedigree  order (P2): Individual \"WG603\" moved to record 1\n",
+      "Pedigree  order (P1): Individual \"PH4CV\" moved to record 1\n",
+      "Pedigree  order (P2): Individual \"WG5603\" moved to record 2\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": "                             Name      Value\nvm(Name, ainv)_PH4CV        PH4CV   0.000000\nvm(Name, ainv)_WG5603      WG5603   0.000000\nvm(Name, ainv)_WG603        WG603   0.000000\nvm(Name, ainv)_DMY1F        DMY1F   0.000000\nvm(Name, ainv)_DH605F      DH605F -23.800054\n...                           ...        ...\nvm(Name, ainv)_D2284159  D2284159  16.686925\nvm(Name, ainv)_D2284296  D2284296  14.130782\nvm(Name, ainv)_D2284308  D2284308  16.686925\nvm(Name, ainv)_D2284109  D2284109  16.201840\nvm(Name, ainv)_D2284180  D2284180  16.686925\n\n[10310 rows x 2 columns]",
+      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>Name</th>\n      <th>Value</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>vm(Name, ainv)_PH4CV</th>\n      <td>PH4CV</td>\n      <td>0.000000</td>\n    </tr>\n    <tr>\n      <th>vm(Name, ainv)_WG5603</th>\n      <td>WG5603</td>\n      <td>0.000000</td>\n    </tr>\n    <tr>\n      <th>vm(Name, ainv)_WG603</th>\n      <td>WG603</td>\n      <td>0.000000</td>\n    </tr>\n    <tr>\n      <th>vm(Name, ainv)_DMY1F</th>\n      <td>DMY1F</td>\n      <td>0.000000</td>\n    </tr>\n    <tr>\n      <th>vm(Name, ainv)_DH605F</th>\n      <td>DH605F</td>\n      <td>-23.800054</td>\n    </tr>\n    <tr>\n      <th>...</th>\n      <td>...</td>\n      <td>...</td>\n    </tr>\n    <tr>\n      <th>vm(Name, ainv)_D2284159</th>\n      <td>D2284159</td>\n      <td>16.686925</td>\n    </tr>\n    <tr>\n      <th>vm(Name, ainv)_D2284296</th>\n      <td>D2284296</td>\n      <td>14.130782</td>\n    </tr>\n    <tr>\n      <th>vm(Name, ainv)_D2284308</th>\n      <td>D2284308</td>\n      <td>16.686925</td>\n    </tr>\n    <tr>\n      <th>vm(Name, ainv)_D2284109</th>\n      <td>D2284109</td>\n      <td>16.201840</td>\n    </tr>\n    <tr>\n      <th>vm(Name, ainv)_D2284180</th>\n      <td>D2284180</td>\n      <td>16.686925</td>\n    </tr>\n  </tbody>\n</table>\n<p>10310 rows × 2 columns</p>\n</div>"
+     },
+     "execution_count": 34,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "import gblup\n",
+    "gblup.run_asreml_ped_gblup(ped_df,blue_df)"
+   ],
+   "metadata": {
+    "collapsed": false,
+    "ExecuteTime": {
+     "end_time": "2024-03-26T06:41:30.876737300Z",
+     "start_time": "2024-03-26T06:41:30.398371500Z"
+    }
+   },
+   "id": "d30dc3ae150a2065",
+   "execution_count": 34
+  },
+  {
+   "cell_type": "code",
+   "outputs": [
+    {
+     "data": {
+      "text/plain": "          marker_1  marker_2  marker_3  marker_4  marker_5  marker_6  \\\nIID_1            1         2         0         0         1         2   \nIID_2            2         0         0         2         0         2   \nIID_3            2         2         1         2         0         0   \nIID_4            0         1         0         0         0         1   \nIID_5            0         2         1         2         1         2   \n...            ...       ...       ...       ...       ...       ...   \nIID_5996         1         1         1         1         2         1   \nIID_5997         1         1         2         2         0         1   \nIID_5998         2         2         1         0         2         1   \nIID_5999         2         2         1         2         0         0   \nIID_6000         2         2         1         0         2         2   \n\n          marker_7  marker_8  marker_9  marker_10  ...  marker_8357  \\\nIID_1            1         2         0          1  ...            0   \nIID_2            1         1         2          1  ...            2   \nIID_3            1         2         1          0  ...            0   \nIID_4            0         1         0          1  ...            2   \nIID_5            0         2         0          2  ...            1   \n...            ...       ...       ...        ...  ...          ...   \nIID_5996         0         1         0          2  ...            2   \nIID_5997         1         1         2          0  ...            0   \nIID_5998         1         1         1          1  ...            2   \nIID_5999         1         0         0          0  ...            0   \nIID_6000         0         1         1          0  ...            2   \n\n          marker_8358  marker_8359  marker_8360  marker_8361  marker_8362  \\\nIID_1               2            0            0            0            1   \nIID_2               2            2            0            2            1   \nIID_3               2            1            1            0            0   \nIID_4               2            0            0            2            1   \nIID_5               0            2            1            1            2   \n...               ...          ...          ...          ...          ...   \nIID_5996            1            1            0            1            2   \nIID_5997            1            2            0            1            0   \nIID_5998            0            1            2            0            2   \nIID_5999            2            1            0            1            0   \nIID_6000            1            0            1            0            1   \n\n          marker_8363  marker_8364  marker_8365  marker_8366  \nIID_1               0            2            1            2  \nIID_2               2            0            2            1  \nIID_3               1            2            0            1  \nIID_4               1            1            1            0  \nIID_5               1            2            1            0  \n...               ...          ...          ...          ...  \nIID_5996            0            2            1            1  \nIID_5997            2            1            1            2  \nIID_5998            0            2            1            1  \nIID_5999            1            1            2            2  \nIID_6000            2            2            2            0  \n\n[6000 rows x 8366 columns]",
+      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>marker_1</th>\n      <th>marker_2</th>\n      <th>marker_3</th>\n      <th>marker_4</th>\n      <th>marker_5</th>\n      <th>marker_6</th>\n      <th>marker_7</th>\n      <th>marker_8</th>\n      <th>marker_9</th>\n      <th>marker_10</th>\n      <th>...</th>\n      <th>marker_8357</th>\n      <th>marker_8358</th>\n      <th>marker_8359</th>\n      <th>marker_8360</th>\n      <th>marker_8361</th>\n      <th>marker_8362</th>\n      <th>marker_8363</th>\n      <th>marker_8364</th>\n      <th>marker_8365</th>\n      <th>marker_8366</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>IID_1</th>\n      <td>1</td>\n      <td>2</td>\n      <td>0</td>\n      <td>0</td>\n      <td>1</td>\n      <td>2</td>\n      <td>1</td>\n      <td>2</td>\n      <td>0</td>\n      <td>1</td>\n      <td>...</td>\n      <td>0</td>\n      <td>2</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>1</td>\n      <td>0</td>\n      <td>2</td>\n      <td>1</td>\n      <td>2</td>\n    </tr>\n    <tr>\n      <th>IID_2</th>\n      <td>2</td>\n      <td>0</td>\n      <td>0</td>\n      <td>2</td>\n      <td>0</td>\n      <td>2</td>\n      <td>1</td>\n      <td>1</td>\n      <td>2</td>\n      <td>1</td>\n      <td>...</td>\n      <td>2</td>\n      <td>2</td>\n      <td>2</td>\n      <td>0</td>\n      <td>2</td>\n      <td>1</td>\n      <td>2</td>\n      <td>0</td>\n      <td>2</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>IID_3</th>\n      <td>2</td>\n      <td>2</td>\n      <td>1</td>\n      <td>2</td>\n      <td>0</td>\n      <td>0</td>\n      <td>1</td>\n      <td>2</td>\n      <td>1</td>\n      <td>0</td>\n      <td>...</td>\n      <td>0</td>\n      <td>2</td>\n      <td>1</td>\n      <td>1</td>\n      <td>0</td>\n      <td>0</td>\n      <td>1</td>\n      <td>2</td>\n      <td>0</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>IID_4</th>\n      <td>0</td>\n      <td>1</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>1</td>\n      <td>0</td>\n      <td>1</td>\n      <td>0</td>\n      <td>1</td>\n      <td>...</td>\n      <td>2</td>\n      <td>2</td>\n      <td>0</td>\n      <td>0</td>\n      <td>2</td>\n      <td>1</td>\n      <td>1</td>\n      <td>1</td>\n      <td>1</td>\n      <td>0</td>\n    </tr>\n    <tr>\n      <th>IID_5</th>\n      <td>0</td>\n      <td>2</td>\n      <td>1</td>\n      <td>2</td>\n      <td>1</td>\n      <td>2</td>\n      <td>0</td>\n      <td>2</td>\n      <td>0</td>\n      <td>2</td>\n      <td>...</td>\n      <td>1</td>\n      <td>0</td>\n      <td>2</td>\n      <td>1</td>\n      <td>1</td>\n      <td>2</td>\n      <td>1</td>\n      <td>2</td>\n      <td>1</td>\n      <td>0</td>\n    </tr>\n    <tr>\n      <th>...</th>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n    </tr>\n    <tr>\n      <th>IID_5996</th>\n      <td>1</td>\n      <td>1</td>\n      <td>1</td>\n      <td>1</td>\n      <td>2</td>\n      <td>1</td>\n      <td>0</td>\n      <td>1</td>\n      <td>0</td>\n      <td>2</td>\n      <td>...</td>\n      <td>2</td>\n      <td>1</td>\n      <td>1</td>\n      <td>0</td>\n      <td>1</td>\n      <td>2</td>\n      <td>0</td>\n      <td>2</td>\n      <td>1</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>IID_5997</th>\n      <td>1</td>\n      <td>1</td>\n      <td>2</td>\n      <td>2</td>\n      <td>0</td>\n      <td>1</td>\n      <td>1</td>\n      <td>1</td>\n      <td>2</td>\n      <td>0</td>\n      <td>...</td>\n      <td>0</td>\n      <td>1</td>\n      <td>2</td>\n      <td>0</td>\n      <td>1</td>\n      <td>0</td>\n      <td>2</td>\n      <td>1</td>\n      <td>1</td>\n      <td>2</td>\n    </tr>\n    <tr>\n      <th>IID_5998</th>\n      <td>2</td>\n      <td>2</td>\n      <td>1</td>\n      <td>0</td>\n      <td>2</td>\n      <td>1</td>\n      <td>1</td>\n      <td>1</td>\n      <td>1</td>\n      <td>1</td>\n      <td>...</td>\n      <td>2</td>\n      <td>0</td>\n      <td>1</td>\n      <td>2</td>\n      <td>0</td>\n      <td>2</td>\n      <td>0</td>\n      <td>2</td>\n      <td>1</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>IID_5999</th>\n      <td>2</td>\n      <td>2</td>\n      <td>1</td>\n      <td>2</td>\n      <td>0</td>\n      <td>0</td>\n      <td>1</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>...</td>\n      <td>0</td>\n      <td>2</td>\n      <td>1</td>\n      <td>0</td>\n      <td>1</td>\n      <td>0</td>\n      <td>1</td>\n      <td>1</td>\n      <td>2</td>\n      <td>2</td>\n    </tr>\n    <tr>\n      <th>IID_6000</th>\n      <td>2</td>\n      <td>2</td>\n      <td>1</td>\n      <td>0</td>\n      <td>2</td>\n      <td>2</td>\n      <td>0</td>\n      <td>1</td>\n      <td>1</td>\n      <td>0</td>\n      <td>...</td>\n      <td>2</td>\n      <td>1</td>\n      <td>0</td>\n      <td>1</td>\n      <td>0</td>\n      <td>1</td>\n      <td>2</td>\n      <td>2</td>\n      <td>2</td>\n      <td>0</td>\n    </tr>\n  </tbody>\n</table>\n<p>6000 rows × 8366 columns</p>\n</div>"
+     },
+     "execution_count": 35,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "import  numpy as np\n",
+    "G_matrix = np.random.randint(0, 3, size=(6000, 8366))\n",
+    "\n",
+    "# Convert the numpy matrix to a pandas DataFrame for easier manipulation and visualization\n",
+    "G_matrix = pd.DataFrame(G_matrix)\n",
+    "\n",
+    "# Optionally, name the columns and index\n",
+    "G_matrix.columns = [f\"marker_{i+1}\" for i in range(G_matrix.shape[1])]\n",
+    "G_matrix.index = [f\"IID_{i+1}\" for i in range(G_matrix.shape[0])]\n",
+    "G_matrix"
+   ],
+   "metadata": {
+    "collapsed": false,
+    "ExecuteTime": {
+     "end_time": "2024-03-26T06:41:31.387574600Z",
+     "start_time": "2024-03-26T06:41:30.872935500Z"
+    }
+   },
+   "id": "1c5098ff572f68de",
+   "execution_count": 35
+  },
+  {
+   "cell_type": "code",
+   "outputs": [
+    {
+     "data": {
+      "text/plain": "       marker_1  marker_2  marker_3  marker_4  marker_5  marker_6  marker_7  \\\nIID_1         1         2         0         0         1         2         1   \nIID_2         2         0         0         2         0         2         1   \nIID_3         2         2         1         2         0         0         1   \nIID_4         0         1         0         0         0         1         0   \nIID_5         0         2         1         2         1         2         0   \nIID_6         0         0         1         0         0         2         0   \n\n       marker_8  marker_9  marker_10  ...  marker_8357  marker_8358  \\\nIID_1         2         0          1  ...            0            2   \nIID_2         1         2          1  ...            2            2   \nIID_3         2         1          0  ...            0            2   \nIID_4         1         0          1  ...            2            2   \nIID_5         2         0          2  ...            1            0   \nIID_6         0         2          1  ...            2            1   \n\n       marker_8359  marker_8360  marker_8361  marker_8362  marker_8363  \\\nIID_1            0            0            0            1            0   \nIID_2            2            0            2            1            2   \nIID_3            1            1            0            0            1   \nIID_4            0            0            2            1            1   \nIID_5            2            1            1            2            1   \nIID_6            0            2            1            0            0   \n\n       marker_8364  marker_8365  marker_8366  \nIID_1            2            1            2  \nIID_2            0            2            1  \nIID_3            2            0            1  \nIID_4            1            1            0  \nIID_5            2            1            0  \nIID_6            1            0            2  \n\n[6 rows x 8366 columns]",
+      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>marker_1</th>\n      <th>marker_2</th>\n      <th>marker_3</th>\n      <th>marker_4</th>\n      <th>marker_5</th>\n      <th>marker_6</th>\n      <th>marker_7</th>\n      <th>marker_8</th>\n      <th>marker_9</th>\n      <th>marker_10</th>\n      <th>...</th>\n      <th>marker_8357</th>\n      <th>marker_8358</th>\n      <th>marker_8359</th>\n      <th>marker_8360</th>\n      <th>marker_8361</th>\n      <th>marker_8362</th>\n      <th>marker_8363</th>\n      <th>marker_8364</th>\n      <th>marker_8365</th>\n      <th>marker_8366</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>IID_1</th>\n      <td>1</td>\n      <td>2</td>\n      <td>0</td>\n      <td>0</td>\n      <td>1</td>\n      <td>2</td>\n      <td>1</td>\n      <td>2</td>\n      <td>0</td>\n      <td>1</td>\n      <td>...</td>\n      <td>0</td>\n      <td>2</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>1</td>\n      <td>0</td>\n      <td>2</td>\n      <td>1</td>\n      <td>2</td>\n    </tr>\n    <tr>\n      <th>IID_2</th>\n      <td>2</td>\n      <td>0</td>\n      <td>0</td>\n      <td>2</td>\n      <td>0</td>\n      <td>2</td>\n      <td>1</td>\n      <td>1</td>\n      <td>2</td>\n      <td>1</td>\n      <td>...</td>\n      <td>2</td>\n      <td>2</td>\n      <td>2</td>\n      <td>0</td>\n      <td>2</td>\n      <td>1</td>\n      <td>2</td>\n      <td>0</td>\n      <td>2</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>IID_3</th>\n      <td>2</td>\n      <td>2</td>\n      <td>1</td>\n      <td>2</td>\n      <td>0</td>\n      <td>0</td>\n      <td>1</td>\n      <td>2</td>\n      <td>1</td>\n      <td>0</td>\n      <td>...</td>\n      <td>0</td>\n      <td>2</td>\n      <td>1</td>\n      <td>1</td>\n      <td>0</td>\n      <td>0</td>\n      <td>1</td>\n      <td>2</td>\n      <td>0</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>IID_4</th>\n      <td>0</td>\n      <td>1</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>1</td>\n      <td>0</td>\n      <td>1</td>\n      <td>0</td>\n      <td>1</td>\n      <td>...</td>\n      <td>2</td>\n      <td>2</td>\n      <td>0</td>\n      <td>0</td>\n      <td>2</td>\n      <td>1</td>\n      <td>1</td>\n      <td>1</td>\n      <td>1</td>\n      <td>0</td>\n    </tr>\n    <tr>\n      <th>IID_5</th>\n      <td>0</td>\n      <td>2</td>\n      <td>1</td>\n      <td>2</td>\n      <td>1</td>\n      <td>2</td>\n      <td>0</td>\n      <td>2</td>\n      <td>0</td>\n      <td>2</td>\n      <td>...</td>\n      <td>1</td>\n      <td>0</td>\n      <td>2</td>\n      <td>1</td>\n      <td>1</td>\n      <td>2</td>\n      <td>1</td>\n      <td>2</td>\n      <td>1</td>\n      <td>0</td>\n    </tr>\n    <tr>\n      <th>IID_6</th>\n      <td>0</td>\n      <td>0</td>\n      <td>1</td>\n      <td>0</td>\n      <td>0</td>\n      <td>2</td>\n      <td>0</td>\n      <td>0</td>\n      <td>2</td>\n      <td>1</td>\n      <td>...</td>\n      <td>2</td>\n      <td>1</td>\n      <td>0</td>\n      <td>2</td>\n      <td>1</td>\n      <td>0</td>\n      <td>0</td>\n      <td>1</td>\n      <td>0</td>\n      <td>2</td>\n    </tr>\n  </tbody>\n</table>\n<p>6 rows × 8366 columns</p>\n</div>"
+     },
+     "execution_count": 36,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Pass the adjusted DataFrame into the R environment\n",
+    "ro.globalenv['G_matrix'] = pandas2ri.py2rpy(G_matrix)\n",
+    "\n",
+    "# Verify by printing the first few rows in R\n",
+    "# Ensure that the dimnames (row and column names) are correctly set in R\n",
+    "ro.r('''\n",
+    "dimnames(G_matrix) <- list(rownames(G_matrix), colnames(G_matrix))\n",
+    "''')\n",
+    "\n",
+    "ro.r('head(G_matrix)')"
+   ],
+   "metadata": {
+    "collapsed": false,
+    "ExecuteTime": {
+     "end_time": "2024-03-26T06:43:09.773274800Z",
+     "start_time": "2024-03-26T06:41:31.387574600Z"
+    }
+   },
+   "id": "e025ceac8fe61ecf",
+   "execution_count": 36
+  },
+  {
+   "cell_type": "code",
+   "outputs": [],
+   "source": [
+    "blue_df = pd.DataFrame({\n",
+    "    \"Name\": [\"IID_3\", \"IID_4\", \"IID_5\"],\n",
+    "    \"Value\": [2.4, 2.4, 5.3]\n",
+    "})\n",
+    "\n"
+   ],
+   "metadata": {
+    "collapsed": false,
+    "ExecuteTime": {
+     "end_time": "2024-03-26T06:43:09.775274500Z",
+     "start_time": "2024-03-26T06:43:09.761671300Z"
+    }
+   },
+   "id": "8a0429505eafc8d6",
+   "execution_count": 37
+  },
+  {
+   "cell_type": "code",
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Initial data: \n",
+      "\tNumber of Individuals: 6000 \n",
+      "\tNumber of Markers: 8366 \n",
+      "\n",
+      "Missing data check: \n",
+      "\tTotal SNPs: 8366 \n",
+      "\t 0 SNPs dropped due to missing data threshold of 0.5 \n",
+      "\tTotal of: 8366  SNPs \n",
+      "\n",
+      "MAF check: \n",
+      "\tNo SNPs with MAF below 0 \n",
+      "\n",
+      "Heterozigosity data check: \n",
+      "\tNo SNPs with heterozygosity, missing threshold of =  0 \n",
+      "\n",
+      "Summary check: \n",
+      "\tInitial:  8366 SNPs \n",
+      "\tFinal:  8366  SNPs ( 0  SNPs removed) \n",
+      " \n",
+      "Completed! Time = 470.51  seconds \n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "R[write to console]: Reciprocal conditional number for original matrix is: 5.75725494224797e-05\n",
+      "\n",
+      "R[write to console]: Reciprocal conditional number for inverted matrix is: 5.43301590950856e-05\n",
+      "\n",
+      "R[write to console]: Inverse of matrix G does not appear to be ill-conditioned.\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "ro.globalenv['G_matrix'] = pandas2ri.py2rpy(G_matrix)\n",
+    "\n",
+    "# Verify by printing the first few rows in R\n",
+    "# Ensure that the dimnames (row and column names) are correctly set in R\n",
+    "ro.r('''\n",
+    "dimnames(G_matrix) <- list(rownames(G_matrix), colnames(G_matrix))\n",
+    "''')\n",
+    "\n",
+    "ro.globalenv['blue'] = pandas2ri.py2rpy(blue_df)\n",
+    "#ro.r('rownames(blue)')\n",
+    "ro.r('''\n",
+    "library(asreml)\n",
+    "library(ASRgenomics)\n",
+    "library(tidyverse)\n",
+    "G_matrix <- as.matrix(G_matrix)\n",
+    "Gmat = G.matrix(G_matrix)$G\n",
+    "rownames(Gmat) <- rownames(G_matrix)\n",
+    "colnames(Gmat) <- rownames(G_matrix)\n",
+    "diag(Gmat) = diag(Gmat) + 0.01\n",
+    "ginv = G.inverse(Gmat,sparseform = T)\n",
+    "ginv = ginv$Ginv.sparse\n",
+    "\n",
+    "attr(ginv,\"rowNames\") %>% head\n",
+    "\n",
+    "blue$Name <- as.factor(blue$Name)\n",
+    "\n",
+    "\n",
+    "asr2 <- asreml(Value ~ 1,\n",
+    "               random = ~ vm(Name,ginv), \n",
+    "               residual = ~idv(units),\n",
+    "               data = blue)\n",
+    "summary(asr2)$varcomp\n",
+    "\n",
+    "''')\n",
+    "g = ro.r('coef(asr2)$random')\n",
+    "\n",
+    "g_row_names = ro.r('rownames(coef(asr2)$random)')\n",
+    "\n",
+    "g_df = pd.DataFrame(g)\n",
+    "\n",
+    "# Convert the row names to a Python list\n",
+    "g_row_names_py = list(g_row_names)\n",
+    "\n",
+    "# Assign the row names to the pandas DataFrame index\n",
+    "g_df.index = g_row_names_py\n",
+    "g_df['Name'] = [name.split('_')[1:] for name in g_df.index]\n",
+    "g_df.columns = ['Value', 'Name']\n",
+    "g_df = g_df[['Name', 'Value']]"
+   ],
+   "metadata": {
+    "collapsed": false,
+    "is_executing": true,
+    "ExecuteTime": {
+     "start_time": "2024-03-26T06:43:09.772278600Z"
+    }
+   },
+   "id": "21d78c7525765a4b",
+   "execution_count": null
+  },
+  {
+   "cell_type": "code",
+   "outputs": [],
+   "source": [
+    "g_df"
+   ],
+   "metadata": {
+    "collapsed": false,
+    "is_executing": true
+   },
+   "id": "7acf3d154cb5052c",
+   "execution_count": null
+  },
+  {
+   "cell_type": "code",
+   "outputs": [],
+   "source": [
+    "import gblup\n",
+    "g_gblup = gblup.run_asreml_G_gblup(G_matrix,blue_df)\n",
+    "g_gblup"
+   ],
+   "metadata": {
+    "collapsed": false,
+    "is_executing": true
+   },
+   "id": "4a3a8a5a537bdd5f",
+   "execution_count": null
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 2
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython2",
+   "version": "2.7.6"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/blup_animal.ipynb b/blup_animal.ipynb
index 680e41c..49c3ff9 100644
--- a/blup_animal.ipynb
+++ b/blup_animal.ipynb
@@ -2,13 +2,13 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 35,
+   "execution_count": 46,
    "id": "initial_id",
    "metadata": {
     "collapsed": true,
     "ExecuteTime": {
-     "end_time": "2024-03-25T09:19:22.281546600Z",
-     "start_time": "2024-03-25T09:19:22.075735900Z"
+     "end_time": "2024-03-25T09:48:46.209035Z",
+     "start_time": "2024-03-25T09:48:46.009607200Z"
     }
    },
    "outputs": [
@@ -25,6 +25,15 @@
       " [0.25  0.25  0.25  0.5   0.5   0.25  1.    0.25 ]\n",
       " [0.25  0.25  0.5   0.125 0.375 0.5   0.25  1.   ]]\n"
      ]
+    },
+    {
+     "data": {
+      "text/plain": "   ID  SIRE  DAM\n0   1     0    0\n1   2     0    0\n2   3     0    0\n3   4     1    0\n4   5     3    2\n5   6     1    2\n6   7     4    5\n7   8     3    6",
+      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>ID</th>\n      <th>SIRE</th>\n      <th>DAM</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>1</td>\n      <td>0</td>\n      <td>0</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>2</td>\n      <td>0</td>\n      <td>0</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>3</td>\n      <td>0</td>\n      <td>0</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>4</td>\n      <td>1</td>\n      <td>0</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>5</td>\n      <td>3</td>\n      <td>2</td>\n    </tr>\n    <tr>\n      <th>5</th>\n      <td>6</td>\n      <td>1</td>\n      <td>2</td>\n    </tr>\n    <tr>\n      <th>6</th>\n      <td>7</td>\n      <td>4</td>\n      <td>5</td>\n    </tr>\n    <tr>\n      <th>7</th>\n      <td>8</td>\n      <td>3</td>\n      <td>6</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
+     },
+     "execution_count": 46,
+     "metadata": {},
+     "output_type": "execute_result"
     }
    ],
    "source": [
@@ -53,6 +62,7 @@
     "#ainv = np.array(r.solve(A))\n",
     "ainv = np.linalg.inv(A)\n",
     "print(np.array(A))\n",
+    "pd.DataFrame(ped_data)\n",
     "#np.allclose(ainv,e)"
    ]
   },
@@ -248,6 +258,14 @@
      "execution_count": 33,
      "metadata": {},
      "output_type": "execute_result"
+    },
+    {
+     "data": {
+      "text/plain": "array([[  8.74646688],\n       [120.73507016],\n       [-90.13470959],\n       ...,\n       [141.13168562],\n       [-44.48005818],\n       [ 84.26178548]])"
+     },
+     "execution_count": 45,
+     "metadata": {},
+     "output_type": "execute_result"
     }
    ],
    "source": [
@@ -257,12 +275,12 @@
    "metadata": {
     "collapsed": false,
     "ExecuteTime": {
-     "end_time": "2024-03-25T08:42:03.490882200Z",
+     "end_time": "2024-03-25T09:36:13.096766900Z",
      "start_time": "2024-03-25T08:41:55.848715200Z"
     }
    },
    "id": "17d9a37deccf8a55",
-   "execution_count": 33
+   "execution_count": 45
   }
  ],
  "metadata": {
diff --git a/gblup.py b/gblup.py
index 0c67316..7389eec 100644
--- a/gblup.py
+++ b/gblup.py
@@ -1,3 +1,109 @@
+import pandas as pd
+import rpy2.robjects as ro
+from rpy2.robjects import pandas2ri
+from rpy2.robjects.conversion import localconverter
+
+def run_asreml_G_gblup(G_matrix, blue_df):
+    pandas2ri.activate()
+    print(f'G_matrix:\n{G_matrix.shape}, blue_df:\n{blue_df.shape}')
+    ro.r('''Sys.setlocale("LC_CTYPE", "en_US.UTF-8")''')
+    ro.globalenv['G_matrix'] = pandas2ri.py2rpy(G_matrix)
+    # Verify by printing the first few rows in R
+    # Ensure that the dimnames (row and column names) are correctly set in R
+    ro.r('''
+    dimnames(G_matrix) <- list(rownames(G_matrix), colnames(G_matrix))
+    ''')
+
+    ro.globalenv['blue'] = pandas2ri.py2rpy(blue_df)
+    # ro.r('rownames(blue)')
+    ro.r('''
+    library(asreml)
+    library(ASRgenomics)
+    library(tidyverse)
+    G_matrix <- as.matrix(G_matrix)
+    Gmat = G.matrix(G_matrix)$G
+    rownames(Gmat) <- rownames(G_matrix)
+    colnames(Gmat) <- rownames(G_matrix)
+    diag(Gmat) = diag(Gmat) + 0.01
+    ginv = G.inverse(Gmat,sparseform = T)
+    ginv = ginv$Ginv.sparse
+
+    attr(ginv,"rowNames") %>% head
+
+    blue$Name <- as.factor(blue$Name)
+
+
+    asr2 <- asreml(Value ~ 1,
+                   random = ~ vm(Name,ginv), 
+                   residual = ~idv(units),
+                   data = blue,workspace = 20480000 * 12)
+    summary(asr2)$varcomp
+
+    ''')
+    g = ro.r('coef(asr2)$random')
+
+    g_row_names = ro.r('rownames(coef(asr2)$random)')
+
+    g_df = pd.DataFrame(g)
+
+    # Convert the row names to a Python list
+    g_row_names_py = list(g_row_names)
+
+    # Assign the row names to the pandas DataFrame index
+    g_df.index = g_row_names_py
+    g_df['Name'] = ['_'.join(name.split('_')[1:]) for name in g_df.index]
+    g_df.columns = ['Value', 'Name']
+    g_df = g_df[['Name', 'Value']]
+    g_df.to_csv('output/gblup.csv',index=True)
+    print(g_df.head(10))
+    return g_df
+
+def run_asreml_ped_gblup(ped_df, blue_df):
+    # Ensure automatic conversion between pandas dataframes and R data frames
+    pandas2ri.activate()
+
+    # Convert pandas dataframes to R data frames and put them into the R environment
+    ro.globalenv['ped'] = pandas2ri.py2rpy(ped_df)
+    ro.globalenv['blue'] = pandas2ri.py2rpy(blue_df)
+
+    # Run the ASReml model in R
+    ro.r('''
+    library(asreml)
+    ainv <- asreml::ainverse(ped)
+    blue$Name <- as.factor(blue$Name)
+    blue <- merge(blue, ped, by = "Name")
+    asr2 <- asreml::asreml(predicted.value ~ 1,
+                           random = ~ vm(Name, ainv),
+                           residual = ~ idv(units),
+                           data = blue)
+    ''')
+
+    # Extract results from the asr2 model in R to Python
+    g = ro.r('coef(asr2)$random')
+
+    g_row_names = ro.r('rownames(coef(asr2)$random)')
+
+    g_df = pd.DataFrame(g)
+
+    # Convert the row names to a Python list
+    g_row_names_py = list(g_row_names)
+
+    # Assign the row names to the pandas DataFrame index
+    g_df.index = g_row_names_py
+    g_df['Name'] = [name.split('_')[1] for name in g_df.index]
+
+    g_df.columns = ['Value', 'Name']
+    g_df = g_df[['Name', 'Value']]
+
+    return g_df
+
+
+# Example usage
+# ped_df and blue_df should be defined as pandas DataFrames before calling this function
+# results_df = run_asreml_and_extract_results(ped_df, blue_df)
+# print(results
+
+
 def solve_mixed_gblup_model(n_obs, obs_ids, y_values, a):
     import numpy as np
     ainv = np.linalg.inv(np.array(a))

From fcb0fdde26ffd0068d0f6031e5bd5dc3f98d30bd Mon Sep 17 00:00:00 2001
From: zhanglei <min.oddo@gmail.com>
Date: Tue, 26 Mar 2024 16:21:21 +0800
Subject: [PATCH 6/6] gblup and arseml test using gblup

---
 .idea/.gitignore | 8 ++++++++
 1 file changed, 8 insertions(+)
 create mode 100644 .idea/.gitignore

diff --git a/.idea/.gitignore b/.idea/.gitignore
new file mode 100644
index 0000000..13566b8
--- /dev/null
+++ b/.idea/.gitignore
@@ -0,0 +1,8 @@
+# Default ignored files
+/shelf/
+/workspace.xml
+# Editor-based HTTP Client requests
+/httpRequests/
+# Datasource local storage ignored files
+/dataSources/
+/dataSources.local.xml