From bbe6369f4f4362022da466cc8c5663bb4f3284df Mon Sep 17 00:00:00 2001
From: Joshua Fogg <joshua.h.fogg@gmail.com>
Date: Fri, 26 Apr 2024 14:13:36 +0100
Subject: [PATCH 1/4] More sensible directory structure for examples As we add
 more, working with larger and larger examples, it doesn't make sense to have
 a directory for each

---
 Example/04/A04.txt                          | 4 ++++
 Example/04/EBV04.txt                        | 4 ++++
 Example/04/S04.txt                          | 4 ++++
 Example/{ => 50}/A50.txt                    | 0
 Example/{ => 50}/EBV50.txt                  | 0
 Example/{ => 50}/S50.txt                    | 0
 Example/README.md                           | 9 ++++++---
 {Example2 => Scratch/Example}/A02.txt       | 0
 {Example2 => Scratch/Example}/EBV02.txt     | 0
 {Example2 => Scratch/Example}/README.md     | 2 +-
 {Example2 => Scratch/Example}/S02.txt       | 0
 {Example2 => Scratch/Example}/n2-test.ipynb | 0
 12 files changed, 19 insertions(+), 4 deletions(-)
 create mode 100644 Example/04/A04.txt
 create mode 100644 Example/04/EBV04.txt
 create mode 100644 Example/04/S04.txt
 rename Example/{ => 50}/A50.txt (100%)
 rename Example/{ => 50}/EBV50.txt (100%)
 rename Example/{ => 50}/S50.txt (100%)
 rename {Example2 => Scratch/Example}/A02.txt (100%)
 rename {Example2 => Scratch/Example}/EBV02.txt (100%)
 rename {Example2 => Scratch/Example}/README.md (59%)
 rename {Example2 => Scratch/Example}/S02.txt (100%)
 rename {Example2 => Scratch/Example}/n2-test.ipynb (100%)

diff --git a/Example/04/A04.txt b/Example/04/A04.txt
new file mode 100644
index 0000000..3012329
--- /dev/null
+++ b/Example/04/A04.txt
@@ -0,0 +1,4 @@
+1 1 1
+2 2 1
+3 3 1
+4 4 1
diff --git a/Example/04/EBV04.txt b/Example/04/EBV04.txt
new file mode 100644
index 0000000..be589c9
--- /dev/null
+++ b/Example/04/EBV04.txt
@@ -0,0 +1,4 @@
+1
+1
+2
+2
diff --git a/Example/04/S04.txt b/Example/04/S04.txt
new file mode 100644
index 0000000..ae2331b
--- /dev/null
+++ b/Example/04/S04.txt
@@ -0,0 +1,4 @@
+1 1 0.11111111111111
+2 2 0.11111111111111
+3 3 4.0
+4 4 4.0
\ No newline at end of file
diff --git a/Example/A50.txt b/Example/50/A50.txt
similarity index 100%
rename from Example/A50.txt
rename to Example/50/A50.txt
diff --git a/Example/EBV50.txt b/Example/50/EBV50.txt
similarity index 100%
rename from Example/EBV50.txt
rename to Example/50/EBV50.txt
diff --git a/Example/S50.txt b/Example/50/S50.txt
similarity index 100%
rename from Example/S50.txt
rename to Example/50/S50.txt
diff --git a/Example/README.md b/Example/README.md
index b15b17e..2c3d0af 100644
--- a/Example/README.md
+++ b/Example/README.md
@@ -1,4 +1,9 @@
-# File Description
+# Examples
+
+- [04/](04/) A scaled up version of the small $n = 2$ example created by Julian, replicating that problem across sires and dams.
+- [50/](50/) The 50 youngest individuals from a generated cohort of 12,000. Implications of this non-random example are unknown.
+
+## File Description
 
 In the original simulation, we had 12k individuals and 1k samples of their estimated breeding values (EBV) – our selection criterion.
 
@@ -8,8 +13,6 @@ Based on that simulated data:
 - I constructed the S-matrix (12k x 12k), which is the covariance matrix between 1k EBV samples for 12k individuals. [S50]
 - The EBV vector (12k), which is posterior mean over 1k samples of EBV. [EBV50]
 
-Since 12k might be a lot, I have extracted last 50 rows/columns from above 12k set, for your test. This corresponds to the youngest 50 individuals (I don’t know the implications of choosing this non-random sample ).
-
 The matrices are saved in row, column, value format like we talked about before.
 
 _Note: this is an unedited version of Gregor's description of the files._
diff --git a/Example2/A02.txt b/Scratch/Example/A02.txt
similarity index 100%
rename from Example2/A02.txt
rename to Scratch/Example/A02.txt
diff --git a/Example2/EBV02.txt b/Scratch/Example/EBV02.txt
similarity index 100%
rename from Example2/EBV02.txt
rename to Scratch/Example/EBV02.txt
diff --git a/Example2/README.md b/Scratch/Example/README.md
similarity index 59%
rename from Example2/README.md
rename to Scratch/Example/README.md
index 1a3bd3e..9324efb 100644
--- a/Example2/README.md
+++ b/Scratch/Example/README.md
@@ -1,3 +1,3 @@
 # Example 2
 
-This is a small example for $n = 2$ created by Julian. It uses the same file formats as [Example 1](../Example/).
+This is a small example for $n = 2$ created by Julian. It uses the same file formats as [Example 1](../../Example/).
diff --git a/Example2/S02.txt b/Scratch/Example/S02.txt
similarity index 100%
rename from Example2/S02.txt
rename to Scratch/Example/S02.txt
diff --git a/Example2/n2-test.ipynb b/Scratch/Example/n2-test.ipynb
similarity index 100%
rename from Example2/n2-test.ipynb
rename to Scratch/Example/n2-test.ipynb

From 60b31ee994093c2d0ad9d93a19e171ecacd362e1 Mon Sep 17 00:00:00 2001
From: Joshua Fogg <joshua.h.fogg@gmail.com>
Date: Fri, 26 Apr 2024 14:15:43 +0100
Subject: [PATCH 2/4] Began rewrite in raw Python At the moment a test problem
 is in the same file as the worker functions, but I'll split those out once
 happy it's not caused any regressions. As part of this I've added typing for
 function input and outputs.

---
 .../robust-genetics.ipynb                     |   0
 robust-genetics.py                            | 293 ++++++++++++++++++
 2 files changed, 293 insertions(+)
 rename robust-genetics.ipynb => Scratch/robust-genetics.ipynb (100%)
 create mode 100644 robust-genetics.py

diff --git a/robust-genetics.ipynb b/Scratch/robust-genetics.ipynb
similarity index 100%
rename from robust-genetics.ipynb
rename to Scratch/robust-genetics.ipynb
diff --git a/robust-genetics.py b/robust-genetics.py
new file mode 100644
index 0000000..7ec86f1
--- /dev/null
+++ b/robust-genetics.py
@@ -0,0 +1,293 @@
+#!/usr/bin/env python3
+
+import numpy as np          # defines matrix structures
+import gurobipy as gp       # Gurobi optimization interface (1)
+from gurobipy import GRB    # Gurobi optimization interface (2)
+from typing import Union    # TODO replace with stock `|` from 3.10+ 
+
+
+# OUTPUT SETTINGS
+# Numpy makes some odd choices with its default output formats,
+# this adjusts the most intrusive of those for anything which
+# uses this utility.
+
+# want to round rather than truncate when printing
+np.set_printoptions(threshold=np.inf)
+
+# only show numpy output to eight decimal places
+np.set_printoptions(formatter={'float_kind':"{:.8f}".format})
+
+
+
+# READING GENETICS DATA
+# TODO write a more descriptive section heading
+
+def load_ped(filename: str) -> dict:
+    """
+    Function for reading *.ped files to a dictionary. Takes the file name
+    as a string input and returns the pedigree structure as a dictionary.
+    """
+    with open(filename, "r") as file:
+        # first line of *.ped lists the headers; skip
+        file.readline()
+        # create a list of int lists from each line (dropping optional labels)
+        data = [[int(x) for x in line.split(",")[0:3]] for line in file]
+    # convert this list of lists into a dictionary
+    ped = {entry[0]: entry[1:3] for entry in data}
+    return ped
+
+
+def makeA(pedigree: dict) -> np.ndarray:
+    """
+    Construct Wright's Numerator Relationship Matrix from a given pedigree
+    structure. Takes the pedigree as a dictionary input and returns the
+    matrix as output.
+    """
+    m = len(pedigree)
+    # preallocate memory for A 
+    A = np.zeros((m, m))
+
+    # iterate over rows
+    for i in range(0, m):
+        # save parent indexes: pedigrees indexed from 1, Python from 0
+        p = pedigree[i+1][0]-1
+        q = pedigree[i+1][1]-1
+        # iterate over columns sub-diagonal
+        for j in range(0, i):
+            # calculate sub-diagonal entries
+            A[i, j] = 0.5*(A[j, p] + A[j, q])
+            # populate sup-diagonal (symmetric)
+            A[j, i] = A[i, j]
+        # calculate diagonal entries
+        A[i, i] = 1 + 0.5*A[p, q]
+
+    return A
+
+
+def load_problem(A_filename: str, E_filename: str, S_filename: str,
+                 dimension: bool = False, pedigree: bool = False
+                 ) -> tuple[np.ndarray, np.ndarray, np.ndarray, int]:
+    """
+    Used to load genetic selection problems into NumPy. It takes three
+    string inputs for filenames where Sigma, Mu, and Omega are stored,
+    as well as an optional integer input for problem dimension if this
+    is known. If it's know know, it's worked out based on E_filename.
+
+    As output, it returns (A, E, S, n), where A and S are n-by-n NumPy
+    arrays, E is a length n NumPy array, and n is an integer.
+    """
+
+    def load_symmetric_matrix(filename: str, dimension: int) -> np.ndarray:
+        """
+        Since NumPy doesn't have a stock way to load matrices
+        stored in coordinate format format, this adds one.
+        """
+
+        matrix = np.zeros([dimension, dimension])
+
+        with open(filename, 'r') as file:
+            for line in file:
+                i, j, entry = line.split(" ")
+                # data files indexed from 1, not 0
+                matrix[int(i)-1, int(j)-1] = entry
+                matrix[int(j)-1, int(i)-1] = entry
+
+        return matrix
+
+    # if dimension wasn't supplied, need to find that
+    if not dimension:
+        # get dimension from EBV, since it's the smallest file
+        with open(E_filename, 'r') as file:
+            dimension = sum(1 for _ in file)
+
+    # TODO test whether it's better to handle dimension separately as 
+    # above or to take the hit of doing `np.loadtxt`s work explicitly
+    # but then getting the dimension for free. 
+    # EBV isn't in coordinate format so can be loaded directly
+    E = np.loadtxt(E_filename)  
+    # S is stored by coordinates so need special loader
+    S = load_symmetric_matrix(S_filename, dimension)
+    # A can be stored as a pedigree or by coordinates
+    if pedigree:
+        A = makeA(load_ped(A_filename))
+    else:
+        A = load_symmetric_matrix(A_filename, dimension)
+
+    return A, E, S, dimension
+
+
+
+
+# DEFINING SOLVER
+
+def gurobi_standard_genetics(
+    sigma: np.ndarray,
+    mu: np.ndarray,
+    sires,  # type could be np.ndarray, sets[ints], lists[int], range, etc
+    dams,   # type could be np.ndarray, sets[ints], lists[int], range, etc 
+    lam: float,
+    dimension: int,
+    upper_bound: Union[np.ndarray, float] = 1.0,
+    lower_bound: Union[np.ndarray, float] = 0.0,
+    time_limit: Union[float, None] = None,
+    min_duality_gap: Union[float, None] = None,
+    debug: bool = False
+) -> tuple[float, float]:
+    """
+    TODO write a docstring for solving a robust genetics problem
+    """
+
+    # create models for standard and robust genetic selection
+    model = gp.Model("standard-genetics")
+
+    # Gurobi spews all its output into the terminal by default, this restricts
+    # that behaviour to only happen when the `debug` flag is used.
+    if not debug: model.setParam('OutputFlag', 0)
+
+    # integrating bounds within variable definitions is more efficient than
+    # as a separate constraint, which Gurobi would convert to bounds anyway
+    w = model.addMVar(shape=dimension, lb=lower_bound, ub=upper_bound,
+                      vtype=GRB.CONTINUOUS, name="w")
+
+    model.setObjective(
+        # NOTE Gurobi introduces error if we use `np.inner(w, sigma@w)` here
+        w.transpose()@mubar - (lam/2)*w.transpose()@(sigma@w),
+    GRB.MAXIMIZE)
+
+    # set up the two sum-to-half constraints
+    M = np.zeros((2, dimension))
+    # define the M so that column i is [1;0] if i is a sire and [0;1] otherwise 
+    M[0, sires] = 1
+    M[1, dams] = 1
+    # define the right hand side of the constraint Mx = m
+    m = np.full(2, 0.5)
+    model.addConstr(M@w == m, name="sum-to-half")
+
+    # optional controls to stop Gurobi taking too long
+    if time_limit: model.setParam(GRB.Param.TimeLimit, time_limit)
+    if min_duality_gap: model.setParam('MIPGap', min_duality_gap)
+
+    # model file can be used externally for verification
+    if debug: model.write("standard-opt.mps")
+
+    model.optimize()
+    return w.X, model.ObjVal
+
+def gurobi_robust_genetics(
+    sigma: np.ndarray,
+    mubar: np.ndarray,
+    omega: np.ndarray,
+    sires,  # type could be np.ndarray, sets[ints], lists[int], range, etc
+    dams,   # type could be np.ndarray, sets[ints], lists[int], range, etc 
+    lam: float,
+    dimension: int,
+    upper_bound: Union[np.ndarray, float] = 1.0,
+    lower_bound: Union[np.ndarray, float] = 0.0,
+    kappa: float = 0.0,
+    time_limit: Union[float, None] = None,
+    min_duality_gap: Union[float, None] = None,
+    debug: bool = False
+) -> tuple[float, float, float]:
+    """
+    TODO write a docstring for solving a robust genetics problem
+    """
+
+    # create models for standard and robust genetic selection
+    model = gp.Model("robust-genetics")
+
+    # Gurobi spews all its output into the terminal by default, this restricts
+    # that behaviour to only happen when the `debug` flag is used.
+    if not debug: model.setParam('OutputFlag', 0)
+
+    # integrating bounds within variable definitions is more efficient than
+    # as a separate constraint, which Gurobi would convert to bounds anyway
+    w = model.addMVar(shape=dimension, lb=lower_bound, ub=upper_bound,
+                      vtype=GRB.CONTINUOUS, name="w")
+    z = model.addVar(lb=0.0, name="z")
+
+    model.setObjective(
+        # NOTE Gurobi introduces error if we use `np.inner(w, sigma@w)` here
+        w.transpose()@mubar - (lam/2)*w.transpose()@(sigma@w) - kappa*z,
+    GRB.MAXIMIZE)
+
+    # set up the two sum-to-half constraints
+    M = np.zeros((2, dimension))
+    # define the M so that column i is [1;0] if i is a sire and [0;1] otherwise 
+    M[0, sires] = 1
+    M[1, dams] = 1
+    # define the right hand side of the constraint Mx = m
+    m = np.full(2, 0.5)
+    model.addConstr(M@w == m, name="sum-to-half")
+
+    # conic constraint which comes from robust optimization
+    model.addConstr(z**2 >= w.transpose()@omega@w, name="uncertainty")
+
+    # optional controls to stop Gurobi taking too long
+    if time_limit: model.setParam(GRB.Param.TimeLimit, time_limit)
+    if min_duality_gap: model.setParam('MIPGap', min_duality_gap)
+
+    # model file can be used externally for verification
+    if debug: model.write("robust-opt.mps")
+
+    model.optimize()
+    return w.X, z.X, model.ObjVal
+
+
+def print_compare_solutions(
+    w_std: np.ndarray,
+    objective_std: float,
+    w_rbs: np.ndarray,
+    z_rbs: float,
+    objective_rbs: float,
+    dimension: int,
+    precision: int = 5
+) -> None:
+    """
+    TODO write a docstring for comparing two robust genetics portfolios
+    """
+
+    order = len(str(dimension))
+    print(" "*(order-1) + "i   w_std    w_rbs")
+    for candidate in range(dimension):
+        print(
+            f"{candidate+1:0{order}d}  " \
+            f"{w_std[candidate]:.{precision}f}  " \
+            f"{w_rbs[candidate]:.{precision}f}" 
+        )
+
+    print(
+        f"\nStandard Obj.:  {objective_std:.{precision}f}" \
+        f"\nRobust Obj:     {objective_rbs:.{precision}f} (z = {z_rbs:.{precision}f})" \
+        f"\nMaximum change: {max(np.abs(w_std-w_rbs)):.{precision}f}" \
+        f"\nAverage change: {np.mean(np.abs(w_std-w_rbs)):.{precision}f}" \
+        f"\nMinimum change: {min(np.abs(w_std-w_rbs)):.{precision}f}"
+    )
+
+
+# MAIN
+
+# key problem variables
+sigma, mubar, omega, n = load_problem(
+    "Example/04/A04.txt",
+    "Example/04/EBV04.txt",
+    "Example/04/S04.txt"
+)
+
+# NOTE this trick of handling sex data is specific to the initial simulation data
+# which is structured so that candidates alternate between sires (which have even
+# indices) and dams (which have odd indices).
+sires = range(0, n, 2)
+dams  = range(1, n, 2)
+
+lam = 0.5
+kap = 1
+
+# actually finding the standard genetics solution
+w_std, obj_std = gurobi_standard_genetics(sigma, mubar, sires, dams, lam, n)
+z_std = -1  # HACK placeholder for missing value
+
+# not specifying the kappa argument, so will solve the non-robust problem
+# w_std, z_std, obj_std = gurobi_robust_genetics(sigma, mubar, omega, sires, dams, lam, n)
+w_rbs, z_rbs, obj_rbs = gurobi_robust_genetics(sigma, mubar, omega, sires, dams, lam, n, kappa=kap)
+
+print_compare_solutions(w_std, obj_std, w_rbs, z_rbs, obj_rbs, n)

From 6a47becc74ae5cd8c5565b26be92de99936cac81 Mon Sep 17 00:00:00 2001
From: Joshua Fogg <joshua.h.fogg@gmail.com>
Date: Mon, 29 Apr 2024 17:57:55 +0100
Subject: [PATCH 3/4] Fixes for docs, typing, and PEP8

---
 .vscode/settings.json |   5 +-
 robust-genetics.py    | 204 ++++++++++++++++++++++++------------------
 2 files changed, 123 insertions(+), 86 deletions(-)

diff --git a/.vscode/settings.json b/.vscode/settings.json
index 1549fc7..856835b 100644
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -3,6 +3,7 @@
         "bilevel",
         "bmatrix",
         "diagflat",
+        "dtype",
         "Gorjanc",
         "Gregor",
         "Gregor's",
@@ -15,6 +16,7 @@
         "mathbb",
         "mathcal",
         "mubar",
+        "operatorname",
         "Pocrnić",
         "printoptions",
         "proxqp",
@@ -22,5 +24,6 @@
         "sqrtm",
         "vtype",
         "WNRM"
-    ]
+    ],
+    "python.analysis.typeCheckingMode": "basic"
 }
diff --git a/robust-genetics.py b/robust-genetics.py
index 7ec86f1..dff1abf 100644
--- a/robust-genetics.py
+++ b/robust-genetics.py
@@ -1,26 +1,25 @@
 #!/usr/bin/env python3
 
 import numpy as np          # defines matrix structures
+import numpy.typing as npt  # variable typing definitions for NumPy
 import gurobipy as gp       # Gurobi optimization interface (1)
 from gurobipy import GRB    # Gurobi optimization interface (2)
-from typing import Union    # TODO replace with stock `|` from 3.10+ 
+from sys import maxsize     # maximum precision for specific system
 
 
 # OUTPUT SETTINGS
-# Numpy makes some odd choices with its default output formats,
-# this adjusts the most intrusive of those for anything which
-# uses this utility.
-
-# want to round rather than truncate when printing
-np.set_printoptions(threshold=np.inf)
-
-# only show numpy output to eight decimal places
-np.set_printoptions(formatter={'float_kind':"{:.8f}".format})
+# Numpy makes some odd choices with its default output formats, this adjusts
+# the most intrusive of those for anything which uses this utility.
 
+np.set_printoptions(
+    formatter={'float_kind': "{:.8f}".format},  # only show to 8 d.p.
+    threshold=maxsize  # want to round rather than truncate when printing
+)
 
 
 # READING GENETICS DATA
-# TODO write a more descriptive section heading
+# Genetics data could be presented to our solvers in multiple formats, these
+# functions define the appropriate methods for loading those in correctly.
 
 def load_ped(filename: str) -> dict:
     """
@@ -37,15 +36,15 @@ def load_ped(filename: str) -> dict:
     return ped
 
 
-def makeA(pedigree: dict) -> np.ndarray:
+def makeA(pedigree: dict) -> npt.NDArray[np.float64]:
     """
     Construct Wright's Numerator Relationship Matrix from a given pedigree
     structure. Takes the pedigree as a dictionary input and returns the
     matrix as output.
     """
     m = len(pedigree)
-    # preallocate memory for A 
-    A = np.zeros((m, m))
+    # preallocate memory for A
+    A = np.zeros((m, m), dtype=float)
 
     # iterate over rows
     for i in range(0, m):
@@ -65,8 +64,9 @@ def makeA(pedigree: dict) -> np.ndarray:
 
 
 def load_problem(A_filename: str, E_filename: str, S_filename: str,
-                 dimension: bool = False, pedigree: bool = False
-                 ) -> tuple[np.ndarray, np.ndarray, np.ndarray, int]:
+                 dimension: int | None = None, pedigree: bool = False
+                 ) -> tuple[npt.NDArray[np.float64], npt.NDArray[np.float64],
+                            npt.NDArray[np.float64], int]:
     """
     Used to load genetic selection problems into NumPy. It takes three
     string inputs for filenames where Sigma, Mu, and Omega are stored,
@@ -77,13 +77,14 @@ def load_problem(A_filename: str, E_filename: str, S_filename: str,
     arrays, E is a length n NumPy array, and n is an integer.
     """
 
-    def load_symmetric_matrix(filename: str, dimension: int) -> np.ndarray:
+    def load_symmetric_matrix(filename: str, dimension: int
+                              ) -> npt.NDArray[np.float64]:
         """
         Since NumPy doesn't have a stock way to load matrices
         stored in coordinate format format, this adds one.
         """
 
-        matrix = np.zeros([dimension, dimension])
+        matrix = np.zeros([dimension, dimension], dtype=float)
 
         with open(filename, 'r') as file:
             for line in file:
@@ -94,17 +95,12 @@ def load_symmetric_matrix(filename: str, dimension: int) -> np.ndarray:
 
         return matrix
 
-    # if dimension wasn't supplied, need to find that
+    E = np.loadtxt(E_filename, dtype=float)
+    # if dimension not specified, use `E` which doesn't need preallocation
     if not dimension:
-        # get dimension from EBV, since it's the smallest file
-        with open(E_filename, 'r') as file:
-            dimension = sum(1 for _ in file)
-
-    # TODO test whether it's better to handle dimension separately as 
-    # above or to take the hit of doing `np.loadtxt`s work explicitly
-    # but then getting the dimension for free. 
-    # EBV isn't in coordinate format so can be loaded directly
-    E = np.loadtxt(E_filename)  
+        assert isinstance(E.size, int)  # catches E being empty
+        dimension = E.size
+
     # S is stored by coordinates so need special loader
     S = load_symmetric_matrix(S_filename, dimension)
     # A can be stored as a pedigree or by coordinates
@@ -116,25 +112,33 @@ def load_symmetric_matrix(filename: str, dimension: int) -> np.ndarray:
     return A, E, S, dimension
 
 
-
-
 # DEFINING SOLVER
+# With the problems properly loaded into Numpy, this section contains functions
+# for solving those under various formulations and methods, as well as printing
+# and comparing outputted solutions.
 
 def gurobi_standard_genetics(
     sigma: np.ndarray,
     mu: np.ndarray,
     sires,  # type could be np.ndarray, sets[ints], lists[int], range, etc
-    dams,   # type could be np.ndarray, sets[ints], lists[int], range, etc 
+    dams,   # type could be np.ndarray, sets[ints], lists[int], range, etc
     lam: float,
     dimension: int,
-    upper_bound: Union[np.ndarray, float] = 1.0,
-    lower_bound: Union[np.ndarray, float] = 0.0,
-    time_limit: Union[float, None] = None,
-    min_duality_gap: Union[float, None] = None,
+    upper_bound: npt.NDArray[np.float64] | float = 1.0,
+    lower_bound: npt.NDArray[np.float64] | float = 0.0,
+    time_limit: float | None = None,
+    max_duality_gap: float | None = None,
     debug: bool = False
-) -> tuple[float, float]:
+):  # -> tuple[npt.NDArray[np.float64], float]:  # BUG Gurobi typing broken
     """
-    TODO write a docstring for solving a robust genetics problem
+    Takes a sigma, mu, sire list, dam list, and dimension as inputs and solves
+    the standard genetic selection problem with Gurobi for a given value of
+    lambda (lam), returning the portfolio and objective value as outputs. The
+    default lower and upper bounds of 0 and 1 can be changed also.
+
+    Optional arguments time_limit and max_duality_gap respectively control how
+    long Gurobi will spend on the problem and the maximum allowable duality
+    gap. Optional argument debug sets whether Gurobi prints output to terminal.
     """
 
     # create models for standard and robust genetic selection
@@ -151,12 +155,13 @@ def gurobi_standard_genetics(
 
     model.setObjective(
         # NOTE Gurobi introduces error if we use `np.inner(w, sigma@w)` here
-        w.transpose()@mubar - (lam/2)*w.transpose()@(sigma@w),
-    GRB.MAXIMIZE)
+        w.transpose()@mu - (lam/2)*w.transpose()@(sigma@w),
+        GRB.MAXIMIZE
+    )
 
     # set up the two sum-to-half constraints
-    M = np.zeros((2, dimension))
-    # define the M so that column i is [1;0] if i is a sire and [0;1] otherwise 
+    M = np.zeros((2, dimension), dtype=int)
+    # define the M so that column i is [1;0] if i is a sire and [0;1] otherwise
     M[0, sires] = 1
     M[1, dams] = 1
     # define the right hand side of the constraint Mx = m
@@ -165,7 +170,7 @@ def gurobi_standard_genetics(
 
     # optional controls to stop Gurobi taking too long
     if time_limit: model.setParam(GRB.Param.TimeLimit, time_limit)
-    if min_duality_gap: model.setParam('MIPGap', min_duality_gap)
+    if max_duality_gap: model.setParam('MIPGap', max_duality_gap)
 
     # model file can be used externally for verification
     if debug: model.write("standard-opt.mps")
@@ -173,23 +178,32 @@ def gurobi_standard_genetics(
     model.optimize()
     return w.X, model.ObjVal
 
+
 def gurobi_robust_genetics(
-    sigma: np.ndarray,
-    mubar: np.ndarray,
-    omega: np.ndarray,
+    sigma: npt.NDArray[np.float64],
+    mubar: npt.NDArray[np.float64],
+    omega: npt.NDArray[np.float64],
     sires,  # type could be np.ndarray, sets[ints], lists[int], range, etc
-    dams,   # type could be np.ndarray, sets[ints], lists[int], range, etc 
+    dams,   # type could be np.ndarray, sets[ints], lists[int], range, etc
     lam: float,
+    kappa: float,
     dimension: int,
-    upper_bound: Union[np.ndarray, float] = 1.0,
-    lower_bound: Union[np.ndarray, float] = 0.0,
-    kappa: float = 0.0,
-    time_limit: Union[float, None] = None,
-    min_duality_gap: Union[float, None] = None,
+    upper_bound: npt.NDArray[np.float64] | float = 1.0,
+    lower_bound: npt.NDArray[np.float64] | float = 0.0,
+    time_limit: float | None = None,
+    max_duality_gap: float | None = None,
     debug: bool = False
-) -> tuple[float, float, float]:
+):  # -> tuple[npt.NDArray, float, float]:  # BUG Gurobi typing broken
     """
-    TODO write a docstring for solving a robust genetics problem
+    Takes a sigma, mu-bar, omega, sire list, dam list, and dimension as inputs
+    and solves the robust genetic selection problem using Gurobi for given
+    values of lambda (lam) and kappa. It returns the portfolio and objective
+    value as outputs. The default lower and upper bounds of 0 and 1 can be
+    changed also.
+
+    Optional arguments time_limit and max_duality_gap respectively control how
+    long Gurobi will spend on the problem and the maximum allowable duality
+    gap. Optional argument debug sets whether Gurobi prints output to terminal.
     """
 
     # create models for standard and robust genetic selection
@@ -208,15 +222,16 @@ def gurobi_robust_genetics(
     model.setObjective(
         # NOTE Gurobi introduces error if we use `np.inner(w, sigma@w)` here
         w.transpose()@mubar - (lam/2)*w.transpose()@(sigma@w) - kappa*z,
-    GRB.MAXIMIZE)
+        GRB.MAXIMIZE
+    )
 
     # set up the two sum-to-half constraints
-    M = np.zeros((2, dimension))
-    # define the M so that column i is [1;0] if i is a sire and [0;1] otherwise 
+    M = np.zeros((2, dimension), dtype=int)
+    # define the M so that column i is [1;0] if i is a sire and [0;1] otherwise
     M[0, sires] = 1
     M[1, dams] = 1
     # define the right hand side of the constraint Mx = m
-    m = np.full(2, 0.5)
+    m = np.full(2, 0.5, dtype=float)
     model.addConstr(M@w == m, name="sum-to-half")
 
     # conic constraint which comes from robust optimization
@@ -224,7 +239,7 @@ def gurobi_robust_genetics(
 
     # optional controls to stop Gurobi taking too long
     if time_limit: model.setParam(GRB.Param.TimeLimit, time_limit)
-    if min_duality_gap: model.setParam('MIPGap', min_duality_gap)
+    if max_duality_gap: model.setParam('MIPGap', max_duality_gap)
 
     # model file can be used externally for verification
     if debug: model.write("robust-opt.mps")
@@ -234,37 +249,55 @@ def gurobi_robust_genetics(
 
 
 def print_compare_solutions(
-    w_std: np.ndarray,
-    objective_std: float,
-    w_rbs: np.ndarray,
-    z_rbs: float,
-    objective_rbs: float,
-    dimension: int,
-    precision: int = 5
+    portfolio1,  # : npt.NDArray[np.float64],  # BUG Gurobi typing broken
+    portfolio2,  # : npt.NDArray[np.float64],  # BUG Gurobi typing broken
+    objective1: float,
+    objective2: float,
+    precision: int = 5,
+    z1: float | None = None,
+    z2: float | None = None,
+    name1: str = "First",
+    name2: str = "Second"
 ) -> None:
     """
-    TODO write a docstring for comparing two robust genetics portfolios
+    Takes two solutions (comprised of at least a portfolio and objective value,
+    plus an optional z-value and/or solution name) as inputs, and prints a
+    comparison of the two solutions to the terminal. The number of decimals
+    values are displayed to defaults to 5, but can be changed through the
+    precision argument.
     """
 
+    dimension = portfolio1.size
     order = len(str(dimension))
-    print(" "*(order-1) + "i   w_std    w_rbs")
+
+    # HACK header breaks if precision < 3 or len(problem1) != 5
+    print(f"i{' '*(order-1)}  {name1}  {' '*(precision-3)}{name2}")
     for candidate in range(dimension):
         print(
-            f"{candidate+1:0{order}d}  " \
-            f"{w_std[candidate]:.{precision}f}  " \
-            f"{w_rbs[candidate]:.{precision}f}" 
+            f"{candidate+1:0{order}d}  "
+            f"{portfolio1[candidate]:.{precision}f}  "
+            f"{portfolio2[candidate]:.{precision}f}"
         )
 
+    def obj_string(name: str, value: float, precision: int,
+                   z: float | None = None) -> str:
+        """Helper function which handles the optional z1 and z2 values"""
+        obj_str = f"{name}: {value:.{precision}f}"
+        return f"{obj_str} (z = {z:.{precision}f})" if z else f"{obj_str}"
+
+    portfolio_abs_diff = np.abs(portfolio1-portfolio2)
     print(
-        f"\nStandard Obj.:  {objective_std:.{precision}f}" \
-        f"\nRobust Obj:     {objective_rbs:.{precision}f} (z = {z_rbs:.{precision}f})" \
-        f"\nMaximum change: {max(np.abs(w_std-w_rbs)):.{precision}f}" \
-        f"\nAverage change: {np.mean(np.abs(w_std-w_rbs)):.{precision}f}" \
-        f"\nMinimum change: {min(np.abs(w_std-w_rbs)):.{precision}f}"
+        f"\n{obj_string(f'{name1} objective', objective1, precision, z1)}"
+        f"\n{obj_string(f'{name2} objective', objective2, precision, z2)}"
+        f"\nMaximum change: {max(portfolio_abs_diff):.{precision}f}"
+        f"\nAverage change: {np.mean(portfolio_abs_diff):.{precision}f}"
+        f"\nMinimum change: {min(portfolio_abs_diff):.{precision}f}"
     )
 
 
 # MAIN
+# A temporary section which includes an example problem. This will live here
+# while porting to module, but will be removed once done.
 
 # key problem variables
 sigma, mubar, omega, n = load_problem(
@@ -273,21 +306,22 @@ def print_compare_solutions(
     "Example/04/S04.txt"
 )
 
-# NOTE this trick of handling sex data is specific to the initial simulation data
-# which is structured so that candidates alternate between sires (which have even
-# indices) and dams (which have odd indices).
+# NOTE this trick of handling sex data is specific to the initial simulation
+# data which is structured so that candidates alternate between sires (which
+# have even indices) and dams (which have odd indices).
 sires = range(0, n, 2)
-dams  = range(1, n, 2)
+dams = range(1, n, 2)
 
 lam = 0.5
 kap = 1
 
-# actually finding the standard genetics solution
+# computes the standard and robust genetic selection solutions
 w_std, obj_std = gurobi_standard_genetics(sigma, mubar, sires, dams, lam, n)
-z_std = -1  # HACK placeholder for missing value
+w_rbs, z_rbs, obj_rbs = gurobi_robust_genetics(sigma, mubar, omega, sires,
+                                               dams, lam, kap, n)
 
-# not specifying the kappa argument, so will solve the non-robust problem
-# w_std, z_std, obj_std = gurobi_robust_genetics(sigma, mubar, omega, sires, dams, lam, n)
-w_rbs, z_rbs, obj_rbs = gurobi_robust_genetics(sigma, mubar, omega, sires, dams, lam, n, kappa=kap)
+print_compare_solutions(w_std, w_rbs, obj_std, obj_rbs,
+                        z2=z_rbs, name1="w_std", name2="w_rbs")
 
-print_compare_solutions(w_std, obj_std, w_rbs, z_rbs, obj_rbs, n)
+# DONE
+pass

From 4e7ba824bc20c886b752142b6d2ae0982c1ac672 Mon Sep 17 00:00:00 2001
From: Joshua Fogg <joshua.h.fogg@gmail.com>
Date: Tue, 30 Apr 2024 16:50:10 +0100
Subject: [PATCH 4/4] Implemented module structure Had to rename from
 `robust-genetics` because of the `-` which Python doesn't support in module
 names. Chose `alphargs` b/c we're using `alphasim` output data

---
 .vscode/settings.json |   1 +
 Example/50/example.py |  32 +++++
 Example/README.md     |   2 +-
 README.md             |   8 +-
 alphargs/__init__.py  |  10 ++
 alphargs/loaders.py   | 101 +++++++++++++
 alphargs/solvers.py   | 142 ++++++++++++++++++
 alphargs/utils.py     |  55 +++++++
 example.py            |  31 ++++
 robust-genetics.py    | 327 ------------------------------------------
 10 files changed, 379 insertions(+), 330 deletions(-)
 create mode 100644 Example/50/example.py
 create mode 100644 alphargs/__init__.py
 create mode 100644 alphargs/loaders.py
 create mode 100644 alphargs/solvers.py
 create mode 100644 alphargs/utils.py
 create mode 100644 example.py
 delete mode 100644 robust-genetics.py

diff --git a/.vscode/settings.json b/.vscode/settings.json
index 856835b..d3f60f8 100644
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -1,5 +1,6 @@
 {
     "cSpell.words": [
+        "alphargs",
         "bilevel",
         "bmatrix",
         "diagflat",
diff --git a/Example/50/example.py b/Example/50/example.py
new file mode 100644
index 0000000..6d649b3
--- /dev/null
+++ b/Example/50/example.py
@@ -0,0 +1,32 @@
+#!/usr/bin/env python3
+
+import sys; sys.path.insert(1, '../../')  # HACK pre-module workaround
+import alphargs
+
+# key problem variables loaded from standard format txt files
+sigma, mubar, omega, n = alphargs.load_problem(
+    "A50.txt",
+    "EBV50.txt",
+    "S50.txt"
+)
+
+# NOTE this trick of handling sex data is specific to the initial simulation
+# data which is structured so that candidates alternate between sires (which
+# have even indices) and dams (which have odd indices).
+sires = range(0, n, 2)
+dams = range(1, n, 2)
+
+lam = 0.5
+kap = 1
+
+# computes the standard and robust genetic selection solutions
+w_std, obj_std = alphargs.gurobi_standard_genetics(
+    sigma, mubar, sires, dams, lam, n)
+w_rbs, z_rbs, obj_rbs = alphargs.gurobi_robust_genetics(
+    sigma, mubar, omega, sires, dams, lam, kap, n)
+
+alphargs.print_compare_solutions(
+    w_std, w_rbs, obj_std, obj_rbs, z2=z_rbs, name1="w_std", name2="w_rbs")
+
+
+print("\nDone!")
diff --git a/Example/README.md b/Example/README.md
index 2c3d0af..8e04305 100644
--- a/Example/README.md
+++ b/Example/README.md
@@ -1,6 +1,6 @@
 # Examples
 
-- [04/](04/) A scaled up version of the small $n = 2$ example created by Julian, replicating that problem across sires and dams.
+- [04/](04/) A scaled up version of the small $n = 2$ example created by Julian, replicating the same problem across sires and dams.
 - [50/](50/) The 50 youngest individuals from a generated cohort of 12,000. Implications of this non-random example are unknown.
 
 ## File Description
diff --git a/README.md b/README.md
index 2b65d39..a5b3e0a 100644
--- a/README.md
+++ b/README.md
@@ -1,3 +1,7 @@
-# Robust Genetics Optimization
+# AlphaRGS
 
-This repository is split four parts. The main Jupyter notebook which walks through how standard and robust optimization work for genetic selection from start to finish, part of which is [Example/](Example/), a realistic simulated example from Gregor Gorjanc and Ivan Pocrnić. We also have [Scratch/](Scratch/), a mix of Jupyter notebooks used as part of working out the problem with some additional data files and example scripts. Finally we have [Writing/](Writing/), a more formal writeup of the notebook and its findings in LaTeX (written by Julian, currently housed in Overleaf).
+Tools for solving robust genetics selection problems in Python. It depends on Python 3.10+, [NumPy](https://pypi.org/project/numpy/), and [Gurobi](https://www.gurobi.com/) via [gurobipy](https://pypi.org/project/gurobipy/).  
+
+## Structure
+
+This repository is split four parts. As well as the module in [`alphargs/`](alphargs/), there's [`Example/`](Example/) which includes a realistic simulated example from Gregor Gorjanc and Ivan Pocrnić. We also have [Scratch/](Scratch/) which includes a notebook which walks through how standard and robust optimization work for genetic selection from start to finish. Finally we have [Writing/](Writing/), a more formal writeup of the notebook and its findings in LaTeX (written by Julian, currently housed in Overleaf).
diff --git a/alphargs/__init__.py b/alphargs/__init__.py
new file mode 100644
index 0000000..496422d
--- /dev/null
+++ b/alphargs/__init__.py
@@ -0,0 +1,10 @@
+# -*- coding: utf-8 -*-
+"""
+AlphaRGS provides functions for loading and solving robust genetic selection
+problems. It uses file structures from the AlphaGenes family, Numpy for its
+internal data structures, and depends on Gurobi for its solvers.
+"""
+
+from .loaders import *
+from .solvers import *
+from .utils import *
diff --git a/alphargs/loaders.py b/alphargs/loaders.py
new file mode 100644
index 0000000..b0db25f
--- /dev/null
+++ b/alphargs/loaders.py
@@ -0,0 +1,101 @@
+# -*- coding: utf-8 -*-
+"""Loading in Genetics Data
+
+Genetics data could be presented to our solvers in multiple formats, these
+functions define the appropriate methods for loading those in correctly.
+"""
+
+import numpy as np          # defines matrix structures
+import numpy.typing as npt  # variable typing definitions for NumPy
+
+
+def load_ped(filename: str) -> dict:  # TODO add dict typing
+    """
+    Function for reading *.ped files to a dictionary. Takes the file name
+    as a string input and returns the pedigree structure as a dictionary.
+    """
+    with open(filename, "r") as file:
+        # first line of *.ped lists the headers; skip
+        file.readline()
+        # create a list of int lists from each line (dropping optional labels)
+        data = [[int(x) for x in line.split(",")[0:3]] for line in file]
+    # convert this list of lists into a dictionary
+    ped = {entry[0]: entry[1:3] for entry in data}
+
+    return ped
+
+
+def makeA(pedigree: dict) -> npt.NDArray[np.float64]:  # TODO add dict typing
+    """
+    Construct Wright's Numerator Relationship Matrix from a given pedigree
+    structure. Takes the pedigree as a dictionary input and returns the
+    matrix as output.
+    """
+    m = len(pedigree)
+    # preallocate memory for A
+    A = np.zeros((m, m), dtype=float)
+
+    # iterate over rows
+    for i in range(0, m):
+        # save parent indexes: pedigrees indexed from 1, Python from 0
+        p = pedigree[i+1][0]-1
+        q = pedigree[i+1][1]-1
+        # iterate over columns sub-diagonal
+        for j in range(0, i):
+            # calculate sub-diagonal entries
+            A[i, j] = 0.5*(A[j, p] + A[j, q])
+            # populate sup-diagonal (symmetric)
+            A[j, i] = A[i, j]
+        # calculate diagonal entries
+        A[i, i] = 1 + 0.5*A[p, q]
+
+    return A
+
+
+def load_problem(A_filename: str, E_filename: str, S_filename: str,
+                 dimension: int | None = None, pedigree: bool = False
+                 ) -> tuple[npt.NDArray[np.float64], npt.NDArray[np.float64],
+                            npt.NDArray[np.float64], int]:
+    """
+    Used to load genetic selection problems into NumPy. It takes three
+    string inputs for filenames where Sigma, Mu, and Omega are stored,
+    as well as an optional integer input for problem dimension if this
+    is known. If it's know know, it's worked out based on E_filename.
+
+    As output, it returns (A, E, S, n), where A and S are n-by-n NumPy
+    arrays, E is a length n NumPy array, and n is an integer.
+    """
+
+    def load_symmetric_matrix(filename: str, dimension: int
+                              ) -> npt.NDArray[np.float64]:
+        """
+        Since NumPy doesn't have a stock way to load matrices
+        stored in coordinate format format, this adds one.
+        """
+
+        matrix = np.zeros([dimension, dimension], dtype=float)
+
+        with open(filename, 'r') as file:
+            for line in file:
+                i, j, entry = line.split(" ")
+                # data files indexed from 1, not 0
+                matrix[int(i)-1, int(j)-1] = entry
+                matrix[int(j)-1, int(i)-1] = entry
+
+        return matrix
+
+    E = np.loadtxt(E_filename, dtype=float)
+    # if dimension not specified, use `E` which doesn't need preallocation
+    if not dimension:
+        assert isinstance(E.size, int)  # catches E being empty
+        dimension = E.size
+
+    # S is stored by coordinates so need special loader
+    S = load_symmetric_matrix(S_filename, dimension)
+    # A can be stored as a pedigree or by coordinates
+    if pedigree:
+        A = makeA(load_ped(A_filename))
+    else:
+        A = load_symmetric_matrix(A_filename, dimension)
+
+    return A, E, S, dimension
diff --git a/alphargs/solvers.py b/alphargs/solvers.py
new file mode 100644
index 0000000..f0dacc8
--- /dev/null
+++ b/alphargs/solvers.py
@@ -0,0 +1,142 @@
+# -*- coding: utf-8 -*-
+"""Defining Solvers
+
+With the problems properly loaded into Numpy, this section contains functions
+for solving those under various formulations and methods.
+"""
+
+import numpy as np          # defines matrix structures
+import numpy.typing as npt  # variable typing definitions for NumPy
+import gurobipy as gp       # Gurobi optimization interface (1)
+from gurobipy import GRB    # Gurobi optimization interface (2)
+
+
+def gurobi_standard_genetics(
+    sigma: np.ndarray,
+    mu: np.ndarray,
+    sires,  # type could be np.ndarray, sets[ints], lists[int], range, etc
+    dams,   # type could be np.ndarray, sets[ints], lists[int], range, etc
+    lam: float,
+    dimension: int,
+    upper_bound: npt.NDArray[np.float64] | float = 1.0,
+    lower_bound: npt.NDArray[np.float64] | float = 0.0,
+    time_limit: float | None = None,
+    max_duality_gap: float | None = None,
+    debug: bool = False
+):  # -> tuple[npt.NDArray[np.float64], float]:  # BUG Gurobi typing broken
+    """
+    Takes a sigma, mu, sire list, dam list, and dimension as inputs and solves
+    the standard genetic selection problem with Gurobi for a given value of
+    lambda (lam), returning the portfolio and objective value as outputs. The
+    default lower and upper bounds of 0 and 1 can be changed also.
+
+    Optional arguments time_limit and max_duality_gap respectively control how
+    long Gurobi will spend on the problem and the maximum allowable duality
+    gap. Optional argument debug sets whether Gurobi prints output to terminal.
+    """
+
+    # create models for standard and robust genetic selection
+    model = gp.Model("standard-genetics")
+
+    # Gurobi spews all its output into the terminal by default, this restricts
+    # that behaviour to only happen when the `debug` flag is used.
+    if not debug: model.setParam('OutputFlag', 0)
+
+    # integrating bounds within variable definitions is more efficient than
+    # as a separate constraint, which Gurobi would convert to bounds anyway
+    w = model.addMVar(shape=dimension, lb=lower_bound, ub=upper_bound,
+                      vtype=GRB.CONTINUOUS, name="w")
+
+    model.setObjective(
+        # NOTE Gurobi introduces error if we use `np.inner(w, sigma@w)` here
+        w.transpose()@mu - (lam/2)*w.transpose()@(sigma@w),
+        GRB.MAXIMIZE
+    )
+
+    # set up the two sum-to-half constraints
+    M = np.zeros((2, dimension), dtype=int)
+    # define the M so that column i is [1;0] if i is a sire and [0;1] otherwise
+    M[0, sires] = 1
+    M[1, dams] = 1
+    # define the right hand side of the constraint Mx = m
+    m = np.full(2, 0.5)
+    model.addConstr(M@w == m, name="sum-to-half")
+
+    # optional controls to stop Gurobi taking too long
+    if time_limit: model.setParam(GRB.Param.TimeLimit, time_limit)
+    if max_duality_gap: model.setParam('MIPGap', max_duality_gap)
+
+    # model file can be used externally for verification
+    if debug: model.write("standard-opt.mps")
+
+    model.optimize()
+    return w.X, model.ObjVal
+
+
+def gurobi_robust_genetics(
+    sigma: npt.NDArray[np.float64],
+    mubar: npt.NDArray[np.float64],
+    omega: npt.NDArray[np.float64],
+    sires,  # type could be np.ndarray, sets[ints], lists[int], range, etc
+    dams,   # type could be np.ndarray, sets[ints], lists[int], range, etc
+    lam: float,
+    kappa: float,
+    dimension: int,
+    upper_bound: npt.NDArray[np.float64] | float = 1.0,
+    lower_bound: npt.NDArray[np.float64] | float = 0.0,
+    time_limit: float | None = None,
+    max_duality_gap: float | None = None,
+    debug: bool = False
+):  # -> tuple[npt.NDArray, float, float]:  # BUG Gurobi typing broken
+    """
+    Takes a sigma, mu-bar, omega, sire list, dam list, and dimension as inputs
+    and solves the robust genetic selection problem using Gurobi for given
+    values of lambda (lam) and kappa. It returns the portfolio and objective
+    value as outputs. The default lower and upper bounds of 0 and 1 can be
+    changed also.
+
+    Optional arguments time_limit and max_duality_gap respectively control how
+    long Gurobi will spend on the problem and the maximum allowable duality
+    gap. Optional argument debug sets whether Gurobi prints output to terminal.
+    """
+
+    # create models for standard and robust genetic selection
+    model = gp.Model("robust-genetics")
+
+    # Gurobi spews all its output into the terminal by default, this restricts
+    # that behaviour to only happen when the `debug` flag is used.
+    if not debug: model.setParam('OutputFlag', 0)
+
+    # integrating bounds within variable definitions is more efficient than
+    # as a separate constraint, which Gurobi would convert to bounds anyway
+    w = model.addMVar(shape=dimension, lb=lower_bound, ub=upper_bound,
+                      vtype=GRB.CONTINUOUS, name="w")
+    z = model.addVar(lb=0.0, name="z")
+
+    model.setObjective(
+        # NOTE Gurobi introduces error if we use `np.inner(w, sigma@w)` here
+        w.transpose()@mubar - (lam/2)*w.transpose()@(sigma@w) - kappa*z,
+        GRB.MAXIMIZE
+    )
+
+    # set up the two sum-to-half constraints
+    M = np.zeros((2, dimension), dtype=int)
+    # define the M so that column i is [1;0] if i is a sire and [0;1] otherwise
+    M[0, sires] = 1
+    M[1, dams] = 1
+    # define the right hand side of the constraint Mx = m
+    m = np.full(2, 0.5, dtype=float)
+    model.addConstr(M@w == m, name="sum-to-half")
+
+    # conic constraint which comes from robust optimization
+    model.addConstr(z**2 >= w.transpose()@omega@w, name="uncertainty")
+
+    # optional controls to stop Gurobi taking too long
+    if time_limit: model.setParam(GRB.Param.TimeLimit, time_limit)
+    if max_duality_gap: model.setParam('MIPGap', max_duality_gap)
+
+    # model file can be used externally for verification
+    if debug: model.write("robust-opt.mps")
+
+    model.optimize()
+    return w.X, z.X, model.ObjVal
diff --git a/alphargs/utils.py b/alphargs/utils.py
new file mode 100644
index 0000000..d0a6cfe
--- /dev/null
+++ b/alphargs/utils.py
@@ -0,0 +1,55 @@
+# -*- coding: utf-8 -*-
+"""Utilities
+
+This file contains additional utilities, such as for printing and comparing
+portfolios produced by the solvers.
+"""
+
+import numpy as np
+
+
+def print_compare_solutions(
+    portfolio1,  # : npt.NDArray[np.float64],  # BUG Gurobi typing broken
+    portfolio2,  # : npt.NDArray[np.float64],  # BUG Gurobi typing broken
+    objective1: float,
+    objective2: float,
+    precision: int = 5,
+    z1: float | None = None,
+    z2: float | None = None,
+    name1: str = "First",
+    name2: str = "Second"
+) -> None:
+    """
+    Takes two solutions (comprised of at least a portfolio and objective value,
+    plus an optional z-value and/or solution name) as inputs, and prints a
+    comparison of the two solutions to the terminal. The number of decimals
+    values are displayed to defaults to 5, but can be changed through the
+    precision argument.
+    """
+
+    dimension = portfolio1.size
+    order = len(str(dimension))
+
+    # HACK header breaks if precision < 3 or len(problem1) != 5
+    print(f"i{' '*(order-1)}  {name1}  {' '*(precision-3)}{name2}")
+    for candidate in range(dimension):
+        print(
+            f"{candidate+1:0{order}d}  "
+            f"{portfolio1[candidate]:.{precision}f}  "
+            f"{portfolio2[candidate]:.{precision}f}"
+        )
+
+    def obj_string(name: str, value: float, precision: int,
+                   z: float | None = None) -> str:
+        """Helper function which handles the optional z1 and z2 values"""
+        obj_str = f"{name}: {value:.{precision}f}"
+        return f"{obj_str} (z = {z:.{precision}f})" if z else f"{obj_str}"
+
+    portfolio_abs_diff = np.abs(portfolio1-portfolio2)
+    print(
+        f"\n{obj_string(f'{name1} objective', objective1, precision, z1)}"
+        f"\n{obj_string(f'{name2} objective', objective2, precision, z2)}"
+        f"\nMaximum change: {max(portfolio_abs_diff):.{precision}f}"
+        f"\nAverage change: {np.mean(portfolio_abs_diff):.{precision}f}"
+        f"\nMinimum change: {min(portfolio_abs_diff):.{precision}f}"
+    )
diff --git a/example.py b/example.py
new file mode 100644
index 0000000..80c7f49
--- /dev/null
+++ b/example.py
@@ -0,0 +1,31 @@
+#!/usr/bin/env python3
+
+import alphargs
+
+# key problem variables loaded from standard format txt files
+sigma, mubar, omega, n = alphargs.load_problem(
+    "Example/04/A04.txt",
+    "Example/04/EBV04.txt",
+    "Example/04/S04.txt"
+)
+
+# NOTE this trick of handling sex data is specific to the initial simulation
+# data which is structured so that candidates alternate between sires (which
+# have even indices) and dams (which have odd indices).
+sires = range(0, n, 2)
+dams = range(1, n, 2)
+
+lam = 0.5
+kap = 1
+
+# computes the standard and robust genetic selection solutions
+w_std, obj_std = alphargs.gurobi_standard_genetics(
+    sigma, mubar, sires, dams, lam, n)
+w_rbs, z_rbs, obj_rbs = alphargs.gurobi_robust_genetics(
+    sigma, mubar, omega, sires, dams, lam, kap, n)
+
+alphargs.print_compare_solutions(
+    w_std, w_rbs, obj_std, obj_rbs, z2=z_rbs, name1="w_std", name2="w_rbs")
+
+
+print("\nDone!")
diff --git a/robust-genetics.py b/robust-genetics.py
deleted file mode 100644
index dff1abf..0000000
--- a/robust-genetics.py
+++ /dev/null
@@ -1,327 +0,0 @@
-#!/usr/bin/env python3
-
-import numpy as np          # defines matrix structures
-import numpy.typing as npt  # variable typing definitions for NumPy
-import gurobipy as gp       # Gurobi optimization interface (1)
-from gurobipy import GRB    # Gurobi optimization interface (2)
-from sys import maxsize     # maximum precision for specific system
-
-
-# OUTPUT SETTINGS
-# Numpy makes some odd choices with its default output formats, this adjusts
-# the most intrusive of those for anything which uses this utility.
-
-np.set_printoptions(
-    formatter={'float_kind': "{:.8f}".format},  # only show to 8 d.p.
-    threshold=maxsize  # want to round rather than truncate when printing
-)
-
-
-# READING GENETICS DATA
-# Genetics data could be presented to our solvers in multiple formats, these
-# functions define the appropriate methods for loading those in correctly.
-
-def load_ped(filename: str) -> dict:
-    """
-    Function for reading *.ped files to a dictionary. Takes the file name
-    as a string input and returns the pedigree structure as a dictionary.
-    """
-    with open(filename, "r") as file:
-        # first line of *.ped lists the headers; skip
-        file.readline()
-        # create a list of int lists from each line (dropping optional labels)
-        data = [[int(x) for x in line.split(",")[0:3]] for line in file]
-    # convert this list of lists into a dictionary
-    ped = {entry[0]: entry[1:3] for entry in data}
-    return ped
-
-
-def makeA(pedigree: dict) -> npt.NDArray[np.float64]:
-    """
-    Construct Wright's Numerator Relationship Matrix from a given pedigree
-    structure. Takes the pedigree as a dictionary input and returns the
-    matrix as output.
-    """
-    m = len(pedigree)
-    # preallocate memory for A
-    A = np.zeros((m, m), dtype=float)
-
-    # iterate over rows
-    for i in range(0, m):
-        # save parent indexes: pedigrees indexed from 1, Python from 0
-        p = pedigree[i+1][0]-1
-        q = pedigree[i+1][1]-1
-        # iterate over columns sub-diagonal
-        for j in range(0, i):
-            # calculate sub-diagonal entries
-            A[i, j] = 0.5*(A[j, p] + A[j, q])
-            # populate sup-diagonal (symmetric)
-            A[j, i] = A[i, j]
-        # calculate diagonal entries
-        A[i, i] = 1 + 0.5*A[p, q]
-
-    return A
-
-
-def load_problem(A_filename: str, E_filename: str, S_filename: str,
-                 dimension: int | None = None, pedigree: bool = False
-                 ) -> tuple[npt.NDArray[np.float64], npt.NDArray[np.float64],
-                            npt.NDArray[np.float64], int]:
-    """
-    Used to load genetic selection problems into NumPy. It takes three
-    string inputs for filenames where Sigma, Mu, and Omega are stored,
-    as well as an optional integer input for problem dimension if this
-    is known. If it's know know, it's worked out based on E_filename.
-
-    As output, it returns (A, E, S, n), where A and S are n-by-n NumPy
-    arrays, E is a length n NumPy array, and n is an integer.
-    """
-
-    def load_symmetric_matrix(filename: str, dimension: int
-                              ) -> npt.NDArray[np.float64]:
-        """
-        Since NumPy doesn't have a stock way to load matrices
-        stored in coordinate format format, this adds one.
-        """
-
-        matrix = np.zeros([dimension, dimension], dtype=float)
-
-        with open(filename, 'r') as file:
-            for line in file:
-                i, j, entry = line.split(" ")
-                # data files indexed from 1, not 0
-                matrix[int(i)-1, int(j)-1] = entry
-                matrix[int(j)-1, int(i)-1] = entry
-
-        return matrix
-
-    E = np.loadtxt(E_filename, dtype=float)
-    # if dimension not specified, use `E` which doesn't need preallocation
-    if not dimension:
-        assert isinstance(E.size, int)  # catches E being empty
-        dimension = E.size
-
-    # S is stored by coordinates so need special loader
-    S = load_symmetric_matrix(S_filename, dimension)
-    # A can be stored as a pedigree or by coordinates
-    if pedigree:
-        A = makeA(load_ped(A_filename))
-    else:
-        A = load_symmetric_matrix(A_filename, dimension)
-
-    return A, E, S, dimension
-
-
-# DEFINING SOLVER
-# With the problems properly loaded into Numpy, this section contains functions
-# for solving those under various formulations and methods, as well as printing
-# and comparing outputted solutions.
-
-def gurobi_standard_genetics(
-    sigma: np.ndarray,
-    mu: np.ndarray,
-    sires,  # type could be np.ndarray, sets[ints], lists[int], range, etc
-    dams,   # type could be np.ndarray, sets[ints], lists[int], range, etc
-    lam: float,
-    dimension: int,
-    upper_bound: npt.NDArray[np.float64] | float = 1.0,
-    lower_bound: npt.NDArray[np.float64] | float = 0.0,
-    time_limit: float | None = None,
-    max_duality_gap: float | None = None,
-    debug: bool = False
-):  # -> tuple[npt.NDArray[np.float64], float]:  # BUG Gurobi typing broken
-    """
-    Takes a sigma, mu, sire list, dam list, and dimension as inputs and solves
-    the standard genetic selection problem with Gurobi for a given value of
-    lambda (lam), returning the portfolio and objective value as outputs. The
-    default lower and upper bounds of 0 and 1 can be changed also.
-
-    Optional arguments time_limit and max_duality_gap respectively control how
-    long Gurobi will spend on the problem and the maximum allowable duality
-    gap. Optional argument debug sets whether Gurobi prints output to terminal.
-    """
-
-    # create models for standard and robust genetic selection
-    model = gp.Model("standard-genetics")
-
-    # Gurobi spews all its output into the terminal by default, this restricts
-    # that behaviour to only happen when the `debug` flag is used.
-    if not debug: model.setParam('OutputFlag', 0)
-
-    # integrating bounds within variable definitions is more efficient than
-    # as a separate constraint, which Gurobi would convert to bounds anyway
-    w = model.addMVar(shape=dimension, lb=lower_bound, ub=upper_bound,
-                      vtype=GRB.CONTINUOUS, name="w")
-
-    model.setObjective(
-        # NOTE Gurobi introduces error if we use `np.inner(w, sigma@w)` here
-        w.transpose()@mu - (lam/2)*w.transpose()@(sigma@w),
-        GRB.MAXIMIZE
-    )
-
-    # set up the two sum-to-half constraints
-    M = np.zeros((2, dimension), dtype=int)
-    # define the M so that column i is [1;0] if i is a sire and [0;1] otherwise
-    M[0, sires] = 1
-    M[1, dams] = 1
-    # define the right hand side of the constraint Mx = m
-    m = np.full(2, 0.5)
-    model.addConstr(M@w == m, name="sum-to-half")
-
-    # optional controls to stop Gurobi taking too long
-    if time_limit: model.setParam(GRB.Param.TimeLimit, time_limit)
-    if max_duality_gap: model.setParam('MIPGap', max_duality_gap)
-
-    # model file can be used externally for verification
-    if debug: model.write("standard-opt.mps")
-
-    model.optimize()
-    return w.X, model.ObjVal
-
-
-def gurobi_robust_genetics(
-    sigma: npt.NDArray[np.float64],
-    mubar: npt.NDArray[np.float64],
-    omega: npt.NDArray[np.float64],
-    sires,  # type could be np.ndarray, sets[ints], lists[int], range, etc
-    dams,   # type could be np.ndarray, sets[ints], lists[int], range, etc
-    lam: float,
-    kappa: float,
-    dimension: int,
-    upper_bound: npt.NDArray[np.float64] | float = 1.0,
-    lower_bound: npt.NDArray[np.float64] | float = 0.0,
-    time_limit: float | None = None,
-    max_duality_gap: float | None = None,
-    debug: bool = False
-):  # -> tuple[npt.NDArray, float, float]:  # BUG Gurobi typing broken
-    """
-    Takes a sigma, mu-bar, omega, sire list, dam list, and dimension as inputs
-    and solves the robust genetic selection problem using Gurobi for given
-    values of lambda (lam) and kappa. It returns the portfolio and objective
-    value as outputs. The default lower and upper bounds of 0 and 1 can be
-    changed also.
-
-    Optional arguments time_limit and max_duality_gap respectively control how
-    long Gurobi will spend on the problem and the maximum allowable duality
-    gap. Optional argument debug sets whether Gurobi prints output to terminal.
-    """
-
-    # create models for standard and robust genetic selection
-    model = gp.Model("robust-genetics")
-
-    # Gurobi spews all its output into the terminal by default, this restricts
-    # that behaviour to only happen when the `debug` flag is used.
-    if not debug: model.setParam('OutputFlag', 0)
-
-    # integrating bounds within variable definitions is more efficient than
-    # as a separate constraint, which Gurobi would convert to bounds anyway
-    w = model.addMVar(shape=dimension, lb=lower_bound, ub=upper_bound,
-                      vtype=GRB.CONTINUOUS, name="w")
-    z = model.addVar(lb=0.0, name="z")
-
-    model.setObjective(
-        # NOTE Gurobi introduces error if we use `np.inner(w, sigma@w)` here
-        w.transpose()@mubar - (lam/2)*w.transpose()@(sigma@w) - kappa*z,
-        GRB.MAXIMIZE
-    )
-
-    # set up the two sum-to-half constraints
-    M = np.zeros((2, dimension), dtype=int)
-    # define the M so that column i is [1;0] if i is a sire and [0;1] otherwise
-    M[0, sires] = 1
-    M[1, dams] = 1
-    # define the right hand side of the constraint Mx = m
-    m = np.full(2, 0.5, dtype=float)
-    model.addConstr(M@w == m, name="sum-to-half")
-
-    # conic constraint which comes from robust optimization
-    model.addConstr(z**2 >= w.transpose()@omega@w, name="uncertainty")
-
-    # optional controls to stop Gurobi taking too long
-    if time_limit: model.setParam(GRB.Param.TimeLimit, time_limit)
-    if max_duality_gap: model.setParam('MIPGap', max_duality_gap)
-
-    # model file can be used externally for verification
-    if debug: model.write("robust-opt.mps")
-
-    model.optimize()
-    return w.X, z.X, model.ObjVal
-
-
-def print_compare_solutions(
-    portfolio1,  # : npt.NDArray[np.float64],  # BUG Gurobi typing broken
-    portfolio2,  # : npt.NDArray[np.float64],  # BUG Gurobi typing broken
-    objective1: float,
-    objective2: float,
-    precision: int = 5,
-    z1: float | None = None,
-    z2: float | None = None,
-    name1: str = "First",
-    name2: str = "Second"
-) -> None:
-    """
-    Takes two solutions (comprised of at least a portfolio and objective value,
-    plus an optional z-value and/or solution name) as inputs, and prints a
-    comparison of the two solutions to the terminal. The number of decimals
-    values are displayed to defaults to 5, but can be changed through the
-    precision argument.
-    """
-
-    dimension = portfolio1.size
-    order = len(str(dimension))
-
-    # HACK header breaks if precision < 3 or len(problem1) != 5
-    print(f"i{' '*(order-1)}  {name1}  {' '*(precision-3)}{name2}")
-    for candidate in range(dimension):
-        print(
-            f"{candidate+1:0{order}d}  "
-            f"{portfolio1[candidate]:.{precision}f}  "
-            f"{portfolio2[candidate]:.{precision}f}"
-        )
-
-    def obj_string(name: str, value: float, precision: int,
-                   z: float | None = None) -> str:
-        """Helper function which handles the optional z1 and z2 values"""
-        obj_str = f"{name}: {value:.{precision}f}"
-        return f"{obj_str} (z = {z:.{precision}f})" if z else f"{obj_str}"
-
-    portfolio_abs_diff = np.abs(portfolio1-portfolio2)
-    print(
-        f"\n{obj_string(f'{name1} objective', objective1, precision, z1)}"
-        f"\n{obj_string(f'{name2} objective', objective2, precision, z2)}"
-        f"\nMaximum change: {max(portfolio_abs_diff):.{precision}f}"
-        f"\nAverage change: {np.mean(portfolio_abs_diff):.{precision}f}"
-        f"\nMinimum change: {min(portfolio_abs_diff):.{precision}f}"
-    )
-
-
-# MAIN
-# A temporary section which includes an example problem. This will live here
-# while porting to module, but will be removed once done.
-
-# key problem variables
-sigma, mubar, omega, n = load_problem(
-    "Example/04/A04.txt",
-    "Example/04/EBV04.txt",
-    "Example/04/S04.txt"
-)
-
-# NOTE this trick of handling sex data is specific to the initial simulation
-# data which is structured so that candidates alternate between sires (which
-# have even indices) and dams (which have odd indices).
-sires = range(0, n, 2)
-dams = range(1, n, 2)
-
-lam = 0.5
-kap = 1
-
-# computes the standard and robust genetic selection solutions
-w_std, obj_std = gurobi_standard_genetics(sigma, mubar, sires, dams, lam, n)
-w_rbs, z_rbs, obj_rbs = gurobi_robust_genetics(sigma, mubar, omega, sires,
-                                               dams, lam, kap, n)
-
-print_compare_solutions(w_std, w_rbs, obj_std, obj_rbs,
-                        z2=z_rbs, name1="w_std", name2="w_rbs")
-
-# DONE
-pass