From bbe6369f4f4362022da466cc8c5663bb4f3284df Mon Sep 17 00:00:00 2001 From: Joshua Fogg Date: Fri, 26 Apr 2024 14:13:36 +0100 Subject: [PATCH 1/4] More sensible directory structure for examples As we add more, working with larger and larger examples, it doesn't make sense to have a directory for each --- Example/04/A04.txt | 4 ++++ Example/04/EBV04.txt | 4 ++++ Example/04/S04.txt | 4 ++++ Example/{ => 50}/A50.txt | 0 Example/{ => 50}/EBV50.txt | 0 Example/{ => 50}/S50.txt | 0 Example/README.md | 9 ++++++--- {Example2 => Scratch/Example}/A02.txt | 0 {Example2 => Scratch/Example}/EBV02.txt | 0 {Example2 => Scratch/Example}/README.md | 2 +- {Example2 => Scratch/Example}/S02.txt | 0 {Example2 => Scratch/Example}/n2-test.ipynb | 0 12 files changed, 19 insertions(+), 4 deletions(-) create mode 100644 Example/04/A04.txt create mode 100644 Example/04/EBV04.txt create mode 100644 Example/04/S04.txt rename Example/{ => 50}/A50.txt (100%) rename Example/{ => 50}/EBV50.txt (100%) rename Example/{ => 50}/S50.txt (100%) rename {Example2 => Scratch/Example}/A02.txt (100%) rename {Example2 => Scratch/Example}/EBV02.txt (100%) rename {Example2 => Scratch/Example}/README.md (59%) rename {Example2 => Scratch/Example}/S02.txt (100%) rename {Example2 => Scratch/Example}/n2-test.ipynb (100%) diff --git a/Example/04/A04.txt b/Example/04/A04.txt new file mode 100644 index 0000000..3012329 --- /dev/null +++ b/Example/04/A04.txt @@ -0,0 +1,4 @@ +1 1 1 +2 2 1 +3 3 1 +4 4 1 diff --git a/Example/04/EBV04.txt b/Example/04/EBV04.txt new file mode 100644 index 0000000..be589c9 --- /dev/null +++ b/Example/04/EBV04.txt @@ -0,0 +1,4 @@ +1 +1 +2 +2 diff --git a/Example/04/S04.txt b/Example/04/S04.txt new file mode 100644 index 0000000..ae2331b --- /dev/null +++ b/Example/04/S04.txt @@ -0,0 +1,4 @@ +1 1 0.11111111111111 +2 2 0.11111111111111 +3 3 4.0 +4 4 4.0 \ No newline at end of file diff --git a/Example/A50.txt b/Example/50/A50.txt similarity index 100% rename from Example/A50.txt rename to Example/50/A50.txt diff --git a/Example/EBV50.txt b/Example/50/EBV50.txt similarity index 100% rename from Example/EBV50.txt rename to Example/50/EBV50.txt diff --git a/Example/S50.txt b/Example/50/S50.txt similarity index 100% rename from Example/S50.txt rename to Example/50/S50.txt diff --git a/Example/README.md b/Example/README.md index b15b17e..2c3d0af 100644 --- a/Example/README.md +++ b/Example/README.md @@ -1,4 +1,9 @@ -# File Description +# Examples + +- [04/](04/) A scaled up version of the small $n = 2$ example created by Julian, replicating that problem across sires and dams. +- [50/](50/) The 50 youngest individuals from a generated cohort of 12,000. Implications of this non-random example are unknown. + +## File Description In the original simulation, we had 12k individuals and 1k samples of their estimated breeding values (EBV) – our selection criterion. @@ -8,8 +13,6 @@ Based on that simulated data: - I constructed the S-matrix (12k x 12k), which is the covariance matrix between 1k EBV samples for 12k individuals. [S50] - The EBV vector (12k), which is posterior mean over 1k samples of EBV. [EBV50] -Since 12k might be a lot, I have extracted last 50 rows/columns from above 12k set, for your test. This corresponds to the youngest 50 individuals (I don’t know the implications of choosing this non-random sample ). - The matrices are saved in row, column, value format like we talked about before. _Note: this is an unedited version of Gregor's description of the files._ diff --git a/Example2/A02.txt b/Scratch/Example/A02.txt similarity index 100% rename from Example2/A02.txt rename to Scratch/Example/A02.txt diff --git a/Example2/EBV02.txt b/Scratch/Example/EBV02.txt similarity index 100% rename from Example2/EBV02.txt rename to Scratch/Example/EBV02.txt diff --git a/Example2/README.md b/Scratch/Example/README.md similarity index 59% rename from Example2/README.md rename to Scratch/Example/README.md index 1a3bd3e..9324efb 100644 --- a/Example2/README.md +++ b/Scratch/Example/README.md @@ -1,3 +1,3 @@ # Example 2 -This is a small example for $n = 2$ created by Julian. It uses the same file formats as [Example 1](../Example/). +This is a small example for $n = 2$ created by Julian. It uses the same file formats as [Example 1](../../Example/). diff --git a/Example2/S02.txt b/Scratch/Example/S02.txt similarity index 100% rename from Example2/S02.txt rename to Scratch/Example/S02.txt diff --git a/Example2/n2-test.ipynb b/Scratch/Example/n2-test.ipynb similarity index 100% rename from Example2/n2-test.ipynb rename to Scratch/Example/n2-test.ipynb From 60b31ee994093c2d0ad9d93a19e171ecacd362e1 Mon Sep 17 00:00:00 2001 From: Joshua Fogg Date: Fri, 26 Apr 2024 14:15:43 +0100 Subject: [PATCH 2/4] Began rewrite in raw Python At the moment a test problem is in the same file as the worker functions, but I'll split those out once happy it's not caused any regressions. As part of this I've added typing for function input and outputs. --- .../robust-genetics.ipynb | 0 robust-genetics.py | 293 ++++++++++++++++++ 2 files changed, 293 insertions(+) rename robust-genetics.ipynb => Scratch/robust-genetics.ipynb (100%) create mode 100644 robust-genetics.py diff --git a/robust-genetics.ipynb b/Scratch/robust-genetics.ipynb similarity index 100% rename from robust-genetics.ipynb rename to Scratch/robust-genetics.ipynb diff --git a/robust-genetics.py b/robust-genetics.py new file mode 100644 index 0000000..7ec86f1 --- /dev/null +++ b/robust-genetics.py @@ -0,0 +1,293 @@ +#!/usr/bin/env python3 + +import numpy as np # defines matrix structures +import gurobipy as gp # Gurobi optimization interface (1) +from gurobipy import GRB # Gurobi optimization interface (2) +from typing import Union # TODO replace with stock `|` from 3.10+ + + +# OUTPUT SETTINGS +# Numpy makes some odd choices with its default output formats, +# this adjusts the most intrusive of those for anything which +# uses this utility. + +# want to round rather than truncate when printing +np.set_printoptions(threshold=np.inf) + +# only show numpy output to eight decimal places +np.set_printoptions(formatter={'float_kind':"{:.8f}".format}) + + + +# READING GENETICS DATA +# TODO write a more descriptive section heading + +def load_ped(filename: str) -> dict: + """ + Function for reading *.ped files to a dictionary. Takes the file name + as a string input and returns the pedigree structure as a dictionary. + """ + with open(filename, "r") as file: + # first line of *.ped lists the headers; skip + file.readline() + # create a list of int lists from each line (dropping optional labels) + data = [[int(x) for x in line.split(",")[0:3]] for line in file] + # convert this list of lists into a dictionary + ped = {entry[0]: entry[1:3] for entry in data} + return ped + + +def makeA(pedigree: dict) -> np.ndarray: + """ + Construct Wright's Numerator Relationship Matrix from a given pedigree + structure. Takes the pedigree as a dictionary input and returns the + matrix as output. + """ + m = len(pedigree) + # preallocate memory for A + A = np.zeros((m, m)) + + # iterate over rows + for i in range(0, m): + # save parent indexes: pedigrees indexed from 1, Python from 0 + p = pedigree[i+1][0]-1 + q = pedigree[i+1][1]-1 + # iterate over columns sub-diagonal + for j in range(0, i): + # calculate sub-diagonal entries + A[i, j] = 0.5*(A[j, p] + A[j, q]) + # populate sup-diagonal (symmetric) + A[j, i] = A[i, j] + # calculate diagonal entries + A[i, i] = 1 + 0.5*A[p, q] + + return A + + +def load_problem(A_filename: str, E_filename: str, S_filename: str, + dimension: bool = False, pedigree: bool = False + ) -> tuple[np.ndarray, np.ndarray, np.ndarray, int]: + """ + Used to load genetic selection problems into NumPy. It takes three + string inputs for filenames where Sigma, Mu, and Omega are stored, + as well as an optional integer input for problem dimension if this + is known. If it's know know, it's worked out based on E_filename. + + As output, it returns (A, E, S, n), where A and S are n-by-n NumPy + arrays, E is a length n NumPy array, and n is an integer. + """ + + def load_symmetric_matrix(filename: str, dimension: int) -> np.ndarray: + """ + Since NumPy doesn't have a stock way to load matrices + stored in coordinate format format, this adds one. + """ + + matrix = np.zeros([dimension, dimension]) + + with open(filename, 'r') as file: + for line in file: + i, j, entry = line.split(" ") + # data files indexed from 1, not 0 + matrix[int(i)-1, int(j)-1] = entry + matrix[int(j)-1, int(i)-1] = entry + + return matrix + + # if dimension wasn't supplied, need to find that + if not dimension: + # get dimension from EBV, since it's the smallest file + with open(E_filename, 'r') as file: + dimension = sum(1 for _ in file) + + # TODO test whether it's better to handle dimension separately as + # above or to take the hit of doing `np.loadtxt`s work explicitly + # but then getting the dimension for free. + # EBV isn't in coordinate format so can be loaded directly + E = np.loadtxt(E_filename) + # S is stored by coordinates so need special loader + S = load_symmetric_matrix(S_filename, dimension) + # A can be stored as a pedigree or by coordinates + if pedigree: + A = makeA(load_ped(A_filename)) + else: + A = load_symmetric_matrix(A_filename, dimension) + + return A, E, S, dimension + + + + +# DEFINING SOLVER + +def gurobi_standard_genetics( + sigma: np.ndarray, + mu: np.ndarray, + sires, # type could be np.ndarray, sets[ints], lists[int], range, etc + dams, # type could be np.ndarray, sets[ints], lists[int], range, etc + lam: float, + dimension: int, + upper_bound: Union[np.ndarray, float] = 1.0, + lower_bound: Union[np.ndarray, float] = 0.0, + time_limit: Union[float, None] = None, + min_duality_gap: Union[float, None] = None, + debug: bool = False +) -> tuple[float, float]: + """ + TODO write a docstring for solving a robust genetics problem + """ + + # create models for standard and robust genetic selection + model = gp.Model("standard-genetics") + + # Gurobi spews all its output into the terminal by default, this restricts + # that behaviour to only happen when the `debug` flag is used. + if not debug: model.setParam('OutputFlag', 0) + + # integrating bounds within variable definitions is more efficient than + # as a separate constraint, which Gurobi would convert to bounds anyway + w = model.addMVar(shape=dimension, lb=lower_bound, ub=upper_bound, + vtype=GRB.CONTINUOUS, name="w") + + model.setObjective( + # NOTE Gurobi introduces error if we use `np.inner(w, sigma@w)` here + w.transpose()@mubar - (lam/2)*w.transpose()@(sigma@w), + GRB.MAXIMIZE) + + # set up the two sum-to-half constraints + M = np.zeros((2, dimension)) + # define the M so that column i is [1;0] if i is a sire and [0;1] otherwise + M[0, sires] = 1 + M[1, dams] = 1 + # define the right hand side of the constraint Mx = m + m = np.full(2, 0.5) + model.addConstr(M@w == m, name="sum-to-half") + + # optional controls to stop Gurobi taking too long + if time_limit: model.setParam(GRB.Param.TimeLimit, time_limit) + if min_duality_gap: model.setParam('MIPGap', min_duality_gap) + + # model file can be used externally for verification + if debug: model.write("standard-opt.mps") + + model.optimize() + return w.X, model.ObjVal + +def gurobi_robust_genetics( + sigma: np.ndarray, + mubar: np.ndarray, + omega: np.ndarray, + sires, # type could be np.ndarray, sets[ints], lists[int], range, etc + dams, # type could be np.ndarray, sets[ints], lists[int], range, etc + lam: float, + dimension: int, + upper_bound: Union[np.ndarray, float] = 1.0, + lower_bound: Union[np.ndarray, float] = 0.0, + kappa: float = 0.0, + time_limit: Union[float, None] = None, + min_duality_gap: Union[float, None] = None, + debug: bool = False +) -> tuple[float, float, float]: + """ + TODO write a docstring for solving a robust genetics problem + """ + + # create models for standard and robust genetic selection + model = gp.Model("robust-genetics") + + # Gurobi spews all its output into the terminal by default, this restricts + # that behaviour to only happen when the `debug` flag is used. + if not debug: model.setParam('OutputFlag', 0) + + # integrating bounds within variable definitions is more efficient than + # as a separate constraint, which Gurobi would convert to bounds anyway + w = model.addMVar(shape=dimension, lb=lower_bound, ub=upper_bound, + vtype=GRB.CONTINUOUS, name="w") + z = model.addVar(lb=0.0, name="z") + + model.setObjective( + # NOTE Gurobi introduces error if we use `np.inner(w, sigma@w)` here + w.transpose()@mubar - (lam/2)*w.transpose()@(sigma@w) - kappa*z, + GRB.MAXIMIZE) + + # set up the two sum-to-half constraints + M = np.zeros((2, dimension)) + # define the M so that column i is [1;0] if i is a sire and [0;1] otherwise + M[0, sires] = 1 + M[1, dams] = 1 + # define the right hand side of the constraint Mx = m + m = np.full(2, 0.5) + model.addConstr(M@w == m, name="sum-to-half") + + # conic constraint which comes from robust optimization + model.addConstr(z**2 >= w.transpose()@omega@w, name="uncertainty") + + # optional controls to stop Gurobi taking too long + if time_limit: model.setParam(GRB.Param.TimeLimit, time_limit) + if min_duality_gap: model.setParam('MIPGap', min_duality_gap) + + # model file can be used externally for verification + if debug: model.write("robust-opt.mps") + + model.optimize() + return w.X, z.X, model.ObjVal + + +def print_compare_solutions( + w_std: np.ndarray, + objective_std: float, + w_rbs: np.ndarray, + z_rbs: float, + objective_rbs: float, + dimension: int, + precision: int = 5 +) -> None: + """ + TODO write a docstring for comparing two robust genetics portfolios + """ + + order = len(str(dimension)) + print(" "*(order-1) + "i w_std w_rbs") + for candidate in range(dimension): + print( + f"{candidate+1:0{order}d} " \ + f"{w_std[candidate]:.{precision}f} " \ + f"{w_rbs[candidate]:.{precision}f}" + ) + + print( + f"\nStandard Obj.: {objective_std:.{precision}f}" \ + f"\nRobust Obj: {objective_rbs:.{precision}f} (z = {z_rbs:.{precision}f})" \ + f"\nMaximum change: {max(np.abs(w_std-w_rbs)):.{precision}f}" \ + f"\nAverage change: {np.mean(np.abs(w_std-w_rbs)):.{precision}f}" \ + f"\nMinimum change: {min(np.abs(w_std-w_rbs)):.{precision}f}" + ) + + +# MAIN + +# key problem variables +sigma, mubar, omega, n = load_problem( + "Example/04/A04.txt", + "Example/04/EBV04.txt", + "Example/04/S04.txt" +) + +# NOTE this trick of handling sex data is specific to the initial simulation data +# which is structured so that candidates alternate between sires (which have even +# indices) and dams (which have odd indices). +sires = range(0, n, 2) +dams = range(1, n, 2) + +lam = 0.5 +kap = 1 + +# actually finding the standard genetics solution +w_std, obj_std = gurobi_standard_genetics(sigma, mubar, sires, dams, lam, n) +z_std = -1 # HACK placeholder for missing value + +# not specifying the kappa argument, so will solve the non-robust problem +# w_std, z_std, obj_std = gurobi_robust_genetics(sigma, mubar, omega, sires, dams, lam, n) +w_rbs, z_rbs, obj_rbs = gurobi_robust_genetics(sigma, mubar, omega, sires, dams, lam, n, kappa=kap) + +print_compare_solutions(w_std, obj_std, w_rbs, z_rbs, obj_rbs, n) From 6a47becc74ae5cd8c5565b26be92de99936cac81 Mon Sep 17 00:00:00 2001 From: Joshua Fogg Date: Mon, 29 Apr 2024 17:57:55 +0100 Subject: [PATCH 3/4] Fixes for docs, typing, and PEP8 --- .vscode/settings.json | 5 +- robust-genetics.py | 204 ++++++++++++++++++++++++------------------ 2 files changed, 123 insertions(+), 86 deletions(-) diff --git a/.vscode/settings.json b/.vscode/settings.json index 1549fc7..856835b 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -3,6 +3,7 @@ "bilevel", "bmatrix", "diagflat", + "dtype", "Gorjanc", "Gregor", "Gregor's", @@ -15,6 +16,7 @@ "mathbb", "mathcal", "mubar", + "operatorname", "Pocrnić", "printoptions", "proxqp", @@ -22,5 +24,6 @@ "sqrtm", "vtype", "WNRM" - ] + ], + "python.analysis.typeCheckingMode": "basic" } diff --git a/robust-genetics.py b/robust-genetics.py index 7ec86f1..dff1abf 100644 --- a/robust-genetics.py +++ b/robust-genetics.py @@ -1,26 +1,25 @@ #!/usr/bin/env python3 import numpy as np # defines matrix structures +import numpy.typing as npt # variable typing definitions for NumPy import gurobipy as gp # Gurobi optimization interface (1) from gurobipy import GRB # Gurobi optimization interface (2) -from typing import Union # TODO replace with stock `|` from 3.10+ +from sys import maxsize # maximum precision for specific system # OUTPUT SETTINGS -# Numpy makes some odd choices with its default output formats, -# this adjusts the most intrusive of those for anything which -# uses this utility. - -# want to round rather than truncate when printing -np.set_printoptions(threshold=np.inf) - -# only show numpy output to eight decimal places -np.set_printoptions(formatter={'float_kind':"{:.8f}".format}) +# Numpy makes some odd choices with its default output formats, this adjusts +# the most intrusive of those for anything which uses this utility. +np.set_printoptions( + formatter={'float_kind': "{:.8f}".format}, # only show to 8 d.p. + threshold=maxsize # want to round rather than truncate when printing +) # READING GENETICS DATA -# TODO write a more descriptive section heading +# Genetics data could be presented to our solvers in multiple formats, these +# functions define the appropriate methods for loading those in correctly. def load_ped(filename: str) -> dict: """ @@ -37,15 +36,15 @@ def load_ped(filename: str) -> dict: return ped -def makeA(pedigree: dict) -> np.ndarray: +def makeA(pedigree: dict) -> npt.NDArray[np.float64]: """ Construct Wright's Numerator Relationship Matrix from a given pedigree structure. Takes the pedigree as a dictionary input and returns the matrix as output. """ m = len(pedigree) - # preallocate memory for A - A = np.zeros((m, m)) + # preallocate memory for A + A = np.zeros((m, m), dtype=float) # iterate over rows for i in range(0, m): @@ -65,8 +64,9 @@ def makeA(pedigree: dict) -> np.ndarray: def load_problem(A_filename: str, E_filename: str, S_filename: str, - dimension: bool = False, pedigree: bool = False - ) -> tuple[np.ndarray, np.ndarray, np.ndarray, int]: + dimension: int | None = None, pedigree: bool = False + ) -> tuple[npt.NDArray[np.float64], npt.NDArray[np.float64], + npt.NDArray[np.float64], int]: """ Used to load genetic selection problems into NumPy. It takes three string inputs for filenames where Sigma, Mu, and Omega are stored, @@ -77,13 +77,14 @@ def load_problem(A_filename: str, E_filename: str, S_filename: str, arrays, E is a length n NumPy array, and n is an integer. """ - def load_symmetric_matrix(filename: str, dimension: int) -> np.ndarray: + def load_symmetric_matrix(filename: str, dimension: int + ) -> npt.NDArray[np.float64]: """ Since NumPy doesn't have a stock way to load matrices stored in coordinate format format, this adds one. """ - matrix = np.zeros([dimension, dimension]) + matrix = np.zeros([dimension, dimension], dtype=float) with open(filename, 'r') as file: for line in file: @@ -94,17 +95,12 @@ def load_symmetric_matrix(filename: str, dimension: int) -> np.ndarray: return matrix - # if dimension wasn't supplied, need to find that + E = np.loadtxt(E_filename, dtype=float) + # if dimension not specified, use `E` which doesn't need preallocation if not dimension: - # get dimension from EBV, since it's the smallest file - with open(E_filename, 'r') as file: - dimension = sum(1 for _ in file) - - # TODO test whether it's better to handle dimension separately as - # above or to take the hit of doing `np.loadtxt`s work explicitly - # but then getting the dimension for free. - # EBV isn't in coordinate format so can be loaded directly - E = np.loadtxt(E_filename) + assert isinstance(E.size, int) # catches E being empty + dimension = E.size + # S is stored by coordinates so need special loader S = load_symmetric_matrix(S_filename, dimension) # A can be stored as a pedigree or by coordinates @@ -116,25 +112,33 @@ def load_symmetric_matrix(filename: str, dimension: int) -> np.ndarray: return A, E, S, dimension - - # DEFINING SOLVER +# With the problems properly loaded into Numpy, this section contains functions +# for solving those under various formulations and methods, as well as printing +# and comparing outputted solutions. def gurobi_standard_genetics( sigma: np.ndarray, mu: np.ndarray, sires, # type could be np.ndarray, sets[ints], lists[int], range, etc - dams, # type could be np.ndarray, sets[ints], lists[int], range, etc + dams, # type could be np.ndarray, sets[ints], lists[int], range, etc lam: float, dimension: int, - upper_bound: Union[np.ndarray, float] = 1.0, - lower_bound: Union[np.ndarray, float] = 0.0, - time_limit: Union[float, None] = None, - min_duality_gap: Union[float, None] = None, + upper_bound: npt.NDArray[np.float64] | float = 1.0, + lower_bound: npt.NDArray[np.float64] | float = 0.0, + time_limit: float | None = None, + max_duality_gap: float | None = None, debug: bool = False -) -> tuple[float, float]: +): # -> tuple[npt.NDArray[np.float64], float]: # BUG Gurobi typing broken """ - TODO write a docstring for solving a robust genetics problem + Takes a sigma, mu, sire list, dam list, and dimension as inputs and solves + the standard genetic selection problem with Gurobi for a given value of + lambda (lam), returning the portfolio and objective value as outputs. The + default lower and upper bounds of 0 and 1 can be changed also. + + Optional arguments time_limit and max_duality_gap respectively control how + long Gurobi will spend on the problem and the maximum allowable duality + gap. Optional argument debug sets whether Gurobi prints output to terminal. """ # create models for standard and robust genetic selection @@ -151,12 +155,13 @@ def gurobi_standard_genetics( model.setObjective( # NOTE Gurobi introduces error if we use `np.inner(w, sigma@w)` here - w.transpose()@mubar - (lam/2)*w.transpose()@(sigma@w), - GRB.MAXIMIZE) + w.transpose()@mu - (lam/2)*w.transpose()@(sigma@w), + GRB.MAXIMIZE + ) # set up the two sum-to-half constraints - M = np.zeros((2, dimension)) - # define the M so that column i is [1;0] if i is a sire and [0;1] otherwise + M = np.zeros((2, dimension), dtype=int) + # define the M so that column i is [1;0] if i is a sire and [0;1] otherwise M[0, sires] = 1 M[1, dams] = 1 # define the right hand side of the constraint Mx = m @@ -165,7 +170,7 @@ def gurobi_standard_genetics( # optional controls to stop Gurobi taking too long if time_limit: model.setParam(GRB.Param.TimeLimit, time_limit) - if min_duality_gap: model.setParam('MIPGap', min_duality_gap) + if max_duality_gap: model.setParam('MIPGap', max_duality_gap) # model file can be used externally for verification if debug: model.write("standard-opt.mps") @@ -173,23 +178,32 @@ def gurobi_standard_genetics( model.optimize() return w.X, model.ObjVal + def gurobi_robust_genetics( - sigma: np.ndarray, - mubar: np.ndarray, - omega: np.ndarray, + sigma: npt.NDArray[np.float64], + mubar: npt.NDArray[np.float64], + omega: npt.NDArray[np.float64], sires, # type could be np.ndarray, sets[ints], lists[int], range, etc - dams, # type could be np.ndarray, sets[ints], lists[int], range, etc + dams, # type could be np.ndarray, sets[ints], lists[int], range, etc lam: float, + kappa: float, dimension: int, - upper_bound: Union[np.ndarray, float] = 1.0, - lower_bound: Union[np.ndarray, float] = 0.0, - kappa: float = 0.0, - time_limit: Union[float, None] = None, - min_duality_gap: Union[float, None] = None, + upper_bound: npt.NDArray[np.float64] | float = 1.0, + lower_bound: npt.NDArray[np.float64] | float = 0.0, + time_limit: float | None = None, + max_duality_gap: float | None = None, debug: bool = False -) -> tuple[float, float, float]: +): # -> tuple[npt.NDArray, float, float]: # BUG Gurobi typing broken """ - TODO write a docstring for solving a robust genetics problem + Takes a sigma, mu-bar, omega, sire list, dam list, and dimension as inputs + and solves the robust genetic selection problem using Gurobi for given + values of lambda (lam) and kappa. It returns the portfolio and objective + value as outputs. The default lower and upper bounds of 0 and 1 can be + changed also. + + Optional arguments time_limit and max_duality_gap respectively control how + long Gurobi will spend on the problem and the maximum allowable duality + gap. Optional argument debug sets whether Gurobi prints output to terminal. """ # create models for standard and robust genetic selection @@ -208,15 +222,16 @@ def gurobi_robust_genetics( model.setObjective( # NOTE Gurobi introduces error if we use `np.inner(w, sigma@w)` here w.transpose()@mubar - (lam/2)*w.transpose()@(sigma@w) - kappa*z, - GRB.MAXIMIZE) + GRB.MAXIMIZE + ) # set up the two sum-to-half constraints - M = np.zeros((2, dimension)) - # define the M so that column i is [1;0] if i is a sire and [0;1] otherwise + M = np.zeros((2, dimension), dtype=int) + # define the M so that column i is [1;0] if i is a sire and [0;1] otherwise M[0, sires] = 1 M[1, dams] = 1 # define the right hand side of the constraint Mx = m - m = np.full(2, 0.5) + m = np.full(2, 0.5, dtype=float) model.addConstr(M@w == m, name="sum-to-half") # conic constraint which comes from robust optimization @@ -224,7 +239,7 @@ def gurobi_robust_genetics( # optional controls to stop Gurobi taking too long if time_limit: model.setParam(GRB.Param.TimeLimit, time_limit) - if min_duality_gap: model.setParam('MIPGap', min_duality_gap) + if max_duality_gap: model.setParam('MIPGap', max_duality_gap) # model file can be used externally for verification if debug: model.write("robust-opt.mps") @@ -234,37 +249,55 @@ def gurobi_robust_genetics( def print_compare_solutions( - w_std: np.ndarray, - objective_std: float, - w_rbs: np.ndarray, - z_rbs: float, - objective_rbs: float, - dimension: int, - precision: int = 5 + portfolio1, # : npt.NDArray[np.float64], # BUG Gurobi typing broken + portfolio2, # : npt.NDArray[np.float64], # BUG Gurobi typing broken + objective1: float, + objective2: float, + precision: int = 5, + z1: float | None = None, + z2: float | None = None, + name1: str = "First", + name2: str = "Second" ) -> None: """ - TODO write a docstring for comparing two robust genetics portfolios + Takes two solutions (comprised of at least a portfolio and objective value, + plus an optional z-value and/or solution name) as inputs, and prints a + comparison of the two solutions to the terminal. The number of decimals + values are displayed to defaults to 5, but can be changed through the + precision argument. """ + dimension = portfolio1.size order = len(str(dimension)) - print(" "*(order-1) + "i w_std w_rbs") + + # HACK header breaks if precision < 3 or len(problem1) != 5 + print(f"i{' '*(order-1)} {name1} {' '*(precision-3)}{name2}") for candidate in range(dimension): print( - f"{candidate+1:0{order}d} " \ - f"{w_std[candidate]:.{precision}f} " \ - f"{w_rbs[candidate]:.{precision}f}" + f"{candidate+1:0{order}d} " + f"{portfolio1[candidate]:.{precision}f} " + f"{portfolio2[candidate]:.{precision}f}" ) + def obj_string(name: str, value: float, precision: int, + z: float | None = None) -> str: + """Helper function which handles the optional z1 and z2 values""" + obj_str = f"{name}: {value:.{precision}f}" + return f"{obj_str} (z = {z:.{precision}f})" if z else f"{obj_str}" + + portfolio_abs_diff = np.abs(portfolio1-portfolio2) print( - f"\nStandard Obj.: {objective_std:.{precision}f}" \ - f"\nRobust Obj: {objective_rbs:.{precision}f} (z = {z_rbs:.{precision}f})" \ - f"\nMaximum change: {max(np.abs(w_std-w_rbs)):.{precision}f}" \ - f"\nAverage change: {np.mean(np.abs(w_std-w_rbs)):.{precision}f}" \ - f"\nMinimum change: {min(np.abs(w_std-w_rbs)):.{precision}f}" + f"\n{obj_string(f'{name1} objective', objective1, precision, z1)}" + f"\n{obj_string(f'{name2} objective', objective2, precision, z2)}" + f"\nMaximum change: {max(portfolio_abs_diff):.{precision}f}" + f"\nAverage change: {np.mean(portfolio_abs_diff):.{precision}f}" + f"\nMinimum change: {min(portfolio_abs_diff):.{precision}f}" ) # MAIN +# A temporary section which includes an example problem. This will live here +# while porting to module, but will be removed once done. # key problem variables sigma, mubar, omega, n = load_problem( @@ -273,21 +306,22 @@ def print_compare_solutions( "Example/04/S04.txt" ) -# NOTE this trick of handling sex data is specific to the initial simulation data -# which is structured so that candidates alternate between sires (which have even -# indices) and dams (which have odd indices). +# NOTE this trick of handling sex data is specific to the initial simulation +# data which is structured so that candidates alternate between sires (which +# have even indices) and dams (which have odd indices). sires = range(0, n, 2) -dams = range(1, n, 2) +dams = range(1, n, 2) lam = 0.5 kap = 1 -# actually finding the standard genetics solution +# computes the standard and robust genetic selection solutions w_std, obj_std = gurobi_standard_genetics(sigma, mubar, sires, dams, lam, n) -z_std = -1 # HACK placeholder for missing value +w_rbs, z_rbs, obj_rbs = gurobi_robust_genetics(sigma, mubar, omega, sires, + dams, lam, kap, n) -# not specifying the kappa argument, so will solve the non-robust problem -# w_std, z_std, obj_std = gurobi_robust_genetics(sigma, mubar, omega, sires, dams, lam, n) -w_rbs, z_rbs, obj_rbs = gurobi_robust_genetics(sigma, mubar, omega, sires, dams, lam, n, kappa=kap) +print_compare_solutions(w_std, w_rbs, obj_std, obj_rbs, + z2=z_rbs, name1="w_std", name2="w_rbs") -print_compare_solutions(w_std, obj_std, w_rbs, z_rbs, obj_rbs, n) +# DONE +pass From 4e7ba824bc20c886b752142b6d2ae0982c1ac672 Mon Sep 17 00:00:00 2001 From: Joshua Fogg Date: Tue, 30 Apr 2024 16:50:10 +0100 Subject: [PATCH 4/4] Implemented module structure Had to rename from `robust-genetics` because of the `-` which Python doesn't support in module names. Chose `alphargs` b/c we're using `alphasim` output data --- .vscode/settings.json | 1 + Example/50/example.py | 32 +++++ Example/README.md | 2 +- README.md | 8 +- alphargs/__init__.py | 10 ++ alphargs/loaders.py | 101 +++++++++++++ alphargs/solvers.py | 142 ++++++++++++++++++ alphargs/utils.py | 55 +++++++ example.py | 31 ++++ robust-genetics.py | 327 ------------------------------------------ 10 files changed, 379 insertions(+), 330 deletions(-) create mode 100644 Example/50/example.py create mode 100644 alphargs/__init__.py create mode 100644 alphargs/loaders.py create mode 100644 alphargs/solvers.py create mode 100644 alphargs/utils.py create mode 100644 example.py delete mode 100644 robust-genetics.py diff --git a/.vscode/settings.json b/.vscode/settings.json index 856835b..d3f60f8 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -1,5 +1,6 @@ { "cSpell.words": [ + "alphargs", "bilevel", "bmatrix", "diagflat", diff --git a/Example/50/example.py b/Example/50/example.py new file mode 100644 index 0000000..6d649b3 --- /dev/null +++ b/Example/50/example.py @@ -0,0 +1,32 @@ +#!/usr/bin/env python3 + +import sys; sys.path.insert(1, '../../') # HACK pre-module workaround +import alphargs + +# key problem variables loaded from standard format txt files +sigma, mubar, omega, n = alphargs.load_problem( + "A50.txt", + "EBV50.txt", + "S50.txt" +) + +# NOTE this trick of handling sex data is specific to the initial simulation +# data which is structured so that candidates alternate between sires (which +# have even indices) and dams (which have odd indices). +sires = range(0, n, 2) +dams = range(1, n, 2) + +lam = 0.5 +kap = 1 + +# computes the standard and robust genetic selection solutions +w_std, obj_std = alphargs.gurobi_standard_genetics( + sigma, mubar, sires, dams, lam, n) +w_rbs, z_rbs, obj_rbs = alphargs.gurobi_robust_genetics( + sigma, mubar, omega, sires, dams, lam, kap, n) + +alphargs.print_compare_solutions( + w_std, w_rbs, obj_std, obj_rbs, z2=z_rbs, name1="w_std", name2="w_rbs") + + +print("\nDone!") diff --git a/Example/README.md b/Example/README.md index 2c3d0af..8e04305 100644 --- a/Example/README.md +++ b/Example/README.md @@ -1,6 +1,6 @@ # Examples -- [04/](04/) A scaled up version of the small $n = 2$ example created by Julian, replicating that problem across sires and dams. +- [04/](04/) A scaled up version of the small $n = 2$ example created by Julian, replicating the same problem across sires and dams. - [50/](50/) The 50 youngest individuals from a generated cohort of 12,000. Implications of this non-random example are unknown. ## File Description diff --git a/README.md b/README.md index 2b65d39..a5b3e0a 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,7 @@ -# Robust Genetics Optimization +# AlphaRGS -This repository is split four parts. The main Jupyter notebook which walks through how standard and robust optimization work for genetic selection from start to finish, part of which is [Example/](Example/), a realistic simulated example from Gregor Gorjanc and Ivan Pocrnić. We also have [Scratch/](Scratch/), a mix of Jupyter notebooks used as part of working out the problem with some additional data files and example scripts. Finally we have [Writing/](Writing/), a more formal writeup of the notebook and its findings in LaTeX (written by Julian, currently housed in Overleaf). +Tools for solving robust genetics selection problems in Python. It depends on Python 3.10+, [NumPy](https://pypi.org/project/numpy/), and [Gurobi](https://www.gurobi.com/) via [gurobipy](https://pypi.org/project/gurobipy/). + +## Structure + +This repository is split four parts. As well as the module in [`alphargs/`](alphargs/), there's [`Example/`](Example/) which includes a realistic simulated example from Gregor Gorjanc and Ivan Pocrnić. We also have [Scratch/](Scratch/) which includes a notebook which walks through how standard and robust optimization work for genetic selection from start to finish. Finally we have [Writing/](Writing/), a more formal writeup of the notebook and its findings in LaTeX (written by Julian, currently housed in Overleaf). diff --git a/alphargs/__init__.py b/alphargs/__init__.py new file mode 100644 index 0000000..496422d --- /dev/null +++ b/alphargs/__init__.py @@ -0,0 +1,10 @@ +# -*- coding: utf-8 -*- +""" +AlphaRGS provides functions for loading and solving robust genetic selection +problems. It uses file structures from the AlphaGenes family, Numpy for its +internal data structures, and depends on Gurobi for its solvers. +""" + +from .loaders import * +from .solvers import * +from .utils import * diff --git a/alphargs/loaders.py b/alphargs/loaders.py new file mode 100644 index 0000000..b0db25f --- /dev/null +++ b/alphargs/loaders.py @@ -0,0 +1,101 @@ +# -*- coding: utf-8 -*- +"""Loading in Genetics Data + +Genetics data could be presented to our solvers in multiple formats, these +functions define the appropriate methods for loading those in correctly. +""" + +import numpy as np # defines matrix structures +import numpy.typing as npt # variable typing definitions for NumPy + + +def load_ped(filename: str) -> dict: # TODO add dict typing + """ + Function for reading *.ped files to a dictionary. Takes the file name + as a string input and returns the pedigree structure as a dictionary. + """ + with open(filename, "r") as file: + # first line of *.ped lists the headers; skip + file.readline() + # create a list of int lists from each line (dropping optional labels) + data = [[int(x) for x in line.split(",")[0:3]] for line in file] + # convert this list of lists into a dictionary + ped = {entry[0]: entry[1:3] for entry in data} + + return ped + + +def makeA(pedigree: dict) -> npt.NDArray[np.float64]: # TODO add dict typing + """ + Construct Wright's Numerator Relationship Matrix from a given pedigree + structure. Takes the pedigree as a dictionary input and returns the + matrix as output. + """ + m = len(pedigree) + # preallocate memory for A + A = np.zeros((m, m), dtype=float) + + # iterate over rows + for i in range(0, m): + # save parent indexes: pedigrees indexed from 1, Python from 0 + p = pedigree[i+1][0]-1 + q = pedigree[i+1][1]-1 + # iterate over columns sub-diagonal + for j in range(0, i): + # calculate sub-diagonal entries + A[i, j] = 0.5*(A[j, p] + A[j, q]) + # populate sup-diagonal (symmetric) + A[j, i] = A[i, j] + # calculate diagonal entries + A[i, i] = 1 + 0.5*A[p, q] + + return A + + +def load_problem(A_filename: str, E_filename: str, S_filename: str, + dimension: int | None = None, pedigree: bool = False + ) -> tuple[npt.NDArray[np.float64], npt.NDArray[np.float64], + npt.NDArray[np.float64], int]: + """ + Used to load genetic selection problems into NumPy. It takes three + string inputs for filenames where Sigma, Mu, and Omega are stored, + as well as an optional integer input for problem dimension if this + is known. If it's know know, it's worked out based on E_filename. + + As output, it returns (A, E, S, n), where A and S are n-by-n NumPy + arrays, E is a length n NumPy array, and n is an integer. + """ + + def load_symmetric_matrix(filename: str, dimension: int + ) -> npt.NDArray[np.float64]: + """ + Since NumPy doesn't have a stock way to load matrices + stored in coordinate format format, this adds one. + """ + + matrix = np.zeros([dimension, dimension], dtype=float) + + with open(filename, 'r') as file: + for line in file: + i, j, entry = line.split(" ") + # data files indexed from 1, not 0 + matrix[int(i)-1, int(j)-1] = entry + matrix[int(j)-1, int(i)-1] = entry + + return matrix + + E = np.loadtxt(E_filename, dtype=float) + # if dimension not specified, use `E` which doesn't need preallocation + if not dimension: + assert isinstance(E.size, int) # catches E being empty + dimension = E.size + + # S is stored by coordinates so need special loader + S = load_symmetric_matrix(S_filename, dimension) + # A can be stored as a pedigree or by coordinates + if pedigree: + A = makeA(load_ped(A_filename)) + else: + A = load_symmetric_matrix(A_filename, dimension) + + return A, E, S, dimension diff --git a/alphargs/solvers.py b/alphargs/solvers.py new file mode 100644 index 0000000..f0dacc8 --- /dev/null +++ b/alphargs/solvers.py @@ -0,0 +1,142 @@ +# -*- coding: utf-8 -*- +"""Defining Solvers + +With the problems properly loaded into Numpy, this section contains functions +for solving those under various formulations and methods. +""" + +import numpy as np # defines matrix structures +import numpy.typing as npt # variable typing definitions for NumPy +import gurobipy as gp # Gurobi optimization interface (1) +from gurobipy import GRB # Gurobi optimization interface (2) + + +def gurobi_standard_genetics( + sigma: np.ndarray, + mu: np.ndarray, + sires, # type could be np.ndarray, sets[ints], lists[int], range, etc + dams, # type could be np.ndarray, sets[ints], lists[int], range, etc + lam: float, + dimension: int, + upper_bound: npt.NDArray[np.float64] | float = 1.0, + lower_bound: npt.NDArray[np.float64] | float = 0.0, + time_limit: float | None = None, + max_duality_gap: float | None = None, + debug: bool = False +): # -> tuple[npt.NDArray[np.float64], float]: # BUG Gurobi typing broken + """ + Takes a sigma, mu, sire list, dam list, and dimension as inputs and solves + the standard genetic selection problem with Gurobi for a given value of + lambda (lam), returning the portfolio and objective value as outputs. The + default lower and upper bounds of 0 and 1 can be changed also. + + Optional arguments time_limit and max_duality_gap respectively control how + long Gurobi will spend on the problem and the maximum allowable duality + gap. Optional argument debug sets whether Gurobi prints output to terminal. + """ + + # create models for standard and robust genetic selection + model = gp.Model("standard-genetics") + + # Gurobi spews all its output into the terminal by default, this restricts + # that behaviour to only happen when the `debug` flag is used. + if not debug: model.setParam('OutputFlag', 0) + + # integrating bounds within variable definitions is more efficient than + # as a separate constraint, which Gurobi would convert to bounds anyway + w = model.addMVar(shape=dimension, lb=lower_bound, ub=upper_bound, + vtype=GRB.CONTINUOUS, name="w") + + model.setObjective( + # NOTE Gurobi introduces error if we use `np.inner(w, sigma@w)` here + w.transpose()@mu - (lam/2)*w.transpose()@(sigma@w), + GRB.MAXIMIZE + ) + + # set up the two sum-to-half constraints + M = np.zeros((2, dimension), dtype=int) + # define the M so that column i is [1;0] if i is a sire and [0;1] otherwise + M[0, sires] = 1 + M[1, dams] = 1 + # define the right hand side of the constraint Mx = m + m = np.full(2, 0.5) + model.addConstr(M@w == m, name="sum-to-half") + + # optional controls to stop Gurobi taking too long + if time_limit: model.setParam(GRB.Param.TimeLimit, time_limit) + if max_duality_gap: model.setParam('MIPGap', max_duality_gap) + + # model file can be used externally for verification + if debug: model.write("standard-opt.mps") + + model.optimize() + return w.X, model.ObjVal + + +def gurobi_robust_genetics( + sigma: npt.NDArray[np.float64], + mubar: npt.NDArray[np.float64], + omega: npt.NDArray[np.float64], + sires, # type could be np.ndarray, sets[ints], lists[int], range, etc + dams, # type could be np.ndarray, sets[ints], lists[int], range, etc + lam: float, + kappa: float, + dimension: int, + upper_bound: npt.NDArray[np.float64] | float = 1.0, + lower_bound: npt.NDArray[np.float64] | float = 0.0, + time_limit: float | None = None, + max_duality_gap: float | None = None, + debug: bool = False +): # -> tuple[npt.NDArray, float, float]: # BUG Gurobi typing broken + """ + Takes a sigma, mu-bar, omega, sire list, dam list, and dimension as inputs + and solves the robust genetic selection problem using Gurobi for given + values of lambda (lam) and kappa. It returns the portfolio and objective + value as outputs. The default lower and upper bounds of 0 and 1 can be + changed also. + + Optional arguments time_limit and max_duality_gap respectively control how + long Gurobi will spend on the problem and the maximum allowable duality + gap. Optional argument debug sets whether Gurobi prints output to terminal. + """ + + # create models for standard and robust genetic selection + model = gp.Model("robust-genetics") + + # Gurobi spews all its output into the terminal by default, this restricts + # that behaviour to only happen when the `debug` flag is used. + if not debug: model.setParam('OutputFlag', 0) + + # integrating bounds within variable definitions is more efficient than + # as a separate constraint, which Gurobi would convert to bounds anyway + w = model.addMVar(shape=dimension, lb=lower_bound, ub=upper_bound, + vtype=GRB.CONTINUOUS, name="w") + z = model.addVar(lb=0.0, name="z") + + model.setObjective( + # NOTE Gurobi introduces error if we use `np.inner(w, sigma@w)` here + w.transpose()@mubar - (lam/2)*w.transpose()@(sigma@w) - kappa*z, + GRB.MAXIMIZE + ) + + # set up the two sum-to-half constraints + M = np.zeros((2, dimension), dtype=int) + # define the M so that column i is [1;0] if i is a sire and [0;1] otherwise + M[0, sires] = 1 + M[1, dams] = 1 + # define the right hand side of the constraint Mx = m + m = np.full(2, 0.5, dtype=float) + model.addConstr(M@w == m, name="sum-to-half") + + # conic constraint which comes from robust optimization + model.addConstr(z**2 >= w.transpose()@omega@w, name="uncertainty") + + # optional controls to stop Gurobi taking too long + if time_limit: model.setParam(GRB.Param.TimeLimit, time_limit) + if max_duality_gap: model.setParam('MIPGap', max_duality_gap) + + # model file can be used externally for verification + if debug: model.write("robust-opt.mps") + + model.optimize() + return w.X, z.X, model.ObjVal diff --git a/alphargs/utils.py b/alphargs/utils.py new file mode 100644 index 0000000..d0a6cfe --- /dev/null +++ b/alphargs/utils.py @@ -0,0 +1,55 @@ +# -*- coding: utf-8 -*- +"""Utilities + +This file contains additional utilities, such as for printing and comparing +portfolios produced by the solvers. +""" + +import numpy as np + + +def print_compare_solutions( + portfolio1, # : npt.NDArray[np.float64], # BUG Gurobi typing broken + portfolio2, # : npt.NDArray[np.float64], # BUG Gurobi typing broken + objective1: float, + objective2: float, + precision: int = 5, + z1: float | None = None, + z2: float | None = None, + name1: str = "First", + name2: str = "Second" +) -> None: + """ + Takes two solutions (comprised of at least a portfolio and objective value, + plus an optional z-value and/or solution name) as inputs, and prints a + comparison of the two solutions to the terminal. The number of decimals + values are displayed to defaults to 5, but can be changed through the + precision argument. + """ + + dimension = portfolio1.size + order = len(str(dimension)) + + # HACK header breaks if precision < 3 or len(problem1) != 5 + print(f"i{' '*(order-1)} {name1} {' '*(precision-3)}{name2}") + for candidate in range(dimension): + print( + f"{candidate+1:0{order}d} " + f"{portfolio1[candidate]:.{precision}f} " + f"{portfolio2[candidate]:.{precision}f}" + ) + + def obj_string(name: str, value: float, precision: int, + z: float | None = None) -> str: + """Helper function which handles the optional z1 and z2 values""" + obj_str = f"{name}: {value:.{precision}f}" + return f"{obj_str} (z = {z:.{precision}f})" if z else f"{obj_str}" + + portfolio_abs_diff = np.abs(portfolio1-portfolio2) + print( + f"\n{obj_string(f'{name1} objective', objective1, precision, z1)}" + f"\n{obj_string(f'{name2} objective', objective2, precision, z2)}" + f"\nMaximum change: {max(portfolio_abs_diff):.{precision}f}" + f"\nAverage change: {np.mean(portfolio_abs_diff):.{precision}f}" + f"\nMinimum change: {min(portfolio_abs_diff):.{precision}f}" + ) diff --git a/example.py b/example.py new file mode 100644 index 0000000..80c7f49 --- /dev/null +++ b/example.py @@ -0,0 +1,31 @@ +#!/usr/bin/env python3 + +import alphargs + +# key problem variables loaded from standard format txt files +sigma, mubar, omega, n = alphargs.load_problem( + "Example/04/A04.txt", + "Example/04/EBV04.txt", + "Example/04/S04.txt" +) + +# NOTE this trick of handling sex data is specific to the initial simulation +# data which is structured so that candidates alternate between sires (which +# have even indices) and dams (which have odd indices). +sires = range(0, n, 2) +dams = range(1, n, 2) + +lam = 0.5 +kap = 1 + +# computes the standard and robust genetic selection solutions +w_std, obj_std = alphargs.gurobi_standard_genetics( + sigma, mubar, sires, dams, lam, n) +w_rbs, z_rbs, obj_rbs = alphargs.gurobi_robust_genetics( + sigma, mubar, omega, sires, dams, lam, kap, n) + +alphargs.print_compare_solutions( + w_std, w_rbs, obj_std, obj_rbs, z2=z_rbs, name1="w_std", name2="w_rbs") + + +print("\nDone!") diff --git a/robust-genetics.py b/robust-genetics.py deleted file mode 100644 index dff1abf..0000000 --- a/robust-genetics.py +++ /dev/null @@ -1,327 +0,0 @@ -#!/usr/bin/env python3 - -import numpy as np # defines matrix structures -import numpy.typing as npt # variable typing definitions for NumPy -import gurobipy as gp # Gurobi optimization interface (1) -from gurobipy import GRB # Gurobi optimization interface (2) -from sys import maxsize # maximum precision for specific system - - -# OUTPUT SETTINGS -# Numpy makes some odd choices with its default output formats, this adjusts -# the most intrusive of those for anything which uses this utility. - -np.set_printoptions( - formatter={'float_kind': "{:.8f}".format}, # only show to 8 d.p. - threshold=maxsize # want to round rather than truncate when printing -) - - -# READING GENETICS DATA -# Genetics data could be presented to our solvers in multiple formats, these -# functions define the appropriate methods for loading those in correctly. - -def load_ped(filename: str) -> dict: - """ - Function for reading *.ped files to a dictionary. Takes the file name - as a string input and returns the pedigree structure as a dictionary. - """ - with open(filename, "r") as file: - # first line of *.ped lists the headers; skip - file.readline() - # create a list of int lists from each line (dropping optional labels) - data = [[int(x) for x in line.split(",")[0:3]] for line in file] - # convert this list of lists into a dictionary - ped = {entry[0]: entry[1:3] for entry in data} - return ped - - -def makeA(pedigree: dict) -> npt.NDArray[np.float64]: - """ - Construct Wright's Numerator Relationship Matrix from a given pedigree - structure. Takes the pedigree as a dictionary input and returns the - matrix as output. - """ - m = len(pedigree) - # preallocate memory for A - A = np.zeros((m, m), dtype=float) - - # iterate over rows - for i in range(0, m): - # save parent indexes: pedigrees indexed from 1, Python from 0 - p = pedigree[i+1][0]-1 - q = pedigree[i+1][1]-1 - # iterate over columns sub-diagonal - for j in range(0, i): - # calculate sub-diagonal entries - A[i, j] = 0.5*(A[j, p] + A[j, q]) - # populate sup-diagonal (symmetric) - A[j, i] = A[i, j] - # calculate diagonal entries - A[i, i] = 1 + 0.5*A[p, q] - - return A - - -def load_problem(A_filename: str, E_filename: str, S_filename: str, - dimension: int | None = None, pedigree: bool = False - ) -> tuple[npt.NDArray[np.float64], npt.NDArray[np.float64], - npt.NDArray[np.float64], int]: - """ - Used to load genetic selection problems into NumPy. It takes three - string inputs for filenames where Sigma, Mu, and Omega are stored, - as well as an optional integer input for problem dimension if this - is known. If it's know know, it's worked out based on E_filename. - - As output, it returns (A, E, S, n), where A and S are n-by-n NumPy - arrays, E is a length n NumPy array, and n is an integer. - """ - - def load_symmetric_matrix(filename: str, dimension: int - ) -> npt.NDArray[np.float64]: - """ - Since NumPy doesn't have a stock way to load matrices - stored in coordinate format format, this adds one. - """ - - matrix = np.zeros([dimension, dimension], dtype=float) - - with open(filename, 'r') as file: - for line in file: - i, j, entry = line.split(" ") - # data files indexed from 1, not 0 - matrix[int(i)-1, int(j)-1] = entry - matrix[int(j)-1, int(i)-1] = entry - - return matrix - - E = np.loadtxt(E_filename, dtype=float) - # if dimension not specified, use `E` which doesn't need preallocation - if not dimension: - assert isinstance(E.size, int) # catches E being empty - dimension = E.size - - # S is stored by coordinates so need special loader - S = load_symmetric_matrix(S_filename, dimension) - # A can be stored as a pedigree or by coordinates - if pedigree: - A = makeA(load_ped(A_filename)) - else: - A = load_symmetric_matrix(A_filename, dimension) - - return A, E, S, dimension - - -# DEFINING SOLVER -# With the problems properly loaded into Numpy, this section contains functions -# for solving those under various formulations and methods, as well as printing -# and comparing outputted solutions. - -def gurobi_standard_genetics( - sigma: np.ndarray, - mu: np.ndarray, - sires, # type could be np.ndarray, sets[ints], lists[int], range, etc - dams, # type could be np.ndarray, sets[ints], lists[int], range, etc - lam: float, - dimension: int, - upper_bound: npt.NDArray[np.float64] | float = 1.0, - lower_bound: npt.NDArray[np.float64] | float = 0.0, - time_limit: float | None = None, - max_duality_gap: float | None = None, - debug: bool = False -): # -> tuple[npt.NDArray[np.float64], float]: # BUG Gurobi typing broken - """ - Takes a sigma, mu, sire list, dam list, and dimension as inputs and solves - the standard genetic selection problem with Gurobi for a given value of - lambda (lam), returning the portfolio and objective value as outputs. The - default lower and upper bounds of 0 and 1 can be changed also. - - Optional arguments time_limit and max_duality_gap respectively control how - long Gurobi will spend on the problem and the maximum allowable duality - gap. Optional argument debug sets whether Gurobi prints output to terminal. - """ - - # create models for standard and robust genetic selection - model = gp.Model("standard-genetics") - - # Gurobi spews all its output into the terminal by default, this restricts - # that behaviour to only happen when the `debug` flag is used. - if not debug: model.setParam('OutputFlag', 0) - - # integrating bounds within variable definitions is more efficient than - # as a separate constraint, which Gurobi would convert to bounds anyway - w = model.addMVar(shape=dimension, lb=lower_bound, ub=upper_bound, - vtype=GRB.CONTINUOUS, name="w") - - model.setObjective( - # NOTE Gurobi introduces error if we use `np.inner(w, sigma@w)` here - w.transpose()@mu - (lam/2)*w.transpose()@(sigma@w), - GRB.MAXIMIZE - ) - - # set up the two sum-to-half constraints - M = np.zeros((2, dimension), dtype=int) - # define the M so that column i is [1;0] if i is a sire and [0;1] otherwise - M[0, sires] = 1 - M[1, dams] = 1 - # define the right hand side of the constraint Mx = m - m = np.full(2, 0.5) - model.addConstr(M@w == m, name="sum-to-half") - - # optional controls to stop Gurobi taking too long - if time_limit: model.setParam(GRB.Param.TimeLimit, time_limit) - if max_duality_gap: model.setParam('MIPGap', max_duality_gap) - - # model file can be used externally for verification - if debug: model.write("standard-opt.mps") - - model.optimize() - return w.X, model.ObjVal - - -def gurobi_robust_genetics( - sigma: npt.NDArray[np.float64], - mubar: npt.NDArray[np.float64], - omega: npt.NDArray[np.float64], - sires, # type could be np.ndarray, sets[ints], lists[int], range, etc - dams, # type could be np.ndarray, sets[ints], lists[int], range, etc - lam: float, - kappa: float, - dimension: int, - upper_bound: npt.NDArray[np.float64] | float = 1.0, - lower_bound: npt.NDArray[np.float64] | float = 0.0, - time_limit: float | None = None, - max_duality_gap: float | None = None, - debug: bool = False -): # -> tuple[npt.NDArray, float, float]: # BUG Gurobi typing broken - """ - Takes a sigma, mu-bar, omega, sire list, dam list, and dimension as inputs - and solves the robust genetic selection problem using Gurobi for given - values of lambda (lam) and kappa. It returns the portfolio and objective - value as outputs. The default lower and upper bounds of 0 and 1 can be - changed also. - - Optional arguments time_limit and max_duality_gap respectively control how - long Gurobi will spend on the problem and the maximum allowable duality - gap. Optional argument debug sets whether Gurobi prints output to terminal. - """ - - # create models for standard and robust genetic selection - model = gp.Model("robust-genetics") - - # Gurobi spews all its output into the terminal by default, this restricts - # that behaviour to only happen when the `debug` flag is used. - if not debug: model.setParam('OutputFlag', 0) - - # integrating bounds within variable definitions is more efficient than - # as a separate constraint, which Gurobi would convert to bounds anyway - w = model.addMVar(shape=dimension, lb=lower_bound, ub=upper_bound, - vtype=GRB.CONTINUOUS, name="w") - z = model.addVar(lb=0.0, name="z") - - model.setObjective( - # NOTE Gurobi introduces error if we use `np.inner(w, sigma@w)` here - w.transpose()@mubar - (lam/2)*w.transpose()@(sigma@w) - kappa*z, - GRB.MAXIMIZE - ) - - # set up the two sum-to-half constraints - M = np.zeros((2, dimension), dtype=int) - # define the M so that column i is [1;0] if i is a sire and [0;1] otherwise - M[0, sires] = 1 - M[1, dams] = 1 - # define the right hand side of the constraint Mx = m - m = np.full(2, 0.5, dtype=float) - model.addConstr(M@w == m, name="sum-to-half") - - # conic constraint which comes from robust optimization - model.addConstr(z**2 >= w.transpose()@omega@w, name="uncertainty") - - # optional controls to stop Gurobi taking too long - if time_limit: model.setParam(GRB.Param.TimeLimit, time_limit) - if max_duality_gap: model.setParam('MIPGap', max_duality_gap) - - # model file can be used externally for verification - if debug: model.write("robust-opt.mps") - - model.optimize() - return w.X, z.X, model.ObjVal - - -def print_compare_solutions( - portfolio1, # : npt.NDArray[np.float64], # BUG Gurobi typing broken - portfolio2, # : npt.NDArray[np.float64], # BUG Gurobi typing broken - objective1: float, - objective2: float, - precision: int = 5, - z1: float | None = None, - z2: float | None = None, - name1: str = "First", - name2: str = "Second" -) -> None: - """ - Takes two solutions (comprised of at least a portfolio and objective value, - plus an optional z-value and/or solution name) as inputs, and prints a - comparison of the two solutions to the terminal. The number of decimals - values are displayed to defaults to 5, but can be changed through the - precision argument. - """ - - dimension = portfolio1.size - order = len(str(dimension)) - - # HACK header breaks if precision < 3 or len(problem1) != 5 - print(f"i{' '*(order-1)} {name1} {' '*(precision-3)}{name2}") - for candidate in range(dimension): - print( - f"{candidate+1:0{order}d} " - f"{portfolio1[candidate]:.{precision}f} " - f"{portfolio2[candidate]:.{precision}f}" - ) - - def obj_string(name: str, value: float, precision: int, - z: float | None = None) -> str: - """Helper function which handles the optional z1 and z2 values""" - obj_str = f"{name}: {value:.{precision}f}" - return f"{obj_str} (z = {z:.{precision}f})" if z else f"{obj_str}" - - portfolio_abs_diff = np.abs(portfolio1-portfolio2) - print( - f"\n{obj_string(f'{name1} objective', objective1, precision, z1)}" - f"\n{obj_string(f'{name2} objective', objective2, precision, z2)}" - f"\nMaximum change: {max(portfolio_abs_diff):.{precision}f}" - f"\nAverage change: {np.mean(portfolio_abs_diff):.{precision}f}" - f"\nMinimum change: {min(portfolio_abs_diff):.{precision}f}" - ) - - -# MAIN -# A temporary section which includes an example problem. This will live here -# while porting to module, but will be removed once done. - -# key problem variables -sigma, mubar, omega, n = load_problem( - "Example/04/A04.txt", - "Example/04/EBV04.txt", - "Example/04/S04.txt" -) - -# NOTE this trick of handling sex data is specific to the initial simulation -# data which is structured so that candidates alternate between sires (which -# have even indices) and dams (which have odd indices). -sires = range(0, n, 2) -dams = range(1, n, 2) - -lam = 0.5 -kap = 1 - -# computes the standard and robust genetic selection solutions -w_std, obj_std = gurobi_standard_genetics(sigma, mubar, sires, dams, lam, n) -w_rbs, z_rbs, obj_rbs = gurobi_robust_genetics(sigma, mubar, omega, sires, - dams, lam, kap, n) - -print_compare_solutions(w_std, w_rbs, obj_std, obj_rbs, - z2=z_rbs, name1="w_std", name2="w_rbs") - -# DONE -pass