From 6d4b2e3bd1981092ff437738f8b7d07b4db02690 Mon Sep 17 00:00:00 2001 From: Josh Fogg Date: Wed, 24 Jul 2024 16:46:22 +0100 Subject: [PATCH] Typing and loading improvements (#22) Making better use of NumPy's generic C types, including `np.bool` for the binary matrix M. Loading wise, COO and CSR have been consolidated into a single function. --- robustocs/loaders.py | 88 +++++++++++++++++++------------------------- robustocs/solvers.py | 77 +++++++++++++++++++------------------- robustocs/utils.py | 8 ++-- 3 files changed, 79 insertions(+), 94 deletions(-) diff --git a/robustocs/loaders.py b/robustocs/loaders.py index 3498865..ac8cce4 100644 --- a/robustocs/loaders.py +++ b/robustocs/loaders.py @@ -21,8 +21,8 @@ def count_sparse_nnz(filename: str) -> int: """ - Return the number of non-zero entries for a matrix stored in symmetric - sparse coordinate format. + Return the number of non-zero entries for a matrix stored in a file + in symmetric sparse coordinate (COO) format. Parameters ---------- @@ -48,7 +48,7 @@ def count_sparse_nnz(filename: str) -> int: def load_symmetric_matrix(filename: str, dimension: int - ) -> npt.NDArray[np.float64]: + ) -> npt.NDArray[np.floating]: """ Since NumPy doesn't have a stock way to load symmetric matrices stored in symmetric coordinate format, this adds one. @@ -66,7 +66,7 @@ def load_symmetric_matrix(filename: str, dimension: int The matrix represented by the file. """ - matrix = np.zeros([dimension, dimension], dtype=float) + matrix = np.zeros([dimension, dimension], dtype=np.floating) with open(filename, 'r') as file: for line in file: @@ -78,11 +78,11 @@ def load_symmetric_matrix(filename: str, dimension: int return matrix -def load_symmetric_matrix_coo(filename: str, dimension: int, nnz: int - ) -> sparse.spmatrix: +def load_sparse_symmetric_matrix(filename: str, dimension: int, nnz: int, + format: str = 'csr') -> sparse.spmatrix: """ Since neither NumPy or SciPy have a stock way to load symmetric matrices - into sparse coordinate format, this adds one. + stored in symmetric coordinate format into SciPy's formats, this adds one. Parameters ---------- @@ -93,17 +93,21 @@ def load_symmetric_matrix_coo(filename: str, dimension: int, nnz: int nnz : int Number of non-zero entries in the matrix stored in the file. Note that due to symmetry, this may be larger than the number of lines in file. + format : str + Format to use for the sparse matrix. Options available are 'coo' (for + sparse coordinate format) or 'csr' (compressed sparse row format). + Default value is `csr`. Returns ------- - ndarray + spmatrix The matrix represented by the file. """ # preallocate storage arrays - rows: npt.NDArray[np.int8] = np.zeros(nnz) - cols: npt.NDArray[np.int8] = np.zeros(nnz) - vals: npt.NDArray[np.float64] = np.zeros(nnz) + rows: npt.NDArray[np.integer] = np.zeros(nnz, dtype=np.integer) + cols: npt.NDArray[np.integer] = np.zeros(nnz, dtype=np.integer) + vals: npt.NDArray[np.floating] = np.zeros(nnz, dtype=np.floating) with open(filename, 'r') as file: index: int = 0 @@ -126,38 +130,20 @@ def load_symmetric_matrix_coo(filename: str, dimension: int, nnz: int index += 1 - return sparse.coo_matrix( - (vals, (rows, cols)), - shape=(dimension, dimension), - dtype=np.float64 - ) - - -def load_symmetric_matrix_csr(filename: str, dimension: int, nnz: int - ) -> sparse.spmatrix: - """ - Loads a symmetric matrix into compressed sparse row format. It does this - by first loading into sparse coordinate format and then converting with - Scipy. TODO: add a more efficient way which loads into CSR directly. - - Parameters - ---------- - filename : str - Filename, including extension. Space-separated data file. - dimension : int - Number of rows (and columns) of the matrix stored in the file. - nnz : int - Number of non-zero entries in the matrix stored in the file. Note that - due to symmetry, this may be larger than the number of lines in file. - - Returns - ------- - ndarray - The matrix represented by the file. - """ - - matrix = load_symmetric_matrix_coo(filename, dimension, nnz) - return sparse.csr_matrix(matrix) + if format == 'coo': + return sparse.coo_matrix( + (vals, (rows, cols)), + shape=(dimension, dimension), + dtype=np.floating + ) + elif format == 'csr': + return sparse.csr_matrix( + (vals, (rows, cols)), + shape=(dimension, dimension), + dtype=np.floating + ) + else: + raise ValueError("Format must be 'coo' or 'csr'.") def load_ped(filename: str) -> dict[int, list[int]]: @@ -191,7 +177,7 @@ def load_ped(filename: str) -> dict[int, list[int]]: # MATRIX GENERATORS # Utility functions for generating matrices from pedigree data. -def makeA(pedigree: dict[int, list[int]]) -> npt.NDArray[np.float64]: +def makeA(pedigree: dict[int, list[int]]) -> npt.NDArray[np.floating]: """ Constructs Wright's Numerator Relationship Matrix (WNRM) from a given pedigree structure. @@ -210,7 +196,7 @@ def makeA(pedigree: dict[int, list[int]]) -> npt.NDArray[np.float64]: m = len(pedigree) # preallocate memory for A - A = np.zeros((m, m), dtype=float) + A = np.zeros((m, m), dtype=np.floating) # iterate over rows for i in range(0, m): @@ -238,9 +224,9 @@ def load_problem(A_filename: str, E_filename: str, S_filename: str, nnzA: int | None = None, nnzS: int | None = None, dimension: int | None = None, pedigree: bool = False, issparse: bool = False - ) -> tuple[npt.NDArray[np.float64] | sparse.spmatrix, - npt.NDArray[np.float64], - npt.NDArray[np.float64] | sparse.spmatrix, + ) -> tuple[npt.NDArray[np.floating] | sparse.spmatrix, + npt.NDArray[np.floating], + npt.NDArray[np.floating] | sparse.spmatrix, int]: """ Load a robust genetic selection problem into Python. @@ -286,7 +272,7 @@ def load_problem(A_filename: str, E_filename: str, S_filename: str, Dimension of the problem. """ - E = np.loadtxt(E_filename, dtype=float) + E = np.loadtxt(E_filename, dtype=np.floating) # if dimension not specified, use `E` which doesn't need preallocation if not dimension: assert isinstance(E.size, int) # catches E being empty @@ -296,7 +282,7 @@ def load_problem(A_filename: str, E_filename: str, S_filename: str, if issparse: if not nnzS: nnzS = count_sparse_nnz(S_filename) - S = load_symmetric_matrix_csr(S_filename, dimension, nnzS) + S = load_sparse_symmetric_matrix(S_filename, dimension, nnzS) else: # if nnzS was defined here, it's ignored as a parameter S = load_symmetric_matrix(S_filename, dimension) @@ -312,7 +298,7 @@ def load_problem(A_filename: str, E_filename: str, S_filename: str, if issparse: if not nnzA: nnzA = count_sparse_nnz(A_filename) - A = load_symmetric_matrix_csr(A_filename, dimension, nnzA) + A = load_sparse_symmetric_matrix(A_filename, dimension, nnzA) else: # if nnzA was defined here, it's ignored as a parameter A = load_symmetric_matrix(A_filename, dimension) diff --git a/robustocs/solvers.py b/robustocs/solvers.py index 58ba4e7..60d4d18 100644 --- a/robustocs/solvers.py +++ b/robustocs/solvers.py @@ -23,19 +23,19 @@ def gurobi_standard_genetics( - sigma: npt.NDArray[np.float64] | sparse.spmatrix, - mu: npt.NDArray[np.float64], + sigma: npt.NDArray[np.floating] | sparse.spmatrix, + mu: npt.NDArray[np.floating], sires, # type could be np.ndarray, sets[ints], lists[int], range, etc dams, # type could be np.ndarray, sets[ints], lists[int], range, etc lam: float, # cannot be called `lambda`, that's reserved in Python dimension: int, - upper_bound: npt.NDArray[np.float64] | float = 1.0, - lower_bound: npt.NDArray[np.float64] | float = 0.0, + upper_bound: npt.NDArray[np.floating] | float = 1.0, + lower_bound: npt.NDArray[np.floating] | float = 0.0, time_limit: float | None = None, max_duality_gap: float | None = None, model_output: str = '', debug: bool = False -) -> tuple[npt.NDArray[np.float64], float]: +) -> tuple[npt.NDArray[np.floating], float]: """ Solve the standard genetic selection problem using Gurobi. @@ -118,7 +118,7 @@ def gurobi_standard_genetics( ) # set up the two sum-to-half constraints - M = np.zeros((2, dimension), dtype=int) + M = np.zeros((2, dimension), dtype=np.bool) # define the M so that column i is [1;0] if i is a sire and [0;1] otherwise M[0, sires] = 1 M[1, dams] = 1 @@ -141,21 +141,21 @@ def gurobi_standard_genetics( def gurobi_robust_genetics( - sigma: npt.NDArray[np.float64] | sparse.spmatrix, - mubar: npt.NDArray[np.float64], - omega: npt.NDArray[np.float64] | sparse.spmatrix, + sigma: npt.NDArray[np.floating] | sparse.spmatrix, + mubar: npt.NDArray[np.floating], + omega: npt.NDArray[np.floating] | sparse.spmatrix, sires, # type could be np.ndarray, sets[ints], lists[int], range, etc dams, # type could be np.ndarray, sets[ints], lists[int], range, etc lam: float, # cannot be called `lambda`, that's reserved in Python kappa: float, dimension: int, - upper_bound: npt.NDArray[np.float64] | float = 1.0, - lower_bound: npt.NDArray[np.float64] | float = 0.0, + upper_bound: npt.NDArray[np.floating] | float = 1.0, + lower_bound: npt.NDArray[np.floating] | float = 0.0, time_limit: float | None = None, max_duality_gap: float | None = None, model_output: str = '', debug: bool = False -) -> tuple[npt.NDArray[np.float64], float, float]: +) -> tuple[npt.NDArray[np.floating], float, float]: """ Solve the robust genetic selection problem using Gurobi. @@ -227,7 +227,7 @@ def gurobi_robust_genetics( ndarray Portfolio vector which Gurobi has determined is a solution. float - Auxillary variable corresponding to uncertainty associated with the + Auxiliary variable corresponding to uncertainty associated with the portfolio vector which Gurobi has determined is a solution. float Value of the objective function for returned solution vector. @@ -254,12 +254,12 @@ def gurobi_robust_genetics( ) # set up the two sum-to-half constraints - M = np.zeros((2, dimension), dtype=int) + M = np.zeros((2, dimension), dtype=np.bool) # define the M so that column i is [1;0] if i is a sire and [0;1] otherwise M[0, sires] = 1 M[1, dams] = 1 # define the right hand side of the constraint Mx = m - m = np.full(2, 0.5, dtype=float) + m = np.full(2, 0.5, dtype=np.floating) model.addConstr(M@w == m, name="sum-to-half") # conic constraint which comes from robust optimization @@ -280,23 +280,23 @@ def gurobi_robust_genetics( def gurobi_robust_genetics_sqp( - sigma: npt.NDArray[np.float64] | sparse.spmatrix, - mubar: npt.NDArray[np.float64], - omega: npt.NDArray[np.float64] | sparse.spmatrix, + sigma: npt.NDArray[np.floating] | sparse.spmatrix, + mubar: npt.NDArray[np.floating], + omega: npt.NDArray[np.floating] | sparse.spmatrix, sires, # type could be np.ndarray, sets[ints], lists[int], range, etc dams, # type could be np.ndarray, sets[ints], lists[int], range, etc lam: float, # cannot be called `lambda`, that's reserved in Python kappa: float, dimension: int, - upper_bound: npt.NDArray[np.float64] | float = 1.0, - lower_bound: npt.NDArray[np.float64] | float = 0.0, + upper_bound: npt.NDArray[np.floating] | float = 1.0, + lower_bound: npt.NDArray[np.floating] | float = 0.0, time_limit: float | None = None, max_duality_gap: float | None = None, max_iterations: int = 1000, robust_gap_tol: float = 1e-7, model_output: str = '', debug: bool = False -) -> tuple[npt.NDArray[np.float64], float, float]: +) -> tuple[npt.NDArray[np.floating], float, float]: """ Solve the robust genetic selection problem using SQP in Gurobi. @@ -403,12 +403,12 @@ def gurobi_robust_genetics_sqp( ) # set up the two sum-to-half constraints - M = np.zeros((2, dimension), dtype=int) + M = np.zeros((2, dimension), dtype=np.bool) # define the M so that column i is [1;0] if i is a sire and [0;1] otherwise M[0, sires] = 1 M[1, dams] = 1 # define the right hand side of the constraint Mx = m - m = np.full(2, 0.5, dtype=float) + m = np.full(2, 0.5, dtype=np.floating) model.addConstr(M@w == m, name="sum-to-half") # optional controls to stop Gurobi taking too long @@ -438,7 +438,7 @@ def gurobi_robust_genetics_sqp( print("No active constraints!") # z coefficient for the new constraint - w_star: npt.NDArray[np.float64] = np.array(w.X) + w_star: npt.NDArray[np.floating] = np.array(w.X) alpha: float = sqrt(w_star.transpose()@omega@w_star) # if gap between z and w'Omega w has converged, done @@ -452,8 +452,8 @@ def gurobi_robust_genetics_sqp( def highs_bound_like(dimension: int, - value: float | list[float] | npt.NDArray[np.float64] - ): # -> npt.NDArray[np.float64] | list[float] # BUG broke + value: float | list[float] | npt.NDArray[np.floating] + ): # -> npt.NDArray[np.floating] | list[float] # BUG broke """ Helper function which allows HiGHS to interpret variable bounds specified either as a vector or a single floating point value. If `value` is an array @@ -466,18 +466,18 @@ def highs_bound_like(dimension: int, def highs_standard_genetics( sigma: sparse.spmatrix, - mu: npt.NDArray[np.float64], + mu: npt.NDArray[np.floating], sires, # type could be np.ndarray, sets[ints], lists[int], range, etc dams, # type could be np.ndarray, sets[ints], lists[int], range, etc lam: float, # cannot be called `lambda`, that's reserved in Python dimension: int, - upper_bound: npt.NDArray[np.float64] | list[float] | float = 1.0, - lower_bound: npt.NDArray[np.float64] | list[float] | float = 0.0, + upper_bound: npt.NDArray[np.floating] | list[float] | float = 1.0, + lower_bound: npt.NDArray[np.floating] | list[float] | float = 0.0, time_limit: float | None = None, max_duality_gap: float | None = None, model_output: str = '', debug: bool = False -) -> tuple[npt.NDArray[np.float64], float]: +) -> tuple[npt.NDArray[np.floating], float]: """ Solve the standard genetic selection problem using HiGHS. @@ -557,7 +557,6 @@ def highs_standard_genetics( model.lp_.col_upper_ = highs_bound_like(dimension, upper_bound) # define the quadratic term in the objective - sigma = sparse.csc_matrix(sigma) # BUG is sigma in CSR or CSC format? model.hessian_.dim_ = dimension model.hessian_.start_ = sigma.indptr model.hessian_.index_ = sigma.indices @@ -609,7 +608,7 @@ def highs_standard_genetics( raise RuntimeError # by default, col_value is a stock-Python list - solution: npt.NDArray[np.float64] = np.array(h.getSolution().col_value) + solution: npt.NDArray[np.floating] = np.array(h.getSolution().col_value) # we negated the objective function, so negate it back objective_value: float = -h.getInfo().objective_function_value @@ -618,22 +617,22 @@ def highs_standard_genetics( def highs_robust_genetics_sqp( sigma: sparse.spmatrix, - mubar: npt.NDArray[np.float64], - omega: npt.NDArray[np.float64] | sparse.spmatrix, + mubar: npt.NDArray[np.floating], + omega: npt.NDArray[np.floating] | sparse.spmatrix, sires, # type could be np.ndarray, sets[ints], lists[int], range, etc dams, # type could be np.ndarray, sets[ints], lists[int], range, etc lam: float, # cannot be called `lambda`, that's reserved in Python kappa: float, dimension: int, - upper_bound: npt.NDArray[np.float64] | list[float] | float = 1.0, - lower_bound: npt.NDArray[np.float64] | list[float] | float = 0.0, + upper_bound: npt.NDArray[np.floating] | list[float] | float = 1.0, + lower_bound: npt.NDArray[np.floating] | list[float] | float = 0.0, time_limit: float | None = None, max_duality_gap: float | None = None, max_iterations: int = 1000, robust_gap_tol: float = 1e-7, model_output: str = '', debug: bool = False -) -> tuple[npt.NDArray[np.float64], float, float]: +) -> tuple[npt.NDArray[np.floating], float, float]: """ Solve the robust genetic selection problem using SQP in HiGHS. @@ -795,7 +794,7 @@ def highs_robust_genetics_sqp( # by default, col_value is a stock-Python list solution: list[float] = h.getSolution().col_value - w_star: npt.NDArray[np.float64] = np.array(solution[:-1]) + w_star: npt.NDArray[np.floating] = np.array(solution[:-1]) z_star: float = solution[-1] # we negated the objective function, so negate it back @@ -825,7 +824,7 @@ def highs_robust_genetics_sqp( # add a new plane to the approximation of the uncertainty cone num_nz: int = dimension + 1 # HACK assuming entirely dense index: range = range(dimension + 1) - value: npt.NDArray[np.float64] = np.append(-omega@w_star, alpha) + value: npt.NDArray[np.floating] = np.append(-omega@w_star, alpha) h.addRow(0, inf, num_nz, index, value) # final value of solution is the z value, return separately diff --git a/robustocs/utils.py b/robustocs/utils.py index 4353ec1..42d5e50 100644 --- a/robustocs/utils.py +++ b/robustocs/utils.py @@ -15,8 +15,8 @@ def print_compare_solutions( - portfolio1: npt.NDArray[np.float64], - portfolio2: npt.NDArray[np.float64], + portfolio1: npt.NDArray[np.floating], + portfolio2: npt.NDArray[np.floating], objective1: float, objective2: float, precision: int = 5, @@ -106,8 +106,8 @@ def obj_string(name: str, value: float, precision: int, def check_uncertainty_constraint( z: float, - w: npt.NDArray[np.float64], - omega: npt.NDArray[np.float64] | sparse.spmatrix, + w: npt.NDArray[np.floating], + omega: npt.NDArray[np.floating] | sparse.spmatrix, tol: float = 1e-7, debug: bool = False ) -> bool: