implemented and tested array compare score from biosimulations-runutils
jcschaff committed Jan 27, 2025
1 parent 62beb37 commit 99ca7b8
Showing 14 changed files with 446 additions and 330 deletions.
16 changes: 10 additions & 6 deletions biosim_server/api/main.py
@@ -131,8 +131,9 @@ async def start_verify_omex(
include_outputs: bool = Query(default=False,
description="Whether to include the output data on which the comparison is based."),
user_description: str = Query(default="my-omex-compare", description="User description of the verification run."),
rel_tol: float = Query(default=0.000001, description="Relative tolerance to use for proximity comparison."),
abs_tol: float = Query(default=0.0000000001, description="Absolute tolerance to use for proximity comparison."),
rel_tol: float = Query(default=0.0001, description="Relative tolerance for proximity comparison."),
abs_tol_min: float = Query(default=0.001, description="Min absolute tolerance, where atol = max(atol_min, max(arr1,arr2)*atol_scale)."),
abs_tol_scale: float = Query(default=0.00001, description="Scale for absolute tolerance, where atol = max(atol_min, max(arr1,arr2)*atol_scale)."),
observables: Optional[list[str]] = Query(default=None,
description="List of observables to include in the return data.")
) -> OmexVerifyWorkflowOutput:
@@ -164,7 +165,8 @@ async def start_verify_omex(
requested_simulators=simulator_specs,
include_outputs=include_outputs,
rel_tol=rel_tol,
abs_tol=abs_tol,
abs_tol_min=abs_tol_min,
abs_tol_scale=abs_tol_scale,
observables=observables)

# ---- invoke workflow ---- #
@@ -232,8 +234,9 @@ async def start_verify_runs(
include_outputs: bool = Query(default=False,
description="Whether to include the output data on which the comparison is based."),
user_description: str = Query(default="my-verify-job", description="User description of the verification run."),
rel_tol: float = Query(default=0.000001, description="Relative tolerance to use for proximity comparison."),
abs_tol: float = Query(default=0.000000001, description="Absolute tolerance to use for proximity comparison."),
rel_tol: float = Query(default=0.0001, description="Relative tolerance for proximity comparison."),
abs_tol_min: float = Query(default=0.001, description="Min absolute tolerance, where atol = max(atol_min, max(arr1,arr2)*atol_scale)."),
abs_tol_scale: float = Query(default=0.00001, description="Scale for absolute tolerance, where atol = max(atol_min, max(arr1,arr2)*atol_scale)."),
observables: Optional[list[str]] = Query(default=None,
description="List of observables to include in the return data.")
) -> RunsVerifyWorkflowOutput:
@@ -245,7 +248,8 @@ async def start_verify_runs(
user_description=user_description,
include_outputs=include_outputs,
rel_tol=rel_tol,
abs_tol=abs_tol,
abs_tol_min=abs_tol_min,
abs_tol_scale=abs_tol_scale,
observables=observables)

# ---- invoke workflow ---- #
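The single abs_tol query parameter is split into abs_tol_min and abs_tol_scale. Per the parameter descriptions above and the calc_stats docstring further down, the two combine into a per-dataset absolute tolerance, and a pair of traces counts as close when the worst-case scaled difference stays below 1.0. A minimal sketch of that relationship (illustrative only, not part of this commit):

import numpy as np

# sketch: how the new parameters combine into the effective absolute tolerance,
# i.e. atol = max(abs_tol_min, max(arr1)*abs_tol_scale, max(arr2)*abs_tol_scale)
def effective_abs_tol(arr1: np.ndarray, arr2: np.ndarray,
                      abs_tol_min: float = 1e-3, abs_tol_scale: float = 1e-5) -> float:
    return float(np.nanmax([abs_tol_min,
                            np.nanmax(arr1) * abs_tol_scale,
                            np.nanmax(arr2) * abs_tol_scale]))

# a pair of traces is then "close" when
#   nanmax(|arr1 - arr2| / (atol + rel_tol * |arr2|)) < 1.0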
74 changes: 50 additions & 24 deletions biosim_server/api/spec/openapi_3_1_0_generated.yaml
@@ -74,19 +74,28 @@ paths:
required: false
schema:
type: number
description: Relative tolerance to use for proximity comparison.
default: 1.0e-06
description: Relative tolerance for proximity comparison.
default: 0.0001
title: Rel Tol
description: Relative tolerance to use for proximity comparison.
- name: abs_tol
description: Relative tolerance for proximity comparison.
- name: abs_tol_min
in: query
required: false
schema:
type: number
description: Absolute tolerance to use for proximity comparison.
default: 1.0e-10
title: Abs Tol
description: Absolute tolerance to use for proximity comparison.
description: Min absolute tolerance, where atol = max(atol_min, max(arr1,arr2)*atol_scale).
default: 0.001
title: Abs Tol Min
description: Min absolute tolerance, where atol = max(atol_min, max(arr1,arr2)*atol_scale).
- name: abs_tol_scale
in: query
required: false
schema:
type: number
description: Scale for absolute tolerance, where atol = max(atol_min, max(arr1,arr2)*atol_scale).
default: 1.0e-05
title: Abs Tol Scale
description: Scale for absolute tolerance, where atol = max(atol_min, max(arr1,arr2)*atol_scale).
- name: observables
in: query
required: false
@@ -198,19 +207,28 @@ paths:
required: false
schema:
type: number
description: Relative tolerance to use for proximity comparison.
default: 1.0e-06
description: Relative tolerance for proximity comparison.
default: 0.0001
title: Rel Tol
description: Relative tolerance to use for proximity comparison.
- name: abs_tol
description: Relative tolerance for proximity comparison.
- name: abs_tol_min
in: query
required: false
schema:
type: number
description: Min absolute tolerance, where atol = max(atol_min, max(arr1,arr2)*atol_scale).
default: 0.001
title: Abs Tol Min
description: Min absolute tolerance, where atol = max(atol_min, max(arr1,arr2)*atol_scale).
- name: abs_tol_scale
in: query
required: false
schema:
type: number
description: Absolute tolerance to use for proximity comparison.
default: 1.0e-09
title: Abs Tol
description: Absolute tolerance to use for proximity comparison.
description: Scale for absolute tolerance, where atol = max(atol_min, max(arr1,arr2)*atol_scale).
default: 1.0e-05
title: Abs Tol Scale
description: Scale for absolute tolerance, where atol = max(atol_min, max(arr1,arr2)*atol_scale).
- name: observables
in: query
required: false
@@ -404,13 +422,13 @@ components:
type: string
type: array
title: Var Names
mse:
score:
anyOf:
- items:
type: number
type: array
- type: 'null'
title: Mse
title: Score
is_close:
anyOf:
- items:
@@ -596,9 +614,12 @@ components:
rel_tol:
type: number
title: Rel Tol
abs_tol:
abs_tol_min:
type: number
title: Abs Tol
title: Abs Tol Min
abs_tol_scale:
type: number
title: Abs Tol Scale
observables:
anyOf:
- items:
@@ -613,7 +634,8 @@ components:
- requested_simulators
- include_outputs
- rel_tol
- abs_tol
- abs_tol_min
- abs_tol_scale
title: OmexVerifyWorkflowInput
OmexVerifyWorkflowOutput:
properties:
@@ -696,9 +718,12 @@ components:
rel_tol:
type: number
title: Rel Tol
abs_tol:
abs_tol_min:
type: number
title: Abs Tol Min
abs_tol_scale:
type: number
title: Abs Tol
title: Abs Tol Scale
observables:
anyOf:
- items:
@@ -712,7 +737,8 @@ components:
- biosimulations_run_ids
- include_outputs
- rel_tol
- abs_tol
- abs_tol_min
- abs_tol_scale
title: RunsVerifyWorkflowInput
RunsVerifyWorkflowOutput:
properties:
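In the regenerated schema the per-variable metric is renamed: where ComparisonStatistics previously exposed mse, it now exposes score, the worst-case scaled difference per variable, with is_close meaning score < 1.0. An illustrative fragment of such an entry (values are made up; fields not shown in this diff are omitted):

# illustrative ComparisonStatistics content after this change (made-up values)
stats_entry = {
    "simulator_version_i": "vcell:latest",       # <simulator_name>:<version>
    "simulator_version_j": "copasi:latest",
    "var_names": ["time", "concentration"],
    "score": [0.02, 0.75],     # max |a - b| / (atol + rtol * |b|) per variable
    "is_close": [True, True],  # True where score < 1.0
    "error_message": None,
}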
70 changes: 55 additions & 15 deletions biosim_server/workflows/verify/activities.py
@@ -1,4 +1,4 @@
from typing import Optional
from typing import Optional, TypeAlias

import numpy as np
from numpy.typing import NDArray
@@ -8,6 +8,11 @@
from biosim_server.common.biosim1_client import BiosimSimulationRun, Hdf5DataValues, HDF5File
from biosim_server.workflows.simulate import get_hdf5_data, GetHdf5DataInput

NDArray1b: TypeAlias = np.ndarray[tuple[int], np.dtype[np.bool]]
NDArray1f: TypeAlias = np.ndarray[tuple[int], np.dtype[np.float64]]
NDArray2f: TypeAlias = np.ndarray[tuple[int, int], np.dtype[np.float64]]
NDArray3f: TypeAlias = np.ndarray[tuple[int, int, int], np.dtype[np.float64]]


class SimulationRunInfo(BaseModel):
biosim_sim_run: BiosimSimulationRun
@@ -17,7 +22,8 @@ class SimulationRunInfo(BaseModel):
class GenerateStatisticsInput(BaseModel):
sim_run_info_list: list[SimulationRunInfo]
include_outputs: bool
abs_tol: float
abs_tol_min: float
abs_tol_scale: float
rel_tol: float


@@ -33,7 +39,7 @@ class ComparisonStatistics(BaseModel):
simulator_version_i: str # version of simulator used for run i <simulator_name>:<version>
simulator_version_j: str # version of simulator used for run j <simulator_name>:<version>
var_names: list[str]
mse: Optional[list[float]] = None
score: Optional[list[float]] = None
is_close: Optional[list[bool]] = None
error_message: Optional[str] = None

@@ -125,14 +131,16 @@ async def generate_statistics(gen_stats_input: GenerateStatisticsInput) -> Gener
continue

# compute statistics
is_close, mse = await calc_stats(a=array_i, b=array_j,
rel_tol=gen_stats_input.rel_tol, abs_tol=gen_stats_input.abs_tol)
is_close_list: list[bool] = is_close.tolist() # type: ignore
is_close, score = calc_stats(arr1=array_i, arr2=array_j, rel_tol=gen_stats_input.rel_tol,
abs_tol_min=gen_stats_input.abs_tol_min,
atol_scale=gen_stats_input.abs_tol_scale)
is_close_list: list[bool] = is_close.tolist()
score_list: list[float] = score.tolist()

activity.logger.info(
f"Comparing {simulation_version_j}:run={run_id_i} and {simulation_version_j}:run={run_id_j} for dataset {dataset_name} MSE: {mse} is_close: {is_close}")
f"Comparing {simulation_version_j}:run={run_id_i} and {simulation_version_j}:run={run_id_j} for dataset {dataset_name} score: {score} is_close: {is_close}")

stats_i_j.mse = mse.tolist() # type: ignore # convert to list for serialization
stats_i_j.score = score_list
stats_i_j.is_close = is_close_list
stats_i_j.error_message = None
ds_comparison_i.append(stats_i_j)
@@ -147,10 +155,42 @@ async def generate_statistics(gen_stats_input: GenerateStatisticsInput) -> Gener
return gen_stats_output


async def calc_stats(a: NDArray[np.float64], b: NDArray[np.float64],
rel_tol: float, abs_tol: float) -> tuple[NDArray[np.bool], NDArray[np.float64]]:
mse = np.mean((a - b) ** 2, axis=1)
is_close_a_b: NDArray[np.bool] = np.isclose(a, b, rtol=rel_tol, atol=abs_tol, equal_nan=True).all(axis=1) # type: ignore
is_close_b_a: NDArray[np.bool] = np.isclose(a, b, rtol=rel_tol, atol=abs_tol, equal_nan=True).all(axis=1) # type: ignore
is_close = np.logical_and(is_close_a_b, is_close_b_a)
return is_close, mse
def calc_stats(arr1: NDArray[np.float64], arr2: NDArray[np.float64],
rel_tol: float, abs_tol_min: float, atol_scale: float) -> tuple[NDArray1b, NDArray1f]:
"""
Calculate the statistics for comparing two arrays.
Computes the same quantities as hdf5_compare.compare_arrays:
atol = np.nanmax([atol_min, max1*atol_scale, max2*atol_scale])
score = np.nanmax(abs(arr1 - arr2) / (atol + rtol * abs(arr2)))
close = np.allclose(arr1, arr2, rtol=rtol, atol=atol, equal_nan=False)
but as vectors, retaining the score and is_close value for each variable.
"""
assert arr1.shape == arr2.shape
assert len(arr1.shape) == 2

# atol = np.nanmax([atol_min, max1*atol_scale, max2*atol_scale])
#
# using a temporary (3, n) array to hold the absolute-tolerance candidates so we can take an element-wise max
# ... there is probably a simpler way to do this with numpy while still avoiding loops
max1 = np.nanmax(a=arr1, axis=1) # shape=(arr1.shape[0],) - max value for each variable in arr1
max2 = np.nanmax(a=arr2, axis=1) # shape=(arr2.shape[0],) - max value for each variable in arr2
abs_tol_arrays = np.zeros((3, arr1.shape[0]))
abs_tol_arrays[0, :] = np.multiply(np.ones(shape=arr1.shape[0]), abs_tol_min)
abs_tol_arrays[1, :] = np.multiply(max1, atol_scale)
abs_tol_arrays[2, :] = np.multiply(max2, atol_scale)
atol_array = np.max(abs_tol_arrays, axis=0)

# score = np.nanmax(abs(arr1 - arr2) / (atol + rtol * abs(arr2)))
rel_tol_array = np.multiply(np.ones(shape=arr1.shape[0]), rel_tol) # shape=(arr1.shape[0],)
numerator = np.abs(arr1 - arr2)
scaled_arr2 = np.multiply(rel_tol_array[:, np.newaxis], np.abs(arr2))
denominator = np.add(atol_array[:, np.newaxis], scaled_arr2)
score: NDArray1f = np.nanmax(a=np.divide(numerator, denominator), axis=1)

# close = np.allclose(arr1, arr2, rtol=rel_tol, atol=atol, equal_nan=False)
is_close: NDArray1b = np.less(score, 1.0) # type: ignore
assert len(is_close.shape) == 1 and len(score.shape) == 1 and is_close.shape == score.shape
return is_close, score
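The in-code comment above notes that a simpler vectorized form of the atol computation probably exists; one possible equivalent (an untested sketch, assuming the per-variable maxima are not NaN) chains element-wise maxima instead of filling the temporary (3, n) array:

# possible drop-in simplification of the atol_array computation in calc_stats (sketch only)
atol_array = np.maximum(abs_tol_min,
                        atol_scale * np.maximum(np.nanmax(arr1, axis=1),
                                                np.nanmax(arr2, axis=1)))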
6 changes: 3 additions & 3 deletions biosim_server/workflows/verify/hdf5_compare.py
@@ -35,15 +35,15 @@ def get_results(results_zip_file: Path) -> dict[str, dict[str, NDArray[np.float6
return results


def compare_arrays(arr1: NDArray[np.float64], arr2: NDArray[np.float64]) -> tuple[bool, float]:
def compare_arrays(arr1: NDArray[np.float64], arr2: NDArray[np.float64],
rtol: float = 1e-4, atol_min: float = 1e-3, atol_scale: float = 1e-5) -> tuple[bool, float]:
# np.seterr(divide='raise')
if type(arr1[0]) == np.float64:
if np.isnan(arr1).any() or np.isnan(arr2).any():
return False, 1e10
max1 = np.nanmax(arr1)
max2 = np.nanmax(arr2)
atol = np.nanmax([1e-3, max1*1e-5, max2*1e-5])
rtol = 1e-4
atol = np.nanmax([atol_min, max1*atol_scale, max2*atol_scale])
try:
score = np.nanmax(abs(arr1 - arr2) / (atol + rtol * abs(arr2)))
except FloatingPointError as e:
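With the tolerances promoted to keyword arguments (defaults matching the previously hard-coded values), existing callers of compare_arrays keep their behavior, while tests can tighten or loosen the comparison. A usage sketch with illustrative 1-D float64 traces, matching the branch shown above:

import numpy as np
from biosim_server.workflows.verify.hdf5_compare import compare_arrays

t1 = np.array([0.0, 0.1000, 0.2000, 0.4000])
t2 = np.array([0.0, 0.1001, 0.1999, 0.4002])

# defaults: rtol=1e-4, atol_min=1e-3, atol_scale=1e-5 -> small differences pass
close_default, score_default = compare_arrays(t1, t2)

# much tighter tolerances -> the same differences fail the comparison
close_strict, score_strict = compare_arrays(t1, t2, rtol=1e-6, atol_min=1e-6, atol_scale=1e-7)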
6 changes: 4 additions & 2 deletions biosim_server/workflows/verify/omex_verify_workflow.py
@@ -31,7 +31,8 @@ class OmexVerifyWorkflowInput(BaseModel):
requested_simulators: list[BiosimSimulatorSpec]
include_outputs: bool
rel_tol: float
abs_tol: float
abs_tol_min: float
abs_tol_scale: float
observables: Optional[list[str]] = None


@@ -100,7 +101,8 @@ async def run(self, verify_input: OmexVerifyWorkflowInput) -> OmexVerifyWorkflow
generate_statistics_output: GenerateStatisticsOutput = await workflow.execute_activity(
generate_statistics,
arg=GenerateStatisticsInput(sim_run_info_list=run_data, include_outputs=verify_input.include_outputs,
abs_tol=verify_input.abs_tol, rel_tol=verify_input.rel_tol),
abs_tol_min=verify_input.abs_tol_min, abs_tol_scale=verify_input.abs_tol_scale,
rel_tol=verify_input.rel_tol),
start_to_close_timeout=timedelta(minutes=10),
retry_policy=RetryPolicy(maximum_attempts=100, backoff_coefficient=2.0,
maximum_interval=timedelta(seconds=10)), )
9 changes: 5 additions & 4 deletions biosim_server/workflows/verify/runs_verify_workflow.py
@@ -5,9 +5,7 @@

from pydantic import BaseModel
from temporalio import workflow
from temporalio.client import WorkflowFailureError
from temporalio.common import RetryPolicy
from temporalio.exceptions import ActivityError

from biosim_server.common.biosim1_client import BiosimSimulationRun, BiosimSimulatorSpec, HDF5File, \
BiosimSimulationRunStatus
@@ -34,7 +32,8 @@ class RunsVerifyWorkflowInput(BaseModel):
biosimulations_run_ids: list[str]
include_outputs: bool
rel_tol: float
abs_tol: float
abs_tol_min: float
abs_tol_scale: float
observables: Optional[list[str]] = None


@@ -104,7 +103,9 @@ async def run(self, verify_input: RunsVerifyWorkflowInput) -> RunsVerifyWorkflow

generate_statistics_input = GenerateStatisticsInput(sim_run_info_list=run_data,
include_outputs=self.verify_input.include_outputs,
abs_tol=self.verify_input.abs_tol, rel_tol=self.verify_input.rel_tol)
abs_tol_min=self.verify_input.abs_tol_min,
abs_tol_scale=self.verify_input.abs_tol_scale,
rel_tol=self.verify_input.rel_tol)
# Generate comparison report
generate_statistics_output: GenerateStatisticsOutput = await workflow.execute_activity(
generate_statistics,
5 changes: 3 additions & 2 deletions biosim_server/workflows/verify/trigger_verify_workflow.py
@@ -20,8 +20,9 @@ async def start_workflow() -> None:
requested_simulators=[BiosimSimulatorSpec(simulator="vcell", version="latest"),
BiosimSimulatorSpec(simulator="copasi", version="latest")],
include_outputs=True,
rel_tol=1e-6,
abs_tol=1e-9,
rel_tol=1e-4,
abs_tol_min=1e-3,
abs_tol_scale=1e-5,
observables=["time", "concentration"]
)],
task_queue="verification_tasks",