From 32470c2915fae928b1c72d619cbd8cb13faf9234 Mon Sep 17 00:00:00 2001 From: JoepVanlier Date: Wed, 25 Oct 2023 11:26:42 +0200 Subject: [PATCH] calibration: split out fitting function --- .../power_spectrum_calibration.py | 174 ++++++++++++------ 1 file changed, 113 insertions(+), 61 deletions(-) diff --git a/lumicks/pylake/force_calibration/power_spectrum_calibration.py b/lumicks/pylake/force_calibration/power_spectrum_calibration.py index ce30338e9..f3b7d81a2 100644 --- a/lumicks/pylake/force_calibration/power_spectrum_calibration.py +++ b/lumicks/pylake/force_calibration/power_spectrum_calibration.py @@ -176,16 +176,114 @@ def calculate_power_spectrum( return power_spectrum -def lorentzian_loss(p, model: ScaledModel, power_spectrum: PowerSpectrum): - expectation = model(power_spectrum.frequency, *p) - gamma = expectation / power_spectrum.num_points_per_block**0.5 +def lorentzian_loss(p, model: ScaledModel, frequency, power, num_points_per_block): + expectation = model(frequency, *p) + gamma = expectation / num_points_per_block**0.5 # fmt: off - return np.sum(np.log( - 1 + 0.5 * ((power_spectrum.power - model(power_spectrum.frequency, *p)) / gamma) ** 2 - )) + return np.sum(np.log(1 + 0.5 * ((power - model(frequency, *p)) / gamma) ** 2)) # fmt: on +def _fit_power_spectra( + model, + frequency, + power, + num_points_per_block, + initial_params, + lower_bounds, + upper_bounds, + ftol, + max_function_evals, + loss_function, +): + """Fit power spectral data. + + Parameters + ---------- + model : callable + Function that takes a list of frequencies and parameters and returns a power spectral + density when called. + frequency : np.ndarray + List of frequencies + power : np.ndarray + List of powers + num_points_per_block: int + Number of points per block used to compute the power spectral density. + initial_params : np.ndarray + Initial guess for the model parameters + lower_bounds, upper_bounds : np.ndarray + Bounds for the model parameters + ftol : float + Termination tolerance for the model fit. + max_function_evals : int + Maximum number of function evaluations during the fit. + loss_function : string + Loss function to use during fitting. Options: "gaussian" (default), "lorentzian" + (robust fitting). + + Returns + ------- + solution_params : np.ndarray + Optimized parameter vector + perr : np.ndarray + Parameter error estimates + chi_squared : float + Chi-squared value after optimization + """ + # The actual curve fitting process is driven by a set of fit parameters that are of order unity. + # This increases the robustness of the fit (see ref. 3). The `ScaledModel` model class takes + # care of this parameter rescaling. + scaled_model = ScaledModel(lambda f, *params: 1 / model(f, *params), initial_params) + lower_bounds = scaled_model.normalize_params(lower_bounds) + upper_bounds = scaled_model.normalize_params(upper_bounds) + + # What we *actually* have to minimize, is the chi^2 expression in Eq. 39 of ref. 1. We're + # "hacking" `scipy.optimize.curve_fit` for this purpose, and passing in "y" values of "1/ps.power" + # instead of just "ps.power", and a model function "1/model(...)" instead of "model(...)". This + # effectively transforms the curve fitter's objective function + # "np.sum( ((f(xdata, *popt) - ydata) / sigma)**2 )" into the expression in Eq. 39 of ref. 1. 
+ sigma = (1 / power) / math.sqrt(num_points_per_block) + (solution_params_rescaled, pcov) = scipy.optimize.curve_fit( + scaled_model, + frequency, + 1 / power, + p0=np.ones(len(initial_params)), + sigma=sigma, + absolute_sigma=True, + method="trf", + ftol=ftol, + maxfev=max_function_evals, + bounds=(lower_bounds, upper_bounds), + ) + solution_params_rescaled = np.abs(solution_params_rescaled) + perr = np.sqrt(np.diag(pcov)) + + # Undo the scaling + solution_params = scaled_model.scale_params(solution_params_rescaled) + perr = scaled_model.scale_params(perr) + + # Use the least squares method as a starting point for a robust fit + if loss_function == "lorentzian": + scaled_model = ScaledModel(lambda f, *p: model(f, *p), solution_params) + bounds = [(lower, upper) for lower, upper in zip(lower_bounds, upper_bounds)] + minimize_result = scipy.optimize.minimize( + lambda p: lorentzian_loss(p, scaled_model, frequency, power, num_points_per_block), + np.ones(len(solution_params)), + method="L-BFGS-B", + bounds=bounds, + options={"maxiter": max_function_evals, "ftol": ftol}, + ) + solution_params_scaled = np.abs(minimize_result.x) + solution_params = scaled_model.scale_params(solution_params_scaled) + + # Return NaN for perr, as scipy.optimize.minimize does not return the covariance matrix + perr = np.NaN * np.ones_like(solution_params) + + chi_squared = np.sum(((1 / model(frequency, *solution_params) - 1 / power) / sigma) ** 2) + + return solution_params, perr, chi_squared + + def fit_power_spectrum( power_spectrum, model, @@ -290,66 +388,20 @@ def fit_power_spectrum( ) anl_fit_res = fit_analytical_lorentzian(analytical_power_spectrum) - initial_params = np.array([anl_fit_res.fc, anl_fit_res.D, *model._filter.initial_values]) - - # The actual curve fitting process is driven by a set of fit parameters that are of order unity. - # This increases the robustness of the fit (see ref. 3). The `ScaledModel` model class takes - # care of this parameter rescaling. - scaled_model = ScaledModel(lambda f, *params: 1 / model(f, *params), initial_params) - lower_bounds = scaled_model.normalize_params([0.0, 0.0, *model._filter.lower_bounds()]) - upper_bounds = scaled_model.normalize_params( - [np.inf, np.inf, *model._filter.upper_bounds(power_spectrum.sample_rate)] - ) - - # What we *actually* have to minimize, is the chi^2 expression in Eq. 39 of ref. 1. We're - # "hacking" `scipy.optimize.curve_fit` for this purpose, and passing in "y" values of "1/ps.power" - # instead of just "ps.power", and a model function "1/model(...)" instead of "model(...)". This - # effectively transforms the curve fitter's objective function - # "np.sum( ((f(xdata, *popt) - ydata) / sigma)**2 )" into the expression in Eq. 39 of ref. 1. 
- sigma = (1 / power_spectrum.power) / math.sqrt(power_spectrum.num_points_per_block) - (solution_params_rescaled, pcov) = scipy.optimize.curve_fit( - scaled_model, + solution_params, perr, chi_squared = _fit_power_spectra( + model, power_spectrum.frequency, - 1 / power_spectrum.power, - p0=np.ones(len(initial_params)), - sigma=sigma, - absolute_sigma=True, - method="trf", + power_spectrum.power, + power_spectrum.num_points_per_block, + initial_params=np.array([anl_fit_res.fc, anl_fit_res.D, *model._filter.initial_values]), + lower_bounds=[0.0, 0.0, *model._filter.lower_bounds()], + upper_bounds=[np.inf, np.inf, *model._filter.upper_bounds(power_spectrum.sample_rate)], ftol=ftol, - maxfev=max_function_evals, - bounds=(lower_bounds, upper_bounds), + max_function_evals=max_function_evals, + loss_function=loss_function, ) - solution_params_rescaled = np.abs(solution_params_rescaled) - perr = np.sqrt(np.diag(pcov)) - - # Undo the scaling - solution_params = scaled_model.scale_params(solution_params_rescaled) - perr = scaled_model.scale_params(perr) - - # Use the least squares method as a starting point for a robust fit - if loss_function == "lorentzian": - scaled_model = ScaledModel( - lambda f, fc, D, *filter_pars: model(f, fc, D, *filter_pars), solution_params - ) - bounds = [(lower, upper) for lower, upper in zip(lower_bounds, upper_bounds)] - minimize_result = scipy.optimize.minimize( - lambda p: lorentzian_loss(p, scaled_model, power_spectrum), - np.ones(len(solution_params)), - method="L-BFGS-B", - bounds=bounds, - options={"maxiter": max_function_evals, "ftol": ftol}, - ) - solution_params_scaled = np.abs(minimize_result.x) - solution_params = scaled_model.scale_params(solution_params_scaled) - - # Return NaN for perr, as scipy.optimize.minimize does not return the covariance matrix - perr = np.NaN * np.ones_like(solution_params) # Calculate goodness-of-fit, in terms of the statistical backing (see ref. 1). - chi_squared = np.sum( - ((1 / model(power_spectrum.frequency, *solution_params) - 1 / power_spectrum.power) / sigma) - ** 2 - ) n_degrees_of_freedom = power_spectrum.power.size - len(solution_params) chi_squared_per_deg = chi_squared / n_degrees_of_freedom backing = (1 - scipy.special.gammainc(chi_squared / 2, n_degrees_of_freedom / 2)) * 100
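The central trick that `_fit_power_spectra` inherits from the old `fit_power_spectrum` body is the inverse-spectrum fit: passing `scipy.optimize.curve_fit` the data `1 / power` together with `sigma = (1 / power) / sqrt(num_points_per_block)` turns its default least-squares objective into the chi-squared expression of Eq. 39 in ref. 1, because the relative standard error of a blocked power spectral density estimate is `1 / sqrt(num_points_per_block)`. The following standalone sketch illustrates the mechanism on synthetic data; `lorentzian_psd`, the parameter values and the data generation are illustrative stand-ins, not pylake's model classes.

    import numpy as np
    import scipy.optimize

    def lorentzian_psd(f, fc, diffusion):
        # Plain Lorentzian power spectral density, used here as an illustrative model:
        # P(f) = D / (2 * pi**2 * (fc**2 + f**2)).
        return diffusion / (2 * np.pi**2 * (fc**2 + f**2))

    rng = np.random.default_rng(0)
    frequency = np.arange(100.0, 10000.0, 10.0)
    num_points_per_block = 200
    true_fc, true_diffusion = 1500.0, 1.0  # corner frequency [Hz], diffusion constant

    # A blocked PSD estimate is the average of `num_points_per_block` exponentially
    # distributed periodogram values, so its relative standard error is 1 / sqrt(n).
    power = lorentzian_psd(frequency, true_fc, true_diffusion) * rng.gamma(
        num_points_per_block, 1.0 / num_points_per_block, size=frequency.size
    )

    # Fit 1 / P(f) with sigma = (1 / P) / sqrt(n); curve_fit's objective
    # sum(((1/model - 1/P) / sigma)**2) then matches the chi-squared of Eq. 39.
    sigma = (1 / power) / np.sqrt(num_points_per_block)
    params, pcov = scipy.optimize.curve_fit(
        lambda f, fc, diffusion: 1 / lorentzian_psd(f, fc, diffusion),
        frequency,
        1 / power,
        p0=(1000.0, 2.0),
        sigma=sigma,
        absolute_sigma=True,
        method="trf",
        bounds=([0.0, 0.0], [np.inf, np.inf]),
    )
    print(params, np.sqrt(np.diag(pcov)))  # should recover roughly (1500, 1)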
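With `loss_function="lorentzian"` the least-squares result is only used as a starting point: the parameters are then refined by minimizing `lorentzian_loss`, i.e. the sum of `log(1 + 0.5 * ((power - model) / gamma)**2)` with `gamma = model / sqrt(num_points_per_block)`. Because this loss grows only logarithmically for large residuals, isolated outlier bins (e.g. noise peaks) pull the fit far less than under the Gaussian chi-squared. The sketch below continues from the previous one and, unlike the patch, optimizes `fc` and `diffusion` directly rather than the order-unity rescaled parameters handled by `ScaledModel`.

    def robust_loss(p, frequency, power, num_points_per_block):
        # Same functional form as `lorentzian_loss` in the patch, but written against
        # the illustrative `lorentzian_psd` model instead of a ScaledModel instance.
        expectation = lorentzian_psd(frequency, *p)
        gamma = expectation / num_points_per_block**0.5
        return np.sum(np.log(1 + 0.5 * ((power - expectation) / gamma) ** 2))

    minimize_result = scipy.optimize.minimize(
        lambda p: robust_loss(p, frequency, power, num_points_per_block),
        x0=params,  # start from the least-squares solution, as the patch does
        method="L-BFGS-B",
        bounds=[(0.0, np.inf), (0.0, np.inf)],
        options={"maxiter": 10000, "ftol": 1e-7},
    )
    robust_params = minimize_result.x
    # As in the patch, no covariance matrix (and hence no parameter error estimate)
    # comes out of this minimization step.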
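Both fitting passes work on parameters rescaled to be of order unity, which is why `_fit_power_spectra` uses `p0=np.ones(len(initial_params))` and normalizes the bounds before calling the optimizer. The rescaling itself is done by `ScaledModel`, which is defined elsewhere in pylake; the class below is only a hypothetical sketch of the underlying idea (divide by the initial guess before fitting, multiply back afterwards), not pylake's actual implementation.

    class ScaledModelSketch:
        # Hypothetical stand-in for pylake's `ScaledModel`: the optimizer sees
        # parameters divided by the initial guess, so every fit parameter starts at 1.
        def __init__(self, model, initial_params):
            self._model = model
            self._scale = np.asarray(initial_params, dtype=float)

        def __call__(self, f, *scaled_params):
            return self._model(f, *(np.asarray(scaled_params) * self._scale))

        def normalize_params(self, params):
            return np.asarray(params, dtype=float) / self._scale

        def scale_params(self, scaled_params):
            return np.asarray(scaled_params, dtype=float) * self._scale

    # Mirrors the first fitting pass in `_fit_power_spectra`, using the sketch above.
    scaled_model = ScaledModelSketch(lambda f, *p: 1 / lorentzian_psd(f, *p), (1000.0, 2.0))
    scaled_params, _ = scipy.optimize.curve_fit(
        scaled_model, frequency, 1 / power, p0=np.ones(2), sigma=sigma, absolute_sigma=True
    )
    fitted_params = scaled_model.scale_params(np.abs(scaled_params))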