From 14d2503ced2319fbafc5f6de7a2acb819febfb88 Mon Sep 17 00:00:00 2001 From: ppegolo Date: Thu, 19 Sep 2024 20:19:26 +0200 Subject: [PATCH] Refactoring --- sportran/current/current.py | 444 +++++++++++++++++++++-------- sportran/current/units/electric.py | 22 +- sportran/md/maxlike.py | 21 +- 3 files changed, 354 insertions(+), 133 deletions(-) diff --git a/sportran/current/current.py b/sportran/current/current.py index 3b71f6c..6e3fd04 100644 --- a/sportran/current/current.py +++ b/sportran/current/current.py @@ -487,6 +487,8 @@ def bayesian_analysis( with open("bayesian_analysis_{}".format(n_parameters), "w+") as g: g.write("{}\t{}\n".format(self.offdiag, self.offdiag_std)) + #################################################################################### + # MAXLIKE methods def maxlike_estimate( self, model, @@ -495,23 +497,18 @@ def maxlike_estimate( likelihood="wishart", solver="BFGS", guess_runave_window=50, - minimize_kwargs={}, + minimize_kwargs=None, ): + """ + Perform maximum likelihood estimation and optionally select the optimal number of parameters using AIC. + """ + minimize_kwargs = minimize_kwargs or {} - if likelihood.lower() == "wishart": - data = self.cospectrum.real * self.N_CURRENTS - elif likelihood.lower() == "chisquare" or likelihood.lower() == "chisquared": - data = self.psd * self.N_CURRENTS - elif likelihood == "variancegamma" or likelihood == "variance-gamma": - data = self.cospectrum.real[0, 1] # * self.N_CURRENTS - else: - raise ValueError( - "Likelihood must be Wishart, Chi-square, or Variance-Gamma." - ) + # Get the appropriate data based on likelihood type + data = self._get_data_by_likelihood(likelihood) # Define MaxLikeFilter object _maxlike = MaxLikeFilter( - # self.maxlike = MaxLikeFilter( data=data, model=model, n_components=self.N_EQUIV_COMPONENTS, @@ -521,125 +518,171 @@ def maxlike_estimate( ) if isinstance(n_parameters, int): - do_AIC = False - # Minimize the negative log-likelihood with a fixed number of parameters - # self.maxlike.maxlike( - _maxlike.maxlike( - mask=mask, - guess_runave_window=guess_runave_window, - n_parameters=n_parameters, - minimize_kwargs=minimize_kwargs, + self._run_maxlike_fixed_parameters( + _maxlike, n_parameters, mask, guess_runave_window, minimize_kwargs + ) + else: + n_parameters = self._prepare_n_parameters(n_parameters) + self._run_maxlike_aic( + _maxlike, + n_parameters, + mask, + guess_runave_window, + minimize_kwargs, + data=data, # TODO find a simple way to avoid this + ) + + # Extract results and scale the matrices + self._extract_and_scale_results(_maxlike, model, likelihood) + + def _get_data_by_likelihood(self, likelihood): + """ + Get the data to be used for the likelihood estimation based on the provided likelihood type. + """ + likelihood = likelihood.lower() + if likelihood == "wishart": + return self.cospectrum.real * self.N_CURRENTS + elif likelihood in ["chisquare", "chisquared"]: + return self.psd * self.N_CURRENTS + elif likelihood in ["variancegamma", "variance-gamma"]: + return self.cospectrum.real[0, 1] # * self.N_CURRENTS + else: + raise ValueError( + "Likelihood must be Wishart, Chi-square, or Variance-Gamma." ) - elif isinstance(n_parameters, list) or isinstance(n_parameters, np.ndarray): - do_AIC = True + def _run_maxlike_fixed_parameters( + self, maxlike_filter, n_parameters, mask, guess_runave_window, minimize_kwargs + ): + """ + Run maximum likelihood estimation with a fixed number of parameters. + """ + maxlike_filter.maxlike( + mask=mask, + guess_runave_window=guess_runave_window, + n_parameters=n_parameters, + minimize_kwargs=minimize_kwargs, + ) + self.maxlike = maxlike_filter + + def _prepare_n_parameters(self, n_parameters): + """ + Prepare the number of parameters array based on input. + """ + if isinstance(n_parameters, (list, np.ndarray)): assert np.issubdtype( np.asarray(n_parameters).dtype, np.integer ), "`n_parameter` must be an integer array-like" log.write_log( f"Optimal number of parameters between {np.min(n_parameters)} and {np.max(n_parameters)} chosen by AIC" ) - elif isinstance(n_parameters, str) and n_parameters.lower() == "aic": - do_AIC = True n_parameters = np.arange(3, 40) log.write_log("Optimal number of parameters between 3 and 40 chosen by AIC") + return n_parameters - if do_AIC: - # Minimize the negative log-likelihood on a range of parameters and choose the best one with the AIC - _aic = [] - _filters = [] - # for n_par in n_parameters: - n_par = n_parameters[0] - convergence_is_reached = False - _aic_max = -np.inf - while n_par <= n_parameters[-1] and not convergence_is_reached: - log.write_log(f"n_parameters = {n_par}") - # self.maxlike.maxlike( - _maxlike.maxlike( - data=data, # FIXME: needs to be passed because maxlike.data has permuted dimensions - mask=mask, - guess_runave_window=guess_runave_window, - n_parameters=int(n_par), - omega_fixed=None, - write_log=False, - minimize_kwargs=minimize_kwargs, - ) + def _run_maxlike_aic( + self, + maxlike_filter, + n_parameters, + mask, + guess_runave_window, + minimize_kwargs, + data=None, + ): + """ + Run maximum likelihood estimation over a range of parameters and choose the best one with AIC. + """ + _aic = [] + _aic_max = -np.inf + _steps_since_last_aic_update = 0 + convergence_is_reached = False + + for n_par in n_parameters: + if convergence_is_reached: + break + log.write_log(f"n_parameters = {n_par}") + maxlike_filter.maxlike( + data=data, + omega_fixed=None, + mask=mask, + guess_runave_window=guess_runave_window, + n_parameters=int(n_par), + minimize_kwargs=minimize_kwargs, + ) - # _new_aic = self.maxlike.log_likelihood_value - n_par - _new_aic = _maxlike.log_likelihood_value - n_par - _aic.append(_new_aic) - if np.max(_aic) > _aic_max: - _aic_max = np.max(_aic) - self.optimal_nparameters = n_par - self.maxlike = deepcopy(_maxlike) - _steps_since_last_aic_update = 0 - else: - _steps_since_last_aic_update += 1 - if _steps_since_last_aic_update > 5: - convergence_is_reached = True - - # _filters.append(deepcopy(self.maxlike)) - n_par += 1 - print( - "aic:", - _new_aic, - "; Steps since last aic update:", - _steps_since_last_aic_update, - flush=True, - ) - # self.optimal_nparameters = n_parameters[np.argmax(_aic)] - # self.maxlike = _filters[np.argmax(_aic)] - self.aic_values = np.array(_aic) - del _filters - del _aic - else: - self.maxlike = _maxlike + _new_aic = maxlike_filter.log_likelihood_value - n_par + _aic.append(_new_aic) - omega_fixed = self.maxlike.omega_fixed - params = self.maxlike.parameters_mean - params_std = self.maxlike.parameters_std + if _new_aic > _aic_max: + _aic_max = _new_aic + self.optimal_nparameters = n_par + self.maxlike = deepcopy(maxlike_filter) + _steps_since_last_aic_update = 0 + else: + _steps_since_last_aic_update += 1 + + if _steps_since_last_aic_update > 5: + convergence_is_reached = True + + print( + "aic:", + _new_aic, + "; Steps since last aic update:", + _steps_since_last_aic_update, + flush=True, + ) + + self.aic_values = np.array(_aic) + + def _extract_and_scale_results(self, maxlike_filter, model, likelihood): + """ + Extract results from maxlike_filter and scale the matrices accordingly. + """ + omega_fixed = maxlike_filter.omega_fixed + params = maxlike_filter.parameters_mean + params_std = maxlike_filter.parameters_std + om = maxlike_filter.omega - om = self.maxlike.omega if likelihood == "wishart": - # nw = params.shape[0]//2 - # re_NLL_spline = lambda x: np.einsum('wba,wbc->wac', *(lambda y: (y, y))(model(omega_fixed, params[:nw])(x))) - # im_NLL_spline = lambda x: np.einsum('wba,wbc->wac', *(lambda y: (y, y))(model(omega_fixed, params[nw:])(x))) - # self.NLL_mean = re_NLL_spline(om) + 1j*im_NLL_spline(om) - # _spl = lambda x: np.einsum('wba,wbc->wac', *(lambda y: (y, y))(model(omega_fixed, params)(x))) - # self.NLL_mean = _spl(om) - from sportran.md.maxlike import scale_matrix + from sportran.md.maxlike import scale_matrix, scale_matrix_std_mc self.NLL_mean = ( scale_matrix(model, params, om, omega_fixed, self.N_CURRENTS) - * self.N_EQUIV_COMPONENTS + # * self.N_EQUIV_COMPONENTS / self.N_CURRENTS ) - try: - # _NLL_spline_upper = lambda x: np.einsum('wba,wbc->wac', *(lambda y: (y, y))(model(omega_fixed, params + params_std)(x))) - # _NLL_spline_lower = lambda x: np.einsum('wba,wbc->wac', *(lambda y: (y, y))(model(omega_fixed, params - params_std)(x))) - # re_NLL_spline_upper = lambda x: np.einsum('wba,wbc->wac', *(lambda y: (y, y))(model(omega_fixed, params[:nw] + params_std[:nw])(x))) - # re_NLL_spline_lower = lambda x: np.einsum('wba,wbc->wac', *(lambda y: (y, y))(model(omega_fixed, params[:nw] - params_std[:nw])(x))) - # im_NLL_spline_upper = lambda x: np.einsum('wba,wbc->wac', *(lambda y: (y, y))(model(omega_fixed, params[nw:] + params_std[nw:])(x))) - # im_NLL_spline_lower = lambda x: np.einsum('wba,wbc->wac', *(lambda y: (y, y))(model(omega_fixed, params[nw:] - params_std[nw:])(x))) - # self.NLL_upper = re_NLL_spline_upper(om) + 1j*im_NLL_spline_upper(om) - # self.NLL_lower = re_NLL_spline_lower(om) + 1j*im_NLL_spline_lower(om) - self.NLL_upper = ( - scale_matrix( - model, params + params_std, om, omega_fixed, self.N_CURRENTS - ) - * self.N_EQUIV_COMPONENTS - / self.N_CURRENTS - ) - self.NLL_lower = ( - scale_matrix( - model, params - params_std, om, omega_fixed, self.N_CURRENTS - ) - * self.N_EQUIV_COMPONENTS - / self.N_CURRENTS + + self.NLL_std = ( + scale_matrix_std_mc( + model, + params, + om, + omega_fixed, + self.N_CURRENTS, + maxlike_filter.parameters_cov, + size=1000, ) - except TypeError: - pass + # * self.N_EQUIV_COMPONENTS + / self.N_CURRENTS + ) + # try: + # self.NLL_upper = ( + # scale_matrix( + # model, params + params_std, om, omega_fixed, self.N_CURRENTS + # ) + # * self.N_EQUIV_COMPONENTS + # / self.N_CURRENTS + # ) + # self.NLL_lower = ( + # scale_matrix( + # model, params - params_std, om, omega_fixed, self.N_CURRENTS + # ) + # * self.N_EQUIV_COMPONENTS + # / self.N_CURRENTS + # ) + # except TypeError: + # pass else: _NLL_spline = model(omega_fixed, params) fact = np.sqrt(self.N_EQUIV_COMPONENTS) / self.N_CURRENTS / np.sqrt(2) @@ -652,21 +695,174 @@ def maxlike_estimate( except TypeError: pass - # self.estimate = self.maxlike.parameters_mean[0]*self.maxlike.factor - - # try: - # self.estimate_std = self.bayes.parameters_std[0]*self.maxlike.factor - # except: - # self.estimate_std = None - - # self.maxlike_log = \ - # '-----------------------------------------------------\n' +\ - # ' MAXIMUM LIKELIHOOD PARAMETER ESTIMATION\n' +\ - # '-----------------------------------------------------\n' - # self.maxlike_log += \ - # ' L_01 = {:18f} +/- {:10f}\n'.format(self.estimate, self.estimate_std) +\ - # '-----------------------------------------------------\n' - # log.write_log(self.maxlike_log) + # def maxlike_estimate( + # self, + # model, + # n_parameters="AIC", + # mask=None, + # likelihood="wishart", + # solver="BFGS", + # guess_runave_window=50, + # minimize_kwargs={}, + # ): + + # if likelihood.lower() == "wishart": + # data = self.cospectrum.real * self.N_CURRENTS + # elif likelihood.lower() == "chisquare" or likelihood.lower() == "chisquared": + # data = self.psd * self.N_CURRENTS + # elif likelihood == "variancegamma" or likelihood == "variance-gamma": + # data = self.cospectrum.real[0, 1] # * self.N_CURRENTS + # else: + # raise ValueError( + # "Likelihood must be Wishart, Chi-square, or Variance-Gamma." + # ) + + # # Define MaxLikeFilter object + # _maxlike = MaxLikeFilter( + # # self.maxlike = MaxLikeFilter( + # data=data, + # model=model, + # n_components=self.N_EQUIV_COMPONENTS, + # n_currents=self.N_CURRENTS, + # likelihood=likelihood, + # solver=solver, + # ) + + # if isinstance(n_parameters, int): + # do_AIC = False + # # Minimize the negative log-likelihood with a fixed number of parameters + # # self.maxlike.maxlike( + # _maxlike.maxlike( + # mask=mask, + # guess_runave_window=guess_runave_window, + # n_parameters=n_parameters, + # minimize_kwargs=minimize_kwargs, + # ) + + # elif isinstance(n_parameters, list) or isinstance(n_parameters, np.ndarray): + # do_AIC = True + # assert np.issubdtype( + # np.asarray(n_parameters).dtype, np.integer + # ), "`n_parameter` must be an integer array-like" + # log.write_log( + # f"Optimal number of parameters between {np.min(n_parameters)} and {np.max(n_parameters)} chosen by AIC" + # ) + + # elif isinstance(n_parameters, str) and n_parameters.lower() == "aic": + # do_AIC = True + # n_parameters = np.arange(3, 40) + # log.write_log("Optimal number of parameters between 3 and 40 chosen by AIC") + + # if do_AIC: + # # Minimize the negative log-likelihood on a range of parameters and choose the best one with the AIC + # _aic = [] + # _filters = [] + # # for n_par in n_parameters: + # n_par = n_parameters[0] + # convergence_is_reached = False + # _aic_max = -np.inf + # while n_par <= n_parameters[-1] and not convergence_is_reached: + # log.write_log(f"n_parameters = {n_par}") + # # self.maxlike.maxlike( + # _maxlike.maxlike( + # data=data, # FIXME: needs to be passed because maxlike.data has permuted dimensions + # mask=mask, + # guess_runave_window=guess_runave_window, + # n_parameters=int(n_par), + # omega_fixed=None, + # write_log=False, + # minimize_kwargs=minimize_kwargs, + # ) + + # # _new_aic = self.maxlike.log_likelihood_value - n_par + # _new_aic = _maxlike.log_likelihood_value - n_par + # _aic.append(_new_aic) + # if np.max(_aic) > _aic_max: + # _aic_max = np.max(_aic) + # self.optimal_nparameters = n_par + # self.maxlike = deepcopy(_maxlike) + # _steps_since_last_aic_update = 0 + # else: + # _steps_since_last_aic_update += 1 + # if _steps_since_last_aic_update > 5: + # convergence_is_reached = True + + # # _filters.append(deepcopy(self.maxlike)) + # n_par += 1 + # print( + # "aic:", + # _new_aic, + # "; Steps since last aic update:", + # _steps_since_last_aic_update, + # flush=True, + # ) + # # self.optimal_nparameters = n_parameters[np.argmax(_aic)] + # # self.maxlike = _filters[np.argmax(_aic)] + # self.aic_values = np.array(_aic) + # del _filters + # del _aic + # else: + # self.maxlike = _maxlike + + # omega_fixed = self.maxlike.omega_fixed + # params = self.maxlike.parameters_mean + # params_std = self.maxlike.parameters_std + + # om = self.maxlike.omega + # if likelihood == "wishart": + # from sportran.md.maxlike import scale_matrix + + # self.NLL_mean = ( + # scale_matrix(model, params, om, omega_fixed, self.N_CURRENTS) + # * self.N_EQUIV_COMPONENTS + # / self.N_CURRENTS + # ) + # try: + # self.NLL_upper = ( + # scale_matrix( + # model, params + params_std, om, omega_fixed, self.N_CURRENTS + # ) + # * self.N_EQUIV_COMPONENTS + # / self.N_CURRENTS + # ) + # self.NLL_lower = ( + # scale_matrix( + # model, params - params_std, om, omega_fixed, self.N_CURRENTS + # ) + # * self.N_EQUIV_COMPONENTS + # / self.N_CURRENTS + # ) + # except TypeError: + # pass + # else: + # _NLL_spline = model(omega_fixed, params) + # fact = np.sqrt(self.N_EQUIV_COMPONENTS) / self.N_CURRENTS / np.sqrt(2) + # self.NLL_mean = _NLL_spline(om) * fact + # try: + # _NLL_spline_upper = model(omega_fixed, params + params_std) + # _NLL_spline_lower = model(omega_fixed, params - params_std) + # self.NLL_upper = _NLL_spline_upper(om) * fact + # self.NLL_lower = _NLL_spline_lower(om) * fact + # except TypeError: + # pass + + # # self.estimate = self.maxlike.parameters_mean[0]*self.maxlike.factor + + # # try: + # # self.estimate_std = self.bayes.parameters_std[0]*self.maxlike.factor + # # except: + # # self.estimate_std = None + + # # self.maxlike_log = \ + # # '-----------------------------------------------------\n' +\ + # # ' MAXIMUM LIKELIHOOD PARAMETER ESTIMATION\n' +\ + # # '-----------------------------------------------------\n' + # # self.maxlike_log += \ + # # ' L_01 = {:18f} +/- {:10f}\n'.format(self.estimate, self.estimate_std) +\ + # # '-----------------------------------------------------\n' + # # log.write_log(self.maxlike_log) + + ################################################################################################################################################ def cepstral_analysis( self, aic_type="aic", aic_Kmin_corrfactor=1.0, manual_cutoffK=None diff --git a/sportran/current/units/electric.py b/sportran/current/units/electric.py index 12e62f6..16b00f6 100644 --- a/sportran/current/units/electric.py +++ b/sportran/current/units/electric.py @@ -20,7 +20,7 @@ def scale_kappa_real(TEMPERATURE, VOLUME): VOLUME cell VOLUME [A^3] Input current is in units of electrons_charge * Angstrom/femtosecond. Current is EXTENSIVE. """ - return constants.charge**2 / TEMPERATURE / constants.kB / VOLUME * 10000. * 1.0e6 + return constants.charge**2 / TEMPERATURE / constants.kB / VOLUME * 10000.0 * 1.0e6 def scale_kappa_metal(TEMPERATURE, VOLUME): @@ -31,7 +31,7 @@ def scale_kappa_metal(TEMPERATURE, VOLUME): VOLUME cell VOLUME [A^3] Input current is in units of electrons_charge * Angstrom/picosecond. Current is EXTENSIVE. """ - return constants.charge**2 / TEMPERATURE / constants.kB / VOLUME * 10000. + return constants.charge**2 / TEMPERATURE / constants.kB / VOLUME * 10000.0 def scale_kappa_qepw(TEMPERATURE, VOLUME): @@ -45,7 +45,14 @@ def scale_kappa_qepw(TEMPERATURE, VOLUME): tau_{a.u.} = 4.8378 10^{-17} s """ - return constants.charge**2 / TEMPERATURE / constants.kB / VOLUME * 10000. * constants.J_PWtoMETAL**2 + return ( + constants.charge**2 + / TEMPERATURE + / constants.kB + / VOLUME + * 10000.0 + * constants.J_PWtoMETAL**2 + ) def scale_kappa_gpumd(TEMPERATURE, VOLUME): @@ -57,4 +64,11 @@ def scale_kappa_gpumd(TEMPERATURE, VOLUME): TEMPERATURE [K] VOLUME cell VOLUME [A^3] """ - return constants.charge**3 / TEMPERATURE / constants.massunit / constants.kB / VOLUME * 1.0e8 + return ( + constants.charge**3 + / TEMPERATURE + / constants.massunit + / constants.kB + / VOLUME + * 1.0e8 + ) diff --git a/sportran/md/maxlike.py b/sportran/md/maxlike.py index 7d0f5c6..dfbb4f9 100644 --- a/sportran/md/maxlike.py +++ b/sportran/md/maxlike.py @@ -147,9 +147,6 @@ def maxlike( self.data, self.data.shape.index(max(self.data.shape)), 0 ) - print("data", self.data.shape) - print("guess", guess_data.shape) - # Perform optimization self._optimize_parameters(guess_data, minimize_kwargs, write_log) @@ -182,8 +179,10 @@ def _update_parameters( self.n_components = n_components if n_currents is not None: self.n_currents = n_currents - if omega_fixed is not None: - self.omega_fixed = omega_fixed + # if omega_fixed is not None: + self.omega_fixed = omega_fixed + # else: + # self.omega_fixed = None def _prepare_data(self, mask): """ @@ -264,6 +263,7 @@ def _store_optimization_results(self, res, write_log): "Covariance matrix estimated through Laplace approximation." ) ) + self.parameters_cov = cov self.parameters_std = np.sqrt(cov.diagonal()) else: if write_log: @@ -457,6 +457,17 @@ def scale_matrix(model, w, omega, omega_fixed, n): return S +def scale_matrix_std_mc(model, w, omega, omega_fixed, n, cov_w, size=1000): + sample = w + np.random.multivariate_normal( + mean=np.zeros_like(w), cov=cov_w, size=size + ) + sample_S = np.stack( + [scale_matrix(model, ww, omega, omega_fixed, 2) for ww in sample] + ) + S_std = sample_S.std(axis=0) + return S_std + + def normalize_parameters(p, guess): mini, maxi = 1.2 * np.abs(guess), -1.2 * np.abs(guess) return (p - mini) / (maxi - mini)