From 5fbc48d40b8c69ccf6fea094de6897f13d6e3c28 Mon Sep 17 00:00:00 2001 From: vloison Date: Thu, 30 May 2024 11:29:22 +0200 Subject: [PATCH 01/25] condense init parameters --- fadin/solver.py | 129 +++++++++++++++++++++++------------------------- 1 file changed, 63 insertions(+), 66 deletions(-) diff --git a/fadin/solver.py b/fadin/solver.py index 18a3a7f..2aa98fd 100644 --- a/fadin/solver.py +++ b/fadin/solver.py @@ -48,28 +48,31 @@ class FaDIn(object): kernel : `str` or `callable` Either define a kernel in ``{'raised_cosine' | 'truncated_gaussian' | 'truncated_exponential'}`` or a custom kernel. - - kernel_params_init : `list` of tensor of shape (n_dim, n_dim) - Initial parameters of the kernel. - - baseline_init : `tensor`, shape (n_dim,) - Initial baseline parameters of the intensity of the Hawkes process. - + TODO: ADD MOMENT MATCHING TO FADIN + init: dict default={'strategy': 'random'} + Initialization strategy of the parameters of the Hawkes process. + Must contain key 'strategy' in + ``{'random' | 'moment_matching', 'given'}``. + If 'strategy' is set to 'custom', the dictionary must contain the + following keys: + - 'baseline': `tensor` or `None`, shape (n_dim,): Initial baseline + - 'alpha': `tensor` or `None`, shape (n_dim, n_dim): Initial alpha + - 'kernel': `list` of tensor or `None`: Initial kernel parameters + Note that when one of 'baseline', 'alpha', or 'kernel' is set to None, + the corresponding parameters are initialized randomly. + + TODO: CONDENSE IN MASK PARAMETERS baseline_mask : `tensor` of shape (n_dim,), or `None` Tensor of same shape as the baseline vector, with values in (0, 1). `baseline` coordinates where `baseline_mask` is equal to 0 will stay constant equal to zero and not be optimized. If set to `None`, all coordinates of baseline will be optimized. - - alpha_init : `tensor`, shape (n_dim, n_dim) - Initial alpha parameters of the intensity of the Hawkes process. - alpha_mask : `tensor` of shape (n_dim, n_dim), or `None` Tensor of same shape as the `alpha` tensor, with values in (0, 1). `alpha` coordinates and kernel parameters where `alpha_mask` = 0 will not be optimized. If set to `None`, all coordinates of alpha will be optimized, - and all kernel parameters will be optimized if optimize_kernel=`True`. + and all kernel parameters will be optimized. kernel_length : `float`, `default=1.` Length of kernels in the Hawkes process. @@ -86,10 +89,6 @@ class FaDIn(object): max_iter : `int`, `default=1000` Maximum number of iterations during fit. - optimize_kernel : `boolean`, `default=True` - If optimize_kernel is false, kernel parameters are not optimized - and only the baseline and alpha are optimized. - precomputations : `boolean`, `default=True` If precomputations is false, pytorch autodiff is applied on the loss. If precomputations is true, then FaDIn is computed. @@ -141,11 +140,11 @@ class FaDIn(object): If no early stopping, `n_iter` is equal to `max_iter`. 
""" - def __init__(self, n_dim, kernel, kernel_params_init=None, - baseline_init=None, baseline_mask=None, - alpha_init=None, alpha_mask=None, + def __init__(self, n_dim, kernel, init={'strategy': 'random'}, + baseline_mask=None, + alpha_mask=None, kernel_length=1, delta=0.01, optim='RMSprop', - params_optim=dict(), max_iter=2000, optimize_kernel=True, + params_optim=dict(), max_iter=2000, precomputations=True, ztzG_approx=True, device='cpu', log=False, grad_kernel=None, criterion='l2', tol=10e-5, random_state=None): @@ -163,10 +162,11 @@ def __init__(self, n_dim, kernel, kernel_params_init=None, # params model self.n_dim = n_dim - if baseline_init is None: + + if init['strategy'] in ['random', 'moment_matching']: self.baseline = torch.rand(self.n_dim) - else: - self.baseline = baseline_init.float() + elif init['strategy'] == 'given': + self.baseline = init['baseline'].float() if baseline_mask is None: self.baseline_mask = torch.ones([n_dim]) else: @@ -174,10 +174,11 @@ def __init__(self, n_dim, kernel, kernel_params_init=None, "Invalid baseline_mask shape, must be (n_dim,)" self.baseline_mask = baseline_mask self.baseline = (self.baseline * self.baseline_mask).requires_grad_(True) - if alpha_init is None: + + if init['strategy'] in ['random', 'moment_matching']: self.alpha = torch.rand(self.n_dim, self.n_dim) - else: - self.alpha = alpha_init.float() + elif init['strategy'] == 'given': + self.alpha = init['alpha'].float() if alpha_mask is None: self.alpha_mask = torch.ones([self.n_dim, self.n_dim]) else: @@ -186,7 +187,10 @@ def __init__(self, n_dim, kernel, kernel_params_init=None, self.alpha_mask = alpha_mask self.alpha = (self.alpha * self.alpha_mask).requires_grad_(True) - if kernel_params_init is None: + if init['strategy'] == 'given': + kernel_params_init = init['kernel'] + + elif init['strategy'] in ['random', 'custom']: kernel_params_init = [] if kernel == 'raised_cosine': temp = 0.5 * self.W * torch.rand(self.n_dim, self.n_dim) @@ -217,12 +221,9 @@ def __init__(self, n_dim, kernel, kernel_params_init=None, # Set optimizer self.params_intens = [self.baseline, self.alpha] - self.optimize_kernel = optimize_kernel - - if self.optimize_kernel: - for i in range(self.n_kernel_params): - self.params_intens.append( - kernel_params_init[i].float().clip(1e-4).requires_grad_(True)) + for i in range(self.n_kernel_params): + self.params_intens.append( + kernel_params_init[i].float().clip(1e-4).requires_grad_(True)) self.precomputations = precomputations @@ -302,10 +303,8 @@ def fit(self, events, end_time): self.param_baseline[0] = self.params_intens[0].detach() self.param_alpha[0] = self.params_intens[1].detach() - # If kernel parameters are optimized - if self.optimize_kernel: - for i in range(self.n_kernel_params): - self.param_kernel[i, 0] = self.params_intens[2 + i].detach() + for i in range(self.n_kernel_params): + self.param_kernel[i, 0] = self.params_intens[2 + i].detach() #################################################### start = time.time() @@ -315,16 +314,15 @@ def fit(self, events, end_time): self.opt.zero_grad() if self.precomputations: - if self.optimize_kernel: - # Update kernel - kernel = self.kernel_model.kernel_eval(self.params_intens[2:], - discretization) - # print('kernel', kernel) - grad_theta = self.kernel_model.grad_eval(self.params_intens[2:], - discretization) - else: - kernel = self.kernel_model.kernel_eval(self.kernel_params_fixed, - discretization) + # Compute kernel and gradient + kernel = self.kernel_model.kernel_eval( + self.params_intens[2:], + discretization 
+ ) + grad_theta = self.kernel_model.grad_eval( + self.params_intens[2:], + discretization + ) if self.log: self.v_loss[i] = \ @@ -349,18 +347,20 @@ def fit(self, events, end_time): kernel, self.delta, n_events) - if self.optimize_kernel: - for j in range(self.n_kernel_params): - self.params_intens[2 + j].grad = \ - get_grad_eta(zG, - zN, - ztzG, - self.params_intens[0], - self.params_intens[1], - kernel, - grad_theta[j], - self.delta, - n_events) + # Update kernel + for j in range(self.n_kernel_params): + self.params_intens[2 + j].grad = \ + get_grad_eta( + zG, + zN, + ztzG, + self.params_intens[0], + self.params_intens[1], + kernel, + grad_theta[j], + self.delta, + n_events + ) else: intens = self.kernel_model.intensity_eval(self.params_intens[0], @@ -383,13 +383,10 @@ def fit(self, events, end_time): self.alpha_mask self.param_baseline[i + 1] = self.params_intens[0].detach() self.param_alpha[i + 1] = self.params_intens[1].detach() - - # If kernel parameters are optimized - if self.optimize_kernel: - for j in range(self.n_kernel_params): - self.params_intens[2 + j].data = \ - self.params_intens[2 + j].data.clip(0) - self.param_kernel[j, i + 1] = self.params_intens[2 + j].detach() + for j in range(self.n_kernel_params): + self.params_intens[2 + j].data = \ + self.params_intens[2 + j].data.clip(0) + self.param_kernel[j, i + 1] = self.params_intens[2 + j].detach() # Early stopping if i % 100 == 0: From 895b88caa81e3a27e738be2f5a878683224cb5f7 Mon Sep 17 00:00:00 2001 From: vloison Date: Thu, 30 May 2024 15:07:16 +0200 Subject: [PATCH 02/25] compact parameters initialization in one argument in FaDIn.__init__ --- examples/plot_multivariate_fadin.py | 3 +- experiments/benchmark/run_benchmark_exp.py | 25 +++++-- experiments/benchmark/run_benchmark_rc.py | 10 ++- experiments/benchmark/run_benchmark_tg.py | 10 ++- experiments/benchmark/run_comparison_ll.py | 13 ++-- experiments/benchmark/run_nonparam_exp.py | 9 ++- experiments/benchmark/run_nonparam_rc.py | 10 ++- experiments/benchmark/run_nonparam_tg.py | 10 ++- experiments/example_univariate.py | 12 +-- .../inference_error/run_comp_autodiff.py | 22 +++--- .../inference_error/run_error_discrete_EXP.py | 9 ++- .../run_error_discrete_EXP_m.py | 9 ++- .../inference_error/run_error_discrete_RC.py | 10 ++- .../run_error_discrete_RC_m.py | 10 ++- .../run_error_discrete_TG_m.py | 10 ++- .../run_sensi_analysis_length.py | 9 ++- experiments/inference_error/run_time_dim.py | 9 ++- fadin/solver.py | 30 ++++---- fadin/tests/test_mask.py | 75 +++++++++++-------- 19 files changed, 176 insertions(+), 119 deletions(-) diff --git a/examples/plot_multivariate_fadin.py b/examples/plot_multivariate_fadin.py index e61620d..bef3fb2 100644 --- a/examples/plot_multivariate_fadin.py +++ b/examples/plot_multivariate_fadin.py @@ -35,7 +35,8 @@ discretization = torch.linspace(0, kernel_length, L) ############################################################################### -# Here, we set the parameters of a Hawkes process with a Exponential(1) distributions. +# Here, we set the parameters of a Hawkes process with a Exponential(1) +# distribution. 
baseline = np.array([.1, .5]) alpha = np.array([[0.6, 0.3], [0.25, 0.7]]) diff --git a/experiments/benchmark/run_benchmark_exp.py b/experiments/benchmark/run_benchmark_exp.py index b5a6ecb..70597d7 100644 --- a/experiments/benchmark/run_benchmark_exp.py +++ b/experiments/benchmark/run_benchmark_exp.py @@ -25,8 +25,13 @@ def simulate_data(baseline, alpha, decay, T, dt, seed=0): L = int(1 / dt) discretization = torch.linspace(0, 1, L) n_dim = decay.shape[0] - EXP = DiscreteKernelFiniteSupport(dt, n_dim=n_dim, kernel='truncated_exponential', - lower=0, upper=1) + EXP = DiscreteKernelFiniteSupport( + dt, + n_dim=n_dim, + kernel='truncated_exponential', + lower=0, + upper=1 + ) kernel_values = EXP.kernel_eval([torch.Tensor(decay)], discretization) @@ -62,11 +67,14 @@ def simulate_data(baseline, alpha, decay, T, dt, seed=0): def run_fadin(events, decay_init, baseline_init, alpha_init, T, dt, seed=0): start = time.time() max_iter = 2000 + init = { + 'alpha': torch.tensor(alpha_init), + 'baseline': torch.tensor(baseline_init), + 'kernel': [torch.tensor(decay_init)] + } solver = FaDIn(2, "truncated_exponential", - [torch.tensor(decay_init)], - torch.tensor(baseline_init), - torch.tensor(alpha_init), + init=init, delta=dt, optim="RMSprop", step_size=1e-3, max_iter=max_iter, optimize_kernel=True, precomputations=True, @@ -258,14 +266,17 @@ def run_experiment(baseline, alpha, decay, T, dt, seed=0): alpha_hat = results['param_alpha'] decay_hat = results['param_kernel'][0] - RC = DiscreteKernelFiniteSupport(dt, n_dim=2, kernel='truncated_exponential', + RC = DiscreteKernelFiniteSupport(dt, n_dim=2, + kernel='truncated_exponential', lower=0, upper=1) intens_fadin = RC.intensity_eval(torch.tensor(baseline_hat), torch.tensor(alpha_hat), [torch.Tensor(decay_hat)], events_grid, torch.linspace(0, 1, L)) - res['err_fadin'] = np.absolute(intens.numpy() - intens_fadin.numpy()).mean() + res['err_fadin'] = np.absolute( + intens.numpy() - intens_fadin.numpy() + ).mean() res['time_fadin'] = results['time'] results = run_gibbs(S, size_grid, dt, seed=seed) diff --git a/experiments/benchmark/run_benchmark_rc.py b/experiments/benchmark/run_benchmark_rc.py index 633697d..4e99899 100644 --- a/experiments/benchmark/run_benchmark_rc.py +++ b/experiments/benchmark/run_benchmark_rc.py @@ -63,12 +63,14 @@ def simulate_data(baseline, alpha, mu, sigma, T, dt, seed=0): def run_fadin(events, u_init, sigma_init, baseline_init, alpha_init, T, dt, seed=0): start = time.time() max_iter = 2000 + init = { + 'kernel': [torch.tensor(u_init), torch.tensor(sigma_init)], + 'baseline': torch.tensor(baseline_init), + 'alpha': torch.tensor(alpha_init) + } solver = FaDIn(2, "raised_cosine", - [torch.tensor(u_init), - torch.tensor(sigma_init)], - torch.tensor(baseline_init), - torch.tensor(alpha_init), + init=init, delta=dt, optim="RMSprop", step_size=1e-3, max_iter=max_iter, optimize_kernel=True, precomputations=True, diff --git a/experiments/benchmark/run_benchmark_tg.py b/experiments/benchmark/run_benchmark_tg.py index 4e5c97e..9387e0f 100644 --- a/experiments/benchmark/run_benchmark_tg.py +++ b/experiments/benchmark/run_benchmark_tg.py @@ -62,12 +62,14 @@ def simulate_data(baseline, alpha, m, sigma, T, dt, seed=0): def run_fadin(events, m_init, sigma_init, baseline_init, alpha_init, T, dt, seed=0): start = time.time() max_iter = 2000 + init = { + 'alpha': torch.tensor(alpha_init), + 'baseline': torch.tensor(baseline_init), + 'kernel': [torch.tensor(m_init), torch.tensor(sigma_init)] + } solver = FaDIn(2, "truncated_gaussian", - 
[torch.tensor(m_init), - torch.tensor(sigma_init)], - torch.tensor(baseline_init), - torch.tensor(alpha_init), + init=init, delta=dt, optim="RMSprop", step_size=1e-3, max_iter=max_iter, optimize_kernel=True, precomputations=True, diff --git a/experiments/benchmark/run_comparison_ll.py b/experiments/benchmark/run_comparison_ll.py index c3c893a..e9200d6 100644 --- a/experiments/benchmark/run_comparison_ll.py +++ b/experiments/benchmark/run_comparison_ll.py @@ -56,14 +56,17 @@ def simulate_data(baseline, alpha, kernel_params, def run_solver(criterion, events, kernel_params_init, - baseline_init, alpha_init, T, dt, seed=0, kernel='raised_cosine'): - k_params_init = [torch.tensor(a) for a in kernel_params_init] + baseline_init, alpha_init, T, dt, seed=0, + kernel='raised_cosine'): max_iter = 2000 + init = { + 'alpha': torch.tensor(alpha_init), + 'baseline': torch.tensor(baseline_init), + 'kernel': [torch.tensor(a) for a in kernel_params_init] + } solver = FaDIn(1, kernel, - k_params_init, - torch.tensor(baseline_init), - torch.tensor(alpha_init), + init=init, delta=dt, optim="RMSprop", step_size=1e-3, diff --git a/experiments/benchmark/run_nonparam_exp.py b/experiments/benchmark/run_nonparam_exp.py index 6e001dd..c5a3c25 100644 --- a/experiments/benchmark/run_nonparam_exp.py +++ b/experiments/benchmark/run_nonparam_exp.py @@ -61,11 +61,14 @@ def simulate_data(baseline, alpha, decay, T, dt, seed=0): # @mem.cache def run_solver(events, decay_init, baseline_init, alpha_init, T, dt, seed=0): max_iter = 800 + init = { + 'alpha': torch.tensor(alpha_init), + 'baseline': torch.tensor(baseline_init), + 'kernel': [torch.tensor(decay_init)] + } solver = FaDIn(1, "truncated_exponential", - [torch.tensor(decay_init)], - torch.tensor(baseline_init), - torch.tensor(alpha_init), + init=init, delta=dt, optim="RMSprop", step_size=1e-3, diff --git a/experiments/benchmark/run_nonparam_rc.py b/experiments/benchmark/run_nonparam_rc.py index bef6635..3a5bdc5 100644 --- a/experiments/benchmark/run_nonparam_rc.py +++ b/experiments/benchmark/run_nonparam_rc.py @@ -64,12 +64,14 @@ def simulate_data(baseline, alpha, mu, sigma, T, dt, seed=0): # @mem.cache def run_solver(events, u_init, sigma_init, baseline_init, alpha_init, T, dt, seed=0): max_iter = 800 + init = { + 'alpha': torch.tensor(alpha_init), + 'baseline': torch.tensor(baseline_init), + 'kernel': [torch.tensor(u_init), torch.tensor(sigma_init)] + } solver = FaDIn(1, "raised_cosine", - [torch.tensor(u_init), - torch.tensor(sigma_init)], - torch.tensor(baseline_init), - torch.tensor(alpha_init), + init=init, delta=dt, optim="RMSprop", step_size=1e-3, diff --git a/experiments/benchmark/run_nonparam_tg.py b/experiments/benchmark/run_nonparam_tg.py index 5147d3c..ad51a1a 100644 --- a/experiments/benchmark/run_nonparam_tg.py +++ b/experiments/benchmark/run_nonparam_tg.py @@ -63,12 +63,14 @@ def simulate_data(baseline, alpha, m, sigma, T, dt, seed=0): # @mem.cache def run_solver(events, m_init, sigma_init, baseline_init, alpha_init, T, dt, seed=0): max_iter = 800 + init = { + 'alpha': torch.tensor(alpha_init), + 'baseline': torch.tensor(baseline_init), + 'kernel': [torch.tensor(m_init), torch.tensor(sigma_init)] + } solver = FaDIn(1, "truncated_gaussian", - [torch.tensor(m_init), - torch.tensor(sigma_init)], - torch.tensor(baseline_init), - torch.tensor(alpha_init), + init=init, delta=dt, optim="RMSprop", step_size=1e-3, diff --git a/experiments/example_univariate.py b/experiments/example_univariate.py index 3335b4c..f44727d 100644 --- 
a/experiments/example_univariate.py +++ b/experiments/example_univariate.py @@ -53,15 +53,17 @@ def run_solver(events, u_init, sigma_init, baseline_init, alpha_init, kernel_length, dt, T, seed=0): start = time.time() max_iter = 10000 + init = { + 'alpha': torch.tensor(alpha_init), + 'baseline': torch.tensor(baseline_init), + 'kernel': [torch.tensor(u_init), torch.tensor(sigma_init)] + } solver = FaDIn(1, "kumaraswamy", - [torch.tensor(u_init), - torch.tensor(sigma_init)], - torch.tensor(baseline_init), - torch.tensor(alpha_init), + init=init, kernel_length=kernel_length, delta=dt, optim="RMSprop", - max_iter=max_iter, criterion='l2' + max_iter=max_iter, criterion='l2', ) print(time.time() - start) diff --git a/experiments/inference_error/run_comp_autodiff.py b/experiments/inference_error/run_comp_autodiff.py index b8de2fd..30f6c6b 100644 --- a/experiments/inference_error/run_comp_autodiff.py +++ b/experiments/inference_error/run_comp_autodiff.py @@ -58,27 +58,29 @@ def simulate_data(baseline, alpha, mu, sigma, T, dt, seed=0): @mem.cache def run_solver(events, u_init, sigma_init, baseline_init, alpha_init, T, dt, seed=0): - max_iter = 800 + init = { + 'alpha': torch.tensor(alpha_init), + 'baseline': torch.tensor(baseline_init), + 'kernel': [torch.tensor(u_init), torch.tensor(sigma_init)] + } solver_autodiff = FaDIn(1, "raised_cosine", - [torch.tensor(u_init), - torch.tensor(sigma_init)], - torch.tensor(baseline_init), - torch.tensor(alpha_init), + init=init, delta=dt, optim="RMSprop", step_size=1e-3, max_iter=max_iter, precomputations=False, random_state=0) start_autodiff = time.time() solver_autodiff.fit(events, T) time_autodiff = time.time() - start_autodiff - + init = { + 'alpha': torch.tensor(alpha_init), + 'baseline': torch.tensor(baseline_init), + 'kernel': [torch.tensor(u_init), torch.tensor(sigma_init)] + } solver_FaDIn = FaDIn(1, "raised_cosine", - [torch.tensor(u_init), - torch.tensor(sigma_init)], - torch.tensor(baseline_init), - torch.tensor(alpha_init), + init=init, delta=dt, optim="RMSprop", step_size=1e-3, max_iter=max_iter, precomputations=True, random_state=0) diff --git a/experiments/inference_error/run_error_discrete_EXP.py b/experiments/inference_error/run_error_discrete_EXP.py index d75f882..731ddfb 100644 --- a/experiments/inference_error/run_error_discrete_EXP.py +++ b/experiments/inference_error/run_error_discrete_EXP.py @@ -60,11 +60,14 @@ def simulate_data(baseline, alpha, decay, T, dt, seed=0): def run_solver(events, decay_init, baseline_init, alpha_init, T, dt, seed=0): start = time.time() max_iter = 2000 + init = { + 'alpha': torch.tensor(alpha_init), + 'baseline': torch.tensor(baseline_init), + 'kernel': [torch.tensor(decay_init)] + } solver = FaDIn(2, "truncated_exponential", - [torch.tensor(decay_init)], - torch.tensor(baseline_init), - torch.tensor(alpha_init), + init=init, delta=dt, optim="RMSprop", step_size=1e-3, diff --git a/experiments/inference_error/run_error_discrete_EXP_m.py b/experiments/inference_error/run_error_discrete_EXP_m.py index c6c3fa4..644661b 100644 --- a/experiments/inference_error/run_error_discrete_EXP_m.py +++ b/experiments/inference_error/run_error_discrete_EXP_m.py @@ -66,11 +66,14 @@ def simulate_data(baseline, alpha, decay, T, dt, seed=0): def run_solver(events, decay_init, baseline_init, alpha_init, dt, T, seed=0): start = time.time() max_iter = 2000 + init = { + 'alpha': torch.tensor(alpha_init), + 'baseline': torch.tensor(baseline_init), + 'kernel': [torch.tensor(decay_init)] + } solver = FaDIn(2, "truncated_exponential", - 
[torch.tensor(decay_init)], - torch.tensor(baseline_init), - torch.tensor(alpha_init), + init=init, delta=dt, optim="RMSprop", step_size=1e-3, diff --git a/experiments/inference_error/run_error_discrete_RC.py b/experiments/inference_error/run_error_discrete_RC.py index f1d6a3e..51cc9ab 100644 --- a/experiments/inference_error/run_error_discrete_RC.py +++ b/experiments/inference_error/run_error_discrete_RC.py @@ -62,12 +62,14 @@ def simulate_data(baseline, alpha, mu, sigma, T, dt, seed=0): def run_solver(events, u_init, sigma_init, baseline_init, alpha_init, dt, T, seed=0): start = time.time() max_iter = 2000 + init = { + 'alpha': torch.tensor(alpha_init), + 'baseline': torch.tensor(baseline_init), + 'kernel': [torch.tensor(u_init), torch.tensor(sigma_init)] + } solver = FaDIn(2, "raised_cosine", - [torch.tensor(u_init), - torch.tensor(sigma_init)], - torch.tensor(baseline_init), - torch.tensor(alpha_init), + init=init, delta=dt, optim="RMSprop", step_size=1e-3, diff --git a/experiments/inference_error/run_error_discrete_RC_m.py b/experiments/inference_error/run_error_discrete_RC_m.py index c0159d1..b6830e2 100644 --- a/experiments/inference_error/run_error_discrete_RC_m.py +++ b/experiments/inference_error/run_error_discrete_RC_m.py @@ -66,12 +66,14 @@ def simulate_data(baseline, alpha, mu, sigma, T, dt, seed=0): def run_solver(events, u_init, sigma_init, baseline_init, alpha_init, dt, T, seed=0): start = time.time() max_iter = 2000 + init = { + 'alpha': torch.tensor(alpha_init), + 'baseline': torch.tensor(baseline_init), + 'kernel': [torch.tensor(u_init), torch.tensor(sigma_init)] + } solver = FaDIn(2, "raised_cosine", - [torch.tensor(u_init), - torch.tensor(sigma_init)], - torch.tensor(baseline_init), - torch.tensor(alpha_init), + init=init, delta=dt, optim="RMSprop", step_size=1e-3, max_iter=max_iter, diff --git a/experiments/inference_error/run_error_discrete_TG_m.py b/experiments/inference_error/run_error_discrete_TG_m.py index 93dc325..4954921 100644 --- a/experiments/inference_error/run_error_discrete_TG_m.py +++ b/experiments/inference_error/run_error_discrete_TG_m.py @@ -62,12 +62,14 @@ def simulate_data(baseline, alpha, m, sigma, T, dt, seed=0): def run_solver(events, m_init, sigma_init, baseline_init, alpha_init, dt, T, seed=0): start = time.time() max_iter = 2000 + init = { + "baseline": baseline_init, + "alpha": alpha_init, + 'kernel': [torch.tensor(m_init), torch.tensor(sigma_init)] + } solver = FaDIn(2, "truncated_gaussian", - [torch.tensor(m_init), - torch.tensor(sigma_init)], - torch.tensor(baseline_init), - torch.tensor(alpha_init), + init=init, delta=dt, optim="RMSprop", step_size=1e-3, max_iter=max_iter, diff --git a/experiments/inference_error/run_sensi_analysis_length.py b/experiments/inference_error/run_sensi_analysis_length.py index 0105199..5b5cf1a 100644 --- a/experiments/inference_error/run_sensi_analysis_length.py +++ b/experiments/inference_error/run_sensi_analysis_length.py @@ -69,11 +69,14 @@ def run_solver(events, decay_init, baseline_init, alpha_init, kernel_length, T, dt, seed=0): start = time.time() max_iter = 2000 + init = { + 'alpha': torch.tensor(alpha_init), + 'baseline': torch.tensor(baseline_init), + 'kernel': [torch.tensor(decay_init)] + } solver = FaDIn(1, "truncated_exponential", - [torch.tensor(decay_init)], - torch.tensor(baseline_init), - torch.tensor(alpha_init), + init=init, kernel_length=kernel_length, delta=dt, optim="RMSprop", step_size=1e-3, max_iter=max_iter diff --git a/experiments/inference_error/run_time_dim.py 
b/experiments/inference_error/run_time_dim.py index 7e1c959..d574e58 100644 --- a/experiments/inference_error/run_time_dim.py +++ b/experiments/inference_error/run_time_dim.py @@ -55,11 +55,14 @@ def run_solver(events, decay_init, baseline_init, alpha_init, T, dt, seed=0): start = time.time() max_iter = 800 n_dim = baseline_init.shape[0] + init = { + "baseline": baseline_init, + "alpha": alpha_init, + "kernel": [decay_init] + } solver = FaDIn(n_dim, "truncated_exponential", - [torch.tensor(decay_init)], - torch.tensor(baseline_init), - torch.tensor(alpha_init), + init=init, delta=dt, optim="RMSprop", step_size=1e-3, max_iter=max_iter, diff --git a/fadin/solver.py b/fadin/solver.py index 2aa98fd..bf3489b 100644 --- a/fadin/solver.py +++ b/fadin/solver.py @@ -48,13 +48,14 @@ class FaDIn(object): kernel : `str` or `callable` Either define a kernel in ``{'raised_cosine' | 'truncated_gaussian' | 'truncated_exponential'}`` or a custom kernel. - TODO: ADD MOMENT MATCHING TO FADIN - init: dict default={'strategy': 'random'} + + init: `str` or `dict`, default='random' Initialization strategy of the parameters of the Hawkes process. - Must contain key 'strategy' in - ``{'random' | 'moment_matching', 'given'}``. - If 'strategy' is set to 'custom', the dictionary must contain the - following keys: + If set to 'random', the parameters are initialized randomly. + If set to 'moment_matching', the parameters are initialized + using the moment matching method. + Otherwise, the parameters are initialized using the given dictionary, + , which must contain the following keys: - 'baseline': `tensor` or `None`, shape (n_dim,): Initial baseline - 'alpha': `tensor` or `None`, shape (n_dim, n_dim): Initial alpha - 'kernel': `list` of tensor or `None`: Initial kernel parameters @@ -140,7 +141,7 @@ class FaDIn(object): If no early stopping, `n_iter` is equal to `max_iter`. 
""" - def __init__(self, n_dim, kernel, init={'strategy': 'random'}, + def __init__(self, n_dim, kernel, init='random', baseline_mask=None, alpha_mask=None, kernel_length=1, delta=0.01, optim='RMSprop', @@ -163,9 +164,9 @@ def __init__(self, n_dim, kernel, init={'strategy': 'random'}, # params model self.n_dim = n_dim - if init['strategy'] in ['random', 'moment_matching']: + if init in ['random', 'moment_matching'] or init['baseline'] is None: self.baseline = torch.rand(self.n_dim) - elif init['strategy'] == 'given': + else: self.baseline = init['baseline'].float() if baseline_mask is None: self.baseline_mask = torch.ones([n_dim]) @@ -175,9 +176,9 @@ def __init__(self, n_dim, kernel, init={'strategy': 'random'}, self.baseline_mask = baseline_mask self.baseline = (self.baseline * self.baseline_mask).requires_grad_(True) - if init['strategy'] in ['random', 'moment_matching']: + if init in ['random', 'moment_matching'] or init['alpha'] is None: self.alpha = torch.rand(self.n_dim, self.n_dim) - elif init['strategy'] == 'given': + else: self.alpha = init['alpha'].float() if alpha_mask is None: self.alpha_mask = torch.ones([self.n_dim, self.n_dim]) @@ -187,10 +188,7 @@ def __init__(self, n_dim, kernel, init={'strategy': 'random'}, self.alpha_mask = alpha_mask self.alpha = (self.alpha * self.alpha_mask).requires_grad_(True) - if init['strategy'] == 'given': - kernel_params_init = init['kernel'] - - elif init['strategy'] in ['random', 'custom']: + if init in ['random', 'moment_matching'] or init['kernel'] is None: kernel_params_init = [] if kernel == 'raised_cosine': temp = 0.5 * self.W * torch.rand(self.n_dim, self.n_dim) @@ -208,6 +206,8 @@ def __init__(self, n_dim, kernel, init={'strategy': 'random'}, else: raise NotImplementedError('kernel initial parameters of not \ implemented kernel have to be given') + elif init['kernel'] is None: + kernel_params_init = init['kernel'] self.kernel_params_fixed = kernel_params_init diff --git a/fadin/tests/test_mask.py b/fadin/tests/test_mask.py index 8185517..e606bff 100644 --- a/fadin/tests/test_mask.py +++ b/fadin/tests/test_mask.py @@ -9,22 +9,22 @@ # %% Function def maskedsolver(events, T, kernel, max_iter=1000, ztzG_approx=False, - baseline_mask=None, baseline_init=None, - alpha_mask=None, alpha_init=None, + baseline_mask=None, init='random', alpha_mask=None, random_state=0): - solver = FaDIn(n_dim=n_dim, - kernel=kernel, - baseline_mask=baseline_mask, - baseline_init=baseline_init, - alpha_init=alpha_init, - alpha_mask=alpha_mask, - kernel_length=kernel_length, - delta=dt, optim="RMSprop", - params_optim=params_optim, - max_iter=max_iter, criterion='l2', - ztzG_approx=ztzG_approx, - random_state=random_state - ) + solver = FaDIn( + n_dim=n_dim, + kernel=kernel, + baseline_mask=baseline_mask, + init=init, + alpha_mask=alpha_mask, + kernel_length=kernel_length, + delta=dt, optim="RMSprop", + params_optim=params_optim, + max_iter=max_iter, + criterion='l2', + ztzG_approx=ztzG_approx, + random_state=random_state + ) solver.fit(events, T) estimated_baseline = solver.params_intens[0] estimated_alpha = solver.params_intens[1] @@ -57,8 +57,12 @@ def maskedsolver(events, T, kernel, max_iter=1000, ztzG_approx=False, discretization = torch.linspace(0, kernel_length, L) baseline_mask = torch.Tensor([0, 0]) alpha_mask = torch.Tensor([[0, 0], [1, 0]]) -alpha_init = torch.Tensor([[0.2, 0.4], [0.7, 0.9]]) -baseline_init = torch.Tensor([0.7, 0.4]) +init1 = { + 'alpha': torch.Tensor([[0.2, 0.4], [0.7, 0.9]]), + 'baseline': torch.Tensor([0.7, 0.4]), + 'kernel': None +} 
+init2 = 'random' max_iter = 5000 ztzG_approx = True params_optim = {'lr': 1e-3} @@ -75,14 +79,16 @@ def test_exp_mask(): random_state=simu_random_state) # Fit Hawkes process to exponential simulation - exp_bl, exp_alpha = maskedsolver(kernel='truncated_exponential', - events=events, T=T, - baseline_mask=baseline_mask, - alpha_mask=alpha_mask, - baseline_init=baseline_init, - alpha_init=alpha_init, - ztzG_approx=ztzG_approx, - random_state=simu_random_state) + for init in [init1, init2]: + exp_bl, exp_alpha = maskedsolver( + kernel='truncated_exponential', + events=events, T=T, + baseline_mask=baseline_mask, + alpha_mask=alpha_mask, + init=init, + ztzG_approx=ztzG_approx, + random_state=simu_random_state + ) assert torch.allclose(exp_bl, torch.Tensor([0., 0.])) assert torch.allclose(exp_alpha * torch.Tensor([[1., 1.], [0., 1.]]), torch.zeros(2, 2)) @@ -105,14 +111,17 @@ def test_rc_mask(): random_state=simu_random_state) # %% Fit Hawkes process to raised_cosine simulation - rc_bl, rc_alpha = maskedsolver(kernel='raised_cosine', events=events_rc, - T=T, - baseline_mask=baseline_mask, - alpha_mask=alpha_mask, - baseline_init=baseline_init, - alpha_init=alpha_init, - ztzG_approx=ztzG_approx, - random_state=simu_random_state) + for init in [init1, init2]: + rc_bl, rc_alpha = maskedsolver( + kernel='raised_cosine', + events=events_rc, + T=T, + baseline_mask=baseline_mask, + alpha_mask=alpha_mask, + init=init, + ztzG_approx=ztzG_approx, + random_state=simu_random_state + ) assert torch.allclose(rc_bl, torch.Tensor([0., 0.])) assert torch.allclose(rc_alpha * torch.Tensor([[1., 1.], [0., 1.]]), torch.zeros(2, 2)) From 6448db460db35fd3caf10c9368c87c920b59c78c Mon Sep 17 00:00:00 2001 From: vloison Date: Fri, 31 May 2024 13:39:42 +0200 Subject: [PATCH 03/25] condense optim_mask and add moment_matching in FaDIn init --- experiments/benchmark/run_nonparam_exp.py | 2 +- .../inference_error/run_error_discrete_RC.py | 3 +- .../inference_error/run_error_discrete_TG.py | 13 ++- experiments/meg/sample.py | 37 ++---- fadin/solver.py | 84 ++++++++----- fadin/tests/test_mask.py | 18 ++- fadin/utils/utils.py | 110 ++++++++++++++++++ fadin/utils/utils_meg.py | 2 +- 8 files changed, 195 insertions(+), 74 deletions(-) diff --git a/experiments/benchmark/run_nonparam_exp.py b/experiments/benchmark/run_nonparam_exp.py index c5a3c25..d2d0210 100644 --- a/experiments/benchmark/run_nonparam_exp.py +++ b/experiments/benchmark/run_nonparam_exp.py @@ -76,7 +76,7 @@ def run_solver(events, decay_init, baseline_init, alpha_init, T, dt, seed=0): log=False, random_state=seed, device="cpu", - optimize_kernel=True) + ) results = solver.fit(events, T) return results diff --git a/experiments/inference_error/run_error_discrete_RC.py b/experiments/inference_error/run_error_discrete_RC.py index 51cc9ab..ffdec14 100644 --- a/experiments/inference_error/run_error_discrete_RC.py +++ b/experiments/inference_error/run_error_discrete_RC.py @@ -76,8 +76,7 @@ def run_solver(events, u_init, sigma_init, baseline_init, alpha_init, dt, T, see max_iter=max_iter, log=False, random_state=0, - device="cpu", - optimize_kernel=True) + device="cpu") print(time.time() - start) results = solver.fit(events, T) results_ = dict(param_baseline=results['param_baseline'][-10:].mean().item(), diff --git a/experiments/inference_error/run_error_discrete_TG.py b/experiments/inference_error/run_error_discrete_TG.py index 86891a7..8cb2d7e 100644 --- a/experiments/inference_error/run_error_discrete_TG.py +++ b/experiments/inference_error/run_error_discrete_TG.py @@ 
-63,20 +63,21 @@ def simulate_data(baseline, alpha, m, sigma, T, dt, seed=0): def run_solver(events, m_init, sigma_init, baseline_init, alpha_init, T, dt, seed=0): start = time.time() max_iter = 2000 + init = { + 'alpha': torch.tensor(alpha_init), + 'baseline': torch.tensor(baseline_init), + 'kernel': [torch.tensor(m_init), torch.tensor(sigma_init)] + } solver = FaDIn(2, "truncated_gaussian", - [torch.tensor(m_init), - torch.tensor(sigma_init)], - torch.tensor(baseline_init), - torch.tensor(alpha_init), + init=init, delta=dt, optim="RMSprop", step_size=1e-3, max_iter=max_iter, log=False, random_state=0, - device="cpu", - optimize_kernel=True) + device="cpu") print(time.time() - start) results = solver.fit(events, T) diff --git a/experiments/meg/sample.py b/experiments/meg/sample.py index a24ae68..b5bacfe 100644 --- a/experiments/meg/sample.py +++ b/experiments/meg/sample.py @@ -7,16 +7,17 @@ get_atoms_timestamps # Load CDL output -with open("experiments/meg/dict_sample.json", "r") as fp: +with open("experiments/meg/cdl_sample.json", "r") as fp: dict_cdl = json.load(fp) BL_MASK = torch.Tensor([1, 1, 1]) ALPHA_MASK = torch.Tensor([[0, 0, 0], [0, 0, 0], [1, 1, 0]]) +OPTIM_MASK = {'baseline': BL_MASK, 'alpha': ALPHA_MASK} def fit_fadin_sample(list_tasks, atom, cdl_dict, filter_interval, thresh_acti, - kernel, baseline_mask, alpha_mask, + kernel, optim_mask, plotfig=False, figtitle=None, savefig=None, **fadin_init): """ @@ -119,7 +120,7 @@ def fit_fadin_sample(list_tasks, atom, cdl_dict, filter_interval, thresh_acti, # Fit Hawkes process to data solver = FaDIn(n_dim=len(events_acti_tt), kernel=kernel, - baseline_mask=baseline_mask, alpha_mask=alpha_mask, + optim_mask=optim_mask, **fadin_init) solver.fit(events_acti_tt, T) # Return results @@ -141,8 +142,7 @@ def fit_fadin_sample(list_tasks, atom, cdl_dict, filter_interval, thresh_acti, filter_interval=0.01, thresh_acti=0.6e-10, kernel='truncated_gaussian', - baseline_mask=BL_MASK, - alpha_mask=ALPHA_MASK, + optim_mask=OPTIM_MASK, kernel_length=0.5, delta=0.02, optim='RMSprop', @@ -151,7 +151,7 @@ def fit_fadin_sample(list_tasks, atom, cdl_dict, filter_interval, thresh_acti, ztzG_approx=False, figtitle='Masked FaDIn with TG kernels, atom 6 filt and thresh actis', savefig='fit_fadin_sample_plots/nomarked_masked_tg_6_practi.png', - plotfig=True + plotfig=True, ) print('Truncated gaussian, atom 6, with mask') print('Estimated baseline:', tg_atom6_mask[0]) @@ -165,8 +165,7 @@ def fit_fadin_sample(list_tasks, atom, cdl_dict, filter_interval, thresh_acti, filter_interval=0.01, thresh_acti=0.6e-10, kernel='truncated_gaussian', - baseline_mask=BL_MASK, - alpha_mask=ALPHA_MASK, + optim_mask=OPTIM_MASK, kernel_length=0.5, delta=0.02, optim='RMSprop', @@ -188,8 +187,7 @@ def fit_fadin_sample(list_tasks, atom, cdl_dict, filter_interval, thresh_acti, filter_interval=0.01, thresh_acti=0.6e-10, kernel='raised_cosine', - baseline_mask=BL_MASK, - alpha_mask=ALPHA_MASK, + optim_mask=OPTIM_MASK, kernel_length=0.5, delta=0.02, optim='RMSprop', @@ -211,8 +209,7 @@ def fit_fadin_sample(list_tasks, atom, cdl_dict, filter_interval, thresh_acti, filter_interval=0.01, thresh_acti=0.6e-10, kernel='raised_cosine', - baseline_mask=BL_MASK, - alpha_mask=ALPHA_MASK, + optim_mask=OPTIM_MASK, kernel_length=0.5, delta=0.02, optim='RMSprop', @@ -239,8 +236,6 @@ def fit_fadin_sample(list_tasks, atom, cdl_dict, filter_interval, thresh_acti, # filter_interval=0.01, # thresh_acti=0.6e-10, # kernel='truncated_gaussian', -# baseline_mask=None, -# alpha_mask=None, # kernel_length=0.5, # 
delta=0.02, # optim='RMSprop', @@ -263,8 +258,6 @@ def fit_fadin_sample(list_tasks, atom, cdl_dict, filter_interval, thresh_acti, # filter_interval=0.01, # thresh_acti=0.6e-10, # kernel='truncated_gaussian', -# baseline_mask=None, -# alpha_mask=None, # kernel_length=0.5, # delta=0.02, # optim='RMSprop', @@ -287,8 +280,6 @@ def fit_fadin_sample(list_tasks, atom, cdl_dict, filter_interval, thresh_acti, # filter_interval=None, # thresh_acti=0., # kernel='truncated_gaussian', -# baseline_mask=None, -# alpha_mask=None, # kernel_length=0.5, # delta=0.02, # optim='RMSprop', @@ -306,8 +297,7 @@ def fit_fadin_sample(list_tasks, atom, cdl_dict, filter_interval, thresh_acti, # filter_interval=None, # thresh_acti=0., # kernel='truncated_gaussian', -# baseline_mask=BL_MASK, -# alpha_mask=ALPHA_MASK, +# optim_mask=OPTIM_MASK, # kernel_length=0.5, # delta=0.02, # optim='RMSprop', @@ -344,8 +334,7 @@ def fit_fadin_sample(list_tasks, atom, cdl_dict, filter_interval, thresh_acti, # filter_interval=None, # thresh_acti=0., # kernel='truncated_gaussian', -# baseline_mask=BL_MASK, -# alpha_mask=ALPHA_MASK, +# optim_mask=OPTIM_MASK, # kernel_length=0.5, # delta=0.02, # optim='RMSprop', @@ -367,8 +356,6 @@ def fit_fadin_sample(list_tasks, atom, cdl_dict, filter_interval, thresh_acti, # filter_interval=0.01, # thresh_acti=0.6e-10, # kernel='raised_cosine', -# baseline_mask=None, -# alpha_mask=None, # kernel_length=0.5, # delta=0.02, # optim='RMSprop', @@ -390,8 +377,6 @@ def fit_fadin_sample(list_tasks, atom, cdl_dict, filter_interval, thresh_acti, # filter_interval=0.01, # thresh_acti=0.6e-10, # kernel='raised_cosine', -# baseline_mask=None, -# alpha_mask=None, # kernel_length=0.5, # delta=0.02, # optim='RMSprop', diff --git a/fadin/solver.py b/fadin/solver.py index bf3489b..6d36b15 100644 --- a/fadin/solver.py +++ b/fadin/solver.py @@ -3,7 +3,7 @@ import matplotlib.pyplot as plt import numpy as np -from fadin.utils.utils import optimizer, projected_grid +from fadin.utils.utils import optimizer, projected_grid, momentmatching from fadin.utils.compute_constants import get_zG, get_zN, get_ztzG, \ get_ztzG_approx from fadin.loss_and_gradient import discrete_l2_loss_precomputation, \ @@ -48,7 +48,7 @@ class FaDIn(object): kernel : `str` or `callable` Either define a kernel in ``{'raised_cosine' | 'truncated_gaussian' | 'truncated_exponential'}`` or a custom kernel. - + TODO: update moment_matching methods with DataFrame inputs init: `str` or `dict`, default='random' Initialization strategy of the parameters of the Hawkes process. If set to 'random', the parameters are initialized randomly. @@ -58,22 +58,25 @@ class FaDIn(object): , which must contain the following keys: - 'baseline': `tensor` or `None`, shape (n_dim,): Initial baseline - 'alpha': `tensor` or `None`, shape (n_dim, n_dim): Initial alpha - - 'kernel': `list` of tensor or `None`: Initial kernel parameters - Note that when one of 'baseline', 'alpha', or 'kernel' is set to None, - the corresponding parameters are initialized randomly. - - TODO: CONDENSE IN MASK PARAMETERS - baseline_mask : `tensor` of shape (n_dim,), or `None` - Tensor of same shape as the baseline vector, with values in (0, 1). - `baseline` coordinates where `baseline_mask` is equal to 0 - will stay constant equal to zero and not be optimized. - If set to `None`, all coordinates of baseline will be optimized. - alpha_mask : `tensor` of shape (n_dim, n_dim), or `None` - Tensor of same shape as the `alpha` tensor, with values in (0, 1). 
- `alpha` coordinates and kernel parameters where `alpha_mask` = 0 - will not be optimized. - If set to `None`, all coordinates of alpha will be optimized, - and all kernel parameters will be optimized. + - 'kernel': `list` of tensors of shape (n_dim, n_dim) or `None`: + Initial kernel parameters + Note that when at least one of 'baseline', 'alpha', or 'kernel' + is set to None, the corresponding parameters are initialized randomly. + + optim_mask: `dict` of `tensor` or `None`, default=`None`. + Dictionary containing the masks for the optimization of the parameters + of the Hawkes process. If set to `None`, all parameters are optimized. + The dictionary must contain the following keys: + - 'baseline': `tensor` of shape (n_dim,), or `None`. + Tensor of same shape as the baseline vector, with values in (0, 1). + `baseline` coordinates where the tensor is equal to 0 + will not be optimized. If set to `None`, all coordinates of + baseline will be optimized. + - 'alpha': `tensor` of shape (n_dim, n_dim), or `None`. + Tensor of same shape as the `alpha` tensor, with values in (0, 1). + `alpha` coordinates and kernel parameters where the tensor is equal to 0 + will not be optimized. If set to `None`, all coordinates of alpha + and kernel parameters will be optimized. kernel_length : `float`, `default=1.` Length of kernels in the Hawkes process. @@ -142,8 +145,7 @@ class FaDIn(object): """ def __init__(self, n_dim, kernel, init='random', - baseline_mask=None, - alpha_mask=None, + optim_mask=None, kernel_length=1, delta=0.01, optim='RMSprop', params_optim=dict(), max_iter=2000, precomputations=True, ztzG_approx=True, @@ -160,32 +162,34 @@ def __init__(self, n_dim, kernel, init='random', self.max_iter = max_iter self.log = log self.tol = tol + if optim_mask is None: + optim_mask = {'baseline': None, 'alpha': None} # params model self.n_dim = n_dim - + self.moment_matching = (init == 'moment_matching') if init in ['random', 'moment_matching'] or init['baseline'] is None: self.baseline = torch.rand(self.n_dim) else: self.baseline = init['baseline'].float() - if baseline_mask is None: + if optim_mask['baseline'] is None: self.baseline_mask = torch.ones([n_dim]) else: - assert baseline_mask.shape == self.baseline.shape, \ + assert optim_mask['baseline'].shape == self.baseline.shape, \ "Invalid baseline_mask shape, must be (n_dim,)" - self.baseline_mask = baseline_mask + self.baseline_mask = optim_mask['baseline'] self.baseline = (self.baseline * self.baseline_mask).requires_grad_(True) if init in ['random', 'moment_matching'] or init['alpha'] is None: self.alpha = torch.rand(self.n_dim, self.n_dim) else: self.alpha = init['alpha'].float() - if alpha_mask is None: + if optim_mask['alpha'] is None: self.alpha_mask = torch.ones([self.n_dim, self.n_dim]) else: - assert alpha_mask.shape == self.alpha.shape, \ + assert optim_mask['alpha'].shape == self.alpha.shape, \ "Invalid alpha_mask shape, must be (n_dim, n_dim)" - self.alpha_mask = alpha_mask + self.alpha_mask = optim_mask['alpha'] self.alpha = (self.alpha * self.alpha_mask).requires_grad_(True) if init in ['random', 'moment_matching'] or init['kernel'] is None: @@ -230,8 +234,9 @@ def __init__(self, n_dim, kernel, init='random', # If the learning rate is not given, fix it to 1e-3 if 'lr' in params_optim.keys(): params_optim['lr'] = 1e-3 + self.params_solver = params_optim - self.opt = optimizer(self.params_intens, params_optim, solver=optim) + # self.opt = optimizer(self.params_intens, params_optim, solver=optim) if criterion == 'll': self.precomputations =
False @@ -268,6 +273,29 @@ def fit(self, events, end_time): discretization = torch.linspace(0, self.W, self.L) events_grid = projected_grid(events, self.delta, n_grid) n_events = events_grid.sum(1) + n_ground_events = [events[i].shape[0] for i in range(len(events))] + print('number of events is:', n_ground_events) + n_ground_events = torch.tensor(n_ground_events) + + if self.moment_matching: + # Moment matching initialization of Hawkes parameters + baseline, alpha, kernel_params_init = momentmatching( + self, events, n_ground_events, end_time + ) + self.baseline = baseline + self.alpha = alpha + # Set optimizer with moment_matching parameters + self.params_intens = [self.baseline, self.alpha] + + for i in range(self.n_params_kernel): + self.params_intens.append( + kernel_params_init[i].float().clip(1e-4).requires_grad_(True) + ) + self.opt = optimizer( + self.params_intens, + self.params_solver, + solver=self.solver + ) #################################################### # Precomputations diff --git a/fadin/tests/test_mask.py b/fadin/tests/test_mask.py index e606bff..945e7e3 100644 --- a/fadin/tests/test_mask.py +++ b/fadin/tests/test_mask.py @@ -9,14 +9,12 @@ # %% Function def maskedsolver(events, T, kernel, max_iter=1000, ztzG_approx=False, - baseline_mask=None, init='random', alpha_mask=None, - random_state=0): + optim_mask=None, init='random', random_state=0): solver = FaDIn( n_dim=n_dim, kernel=kernel, - baseline_mask=baseline_mask, + optim_mask=optim_mask, init=init, - alpha_mask=alpha_mask, kernel_length=kernel_length, delta=dt, optim="RMSprop", params_optim=params_optim, @@ -55,8 +53,10 @@ def maskedsolver(events, T, kernel, max_iter=1000, ztzG_approx=False, L = int(1 / dt) size_grid = int(T / dt) + 1 discretization = torch.linspace(0, kernel_length, L) -baseline_mask = torch.Tensor([0, 0]) -alpha_mask = torch.Tensor([[0, 0], [1, 0]]) +optim_mask = { + 'baseline': torch.Tensor([0, 0]), + 'alpha': torch.Tensor([[0, 0], [1, 0]]) +} init1 = { 'alpha': torch.Tensor([[0.2, 0.4], [0.7, 0.9]]), 'baseline': torch.Tensor([0.7, 0.4]), @@ -83,8 +83,7 @@ def test_exp_mask(): exp_bl, exp_alpha = maskedsolver( kernel='truncated_exponential', events=events, T=T, - baseline_mask=baseline_mask, - alpha_mask=alpha_mask, + optim_mask=optim_mask, init=init, ztzG_approx=ztzG_approx, random_state=simu_random_state @@ -116,8 +115,7 @@ def test_rc_mask(): kernel='raised_cosine', events=events_rc, T=T, - baseline_mask=baseline_mask, - alpha_mask=alpha_mask, + optim_mask=optim_mask, init=init, ztzG_approx=ztzG_approx, random_state=simu_random_state diff --git a/fadin/utils/utils.py b/fadin/utils/utils.py index 36e9270..b0ea138 100644 --- a/fadin/utils/utils.py +++ b/fadin/utils/utils.py @@ -1,5 +1,6 @@ import numpy as np import torch +import matplotlib.pyplot as plt def kernel_normalization(kernel_values, time_values, delta, lower=0, upper=1): @@ -127,3 +128,112 @@ def check_random_state(seed): return seed raise ValueError('%r cannot be used to seed a numpy.random.RandomState' ' instance' % seed) + + +def momentmatching_kernel(solver, events, n_ground_events, + plot_delta=False, mode='max'): + """Moment matching initialization of kernel parameters. Implemented for + 'truncated_gaussian' and 'raised_cosine' kernels. 
+    For the truncated gaussian kernel, the means $m_{i,j}$ and std + $\\sigma_{i,j}$ are: + $m_{i, j} = + \\frac{1}{N_{g_i}(T)}\\sum_{t_n^i \\in \\mathscr{F}_T^i} \\delta t^{i, j}_n$ + $\\sigma_{i, j} = + \\sqrt{\\dfrac{ + \\sum_{t_n^i \\in \\mathscr{F}_T^i} (\\delta t^{i, j}_n - m_{i, j})^2 + }{N_{g_i}(T) - 1}}$. + For the raised cosine kernel, the parameters $u_{i,j}$ and $s_{i,j}$ are: + $u^{\\text{m}}_{i, j} = + \\max{(0, m^{\\text{m}}_{i, j} - \\sigma^{\\text{m}}_{i, j})}$ + $s^{\\text{m}}_{i, j} = \\sigma_{i, j}^{\\text{m}}$ + + Parameters + ---------- + solver : `FaDIn` or `MarkedFaDIn` object + The solver object + events : list of torch.tensor + List of events for each dimension + n_ground_events : torch.tensor + Number of ground events for each dimension + plot_delta : bool, default=False + Whether to plot the delta_t distribution + mode : str, default='max' + Mode to compute the delta_t distribution. Supported values are 'max' + and 'mean'. + + Returns + ------- + list of torch.tensor + List of kernel parameters + + """ + kernel_params_init = [torch.ones(solver.n_dim, solver.n_dim), + torch.ones(solver.n_dim, solver.n_dim)] + for i in range(solver.n_dim): + print('i', i) + for j in range(solver.n_dim): + # Mean, std of time delta of [i, j] kernel + if mode == 'max': + delta_t = torch.zeros(int(n_ground_events[i].item())) + for n in range(int(n_ground_events[i])): + print('n', n) + t_n_i = events[i][n][0] + t_n_j = torch.max( + torch.where(torch.tensor(events[j][:, 0]) < t_n_i, + torch.tensor(events[j][:, 0]), + 0.) + ) + delta_t[n] = t_n_i - t_n_j + avg = torch.mean(delta_t) + std = torch.std(delta_t) + if mode == 'mean': + delta_t = [] + for n in range(int(n_ground_events[i])): + t_n_i = events[i][n, 0] + for t_n_j in events[j][:, 0]: + if t_n_j < t_n_i - solver.W: + continue + if t_n_j >= t_n_i: + break + delta_t.append(t_n_i - t_n_j) + avg = torch.mean(torch.tensor(delta_t)) + std = torch.std(torch.tensor(delta_t)) + # Plot delta_t distribution + if plot_delta: + fig_delta, ax_delta = plt.subplots() + ax_delta.hist(delta_t, bins=20) + ax_delta.set_xlim([0, solver.W]) + ax_delta.set_xlabel('Time') + ax_delta.set_ylabel('Histogram') + fig_delta.suptitle('Moment Matching delta_t') + fig_delta.show() + # Parameters of [i, j] kernel + if solver.kernel == 'truncated_gaussian': + kernel_params_init[0][i, j] = avg + if solver.kernel == 'raised_cosine': + u = max(avg - std, 0) + kernel_params_init[0][i, j] = u + kernel_params_init[1][i, j] = std + return kernel_params_init + + +def momentmatching(solver, events, n_ground_events, end_time): + """Moment matching initialization of baseline, alpha and kernel parameters. + $\\mu_i^s = \\frac{\\#\\mathscr{F}^i_T}{(D+1)T} \\forall i \\in [1, D]$ + $\\alpha_{i, j}^s = \\frac{1}{D+1} \\forall i,j \\in [1, D]$ + Kernel parameters are initialized by `momentmatching_kernel`.
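+    As an illustrative check of these formulas (with made-up numbers): for D = 2 dimensions, an end time T = 100 and 60 events observed in dimension i, this gives $\\mu_i^s = 60 / (3 \\times 100) = 0.2$ and $\\alpha_{i, j}^s = 1/3$ for every i, j.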
+ """ + assert solver.kernel in ['truncated_gaussian', 'raised_cosine'], ( + f"Smart initialization not implemented for kernel {solver.kernel}" + ) + # Baseline init + baseline = n_ground_events / (end_time * (solver.n_dim + 1)) + baseline = (baseline * solver.baseline_mask).requires_grad_(True) + + # Alpha init + alpha = torch.ones(solver.n_dim, solver.n_dim) / (solver.n_dim + 1) + alpha = (alpha * solver.alpha_mask).requires_grad_(True) + + # Kernel parameters init + kernel_params_init = momentmatching_kernel(solver, events, n_ground_events) + return baseline, alpha, kernel_params_init diff --git a/fadin/utils/utils_meg.py b/fadin/utils/utils_meg.py index 6ff1579..8cdfd9f 100644 --- a/fadin/utils/utils_meg.py +++ b/fadin/utils/utils_meg.py @@ -169,6 +169,6 @@ def get_atoms_timestamps(acti, sfreq=None, info=None, threshold=0, threshold = np.percentile(acti[acti > 0], threshold) atoms_timestamps = [np.where(acti[i] > threshold)[0] / sfreq - for i in range(n_atoms)] + for i in range(n_atoms)][0] return atoms_timestamps From 98b71719a07283efcf0c0b6a01e70c4750f54724 Mon Sep 17 00:00:00 2001 From: vloison Date: Fri, 31 May 2024 15:28:54 +0200 Subject: [PATCH 04/25] clean code --- experiments/benchmark/run_nonparam_rc.py | 2 +- experiments/benchmark/run_nonparam_tg.py | 2 +- experiments/example_multivariate.py | 2 +- .../inference_error/run_error_discrete_EXP.py | 1 - .../run_error_discrete_EXP_m.py | 1 - .../run_error_discrete_RC_m.py | 1 - .../run_error_discrete_TG_m.py | 1 - experiments/inference_error/run_time_dim.py | 1 - experiments/meg/sample.py | 309 +++++------------- fadin/loss_and_gradient.py | 11 +- fadin/solver.py | 61 ++-- fadin/utils/utils.py | 2 +- 12 files changed, 122 insertions(+), 272 deletions(-) diff --git a/experiments/benchmark/run_nonparam_rc.py b/experiments/benchmark/run_nonparam_rc.py index 3a5bdc5..5fc6345 100644 --- a/experiments/benchmark/run_nonparam_rc.py +++ b/experiments/benchmark/run_nonparam_rc.py @@ -79,7 +79,7 @@ def run_solver(events, u_init, sigma_init, baseline_init, alpha_init, T, dt, see log=False, random_state=seed, device="cpu", - optimize_kernel=True) + ) results = solver.fit(events, T) return results diff --git a/experiments/benchmark/run_nonparam_tg.py b/experiments/benchmark/run_nonparam_tg.py index ad51a1a..abeedb3 100644 --- a/experiments/benchmark/run_nonparam_tg.py +++ b/experiments/benchmark/run_nonparam_tg.py @@ -78,7 +78,7 @@ def run_solver(events, m_init, sigma_init, baseline_init, alpha_init, T, dt, see log=False, random_state=seed, device="cpu", - optimize_kernel=True) + ) results = solver.fit(events, T) return results diff --git a/experiments/example_multivariate.py b/experiments/example_multivariate.py index 989da2d..5536405 100644 --- a/experiments/example_multivariate.py +++ b/experiments/example_multivariate.py @@ -64,7 +64,7 @@ def run_solver(events, dt, T, solver = FaDIn(2, "raised_cosine", delta=dt, optim="RMSprop", max_iter=max_iter, - optimize_kernel=True, precomputations=True, + precomputations=True, ztzG_approx=ztzG_approx, device='cpu', log=False, tol=10e-6 ) diff --git a/experiments/inference_error/run_error_discrete_EXP.py b/experiments/inference_error/run_error_discrete_EXP.py index 731ddfb..d9bc853 100644 --- a/experiments/inference_error/run_error_discrete_EXP.py +++ b/experiments/inference_error/run_error_discrete_EXP.py @@ -76,7 +76,6 @@ def run_solver(events, decay_init, baseline_init, alpha_init, T, dt, seed=0): log=False, random_state=0, device="cpu", - optimize_kernel=True ) print(time.time() - start) results = 
solver.fit(events, T) diff --git a/experiments/inference_error/run_error_discrete_EXP_m.py b/experiments/inference_error/run_error_discrete_EXP_m.py index 644661b..a05d33d 100644 --- a/experiments/inference_error/run_error_discrete_EXP_m.py +++ b/experiments/inference_error/run_error_discrete_EXP_m.py @@ -81,7 +81,6 @@ def run_solver(events, decay_init, baseline_init, alpha_init, dt, T, seed=0): log=False, random_state=0, device="cpu", - optimize_kernel=True, precomputations=True, ztzG_approx=True) diff --git a/experiments/inference_error/run_error_discrete_RC_m.py b/experiments/inference_error/run_error_discrete_RC_m.py index b6830e2..ea17564 100644 --- a/experiments/inference_error/run_error_discrete_RC_m.py +++ b/experiments/inference_error/run_error_discrete_RC_m.py @@ -80,7 +80,6 @@ def run_solver(events, u_init, sigma_init, baseline_init, alpha_init, dt, T, see log=False, random_state=0, device="cpu", - optimize_kernel=True, precomputations=True, ztzG_approx=True) diff --git a/experiments/inference_error/run_error_discrete_TG_m.py b/experiments/inference_error/run_error_discrete_TG_m.py index 4954921..a2b0c50 100644 --- a/experiments/inference_error/run_error_discrete_TG_m.py +++ b/experiments/inference_error/run_error_discrete_TG_m.py @@ -76,7 +76,6 @@ def run_solver(events, m_init, sigma_init, baseline_init, alpha_init, dt, T, see log=False, random_state=0, device="cpu", - optimize_kernel=True, precomputations=True, ztzG_approx=True) diff --git a/experiments/inference_error/run_time_dim.py b/experiments/inference_error/run_time_dim.py index d574e58..9c47bba 100644 --- a/experiments/inference_error/run_time_dim.py +++ b/experiments/inference_error/run_time_dim.py @@ -69,7 +69,6 @@ def run_solver(events, decay_init, baseline_init, alpha_init, T, dt, seed=0): log=False, random_state=0, device="cpu", - optimize_kernel=True, precomputations=True, ztzG_approx=True) diff --git a/experiments/meg/sample.py b/experiments/meg/sample.py index b5bacfe..4a6d216 100644 --- a/experiments/meg/sample.py +++ b/experiments/meg/sample.py @@ -133,262 +133,99 @@ def fit_fadin_sample(list_tasks, atom, cdl_dict, filter_interval, thresh_acti, ############################################################################## -# REPRODUCE SAMPLE RESULTS IN [1] +# REPRODUCE SAMPLE RESULTS IN FaDIn PAPER ############################################################################## -fig, tg_atom6_mask = fit_fadin_sample(list_tasks=(['1', '2'], ['3', '4']), - atom=6, - cdl_dict=dict_cdl, - filter_interval=0.01, - thresh_acti=0.6e-10, - kernel='truncated_gaussian', - optim_mask=OPTIM_MASK, - kernel_length=0.5, - delta=0.02, - optim='RMSprop', - params_optim={'lr': 1e-3}, - max_iter=10000, - ztzG_approx=False, - figtitle='Masked FaDIn with TG kernels, atom 6 filt and thresh actis', - savefig='fit_fadin_sample_plots/nomarked_masked_tg_6_practi.png', - plotfig=True, - ) +fig, tg_atom6_mask = fit_fadin_sample( + list_tasks=(['1', '2'], ['3', '4']), + atom=6, + cdl_dict=dict_cdl, + filter_interval=0.01, + thresh_acti=0.6e-10, + kernel='truncated_gaussian', + optim_mask=OPTIM_MASK, + kernel_length=0.5, + delta=0.02, + optim='RMSprop', + params_optim={'lr': 1e-3}, + max_iter=10000, + ztzG_approx=False, + figtitle='Masked FaDIn with TG kernels, atom 6 filt and thresh actis', + savefig='fit_fadin_sample_plots/nomarked_masked_tg_6_practi.png', + plotfig=True +) print('Truncated gaussian, atom 6, with mask') print('Estimated baseline:', tg_atom6_mask[0]) print('Estimated alpha:', tg_atom6_mask[1]) print('Estimated kernel 
parameters', tg_atom6_mask[2:]) -fig, tg_atom3_allmask = fit_fadin_sample(list_tasks=(['1', '2'], ['3', '4']), - atom=3, - cdl_dict=dict_cdl, - filter_interval=0.01, - thresh_acti=0.6e-10, - kernel='truncated_gaussian', - optim_mask=OPTIM_MASK, - kernel_length=0.5, - delta=0.02, - optim='RMSprop', - params_optim={'lr': 1e-3}, - max_iter=10000, - ztzG_approx=False, - figtitle='Masked FaDIn with TG kernels, atom 3 filt and thresh actis', - savefig='fit_fadin_sample_plots/nomarked_masked_tg_3_practi.png', - plotfig=True - ) +fig, tg_atom3_allmask = fit_fadin_sample( + list_tasks=(['1', '2'], ['3', '4']), + atom=3, + cdl_dict=dict_cdl, + filter_interval=0.01, + thresh_acti=0.6e-10, + kernel='truncated_gaussian', + optim_mask=OPTIM_MASK, + kernel_length=0.5, + delta=0.02, + optim='RMSprop', + params_optim={'lr': 1e-3}, + max_iter=10000, + ztzG_approx=False, + figtitle='Masked FaDIn with TG kernels, atom 3 filt and thresh actis', + savefig='fit_fadin_sample_plots/nomarked_masked_tg_3_practi.png', + plotfig=True +) print('Truncated gaussian, atom 3, with mask') print('Estimated baseline:', tg_atom3_allmask[0]) print('Estimated alpha:', tg_atom3_allmask[1]) print('Estimated kernel parameters', tg_atom3_allmask[2:]) -fig, rc_atom3_mask = fit_fadin_sample(list_tasks=(['1', '2'], ['3', '4']), - atom=3, - cdl_dict=dict_cdl, - filter_interval=0.01, - thresh_acti=0.6e-10, - kernel='raised_cosine', - optim_mask=OPTIM_MASK, - kernel_length=0.5, - delta=0.02, - optim='RMSprop', - params_optim={'lr': 1e-3}, - max_iter=20000, - ztzG_approx=False, - figtitle='Masked FaDIn with RC kernels, atom 3 filt and thresh actis', - savefig='fit_fadin_sample_plots/nomarked_masked_rc_3_practi.png', - plotfig=True - ) +fig, rc_atom3_mask = fit_fadin_sample( + list_tasks=(['1', '2'], ['3', '4']), + atom=3, + cdl_dict=dict_cdl, + filter_interval=0.01, + thresh_acti=0.6e-10, + kernel='raised_cosine', + optim_mask=OPTIM_MASK, + kernel_length=0.5, + delta=0.02, + optim='RMSprop', + params_optim={'lr': 1e-3}, + max_iter=20000, + ztzG_approx=False, + figtitle='Masked FaDIn with RC kernels, atom 3 filt and thresh actis', + savefig='fit_fadin_sample_plots/nomarked_masked_rc_3_practi.png', + plotfig=True +) print('Raised_cosine, atom 3, with mask') print('Estimated baseline:', rc_atom3_mask[0]) print('Estimated alpha:', 2 * rc_atom3_mask[3] * rc_atom3_mask[1]) print('Estimated kernel parameters u and s', rc_atom3_mask[2:]) -fig, rc_atom6_mask = fit_fadin_sample(list_tasks=(['1', '2'], ['3', '4']), - atom=6, - cdl_dict=dict_cdl, - filter_interval=0.01, - thresh_acti=0.6e-10, - kernel='raised_cosine', - optim_mask=OPTIM_MASK, - kernel_length=0.5, - delta=0.02, - optim='RMSprop', - params_optim={'lr': 1e-3}, - max_iter=20000, - ztzG_approx=False, - figtitle='Masked FaDIn with RC kernels, atom 6 filt and thresh actis', - savefig='fit_fadin_sample_plots/nomarked_masked_rc_6_practi.png', - plotfig=True - ) +fig, rc_atom6_mask = fit_fadin_sample( + list_tasks=(['1', '2'], ['3', '4']), + atom=6, + cdl_dict=dict_cdl, + filter_interval=0.01, + thresh_acti=0.6e-10, + kernel='raised_cosine', + optim_mask=OPTIM_MASK, + kernel_length=0.5, + delta=0.02, + optim='RMSprop', + params_optim={'lr': 1e-3}, + max_iter=20000, + ztzG_approx=False, + figtitle='Masked FaDIn with RC kernels, atom 6 filt and thresh actis', + savefig='fit_fadin_sample_plots/nomarked_masked_rc_6_practi.png', + plotfig=True +) print('Raised_cosine, atom 6, with mask') print('Estimated baseline:', rc_atom6_mask[0]) print('Estimated alpha:', 2 * rc_atom6_mask[3] * rc_atom6_mask[1]) 
print('Estimated kernel parameters u and s', rc_atom6_mask[2:]) - -# ############################################################################## -# # OTHER EXPERIMENTS WITH TRUNCATED GAUSSIAN KERNELS -# ############################################################################## - -# fig, tg_atom6_nomask = fit_fadin_sample(list_tasks=(['1', '2'], ['3', '4']), -# atom=6, -# cdl_dict=dict_cdl, -# filter_interval=0.01, -# thresh_acti=0.6e-10, -# kernel='truncated_gaussian', -# kernel_length=0.5, -# delta=0.02, -# optim='RMSprop', -# params_optim={'lr': 1e-3}, -# max_iter=10000, -# ztzG_approx=False, -# figtitle='FaDIn with TG kernels, atom 6 filt and thresh actis', -# savefig='fit_fadin_sample_plots/nomarked_nomasked_tg_6_practi.png', -# plotfig=True -# ) - -# print('Truncated gaussian, atom 6, without mask') -# print('Estimated baseline:', tg_atom6_nomask[0]) -# print('Estimated alpha:', 2 * tg_atom6_nomask[3] * tg_atom6_nomask[1]) -# print('Estimated kernel parameters u and s', tg_atom6_nomask[2:]) - -# fig, tg_atom3_nomask = fit_fadin_sample(list_tasks=(['1', '2'], ['3', '4']), -# atom=3, -# cdl_dict=dict_cdl, -# filter_interval=0.01, -# thresh_acti=0.6e-10, -# kernel='truncated_gaussian', -# kernel_length=0.5, -# delta=0.02, -# optim='RMSprop', -# params_optim={'lr': 1e-3}, -# max_iter=10000, -# ztzG_approx=False, -# figtitle='FaDIn with TG kernels, atom 3 filt and thresh actis', -# savefig='fit_fadin_sample_plots/nomarked_nomasked_tg_3_practi.png', -# plotfig=True -# ) - -# print('Truncated gaussian, atom 3 without mask') -# print('Estimated baseline:', tg_atom3_nomask[0]) -# print('Estimated alpha:', 2 * tg_atom3_nomask[3] * tg_atom3_nomask[1]) -# print('Estimated kernel parameters u and s', tg_atom3_nomask[2:]) - -# fig, tg_atom3_nofilt_nomask = fit_fadin_sample(list_tasks=(['1', '2'], ['3', '4']), -# atom=3, -# cdl_dict=dict_cdl, -# filter_interval=None, -# thresh_acti=0., -# kernel='truncated_gaussian', -# kernel_length=0.5, -# delta=0.02, -# optim='RMSprop', -# params_optim={'lr': 1e-3}, -# max_iter=10000, -# ztzG_approx=False, -# figtitle='FaDIn with TG kernels, atom 3 raw actis', -# savefig='fit_fadin_sample_plots/nomarked_nomasked_tg_3_rawacti.png', -# plotfig=True -# ) - -# fig, tg_atom3_nofilt = fit_fadin_sample(list_tasks=(['1', '2'], ['3', '4']), -# atom=3, -# cdl_dict=dict_cdl, -# filter_interval=None, -# thresh_acti=0., -# kernel='truncated_gaussian', -# optim_mask=OPTIM_MASK, -# kernel_length=0.5, -# delta=0.02, -# optim='RMSprop', -# params_optim={'lr': 1e-3}, -# max_iter=10000, -# ztzG_approx=False, -# figtitle='Masked FaDIn with TG kernels, atom 3 raw actis', -# savefig='fit_fadin_sample_plots/nomarked_masked_tg_3_rawacti.png', -# plotfig=True -# ) - -# fig, tg_atom6_nofilt_nomask = fit_fadin_sample(list_tasks=(['1', '2'], ['3', '4']), -# atom=6, -# cdl_dict=dict_cdl, -# filter_interval=None, -# thresh_acti=0., -# kernel='truncated_gaussian', -# baseline_mask=None, -# alpha_mask=None, -# kernel_length=0.5, -# delta=0.02, -# optim='RMSprop', -# params_optim={'lr': 1e-3}, -# max_iter=10000, -# ztzG_approx=False, -# figtitle='FaDIn with TG kernels, atom 6 raw actis', -# savefig='fit_fadin_sample_plots/nomarked_masked_tg_6_rawacti.png', -# plotfig=True -# ) - -# fig, tg_atom6_nofilt = fit_fadin_sample(list_tasks=(['1', '2'], ['3', '4']), -# atom=6, -# cdl_dict=dict_cdl, -# filter_interval=None, -# thresh_acti=0., -# kernel='truncated_gaussian', -# optim_mask=OPTIM_MASK, -# kernel_length=0.5, -# delta=0.02, -# optim='RMSprop', -# params_optim={'lr': 1e-3}, -# 
max_iter=10000, -# ztzG_approx=False, -# figtitle='Masked FaDIn with TG kernels, atom 6 raw actis', -# savefig='fit_fadin_sample_plots/nomarked_masked_tg_6_rawacti.png', -# plotfig=True -# ) - -############################################################################## -# OTHER EXPERIMENTS WITH RAISED_COSINE KERNELS -############################################################################## - -# fig, rc_atom3_nomask = fit_fadin_sample(list_tasks=(['1', '2'], ['3', '4']), -# atom=3, -# cdl_dict=dict_cdl, -# filter_interval=0.01, -# thresh_acti=0.6e-10, -# kernel='raised_cosine', -# kernel_length=0.5, -# delta=0.02, -# optim='RMSprop', -# params_optim={'lr': 1e-3}, -# max_iter=20000, -# ztzG_approx=False, -# figtitle='FaDIn with RC kernels, atom 3 filt and thresh actis', -# savefig='fit_fadin_sample_plots/nomarked_nomasked_rc_3_practi.png', -# plotfig=True -# ) -# print('Raised_cosine, atom 3, no mask') -# print('Estimated baseline:', rc_atom3_nomask[0]) -# print('Estimated alpha:', 2 * rc_atom3_nomask[3] * rc_atom3_nomask[1]) -# print('Estimated kernel parameters', rc_atom3_nomask[2:]) - -# fig, rc_atom6_nomask = fit_fadin_sample(list_tasks=(['1', '2'], ['3', '4']), -# atom=6, -# cdl_dict=dict_cdl, -# filter_interval=0.01, -# thresh_acti=0.6e-10, -# kernel='raised_cosine', -# kernel_length=0.5, -# delta=0.02, -# optim='RMSprop', -# params_optim={'lr': 1e-3}, -# max_iter=20000, -# ztzG_approx=False, -# figtitle='FaDIn with RC kernels, atom 6 filt and thresh actis', -# savefig='fit_fadin_sample_plots/nomarked_nomasked_rc_6_practi.png', -# plotfig=True -# ) - -# print('Raised_cosine, atom 6, without mask') -# print('Estimated baseline:', rc_atom6_nomask[0]) -# print('Estimated alpha:', 2 * rc_atom6_nomask[3] * rc_atom6_nomask[1]) -# print('Estimated kernel parameters', rc_atom6_nomask[2:]) diff --git a/fadin/loss_and_gradient.py b/fadin/loss_and_gradient.py index 6f1cded..b8b1757 100644 --- a/fadin/loss_and_gradient.py +++ b/fadin/loss_and_gradient.py @@ -109,7 +109,8 @@ def squared_compensator_2(zG, baseline, alpha, kernel): .. math:: \\sum_{i=1}^{p}\\mu_i \\sum_{j=1}^{p} \\sum_{\tau=1}^{L} - \\phi_{ij}^\\Delta[\\tau] \\left(\\sum_{s=1}^{G} z_{j}[s-\\tau] \\right) + \\phi_{ij}^\\Delta[\\tau] \\left(\\sum_{s=1}^{G} z_{j}[s-\\tau] + \\right) Parameters ---------- @@ -175,8 +176,9 @@ def squared_compensator_3(ztzG, alpha, kernel): for k in range(n_dim): for j in range(n_dim): alpha_prod_ijk = alpha[i, j] * alpha[i, k] - temp2 = kernel[i, k].view(1, L) * (ztzG[j, k] - * kernel[i, j].view(L, 1)).sum(0) + temp2 = kernel[i, k].view(1, L) * ( + ztzG[j, k] * kernel[i, j].view(L, 1) + ).sum(0) res += alpha_prod_ijk * temp2.sum() return res @@ -357,7 +359,8 @@ def get_grad_alpha(zG, zN, ztzG, baseline, alpha, kernel, delta, n_events): def get_grad_eta(zG, zN, ztzG, baseline, alpha, kernel, grad_kernel, delta, n_events): - """Return the gradient of the discrete l2 loss w.r.t. one kernel parameters. + """Return the gradient of the discrete l2 loss w.r.t. one kernel + parameter. .. 
math:: N_T\\frac{\\partial\\mathcal{L}_G}{\\partial \\eta{ml}} = diff --git a/fadin/solver.py b/fadin/solver.py index 6d36b15..b2eba80 100644 --- a/fadin/solver.py +++ b/fadin/solver.py @@ -208,8 +208,10 @@ def __init__(self, n_dim, kernel, init='random', kernel_params_init.append(2 * torch.rand(self.n_dim, self.n_dim)) else: - raise NotImplementedError('kernel initial parameters of not \ - implemented kernel have to be given') + raise NotImplementedError( + 'kernel initial parameters of not \ + implemented kernel have to be given' + ) elif init['kernel'] is None: kernel_params_init = init['kernel'] @@ -361,20 +363,26 @@ def fit(self, events, end_time): self.delta, end_time).detach() # Update baseline - self.params_intens[0].grad = get_grad_baseline(zG, - self.params_intens[0], - self.params_intens[1], - kernel, self.delta, - n_events, end_time) + self.params_intens[0].grad = get_grad_baseline( + zG, + self.params_intens[0], + self.params_intens[1], + kernel, + self.delta, + n_events, + end_time + ) # Update alpha - self.params_intens[1].grad = get_grad_alpha(zG, - zN, - ztzG, - self.params_intens[0], - self.params_intens[1], - kernel, - self.delta, - n_events) + self.params_intens[1].grad = get_grad_alpha( + zG, + zN, + ztzG, + self.params_intens[0], + self.params_intens[1], + kernel, + self.delta, + n_events + ) # Update kernel for j in range(self.n_kernel_params): self.params_intens[2 + j].grad = \ @@ -391,15 +399,21 @@ def fit(self, events, end_time): ) else: - intens = self.kernel_model.intensity_eval(self.params_intens[0], - self.params_intens[1], - self.params_intens[2:], - events_grid, - discretization) + intens = self.kernel_model.intensity_eval( + self.params_intens[0], + self.params_intens[1], + self.params_intens[2:], + events_grid, + discretization + ) if self.criterion == 'll': - loss = discrete_ll_loss_conv(intens, events_grid, self.delta) + loss = discrete_ll_loss_conv( + intens, events_grid, self.delta + ) else: - loss = discrete_l2_loss_conv(intens, events_grid, self.delta) + loss = discrete_l2_loss_conv( + intens, events_grid, self.delta + ) loss.backward() self.opt.step() @@ -438,7 +452,8 @@ def fit(self, events, end_time): return self -def plot(solver, plotfig=False, bl_noise=False, title=None, ch_names=None, savefig=None): +def plot(solver, plotfig=False, bl_noise=False, title=None, ch_names=None, + savefig=None): """ Plots estimated kernels and baselines of solver. Should be called after calling the `fit` method on solver. diff --git a/fadin/utils/utils.py b/fadin/utils/utils.py index b0ea138..05152e4 100644 --- a/fadin/utils/utils.py +++ b/fadin/utils/utils.py @@ -75,7 +75,7 @@ def projected_grid(events, grid_step, n_grid): events_grid = torch.zeros(n_dim, n_grid) for i in range(n_dim): ei_torch = torch.tensor(events[i]) - temp = torch.round(ei_torch / grid_step).long() # * grid_step + temp = torch.round(ei_torch / grid_step).long() # * grid_step # temp2 = torch.round(temp * size_discret) idx, data = np.unique(temp, return_counts=True) events_grid[i, idx] += torch.tensor(data) From fbaf0b0b9796338061555b2ad995f941bc444c50 Mon Sep 17 00:00:00 2001 From: vloison Date: Fri, 31 May 2024 18:41:21 +0200 Subject: [PATCH 05/25] remove precomputations and criterion from FaDIn init, replace with custom solvers. 
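Each loss/optimization variant is now selected by choosing a solver class instead of toggling the `criterion` and `precomputations` flags of `FaDIn.__init__`. A minimal usage sketch of the intended selection after this change, with placeholder values for `n_dim`, `dt`, `events` and `T` (the variant classes are introduced in this patch and are renamed and moved to the experiment scripts later in the series):

    from fadin.solver import FaDIn, FaDIn_loglikelihood, FaDIn_no_precomputations

    n_dim, dt = 1, 0.01
    # l2 loss with FaDIn precomputations (replaces criterion='l2', precomputations=True)
    solver = FaDIn(n_dim, "raised_cosine", delta=dt, max_iter=2000)
    # log-likelihood loss through autodiff (replaces criterion='ll')
    # solver = FaDIn_loglikelihood(n_dim, "raised_cosine", delta=dt, max_iter=2000)
    # l2 loss through autodiff, no precomputations (replaces precomputations=False)
    # solver = FaDIn_no_precomputations(n_dim, "raised_cosine", delta=dt, max_iter=2000)
    solver.fit(events, T)  # events, T: event times and end time, as in the existing examples

Selecting the variant through the class keeps `FaDIn.__init__` focused on discretization and model parameters, while each subclass only overrides the per-iteration gradient computation and the `precomputations` flag.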
--- examples/plot_multivariate_fadin.py | 15 +- experiments/benchmark/run_benchmark_exp.py | 1 - experiments/benchmark/run_benchmark_rc.py | 1 - experiments/benchmark/run_benchmark_tg.py | 1 - experiments/benchmark/run_comparison_ll.py | 40 ++++-- experiments/example_multivariate.py | 12 +- .../inference_error/run_comp_autodiff.py | 30 ++-- .../run_error_discrete_EXP_m.py | 1 - .../run_error_discrete_RC_m.py | 1 - .../run_error_discrete_TG_m.py | 1 - experiments/inference_error/run_time_dim.py | 1 - fadin/loss_and_gradient.py | 91 ++++++++++++ fadin/solver.py | 129 ++++-------------- 13 files changed, 179 insertions(+), 145 deletions(-) diff --git a/examples/plot_multivariate_fadin.py b/examples/plot_multivariate_fadin.py index bef3fb2..8e1f886 100644 --- a/examples/plot_multivariate_fadin.py +++ b/examples/plot_multivariate_fadin.py @@ -51,13 +51,14 @@ ############################################################################### # Here, we apply FaDIn. -solver = FaDIn(n_dim=n_dim, - kernel="truncated_exponential", - kernel_length=kernel_length, - delta=dt, optim="RMSprop", - params_optim={'lr': 1e-3}, - max_iter=10000, criterion='l2' - ) +solver = FaDIn( + n_dim=n_dim, + kernel="truncated_exponential", + kernel_length=kernel_length, + delta=dt, optim="RMSprop", + params_optim={'lr': 1e-3}, + max_iter=10000 +) solver.fit(events, T) # We average on the 10 last values of the optimization. diff --git a/experiments/benchmark/run_benchmark_exp.py b/experiments/benchmark/run_benchmark_exp.py index 70597d7..c25d467 100644 --- a/experiments/benchmark/run_benchmark_exp.py +++ b/experiments/benchmark/run_benchmark_exp.py @@ -77,7 +77,6 @@ def run_fadin(events, decay_init, baseline_init, alpha_init, T, dt, seed=0): init=init, delta=dt, optim="RMSprop", step_size=1e-3, max_iter=max_iter, - optimize_kernel=True, precomputations=True, ztzG_approx=True, device='cpu', log=False ) diff --git a/experiments/benchmark/run_benchmark_rc.py b/experiments/benchmark/run_benchmark_rc.py index 4e99899..fbc8f4d 100644 --- a/experiments/benchmark/run_benchmark_rc.py +++ b/experiments/benchmark/run_benchmark_rc.py @@ -73,7 +73,6 @@ def run_fadin(events, u_init, sigma_init, baseline_init, alpha_init, T, dt, seed init=init, delta=dt, optim="RMSprop", step_size=1e-3, max_iter=max_iter, - optimize_kernel=True, precomputations=True, ztzG_approx=True, device='cpu', log=False ) diff --git a/experiments/benchmark/run_benchmark_tg.py b/experiments/benchmark/run_benchmark_tg.py index 9387e0f..24ffd2b 100644 --- a/experiments/benchmark/run_benchmark_tg.py +++ b/experiments/benchmark/run_benchmark_tg.py @@ -72,7 +72,6 @@ def run_fadin(events, m_init, sigma_init, baseline_init, alpha_init, T, dt, seed init=init, delta=dt, optim="RMSprop", step_size=1e-3, max_iter=max_iter, - optimize_kernel=True, precomputations=True, ztzG_approx=True, device='cpu', log=False ) diff --git a/experiments/benchmark/run_comparison_ll.py b/experiments/benchmark/run_comparison_ll.py index e9200d6..87a50c1 100644 --- a/experiments/benchmark/run_comparison_ll.py +++ b/experiments/benchmark/run_comparison_ll.py @@ -9,7 +9,7 @@ from tick.hawkes import SimuHawkes, HawkesKernelTimeFunc from fadin.kernels import DiscreteKernelFiniteSupport -from fadin.solver import FaDIn +from fadin.solver import FaDIn, FaDIn_loglikelihood # %% simulate data # Simulated data @@ -64,18 +64,32 @@ def run_solver(criterion, events, kernel_params_init, 'baseline': torch.tensor(baseline_init), 'kernel': [torch.tensor(a) for a in kernel_params_init] } - solver = FaDIn(1, - kernel, - 
init=init, - delta=dt, - optim="RMSprop", - step_size=1e-3, - max_iter=max_iter, - log=False, - random_state=seed, - device="cpu", - optimize_kernel=True, - criterion=criterion) + if criterion == 'l2': + solver = FaDIn( + 1, + kernel, + init=init, + delta=dt, + optim="RMSprop", + step_size=1e-3, + max_iter=max_iter, + log=False, + random_state=seed, + device="cpu" + ) + elif criterion == 'll': + solver = FaDIn_loglikelihood( + 1, + kernel, + init=init, + delta=dt, + optim="RMSprop", + step_size=1e-3, + max_iter=max_iter, + log=False, + random_state=seed, + device="cpu" + ) results = solver.fit(events, T) if kernel == 'truncated_exponential': results_ = dict(param_baseline=results['param_baseline'][-10:].mean().item(), diff --git a/experiments/example_multivariate.py b/experiments/example_multivariate.py index 5536405..16da103 100644 --- a/experiments/example_multivariate.py +++ b/experiments/example_multivariate.py @@ -61,12 +61,12 @@ def run_solver(events, dt, T, ztzG_approx, seed=0): start = time.time() max_iter = 2000 - solver = FaDIn(2, - "raised_cosine", - delta=dt, optim="RMSprop", max_iter=max_iter, - precomputations=True, - ztzG_approx=ztzG_approx, device='cpu', log=False, tol=10e-6 - ) + solver = FaDIn( + 2, + "raised_cosine", + delta=dt, optim="RMSprop", max_iter=max_iter, + ztzG_approx=ztzG_approx, device='cpu', log=False, tol=10e-6 +) print(time.time() - start) solver.fit(events, T) diff --git a/experiments/inference_error/run_comp_autodiff.py b/experiments/inference_error/run_comp_autodiff.py index 30f6c6b..3982d2f 100644 --- a/experiments/inference_error/run_comp_autodiff.py +++ b/experiments/inference_error/run_comp_autodiff.py @@ -10,7 +10,7 @@ from tick.hawkes import SimuHawkes, HawkesKernelTimeFunc from fadin.kernels import DiscreteKernelFiniteSupport -from fadin.solver import FaDIn +from fadin.solver import FaDIn, FaDIn_no_precomputations ################################ # Meta parameters @@ -64,12 +64,14 @@ def run_solver(events, u_init, sigma_init, baseline_init, alpha_init, T, dt, see 'baseline': torch.tensor(baseline_init), 'kernel': [torch.tensor(u_init), torch.tensor(sigma_init)] } - solver_autodiff = FaDIn(1, - "raised_cosine", - init=init, - delta=dt, optim="RMSprop", - step_size=1e-3, max_iter=max_iter, - precomputations=False, random_state=0) + solver_autodiff = FaDIn_no_precomputations( + 1, + "raised_cosine", + init=init, + delta=dt, optim="RMSprop", + step_size=1e-3, max_iter=max_iter, + random_state=0 + ) start_autodiff = time.time() solver_autodiff.fit(events, T) time_autodiff = time.time() - start_autodiff @@ -78,12 +80,14 @@ def run_solver(events, u_init, sigma_init, baseline_init, alpha_init, T, dt, see 'baseline': torch.tensor(baseline_init), 'kernel': [torch.tensor(u_init), torch.tensor(sigma_init)] } - solver_FaDIn = FaDIn(1, - "raised_cosine", - init=init, - delta=dt, optim="RMSprop", - step_size=1e-3, max_iter=max_iter, - precomputations=True, random_state=0) + solver_FaDIn = FaDIn( + 1, + "raised_cosine", + init=init, + delta=dt, optim="RMSprop", + step_size=1e-3, max_iter=max_iter, + random_state=0 + ) start_FaDIn = time.time() solver_FaDIn.fit(events, T) time_FaDIn = time.time() - start_FaDIn diff --git a/experiments/inference_error/run_error_discrete_EXP_m.py b/experiments/inference_error/run_error_discrete_EXP_m.py index a05d33d..1d1d444 100644 --- a/experiments/inference_error/run_error_discrete_EXP_m.py +++ b/experiments/inference_error/run_error_discrete_EXP_m.py @@ -81,7 +81,6 @@ def run_solver(events, decay_init, baseline_init, 
alpha_init, dt, T, seed=0): log=False, random_state=0, device="cpu", - precomputations=True, ztzG_approx=True) print(time.time() - start) diff --git a/experiments/inference_error/run_error_discrete_RC_m.py b/experiments/inference_error/run_error_discrete_RC_m.py index ea17564..e170803 100644 --- a/experiments/inference_error/run_error_discrete_RC_m.py +++ b/experiments/inference_error/run_error_discrete_RC_m.py @@ -80,7 +80,6 @@ def run_solver(events, u_init, sigma_init, baseline_init, alpha_init, dt, T, see log=False, random_state=0, device="cpu", - precomputations=True, ztzG_approx=True) print(time.time() - start) diff --git a/experiments/inference_error/run_error_discrete_TG_m.py b/experiments/inference_error/run_error_discrete_TG_m.py index a2b0c50..56ddfbc 100644 --- a/experiments/inference_error/run_error_discrete_TG_m.py +++ b/experiments/inference_error/run_error_discrete_TG_m.py @@ -76,7 +76,6 @@ def run_solver(events, m_init, sigma_init, baseline_init, alpha_init, dt, T, see log=False, random_state=0, device="cpu", - precomputations=True, ztzG_approx=True) print(time.time() - start) diff --git a/experiments/inference_error/run_time_dim.py b/experiments/inference_error/run_time_dim.py index 9c47bba..6b37d68 100644 --- a/experiments/inference_error/run_time_dim.py +++ b/experiments/inference_error/run_time_dim.py @@ -69,7 +69,6 @@ def run_solver(events, decay_init, baseline_init, alpha_init, T, dt, seed=0): log=False, random_state=0, device="cpu", - precomputations=True, ztzG_approx=True) print(time.time() - start) diff --git a/fadin/loss_and_gradient.py b/fadin/loss_and_gradient.py index b8b1757..6de6e3f 100644 --- a/fadin/loss_and_gradient.py +++ b/fadin/loss_and_gradient.py @@ -1,6 +1,97 @@ import torch +def optim_iteration_fadin(solver, events_grid, discretization, + i, n_events, end_time): + """One optimizer iteration of FaDIn solver, with l2 loss and + precomputations. 
+ """ + # Compute kernel and gradient + kernel = solver.kernel_model.kernel_eval( + solver.params_intens[2:], + discretization + ) + grad_theta = solver.kernel_model.grad_eval( + solver.params_intens[2:], + discretization + ) + + if solver.log: + solver.v_loss[i] = \ + discrete_l2_loss_precomputation(solver.zG, solver.zN, solver.ztzG, + solver.params_intens[0], + solver.params_intens[1], + kernel, n_events, + solver.delta, + end_time).detach() + # Update baseline + solver.params_intens[0].grad = get_grad_baseline( + solver.zG, + solver.params_intens[0], + solver.params_intens[1], + kernel, + solver.delta, + n_events, + end_time + ) + # Update alpha + solver.params_intens[1].grad = get_grad_alpha( + solver.zG, + solver.zN, + solver.ztzG, + solver.params_intens[0], + solver.params_intens[1], + kernel, + solver.delta, + n_events + ) + # Update kernel + for j in range(solver.n_kernel_params): + solver.params_intens[2 + j].grad = \ + get_grad_eta( + solver.zG, + solver.zN, + solver.ztzG, + solver.params_intens[0], + solver.params_intens[1], + kernel, + grad_theta[j], + solver.delta, + n_events + ) + + +def optim_iteration_l2_noprecomput(solver, events_grid, discretization, + i, n_events, end_time): + """One optimizer iteration of FaDIn_no_precomputations solver, + with l2 loss and no precomputations.""" + intens = solver.kernel_model.intensity_eval( + solver.params_intens[0], + solver.params_intens[1], + solver.params_intens[2:], + events_grid, + discretization + ) + loss = discrete_l2_loss_conv(intens, events_grid, solver.delta) + loss.backward() + + +def optim_iteration_loglikelihood(solver, events_grid, discretization, + i, n_events, end_time): + """One optimizer iteration of FaDIn_loglikelihood solver, + with loglikelihood loss. + """ + intens = solver.kernel_model.intensity_eval( + solver.params_intens[0], + solver.params_intens[1], + solver.params_intens[2:], + events_grid, + discretization + ) + loss = discrete_ll_loss_conv(intens, events_grid, solver.delta) + loss.backward() + + def discrete_l2_loss_conv(intensity, events_grid, delta): """Compute the l2 discrete loss using convolutions. diff --git a/fadin/solver.py b/fadin/solver.py index b2eba80..20fd6e3 100644 --- a/fadin/solver.py +++ b/fadin/solver.py @@ -6,9 +6,8 @@ from fadin.utils.utils import optimizer, projected_grid, momentmatching from fadin.utils.compute_constants import get_zG, get_zN, get_ztzG, \ get_ztzG_approx -from fadin.loss_and_gradient import discrete_l2_loss_precomputation, \ - discrete_l2_loss_conv, get_grad_baseline, get_grad_alpha, get_grad_eta, \ - discrete_ll_loss_conv +from fadin.loss_and_gradient import optim_iteration_fadin, \ + optim_iteration_l2_noprecomput, optim_iteration_loglikelihood from fadin.kernels import DiscreteKernelFiniteSupport @@ -93,10 +92,6 @@ class FaDIn(object): max_iter : `int`, `default=1000` Maximum number of iterations during fit. - precomputations : `boolean`, `default=True` - If precomputations is false, pytorch autodiff is applied on the loss. - If precomputations is true, then FaDIn is computed. - ztzG_approx : `boolean`, `default=True` If ztzG_approx is false, compute the true ztzG precomputation constant that is the computational bottleneck of FaDIn. if ztzG_approx is true, @@ -114,10 +109,6 @@ class FaDIn(object): 'truncated_exponential'}`` the gradient function is implemented. If kernel is custom, the custom gradient must be given. - criterion : `str` in ``{'l2' | 'll'}``, `default='l2'` - The criterion to minimize. 
if not l2, FaDIn minimize - the Log-Likelihood loss through AutoDifferentiation. - tol : `float`, `default=1e-5` The tolerance of the solver (iterations stop when the stopping criterion is below it). If not reached the solver does 'max_iter' @@ -143,14 +134,15 @@ class FaDIn(object): If `log=True`, compute the loss accross iterations. If no early stopping, `n_iter` is equal to `max_iter`. """ + optim_iteration = staticmethod(optim_iteration_fadin) + precomputations = True - def __init__(self, n_dim, kernel, init='random', - optim_mask=None, + def __init__(self, n_dim, kernel, init='random', optim_mask=None, kernel_length=1, delta=0.01, optim='RMSprop', - params_optim=dict(), max_iter=2000, - precomputations=True, ztzG_approx=True, - device='cpu', log=False, grad_kernel=None, criterion='l2', + params_optim=dict(), max_iter=2000, ztzG_approx=True, + device='cpu', log=False, grad_kernel=None, tol=10e-5, random_state=None): + # param discretisation self.delta = delta self.W = kernel_length @@ -231,19 +223,13 @@ def __init__(self, n_dim, kernel, init='random', self.params_intens.append( kernel_params_init[i].float().clip(1e-4).requires_grad_(True)) - self.precomputations = precomputations - # If the learning rate is not given, fix it to 1e-3 - if 'lr' in params_optim.keys(): + if 'lr' not in params_optim.keys(): params_optim['lr'] = 1e-3 self.params_solver = params_optim # self.opt = optimizer(self.params_intens, params_optim, solver=optim) - if criterion == 'll': - self.precomputations = False - - self.criterion = criterion # device and seed if random_state is None: torch.manual_seed(0) @@ -313,9 +299,9 @@ def fit(self, events, end_time): else: ztzG = get_ztzG(events_grid.double().numpy(), self.L) - zG = torch.tensor(zG).float() - zN = torch.tensor(zN).float() - ztzG = torch.tensor(ztzG).float() + self.zG = torch.tensor(zG).float() + self.zN = torch.tensor(zN).float() + self.ztzG = torch.tensor(ztzG).float() print('precomput:', time.time() - start) #################################################### @@ -338,84 +324,15 @@ def fit(self, events, end_time): #################################################### start = time.time() + # Optimize parameters for i in range(self.max_iter): print(f"Fitting model... 
{i/self.max_iter:6.1%}\r", end='', flush=True) self.opt.zero_grad() - if self.precomputations: - # Compute kernel and gradient - kernel = self.kernel_model.kernel_eval( - self.params_intens[2:], - discretization - ) - grad_theta = self.kernel_model.grad_eval( - self.params_intens[2:], - discretization - ) - - if self.log: - self.v_loss[i] = \ - discrete_l2_loss_precomputation(zG, zN, ztzG, - self.params_intens[0], - self.params_intens[1], - kernel, n_events, - self.delta, - end_time).detach() - # Update baseline - self.params_intens[0].grad = get_grad_baseline( - zG, - self.params_intens[0], - self.params_intens[1], - kernel, - self.delta, - n_events, - end_time - ) - # Update alpha - self.params_intens[1].grad = get_grad_alpha( - zG, - zN, - ztzG, - self.params_intens[0], - self.params_intens[1], - kernel, - self.delta, - n_events - ) - # Update kernel - for j in range(self.n_kernel_params): - self.params_intens[2 + j].grad = \ - get_grad_eta( - zG, - zN, - ztzG, - self.params_intens[0], - self.params_intens[1], - kernel, - grad_theta[j], - self.delta, - n_events - ) - - else: - intens = self.kernel_model.intensity_eval( - self.params_intens[0], - self.params_intens[1], - self.params_intens[2:], - events_grid, - discretization - ) - if self.criterion == 'll': - loss = discrete_ll_loss_conv( - intens, events_grid, self.delta - ) - else: - loss = discrete_l2_loss_conv( - intens, events_grid, self.delta - ) - loss.backward() - + self.optim_iteration( + self, events_grid, discretization, i, n_events, end_time + ) self.opt.step() # Save parameters @@ -452,6 +369,20 @@ def fit(self, events, end_time): return self +class FaDIn_no_precomputations(FaDIn): + """Define the FaDIn framework for estimated Hawkes processes *without + precomputations*.""" + optim_iteration = staticmethod(optim_iteration_l2_noprecomput) + precomputations = False + + +class FaDIn_loglikelihood(FaDIn): + """Define the FaDIn framework for estimated Hawkes processes *with + loglikelihood criterion instead of l2 loss*.""" + optim_iteration = staticmethod(optim_iteration_loglikelihood) + precomputations = False + + def plot(solver, plotfig=False, bl_noise=False, title=None, ch_names=None, savefig=None): """ From 09a59d2f06feb9a30566a3c97f3521c411da1229 Mon Sep 17 00:00:00 2001 From: vloison Date: Fri, 31 May 2024 20:19:24 +0200 Subject: [PATCH 06/25] fix unmarked moment_matching --- examples/plot_univariate_fadin.py | 2 +- experiments/meg/sample.py | 9 +++--- fadin/solver.py | 50 +++++++++++++++++-------------- fadin/tests/test_mask.py | 5 ++-- fadin/utils/utils.py | 26 ++++++++-------- 5 files changed, 50 insertions(+), 42 deletions(-) diff --git a/examples/plot_univariate_fadin.py b/examples/plot_univariate_fadin.py index 3a8faf1..9c47e61 100644 --- a/examples/plot_univariate_fadin.py +++ b/examples/plot_univariate_fadin.py @@ -56,7 +56,7 @@ kernel_length=kernel_length, delta=dt, optim="RMSprop", params_optim={'lr': 1e-3}, - max_iter=2000, criterion='l2' + max_iter=2000 ) solver.fit(events, T) diff --git a/experiments/meg/sample.py b/experiments/meg/sample.py index 4a6d216..679fb87 100644 --- a/experiments/meg/sample.py +++ b/experiments/meg/sample.py @@ -154,12 +154,11 @@ def fit_fadin_sample(list_tasks, atom, cdl_dict, filter_interval, thresh_acti, savefig='fit_fadin_sample_plots/nomarked_masked_tg_6_practi.png', plotfig=True ) -print('Truncated gaussian, atom 6, with mask') +print('\nTruncated gaussian, atom 6, with mask') print('Estimated baseline:', tg_atom6_mask[0]) print('Estimated alpha:', tg_atom6_mask[1]) 
print('Estimated kernel parameters', tg_atom6_mask[2:]) - fig, tg_atom3_allmask = fit_fadin_sample( list_tasks=(['1', '2'], ['3', '4']), atom=3, @@ -178,7 +177,7 @@ def fit_fadin_sample(list_tasks, atom, cdl_dict, filter_interval, thresh_acti, savefig='fit_fadin_sample_plots/nomarked_masked_tg_3_practi.png', plotfig=True ) -print('Truncated gaussian, atom 3, with mask') +print('\nTruncated gaussian, atom 3, with mask') print('Estimated baseline:', tg_atom3_allmask[0]) print('Estimated alpha:', tg_atom3_allmask[1]) print('Estimated kernel parameters', tg_atom3_allmask[2:]) @@ -201,7 +200,7 @@ def fit_fadin_sample(list_tasks, atom, cdl_dict, filter_interval, thresh_acti, savefig='fit_fadin_sample_plots/nomarked_masked_rc_3_practi.png', plotfig=True ) -print('Raised_cosine, atom 3, with mask') +print('\nRaised_cosine, atom 3, with mask') print('Estimated baseline:', rc_atom3_mask[0]) print('Estimated alpha:', 2 * rc_atom3_mask[3] * rc_atom3_mask[1]) print('Estimated kernel parameters u and s', rc_atom3_mask[2:]) @@ -225,7 +224,7 @@ def fit_fadin_sample(list_tasks, atom, cdl_dict, filter_interval, thresh_acti, plotfig=True ) -print('Raised_cosine, atom 6, with mask') +print('nRaised_cosine, atom 6, with mask') print('Estimated baseline:', rc_atom6_mask[0]) print('Estimated alpha:', 2 * rc_atom6_mask[3] * rc_atom6_mask[1]) print('Estimated kernel parameters u and s', rc_atom6_mask[2:]) diff --git a/fadin/solver.py b/fadin/solver.py index 20fd6e3..97583e8 100644 --- a/fadin/solver.py +++ b/fadin/solver.py @@ -3,7 +3,7 @@ import matplotlib.pyplot as plt import numpy as np -from fadin.utils.utils import optimizer, projected_grid, momentmatching +from fadin.utils.utils import optimizer, projected_grid, momentmatching_nomark from fadin.utils.compute_constants import get_zG, get_zN, get_ztzG, \ get_ztzG_approx from fadin.loss_and_gradient import optim_iteration_fadin, \ @@ -51,8 +51,10 @@ class FaDIn(object): init: `str` or `dict`, default='random' Initialization strategy of the parameters of the Hawkes process. If set to 'random', the parameters are initialized randomly. - If set to 'moment_matching', the parameters are initialized - using the moment matching method. + If set to 'moment_matching_max', the parameters are initialized + using the moment matching method with max mode. + If set to 'moment_matching_mean', the parameters are initialized + using the moment matching method with mean mode. 
Otherwise, the parameters are initialized using the given dictionary, , which must contain the following keys: - 'baseline': `tensor` or `None`, shape (n_dim,): Initial baseline @@ -143,13 +145,13 @@ def __init__(self, n_dim, kernel, init='random', optim_mask=None, device='cpu', log=False, grad_kernel=None, tol=10e-5, random_state=None): - # param discretisation + # Discretization parameters self.delta = delta self.W = kernel_length self.L = int(self.W / delta) self.ztzG_approx = ztzG_approx - # param optim + # Optimizer parameters self.solver = optim self.max_iter = max_iter self.log = log @@ -157,10 +159,13 @@ def __init__(self, n_dim, kernel, init='random', optim_mask=None, if optim_mask is None: optim_mask = {'baseline': None, 'alpha': None} - # params model + # Model parameters self.n_dim = n_dim - self.moment_matching = (init == 'moment_matching') - if init in ['random', 'moment_matching'] or init['baseline'] is None: + self.moment_matching = ('moment_matching' in init) + if self.moment_matching: + self.mm_mode = init.split('_')[-1] + random_bl_init = init == 'random' or self.moment_matching + if random_bl_init or init['baseline'] is None: self.baseline = torch.rand(self.n_dim) else: self.baseline = init['baseline'].float() @@ -170,9 +175,10 @@ def __init__(self, n_dim, kernel, init='random', optim_mask=None, assert optim_mask['baseline'].shape == self.baseline.shape, \ "Invalid baseline_mask shape, must be (n_dim,)" self.baseline_mask = optim_mask['baseline'] - self.baseline = (self.baseline * self.baseline_mask).requires_grad_(True) + bl = self.baseline * self.baseline_mask + self.baseline = bl.requires_grad_(True) - if init in ['random', 'moment_matching'] or init['alpha'] is None: + if init == 'random' or self.moment_matching or init['alpha'] is None: self.alpha = torch.rand(self.n_dim, self.n_dim) else: self.alpha = init['alpha'].float() @@ -184,7 +190,7 @@ def __init__(self, n_dim, kernel, init='random', optim_mask=None, self.alpha_mask = optim_mask['alpha'] self.alpha = (self.alpha * self.alpha_mask).requires_grad_(True) - if init in ['random', 'moment_matching'] or init['kernel'] is None: + if init == 'random' or self.moment_matching or init['kernel'] is None: kernel_params_init = [] if kernel == 'raised_cosine': temp = 0.5 * self.W * torch.rand(self.n_dim, self.n_dim) @@ -204,8 +210,6 @@ def __init__(self, n_dim, kernel, init='random', optim_mask=None, 'kernel initial parameters of not \ implemented kernel have to be given' ) - elif init['kernel'] is None: - kernel_params_init = init['kernel'] self.kernel_params_fixed = kernel_params_init @@ -254,6 +258,7 @@ def fit(self, events, end_time): Returns ------- + TODO: attributes self : object Fitted parameters. 
""" @@ -267,18 +272,18 @@ def fit(self, events, end_time): if self.moment_matching: # Moment matching initialization of Hawkes parameters - baseline, alpha, kernel_params_init = momentmatching( - self, events, n_ground_events, end_time + baseline, alpha, kernel_params_init = momentmatching_nomark( + self, events, n_ground_events, end_time, self.mm_mode ) self.baseline = baseline self.alpha = alpha # Set optimizer with moment_matching parameters self.params_intens = [self.baseline, self.alpha] - for i in range(self.n_params_kernel): - self.params_intens.append( - kernel_params_init[i].float().clip(1e-4).requires_grad_(True) - ) + for i in range(self.n_kernel_params): + kernel_param = kernel_params_init[i].float().clip(1e-4) + kernel_param.requires_grad_(True) + self.params_intens.append(kernel_param) self.opt = optimizer( self.params_intens, self.params_solver, @@ -289,7 +294,6 @@ def fit(self, events, end_time): # Precomputations #################################################### if self.precomputations: - print('number of events is:', int(n_events[0])) start = time.time() zG = get_zG(events_grid.double().numpy(), self.L) zN = get_zN(events_grid.double().numpy(), self.L) @@ -345,7 +349,8 @@ def fit(self, events, end_time): for j in range(self.n_kernel_params): self.params_intens[2 + j].data = \ self.params_intens[2 + j].data.clip(0) - self.param_kernel[j, i + 1] = self.params_intens[2 + j].detach() + self.param_kernel[j, i + 1] = \ + self.params_intens[2 + j].detach() # Early stopping if i % 100 == 0: @@ -438,7 +443,8 @@ def plot(solver, plotfig=False, bl_noise=False, title=None, ch_names=None, for i in range(solver.n_dim): for j in range(solver.n_dim): # Plot baseline - label = rf'$\mu_{{{ch_names[i]}}}$={round(solver.baseline[i].item(), 2)}' + label = (rf'$\mu_{{{ch_names[i]}}}$=' + + f'{round(solver.baseline[i].item(), 2)}') axs[i, j].hlines( y=solver.baseline[i].item(), xmin=0, diff --git a/fadin/tests/test_mask.py b/fadin/tests/test_mask.py index 945e7e3..bb7245d 100644 --- a/fadin/tests/test_mask.py +++ b/fadin/tests/test_mask.py @@ -19,7 +19,6 @@ def maskedsolver(events, T, kernel, max_iter=1000, ztzG_approx=False, delta=dt, optim="RMSprop", params_optim=params_optim, max_iter=max_iter, - criterion='l2', ztzG_approx=ztzG_approx, random_state=random_state ) @@ -63,6 +62,8 @@ def maskedsolver(events, T, kernel, max_iter=1000, ztzG_approx=False, 'kernel': None } init2 = 'random' +init3 = 'moment_matching_mean' +init4 = 'moment_matching_max' max_iter = 5000 ztzG_approx = True params_optim = {'lr': 1e-3} @@ -110,7 +111,7 @@ def test_rc_mask(): random_state=simu_random_state) # %% Fit Hawkes process to raised_cosine simulation - for init in [init1, init2]: + for init in [init1, init2, init3, init4]: rc_bl, rc_alpha = maskedsolver( kernel='raised_cosine', events=events_rc, diff --git a/fadin/utils/utils.py b/fadin/utils/utils.py index 05152e4..d0a85e3 100644 --- a/fadin/utils/utils.py +++ b/fadin/utils/utils.py @@ -130,14 +130,15 @@ def check_random_state(seed): ' instance' % seed) -def momentmatching_kernel(solver, events, n_ground_events, - plot_delta=False, mode='max'): +def momentmatching_kernel_nomark(solver, events, n_ground_events, + plot_delta=False, mode='max'): """Moment matching initialization of kernel parameters. Implemented for 'truncated_gaussian' and 'raised_cosine' kernels. 
For the truncated gaussian kernel, the means $m_{i,j}$ and std $\\sigma_{i,j}$ are: $m_{i, j} = - \\frac{1}{N_{g_i}(T)}\\sum_{t_n^i \\in \\mathscr{F}_T^i} \\delta t^{i, j}_n$ + \\frac{1}{N_{g_i}(T)}\\sum_{t_n^i \\in \\mathscr{F}_T^i} + \\delta t^{i, j}_n$ $\\sigma_{i, j} = \\sqrt{\\dfrac{ \\sum_{t_n^i \\in \\mathscr{F}_T^i} (\\delta t^{i, j}_n - m_{i, j})^2 @@ -170,17 +171,15 @@ def momentmatching_kernel(solver, events, n_ground_events, kernel_params_init = [torch.ones(solver.n_dim, solver.n_dim), torch.ones(solver.n_dim, solver.n_dim)] for i in range(solver.n_dim): - print('i', i) for j in range(solver.n_dim): # Mean, std of time delta of [i, j] kernel if mode == 'max': delta_t = torch.zeros(int(n_ground_events[i].item())) for n in range(int(n_ground_events[i])): - print('n', n) - t_n_i = events[i][n][0] + t_n_i = events[i][n] t_n_j = torch.max( - torch.where(torch.tensor(events[j][:, 0]) < t_n_i, - torch.tensor(events[j][:, 0]), + torch.where(torch.tensor(events[j]) < t_n_i, + torch.tensor(events[j]), 0.) ) delta_t[n] = t_n_i - t_n_j @@ -189,8 +188,8 @@ def momentmatching_kernel(solver, events, n_ground_events, if mode == 'mean': delta_t = [] for n in range(int(n_ground_events[i])): - t_n_i = events[i][n, 0] - for t_n_j in events[j][:, 0]: + t_n_i = events[i][n] + for t_n_j in events[j]: if t_n_j < t_n_i - solver.W: continue if t_n_j >= t_n_i: @@ -217,7 +216,8 @@ def momentmatching_kernel(solver, events, n_ground_events, return kernel_params_init -def momentmatching(solver, events, n_ground_events, end_time): +def momentmatching_nomark(solver, events, n_ground_events, end_time, + mode='max'): """Moment matching initialization of baseline, alpha and kernel parameters. $\\mu_i^s = \frac{\\#\\mathscr{F}^i_T}{(D+1)T} \forall i \\in [1, D]$ $\\alpha_{i, j}^s = \\frac{1}{D+1} \\forall i,j \\in [1, D]$ @@ -235,5 +235,7 @@ def momentmatching(solver, events, n_ground_events, end_time): alpha = (alpha * solver.alpha_mask).requires_grad_(True) # Kernel parameters init - kernel_params_init = momentmatching_kernel(solver, events, n_ground_events) + kernel_params_init = momentmatching_kernel_nomark( + solver, events, n_ground_events, mode=mode + ) return baseline, alpha, kernel_params_init From d92cfbff2bc6705886d9f5956e5dfb9f84b8f0dd Mon Sep 17 00:00:00 2001 From: vloison Date: Fri, 31 May 2024 22:06:37 +0200 Subject: [PATCH 07/25] fix failing test --- fadin/tests/test_loss_and_gradient.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fadin/tests/test_loss_and_gradient.py b/fadin/tests/test_loss_and_gradient.py index 03d3540..36e139a 100644 --- a/fadin/tests/test_loss_and_gradient.py +++ b/fadin/tests/test_loss_and_gradient.py @@ -207,7 +207,7 @@ def test_l2loss(): kernel_RC, n_events, delta, end_time) - assert torch.isclose(loss_conv_RC, loss_precomp_RC) + assert torch.isclose(1 + loss_conv_RC, 1 + loss_precomp_RC) def test_gradients(): From 649c2db6890695e6fbe857c9dc25db97523f72ae Mon Sep 17 00:00:00 2001 From: Virginie Loison <48970001+vloison@users.noreply.github.com> Date: Wed, 12 Jun 2024 21:36:24 +0200 Subject: [PATCH 08/25] Update experiments/benchmark/run_benchmark_exp.py according to review Co-authored-by: Thomas Moreau --- experiments/benchmark/run_benchmark_exp.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/experiments/benchmark/run_benchmark_exp.py b/experiments/benchmark/run_benchmark_exp.py index c25d467..c3ab80d 100644 --- a/experiments/benchmark/run_benchmark_exp.py +++ b/experiments/benchmark/run_benchmark_exp.py @@ -275,7 +275,7 @@ def 
run_experiment(baseline, alpha, decay, T, dt, seed=0): res['err_fadin'] = np.absolute( intens.numpy() - intens_fadin.numpy() - ).mean() + ).mean() res['time_fadin'] = results['time'] results = run_gibbs(S, size_grid, dt, seed=seed) From 42cecfb7c2b9cfa7deefb90ed19353854e783739 Mon Sep 17 00:00:00 2001 From: Virginie Loison <48970001+vloison@users.noreply.github.com> Date: Wed, 12 Jun 2024 21:37:47 +0200 Subject: [PATCH 09/25] Update fadin/solver.py according to review Co-authored-by: Thomas Moreau --- fadin/solver.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fadin/solver.py b/fadin/solver.py index 97583e8..1e0fad1 100644 --- a/fadin/solver.py +++ b/fadin/solver.py @@ -381,7 +381,7 @@ class FaDIn_no_precomputations(FaDIn): precomputations = False -class FaDIn_loglikelihood(FaDIn): +class FaDILogLikelihood(FaDIn): """Define the FaDIn framework for estimated Hawkes processes *with loglikelihood criterion instead of l2 loss*.""" optim_iteration = staticmethod(optim_iteration_loglikelihood) From bce234f4f1e84ff904a47c35b3437971c11b6480 Mon Sep 17 00:00:00 2001 From: Virginie Loison <48970001+vloison@users.noreply.github.com> Date: Wed, 12 Jun 2024 21:39:11 +0200 Subject: [PATCH 10/25] Update fadin/solver.py All class names in CamelCase Co-authored-by: Thomas Moreau --- fadin/solver.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fadin/solver.py b/fadin/solver.py index 1e0fad1..7e1895b 100644 --- a/fadin/solver.py +++ b/fadin/solver.py @@ -374,7 +374,7 @@ def fit(self, events, end_time): return self -class FaDIn_no_precomputations(FaDIn): +class FaDInNoPrecomputations(FaDIn): """Define the FaDIn framework for estimated Hawkes processes *without precomputations*.""" optim_iteration = staticmethod(optim_iteration_l2_noprecomput) From cc1e10fb78b68b4b6da8f9117bd8399d27915285 Mon Sep 17 00:00:00 2001 From: vloison Date: Wed, 12 Jun 2024 22:23:14 +0200 Subject: [PATCH 11/25] comment +1 in assert torch.isclose --- fadin/tests/test_loss_and_gradient.py | 1 + 1 file changed, 1 insertion(+) diff --git a/fadin/tests/test_loss_and_gradient.py b/fadin/tests/test_loss_and_gradient.py index 36e139a..802b197 100644 --- a/fadin/tests/test_loss_and_gradient.py +++ b/fadin/tests/test_loss_and_gradient.py @@ -208,6 +208,7 @@ def test_l2loss(): delta, end_time) assert torch.isclose(1 + loss_conv_RC, 1 + loss_precomp_RC) + # + 1 to avoid error when loss is close to 0 def test_gradients(): From 33bc14eb704b35c0124fce5f20a0f4788ca572eb Mon Sep 17 00:00:00 2001 From: vloison Date: Wed, 12 Jun 2024 22:24:00 +0200 Subject: [PATCH 12/25] Move FaDInLoglikelihood and FaDInNoPrecomputations to experiments --- experiments/benchmark/run_comparison_ll.py | 55 ++++++++++++++++++- .../inference_error/run_comp_autodiff.py | 41 ++++++++++++-- fadin/loss_and_gradient.py | 51 ----------------- fadin/solver.py | 16 +----- 4 files changed, 90 insertions(+), 73 deletions(-) diff --git a/experiments/benchmark/run_comparison_ll.py b/experiments/benchmark/run_comparison_ll.py index 87a50c1..ead30a8 100644 --- a/experiments/benchmark/run_comparison_ll.py +++ b/experiments/benchmark/run_comparison_ll.py @@ -9,9 +9,58 @@ from tick.hawkes import SimuHawkes, HawkesKernelTimeFunc from fadin.kernels import DiscreteKernelFiniteSupport -from fadin.solver import FaDIn, FaDIn_loglikelihood +from fadin.solver import FaDIn +from fadin.loss_and_gradient import discrete_l2_loss_conv + +# %% +################################ +# Define solver with loglikelihood criterion +################################ + + 
+def discrete_ll_loss_conv(intensity, events_grid, delta): + """Compute the LL discrete loss using convolutions. + + Parameters + ---------- + intensity : tensor, shape (n_dim, n_grid) + Values of the intensity function evaluated on the grid. + + events_grid : tensor, shape (n_dim, n_grid) + Events projected on the pre-defined grid. + + delta : float + Step size of the discretization grid. + """ + mask = events_grid > 0 + intens = torch.log(intensity[mask]) + return (intensity.sum(1) * delta - + intens.sum()).sum() / events_grid.sum() + + +def optim_iteration_loglikelihood(solver, events_grid, discretization, + i, n_events, end_time): + """One optimizer iteration of FaDIn_loglikelihood solver, + with loglikelihood loss. + """ + intens = solver.kernel_model.intensity_eval( + solver.params_intens[0], + solver.params_intens[1], + solver.params_intens[2:], + events_grid, + discretization + ) + loss = discrete_ll_loss_conv(intens, events_grid, solver.delta) + loss.backward() + + +class FaDInLogLikelihood(FaDIn): + """Define the FaDIn framework for estimated Hawkes processes *with + loglikelihood criterion instead of l2 loss*.""" + optim_iteration = staticmethod(optim_iteration_loglikelihood) + precomputations = False + -# %% simulate data # Simulated data ################################ @@ -78,7 +127,7 @@ def run_solver(criterion, events, kernel_params_init, device="cpu" ) elif criterion == 'll': - solver = FaDIn_loglikelihood( + solver = FaDInLogLikelihood( 1, kernel, init=init, diff --git a/experiments/inference_error/run_comp_autodiff.py b/experiments/inference_error/run_comp_autodiff.py index 3982d2f..8b913b6 100644 --- a/experiments/inference_error/run_comp_autodiff.py +++ b/experiments/inference_error/run_comp_autodiff.py @@ -10,7 +10,35 @@ from tick.hawkes import SimuHawkes, HawkesKernelTimeFunc from fadin.kernels import DiscreteKernelFiniteSupport -from fadin.solver import FaDIn, FaDIn_no_precomputations +from fadin.solver import FaDIn +from fadin.loss_and_gradient import discrete_l2_loss_conv + +################################ +# Define solver without precomputations +################################ + + +def optim_iteration_l2_noprecomput(solver, events_grid, discretization, + i, n_events, end_time): + """One optimizer iteration of FaDIn_no_precomputations solver, + with l2 loss and no precomputations.""" + intens = solver.kernel_model.intensity_eval( + solver.params_intens[0], + solver.params_intens[1], + solver.params_intens[2:], + events_grid, + discretization + ) + loss = discrete_l2_loss_conv(intens, events_grid, solver.delta) + loss.backward() + + +class FaDInNoPrecomputations(FaDIn): + """Define the FaDIn framework for estimated Hawkes processes *without + precomputations*.""" + optim_iteration = staticmethod(optim_iteration_l2_noprecomput) + precomputations = False + ################################ # Meta parameters @@ -48,7 +76,11 @@ def simulate_data(baseline, alpha, mu, sigma, T, dt, seed=0): tf = HawkesKernelTimeFunc(t_values=t_values, y_values=k) kernels = [[tf]] hawkes = SimuHawkes( - baseline=baseline, kernels=kernels, end_time=T, verbose=False, seed=int(seed) + baseline=baseline, + kernels=kernels, + end_time=T, + verbose=False, + seed=int(seed) ) hawkes.simulate() @@ -57,14 +89,15 @@ def simulate_data(baseline, alpha, mu, sigma, T, dt, seed=0): @mem.cache -def run_solver(events, u_init, sigma_init, baseline_init, alpha_init, T, dt, seed=0): +def run_solver(events, u_init, sigma_init, baseline_init, alpha_init, T, dt, + seed=0): max_iter = 800 init = { 'alpha': 
torch.tensor(alpha_init), 'baseline': torch.tensor(baseline_init), 'kernel': [torch.tensor(u_init), torch.tensor(sigma_init)] } - solver_autodiff = FaDIn_no_precomputations( + solver_autodiff = FaDInNoPrecomputations( 1, "raised_cosine", init=init, diff --git a/fadin/loss_and_gradient.py b/fadin/loss_and_gradient.py index 6de6e3f..6565163 100644 --- a/fadin/loss_and_gradient.py +++ b/fadin/loss_and_gradient.py @@ -61,37 +61,6 @@ def optim_iteration_fadin(solver, events_grid, discretization, ) -def optim_iteration_l2_noprecomput(solver, events_grid, discretization, - i, n_events, end_time): - """One optimizer iteration of FaDIn_no_precomputations solver, - with l2 loss and no precomputations.""" - intens = solver.kernel_model.intensity_eval( - solver.params_intens[0], - solver.params_intens[1], - solver.params_intens[2:], - events_grid, - discretization - ) - loss = discrete_l2_loss_conv(intens, events_grid, solver.delta) - loss.backward() - - -def optim_iteration_loglikelihood(solver, events_grid, discretization, - i, n_events, end_time): - """One optimizer iteration of FaDIn_loglikelihood solver, - with loglikelihood loss. - """ - intens = solver.kernel_model.intensity_eval( - solver.params_intens[0], - solver.params_intens[1], - solver.params_intens[2:], - events_grid, - discretization - ) - loss = discrete_ll_loss_conv(intens, events_grid, solver.delta) - loss.backward() - - def discrete_l2_loss_conv(intensity, events_grid, delta): """Compute the l2 discrete loss using convolutions. @@ -116,26 +85,6 @@ def discrete_l2_loss_conv(intensity, events_grid, delta): (intensity * events_grid).sum(1)).sum()) / events_grid.sum() -def discrete_ll_loss_conv(intensity, events_grid, delta): - """Compute the LL discrete loss using convolutions. - - Parameters - ---------- - intensity : tensor, shape (n_dim, n_grid) - Values of the intensity function evaluated on the grid. - - events_grid : tensor, shape (n_dim, n_grid) - Events projected on the pre-defined grid. - - delta : float - Step size of the discretization grid. - """ - mask = events_grid > 0 - intens = torch.log(intensity[mask]) - return (intensity.sum(1) * delta - - intens.sum()).sum() / events_grid.sum() - - def discrete_l2_loss_precomputation(zG, zN, ztzG, baseline, alpha, kernel, n_events, delta, end_time): """Compute the l2 discrete loss using precomputation terms. 
diff --git a/fadin/solver.py b/fadin/solver.py index 7e1895b..9b22a79 100644 --- a/fadin/solver.py +++ b/fadin/solver.py @@ -7,7 +7,7 @@ from fadin.utils.compute_constants import get_zG, get_zN, get_ztzG, \ get_ztzG_approx from fadin.loss_and_gradient import optim_iteration_fadin, \ - optim_iteration_l2_noprecomput, optim_iteration_loglikelihood + optim_iteration_loglikelihood from fadin.kernels import DiscreteKernelFiniteSupport @@ -374,20 +374,6 @@ def fit(self, events, end_time): return self -class FaDInNoPrecomputations(FaDIn): - """Define the FaDIn framework for estimated Hawkes processes *without - precomputations*.""" - optim_iteration = staticmethod(optim_iteration_l2_noprecomput) - precomputations = False - - -class FaDILogLikelihood(FaDIn): - """Define the FaDIn framework for estimated Hawkes processes *with - loglikelihood criterion instead of l2 loss*.""" - optim_iteration = staticmethod(optim_iteration_loglikelihood) - precomputations = False - - def plot(solver, plotfig=False, bl_noise=False, title=None, ch_names=None, savefig=None): """ From 5c11fc7e28aaebdf778a7507121d668646ab61c8 Mon Sep 17 00:00:00 2001 From: vloison Date: Wed, 12 Jun 2024 22:34:01 +0200 Subject: [PATCH 13/25] clean imports --- experiments/benchmark/run_comparison_ll.py | 3 +-- fadin/solver.py | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/experiments/benchmark/run_comparison_ll.py b/experiments/benchmark/run_comparison_ll.py index ead30a8..4b76b58 100644 --- a/experiments/benchmark/run_comparison_ll.py +++ b/experiments/benchmark/run_comparison_ll.py @@ -10,9 +10,8 @@ from fadin.kernels import DiscreteKernelFiniteSupport from fadin.solver import FaDIn -from fadin.loss_and_gradient import discrete_l2_loss_conv -# %% +# %% ################################ # Define solver with loglikelihood criterion ################################ diff --git a/fadin/solver.py b/fadin/solver.py index 9b22a79..9895f3d 100644 --- a/fadin/solver.py +++ b/fadin/solver.py @@ -6,8 +6,7 @@ from fadin.utils.utils import optimizer, projected_grid, momentmatching_nomark from fadin.utils.compute_constants import get_zG, get_zN, get_ztzG, \ get_ztzG_approx -from fadin.loss_and_gradient import optim_iteration_fadin, \ - optim_iteration_loglikelihood +from fadin.loss_and_gradient import optim_iteration_fadin from fadin.kernels import DiscreteKernelFiniteSupport From 2ce478a22834721689a016d3a171bcfaf6fa1fce Mon Sep 17 00:00:00 2001 From: vloison Date: Thu, 13 Jun 2024 11:26:16 +0200 Subject: [PATCH 14/25] change optim_iteration to compute_gradient for clarity --- experiments/benchmark/run_comparison_ll.py | 8 ++--- .../inference_error/run_comp_autodiff.py | 6 ++-- fadin/loss_and_gradient.py | 33 +++++++++++++++---- fadin/solver.py | 6 ++-- 4 files changed, 36 insertions(+), 17 deletions(-) diff --git a/experiments/benchmark/run_comparison_ll.py b/experiments/benchmark/run_comparison_ll.py index 4b76b58..1c7a29d 100644 --- a/experiments/benchmark/run_comparison_ll.py +++ b/experiments/benchmark/run_comparison_ll.py @@ -37,8 +37,8 @@ def discrete_ll_loss_conv(intensity, events_grid, delta): intens.sum()).sum() / events_grid.sum() -def optim_iteration_loglikelihood(solver, events_grid, discretization, - i, n_events, end_time): +def compute_gradient_loglikelihood(solver, events_grid, discretization, + i, n_events, end_time): """One optimizer iteration of FaDIn_loglikelihood solver, with loglikelihood loss. 
""" @@ -56,10 +56,10 @@ def optim_iteration_loglikelihood(solver, events_grid, discretization, class FaDInLogLikelihood(FaDIn): """Define the FaDIn framework for estimated Hawkes processes *with loglikelihood criterion instead of l2 loss*.""" - optim_iteration = staticmethod(optim_iteration_loglikelihood) + compute_gradient = staticmethod(compute_gradient_loglikelihood) precomputations = False - +################################ # Simulated data ################################ diff --git a/experiments/inference_error/run_comp_autodiff.py b/experiments/inference_error/run_comp_autodiff.py index 8b913b6..2ee116f 100644 --- a/experiments/inference_error/run_comp_autodiff.py +++ b/experiments/inference_error/run_comp_autodiff.py @@ -18,8 +18,8 @@ ################################ -def optim_iteration_l2_noprecomput(solver, events_grid, discretization, - i, n_events, end_time): +def compute_gradient_l2_noprecomput(solver, events_grid, discretization, + i, n_events, end_time): """One optimizer iteration of FaDIn_no_precomputations solver, with l2 loss and no precomputations.""" intens = solver.kernel_model.intensity_eval( @@ -36,7 +36,7 @@ def optim_iteration_l2_noprecomput(solver, events_grid, discretization, class FaDInNoPrecomputations(FaDIn): """Define the FaDIn framework for estimated Hawkes processes *without precomputations*.""" - optim_iteration = staticmethod(optim_iteration_l2_noprecomput) + compute_gradient = staticmethod(compute_gradient_l2_noprecomput) precomputations = False diff --git a/fadin/loss_and_gradient.py b/fadin/loss_and_gradient.py index 6565163..da7f445 100644 --- a/fadin/loss_and_gradient.py +++ b/fadin/loss_and_gradient.py @@ -1,10 +1,29 @@ import torch -def optim_iteration_fadin(solver, events_grid, discretization, - i, n_events, end_time): - """One optimizer iteration of FaDIn solver, with l2 loss and - precomputations. +def compute_gradient_fadin(solver, events_grid, discretization, + i, n_events, end_time): + """Updates gradients for optimizer iteration of FaDIn solver, + with l2 loss and precomputations. Gradients are updated inplace. + + Parameters + ---------- + solver : FaDIn + The FaDIn solver. + events_grid : tensor, shape (n_dim, n_grid) (optionnal) + Not necessary in this function, present for FaDIn derived classes. + discretization : tensor, shape (L,) + Discretization grid. + i : int + Optimizer iteration number. + n_events : tensor, shape (n_dim,) + Number of events for each dimension. + end_time : float + The end time of the Hawkes process. 
+ + Returns + ------- + None """ # Compute kernel and gradient kernel = solver.kernel_model.kernel_eval( @@ -24,7 +43,7 @@ def optim_iteration_fadin(solver, events_grid, discretization, kernel, n_events, solver.delta, end_time).detach() - # Update baseline + # Update baseline gradient solver.params_intens[0].grad = get_grad_baseline( solver.zG, solver.params_intens[0], @@ -34,7 +53,7 @@ def optim_iteration_fadin(solver, events_grid, discretization, n_events, end_time ) - # Update alpha + # Update alpha gradient solver.params_intens[1].grad = get_grad_alpha( solver.zG, solver.zN, @@ -45,7 +64,7 @@ def optim_iteration_fadin(solver, events_grid, discretization, solver.delta, n_events ) - # Update kernel + # Update kernel gradient for j in range(solver.n_kernel_params): solver.params_intens[2 + j].grad = \ get_grad_eta( diff --git a/fadin/solver.py b/fadin/solver.py index 9895f3d..3afaeb2 100644 --- a/fadin/solver.py +++ b/fadin/solver.py @@ -6,7 +6,7 @@ from fadin.utils.utils import optimizer, projected_grid, momentmatching_nomark from fadin.utils.compute_constants import get_zG, get_zN, get_ztzG, \ get_ztzG_approx -from fadin.loss_and_gradient import optim_iteration_fadin +from fadin.loss_and_gradient import compute_gradient_fadin from fadin.kernels import DiscreteKernelFiniteSupport @@ -135,7 +135,7 @@ class FaDIn(object): If `log=True`, compute the loss accross iterations. If no early stopping, `n_iter` is equal to `max_iter`. """ - optim_iteration = staticmethod(optim_iteration_fadin) + compute_gradient = staticmethod(compute_gradient_fadin) precomputations = True def __init__(self, n_dim, kernel, init='random', optim_mask=None, @@ -333,7 +333,7 @@ def fit(self, events, end_time): flush=True) self.opt.zero_grad() - self.optim_iteration( + self.compute_gradient( self, events_grid, discretization, i, n_events, end_time ) self.opt.step() From c8695c28ed5e87e3422a3cef6649e28e8c92d1e8 Mon Sep 17 00:00:00 2001 From: GuillaumeStaermanML Date: Mon, 17 Jun 2024 13:42:40 +0200 Subject: [PATCH 15/25] condensate unit parameters and precomputations --- fadin/solver.py | 58 +++++++++++-------------------------------------- 1 file changed, 13 insertions(+), 45 deletions(-) diff --git a/fadin/solver.py b/fadin/solver.py index 3afaeb2..895ca88 100644 --- a/fadin/solver.py +++ b/fadin/solver.py @@ -3,9 +3,9 @@ import matplotlib.pyplot as plt import numpy as np -from fadin.utils.utils import optimizer, projected_grid, momentmatching_nomark -from fadin.utils.compute_constants import get_zG, get_zN, get_ztzG, \ - get_ztzG_approx +from fadin.utils.utils import optimizer, projected_grid, momentmatching_nomark, \ + init_kernel_parameters +from fadin.utils.compute_constants import compute_constants_fadin from fadin.loss_and_gradient import compute_gradient_fadin from fadin.kernels import DiscreteKernelFiniteSupport @@ -99,9 +99,6 @@ class FaDIn(object): ztzG is approximated with Toeplitz matrix not taking into account edge effects. - device : `str` in ``{'cpu' | 'cuda'}`` - Computations done on cpu or gpu. Gpu is not implemented yet. - log : `boolean`, `default=False` Record the loss values during the optimization. 
@@ -141,7 +138,7 @@ class FaDIn(object): def __init__(self, n_dim, kernel, init='random', optim_mask=None, kernel_length=1, delta=0.01, optim='RMSprop', params_optim=dict(), max_iter=2000, ztzG_approx=True, - device='cpu', log=False, grad_kernel=None, + log=False, grad_kernel=None, tol=10e-5, random_state=None): # Discretization parameters @@ -190,25 +187,9 @@ def __init__(self, n_dim, kernel, init='random', optim_mask=None, self.alpha = (self.alpha * self.alpha_mask).requires_grad_(True) if init == 'random' or self.moment_matching or init['kernel'] is None: - kernel_params_init = [] - if kernel == 'raised_cosine': - temp = 0.5 * self.W * torch.rand(self.n_dim, self.n_dim) - temp2 = 0.5 * self.W * torch.rand(self.n_dim, self.n_dim) - kernel_params_init.append(temp) - kernel_params_init.append(temp2) - elif kernel == 'truncated_gaussian': - temp = 0.25 * self.W * torch.rand(self.n_dim, self.n_dim) - temp2 = 0.5 * self.W * torch.rand(self.n_dim, self.n_dim) - kernel_params_init.append(temp) - kernel_params_init.append(temp2) - elif kernel == 'truncated_exponential': - kernel_params_init.append(2 * torch.rand(self.n_dim, - self.n_dim)) - else: - raise NotImplementedError( - 'kernel initial parameters of not \ - implemented kernel have to be given' - ) + kernel_params_init = init_kernel_parameters(kernel, + self.kernel_length, + self.n_dim) self.kernel_params_fixed = kernel_params_init @@ -239,11 +220,6 @@ def __init__(self, n_dim, kernel, init='random', optim_mask=None, else: torch.manual_seed(random_state) - if torch.cuda.is_available() and device == 'cuda': - self.device = 'cuda' - else: - self.device = 'cpu' - def fit(self, events, end_time): """Learn the parameters of the Hawkes processes on a discrete grid. @@ -283,6 +259,7 @@ def fit(self, events, end_time): kernel_param = kernel_params_init[i].float().clip(1e-4) kernel_param.requires_grad_(True) self.params_intens.append(kernel_param) + self.opt = optimizer( self.params_intens, self.params_solver, @@ -292,20 +269,11 @@ def fit(self, events, end_time): #################################################### # Precomputations #################################################### - if self.precomputations: - start = time.time() - zG = get_zG(events_grid.double().numpy(), self.L) - zN = get_zN(events_grid.double().numpy(), self.L) - - if self.ztzG_approx: - ztzG = get_ztzG_approx(events_grid.double().numpy(), self.L) - else: - ztzG = get_ztzG(events_grid.double().numpy(), self.L) - - self.zG = torch.tensor(zG).float() - self.zN = torch.tensor(zN).float() - self.ztzG = torch.tensor(ztzG).float() - print('precomput:', time.time() - start) + start = time.time() + self.zG, self.zN, self.ztzG = compute_constants_fadin(events_grid, + self.L, + self.ztzG_approx) + print('precomput:', time.time() - start) #################################################### # save results From 4891015a11dfeda56b7172ee50a8acd8b8e6f7f2 Mon Sep 17 00:00:00 2001 From: GuillaumeStaermanML Date: Mon, 17 Jun 2024 13:42:59 +0200 Subject: [PATCH 16/25] condensate unit parameters and precomputations --- fadin/utils/compute_constants.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/fadin/utils/compute_constants.py b/fadin/utils/compute_constants.py index 080d2c3..9515d61 100644 --- a/fadin/utils/compute_constants.py +++ b/fadin/utils/compute_constants.py @@ -1,5 +1,6 @@ import numba import numpy as np +import torch from scipy.linalg import toeplitz @@ -131,3 +132,20 @@ def get_ztzG_approx_(events_grid, L): for j in range(n_dim): ztzG[i, j] = 
toeplitz(diff_tau[i, j]) return ztzG + + +def compute_constants_fadin(events_grid, L, ztzG_approx=True): + """Compute all precomputations""" + zG = get_zG(events_grid.double().numpy(), L) + zN = get_zN(events_grid.double().numpy(), L) + + if ztzG_approx: + ztzG = get_ztzG_approx(events_grid.double().numpy(), L) + else: + ztzG = get_ztzG(events_grid.double().numpy(), L) + + zG = torch.tensor(zG).float() + zN = torch.tensor(zN).float() + ztzG = torch.tensor(ztzG).float() + + return zG, zN, ztzG From 541e9f715913f6cf7c1463ca3a0ec1084ccc51f3 Mon Sep 17 00:00:00 2001 From: GuillaumeStaermanML Date: Mon, 17 Jun 2024 13:43:04 +0200 Subject: [PATCH 17/25] condensate unit parameters and precomputations --- fadin/utils/utils.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/fadin/utils/utils.py b/fadin/utils/utils.py index d0a85e3..a83603b 100644 --- a/fadin/utils/utils.py +++ b/fadin/utils/utils.py @@ -35,6 +35,26 @@ def kernel_deriv_norm(function, grad_function_param, delta): function * grad_function_param_sum) / (function_sum**2) +def init_kernel_parameters(kernel, kernel_length, n_dim): + kernel_params_init = [] + if kernel == 'raised_cosine': + temp = 0.5 * kernel_length * torch.rand(n_dim, n_dim) + temp2 = 0.5 * kernel_length * torch.rand(n_dim, n_dim) + kernel_params_init.append(temp) + kernel_params_init.append(temp2) + elif kernel == 'truncated_gaussian': + temp = 0.25 * kernel_length * torch.rand(n_dim, n_dim) + temp2 = 0.5 * kernel_length * torch.rand(n_dim, n_dim) + kernel_params_init.append(temp) + kernel_params_init.append(temp2) + elif kernel == 'truncated_exponential': + kernel_params_init.append(2 * torch.rand(n_dim, n_dim)) + else: + raise NotImplementedError('kernel initial parameters of not \ + implemented kernel have to be given') + return kernel_params_init + + def grad_kernel_callable(kernel, grad_kernel, kernel_params, time_values, L, lower, upper, n_dim): """Transform the callables ``kernel and ``grad_kernel`` into From c2fb1b5095c0111abcaa642fecd30292604b677b Mon Sep 17 00:00:00 2001 From: GuillaumeStaermanML Date: Mon, 17 Jun 2024 13:46:03 +0200 Subject: [PATCH 18/25] condensate unit parameters and precomputations --- fadin/solver.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fadin/solver.py b/fadin/solver.py index 895ca88..b8f6da1 100644 --- a/fadin/solver.py +++ b/fadin/solver.py @@ -195,10 +195,10 @@ def __init__(self, n_dim, kernel, init='random', optim_mask=None, self.n_kernel_params = len(kernel_params_init) - self.kernel_model = DiscreteKernelFiniteSupport(self.delta, self.n_dim, - kernel, - self.W, 0, self.W, - grad_kernel) + # self.kernel_model = DiscreteKernelFiniteSupport(self.delta, self.n_dim, + # kernel, + # self.W, 0, self.W, + # grad_kernel) self.kernel = kernel # Set optimizer self.params_intens = [self.baseline, self.alpha] From fb942ec9c817d36e96486abdc57fa098641d3161 Mon Sep 17 00:00:00 2001 From: GuillaumeStaermanML Date: Mon, 17 Jun 2024 13:57:49 +0200 Subject: [PATCH 19/25] correct tests --- fadin/utils/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fadin/utils/utils.py b/fadin/utils/utils.py index a83603b..8afd4ec 100644 --- a/fadin/utils/utils.py +++ b/fadin/utils/utils.py @@ -210,7 +210,7 @@ def momentmatching_kernel_nomark(solver, events, n_ground_events, for n in range(int(n_ground_events[i])): t_n_i = events[i][n] for t_n_j in events[j]: - if t_n_j < t_n_i - solver.W: + if t_n_j < t_n_i - solver.kernel_length: continue if t_n_j >= t_n_i: break @@ -221,7 
+221,7 @@ def momentmatching_kernel_nomark(solver, events, n_ground_events, if plot_delta: fig_delta, ax_delta = plt.subplots() ax_delta.hist(delta_t, bins=20) - ax_delta.set_xlim([0, solver.W]) + ax_delta.set_xlim([0, solver.kernel_length]) ax_delta.set_xlabel('Time') ax_delta.set_ylabel('Histogram') fig_delta.suptitle('Moment Matching delta_t') From 94e4032d10ec94be5d27aa52e574eb5e95dd1e33 Mon Sep 17 00:00:00 2001 From: GuillaumeStaermanML Date: Mon, 17 Jun 2024 13:57:54 +0200 Subject: [PATCH 20/25] correct tests --- fadin/solver.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/fadin/solver.py b/fadin/solver.py index b8f6da1..42d5214 100644 --- a/fadin/solver.py +++ b/fadin/solver.py @@ -143,8 +143,8 @@ def __init__(self, n_dim, kernel, init='random', optim_mask=None, # Discretization parameters self.delta = delta - self.W = kernel_length - self.L = int(self.W / delta) + self.kernel_length = kernel_length + self.L = int(kernel_length / delta) self.ztzG_approx = ztzG_approx # Optimizer parameters @@ -195,10 +195,10 @@ def __init__(self, n_dim, kernel, init='random', optim_mask=None, self.n_kernel_params = len(kernel_params_init) - # self.kernel_model = DiscreteKernelFiniteSupport(self.delta, self.n_dim, - # kernel, - # self.W, 0, self.W, - # grad_kernel) + self.kernel_model = DiscreteKernelFiniteSupport(self.delta, self.n_dim, + kernel, self.kernel_length, + 0, self.kernel_length, + grad_kernel) self.kernel = kernel # Set optimizer self.params_intens = [self.baseline, self.alpha] @@ -238,7 +238,7 @@ def fit(self, events, end_time): Fitted parameters. """ n_grid = int(1 / self.delta) * end_time + 1 - discretization = torch.linspace(0, self.W, self.L) + discretization = torch.linspace(0, self.kernel_length, self.L) events_grid = projected_grid(events, self.delta, n_grid) n_events = events_grid.sum(1) n_ground_events = [events[i].shape[0] for i in range(len(events))] @@ -382,6 +382,7 @@ def plot(solver, plotfig=False, bl_noise=False, title=None, ch_names=None, solver.n_dim, kernel=solver.kernel, kernel_length=solver.W) + kappa_values = kernel.kernel_eval(solver.params_intens[-2:], discretization).detach() # Plot From 562ebd890ff0a96cc0f58cd4196802ef1000d1b2 Mon Sep 17 00:00:00 2001 From: GuillaumeStaermanML Date: Mon, 17 Jun 2024 15:07:10 +0200 Subject: [PATCH 21/25] remove tick from the simulation of experiments --- .../inference_error/run_error_discrete_EXP.py | 65 ++++--------- .../run_error_discrete_EXP_m.py | 72 +++------------ .../inference_error/run_error_discrete_RC.py | 77 +++++++--------- .../run_error_discrete_RC_m.py | 92 ++++++++----------- .../inference_error/run_error_discrete_TG.py | 80 +++++++--------- .../run_error_discrete_TG_m.py | 80 ++++++---------- fadin/solver.py | 4 +- 7 files changed, 161 insertions(+), 309 deletions(-) diff --git a/experiments/inference_error/run_error_discrete_EXP.py b/experiments/inference_error/run_error_discrete_EXP.py index d9bc853..d99a361 100644 --- a/experiments/inference_error/run_error_discrete_EXP.py +++ b/experiments/inference_error/run_error_discrete_EXP.py @@ -3,14 +3,11 @@ import itertools import time import numpy as np -import torch import pandas as pd from joblib import Parallel, delayed -from tick.hawkes import SimuHawkes, HawkesKernelTimeFunc -from fadin.kernels import DiscreteKernelFiniteSupport from fadin.solver import FaDIn - +from fadin.utils.utils_simu import simu_hawkes_cluster ################################ # Meta parameters @@ -32,56 +29,37 @@ # @mem.cache -def simulate_data(baseline, 
alpha, decay, T, dt, seed=0): - L = int(1 / dt) - discretization = torch.linspace(0, 1, L) - Exp = DiscreteKernelFiniteSupport(dt, 1, kernel='truncated_exponential') - kernel_values = Exp.kernel_eval([torch.Tensor(decay)], discretization) - kernel_values = kernel_values * alpha[:, :, None] - - t_values = discretization.double().numpy() - k = kernel_values[0, 0].double().numpy() - - tf = HawkesKernelTimeFunc(t_values=t_values, y_values=k) - kernels = [[tf]] - hawkes = SimuHawkes( - baseline=baseline, kernels=kernels, end_time=T, verbose=False, seed=int(seed) - ) - - hawkes.simulate() - events = hawkes.timestamps +def simulate_data(baseline, alpha, decay, T, seed=0): + kernel = 'expon' + events = simu_hawkes_cluster(T, baseline, alpha, kernel, + params_kernel={'scale': 1 / decay}, + random_state=seed) return events -events = simulate_data(baseline, alpha, decay, T, dt, seed=0) +events = simulate_data(baseline, alpha, decay, T, seed=0) # @mem.cache -def run_solver(events, decay_init, baseline_init, alpha_init, T, dt, seed=0): +def run_solver(events, T, dt, seed=0): start = time.time() max_iter = 2000 - init = { - 'alpha': torch.tensor(alpha_init), - 'baseline': torch.tensor(baseline_init), - 'kernel': [torch.tensor(decay_init)] - } - solver = FaDIn(2, + + solver = FaDIn(1, "truncated_exponential", - init=init, delta=dt, optim="RMSprop", - step_size=1e-3, max_iter=max_iter, kernel_length=10, log=False, - random_state=0, - device="cpu", + random_state=0 ) + print(time.time() - start) - results = solver.fit(events, T) - results_ = dict(param_baseline=results['param_baseline'][-10:].mean().item(), - param_alpha=results['param_alpha'][-10:].mean().item(), - param_kernel=[results['param_kernel'][0][-10:].mean().item()]) + solver.fit(events, T) + results_ = dict(param_baseline=solver.param_baseline[-10:].mean().item(), + param_alpha=solver.param_alpha[-10:].mean().item(), + param_kernel=[solver.param_kernel[0][-10:].mean().item()]) results_["time"] = time.time() - start results_["seed"] = seed results_["T"] = T @@ -93,13 +71,8 @@ def run_solver(events, decay_init, baseline_init, alpha_init, T, dt, seed=0): def run_experiment(baseline, alpha, decay, T, dt, seed=0): - v = 0.2 - events = simulate_data(baseline, alpha, decay, T, dt, seed=seed) - baseline_init = baseline + v - alpha_init = alpha + v - decay_init = decay + v - - results = run_solver(events, decay_init, baseline_init, alpha_init, T, dt, seed) + events = simulate_data(baseline, alpha, decay, T, seed=seed) + results = run_solver(events, T, dt, seed) return results @@ -136,3 +109,5 @@ def compute_norm2_error(s): # df['param_sigma'] = df['param_kernel'].apply(lambda x: x[1]) # , 'sigma': 0.3} + +# %% diff --git a/experiments/inference_error/run_error_discrete_EXP_m.py b/experiments/inference_error/run_error_discrete_EXP_m.py index 1d1d444..5361aad 100644 --- a/experiments/inference_error/run_error_discrete_EXP_m.py +++ b/experiments/inference_error/run_error_discrete_EXP_m.py @@ -4,14 +4,11 @@ import pandas as pd import time import numpy as np -import torch from joblib import Parallel, delayed -from tick.hawkes import SimuHawkes, HawkesKernelTimeFunc -from fadin.kernels import DiscreteKernelFiniteSupport from fadin.solver import FaDIn from fadin.utils.utils import l2_error - +from fadin.utils.utils_simu import simu_hawkes_cluster # mem = Memory(location=".", verbose=2) @@ -26,68 +23,33 @@ # @mem.cache -def simulate_data(baseline, alpha, decay, T, dt, seed=0): - L = int(1 / dt) - discretization = torch.linspace(0, 1, L) - n_dim = decay.shape[0] 
- EXP = DiscreteKernelFiniteSupport(dt, n_dim, kernel='truncated_exponential') - - kernel_values = EXP.kernel_eval([torch.Tensor(decay)], - discretization) - kernel_values = kernel_values * alpha[:, :, None] - - t_values = discretization.double().numpy() - k11 = kernel_values[0, 0].double().numpy() - k12 = kernel_values[0, 1].double().numpy() - k21 = kernel_values[1, 0].double().numpy() - k22 = kernel_values[1, 1].double().numpy() - - tf11 = HawkesKernelTimeFunc(t_values=t_values, y_values=k11) - tf12 = HawkesKernelTimeFunc(t_values=t_values, y_values=k12) - tf21 = HawkesKernelTimeFunc(t_values=t_values, y_values=k21) - tf22 = HawkesKernelTimeFunc(t_values=t_values, y_values=k22) - - kernels = [[tf11, tf12], [tf21, tf22]] - hawkes = SimuHawkes( - baseline=baseline, kernels=kernels, end_time=T, verbose=False, seed=int(seed) - ) - - hawkes.simulate() - events = hawkes.timestamps +def simulate_data(baseline, alpha, decay, T, seed=0): + kernel = 'expon' + events = simu_hawkes_cluster(T, baseline, alpha, kernel, + params_kernel={'scale': 1 / decay}, + random_state=seed) return events - # %% solver -## - # %% solver -# @mem.cache -def run_solver(events, decay_init, baseline_init, alpha_init, dt, T, seed=0): +def run_solver(events, dt, T, seed=0): start = time.time() max_iter = 2000 - init = { - 'alpha': torch.tensor(alpha_init), - 'baseline': torch.tensor(baseline_init), - 'kernel': [torch.tensor(decay_init)] - } solver = FaDIn(2, "truncated_exponential", - init=init, delta=dt, optim="RMSprop", - step_size=1e-3, max_iter=max_iter, log=False, random_state=0, - device="cpu", ztzG_approx=True) print(time.time() - start) - results = solver.fit(events, T) - results_ = dict(param_baseline=results['param_baseline'][-10:].mean(0), - param_alpha=results['param_alpha'][-10:].mean(0), - param_kernel=[results['param_kernel'][0][-10:].mean(0)]) + solver.fit(events, T) + results_ = dict(param_baseline=solver.param_baseline[-10:].mean(0), + param_alpha=solver.param_alpha[-10:].mean(0), + param_kernel=[solver.param_kernel[0][-10:].mean(0)]) results_["time"] = time.time() - start results_["seed"] = seed results_["T"] = T @@ -98,15 +60,9 @@ def run_solver(events, decay_init, baseline_init, alpha_init, dt, T, seed=0): def run_experiment(baseline, alpha, decay, T, dt, seed=0): - v = 0.2 - events = simulate_data(baseline, alpha, decay, T, dt, seed=seed) - baseline_init = baseline + v - alpha_init = alpha + v - decay_init = decay + v - - results = run_solver(events, decay_init, - baseline_init, alpha_init, - dt, T, seed) + events = simulate_data(baseline, alpha, decay, T, seed=seed) + results = run_solver(events, dt, T, seed) + return results diff --git a/experiments/inference_error/run_error_discrete_RC.py b/experiments/inference_error/run_error_discrete_RC.py index ffdec14..78977a8 100644 --- a/experiments/inference_error/run_error_discrete_RC.py +++ b/experiments/inference_error/run_error_discrete_RC.py @@ -6,10 +6,10 @@ import numpy as np import torch from joblib import Parallel, delayed, Memory -from tick.hawkes import SimuHawkes, HawkesKernelTimeFunc from fadin.kernels import DiscreteKernelFiniteSupport from fadin.solver import FaDIn +from fadin.utils.utils_simu import simu_hawkes_cluster ################################ # Meta parameters @@ -31,58 +31,50 @@ u = mu - sigma -@mem.cache def simulate_data(baseline, alpha, mu, sigma, T, dt, seed=0): - L = int(1 / dt) - discretization = torch.linspace(0, 1, L) u = mu - sigma - RC = DiscreteKernelFiniteSupport(dt, 1, kernel='raised_cosine') - kernel_values = 
RC.kernel_eval([torch.Tensor(u), torch.Tensor(sigma)], - discretization) - kernel_values = kernel_values * alpha[:, :, None] - - t_values = discretization.double().numpy() - k = kernel_values[0, 0].double().numpy() - - tf = HawkesKernelTimeFunc(t_values=t_values, y_values=k) - kernels = [[tf]] - hawkes = SimuHawkes( - baseline=baseline, kernels=kernels, end_time=T, verbose=False, seed=int(seed) - ) - - hawkes.simulate() - events = hawkes.timestamps + params = {'u': u, 'sigma': sigma} + + def raised_cosine(x, **params): + rc = DiscreteKernelFiniteSupport(delta=dt, n_dim=1, + kernel='raised_cosine') + u = params['u'] + sigma = params['sigma'] + kernel_values = rc.kernel_eval([torch.Tensor(u), torch.Tensor(sigma)], + torch.tensor(x)) + + return kernel_values.double().numpy() + + events = simu_hawkes_cluster(T, baseline, alpha, + raised_cosine, + params_kernel=params, + random_state=seed) return events + +events = simulate_data(baseline, alpha, mu, sigma, T, dt, seed=0) # %% solver ## -@mem.cache -def run_solver(events, u_init, sigma_init, baseline_init, alpha_init, dt, T, seed=0): +def run_solver(events, dt, T, seed=0): start = time.time() max_iter = 2000 - init = { - 'alpha': torch.tensor(alpha_init), - 'baseline': torch.tensor(baseline_init), - 'kernel': [torch.tensor(u_init), torch.tensor(sigma_init)] - } - solver = FaDIn(2, + + solver = FaDIn(1, "raised_cosine", - init=init, delta=dt, optim="RMSprop", - step_size=1e-3, max_iter=max_iter, log=False, - random_state=0, - device="cpu") + random_state=0) + print(time.time() - start) - results = solver.fit(events, T) - results_ = dict(param_baseline=results['param_baseline'][-10:].mean().item(), - param_alpha=results['param_alpha'][-10:].mean().item(), - param_kernel=[results['param_kernel'][0][-10:].mean().item(), - results['param_kernel'][1][-10:].mean().item()]) + solver.fit(events, T) + results_ = dict(param_baseline=solver.param_baseline[-10:].mean().item(), + param_alpha=solver.param_alpha[-10:].mean().item(), + param_kernel=[solver.param_kernel[0][-10:].mean().item(), + solver.param_kernel[1][-10:].mean().item()]) results_["time"] = time.time() - start results_["seed"] = seed results_["T"] = T @@ -93,16 +85,9 @@ def run_solver(events, u_init, sigma_init, baseline_init, alpha_init, dt, T, see # %% eval on grid ## def run_experiment(baseline, alpha, mu, sigma, T, dt, seed=0): - v = 0.2 events = simulate_data(baseline, alpha, mu, sigma, T, dt, seed=seed) - baseline_init = baseline + v - alpha_init = alpha + v - mu_init = mu + v - sigma_init = sigma - v - u_init = mu_init - sigma_init - results = run_solver(events, u_init, sigma_init, - baseline_init, alpha_init, - dt, T, seed) + results = run_solver(events, dt, T, seed) + return results diff --git a/experiments/inference_error/run_error_discrete_RC_m.py b/experiments/inference_error/run_error_discrete_RC_m.py index e170803..7f427a1 100644 --- a/experiments/inference_error/run_error_discrete_RC_m.py +++ b/experiments/inference_error/run_error_discrete_RC_m.py @@ -6,12 +6,11 @@ import numpy as np import torch from joblib import Parallel, delayed, Memory -from tick.hawkes import SimuHawkes, HawkesKernelTimeFunc from fadin.kernels import DiscreteKernelFiniteSupport from fadin.solver import FaDIn from fadin.utils.utils import l2_error - +from fadin.utils.utils_simu import simu_hawkes_cluster mem = Memory(location=".", verbose=2) @@ -20,74 +19,61 @@ ################################ baseline = np.array([.1, .2]) -alpha = np.array([[1.5, 0.1], [0.1, 1.5]]) +alpha = np.array([[0.75, 0.1], [0.1, 
0.75]]) mu = np.array([[0.4, 0.6], [0.55, 0.6]]) sigma = np.array([[0.3, 0.3], [0.25, 0.3]]) u = mu - sigma +dt = 0.01 +T = 1000 +size_grid = int(T / dt) + 1 + -@mem.cache def simulate_data(baseline, alpha, mu, sigma, T, dt, seed=0): - L = int(1 / dt) - discretization = torch.linspace(0, 1, L) u = mu - sigma - n_dim = u.shape[0] - RC = DiscreteKernelFiniteSupport(dt, n_dim, kernel='raised_cosine') - - kernel_values = RC.kernel_eval([torch.Tensor(u), torch.Tensor(sigma)], - discretization) - kernel_values = kernel_values * alpha[:, :, None] - - t_values = discretization.double().numpy() - k11 = kernel_values[0, 0].double().numpy() - k12 = kernel_values[0, 1].double().numpy() - k21 = kernel_values[1, 0].double().numpy() - k22 = kernel_values[1, 1].double().numpy() - - tf11 = HawkesKernelTimeFunc(t_values=t_values, y_values=k11) - tf12 = HawkesKernelTimeFunc(t_values=t_values, y_values=k12) - tf21 = HawkesKernelTimeFunc(t_values=t_values, y_values=k21) - tf22 = HawkesKernelTimeFunc(t_values=t_values, y_values=k22) - - kernels = [[tf11, tf12], [tf21, tf22]] - hawkes = SimuHawkes( - baseline=baseline, kernels=kernels, end_time=T, verbose=False, seed=int(seed) - ) - - hawkes.simulate() - events = hawkes.timestamps + params = {'u': u, 'sigma': sigma} + + def raised_cosine(x, **params): + rc = DiscreteKernelFiniteSupport(delta=dt, n_dim=2, + kernel='raised_cosine') + u = params['u'] + sigma = params['sigma'] + kernel_values = rc.kernel_eval([torch.Tensor(u), torch.Tensor(sigma)], + torch.tensor(x)) + + return kernel_values.double().numpy() + + events = simu_hawkes_cluster(T, baseline, alpha, + raised_cosine, + params_kernel=params, + random_state=seed) return events +events = simulate_data(baseline, alpha, mu, sigma, T, dt, seed=0) + # %% solver ## -@mem.cache -def run_solver(events, u_init, sigma_init, baseline_init, alpha_init, dt, T, seed=0): + +def run_solver(events, dt, T, seed=0): start = time.time() max_iter = 2000 - init = { - 'alpha': torch.tensor(alpha_init), - 'baseline': torch.tensor(baseline_init), - 'kernel': [torch.tensor(u_init), torch.tensor(sigma_init)] - } solver = FaDIn(2, "raised_cosine", - init=init, - delta=dt, optim="RMSprop", - step_size=1e-3, + delta=dt, + optim="RMSprop", max_iter=max_iter, log=False, random_state=0, - device="cpu", ztzG_approx=True) print(time.time() - start) - results = solver.fit(events, T) - results_ = dict(param_baseline=results['param_baseline'][-10:].mean(0), - param_alpha=results['param_alpha'][-10:].mean(0), - param_kernel=[results['param_kernel'][0][-10:].mean(0), - results['param_kernel'][1][-10:].mean(0)]) + solver.fit(events, T) + results_ = dict(param_baseline=solver.param_baseline[-10:].mean(0), + param_alpha=solver.param_alpha[-10:].mean(0), + param_kernel=[solver.param_kernel[0][-10:].mean(0), + solver.param_kernel[1][-10:].mean(0)]) results_["time"] = time.time() - start results_["seed"] = seed results_["T"] = T @@ -98,16 +84,8 @@ def run_solver(events, u_init, sigma_init, baseline_init, alpha_init, dt, T, see def run_experiment(baseline, alpha, mu, sigma, T, dt, seed=0): - v = 0.2 events = simulate_data(baseline, alpha, mu, sigma, T, dt, seed=seed) - baseline_init = baseline + v - alpha_init = alpha + v - mu_init = mu - sigma_init = sigma + v - u_init = mu_init - sigma_init - results = run_solver(events, u_init, sigma_init, - baseline_init, alpha_init, - dt, T, seed) + results = run_solver(events, dt, T, seed) return results @@ -138,3 +116,5 @@ def run_experiment(baseline, alpha, mu, sigma, T, dt, seed=0): df['err_u']**2 + 
df['err_sigma']**2) df.to_csv('results/error_discrete_RC_m.csv', index=False) + +# %% diff --git a/experiments/inference_error/run_error_discrete_TG.py b/experiments/inference_error/run_error_discrete_TG.py index 8cb2d7e..d882aa9 100644 --- a/experiments/inference_error/run_error_discrete_TG.py +++ b/experiments/inference_error/run_error_discrete_TG.py @@ -7,11 +7,10 @@ import numpy as np import torch from joblib import Memory, Parallel, delayed -from tick.hawkes import SimuHawkes, HawkesKernelTimeFunc from fadin.kernels import DiscreteKernelFiniteSupport from fadin.solver import FaDIn - +from fadin.utils.utils_simu import simu_hawkes_cluster ################################ # Meta parameters @@ -33,59 +32,49 @@ sigma = np.array([[0.3]]) -@mem.cache def simulate_data(baseline, alpha, m, sigma, T, dt, seed=0): - L = int(1 / dt) - discretization = torch.linspace(0, 1, L) - TG = DiscreteKernelFiniteSupport(dt, 1, kernel='truncated_gaussian') - kernel_values = TG.kernel_eval([torch.Tensor(m), torch.Tensor(sigma)], - discretization) # * dt - kernel_values = kernel_values * alpha[:, :, None] - - t_values = discretization.double().numpy() - k = kernel_values[0, 0].double().numpy() - - tf = HawkesKernelTimeFunc(t_values=t_values, y_values=k) - kernels = [[tf]] - hawkes = SimuHawkes( - baseline=baseline, kernels=kernels, end_time=T, verbose=False, seed=int(seed) - ) - - hawkes.simulate() - events = hawkes.timestamps + params = {'mu': m, 'sigma': sigma} + + def truncated_gaussian(x, **params): + tg = DiscreteKernelFiniteSupport(delta=dt, n_dim=1, + kernel='truncated_gaussian') + mu = params['mu'] + sigma = params['sigma'] + kernel_values = tg.kernel_eval( + [torch.Tensor(mu), torch.Tensor(sigma)], torch.tensor(x)) + + return kernel_values.double().numpy() + + events = simu_hawkes_cluster(T, baseline, alpha, + truncated_gaussian, + params_kernel=params, + random_state=seed) return events events = simulate_data(baseline, alpha, m, sigma, T, dt, seed=0) +# %% -@mem.cache -def run_solver(events, m_init, sigma_init, baseline_init, alpha_init, T, dt, seed=0): + +def run_solver(events, T, dt, seed=0): start = time.time() max_iter = 2000 - init = { - 'alpha': torch.tensor(alpha_init), - 'baseline': torch.tensor(baseline_init), - 'kernel': [torch.tensor(m_init), torch.tensor(sigma_init)] - } - solver = FaDIn(2, + solver = FaDIn(1, "truncated_gaussian", - init=init, delta=dt, optim="RMSprop", - step_size=1e-3, max_iter=max_iter, log=False, - random_state=0, - device="cpu") + random_state=seed) print(time.time() - start) - results = solver.fit(events, T) + solver.fit(events, T) - results_ = dict(param_baseline=results['param_baseline'][-10:].mean().item(), - param_alpha=results['param_alpha'][-10:].mean().item(), - param_kernel=[results['param_kernel'][0][-10:].mean().item(), - results['param_kernel'][1][-10:].mean().item()]) + results_ = dict(param_baseline=solver.param_baseline[-10:].mean().item(), + param_alpha=solver.param_alpha[-10:].mean().item(), + param_kernel=[solver.param_kernel[0][-10:].mean().item(), + solver.param_kernel[1][-10:].mean().item()]) results_["time"] = time.time() - start results_["seed"] = seed results_["T"] = T @@ -100,9 +89,7 @@ def run_solver(events, m_init, sigma_init, baseline_init, alpha_init, T, dt, see sigma_init = np.array([[np.random.rand() * 0.5]]) start = time.time() -results_1 = run_solver( - events, m_init, sigma_init, baseline_init, alpha_init, T, dt, seed=0 -) +results_1 = run_solver(events, T, dt, seed=0) baseline_our = results_1['param_baseline'] alpha_our = 
results_1['param_alpha'] print(np.abs(results_1['param_baseline'])) # baseline)) @@ -114,15 +101,8 @@ def run_solver(events, m_init, sigma_init, baseline_init, alpha_init, T, dt, see def run_experiment(baseline, alpha, m, sigma, T, dt, seed=0): - v = 0.2 events = simulate_data(baseline, alpha, m, sigma, T, dt, seed=seed) - baseline_init = baseline + v - alpha_init = alpha + v - m_init = m + v - sigma_init = sigma - v - results = run_solver(events, m_init, sigma_init, - baseline_init, alpha_init, - T, dt, seed) + results = run_solver(events, T, dt, seed) return results @@ -157,3 +137,5 @@ def compute_norm2_error(s): lambda x: compute_norm2_error(x), axis=1) df.to_csv('results/error_discrete_TG.csv', index=False) + +# %% diff --git a/experiments/inference_error/run_error_discrete_TG_m.py b/experiments/inference_error/run_error_discrete_TG_m.py index 56ddfbc..5dbcf15 100644 --- a/experiments/inference_error/run_error_discrete_TG_m.py +++ b/experiments/inference_error/run_error_discrete_TG_m.py @@ -6,11 +6,12 @@ import numpy as np import torch from joblib import Parallel, delayed, Memory -from tick.hawkes import SimuHawkes, HawkesKernelTimeFunc + from fadin.kernels import DiscreteKernelFiniteSupport from fadin.solver import FaDIn from fadin.utils.utils import l2_error +from fadin.utils.utils_simu import simu_hawkes_cluster mem = Memory(location=".", verbose=2) @@ -24,66 +25,47 @@ sigma = np.array([[0.3, 0.3], [0.25, 0.3]]) -@mem.cache def simulate_data(baseline, alpha, m, sigma, T, dt, seed=0): - L = int(1 / dt) - discretization = torch.linspace(0, 1, L) - n_dim = m.shape[0] - TG = DiscreteKernelFiniteSupport(dt, n_dim, kernel='truncated_gaussian') - - kernel_values = TG.kernel_eval([torch.Tensor(m), torch.Tensor(sigma)], - discretization) - kernel_values = kernel_values * alpha[:, :, None] - - t_values = discretization.double().numpy() - k11 = kernel_values[0, 0].double().numpy() - k12 = kernel_values[0, 1].double().numpy() - k21 = kernel_values[1, 0].double().numpy() - k22 = kernel_values[1, 1].double().numpy() - - tf11 = HawkesKernelTimeFunc(t_values=t_values, y_values=k11) - tf12 = HawkesKernelTimeFunc(t_values=t_values, y_values=k12) - tf21 = HawkesKernelTimeFunc(t_values=t_values, y_values=k21) - tf22 = HawkesKernelTimeFunc(t_values=t_values, y_values=k22) - - kernels = [[tf11, tf12], [tf21, tf22]] - hawkes = SimuHawkes( - baseline=baseline, kernels=kernels, end_time=T, verbose=False, seed=int(seed) - ) - - hawkes.simulate() - events = hawkes.timestamps + params = {'mu': m, 'sigma': sigma} + + def truncated_gaussian(x, **params): + tg = DiscreteKernelFiniteSupport(delta=dt, n_dim=2, + kernel='truncated_gaussian') + mu = params['mu'] + sigma = params['sigma'] + kernel_values = tg.kernel_eval( + [torch.Tensor(mu), torch.Tensor(sigma)], torch.tensor(x)) + + return kernel_values.double().numpy() + + events = simu_hawkes_cluster(T, baseline, alpha, + truncated_gaussian, + params_kernel=params, + random_state=seed) return events # %% solver @mem.cache -def run_solver(events, m_init, sigma_init, baseline_init, alpha_init, dt, T, seed=0): +def run_solver(events, dt, T, seed=0): start = time.time() max_iter = 2000 - init = { - "baseline": baseline_init, - "alpha": alpha_init, - 'kernel': [torch.tensor(m_init), torch.tensor(sigma_init)] - } + solver = FaDIn(2, "truncated_gaussian", - init=init, delta=dt, optim="RMSprop", - step_size=1e-3, max_iter=max_iter, log=False, - random_state=0, - device="cpu", + random_state=seed, ztzG_approx=True) print(time.time() - start) - results = solver.fit(events, 
T) - results_ = dict(param_baseline=results['param_baseline'][-10:].mean(0), - param_alpha=results['param_alpha'][-10:].mean(0), - param_kernel=[results['param_kernel'][0][-10:].mean(0), - results['param_kernel'][1][-10:].mean(0)]) + solver.fit(events, T) + results_ = dict(param_baseline=solver.param_baseline[-10:].mean(0), + param_alpha=solver.param_alpha[-10:].mean(0), + param_kernel=[solver.param_kernel[0][-10:].mean(0), + solver.param_kernel[1][-10:].mean(0)]) results_["time"] = time.time() - start results_["seed"] = seed results_["T"] = T @@ -94,16 +76,8 @@ def run_solver(events, m_init, sigma_init, baseline_init, alpha_init, dt, T, see def run_experiment(baseline, alpha, m, sigma, T, dt, seed=0): - v = 0.2 events = simulate_data(baseline, alpha, m, sigma, T, dt, seed=seed) - baseline_init = baseline + v - alpha_init = alpha + v - m_init = m + v - sigma_init = sigma + v - - results = run_solver(events, m_init, sigma_init, - baseline_init, alpha_init, - dt, T, seed) + results = run_solver(events, dt, T, seed) return results diff --git a/fadin/solver.py b/fadin/solver.py index 42d5214..fcca13f 100644 --- a/fadin/solver.py +++ b/fadin/solver.py @@ -196,8 +196,8 @@ def __init__(self, n_dim, kernel, init='random', optim_mask=None, self.n_kernel_params = len(kernel_params_init) self.kernel_model = DiscreteKernelFiniteSupport(self.delta, self.n_dim, - kernel, self.kernel_length, - 0, self.kernel_length, + kernel, self.kernel_length, + 0, self.kernel_length, grad_kernel) self.kernel = kernel # Set optimizer From 5ed638aba5148cb09282a3540c7c029db94d7e54 Mon Sep 17 00:00:00 2001 From: GuillaumeStaermanML Date: Mon, 17 Jun 2024 15:11:06 +0200 Subject: [PATCH 22/25] remove useless experiments and comparisons with tick --- .../inference_error/plot_comp_time_dim.py | 82 -------- .../plot_sensi_analysis_length.py | 78 -------- .../inference_error/run_comp_autodiff.py | 178 ------------------ .../run_sensi_analysis_length.py | 171 ----------------- experiments/inference_error/run_time_dim.py | 143 -------------- 5 files changed, 652 deletions(-) delete mode 100644 experiments/inference_error/plot_comp_time_dim.py delete mode 100644 experiments/inference_error/plot_sensi_analysis_length.py delete mode 100644 experiments/inference_error/run_comp_autodiff.py delete mode 100644 experiments/inference_error/run_sensi_analysis_length.py delete mode 100644 experiments/inference_error/run_time_dim.py diff --git a/experiments/inference_error/plot_comp_time_dim.py b/experiments/inference_error/plot_comp_time_dim.py deleted file mode 100644 index 669eecb..0000000 --- a/experiments/inference_error/plot_comp_time_dim.py +++ /dev/null @@ -1,82 +0,0 @@ -# %% get results -import numpy as np -import pickle -import matplotlib.pyplot as plt - - -FONTSIZE = 14 -plt.rcParams["figure.figsize"] = (5, 2.9) -plt.rcParams["axes.grid"] = True -plt.rcParams["axes.grid.axis"] = "y" -plt.rcParams["grid.linestyle"] = "--" -plt.rcParams["xtick.labelsize"] = FONTSIZE -plt.rcParams["ytick.labelsize"] = FONTSIZE -plt.rcParams["font.size"] = FONTSIZE -plt.rc("legend", fontsize=FONTSIZE - 1) - -# Load non-param results and FaDIn -file_name = "results/comp_time_dim.pkl" -open_file = open(file_name, "rb") -all_results = pickle.load(open_file) -open_file.close() - - -def get_results(results): - dim_list = results[-1]["n_dim_list"] - dt_list = results[-1]["dt_list"] - T_list = results[-1]["T_list"] - seeds = results[-1]["seeds"] - n_dim = len(dim_list) - n_dt = len(dt_list) - n_seeds = len(seeds) - n_T = len(T_list) - - our_results = 
np.zeros((n_dim, n_T, n_dt, n_seeds)) - tick_results = np.zeros((n_dim, n_T, n_dt, n_seeds)) - for i in range(n_dim): - for j in range(n_T): - for k in range(n_dt): - for m in range(n_seeds): - idx = ( - i * (n_T * n_dt * n_seeds) - + j * (n_dt * n_seeds) - + k * (n_seeds) - + m - ) - our_results[i, j, k, m] = all_results[idx][0]["comp_time"] - tick_results[i, j, k, m] = all_results[idx][1]["comp_time"] - return our_results, tick_results - - -comp_time_FaDIn, comp_time_tick = get_results(all_results) -# %% -n_dim_list = all_results[-1]['n_dim_list'][::2] -T_list = all_results[-1]['T_list'] -fig, ax = plt.subplots(1, 1) - -mk = ["s", "h"] -colors = ['C1', 'C0'] -ls = ['-.', '-'] -mksize = 8 -lw = 2 - -for i in range(len(T_list)): - ax.loglog(n_dim_list, comp_time_FaDIn.mean(3)[::2, i, 0], marker=mk[0], - markevery=1, linestyle=ls[i], markersize=mksize, c=colors[0]) - ax.fill_between(n_dim_list, - np.percentile(comp_time_FaDIn[::2, i, 0, :], 20, axis=1), - np.percentile(comp_time_FaDIn[::2, i, 0, :], 80, axis=1), - alpha=0.1, color=colors[0] - ) - ax.loglog(n_dim_list, comp_time_tick.mean(3)[::2, i, 0], marker=mk[1], - markevery=1, linestyle=ls[i], markersize=mksize, c=colors[1]) - ax.fill_between(n_dim_list, - np.percentile(comp_time_tick[::2, i, 0, :], 20, axis=1), - np.percentile(comp_time_tick[::2, i, 0, :], 80, axis=1), - alpha=0.1, color=colors[1]) - - -ax.set_xlim(2, 100) -ax.set_xlabel(r'$p$') -fig.tight_layout() -plt.savefig("plots/comparison/time_comparison_nonparam_dim.pdf", bbox_inches="tight") diff --git a/experiments/inference_error/plot_sensi_analysis_length.py b/experiments/inference_error/plot_sensi_analysis_length.py deleted file mode 100644 index 10ca49b..0000000 --- a/experiments/inference_error/plot_sensi_analysis_length.py +++ /dev/null @@ -1,78 +0,0 @@ -# %% -import numpy as np -import pandas as pd -import matplotlib -import matplotlib.pyplot as plt - -# from tueplots.bundles.iclr2023() -FONTSIZE = 14 -plt.rcParams["figure.figsize"] = (5, 3.2) -plt.rcParams["axes.grid"] = False -plt.rcParams["axes.grid.axis"] = "y" -plt.rcParams["grid.linestyle"] = "--" -plt.rcParams['xtick.labelsize'] = FONTSIZE -plt.rcParams['ytick.labelsize'] = FONTSIZE -plt.rcParams['font.size'] = FONTSIZE -plt.rcParams['mathtext.fontset'] = 'cm' -plt.rc('legend', fontsize=FONTSIZE - 1) - - -def plot_length_analysis(): - df = pd.read_csv('results/sensitivity_length.csv') - T = df["T"].unique() - T.sort() - - palette = [matplotlib.cm.viridis_r(x) for x in np.linspace(0, 1, 5)][1:] - - fig1 = plt.figure() - - df['estimates'] = 'EXP' - methods = [("EXP", "s-", None)] - - for m, ls, hatch in methods: - for i, t in enumerate(T): - this_df = df.query("T == @t and estimates == @m") - curve = this_df.groupby("W")["err_norm2"].quantile( - [0.25, 0.5, 0.75]).unstack() - plt.loglog( - curve.index, curve[0.5], ls, lw=4, c=palette[i], - markersize=10, markevery=3 - ) - plt.fill_between( - curve.index, curve[0.25], curve[0.75], alpha=0.2, - color=palette[i], hatch=hatch - ) - plt.xlim(1e-0, 1e2) - - fig1.tight_layout() - plt.xlabel(r'$W$') - plt.ylabel(r'$\ell_2$ error', fontdict={'color': 'k'}) - plt.savefig("plots/approx/sensi_w_exp_stat.pdf", bbox_inches='tight') - - fig2 = plt.figure() - - for m, ls, hatch in methods: - for i, t in enumerate(T): - this_df = df.query("T == @t and estimates == @m") - curve = this_df.groupby("W")["time"].quantile( - [0.25, 0.5, 0.75]).unstack() - plt.loglog( - curve.index, curve[0.5], ls, lw=4, c=palette[i], - markersize=10, markevery=3 - ) - plt.fill_between( - curve.index, 
curve[0.25], curve[0.75], alpha=0.2, - color=palette[i], hatch=hatch - ) - plt.xlim(1e-0, 1e2) - - fig2.tight_layout() - plt.xlabel(r'$W$') - plt.ylabel('Time (s.)', fontdict={'color': 'k'}) - plt.savefig("plots/approx/sensi_w_exp_time.pdf", bbox_inches='tight') - - -plt.close('all') - - -plot_length_analysis() diff --git a/experiments/inference_error/run_comp_autodiff.py b/experiments/inference_error/run_comp_autodiff.py deleted file mode 100644 index 2ee116f..0000000 --- a/experiments/inference_error/run_comp_autodiff.py +++ /dev/null @@ -1,178 +0,0 @@ -# %% import stuff -# import libraries -import itertools -import pickle -import time -import numpy as np -import torch -from joblib import Memory, Parallel, delayed - -from tick.hawkes import SimuHawkes, HawkesKernelTimeFunc - -from fadin.kernels import DiscreteKernelFiniteSupport -from fadin.solver import FaDIn -from fadin.loss_and_gradient import discrete_l2_loss_conv - -################################ -# Define solver without precomputations -################################ - - -def compute_gradient_l2_noprecomput(solver, events_grid, discretization, - i, n_events, end_time): - """One optimizer iteration of FaDIn_no_precomputations solver, - with l2 loss and no precomputations.""" - intens = solver.kernel_model.intensity_eval( - solver.params_intens[0], - solver.params_intens[1], - solver.params_intens[2:], - events_grid, - discretization - ) - loss = discrete_l2_loss_conv(intens, events_grid, solver.delta) - loss.backward() - - -class FaDInNoPrecomputations(FaDIn): - """Define the FaDIn framework for estimated Hawkes processes *without - precomputations*.""" - compute_gradient = staticmethod(compute_gradient_l2_noprecomput) - precomputations = False - - -################################ -# Meta parameters -################################ -dt = 0.1 -T = 1000 -size_grid = int(T / dt) + 1 - -mem = Memory(location=".", verbose=2) - -# %% simulate data -# Simulated data -################################ - -baseline = np.array([1.1]) -alpha = np.array([[0.8]]) -mu = np.array([[0.5]]) -sigma = np.array([[0.3]]) -u = mu - sigma - - -@mem.cache -def simulate_data(baseline, alpha, mu, sigma, T, dt, seed=0): - L = int(1 / dt) - discretization = torch.linspace(0, 1, L) - u = mu - sigma - RC = DiscreteKernelFiniteSupport(dt, 1, kernel='raised_cosine') - kernel_values = RC.kernel_eval([torch.Tensor(u), torch.Tensor(sigma)], - discretization) # * dt - kernel_values = kernel_values * alpha[:, :, None] - - t_values = discretization.double().numpy() - k = kernel_values[0, 0].double().numpy() - - tf = HawkesKernelTimeFunc(t_values=t_values, y_values=k) - kernels = [[tf]] - hawkes = SimuHawkes( - baseline=baseline, - kernels=kernels, - end_time=T, - verbose=False, - seed=int(seed) - ) - - hawkes.simulate() - events = hawkes.timestamps - return events - - -@mem.cache -def run_solver(events, u_init, sigma_init, baseline_init, alpha_init, T, dt, - seed=0): - max_iter = 800 - init = { - 'alpha': torch.tensor(alpha_init), - 'baseline': torch.tensor(baseline_init), - 'kernel': [torch.tensor(u_init), torch.tensor(sigma_init)] - } - solver_autodiff = FaDInNoPrecomputations( - 1, - "raised_cosine", - init=init, - delta=dt, optim="RMSprop", - step_size=1e-3, max_iter=max_iter, - random_state=0 - ) - start_autodiff = time.time() - solver_autodiff.fit(events, T) - time_autodiff = time.time() - start_autodiff - init = { - 'alpha': torch.tensor(alpha_init), - 'baseline': torch.tensor(baseline_init), - 'kernel': [torch.tensor(u_init), torch.tensor(sigma_init)] - } - 
solver_FaDIn = FaDIn( - 1, - "raised_cosine", - init=init, - delta=dt, optim="RMSprop", - step_size=1e-3, max_iter=max_iter, - random_state=0 - ) - start_FaDIn = time.time() - solver_FaDIn.fit(events, T) - time_FaDIn = time.time() - start_FaDIn - - results = dict(time_autodiff=time_autodiff, time_FaDIn=time_FaDIn) - - results["seed"] = seed - results["T"] = T - results["dt"] = dt - - return results - - -# %% example -v = 0.2 -events = simulate_data(baseline, alpha, mu, sigma, T, dt, seed=0) -baseline_init = baseline + v -alpha_init = alpha + v -mu_init = mu + v -sigma_init = sigma - v -u_init = mu_init - sigma_init -# run_solver(events, u_init, sigma_init, baseline_init, alpha_init, dt, T, seed=0) -# %% eval on grid - - -def run_experiment(baseline, alpha, mu, sigma, T, dt, seed=0): - v = 0.2 - events = simulate_data(baseline, alpha, mu, sigma, T, dt, seed=seed) - baseline_init = baseline + v - alpha_init = alpha + v - mu_init = mu + v - sigma_init = sigma - v - u_init = mu_init - sigma_init - results = run_solver(events, u_init, sigma_init, baseline_init, alpha_init, - T, dt, seed) - return results - - -T_list = [100000, 1000000] -dt_list = [0.1, 0.01] -seeds = np.arange(10) -info = dict(T_list=T_list, dt_list=dt_list, seeds=seeds) - -n_jobs = 1 -all_results = Parallel(n_jobs=n_jobs, verbose=10)( - delayed(run_experiment)(baseline, alpha, mu, sigma, T, dt, seed=seed) - for T, dt, seed in itertools.product( - T_list, dt_list, seeds - ) -) -all_results.append(info) -file_name = "results/comp_autodiff.pkl" -open_file = open(file_name, "wb") -pickle.dump(all_results, open_file) -open_file.close() diff --git a/experiments/inference_error/run_sensi_analysis_length.py b/experiments/inference_error/run_sensi_analysis_length.py deleted file mode 100644 index 5b5cf1a..0000000 --- a/experiments/inference_error/run_sensi_analysis_length.py +++ /dev/null @@ -1,171 +0,0 @@ -# %% import stuff -# import libraries -import time -import itertools -import numpy as np -import torch -from tick.hawkes import SimuHawkes, HawkesKernelTimeFunc, HawkesKernelExp -from joblib import Parallel, delayed -import pandas as pd - -from fadin.kernels import DiscreteKernelFiniteSupport -from fadin.solver import FaDIn - -################################ -# Meta parameters -################################ -dt = 0.01 -T = 10_000 -kernel_length = 20 - -# %% Experiment -################################ - - -def simulate_data(baseline, alpha, decay, kernel_length, T, dt, seed=0): - L = int(kernel_length / dt) - discretization = torch.linspace(0, kernel_length, L) - n_dim = decay.shape[0] - EXP = DiscreteKernelFiniteSupport(dt, n_dim, kernel='truncated_exponential', - kernel_length=kernel_length, - upper=kernel_length) - kernel_values = EXP.kernel_eval([torch.Tensor(decay)], - discretization) - kernel_values = kernel_values * alpha[:, :, None] - - t_values = discretization.double().numpy() - k = kernel_values[0, 0].double().numpy() - - tf = HawkesKernelTimeFunc(t_values=t_values, y_values=k) - kernels = [[tf]] - hawkes = SimuHawkes( - baseline=baseline, kernels=kernels, end_time=T, verbose=False, seed=int(seed) - ) - - hawkes.simulate() - events_discrete = hawkes.timestamps - - tf = HawkesKernelExp(alpha.item(), decay.item()) - kernels = [[tf]] - hawkes = SimuHawkes( - baseline=baseline, kernels=kernels, end_time=T, verbose=False, seed=int(seed) - ) - - hawkes.simulate() - events_continuous = hawkes.timestamps - - return events_discrete, events_continuous - - -baseline = np.array([.1]) -alpha = np.array([[0.8]]) -decay = 
np.array([[1.]]) - -events_d, events_c = simulate_data(baseline, alpha, decay, kernel_length, T, dt, seed=0) -# %% solver - - -def run_solver(events, decay_init, baseline_init, - alpha_init, kernel_length, T, dt, seed=0): - start = time.time() - max_iter = 2000 - init = { - 'alpha': torch.tensor(alpha_init), - 'baseline': torch.tensor(baseline_init), - 'kernel': [torch.tensor(decay_init)] - } - solver = FaDIn(1, - "truncated_exponential", - init=init, - kernel_length=kernel_length, - delta=dt, optim="RMSprop", - step_size=1e-3, max_iter=max_iter - ) - - print(time.time() - start) - results = solver.fit(events, T) - results_ = dict(param_baseline=results['param_baseline'][-10:].mean().item(), - param_alpha=results['param_alpha'][-10:].mean().item(), - param_kernel=[results['param_kernel'][0][-10:].mean().item()] - ) - - results_["time"] = time.time() - start - results_['W'] = kernel_length - results_["seed"] = seed - results_["T"] = T - results_["dt"] = dt - return results_ - - -# %% Test - - -baseline = np.array([1.1]) -alpha = np.array([[0.8]]) -decay = np.array([[0.5]]) - -events_d, events_c = simulate_data(baseline, alpha, decay, kernel_length, T, dt, seed=0) - -v = 0.2 -baseline_init = baseline + v -alpha_init = alpha + v -decay_init = decay + v - -results = run_solver(events_c, decay_init, - baseline_init, alpha_init, - kernel_length, T, dt, seed=0) - -print(np.abs(results['param_baseline'] - baseline)) -print(np.abs(results['param_alpha'] - alpha)) -print(np.abs(results['param_kernel'][0] - decay)) - -# %% - - -def run_experiment(baseline, alpha, decay, kernel_length, T, dt, seed=0): - v = 0.2 - events_d, events_c = simulate_data(baseline, alpha, decay, kernel_length, - T, dt, seed=seed) - baseline_init = baseline + v - alpha_init = alpha + v - decay_init = decay + v - - # results_d = run_solver(events_d, decay_init, baseline_init, alpha_init, - # kernel_length, T, dt, seed) - results_c = run_solver(events_c, decay_init, baseline_init, alpha_init, - kernel_length, T, dt, seed) - - return results_c # results_d - - -W_list = [1, 5, 10, 20, 50, 100] -T_list = [1000, 10000, 100_000, 1_000_000] -dt_list = [0.01] -seeds = np.arange(10) - - -n_jobs = 60 -all_results = Parallel(n_jobs=n_jobs, verbose=10)( - delayed(run_experiment)(baseline, alpha, decay, W, T, dt, seed=seed) - for W, T, dt, seed in itertools.product( - W_list, T_list, dt_list, seeds - ) -) - -# save results -df = pd.DataFrame(all_results) -df['param_decay'] = df['param_kernel'].apply(lambda x: x[0]) -true_param = {'baseline': 1.1, 'alpha': 0.8, 'decay': 0.5} -for param, value in true_param.items(): - df[param] = value - - -def compute_norm2_error(s): - return np.sqrt(np.array([(s[param] - s[f'param_{param}'])**2 - for param in ['baseline', 'alpha', 'decay']]).sum()) - - -df['err_norm2'] = df.apply( - lambda x: compute_norm2_error(x), axis=1) - -df.to_csv('results/sensitivity_length.csv', index=False) diff --git a/experiments/inference_error/run_time_dim.py b/experiments/inference_error/run_time_dim.py deleted file mode 100644 index 6b37d68..0000000 --- a/experiments/inference_error/run_time_dim.py +++ /dev/null @@ -1,143 +0,0 @@ -# %% import stuff -# import libraries -import itertools -import time -import pickle -import numpy as np -import torch -from joblib import Parallel, delayed -from tick.hawkes import SimuHawkes, HawkesKernelTimeFunc -from tick.hawkes import HawkesBasisKernels - -from fadin.kernels import DiscreteKernelFiniteSupport -from fadin.solver import FaDIn - - -# %% simulate data -# Simulated data 
-################################ - -def simulate_data(n_dim, T, dt, seed=0): - L = int(1 / dt) - discretization = torch.linspace(0, 1, L) - - baseline = 0.1 + np.zeros(n_dim) - - alpha = 0.001 + np.zeros((n_dim, n_dim)) - decay = 5 + np.zeros((n_dim, n_dim)) - - Exp = DiscreteKernelFiniteSupport(dt, n_dim, kernel='truncated_exponential') - kernel_values = Exp.kernel_eval([torch.Tensor(decay)], discretization) - kernel_values = kernel_values * alpha[:, :, None] - - t_values = discretization.double().numpy() - ker = [] - for i in range(n_dim): - for j in range(n_dim): - k = kernel_values[i, j].double().numpy() - tf = HawkesKernelTimeFunc(t_values=t_values, y_values=k) - ker.append(tf) - - kernels = [[ker[j * n_dim + i] for i in range(n_dim)] for j in range(n_dim)] - hawkes = SimuHawkes( - baseline=baseline, kernels=kernels, end_time=T, verbose=False, seed=int(seed) - ) - - hawkes.simulate() - events = hawkes.timestamps - return events - - -# %% -# %% solver - -def run_solver(events, decay_init, baseline_init, alpha_init, T, dt, seed=0): - start = time.time() - max_iter = 800 - n_dim = baseline_init.shape[0] - init = { - "baseline": baseline_init, - "alpha": alpha_init, - "kernel": [decay_init] - } - solver = FaDIn(n_dim, - "truncated_exponential", - init=init, - delta=dt, optim="RMSprop", - step_size=1e-3, - max_iter=max_iter, - log=False, - random_state=0, - device="cpu", - ztzG_approx=True) - - print(time.time() - start) - results = solver.fit(events, T) - results_ = dict(param_baseline=results['param_baseline'][-10:].mean(0), - param_alpha=results['param_alpha'][-10:].mean(0), - param_kernel=[results['param_kernel'][0][-10:].mean(0)]) - results_["time"] = time.time() - start - results_["seed"] = seed - results_["T"] = T - results_["dt"] = dt - return results_ - - -def run_experiment(n_dim, T, dt, seed=0): - events = simulate_data(n_dim, T, dt, seed=seed) - baseline_init = np.array([np.random.rand()]) - alpha_init = np.array([[np.random.rand()]]) - decay_init = np.array([[np.random.rand()]]) - - start_our = time.time() - run_solver(events, decay_init, baseline_init, alpha_init, - T, dt, seed=0) - time_our = time.time() - start_our - - start_tick = time.time() - non_param = HawkesBasisKernels(1, n_basis=1, kernel_size=int(1 / dt), - max_iter=800, ode_tol=1e-15) - non_param.fit(events) - time_tick = time.time() - start_tick - - res_our = dict(comp_time=time_our, n_dim=n_dim, T=T, dt=dt, seed=seed) - - res_tick = dict(comp_time=time_tick, n_dim=n_dim, T=T, dt=dt, seed=seed) - - return res_our, res_tick - - -n_dim = 10 -dt = 0.01 -T = 1000 - -us, tick = run_experiment(n_dim, T, dt, seed=0) - -print("us is:", us['comp_time']) -print("tick is:", tick['comp_time']) -# %% run - - -n_dim_list = [2, 5, 10, 50, 100] -dt_list = [0.1] -T_list = [100_000, 1_000_000] -seeds = np.arange(10) - -info = dict(n_dim_list=n_dim_list, T_list=T_list, dt_list=dt_list, seeds=seeds) - -n_jobs = 70 -all_results = Parallel(n_jobs=n_jobs, verbose=10)( - delayed(run_experiment)(n_dim, T, dt, seed=seed) - for n_dim, T, dt, seed in itertools.product( - n_dim_list, T_list, dt_list, seeds - ) -) - - -all_results.append(info) -file_name = "results/comp_time_dim.pkl" -open_file = open(file_name, "wb") -pickle.dump(all_results, open_file) -open_file.close() - -# %% From 68c5d89888335795e0d5e2a3bf722e7a4e967af8 Mon Sep 17 00:00:00 2001 From: GuillaumeStaermanML Date: Mon, 17 Jun 2024 15:14:06 +0200 Subject: [PATCH 23/25] useless files --- experiments/example_multivariate.py | 105 ----------------- 
experiments/example_univariate.py | 108 ------------------ .../run_error_discrete_TG_m.py | 3 +- 3 files changed, 2 insertions(+), 214 deletions(-) delete mode 100644 experiments/example_multivariate.py delete mode 100644 experiments/example_univariate.py diff --git a/experiments/example_multivariate.py b/experiments/example_multivariate.py deleted file mode 100644 index 16da103..0000000 --- a/experiments/example_multivariate.py +++ /dev/null @@ -1,105 +0,0 @@ -# %% import stuff -# import libraries -import time -import numpy as np -import torch -from tick.hawkes import SimuHawkes, HawkesKernelTimeFunc - -from fadin.kernels import DiscreteKernelFiniteSupport -from fadin.solver import FaDIn -# from tick.hawkes import HawkesBasisKernels -################################ -# Meta parameters -################################ -dt = 0.01 -L = int(1 / dt) -T = 1_000_000 -size_grid = int(T / dt) + 1 - -# mem = Memory(location=".", verbose=2) - -# %% Experiment -################################ - - -# @mem.cache -def simulate_data(baseline, alpha, mu, sigma, T, dt, seed=0): - L = int(1 / dt) - discretization = torch.linspace(0, 1, L) - u = mu - sigma - n_dim = u.shape[0] - RC = DiscreteKernelFiniteSupport(dt, n_dim=n_dim, kernel='raised_cosine', - lower=0, upper=1) - - kernel_values = RC.kernel_eval([torch.Tensor(u), torch.Tensor(sigma)], - discretization) - kernel_values = kernel_values * alpha[:, :, None] - - t_values = discretization.double().numpy() - k11 = kernel_values[0, 0].double().numpy() - k12 = kernel_values[0, 1].double().numpy() - k21 = kernel_values[1, 0].double().numpy() - k22 = kernel_values[1, 1].double().numpy() - - tf11 = HawkesKernelTimeFunc(t_values=t_values, y_values=k11) - tf12 = HawkesKernelTimeFunc(t_values=t_values, y_values=k12) - tf21 = HawkesKernelTimeFunc(t_values=t_values, y_values=k21) - tf22 = HawkesKernelTimeFunc(t_values=t_values, y_values=k22) - - kernels = [[tf11, tf12], [tf21, tf22]] - hawkes = SimuHawkes( - baseline=baseline, kernels=kernels, end_time=T, verbose=False, seed=int(seed) - ) - - hawkes.simulate() - events = hawkes.timestamps - return events - - -# @mem.cache -def run_solver(events, dt, T, - ztzG_approx, seed=0): - start = time.time() - max_iter = 2000 - solver = FaDIn( - 2, - "raised_cosine", - delta=dt, optim="RMSprop", max_iter=max_iter, - ztzG_approx=ztzG_approx, device='cpu', log=False, tol=10e-6 -) - - print(time.time() - start) - solver.fit(events, T) - results_ = dict(param_baseline=solver.param_baseline[-10:].mean(0), - param_alpha=solver.param_alpha[-10:].mean(0), - param_kernel=[solver.param_kernel[0][-10:].mean(0), - solver.param_kernel[1][-10:].mean(0)]) - results_["time"] = time.time() - start - results_["seed"] = seed - results_["T"] = T - results_["dt"] = dt - return results_ - - -baseline = np.array([.1, .2]) -alpha = np.array([[1.5, 0.1], [0.1, 1.5]]) -mu = np.array([[0.4, 0.6], [0.55, 0.6]]) -sigma = np.array([[0.3, 0.3], [0.25, 0.3]]) -u = mu - sigma -events = simulate_data(baseline, alpha, mu, sigma, T, dt, seed=1) - -print("events of the first process: ", events[0].shape[0]) -print("events of the second process: ", events[1].shape[0]) - - -# %% -v = 0.2 -baseline_init = baseline + v -alpha_init = alpha + v -mu_init = mu -sigma_init = sigma + v -u_init = mu_init - sigma_init -ztzG_approx = True -results = run_solver(events, dt, T, ztzG_approx, seed=0) - -# %% diff --git a/experiments/example_univariate.py b/experiments/example_univariate.py deleted file mode 100644 index f44727d..0000000 --- a/experiments/example_univariate.py 
+++ /dev/null @@ -1,108 +0,0 @@ -# %% import stuff -# import libraries -import time -import numpy as np -import torch -from tick.hawkes import SimuHawkes, HawkesKernelTimeFunc - -from fadin.kernels import DiscreteKernelFiniteSupport -from fadin.solver import FaDIn - -################################ -# Meta parameters -################################ -dt = 0.01 -T = 1_000_000 -kernel_length = 1.5 -size_grid = int(T / dt) + 1 - -# mem = Memory(location=".", verbose=2) - -# %% Experiment -################################ - - -# @mem.cache -def simulate_data(baseline, alpha, a, b, kernel_length, T, dt, seed=0): - L = int(kernel_length / dt) - discretization = torch.linspace(0, kernel_length, L) - n_dim = a.shape[0] - kuma = DiscreteKernelFiniteSupport(dt, n_dim, kernel='kumaraswamy') - kernel_values = kuma.kernel_eval([torch.Tensor(a), torch.Tensor(b)], - discretization) - kernel_values = kernel_values * alpha[:, :, None] - - t_values = discretization.double().numpy() - k = kernel_values[0, 0].double().numpy() - - tf = HawkesKernelTimeFunc(t_values=t_values, y_values=k) - kernels = [[tf]] - hawkes = SimuHawkes( - baseline=baseline, kernels=kernels, end_time=T, verbose=False, seed=int(seed) - ) - - hawkes.simulate() - events = hawkes.timestamps - return events - -# %% solver - - -# @mem.cache -def run_solver(events, u_init, sigma_init, baseline_init, - alpha_init, kernel_length, dt, T, seed=0): - start = time.time() - max_iter = 10000 - init = { - 'alpha': torch.tensor(alpha_init), - 'baseline': torch.tensor(baseline_init), - 'kernel': [torch.tensor(u_init), torch.tensor(sigma_init)] - } - solver = FaDIn(1, - "kumaraswamy", - init=init, - kernel_length=kernel_length, - delta=dt, optim="RMSprop", - max_iter=max_iter, criterion='l2', - ) - - print(time.time() - start) - solver.fit(events, T) - results_ = dict(param_baseline=solver.param_baseline[-10:].mean().item(), - param_alpha=solver.param_alpha[-10:].mean().item(), - param_kernel=[solver.param_kernel[0][-10:].mean().item(), - solver.param_kernel[1][-10:].mean().item()] - ) - - results_["time"] = time.time() - start - results_["seed"] = seed - results_["T"] = T - results_["dt"] = dt - return results_ - -# %% Test - - -baseline = np.array([.1]) -alpha = np.array([[0.8]]) -middle = kernel_length / 2 -a = np.array([[2.]]) -b = np.array([[2.]]) - - -events = simulate_data(baseline, alpha, a, b, kernel_length, T, dt, seed=0) - -v = 0.2 -baseline_init = baseline + v -alpha_init = alpha + v -a_init = np.array([[1.]]) -b_init = np.array([[1.]]) - -results = run_solver(events, a_init, b_init, - baseline_init, alpha_init, 1.5, - dt, T, seed=0) - -print(np.abs(results['param_baseline'] - baseline)) -print(np.abs(results['param_alpha'] - alpha)) -print(np.abs(results['param_kernel'][0] - a)) -print(np.abs(results['param_kernel'][1] - b)) diff --git a/experiments/inference_error/run_error_discrete_TG_m.py b/experiments/inference_error/run_error_discrete_TG_m.py index 5dbcf15..5cd2c9b 100644 --- a/experiments/inference_error/run_error_discrete_TG_m.py +++ b/experiments/inference_error/run_error_discrete_TG_m.py @@ -47,7 +47,6 @@ def truncated_gaussian(x, **params): # %% solver -@mem.cache def run_solver(events, dt, T, seed=0): start = time.time() max_iter = 2000 @@ -107,3 +106,5 @@ def run_experiment(baseline, alpha, m, sigma, T, dt, seed=0): df['err_m']**2 + df['err_sigma']**2) df.to_csv('results/error_discrete_TG_m.csv', index=False) + +# %% From 5df41213441ecba4c0f679d07ad48e945600ee1d Mon Sep 17 00:00:00 2001 From: GuillaumeStaermanML Date: Mon, 29 
Jul 2024 16:05:53 +0200 Subject: [PATCH 24/25] add unhap solver --- fadin/kernels.py | 6 + fadin/loss_and_gradient.py | 279 ++++++++++++++++++++++ fadin/solver.py | 383 ++++++++++++++++++++++++++++++- fadin/utils/compute_constants.py | 106 +++++++++ fadin/utils/utils.py | 157 ++++++++++++- fadin/utils/utils_simu.py | 203 +++++++++++----- 6 files changed, 1065 insertions(+), 69 deletions(-) diff --git a/fadin/kernels.py b/fadin/kernels.py index cd7f687..45a37a3 100644 --- a/fadin/kernels.py +++ b/fadin/kernels.py @@ -125,6 +125,12 @@ def grad_eval(self, kernel_params, time_values): truncated_gaussian | truncated_exponential") return grad_values + def kernel_and_grad(self, kernel_params, time_values): + kernel_values = self.kernel_eval(kernel_params, time_values) + grad_values = self.grad_eval(kernel_params, time_values) + + return kernel_values, grad_values + def intensity_eval(self, baseline, alpha, kernel_params, events_grid, time_values): """Return the intensity function evaluated on the entire grid. diff --git a/fadin/loss_and_gradient.py b/fadin/loss_and_gradient.py index da7f445..5ed18be 100644 --- a/fadin/loss_and_gradient.py +++ b/fadin/loss_and_gradient.py @@ -494,3 +494,282 @@ def get_grad_eta(zG, zN, ztzG, baseline, alpha, kernel, grad_theta = grad_theta_ / n_events.sum() return grad_theta + + +def get_grad_baseline_mixture(phi_tilde, baseline, alpha, kernel, + delta, end_time, rho, square_int_hawkes, + vec_mark_hawkes, n_ground_events): + """ + marks: list of vector of marks (size number of events) + square_int_marks: integral of the square mark in the left part of the loss + rho: list of vector of size (number of events) + """ + + n_dim, _, _ = kernel.shape + + grad_baseline_ = torch.zeros(n_dim) + for m in range(n_dim): + temp = 0. + for j in range(n_dim): + temp += alpha[m, j] * (phi_tilde[j] @ kernel[m, j]) + grad_baseline_[m] = delta * temp * square_int_hawkes[m] + grad_baseline_[m] += end_time * baseline[m] * square_int_hawkes[m] + grad_baseline_[m] -= vec_mark_hawkes[m] @ rho[m] + + grad_baseline = 2 * grad_baseline_ / n_ground_events.sum() + + return grad_baseline + + +def get_grad_baseline_noise_mixture(baseline_noise, kernel, + end_time, rho, n_ground_events, + square_int_noise, vec_mark_noise): + n_dim, _, _ = kernel.shape + + grad_baseline_noise_ = torch.zeros(n_dim) + for m in range(n_dim): + + grad_baseline_noise_[m] += end_time * baseline_noise[m] * \ + square_int_noise[m] + grad_baseline_noise_[m] -= vec_mark_noise[m].sum() - \ + vec_mark_noise[m] @ rho[m] + + grad_baseline_noise = 2 * grad_baseline_noise_ / n_ground_events.sum() + + return grad_baseline_noise + + +def get_grad_eta_mixture(precomputations, baseline, alpha, kernel, + grad_kernel, delta, square_int_hawkes, n_ground_events, + new=False): + + phi_tilde, phi_tilde_events, psi_tilde, xi_tilde = precomputations + n_dim, _, L = kernel.shape + grad_theta_ = torch.zeros(n_dim, n_dim) + + for m in range(n_dim): + cst = 2 * delta * (baseline[m] * square_int_hawkes[m]) # + baseline_noise[m]) + for n in range(n_dim): + grad_theta_[m, n] = cst * alpha[m, n] * (grad_kernel[m, n] @ phi_tilde[n]) + + grad_theta_[m, n] -= 2 * alpha[m, n] * ( + grad_kernel[m, n] @ phi_tilde_events[m, n]) + if new: + grad_theta_[m, n] += 2 * delta * (alpha[m, n] * grad_kernel[m, n] * + kernel[m, n] * xi_tilde[n]).sum() + + temp = 0 + for k in range(n_dim): + cst2 = alpha[m, n] * alpha[m, k] + temp_ = 0 + temp_ += 2 * (kernel[m, k].view(1, L) + * (psi_tilde[n, k] * grad_kernel[m, n].view(L, 1)).sum(0)) + + temp += cst2 * temp_.sum() + 
+ grad_theta_[m, n] += delta * temp * square_int_hawkes[m] + + grad_theta = grad_theta_ / n_ground_events.sum() + + return grad_theta + + +def get_grad_alpha_mixture(phi_tilde, phi_tilde_events, psi_tilde, + baseline, alpha, kernel, + delta, square_int_hawkes, n_ground_events, + xi_tilde, new=False): + """Return the gradient of the discrete l2 loss w.r.t. alpha. + + .. math:: + N_T\\frac{\\partial\\mathcal{L}_G}{\\partial \\alpha_{ml}} = + 2\\Delta \\mu_m \\sum_{\\tau=1}^{L} \\frac{\\partial + \\phi_{m,l}^\\Delta[\\tau]}{\\partial \\alpha_{m,l}} \\Phi_l(\\tau; G) + + 2 \\Delta \\sum_{k=1}^{p} \\sum_{\\tau=1}^{L} \\sum_{\\tau'=1}^{L} + \\phi_{mk}^\\Delta[\\tau'] \\frac{\\partial \\phi_{m,l}^\\Delta[\\tau]} + {\\partial \\alpha_{m,l}} \\Psi_{l,k}(\\tau, \\tau'; G) + + Parameters + ---------- + zG : tensor, shape (n_dim, L) + + zN : tensor, shape (n_dim, L) + + ztzG : tensor, shape (n_dim, n_dim, L, L) + + baseline : tensor, shape (n_dim,) + Baseline parameter of the intensity of the Hawkes process. + + alpha : tensor, shape (n_dim, n_dim) + Alpha parameter of the intensity of the Hawkes process. + + kernel : tensor, shape (n_dim, n_dim, L) + Kernel values on the discretization. + + delta : float + Step size of the discretization grid. + + n_ground_events : tensor, shape (n_dim,) + Number of events for each dimension. + + Returns + ---------- + grad_alpha : tensor, shape (n_dim, n_dim) + """ + n_dim, _, _ = kernel.shape + + grad_alpha_ = torch.zeros(n_dim, n_dim) + for m in range(n_dim): + dk = delta * (baseline[m] * square_int_hawkes[m]) # + baseline_noise[k]) + for n in range(n_dim): + temp = 0 + for j in range(n_dim): + temp += alpha[m, j] * (torch.outer(kernel[m, n], kernel[m, j]) * + psi_tilde[n, j]).sum() + grad_alpha_[m, n] += delta * temp * square_int_hawkes[m] + grad_alpha_[m, n] += dk * kernel[m, n] @ phi_tilde[n] + grad_alpha_[m, n] -= phi_tilde_events[m, n] @ kernel[m, n] + + if new: + grad_alpha_[m, n] += (delta * kernel[m, n] * kernel[m, n] * + alpha[m, n] * xi_tilde[n]).sum() + + grad_alpha = 2 * grad_alpha_ / n_ground_events.sum() + + return grad_alpha + + +def get_grad_rho_mixture(z_tilde, + marks_grid, + kernel, + square_int_hawkes, + param_intens, + delta, + mask_void, + n_events, + marked_quantities, + new=False): + + baseline = param_intens[0] + baseline_noise = param_intens[1] + alpha = param_intens[2] + vec_mark_hawkes = marked_quantities[1] + vec_mark_noise = marked_quantities[3] + + n_dim, n_grid = z_tilde.shape + L = kernel.shape[-1] # int(1 / delta) + grad_rho = torch.zeros(n_dim, n_grid) + kernel = alpha.unsqueeze(2) * kernel + + first_term = torch.zeros(n_dim, n_grid) + second_term = torch.zeros(n_dim, n_grid) + second_term_bis = torch.zeros(n_dim, n_grid) + third_term = torch.zeros(n_dim, n_grid) + fourth_term = torch.zeros(n_dim, n_grid) + fifth_term = torch.zeros(n_dim, n_grid) + fifth_term_ = torch.zeros(n_dim, n_dim, n_grid) + sixth_term = torch.zeros(n_dim, n_grid) + sixth_term_ = torch.zeros(n_dim, n_dim, n_grid) + temp_second = torch.zeros(n_dim, n_dim, n_dim, n_grid) + for m in range(n_dim): + third_term[m] -= baseline_noise[m] * vec_mark_noise[m] + fourth_term[m] = baseline[m] * vec_mark_hawkes[m] + + # approximation border effects: + cst1_m = delta * ( + square_int_hawkes.unsqueeze(1) * baseline.unsqueeze(1) * kernel[:, m]).sum() + for i in range(n_dim): + + cst2_i = delta * square_int_hawkes[i] + for k in range(n_dim): + kernel_product = torch.outer(kernel[i, m], kernel[i, k]) + for diff_tau in range(1, L): + ker = torch.diag(kernel_product, 
-diff_tau).sum() + temp_pos = marks_grid[m, :n_grid-diff_tau] * z_tilde[k, diff_tau:] + temp_second[m, i, k, :n_grid-diff_tau] += ker * temp_pos + + for diff_tau in range(-L+1, 1): + ker = torch.diag(kernel_product, -diff_tau).sum() + temp_neg = marks_grid[m, -diff_tau:] * z_tilde[k, :n_grid+diff_tau] + temp_second[m, i, k, -diff_tau:] += ker * temp_neg + + for tau in range(L): + temp_fifth = kernel[m, i, tau] * vec_mark_hawkes[m, tau:] * \ + z_tilde[i, :n_grid-tau] + # temp_fifth has different size depending on tau + fifth_term_[m, i, tau:] += temp_fifth + temp_sixth = kernel[i, m, tau] * vec_mark_hawkes[m, :n_grid-tau] * \ + z_tilde[i, tau:] + # temp_sixth has different size depending on tau + sixth_term_[m, i, tau:] += temp_sixth + + temp_second[m, i, :, :] *= cst2_i + + fifth_term[m] = fifth_term_[m].sum(0) # we sum over i + sixth_term[m] = sixth_term_[m].sum(0) # we sum over i + + first_term[m] = cst1_m * marks_grid[m] + second_term = temp_second.sum((1, 2)) + second_term_bis[m] = 0.5 * delta * (kernel[:, m] ** 2).sum() * marks_grid[m] * ( + marks_grid[m] - 2 * z_tilde[m]) + if new: + grad_rho[m] = first_term[m] + second_term[m] + second_term_bis[m] - ( + third_term[m] + fourth_term[m] + fifth_term[m] + sixth_term[m]) + else: + grad_rho[m] = first_term[m] + second_term[m] - ( + third_term[m] + fourth_term[m] + fifth_term[m] + sixth_term[m]) + + grad = 2 * grad_rho / n_events.sum() + grad[mask_void] = 0. + return grad + + +def compute_base_gradients(precomputations, + params_intens, + kernel, + delta, + end_time, + param_rho, + marked_quantities, + n_ground_events): + + square_int_hawkes, marks_grid_hawkes, square_int_noise, marks_grid_noise = \ + marked_quantities + phi_tilde, phi_tilde_events, psi_tilde, xi_tilde = precomputations + + grad_baseline = get_grad_baseline_mixture( + phi_tilde, + params_intens[0], + params_intens[2], + kernel, + delta, + end_time, + param_rho, + square_int_hawkes, + marks_grid_hawkes, + n_ground_events + ) + + grad_baseline_noise = get_grad_baseline_noise_mixture( + params_intens[1], + kernel, + end_time, + param_rho, + n_ground_events, + square_int_noise, + marks_grid_noise + ) + + grad_alpha = get_grad_alpha_mixture( + phi_tilde, + phi_tilde_events, + psi_tilde, + params_intens[0], + params_intens[2], + kernel, + delta, + square_int_hawkes, + n_ground_events, + xi_tilde + ) + + return grad_baseline, grad_baseline_noise, grad_alpha diff --git a/fadin/solver.py b/fadin/solver.py index fcca13f..a98fff7 100644 --- a/fadin/solver.py +++ b/fadin/solver.py @@ -3,10 +3,13 @@ import matplotlib.pyplot as plt import numpy as np -from fadin.utils.utils import optimizer, projected_grid, momentmatching_nomark, \ - init_kernel_parameters -from fadin.utils.compute_constants import compute_constants_fadin -from fadin.loss_and_gradient import compute_gradient_fadin +from fadin.utils.utils import projected_grid, momentmatching_nomark, \ + init_kernel_parameters, smooth_projection_marked, init_saving_params, \ + smart_init_noise, optimizer_fadin, optimizer_unhap +from fadin.utils.compute_constants import compute_constants_fadin, \ + compute_marked_quantities, compute_constants_unhap +from fadin.loss_and_gradient import compute_gradient_fadin, compute_base_gradients, \ + get_grad_eta_mixture, get_grad_rho_mixture from fadin.kernels import DiscreteKernelFiniteSupport @@ -260,7 +263,7 @@ def fit(self, events, end_time): kernel_param.requires_grad_(True) self.params_intens.append(kernel_param) - self.opt = optimizer( + self.opt = optimizer_fadin( self.params_intens, 
self.params_solver, solver=self.solver @@ -341,6 +344,376 @@ def fit(self, events, end_time): return self +class UNHaP(object): + """Define the UNHaP framework for estimated mixture of Hawkes and + Poisson processes. + + MarkedFaDIn minimizes the discretized L2 loss of Hawkes processes + defined by the intensity as the sum of a baseline parameter + mhath: `\\mu_i` and a convolution between the kernel + :math:`\\phi_{ij}` and the sum of Dirac functions weighed by their marks + :math:`z_i := \\sum_{t^i_n \\in \\mathscr{F}^i_T} \\delta_{t^i_n}m(t^i_n)` + located at the event occurrences :math:`t^i_n`: + + .. math:: + \\forall i \\in [1 \\dots p], \\quad + \\lambda_i(t) = \\mu_i + \\sum_{j=1}^p \\phi_{ij} * z_j(t), + \\quad t \\in [0, T] + + where + + * :math:`p` is the dimension of the process + * :math:`\\mu_i` are the baseline intensities + * :math:`\\phi_{ij}` are the kernels + * :math:`z_j(t)` are the marked vector on the discretized grid. + + Parameters + ---------- + n_dim : `int` + Dimension of the underlying Hawkes process. + + kernel : `str` or `callable` + Either define a kernel in ``{'raised_cosine' | 'truncated_gaussian' | + 'truncated_exponential'}`` or a custom kernel. + + baseline_mask : `tensor` of shape (n_dim,), or `None` + Tensor of same shape as the baseline vector, with values in (0, 1). + `baseline` coordinates where `baseline_mask` is equal to 0 + will stay constant equal to zero and not be optimized. + If set to `None`, all coordinates of baseline will be optimized. + + alpha_mask : `tensor` of shape (n_dim, n_dim), or `None` + Tensor of same shape as the `alpha` tensor, with values in (0, 1). + `alpha` coordinates and kernel parameters where `alpha_mask` = 0 + will not be optimized. + If set to `None`, all coordinates of alpha will be optimized, + and all kernel parameters will be optimized if optimize_kernel=`True`. + + kernel_length : `float`, `default=1.` + Length of kernels in the Hawkes process. + + delta : `float`, `default=0.01` + Step size of the discretization grid. + + optim : `str` in ``{'RMSprop' | 'Adam' | 'GD'}``, default='RMSprop' + The algorithm used to optimize the Hawkes processes parameters. + + params_optim : dict, {'lr', ...} + Learning rate and parameters of the chosen optimization algorithm. + + max_iter : `int`, `default=2000` + Maximum number of iterations during fit. + + ztzG_approx : `boolean`, `default=True` + If ztzG_approx is false, compute the true ztzG precomputation constant + that is the computational bottleneck of FaDIn. if ztzG_approx is true, + ztzG is approximated with Toeplitz matrix not taking into account + edge effects. + + batch_rho : `int` + Number of FaDIn iterations between latent variables rho updates. + + tol : `float`, `default=1e-5` + The tolerance of the solver (iterations stop when the stopping + criterion is below it). If not reached the solver does 'max_iter' + iterations. + + density_hawkes : `str`, `default=linear` + Density of the marks of the Hawkes process, in ``{'linear' | 'uniform'}``. + + density_noise : `str`, `default=reverse_linear` + Density of the marks of the Hawkes process, in + ``{'reverse_linear' | 'uniform'}``. + + moment_matching: boolean, `default=False` + If set to False, baseline, alpha and kernel parameters are randomly + chosen. If set to True, baseline, alpha and kernel parameters are + chosen using the smart init strategy. + The smart init strategy is only implemented + for truncated gaussian and raised_cosine kernels. 
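    A minimal sketch of what this initialization computes (it mirrors the
    `smart_init_noise` helper added in this patch; `n_ground_events` is the
    per-channel event count and `end_time` the observation length, values here
    are hypothetical):

        import torch
        n_dim, end_time = 2, 1000.0
        n_ground_events = torch.tensor([350.0, 410.0])  # hypothetical counts
        # Hawkes baseline, noise baseline and alpha share the events evenly
        baseline = n_ground_events / (end_time * (n_dim + 2))
        baseline_noise = n_ground_events / (end_time * (n_dim + 2))
        alpha = torch.ones(n_dim, n_dim) / (n_dim + 2)
        # kernel parameters are then set from mean/std of inter-event delays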
+ + random_state : `int`, `RandomState` instance or `None`, `default=None` + Set the torch seed to 'random_state'. + If set to `None`, torch seed will be set to 0. + + Attributes + ---------- + param_baseline : `tensor`, shape (n_dim) + Baseline parameter of the Hawkes process. + + param_baseline_noise : `tensor`, shape (n_dim) + Baseline parameter of the Hawkes process. + + param_alpha : `tensor`, shape (n_dim, n_dim) + Weight parameter of the Hawkes process. + + param_kernel : `list` of `tensor` + list containing tensor array of kernels parameters. + The size of the list varies depending the number of + parameters. The shape of each tensor is `(n_dim, n_dim)`. + + """ + def __init__(self, n_dim, kernel, baseline_mask=None, alpha_mask=None, + kernel_length=1, delta=0.01, optim='RMSprop', + params_optim=dict(), max_iter=2000, batch_rho=100, + ztzG_approx=True, tol=10e-5, density_hawkes='linear', + density_noise='uniform', moment_matching=False, random_state=None): + + # Set discretization parameters + self.delta = delta + self.kernel_length = kernel_length + self.L = int(self.kernel_length / delta) + self.ztzG_approx = ztzG_approx + + # Set optimizer parameters + self.solver = optim + self.max_iter = max_iter + self.tol = tol + self.batch_rho = batch_rho + + self.density_hawkes = density_hawkes + self.density_noise = density_noise + self.moment_matching = moment_matching + + # Set model parameters + self.n_dim = n_dim + + self.baseline = torch.rand(self.n_dim) + + if baseline_mask is None: + self.baseline_mask = torch.ones([n_dim]) + else: + assert baseline_mask.shape == self.baseline.shape, \ + "Invalid baseline_mask shape, must be (n_dim,)" + self.baseline_mask = baseline_mask + self.baseline = (self.baseline * self.baseline_mask).requires_grad_(True) + + self.baseline_noise = torch.rand(self.n_dim).requires_grad_(True) + + self.alpha = torch.rand(self.n_dim, self.n_dim) + + if alpha_mask is None: + self.alpha_mask = torch.ones([self.n_dim, self.n_dim]) + else: + assert alpha_mask.shape == self.alpha.shape, \ + "Invalid alpha_mask shape, must be (n_dim, n_dim)" + self.alpha_mask = alpha_mask + self.alpha = (self.alpha * self.alpha_mask).requires_grad_(True) + + kernel_params_init = init_kernel_parameters(kernel, + self.kernel_length, + self.n_dim) + + self.n_kernel_params = len(kernel_params_init) + + self.kernel_model = DiscreteKernelFiniteSupport(self.delta, self.n_dim, + kernel, self.kernel_length, 0) + self.kernel = kernel + # Set optimizer + self.params_intens = [self.baseline, self.baseline_noise, self.alpha] + + for i in range(self.n_kernel_params): + self.params_intens.append( + kernel_params_init[i].float().clip(1e-4).requires_grad_(True)) + + # If the learning rate is not given, fix it to 1e-3 + if 'lr' not in params_optim.keys(): + params_optim['lr'] = 1e-3 + + self.params_solver = params_optim + + # device and seed + if random_state is None: + torch.manual_seed(0) + else: + torch.manual_seed(random_state) + + def fit(self, events, end_time): + """Learn the parameters of the Hawkes processes on a discrete grid. + + Parameters + ---------- + events : pandas DataFrame with three columns: timestamps, marks values + and type of events. + + end_time : int + The end time of the Hawkes process. + + Returns + ------- + self : object + Fitted parameters. 
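        A hedged usage sketch with illustrative values. The input format
        actually consumed by `smooth_projection_marked` below is a list with
        one (n_events, 2) array of [timestamp, mark] per channel, as built in
        examples/plot_unhap.py:

            import numpy as np
            # one channel, a few marked events:
            # column 0 = timestamps, column 1 = marks in [0, 1]
            events = [np.array([[0.7, 0.4],
                                [2.1, 0.9],
                                [5.3, 0.6]])]
            solver = UNHaP(n_dim=1, kernel='truncated_gaussian',
                           delta=0.01, max_iter=200)
            solver.fit(events, end_time=10)
            print(solver.param_baseline[-1], solver.param_alpha[-1])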
+ """ + n_grid = int(1 / self.delta) * end_time + 1 + discretization = torch.linspace(0, self.kernel_length, self.L) + + events_grid, marks_grid, marked_events, _ = \ + smooth_projection_marked(events, self.delta, n_grid) + + sum_marks = marks_grid.sum(1) + print('sum of marks per channel:', sum_marks) + + # number of events per dimension + n_ground_events = events_grid.sum(1) + print('number of events is:', n_ground_events) + + self.sum_marks = sum_marks + self.n_ground_events = n_ground_events + + mask_events = torch.where(events_grid > 0) + mask_void = torch.where(events_grid == 0) + + # computation of the marked quantities + marked_quantities = compute_marked_quantities(events_grid, marks_grid, + self.n_dim, self.density_hawkes, + self.density_noise) + + self.rho = torch.zeros(self.n_dim, n_grid) + self.rho[mask_events] = 0.5 # Init + self.rho = self.rho.requires_grad_(True) + # add rho parameter at the end of params + self.params_mixture = [self.rho] + + if self.moment_matching: + # Smart initialization of solver parameters + self.baseline, self.baseline_noise, self.alpha, kernel_params_init = \ + smart_init_noise(self, marked_events, n_ground_events, end_time) + + # Set optimizer with smartinit parameters + self.params_intens = [self.baseline, self.baseline_noise, self.alpha] + + for i in range(2): # range(self.n_params_kernel) + self.params_intens.append( + kernel_params_init[i].float().clip(1e-4).requires_grad_(True) + ) + + self.opt_intens, self.opt_mixture = optimizer_unhap([self.params_intens, + self.params_mixture], + self.params_solver, + solver=self.solver) + + self.param_rho = self.params_mixture[0].detach() + + #################################################### + # Precomputations + #################################################### + + start = time.time() + z_tilde = self.rho * marks_grid + precomputations = compute_constants_unhap(z_tilde, + marks_grid, + events_grid, + self.param_rho, + marked_quantities[1], + self.L) + print('precomput:', time.time() - start) + + #################################################### + # save results + #################################################### + + _, self.param_baseline, self.param_baseline_noise, \ + self.param_alpha, self.param_kernel = init_saving_params( + self.params_intens, self.n_kernel_params, self.n_dim, self.max_iter) + + # If kernel parameters are optimized + for i in range(self.n_kernel_params): + self.param_kernel[i, 0] = self.params_intens[3 + i].detach() + + #################################################### + start = time.time() + for i in range(self.max_iter): + print(f"Fitting model... 
{i/self.max_iter:6.1%}\r", end='', + flush=True) + + self.opt_intens.zero_grad() + + # Update kernel and grad values + kernel, grad_eta = self.kernel_model.kernel_and_grad( + self.params_intens[3:3 + self.n_kernel_params], + discretization + ) + + #################################################### + # Compute gradients + #################################################### + + # Update baseline, baseline noise and alpha + self.params_intens[0].grad, self.params_intens[1].grad, \ + self.params_intens[2].grad = compute_base_gradients( + precomputations, + self.params_intens, + kernel, + self.delta, + end_time, + self.param_rho, + marked_quantities, + n_ground_events + ) + + # Update kernel + for j in range(self.n_kernel_params): + self.params_intens[3 + j].grad = \ + get_grad_eta_mixture( + precomputations, + self.params_intens[0], + self.params_intens[2], + kernel, + grad_eta[j], + self.delta, + marked_quantities[0], + n_ground_events) + + self.opt_intens.step() + + if i % self.batch_rho == 0: + self.opt_mixture.zero_grad() + + z_tilde_ = marks_grid * self.params_mixture[0].data + self.params_mixture[0].grad = get_grad_rho_mixture( + z_tilde_, + marks_grid, + kernel, + marked_quantities[0], + self.params_intens, + self.delta, + mask_void, + n_ground_events, + marked_quantities) + + self.opt_mixture.step() + + self.params_mixture[0].data = self.params_mixture[0].data.clip(0, 1) + self.params_mixture[0].data[mask_void] = 0. + # round the rho + z_tilde = marks_grid * torch.round(self.params_mixture[0].data) + precomputations = compute_constants_unhap( + z_tilde, marks_grid, events_grid, + self.param_rho, marked_quantities[1], self.L) + + self.param_rho = torch.round(self.params_mixture[0].detach()) + + # Save and clip parameters + self.params_intens[0].data = self.params_intens[0].data.clip(0) * \ + self.baseline_mask + self.params_intens[1].data = self.params_intens[1].data.clip(0) + self.params_intens[2].data = self.params_intens[2].data.clip(0) * \ + self.alpha_mask + + self.param_baseline[i+1] = self.params_intens[0].detach() + self.param_alpha[i+1] = self.params_intens[2].detach() + self.param_baseline_noise[i+1] = self.params_intens[1].detach() + + for j in range(self.n_kernel_params): + self.params_intens[3+j].data = \ + self.params_intens[3+j].data.clip(1e-3) + self.param_kernel[j, i+1] = self.params_intens[3+j].detach() + + print('iterations in ', time.time() - start) + + return self + + def plot(solver, plotfig=False, bl_noise=False, title=None, ch_names=None, savefig=None): """ diff --git a/fadin/utils/compute_constants.py b/fadin/utils/compute_constants.py index 9515d61..121c24f 100644 --- a/fadin/utils/compute_constants.py +++ b/fadin/utils/compute_constants.py @@ -3,6 +3,8 @@ import torch from scipy.linalg import toeplitz +from fadin.utils.utils import convert_float_tensor + @numba.jit(nopython=True, cache=True) def get_zG(events_grid, L): @@ -149,3 +151,107 @@ def compute_constants_fadin(events_grid, L, ztzG_approx=True): ztzG = torch.tensor(ztzG).float() return zG, zN, ztzG + + +def compute_marked_quantities(events_grid, marks_grid, n_dim, + density_hawkes, density_noise): + + if density_hawkes == 'linear': + square_int_hawkes = torch.tensor([4/3 for _ in range(n_dim)]) + marks_grid_hawkes = 2 * marks_grid + elif density_hawkes == 'uniform': + square_int_hawkes = torch.tensor([1. 
for _ in range(n_dim)]) + marks_grid_hawkes = events_grid + else: + raise NotImplementedError('this density is not implemented \ + must be in linear | uniform ') + + if density_noise == 'reverse_linear': + square_int_noise = torch.tensor([4/3 for _ in range(n_dim)]) + marks_grid_noise = 2 - 2 * marks_grid + elif density_noise == 'uniform': + square_int_noise = torch.tensor([1. for _ in range(n_dim)]) + marks_grid_noise = events_grid + else: + raise NotImplementedError('this density is not implemented \ + must be in reverse_linear | uniform ') + + return square_int_hawkes, marks_grid_hawkes, square_int_noise, marks_grid_noise + + +def get_phi_tilde(z_tilde, L): + """ + marks_grid.shape = n_dim, n_grid + rho.shape = n_dim, n_grid + phi_tilde.shape = n_dim, L + + NB: events grid are considered with the distribution of influence of the mark + """ + phi_tilde = get_zG(z_tilde.detach().numpy(), L) + + return phi_tilde + + +def get_xi_tilde(marks_grid, z_tilde, rho, L): + """ + marks_grid.shape = n_dim, n_grid + z_tilde.shape = n_dim, n_grid + rho.shape = n_dim, n_grid + phi_tilde.shape = n_dim, L + + NB: events grid are considered with the distribution of influence of the mark + """ + vec = (marks_grid ** 2) * rho - z_tilde ** 2 + phi_tilde = get_zG(vec.detach().numpy(), L) + + return phi_tilde + + +def get_phi_tilde_events(z_tilde, events_ground_grid, L, rho, marks_grid_hawkes): + """ + z_tilde.shape = n_dim, n_grid + zLN.shape = n_dim, n_dim, L + """ + n_dim, n_grid = z_tilde.shape + + phi_tilde_events = np.zeros(shape=(n_dim, n_dim, L)) + prod = marks_grid_hawkes * rho + for i in range(n_dim): + # ei = events_ground_grid[i] # Count drived marks + prod_i = prod[i] + for j in range(n_dim): + z_tilde_j = z_tilde[j] + for tau in range(L): + phi_tilde_events[i, j, tau] = ( + (z_tilde_j[:n_grid-tau]) * prod_i[tau:]).sum() + + return phi_tilde_events + + +def get_psi_tilde_approx(z_tilde, L): + """ + marks_grid.shape = n_dim, n_grid + loc_events.shape = list of n_dim of vector of size Number of events + rho.shape = list of n_dim of vector of size Number of events + ztzG.shape = n_dim, n_dim, L, L + """ + psi_tilde = get_ztzG_approx(z_tilde.detach().numpy(), L) + + return psi_tilde + + +def compute_constants_unhap(z_tilde, marks_grid, events_grid, + param_rho, marks_grid_hawkes, L): + """Compute all precomputations terms.""" + phi_tilde = get_phi_tilde(z_tilde, L) + phi_tilde_events = get_phi_tilde_events( + z_tilde, events_grid, L, param_rho, marks_grid_hawkes) + psi_tilde = get_psi_tilde_approx(z_tilde, L) + xi_tilde = get_xi_tilde(marks_grid, z_tilde, param_rho, L) + + phi_tilde = convert_float_tensor(phi_tilde) + phi_tilde_events = convert_float_tensor(phi_tilde_events) + psi_tilde = convert_float_tensor(psi_tilde) + xi_tilde = convert_float_tensor(xi_tilde) + + return phi_tilde, phi_tilde_events, psi_tilde, xi_tilde diff --git a/fadin/utils/utils.py b/fadin/utils/utils.py index 8afd4ec..694488b 100644 --- a/fadin/utils/utils.py +++ b/fadin/utils/utils.py @@ -3,6 +3,10 @@ import matplotlib.pyplot as plt +def convert_float_tensor(x): + return torch.tensor(x).float() + + def kernel_normalization(kernel_values, time_values, delta, lower=0, upper=1): """Normalize the given kernel on the given discrete grid. """ @@ -55,6 +59,86 @@ def init_kernel_parameters(kernel, kernel_length, n_dim): return kernel_params_init +def smart_init_kernel(solver, events, n_ground_events): + """Smart initialization of kernel parameters. 
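    In words: for every event of channel i, take the delay to the most recent
    earlier event of channel j, then use the empirical mean/std of those delays
    (truncated Gaussian: kernel mean set to the average delay; raised cosine:
    u = max(mean - std, 0) and width = std). A small sketch of the statistic,
    with hypothetical timestamps:

        import torch
        t_i = torch.tensor([1.0, 2.5, 4.0])   # events of channel i
        t_j = torch.tensor([0.6, 2.2, 3.1])   # events of channel j
        # delay from each event of i to the latest earlier event of j
        delays = torch.stack([ti - t_j[t_j < ti].max() for ti in t_i])
        avg, std = delays.mean(), delays.std()  # feed the kernel init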
+ """ + kernel_params_init = [torch.ones(solver.n_dim, solver.n_dim), + torch.ones(solver.n_dim, solver.n_dim)] + for i in range(solver.n_dim): + for j in range(solver.n_dim): + # Mean, std of time delta of [i, j] kernel + delta_t = torch.zeros(int(n_ground_events[i].item())) + for n in range(int(n_ground_events[i])): + t_n_i = events[i][n][0] + t_n_j = torch.max( + torch.where(torch.tensor(events[j][:, 0]) < t_n_i, + torch.tensor(events[j][:, 0]), + 0.) + ) + delta_t[n] = t_n_i - t_n_j + avg = torch.mean(delta_t) + std = torch.std(delta_t) + + # Parameters of [i, j] kernel + if solver.kernel == 'truncated_gaussian': + kernel_params_init[0][i, j] = avg + if solver.kernel == 'raised_cosine': + u = max(avg - std, 0) + kernel_params_init[0][i, j] = u + kernel_params_init[1][i, j] = std + return kernel_params_init + + +def smart_init(solver, events, n_ground_events, end_time): + """Smart initialization of baseline and alpha. + $\\mu_i^s = \frac{\\#\\mathscr{F}^i_T}{(D+1)T} \forall i \\in [1, D]$ + """ + assert solver.kernel in ['truncated_gaussian', 'raised_cosine'], ( + f"Smart initialization not implemented for kernel {solver.kernel}" + ) + # Baseline init + + baseline = n_ground_events / (end_time * (solver.n_dim + 1)) + baseline = (baseline * solver.baseline_mask).requires_grad_(True) + + # Alpha init + alpha = torch.ones(solver.n_dim, solver.n_dim) / (solver.n_dim + 1) + alpha = (alpha * solver.alpha_mask).requires_grad_(True) + + # Kernel parameters init + if solver.optimize_kernel: + kernel_params_init = smart_init_kernel(solver, events, n_ground_events) + + return baseline, alpha, kernel_params_init + + +def smart_init_noise(solver, events, n_ground_events, end_time): + """Smart initialization of baseline and alpha, with an extra baseline term for + noisy events. + $\\mu_i^{s, noise} = \\frac{\\#\\mathscr{F}^i_T}{(D+2)T} \\forall i \\in [1, D]$ + $\\mu_i^s = \frac{\\#\\mathscr{F}^i_T}{(D+2)T} \forall i \\in [1, D]$ + $\\alpha_{i, j}^s = \\frac{1}{D+2} \\forall i,j \\in [1, D] + """ + assert solver.kernel in ['truncated_gaussian', 'raised_cosine'], ( + f"Smart initialization not implemented for kernel {solver.kernel}" + ) + # Baseline init + baseline = n_ground_events / (end_time * (solver.n_dim + 2)) + baseline = (baseline * solver.baseline_mask).requires_grad_(True) + + bl_noise = n_ground_events / (end_time * (solver.n_dim + 2)) + bl_noise = (bl_noise * solver.baseline_mask).requires_grad_(True) + + # Alpha init + alpha = torch.ones(solver.n_dim, solver.n_dim) / (solver.n_dim + 2) + alpha = (alpha * solver.alpha_mask).requires_grad_(True) + + # Kernel parameters init + kernel_params_init = smart_init_kernel(solver, events, n_ground_events) + + return baseline, bl_noise, alpha, kernel_params_init + + def grad_kernel_callable(kernel, grad_kernel, kernel_params, time_values, L, lower, upper, n_dim): """Transform the callables ``kernel and ``grad_kernel`` into @@ -103,7 +187,33 @@ def projected_grid(events, grid_step, n_grid): return events_grid -def optimizer(param, params_optim, solver='RMSprop'): +def smooth_projection_marked(marked_events, delta, n_grid): + """Project events on the grid and remove duplica in both events grid and + the original events lists. 
Return also the marks on the grid.""" + n_dim = len(marked_events) + events_grid = torch.zeros((n_dim, n_grid)) + marks_grid = torch.zeros((n_dim, n_grid)) + marked_events_smooth = [None] * n_dim + id_unique = [] + for i in range(n_dim): + ev_i = marked_events[i][:, 0] + marks_i = marked_events[i][:, 1] + idx = np.round(ev_i / delta).astype(np.int64) + events_grid[i, idx] += 1 + + z = np.round(ev_i / delta) * delta + ev_unique, index_unique = np.unique(z, return_index=True) + marked_events_smooth[i] = np.concatenate( + (ev_unique[:, None], marks_i[index_unique][:, None]), axis=1) + + # idx_grid = #np.round(ev_unique * L) + marks_grid[i, idx[index_unique]] += torch.tensor(marks_i[index_unique]) + id_unique.append(index_unique) + # marks_grid = projected_grid_marked(marked_events_smooth, delta, n_grid) + return events_grid, marks_grid, marked_events_smooth, id_unique + + +def optimizer_fadin(param, params_optim, solver='RMSprop'): """Set the Pytorch optimizer. Parameters @@ -129,6 +239,35 @@ def optimizer(param, params_optim, solver='RMSprop'): "solver must be 'GD', 'RMSProp', 'Adam'," f"got '{solver}'") +def optimizer_unhap(param, params_optim, solver='RMSprop'): + """Set the Pytorch optimizer. + + Parameters + ---------- + param : XXX + lr : float + learning rate + solver : str + solver name, possible values are 'GD', 'RMSProp', 'Adam' + or 'CG' + Returns + ------- + XXX + """ + if solver == 'GD': + return torch.optim.SGD(param[0], **params_optim), \ + torch.optim.SGD(param[1], **params_optim) + elif solver == 'RMSprop': + return torch.optim.RMSprop(param[0], **params_optim), \ + torch.optim.RMSprop(param[1], **params_optim) + elif solver == 'Adam': + return torch.optim.Adam(param[0], **params_optim), \ + torch.optim.Adam(param[1], **params_optim) + else: + raise NotImplementedError( + "solver must be 'GD', 'RMSProp', 'Adam'," f"got '{solver}'") + + def l2_error(x, a): return torch.sqrt(((x - a)**2).sum()).item() @@ -259,3 +398,19 @@ def momentmatching_nomark(solver, events, n_ground_events, end_time, solver, events, n_ground_events, mode=mode ) return baseline, alpha, kernel_params_init + + +def init_saving_params(params_intens, n_kernel_params, n_dim, max_iter): + + v_loss = torch.zeros(max_iter) + + param_baseline = torch.zeros(max_iter + 1, n_dim) + param_baseline_noise = torch.zeros(max_iter + 1, n_dim) + param_alpha = torch.zeros(max_iter + 1, n_dim, n_dim) + param_kernel = torch.zeros(n_kernel_params, max_iter + 1, n_dim, n_dim) + + param_baseline[0] = params_intens[0].detach() + param_baseline_noise[0] = params_intens[1].detach() + param_alpha[0] = params_intens[2].detach() + + return v_loss, param_baseline, param_baseline_noise, param_alpha, param_kernel diff --git a/fadin/utils/utils_simu.py b/fadin/utils/utils_simu.py index 25dd64b..83ae5d6 100644 --- a/fadin/utils/utils_simu.py +++ b/fadin/utils/utils_simu.py @@ -405,86 +405,163 @@ def simu_hawkes_thinning(end_time, baseline, alpha, kernel, return events -# def simu_hawkes(end_time, baseline, alpha, kernel, upper_bound=None, -# random_state=None): -# """ Simulate a Hawkes process following an immigration-birth procedure. -# Edge effects may be reduced according to the second references below. 
+def simu_marked_hawkes_cluster(end_time, baseline, alpha, time_kernel, marks_kernel, + marks_density, params_time_kernel=dict(), + params_marks_kernel=dict(), params_marks_density=dict(), + time_kernel_length=None, marks_kernel_length=None, + random_state=None): + """ Simulate a multivariate marked Hawkes process following + an immigration-birth procedure. -# References: + Parameters + ---------- + end_time : int | float + Duration of the Poisson process. -# Møller, J., & Rasmussen, J. G. (2006). Approximate simulation of Hawkes processes. -# Methodology and Computing in Applied Probability, 8, 53-64. + baseline : array of float of size (n_dim,) + Baseline parameter of the Hawkes process. -# Møller, J., & Rasmussen, J. G. (2005). Perfect simulation of Hawkes processes. -# Advances in applied probability, 37(3), 629-646. + alpha : array of float of size (n_dim, n_dim) + Weight parameter associated to the kernel function. -# Parameters -# ---------- -# end_time : int | float -# Duration of the Poisson process. + time_kernel: str or callable + The choice of the time kernel for the simulation. + String kernel available are probability distribution from scipy.stats module. + A custom kernel can be implemented with the form time_kernel(x, **params). -# baseline : float -# Baseline parameter of the Hawkes process. + marks_density: str + The choice of the kernel for the simulation. + String kernel available are probability distribution from scipy.stats module. -# alpha : float -# Weight parameter associated to the kernel function. + params_time_kernel: dict + Parameters of the kernel used to simulate the process. + It must follow parameters associated to scipy.stats distributions or the custom + given callable. -# kernel: str -# The choice of the kernel for the simulation. -# Kernel available are probability distribution from scipy.stats module. + params_marks_density: dict + Parameters of the kernel used to simulate the process. + It must follow parameters associated to scipy.stats distributions or the custom + given callable. -# upper_bound : int, float or None, default=None -# Upper bound of the baseline function. If None, -# the maximum of the baseline is taken onto a finite discrete grid. + kernel_length: float or None, default=None + If the custom kernel has finite support, fixe the limit of the support. + The support need to be high enough such that probability mass between + zero and kernel_length is strictly higher than zero. -# random_state : int, RandomState instance or None, default=None -# Set the numpy seed to 'random_state'. + random_state : int, RandomState instance or None, default=None + Set the numpy seed to 'random_state'. -# Returns -# ------- -# events : array -# The timestamps of the point process' events. -# """ -# if random_state is None: -# np.random.seed(0) -# else: -# np.random.seed(random_state) + Returns + ------- + events : list of array-like with shape (n_events, 2) + The timestamps and the marks of the point process' events. + Timestamps are first coordinate. 
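    A hedged usage sketch for one dimension, mirroring the call made in
    examples/plot_unhap.py (the callables come from fadin.utils.functions and
    are passed through custom_density; parameter values are illustrative):

        import numpy as np
        from fadin.utils.functions import identity, linear_zero_one, \
            truncated_gaussian

        baseline = np.array([0.3])
        alpha = np.array([[0.9]])
        events, labels = simu_marked_hawkes_cluster(
            1000., baseline, alpha,
            time_kernel=truncated_gaussian, marks_kernel=identity,
            marks_density=linear_zero_one,
            params_time_kernel=dict(mu=np.array([[0.5]]),
                                    sigma=np.array([[0.1]])),
            random_state=0)
        # events[0] has shape (n_events, 2):
        # timestamps in column 0, marks in column 1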
+ """ -# # Simulate events from baseline -# immigrants = simu_poisson(end_time, intensity=baseline, upper_bound=upper_bound) + if random_state is None: + np.random.seed(np.random.randint(0, 100)) + else: + np.random.seed(random_state) -# # initialize -# gen = dict(gen0=immigrants) -# ev = [immigrants] -# sample_from_kernel = getattr(scipy.stats, kernel) # dic = [4, 3] + n_dim = baseline.shape[0] + immigrants = simu_multi_poisson(end_time, baseline, + upper_bound=None, + random_state=random_state) + + immigrants_marks = [custom_density( + marks_density, params_marks_density, size=immigrants[i].shape[0], + kernel_length=marks_kernel_length) for i in range(n_dim)] + + gen_time = dict(gen0=immigrants) + gen_mark = dict(gen0=immigrants_marks) + gen_labels = dict() + + events = [] + labels = [0] * n_dim + for i in range(n_dim): + temp = np.vstack([immigrants[i], immigrants_marks[i]]).T + events.append(temp) -# # generate offsprings -# it = 0 -# while len(gen[f'gen{it}']): -# Ci = gen[f'gen{it}'] + it = 0 + while len(gen_time[f'gen{it}']): + print(f"Simulate generation {it}\r") + time_ev_k = gen_time[f'gen{it}'] + marks_k = gen_mark[f'gen{it}'] + induced_ev = [[0] * n_dim for _ in range(n_dim)] + time_ev = [[0] * n_dim for _ in range(n_dim)] + sim_ev = [] + sim_marks = [] + sim_labels = [] + s = 0 + for i in range(n_dim): + sim_ev_i = [] + sim_marks_i = [] + sim_labels_i = [] + for j in range(n_dim): + induced_ev[i][j] = np.random.poisson( + lam=alpha[i, j]*marks_k[j], size=len(time_ev_k[j])) -# # number of descendents of an event follow a Poisson law of parameter alpha -# # the expected number of event induced by an event is then alpha + no_exitations = np.where(induced_ev[i][j] == 0)[0] + gen_labels_ij = np.ones(len(time_ev_k[j])) + if it == 0: + gen_labels_ij[no_exitations] = 0. + else: + gen_labels_ij[no_exitations] = 2. 
-# Di = np.random.poisson(lam=alpha, size=len(Ci)) + nij = induced_ev[i][j].sum() -# # sample interval range according to the given pdf (kernel of the parameter) -# Ci = np.repeat(Ci, repeats=Di) -# n = Di.sum() -# Ei = sample_from_kernel.rvs(size=n) # * dic -# Fi = Ci + Ei + time_ev[i][j] = np.repeat(time_ev_k[j], repeats=induced_ev[i][j]) + inter_arrival_ij = custom_density( + time_kernel, params_time_kernel, size=nij, + kernel_length=time_kernel_length) -# if len(Fi) > 0: -# gen[f'gen{it+1}'] = np.hstack(Fi) -# ev.append(np.hstack(Fi)) -# else: -# break -# it += 1 + sim_ev_ij = time_ev[i][j] + inter_arrival_ij + sim_ev_i.append(sim_ev_ij) + # sim_marks_ij = mark_distribution.rvs(**params_marks_density, size=nij) -# # Add immigrants and sort -# events = np.hstack(ev) -# valid_events = events < end_time -# events = np.sort(events[valid_events]) + # custom distrib on mark density + sim_marks_ij = custom_density( + marks_density, params_marks_density, size=nij, + kernel_length=marks_kernel_length) + + # sim_marks_ij = marksij / marksij.max() + + assert (sim_marks_ij > 1).sum() == 0 + sim_marks_i.append(sim_marks_ij) + sim_labels_i.append(gen_labels_ij) + + s += sim_ev_ij.shape[0] + sim_ev.append(np.hstack(sim_ev_i)) + sim_marks.append(np.hstack(sim_marks_i)) + sim_labels.append(np.hstack(sim_labels_i)) + + if s > 0: + gen_time[f'gen{it+1}'] = sim_ev + gen_mark[f'gen{it+1}'] = sim_marks + gen_labels[f'gen{it}'] = sim_labels + for i in range(n_dim): + temp = np.vstack([sim_ev[i], sim_marks[i]]).T + events[i] = np.concatenate((events[i], temp)) + if it > 0: + labels[i] = np.concatenate((labels[i], sim_labels[i])) + else: + labels[i] = sim_labels[i] + else: + for i in range(n_dim): + if it > 0: + labels[i] = np.concatenate((labels[i], sim_labels[i])) + else: + labels[i] = sim_labels[i] + valid_events = events[i][:, 0] < end_time + temp_ev = events[i][valid_events] + temp_lab = labels[i][valid_events] + + sorting = np.argsort(temp_ev[:, 0]) + events[i] = temp_ev[sorting] + labels[i] = temp_lab[sorting] + break + + it += 1 -# return events + return events, labels From 46dfc78f6014b0e28c3e4b2bd00a20a4c6a14565 Mon Sep 17 00:00:00 2001 From: GuillaumeStaermanML Date: Mon, 29 Jul 2024 16:14:50 +0200 Subject: [PATCH 25/25] doc --- examples/plot_unhap.py | 108 +++++++++++++++++++++++++++++++++++++++ fadin/solver.py | 19 +------ fadin/utils/functions.py | 31 +++++++++++ 3 files changed, 140 insertions(+), 18 deletions(-) create mode 100644 examples/plot_unhap.py create mode 100644 fadin/utils/functions.py diff --git a/examples/plot_unhap.py b/examples/plot_unhap.py new file mode 100644 index 0000000..e030602 --- /dev/null +++ b/examples/plot_unhap.py @@ -0,0 +1,108 @@ +# %% import stuff +import numpy as np + +from fadin.utils.utils_simu import simu_marked_hawkes_cluster, custom_density, \ + simu_multi_poisson +from fadin.solver import UNHaP +from fadin.utils.functions import identity, linear_zero_one, reverse_linear_zero_one, \ + truncated_gaussian + + +# %% Fixing the parameter of the simulation setting + +baseline = np.array([.3]) +baseline_noise = np.array([.05]) +alpha = np.array([[1.45]]) +mu = np.array([[0.5]]) +sigma = np.array([[0.1]]) + +delta = 0.01 +end_time = 10000 +seed = 0 +max_iter = 20000 +batch_rho = 200 + +# %% Create the simulating function + + +def simulate_data(baseline, baseline_noise, alpha, end_time, seed=0): + n_dim = len(baseline) + + marks_kernel = identity + marks_density = linear_zero_one + time_kernel = truncated_gaussian + + params_marks_density = dict() + # 
params_marks_density = dict(scale=1) + params_marks_kernel = dict(slope=1.2) + params_time_kernel = dict(mu=mu, sigma=sigma) + + marked_events, _ = simu_marked_hawkes_cluster( + end_time, baseline, alpha, time_kernel, marks_kernel, marks_density, + params_marks_kernel=params_marks_kernel, + params_marks_density=params_marks_density, + time_kernel_length=None, marks_kernel_length=None, + params_time_kernel=params_time_kernel, random_state=seed) + + noisy_events_ = simu_multi_poisson(end_time, [baseline_noise]) + + # random_marks = [ + # np.random.rand(noisy_events_[i].shape[0]) for i in range(n_dim)] + noisy_marks = [custom_density( + reverse_linear_zero_one, dict(), size=noisy_events_[i].shape[0], + kernel_length=1.) for i in range(n_dim)] + noisy_events = [ + np.concatenate((noisy_events_[i].reshape(-1, 1), + noisy_marks[i].reshape(-1, 1)), axis=1) for i in range(n_dim)] + + events = [ + np.concatenate( + (noisy_events[i], marked_events[i]), axis=0) for i in range(n_dim)] + + events_cat = [events[i][events[i][:, 0].argsort()] for i in range(n_dim)] + + labels = [np.zeros(marked_events[i].shape[0] + + noisy_events_[i].shape[0]) for i in range(n_dim)] + labels[0][-marked_events[0].shape[0]:] = 1. + true_rho = [labels[i][events[i][:, 0].argsort()] for i in range(n_dim)] + # put the mark to one to test the impact of the marks + # events_cat[0][:, 1] = 1. + + return events_cat, noisy_marks, true_rho + + +ev, noisy_marks, true_rho = simulate_data(baseline, baseline_noise.item(), + alpha, end_time, seed=0) +# %% Apply UNHAP + + +solver = UNHaP(n_dim=1, + kernel="truncated_gaussian", + kernel_length=1., + delta=delta, optim="RMSprop", + params_optim={'lr': 1e-3}, + max_iter=max_iter, + batch_rho=batch_rho, + density_hawkes='linear', + density_noise='reverse_linear', + moment_matching=True + ) +solver.fit(ev, end_time) + +# %% Print estimated parameters + +print('Estimated baseline is: ', solver.param_baseline[-10:].mean().item()) +print('Estimated alpha is: ', solver.param_alpha[-10:].mean().item()) +print('Estimated kernel mean is: ', (solver.param_kernel[0][-10:].mean().item())) +print('Estimated kernel sd is: ', solver.param_kernel[1][-10:].mean().item()) + +# error on params +error_baseline = (solver.param_baseline[-10:].mean().item() - baseline.item()) ** 2 +error_alpha = (solver.param_alpha[-10:].mean().item() - alpha.item()) ** 2 +error_mu = (solver.param_kernel[0][-10:].mean().item() - 0.5) ** 2 +error_sigma = (solver.param_kernel[1][-10:].mean().item() - 0.1) ** 2 +sum_error = error_baseline + error_alpha + error_mu + error_sigma +error_params = np.sqrt(sum_error) + +print('L2 square errors of the vector of parameters is:', error_params) +# %% diff --git a/fadin/solver.py b/fadin/solver.py index a98fff7..2036296 100644 --- a/fadin/solver.py +++ b/fadin/solver.py @@ -348,24 +348,7 @@ class UNHaP(object): """Define the UNHaP framework for estimated mixture of Hawkes and Poisson processes. - MarkedFaDIn minimizes the discretized L2 loss of Hawkes processes - defined by the intensity as the sum of a baseline parameter - mhath: `\\mu_i` and a convolution between the kernel - :math:`\\phi_{ij}` and the sum of Dirac functions weighed by their marks - :math:`z_i := \\sum_{t^i_n \\in \\mathscr{F}^i_T} \\delta_{t^i_n}m(t^i_n)` - located at the event occurrences :math:`t^i_n`: - - .. 
math:: - \\forall i \\in [1 \\dots p], \\quad - \\lambda_i(t) = \\mu_i + \\sum_{j=1}^p \\phi_{ij} * z_j(t), - \\quad t \\in [0, T] - - where - - * :math:`p` is the dimension of the process - * :math:`\\mu_i` are the baseline intensities - * :math:`\\phi_{ij}` are the kernels - * :math:`z_j(t)` are the marked vector on the discretized grid. + Unhap minimizes the discretized L2 mixture loss of Hawkes and Poisson processes. Parameters ---------- diff --git a/fadin/utils/functions.py b/fadin/utils/functions.py new file mode 100644 index 0000000..02ae95e --- /dev/null +++ b/fadin/utils/functions.py @@ -0,0 +1,31 @@ +import torch + +from fadin.kernels import DiscreteKernelFiniteSupport + + +def identity(x, **param): + return x + + +def linear_zero_one(x, **params): + temp = 2 * x + mask = x > 1 + temp[mask] = 0. + return temp + + +def reverse_linear_zero_one(x, **params): + temp = 2 - 2 * x + mask = x > 1 + temp[mask] = 0. + return temp + + +def truncated_gaussian(x, **params): + rc = DiscreteKernelFiniteSupport(delta=0.01, n_dim=1, kernel='truncated_gaussian') + mu = params['mu'] + sigma = params['sigma'] + kernel_values = rc.kernel_eval( + [torch.Tensor(mu), torch.Tensor(sigma)], torch.tensor(x)) + + return kernel_values.double().numpy()
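
A quick, hedged sanity check of the helpers above: linear_zero_one and
reverse_linear_zero_one are used as mark densities supported on [0, 1], so a
numerical integral over that interval should be close to one.

    import numpy as np
    from fadin.utils.functions import linear_zero_one, reverse_linear_zero_one

    x = np.linspace(0, 1, 1001)
    print(np.trapz(linear_zero_one(x), x))          # ~1.0, density 2 * m
    print(np.trapz(reverse_linear_zero_one(x), x))  # ~1.0, density 2 - 2 * m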