diff --git a/bask/bayesgpr.py b/bask/bayesgpr.py
index fcf4633..44c6cd5 100644
--- a/bask/bayesgpr.py
+++ b/bask/bayesgpr.py
@@ -166,6 +166,17 @@ def __init__(
 
     @property
     def theta(self):
+        """The current geometric median of the kernel hyperparameter distribution.
+
+        The returned values are located in log space. Use `BayesGPR.kernel_` to
+        obtain the values in their original space.
+
+        Returns
+        -------
+        ndarray
+            Array containing the kernel hyperparameters in log space.
+
+        """
         if self.kernel_ is not None:
             with np.errstate(divide="ignore"):
                 return np.copy(self.kernel_.theta)
@@ -192,6 +203,11 @@ def theta(self, theta):
 
     @contextmanager
     def noise_set_to_zero(self):
+        """Context manager in which the noise of the Gaussian process is set to 0.
+
+        This is useful when you want to predict the epistemic uncertainty of the
+        Gaussian process without the noise.
+        """
         current_theta = self.theta
         try:
             # Now we set the noise to 0, but do NOT recalculate the alphas!:
@@ -228,7 +244,58 @@ def sample(
         add=False,
         **kwargs
     ):
-        """ Sample from the posterior distribution of the hyper-parameters."""
+        """Sample from the posterior distribution of the hyper-parameters.
+
+        Parameters
+        ----------
+        X : ndarray, shape (n_points, n_dims), optional (default: None)
+            Points at which the function is evaluated. If None, it will use the saved
+            datapoints.
+        y : ndarray, shape (n_points,), optional (default: None)
+            Value(s) of the function at `X`. If None, it will use the saved values.
+        noise_vector : array-like, optional (default: None)
+            Variance(s) of the function at `X`. If None, no additional noise is applied.
+        n_threads : int, optional (default: 1)
+            Number of threads to use during inference.
+            This is currently not implemented.
+        n_desired_samples : int, optional (default: 100)
+            Number of hyperposterior samples to collect during inference. Must be a
+            multiple of `n_walkers_per_thread`.
+        n_burnin : int, optional (default: 0)
+            Number of iterations to discard before collecting hyperposterior samples.
+            Only needs to be increased if the hyperposterior samples have not reached
+            their typical set yet. Higher values increase the running time.
+        n_thin : int, optional (default: 1)
+            Only collect hyperposterior samples every k-th iteration. This can help
+            reduce the autocorrelation of the collected samples, but reduces the
+            total number of samples.
+        n_walkers_per_thread : int, optional (default: 100)
+            Number of MCMC ensemble walkers to employ during inference.
+        progress : bool, optional (default: False)
+            If True, show a progress bar during inference.
+        priors : list or callable, optional (default: None)
+            Log prior(s) for the kernel hyperparameters. Remember that the kernel
+            hyperparameters are transformed into log space. Thus your priors need to
+            perform the necessary change-of-variables.
+        position : ndarray, shape (n_walkers, n_kernel_dims), optional (default: None)
+            Starting position of the Markov chain. If None, it will use the current
+            position. If this is None as well, it will try to initialize in a small
+            ball.
+        add : bool, optional (default: False)
+            If True, all collected hyperposterior samples will be added to the existing
+            samples in `BayesGPR.chain_`. Otherwise they will be replaced.
+        kwargs : dict
+            Additional keyword arguments for emcee.EnsembleSampler.
+
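+        Examples
+        --------
+        A minimal usage sketch; assumes an already fitted instance ``gp``:
+
+        >>> gp.sample(n_desired_samples=100, n_burnin=10)
+        >>> samples = gp.chain_  # collected hyperposterior samples (log space)
+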
+        """
+
         def log_prob_fn(x, gp=self):
             lp = 0
@@ -326,6 +393,42 @@ def fit(
         position=None,
         **kwargs
     ):
+        """Fit the Gaussian process model to the given training data.
+
+        Parameters
+        ----------
+        X : ndarray, shape (n_points, n_dims)
+            Points at which the function is evaluated.
+        y : ndarray, shape (n_points,)
+            Value(s) of the function at `X`.
+        noise_vector : array-like, optional (default: None)
+            Variance(s) of the function at `X`. If None, no additional noise is applied.
+        n_threads : int, optional (default: 1)
+            Number of threads to use during inference.
+            This is currently not implemented.
+        n_desired_samples : int, optional (default: 100)
+            Number of hyperposterior samples to collect during inference. Must be a
+            multiple of `n_walkers_per_thread`.
+        n_burnin : int, optional (default: 0)
+            Number of iterations to discard before collecting hyperposterior samples.
+            Only needs to be increased if the hyperposterior samples have not reached
+            their typical set yet. Higher values increase the running time.
+        n_walkers_per_thread : int, optional (default: 100)
+            Number of MCMC ensemble walkers to employ during inference.
+        progress : bool, optional (default: False)
+            If True, show a progress bar during inference.
+        priors : list or callable, optional (default: None)
+            Log prior(s) for the kernel hyperparameters. Remember that the kernel
+            hyperparameters are transformed into log space. Thus your priors need to
+            perform the necessary change-of-variables.
+        position : ndarray, shape (n_walkers, n_kernel_dims), optional (default: None)
+            Starting position of the Markov chain. If None, it will use the current
+            position. If this is None as well, it will try to initialize in a small
+            ball.
+        kwargs : dict
+            Additional keyword arguments for `BayesGPR.sample`.
+
+        """
         self.kernel = self._kernel
         self._apply_noise_vector(len(y), noise_vector)
         super().fit(X, y)
@@ -343,6 +446,36 @@
     )
 
     def sample_y(self, X, sample_mean=False, noise=False, n_samples=1, random_state=0):
+        """Sample function realizations of the Gaussian process.
+
+        Parameters
+        ----------
+        X : ndarray, shape (n_points, n_dims)
+            Points at which to evaluate the functions.
+        sample_mean : bool, optional (default: False)
+            If True, the geometric median of the hyperposterior samples is used as the
+            Gaussian process to sample from. If False, a new set of hyperposterior
+            samples is used for each new sample.
+        noise : bool, optional (default: False)
+            If True, Gaussian noise is added to the samples.
+        n_samples : int, optional (default: 1)
+            Number of samples to draw from the Gaussian process(es).
+        random_state : int, RandomState instance or None, optional (default: 0)
+            Pseudo random number generator state used to draw the samples.
+
+        Returns
+        -------
+        result : ndarray, shape (n_points, n_samples)
+            Samples from the Gaussian process(es).
+
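+        Examples
+        --------
+        A minimal usage sketch; assumes a fitted instance ``gp`` and inputs ``X``:
+
+        >>> realizations = gp.sample_y(X, n_samples=10)
+        >>> realizations.shape  # (n_points, 10)
+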
+        """
         rng = check_random_state(random_state)
         if sample_mean:
             if noise:
diff --git a/bask/optimizer.py b/bask/optimizer.py
index 3569e37..ea5befe 100644
--- a/bask/optimizer.py
+++ b/bask/optimizer.py
@@ -168,6 +168,30 @@ def __init__(
         self._next_x = None
     def ask(self, n_points=1):
+        """Ask the optimizer for the next point to evaluate.
+
+        If the optimizer is still in its initialization phase, it will return a point
+        as specified by the init_strategy.
+        If the Gaussian process has been fit, a previously computed point, which
+        optimizes the acquisition function, will be returned.
+
+        Parameters
+        ----------
+        n_points : int
+            Number of points to return. This is currently not implemented and will
+            raise a NotImplementedError.
+
+        Returns
+        -------
+        list
+            A list with the same dimensionality as the optimization space.
+
+        Raises
+        ------
+        NotImplementedError
+            If `n_points` is != 1, which is not implemented yet.
+
+        """
         if n_points > 1:
             raise NotImplementedError(
                 "Returning multiple points is not implemented yet."
             )
@@ -197,12 +221,63 @@ def tell(
         self,
         x,
         y,
         noise_vector=None,
         fit=True,
         replace=False,
         n_samples=0,
         gp_samples=100,
         gp_burnin=10,
         progress=False,
     ):
-        # if y isn't a scalar it means we have been handed a batch of points
+        """Inform the optimizer about the objective function at discrete points.
 
-        # TODO (noise vector):
-        # 1. Replace case should be easy
-        # 2. Add case should add noise values to list
-        # -> What if noise_vector is None? (have to set noise to 0)
+
+        Provide values of the objective function at points suggested by `ask()` or
+        other points. By default, a new model will be fit to all observations.
+        The new model is used to suggest the next point at which to evaluate the
+        objective. This point can be retrieved by calling `ask()`.
+        To add observations without fitting a new model, set `fit` to False.
+        To add multiple observations in a batch, pass a list-of-lists for `x`
+        and a list of scalars for `y`.
+
+        Parameters
+        ----------
+        x : list or list of lists
+            Point(s) at which the objective function was evaluated.
+        y : scalar or list
+            Value(s) of the objective function at `x`.
+        noise_vector : list, optional (default: None)
+            Variance(s) of the objective function at `x`.
+        fit : bool, optional (default: True)
+            If True, a model will be fitted to the points, if `n_initial_points`
+            points have been evaluated.
+        replace : bool, optional (default: False)
+            If True, the existing data points will be replaced with the ones given
+            in `x` and `y`.
+        n_samples : int, optional (default: 0)
+            Number of hyperposterior samples over which to average the acquisition
+            function. More samples make the acquisition function more robust, but
+            increase the running time.
+            Can be set to 0 for `pvrs` and `vr`.
+        gp_samples : int, optional (default: 100)
+            Number of hyperposterior samples to collect during inference. More samples
+            result in a more accurate representation of the hyperposterior, but
+            increase the running time.
+            Has to be a multiple of 100.
+        gp_burnin : int, optional (default: 10)
+            Number of inference iterations to discard before beginning to collect
+            hyperposterior samples. Only needs to be increased if the hyperposterior
+            after burnin has not settled on the typical set. Higher values drastically
+            increase the running time.
+        progress : bool, optional (default: False)
+            If True, show a progress bar during the inference phase.
+
+        Returns
+        -------
+        scipy.optimize.OptimizeResult object
+            Contains the points, the values of the objective function, the search
+            space, the random state and the list of models.
+
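+        Examples
+        --------
+        A minimal sketch of the ask/tell-loop; assumes an ``Optimizer`` instance
+        ``opt`` and an objective function ``objective``:
+
+        >>> x = opt.ask()
+        >>> result = opt.tell(x, objective(x), n_samples=5, gp_burnin=10)
+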
+        """
         if replace:
             self.Xi = []
             self.yi = []
@@ -288,6 +363,30 @@
         return create_result(self.Xi, self.yi, self.space, self.rng, models=[self.gp])
 
     def run(self, func, n_iter=1, n_samples=5, gp_burnin=10):
+        """Execute the ask/tell-loop on a given objective function.
+
+        Parameters
+        ----------
+        func : function
+            The objective function to minimize.
+        n_iter : int, optional (default: 1)
+            Number of iterations to perform.
+        n_samples : int, optional (default: 5)
+            Number of hyperposterior samples over which to average the acquisition
+            function.
+        gp_burnin : int, optional (default: 10)
+            Number of inference iterations to discard before beginning to collect
+            hyperposterior samples. Only needs to be increased if the hyperposterior
+            after burnin has not settled on the typical set. Higher values drastically
+            increase the running time.
+
+        Returns
+        -------
+        scipy.optimize.OptimizeResult object
+            Contains the points, the values of the objective function, the search
+            space, the random state and the list of models.
+
+        """
         for _ in range(n_iter):
             x = self.ask()
             self.tell(x, func(x), n_samples=n_samples, gp_burnin=gp_burnin)