Write API documentation for sphinx (#44)

* Set up sphinx for autodoc and autosummary * Docstring for Optimizer * Document r2_sequence * Document evaluate_acquisitions * Document acquisition functions
kiudee · Mar 12, 2020 · fc97b76 · fc97b76
1 parent 275b13e
commit fc97b76
Show file tree

Hide file tree

Showing 9 changed files with 338 additions and 10 deletions.
diff --git a/bask/acquisition.py b/bask/acquisition.py
@@ -55,6 +55,41 @@ def evaluate_acquisitions(
     random_state=None,
     **kwargs
 ):
+    """Run a set of acquisition functions on a given set of points.
+
+    Parameters
+    ----------
+    X : ndarray, shape (n, d), float
+        Set of points for which to evaluate the acquisition functions
+    gpr : BayesGPR object
+        Gaussian process for which the posterior distribution of the kernel
+        hyperparameters is available.
+    acquisition_functions : list of Acquisition objects
+        List of acquisition functions to evaluate.
+        They each should inherit from one of these:
+            - :class:`FullGPAcquisition`
+            - :class:`UncertaintyAcquisition`
+            - :class:`SampleAcquisition`
+    n_samples : int, default=10
+        Number of posterior samples to draw from the GP. The acquisition
+        functions will be evaluated for each of the sampled kernels.
+        Exceptions are Acquisition functions inheriting from
+        :class:`FullGPAcquisition`.
+    progress : bool, default=False
+        Show a progress bar
+    random_state : int or RandomState or None, optional, default=None
+        Pseudo random number generator state used for random uniform sampling
+        from lists of possible values instead of scipy.stats distributions.
+    kwargs : dict
+        Any additional keyword arguments are passed on to each acquisition
+        function.
+
+    Returns
+    -------
+    acq_output : float ndarray, shape (len(acquisition_functions), len(X))
+        The acquisition functions evaluated on all of the input points.
+
+    """
     n_cand_points = len(X)
     n_acqs = len(acquisition_functions)
     acq_output = np.zeros((n_acqs, n_cand_points))
@@ -97,6 +132,16 @@ def _ei_f(x):
 
 
 class ExpectedImprovement(UncertaintyAcquisition):
+    """Select the point maximizing the expected improvement over the current
+    optimum.
+
+    Parameters
+    ----------
+    y_opt : float, default=None
+        The value of the current optimum. If it is None, the minimum of the
+        predicted means ``mu`` is used.
+    """
+
     def __call__(self, mu, std, *args, y_opt=None, **kwargs):
         if y_opt is None:
             y_opt = mu.min()
@@ -108,6 +153,16 @@ def __call__(self, mu, std, *args, y_opt=None, **kwargs):
 
 
 class TopTwoEI(ExpectedImprovement):
+    """Select the point with the highest expected improvement over the
+    point with the maximum expected improvement overall.
+
+    Parameters
+    ----------
+    y_opt : float, default=None
+        The value of the current optimum. If it is None, the minimum of the
+        predicted means ``mu`` is used to compute the expected improvement.
+    """
+
     def __call__(self, mu, std, *args, y_opt=None, **kwargs):
         ei = super().__call__(mu, std, *args, y_opt=y_opt, **kwargs)
         values = np.zeros_like(mu)
@@ -120,18 +175,42 @@ def __call__(self, mu, std, *args, y_opt=None, **kwargs):
 
 
 class Expectation(UncertaintyAcquisition):
+    """Select the point with the lowest estimated mean."""
+
     def __call__(self, mu, std, *args, **kwargs):
         return -mu
 
 
 class LCB(UncertaintyAcquisition):
-    def __call__(self, mu, std, *args, alpha=1.86, **kwargs):
+    """Select the point with the lowest lower confidence bound.
+
+    Parameters
+    ----------
+    alpha : positive float, default=1.96
+        Number of standard errors to subtract from the mean estimate.
+    """
+
+    def __call__(self, mu, std, *args, alpha=1.96, **kwargs):
         if alpha == "inf":
             return std
         return alpha * std - mu
 
 
 class MaxValueSearch(UncertaintyAcquisition):
+    """Select points based on their mutual information with the optimum value.
+
+    Parameters
+    ----------
+    n_min_samples : int, default=1000
+        Number of samples for the optimum distribution
+
+    References
+    ----------
+    [1] Wang, Z. & Jegelka, S. (2017). Max-value Entropy Search for Efficient
+        Bayesian Optimization. Proceedings of the 34th International Conference
+        on Machine Learning, in PMLR 70:3627-3635
+    """
+
     def __call__(self, mu, std, *args, n_min_samples=1000, **kwargs):
         def probf(x):
             return np.exp(np.sum(st.norm.logcdf(-(x - mu) / std), axis=0))
@@ -166,12 +245,19 @@ def probf(x):
 
 
 class ThompsonSampling(SampleAcquisition):
+    """Sample a random function from the GP and select its optimum."""
+
     def __call__(self, gp_sample, *args, **kwargs):
         return -gp_sample
 
 
 class VarianceReduction(FullGPAcquisition):
-    """ A criterion which tries to find the region where it can reduce the variance the most."""
+    """A criterion which tries to find the region where it can reduce the
+    global variance the most.
+
+    This criterion is suitable for active learning, where the goal is to
+    uniformly estimate the target function and not only its optimum.
+    """
 
     def __call__(self, X, gp, *args, **kwargs):
         n = len(X)
@@ -192,13 +278,14 @@ def __call__(self, X, gp, *args, **kwargs):
 class PVRS(FullGPAcquisition):
     """Implements the predictive variance reduction search algorithm.
 
-    The algorithm draws a set of Thompson samples (samples from the optimum distribution) and proposes the point which
-    reduces the predictive variance of these samples the most.
+    The algorithm draws a set of Thompson samples (samples from the optimum
+    distribution) and proposes the point which reduces the predictive variance
+    of these samples the most.
 
     References
     ----------
-    [1] Nguyen, Vu, et al. "Predictive variance reduction search." Workshop on Bayesian optimization at neural
-        information processing systems (NIPSW). 2017.
+    [1] Nguyen, Vu, et al. "Predictive variance reduction search." Workshop on
+    Bayesian optimization at neural information processing systems (NIPSW). 2017.
     """
 
     def __call__(self, X, gp, *args, n_thompson=10, random_state=None, **kwargs):

diff --git a/bask/optimizer.py b/bask/optimizer.py
@@ -23,6 +23,79 @@
 
 
 class Optimizer(object):
+    """Execute a stepwise Bayesian optimization.
+
+    Parameters
+    ----------
+    dimensions : list, shape (n_dims,)
+        List of search space dimensions.
+        Each search dimension can be defined either as
+
+        - a `(lower_bound, upper_bound)` tuple (for `Real` or `Integer`
+          dimensions),
+        - a `(lower_bound, upper_bound, "prior")` tuple (for `Real`
+          dimensions),
+        - as a list of categories (for `Categorical` dimensions), or
+        - an instance of a `Dimension` object (`Real`, `Integer` or
+          `Categorical`).
+    n_points : int, default=500
+        Number of random points to evaluate the acquisition function on.
+    n_initial_points : int, default=10
+        Number of initial points to sample before fitting the GP.
+    init_strategy : string or None, default="r2"
+        Sampling strategy to use for the initial ``n_initial_points``.
+        "r2" computes points using the quasirandom R2 sequence. If the value
+        is None or any other string, uniform random sampling is employed.
+    gp_kernel : kernel object
+        The kernel specifying the covariance function of the GP. If None is
+        passed, a suitable default kernel is constructed.
+        Note that the kernel’s hyperparameters are estimated using MCMC during
+        fitting.
+    gp_kwargs : dict, optional
+        Dict of arguments passed to :class:`BayesGPR`.  For example,
+        ``{'normalize_y': True}`` would allow the GP to normalize the output
+        values before fitting.
+    acq_func : string or Acquisition object, default="pvrs"
+        Acquisition function to use as a criterion to select new points to test.
+        By default we use "pvrs", which is a very robust criterion with fast
+        convergence.
+        Should be one of
+            - 'pvrs' Predictive variance reduction search
+            - 'mes' Max-value entropy search
+            - 'ei' Expected improvement
+            - 'ttei' Top-two expected improvement
+            - 'lcb' Lower confidence bound
+            - 'mean' Expected value of the GP
+            - 'ts' Thompson sampling
+            - 'vr' Global variance reduction
+        Can also be a custom :class:`Acquisition` object.
+    acq_func_kwargs : dict, optional
+        Dict of arguments passed to :class:`Acquisition`.
+    random_state : int or RandomState or None, optional, default=None
+        Pseudo random number generator state used for random uniform sampling
+        from lists of possible values instead of scipy.stats distributions.
+
+    Attributes
+    ----------
+    Xi : list
+        Points at which objective has been evaluated.
+    yi : scalar
+        Values of objective at corresponding points in `Xi`.
+    space : Space
+        An instance of :class:`skopt.space.Space`. Stores parameter search
+        space used to sample points, bounds, and type of parameters.
+    gp : BayesGPR object
+        The current underlying GP model, which is used to calculate the
+        acquisition function.
+    gp_priors : list of callables
+        List of prior distributions for the kernel hyperparameters of the GP.
+        Each callable returns the logpdf of the prior distribution.
+    n_initial_points_ : int
+        Number of initial points to sample
+    noisei : list of floats
+        Additional pointwise noise which is added to the diagonal of the
+        kernel matrix
+    """
     def __init__(
         self,
         dimensions,
@@ -56,8 +129,6 @@ def __init__(
             )
         self.n_points = n_points
 
-        # TODO: Maybe a variant of cook estimator?
-        # TODO: Construct kernel if None
         if gp_kwargs is None:
             gp_kwargs = dict()
         if gp_kernel is None:

diff --git a/bask/utils.py b/bask/utils.py
@@ -162,6 +162,22 @@ def phi(d, n_iter=10):
 
 
 def r2_sequence(n, d, seed=0.5):
+    """Output ``n`` points of the infinite R2 quasi-random sequence.
+
+    Parameters
+    ----------
+    n : int
+        Number of points to generate
+    d : int
+        Number of dimensions for each point
+    seed : float in [0, 1], default=0.5
+        Seed value for the sequence
+
+    Returns
+    -------
+    z : ndarray, shape (n, d)
+        ``n`` points of the R2 sequence
+    """
     g = phi(d)
     alpha = np.zeros(d)
     for j in range(d):

diff --git a/docs/conf.py b/docs/conf.py
@@ -33,13 +33,23 @@
 # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
 extensions = [
     'sphinx.ext.autodoc',
+    'sphinx.ext.autosummary',
+    'numpydoc',
     'sphinx.ext.viewcode',
     'nbsphinx',
     'sphinx.ext.mathjax'
 ]
 
+autodoc_default_options = {
+    'members': True,
+    'inherited-members': True
+}
+
 # Add any paths that contain templates here, relative to this directory.
-templates_path = ['_templates']
+templates_path = ['templates']
+
+# generate autosummary even if no references
+autosummary_generate = True
 
 # The suffix(es) of source filenames.
 # You can specify multiple suffix as a list of string:

diff --git a/docs/index.rst b/docs/index.rst
@@ -8,7 +8,7 @@ Welcome to Bayes-skopt's documentation!
    readme
    installation
    usage
-   modules
+   modules/classes
    contributing
    authors
    history