diff --git a/bask/acquisition.py b/bask/acquisition.py
index dd36ad2..5e47928 100644
--- a/bask/acquisition.py
+++ b/bask/acquisition.py
@@ -55,6 +55,41 @@ def evaluate_acquisitions(
     random_state=None,
     **kwargs
 ):
+    """Run a set of acquisition functions on a given set of points.
+
+    Parameters
+    ----------
+    X : ndarray, shape (n, d), float
+        Set of points for which to evaluate the acquisition functions.
+    gpr : BayesGPR object
+        Gaussian process for which the posterior distribution of the kernel
+        hyperparameters is available.
+    acquisition_functions : list of Acquisition objects
+        List of acquisition functions to evaluate.
+        Each of them should inherit from one of these:
+        - :class:`FullGPAcquisition`
+        - :class:`UncertaintyAcquisition`
+        - :class:`SampleAcquisition`
+    n_samples : int, default=10
+        Number of posterior samples to draw from the GP. The acquisition
+        functions will be evaluated for each of the sampled kernels.
+        Acquisition functions inheriting from :class:`FullGPAcquisition`
+        are an exception.
+    progress : bool, default=False
+        Show a progress bar.
+    random_state : int or RandomState or None, optional, default=None
+        Pseudo random number generator state used for random uniform sampling
+        from lists of possible values instead of scipy.stats distributions.
+    kwargs : dict
+        Any additional keyword arguments are passed on to each acquisition
+        function.
+
+    Returns
+    -------
+    acq_output : float ndarray, shape (len(acquisition_functions), len(X))
+        The acquisition functions evaluated on all of the input points.
+
+    """
     n_cand_points = len(X)
     n_acqs = len(acquisition_functions)
     acq_output = np.zeros((n_acqs, n_cand_points))
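[Editor's note, not part of the patch] The docstring above fully specifies the call signature, so a short usage sketch may help. The construction and fitting of `BayesGPR` below are assumptions (a default-constructed model and a plain `fit(X, y)` call are not shown in this diff):

```python
import numpy as np

from bask import BayesGPR
from bask.acquisition import LCB, ExpectedImprovement, evaluate_acquisitions

rng = np.random.RandomState(0)
X_train = rng.uniform(size=(20, 2))
y_train = np.sin(X_train).sum(axis=1) + rng.normal(scale=0.1, size=20)

gpr = BayesGPR()           # assumed: the default kernel is acceptable here
gpr.fit(X_train, y_train)  # assumed: fit() samples the kernel posterior

X_cand = rng.uniform(size=(100, 2))
acq = evaluate_acquisitions(
    X_cand,
    gpr,
    acquisition_functions=(ExpectedImprovement(), LCB()),
    n_samples=10,
    random_state=0,
)
x_next = X_cand[np.argmax(acq[0])]  # acq has shape (2, 100)
```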
@@ -97,6 +132,16 @@ def _ei_f(x):


 class ExpectedImprovement(UncertaintyAcquisition):
+    """Select the point maximizing the expected improvement over the current
+    optimum.
+
+    Parameters
+    ----------
+    y_opt : float, default=None
+        The value of the current optimum. If it is None, it will use the
+        minimum y value of the evaluated points.
+    """
+
     def __call__(self, mu, std, *args, y_opt=None, **kwargs):
         if y_opt is None:
             y_opt = mu.min()
@@ -108,6 +153,16 @@ def __call__(self, mu, std, *args, y_opt=None, **kwargs):


 class TopTwoEI(ExpectedImprovement):
+    """Select the point with the highest expected improvement over the
+    point with the maximum expected improvement overall.
+
+    Parameters
+    ----------
+    y_opt : float, default=None
+        The value of the current optimum. If it is None, it will use the
+        minimum y value of the evaluated points.
+    """
+
     def __call__(self, mu, std, *args, y_opt=None, **kwargs):
         ei = super().__call__(mu, std, *args, y_opt=y_opt, **kwargs)
         values = np.zeros_like(mu)
@@ -120,18 +175,42 @@ def __call__(self, mu, std, *args, y_opt=None, **kwargs):


 class Expectation(UncertaintyAcquisition):
+    """Select the point with the lowest estimated mean."""
+
     def __call__(self, mu, std, *args, **kwargs):
         return -mu


 class LCB(UncertaintyAcquisition):
-    def __call__(self, mu, std, *args, alpha=1.86, **kwargs):
+    """Select the point with the lowest lower confidence bound.
+
+    Parameters
+    ----------
+    alpha : positive float, default=1.96
+        Number of standard errors to subtract from the mean estimate.
+    """
+
+    def __call__(self, mu, std, *args, alpha=1.96, **kwargs):
         if alpha == "inf":
             return std
         return alpha * std - mu


 class MaxValueSearch(UncertaintyAcquisition):
+    """Select points based on their mutual information with the optimum value.
+
+    Parameters
+    ----------
+    n_min_samples : int, default=1000
+        Number of samples for the optimum distribution.
+
+    References
+    ----------
+    [1] Wang, Z. & Jegelka, S. (2017). Max-value Entropy Search for Efficient
+        Bayesian Optimization. Proceedings of the 34th International Conference
+        on Machine Learning, in PMLR 70:3627-3635.
+    """
+
     def __call__(self, mu, std, *args, n_min_samples=1000, **kwargs):
         def probf(x):
             return np.exp(np.sum(st.norm.logcdf(-(x - mu) / std), axis=0))
@@ -166,12 +245,19 @@ def probf(x):


 class ThompsonSampling(SampleAcquisition):
+    """Sample a random function from the GP and select its optimum."""
+
     def __call__(self, gp_sample, *args, **kwargs):
         return -gp_sample


 class VarianceReduction(FullGPAcquisition):
-    """ A criterion which tries to find the region where it can reduce the variance the most."""
+    """A criterion which tries to find the region where it can reduce the
+    global variance the most.
+
+    This criterion is suitable for active learning, where the goal is to
+    uniformly estimate the target function and not only its optimum.
+    """

     def __call__(self, X, gp, *args, **kwargs):
         n = len(X)
@@ -192,13 +278,14 @@ def __call__(self, X, gp, *args, **kwargs):
 class PVRS(FullGPAcquisition):
     """Implements the predictive variance reduction search algorithm.

-    The algorithm draws a set of Thompson samples (samples from the optimum distribution) and proposes the point which
-    reduces the predictive variance of these samples the most.
+    The algorithm draws a set of Thompson samples (samples from the optimum
+    distribution) and proposes the point which reduces the predictive variance
+    of these samples the most.

     References
     ----------
-    [1] Nguyen, Vu, et al. "Predictive variance reduction search." Workshop on Bayesian optimization at neural
-    information processing systems (NIPSW). 2017.
+    [1] Nguyen, Vu, et al. "Predictive variance reduction search." Workshop on
+        Bayesian optimization at neural information processing systems (NIPSW). 2017.
     """

     def __call__(self, X, gp, *args, n_thompson=10, random_state=None, **kwargs):
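[Editor's note, not part of the patch] All of the criteria above share one small protocol: an `UncertaintyAcquisition` subclass receives the posterior mean and standard deviation per candidate point and returns a score that is maximized. A hypothetical custom criterion built on that protocol (probability of improvement, not part of bask) could look like this:

```python
import scipy.stats as st

from bask.acquisition import UncertaintyAcquisition


class ProbabilityOfImprovement(UncertaintyAcquisition):
    """Hypothetical criterion: select the point most likely to improve
    on the current optimum."""

    def __call__(self, mu, std, *args, y_opt=None, **kwargs):
        if y_opt is None:
            y_opt = mu.min()  # same default as ExpectedImprovement
        # The objective is minimized, so improvement means y < y_opt:
        return st.norm.cdf((y_opt - mu) / std)
```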
diff --git a/bask/optimizer.py b/bask/optimizer.py
index 9f8f7ca..2b032f2 100644
--- a/bask/optimizer.py
+++ b/bask/optimizer.py
@@ -23,6 +23,79 @@
 class Optimizer(object):
+    """Execute a stepwise Bayesian optimization.
+
+    Parameters
+    ----------
+    dimensions : list, shape (n_dims,)
+        List of search space dimensions.
+        Each search dimension can be defined either as
+
+        - a `(lower_bound, upper_bound)` tuple (for `Real` or `Integer`
+          dimensions),
+        - a `(lower_bound, upper_bound, "prior")` tuple (for `Real`
+          dimensions),
+        - a list of categories (for `Categorical` dimensions), or
+        - an instance of a `Dimension` object (`Real`, `Integer` or
+          `Categorical`).
+    n_points : int, default=500
+        Number of random points to evaluate the acquisition function on.
+    n_initial_points : int, default=10
+        Number of initial points to sample before fitting the GP.
+    init_strategy : string or None, default="r2"
+        Sampling strategy to use for the initial ``n_initial_points``.
+        "r2" computes points using the quasirandom R2 sequence. If the value
+        is None or any other string, uniform random sampling is employed.
+    gp_kernel : kernel object
+        The kernel specifying the covariance function of the GP. If None is
+        passed, a suitable default kernel is constructed.
+        Note that the kernel's hyperparameters are estimated using MCMC during
+        fitting.
+    gp_kwargs : dict, optional
+        Dict of arguments passed to :class:`BayesGPR`. For example,
+        ``{'normalize_y': True}`` would allow the GP to normalize the output
+        values before fitting.
+    acq_func : string or Acquisition object, default="pvrs"
+        Acquisition function to use as a criterion to select new points to
+        test. By default we use "pvrs", which is a very robust criterion with
+        fast convergence.
+        Should be one of
+        - 'pvrs' Predictive variance reduction search
+        - 'mes' Max-value entropy search
+        - 'ei' Expected improvement
+        - 'ttei' Top-two expected improvement
+        - 'lcb' Lower confidence bound
+        - 'mean' Expected value of the GP
+        - 'ts' Thompson sampling
+        - 'vr' Global variance reduction
+        Can also be a custom :class:`Acquisition` object.
+    acq_func_kwargs : dict, optional
+        Dict of arguments passed to :class:`Acquisition`.
+    random_state : int or RandomState or None, optional, default=None
+        Pseudo random number generator state used for random uniform sampling
+        from lists of possible values instead of scipy.stats distributions.
+
+    Attributes
+    ----------
+    Xi : list
+        Points at which the objective has been evaluated.
+    yi : list of scalars
+        Values of the objective at corresponding points in `Xi`.
+    space : Space
+        An instance of :class:`skopt.space.Space`. Stores parameter search
+        space used to sample points, bounds, and type of parameters.
+    gp : BayesGPR object
+        The current underlying GP model, which is used to calculate the
+        acquisition function.
+    gp_priors : list of callables
+        List of prior distributions for the kernel hyperparameters of the GP.
+        Each callable returns the logpdf of the prior distribution.
+    n_initial_points_ : int
+        Number of initial points to sample.
+    noisei : list of floats
+        Additional pointwise noise which is added to the diagonal of the
+        kernel matrix.
+    """
     def __init__(
         self,
         dimensions,
@@ -56,8 +129,6 @@ def __init__(
         )
         self.n_points = n_points

-        # TODO: Maybe a variant of cook estimator?
-        # TODO: Construct kernel if None
         if gp_kwargs is None:
             gp_kwargs = dict()
         if gp_kernel is None:
diff --git a/bask/utils.py b/bask/utils.py
index d775de4..059b5a3 100644
--- a/bask/utils.py
+++ b/bask/utils.py
@@ -162,6 +162,22 @@ def phi(d, n_iter=10):


 def r2_sequence(n, d, seed=0.5):
+    """Output ``n`` points of the infinite R2 quasi-random sequence.
+
+    Parameters
+    ----------
+    n : int
+        Number of points to generate.
+    d : int
+        Number of dimensions for each point.
+    seed : float in [0, 1], default=0.5
+        Seed value for the sequence.
+
+    Returns
+    -------
+    z : ndarray, shape (n, d)
+        ``n`` points of the R2 sequence.
+    """
     g = phi(d)
     alpha = np.zeros(d)
     for j in range(d):
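[Editor's note, not part of the patch] The new `r2_sequence` docstring makes the function self-explanatory; a quick sketch of the documented behavior:

```python
from bask.utils import r2_sequence

points = r2_sequence(5, 2)  # five quasi-random points in two dimensions
print(points.shape)         # (5, 2); each coordinate lies in [0, 1)
```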
diff --git a/docs/conf.py b/docs/conf.py
index bf8d4cb..7f228e3 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -33,13 +33,23 @@
 # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
 extensions = [
     'sphinx.ext.autodoc',
+    'sphinx.ext.autosummary',
+    'numpydoc',
     'sphinx.ext.viewcode',
     'nbsphinx',
     'sphinx.ext.mathjax'
 ]

+autodoc_default_options = {
+    'members': True,
+    'inherited-members': True
+}
+
 # Add any paths that contain templates here, relative to this directory.
-templates_path = ['_templates']
+templates_path = ['templates']
+
+# Generate autosummary pages even if no references exist.
+autosummary_generate = True

 # The suffix(es) of source filenames.
 # You can specify multiple suffix as a list of string:
diff --git a/docs/index.rst b/docs/index.rst
index 5115724..332abc9 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -8,7 +8,7 @@ Welcome to Bayes-skopt's documentation!
    readme
    installation
    usage
-   modules
+   modules/classes
    contributing
    authors
    history
diff --git a/docs/modules/classes.rst b/docs/modules/classes.rst
new file mode 100644
index 0000000..ecbbc54
--- /dev/null
+++ b/docs/modules/classes.rst
@@ -0,0 +1,115 @@
+.. _api_ref:
+
+=============
+API Reference
+=============
+
+Bayes-skopt, or bask, builds on Scikit-Optimize and implements a fully
+Bayesian sequential optimization framework for very noisy black-box functions.
+
+
+
+:mod:`bask`: module
+===================
+
+Base classes
+------------
+.. currentmodule:: bask
+
+.. autosummary::
+   :toctree: generated/
+   :template: class.rst
+
+   BayesGPR
+   BayesSearchCV
+   Optimizer
+
+Functions
+---------
+.. currentmodule:: bask
+
+.. autosummary::
+   :toctree: generated/
+   :template: function.rst
+
+   geometric_median
+   guess_priors
+   construct_default_kernel
+   r2_sequence
+
+.. _acquisition_ref:
+
+:mod:`bask.acquisition`: Acquisition
+====================================
+
+.. automodule:: bask.acquisition
+   :no-members:
+   :no-inherited-members:
+
+**User guide:** See the :ref:`acquisition` section for further details.
+
+.. currentmodule:: bask
+
+.. autosummary::
+   :toctree: generated/
+   :template: class.rst
+
+   acquisition.PVRS
+   acquisition.MaxValueSearch
+   acquisition.ExpectedImprovement
+   acquisition.TopTwoEI
+   acquisition.LCB
+   acquisition.Expectation
+   acquisition.ThompsonSampling
+   acquisition.VarianceReduction
+
+.. currentmodule:: bask
+
+.. autosummary::
+   :toctree: generated/
+   :template: function.rst
+
+   acquisition.evaluate_acquisitions
+
+
+.. _optimizer_ref:
+
+:mod:`bask.optimizer`: Optimizer
+================================
+
+.. automodule:: bask.optimizer
+   :no-members:
+   :no-inherited-members:
+
+**User guide:** See the :ref:`optimizer` section for further details.
+
+.. currentmodule:: bask
+
+.. autosummary::
+   :toctree: generated/
+   :template: class.rst
+
+   optimizer.Optimizer
+
+.. _utils_ref:
+
+:mod:`bask.utils`: Utility functions
+====================================
+
+.. automodule:: bask.utils
+   :no-members:
+   :no-inherited-members:
+
+**User guide:** See the :ref:`utils` section for further details.
+
+
+.. currentmodule:: bask
+
+.. autosummary::
+   :toctree: generated/
+   :template: function.rst
+
+   utils.geometric_median
+   utils.r2_sequence
+   utils.guess_priors
+   utils.construct_default_kernel
diff --git a/docs/templates/class.rst b/docs/templates/class.rst
new file mode 100644
index 0000000..d223675
--- /dev/null
+++ b/docs/templates/class.rst
@@ -0,0 +1,16 @@
+:mod:`{{module}}`.{{objname}}
+{{ underline }}==============
+
+.. currentmodule:: {{ module }}
+
+.. autoclass:: {{ objname }}
+
+   {% block methods %}
+   .. automethod:: __init__
+   {% endblock %}
+
+.. include:: {{module}}.{{objname}}.examples
+
+.. raw:: html
+
diff --git a/docs/templates/function.rst b/docs/templates/function.rst
new file mode 100644
index 0000000..f4b11ed
--- /dev/null
+++ b/docs/templates/function.rst
@@ -0,0 +1,12 @@
+:mod:`{{module}}`.{{objname}}
+{{ underline }}====================
+
+.. currentmodule:: {{ module }}
+
+.. autofunction:: {{ objname }}
+
+.. include:: {{module}}.{{objname}}.examples
+
+.. raw:: html
+
diff --git a/requirements_dev.txt b/requirements_dev.txt
index 77f3169..c008762 100644
--- a/requirements_dev.txt
+++ b/requirements_dev.txt
@@ -8,6 +8,7 @@ coverage==5.0.3
 Sphinx==2.4.3
 nbsphinx==0.5.1
 nbsphinx-link==1.3.0
+numpydoc==0.9.1
 ipython==7.13.0
 twine==3.1.1
 Click==7.0
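[Editor's note, not part of the patch] To tie the documented pieces together, here is a minimal optimization loop using the `Optimizer` documented above. The constructor arguments come straight from the new docstring; the `ask`/`tell` method names are an assumption based on the Scikit-Optimize interface that bask builds on and are not shown in this diff:

```python
import numpy as np

from bask import Optimizer


def f(x):
    """Noisy one-dimensional objective to minimize."""
    return np.sin(5 * x[0]) + np.random.normal(scale=0.1)


opt = Optimizer(dimensions=[(-2.0, 2.0)], n_initial_points=5, acq_func="pvrs")
for _ in range(20):
    x = opt.ask()   # next point suggested by the acquisition function
    y = f(x)
    opt.tell(x, y)  # update the GP (and its hyperparameter posterior)
```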