# make_commonfunc_stan.py

#! /usr/bin/env python3
# rlib/make_commonfunc_stan.py

"""
===============================================================================
    Copyright (C) 2009-2018 Rudolf Cardinal (rudolf@pobox.com).

    This file is part of rlib.

    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.
    You may obtain a copy of the License at

        http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing, software
    distributed under the License is distributed on an "AS IS" BASIS,
    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and
    limitations under the License.
===============================================================================

Stan doesn't allow templating of its user-defined functions, so the same
boilerplate code has to be repeated for every combination of argument types.
Rather than maintain that by hand, this script generates the .stan file.

"""

import argparse
from enum import Enum
from typing import List, Tuple


# =============================================================================
# Stan variable types
# =============================================================================

class VarDescriptor(object):
    """
    Describes one Stan variable type for the code generator.

    Attributes:
        abbreviation: one-character code for the type ("R", "A", "2", "V"
            for the descriptors defined in this module); the generator
            functions concatenate these codes to build Stan function names.
        typedef: the Stan type as written in a function signature, e.g.
            ``real[]``.
        singleton: True for a single value rather than a collection.
        dimensions: number of dimensions recorded for the type.
        vector: True for the Stan ``vector`` type.
        name: Stan argument name; None until a generator function sets it.

    Equality compares ``typedef`` only, so a renamed clone still equals the
    descriptor it was cloned from. Because ``__eq__`` is overridden without
    ``__hash__``, instances are unhashable.
    """

    def __init__(self,
                 abbreviation: str,
                 typedef: str,
                 singleton: bool,
                 dimensions: int,
                 vector: bool,
                 name: str = None) -> None:
        self.abbreviation = abbreviation
        self.typedef = typedef
        self.singleton = singleton
        self.dimensions = dimensions
        self.vector = vector
        self.name = name

    def __str__(self) -> str:
        """Return the Stan type text."""
        return self.typedef

    def __repr__(self) -> str:
        """Return a debugging form showing the Stan type and argument name."""
        return "VarDescriptor<{} {}>".format(self.typedef, self.name)

    def __eq__(self, other: object) -> bool:
        """Two descriptors are equal when they describe the same Stan type."""
        # Let Python fall back to its default handling for foreign types
        # (e.g. None or str) rather than failing with AttributeError on the
        # missing ``typedef`` attribute.
        if not isinstance(other, VarDescriptor):
            return NotImplemented
        return self.typedef == other.typedef

    def clone(self) -> "VarDescriptor":
        """Return an independent copy, so it can be renamed safely."""
        return VarDescriptor(
            abbreviation=self.abbreviation,
            typedef=self.typedef,
            singleton=self.singleton,
            dimensions=self.dimensions,
            vector=self.vector,
            name=self.name
        )


# The Stan types the generator knows about. Each is a shared, unnamed
# descriptor; clone() one before giving it an argument name.

# "real": the only singleton type.
REAL = VarDescriptor(abbreviation="R", typedef="real",
                     singleton=True, dimensions=1, vector=False)

# "real[]": one-dimensional array.
ARRAY = VarDescriptor(abbreviation="A", typedef="real[]",
                      singleton=False, dimensions=1, vector=False)

# "real[,]": two-dimensional array.
ARRAY_2D = VarDescriptor(abbreviation="2", typedef="real[,]",
                         singleton=False, dimensions=2, vector=False)

# "vector".
VECTOR = VarDescriptor(abbreviation="V", typedef="vector",
                       singleton=False, dimensions=1, vector=True)

# Iteration order used when enumerating type combinations.
ALL_TYPES = [REAL, ARRAY, ARRAY_2D, VECTOR]


class SampleMethod(Enum):
    """
    The kind of sampling function that sample_generic() should generate.
    """
    PLAIN = 1  # no bounds
    LOWER = 2  # lower bound only ("LowerBound" functions)
    UPPER = 3  # upper bound only ("UpperBound" functions)
    RANGE = 4  # lower and upper bounds ("RangeBound" functions)


# =============================================================================
# Helper functions
# =============================================================================

def comment(x: str) -> str:
    """
    Return ``x`` as a Stan ``//`` comment line, indented by four spaces,
    preceded by a newline and followed by a newline plus a four-space indent.
    """
    pieces = ["", "    // {}".format(x), "    "]
    return "\n".join(pieces)


def remove_blank_lines(x: str) -> str:
    """
    Return ``x`` with every empty or whitespace-only line dropped; the
    surviving lines are re-joined with a single newline and no trailing one.
    """
    # str.strip yields "" (falsy) exactly for blank lines.
    return "\n".join(filter(str.strip, x.splitlines()))


# =============================================================================
# Common stuff
# =============================================================================

# Stan-language text (comments only): a do-not-edit notice, the "Common
# functions" banner, and reminders about Stan restrictions that affect
# user-defined functions.
HEADER = """
    // DO NOT EDIT THIS FILE DIRECTLY. It is created by make_commonfunc_stan.py.

    // ========================================================================
    // Common functions
    // ========================================================================
    /*
    Reminders:
    - Annoyingly, you can't modify arguments to Stan user-defined functions.
      (No pass-by-reference.)
    - size() doesn't work on a plain "vector". Use num_elements().
    - Array/vector indexing is 1-based.
    - The addition-assignment (+=) operator generally doesn't work (it
      appears to be reserved for the one variable "target += ...").
      Similarly for all others you might expect.
    - Can't define constants in a functions{} block.
    */
"""

# Stan source for small general-purpose functions: softmaxNth(),
# softmaxNthInvTemp(), logistic(), bound(), boundLower() and boundUpper().
# NOTE(review): softmaxNthInvTemp() subtracts mean(softmax_inputs), not the
# mean of the inputs after scaling by inverse_temp. Softmax is unchanged by
# subtracting any constant, so the result is mathematically the same; only
# the numerical-stability benefit described in softmaxNth() may be weaker.
# Confirm whether that is intended.
SIMPLE_FUNCTIONS = """
    // ------------------------------------------------------------------------
    // Simple functions
    // ------------------------------------------------------------------------

    real softmaxNth(vector softmax_inputs, int index)
    {
        /*
            For softmax: see my miscstat.R; the important points for
            optimization are (1) that softmax is invariant to the addition/
            subtraction of a constant, and subtracting the mean makes the
            numbers less likely to fall over computationally; (2) we only
            need the final part of the computation for a single number
            (preference for the right), so we don't have to waste time
            vector-calculating the preference for the left as well [that is:
            we don't have to calculate s_exp_products / sum(s_exp_products)].

            Since Stan 2.0.0, the alternative is to use softmax(); see
            stan/math/fwd/mat/fun/softmax.hpp. Not sure which is faster, or
            whether it really matters.
        */
        int length = num_elements(softmax_inputs);
        vector[length] s_exp_products;
        if (index < 1 || index > length) {
            reject("softmaxNth(): index is ", index,
                   " but must be in range 1-", length);
        }
        s_exp_products = exp(softmax_inputs - mean(softmax_inputs));
        return s_exp_products[index] / sum(s_exp_products);
    }

    real softmaxNthInvTemp(vector softmax_inputs, real inverse_temp, int index)
    {
        int length = num_elements(softmax_inputs);
        vector[length] s_exp_products;
        if (index < 1 || index > length) {
            reject("softmaxNthInvTemp(): index is ", index,
                   " but must be in range 1-", length);
        }
        s_exp_products = exp(softmax_inputs * inverse_temp - mean(softmax_inputs));
        return s_exp_products[index] / sum(s_exp_products);
    }

    real logistic(real x, real x0, real k, real L)
    {
        // Notation as per https://en.wikipedia.org/wiki/Logistic_function
        // x0: centre
        // k: steepness
        // L: maximum (usually 1)

        return L / (1 + exp(-k * (x - x0)));
    }

    real bound(real x, real min_value, real max_value)
    {
        // We should simply be able to do this:
        //     return max(min_value, min(x, max_value));
        // ... but Stan doesn't have max(real, real) or
        // min(real, real) functions!

        if (x < min_value) {
            return min_value;
        } else if (x > max_value) {
            return max_value;
        } else {
            return x;
        }
    }

    real boundLower(real x, real min_value)
    {
        // a.k.a. max()

        if (x < min_value) {
            return min_value;
        } else {
            return x;
        }
    }

    real boundUpper(real x, real max_value)
    {
        // a.k.a. min()

        if (x > max_value) {
            return max_value;
        } else {
            return x;
        }
    }
"""

# Stan source for the deprecated ANOVA-style helpers: interactionIndex2Way(),
# setLastForZeroSum() and appendElementForZeroSum().
#
# interactionIndex2Way() flattens a (first_index, second_index) pair into a
# single index with the second index cycling fastest, so the stride applied
# to the first index must be second_max. Multiplying by first_max instead
# makes cells collide whenever the two maxima differ: for a 2 x 3 design,
# (1, 3) and (2, 1) would both map to 3. first_max is not needed for the
# calculation but stays in the signature for existing callers.
DUFF_ANOVA_FUNCTIONS = """
    // ------------------------------------------------------------------------
    // ANOVA-type designs: DEPRECATED APPROACH
    // ------------------------------------------------------------------------
    // ... rather than coding intercept + main effects + interactions (etc.),
    // as here, it's probably best to code individual cells. That makes
    // distributions more sensible (and predictable/easily calculable).

    int interactionIndex2Way(int first_index, int first_max,
                             int second_index, int second_max)
    {
        /*
            Because Stan doesn't support sampling into matrix, we need to
                         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
            convert matrix-like concepts to vectors. Specifically, it doesn't
            support either
                matrix[A, B] m;
                m ~ normal(0, 0.5);  // error: "no matches for matrix ~ normal(int, real)"
            or
                real a[A, B];
                a ~ normal(0, 0.5);  // error: "no matches for real[,] ~ normal(int, real)"

            And note that a vectorized sampling statement is strongly preferred
            (for performance reasons) over iterating through a matrix:
                https://groups.google.com/forum/#!topic/stan-users/4gv3fNCqSNk
                    "Do not loop over sampling statements when a vectorized
                    sampling statement is possible"

            So we use a vector of size A*B, and this index lookup function.
            Parameters:
            - first_index is from 1 to first_max
            - second_index is from 1 to second_max
            - We want a consecutive index from 1 to (first_max * second_max)

            In the output, the FIRST will cycle LEAST rapidly, and the
            LAST will cycle MOST rapidly.
        */
        return (
            (first_index - 1) * second_max +    // slow cycling
            second_index                        // fast cycling
        );
    }

    vector setLastForZeroSum(vector parameters)
    {
        /*
            Makes a vector of parameters sum to zero, by setting the last
            element to the negative sum of the others.
            Used for ANOVA-style effects; e.g. if you have a grand mean, you
            might specify the effects of a three-level factor A as A1, A2, A3;
            then A1 + A2 + A3 must be zero, so A1 and A2 are free parameters
            that are drawn from an appropriate distribution, and then A3 is
            fully constrainted to be -(A1 + A2).

            Because we can't modify the input parameters, we make a new copy.

            Returns a vector of the SAME LENGTH as the original.
            (The last element of the incoming vector is ignored.)
        */
        int length = num_elements(parameters);
        vector[length] newparams;
        real total = 0.0;
        for (i in 1:length - 1) {
            real value = parameters[i];
            newparams[i] = value;
            total = total + value;
        }
        newparams[length] = -total;
        return newparams;
    }

    vector appendElementForZeroSum(vector parameters)
    {
        /*
            As for setLastForZeroSum(), but uses all the information in the
            incoming vector, and returns a vector that's one element longer.
        */
        int initial_length = num_elements(parameters);
        int new_length = initial_length + 1;
        vector[new_length] newparams;
        real total = 0.0;
        for (i in 1:initial_length) {
            real value = parameters[i];
            newparams[i] = value;
            total = total + value;
        }
        newparams[new_length] = -total;
        return newparams;
    }
"""

# Stan block comment documenting the generated "sample..._lp" functions: why
# their names end in "_lp", the R/A/2/V argument-type abbreviations and the
# combinations listed, and notes on two-dimensional arrays and arithmetic.
# NOTE(review): three wording slips in the emitted text are left untouched
# here because the string is program output: "different names" (overloading
# concerns the *same* name), "you have do to it", and "b[1] is a real[]"
# (the expression in that loop is thing[i]).
LOG_PROB_HEADER = """
    // ------------------------------------------------------------------------
    // LOG PROBABILITY FUNCTIONS FOR BRIDGE SAMPLING
    // ------------------------------------------------------------------------
    /*
    We can have functions that access the log probability accumulator
    if the function name ends in '_lp'; see Stan manual section 23.3.

    RE ARGUMENTS:

    The Stan manual uses notation like
         real normal_lpdf(reals y | reals mu, reals sigma)
    but "reals" isn't something you can actually use in user functions.
    See p495:
        "reals" means:
                real
                real[]
                vector
                row_vector
        "ints" means
                int
                int[]

    Moreover, you can't define two copies of the same function with
    different names (23.6: no overloading of user-defined functions).
    For real arguments, the options are therefore:
         real
         real[]  // one-dimensional array
         real[,]  // two-dimensional array
         vector  // vector, similar to a one-dimensional array.
         matrix  // matrix, similar to a two-dimensional array.
    See p297 of the 2017 Stan manual, and also p319.
    Which do we use in practice?
    - Firstly, we use single numbers or one-dimensional collections,
      and generally the latter. So that means real[] or vector.
    - We use both.
    - So let's have "Real", "Arr" and "Vec" versions.
    - Then, to make things worse, we sometimes have constant parameters,
      and sometimes array/vector parameters...
    - For something with two distribution parameters, like the normal
      distribution and many others, that means that we have 3*3*3 combinations
      for each thing. Urgh. Stan should allow user overloading ;).
    - Let's do it and define "R", "A", "2", "V" for the parameters
    - Except we won't be returning R unless it's RRR!
    - Last thing cycles fastest.
    So:
        RRR
        -- nothing else R*

        ARA
        ARV
        AAR
        AAA
        AAV
        AVR
        AVA
        AVV

        2RR
            ...

        VRA
        VRV
        VAR
        VAA
        VAV
        VVR
        VVA
        VVV

    RE SAMPLING TWO-DIMENSIONAL ARRAYS:

    You can't sample an entire matrix or 2D array; you have do to it row-wise.
    - This isn't very clear in the manual, as far as I can see.
    - The definition of e.g. beta_lpdf() is in terms of "reals", which
      probably means a vector or array of real.
    - Section 9.6 ("Multi-logit regression") of the Stan manual v2.16.0
      gives an example where one would use a matrix sampling statement but
      they don't.
    - But it is explicit in the sense that they define what they mean by
      "reals", as above, and that doesn't include 2D arrays.
    - Better to move the boilerplate code here than in user land, though.

    RE TWO-DIMENSIONAL ARRAYS:

        real thing[N_A, N_B];

        // One way to iterate through all elements:
        for (a in 1:N_A) {
            for (b in 1:N_B) {
                do_something(thing[a, b]);
            }
        }

        // NOT another way to iterate through all elements:
        for (i in 1:num_elements(thing)) {
            do_something(thing[i]);  // A BUG, because b[1] is a real[], not a real
        }

    So for some functions we want real[,]... let's give this the one-character
    notation "2" (for 2D array).

    Now:
        num_elements() gives the total, in this case N_A * N_B;
        size() gives the size of first dimension, in this case N_A;
        dims() gives all dimensions, in this case an int[] containing {N_A, N_B}.

    RE ARITHMETIC:

    Note that we cannot do:
            real * real[]
            vector * vector

    */
"""

# Stan source for the bounds-checking helpers that the generated bounded
# sampling functions call (enforceLowerBound_*, enforceUpperBound_* and
# enforceRangeBounds_*). There is one function per bound kind and per y type
# (R, A, 2, V); each adds negative_infinity() to target for every element of
# y that lies outside the bound(s).
LOG_PROB_HELPERS = """
    // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    // Helper functions for boundary checking
    // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    // See Stan (2017) manual p82.
    // These are internal functions that ASSUME size match.
    // We can't use a leading "_" prefix on function names (Stan syntax error).

    // Lower

    void enforceLowerBound_R_lp(real y, real lower)
    {
        if (y < lower) {
            target += negative_infinity();
        }
    }
    void enforceLowerBound_A_lp(real[] y, real lower)
    {
        int length = num_elements(y);
        for (i in 1:length) {
            if (y[i] < lower) {
                target += negative_infinity();
            }
        }
    }
    void enforceLowerBound_2_lp(real[,] y, real lower)
    {
        int dimensions[2] = dims(y);
        int nrows = dimensions[1];
        int ncols = dimensions[2];
        for (i in 1:nrows) {
            for (j in 1:ncols) {
                if (y[i, j] < lower) {
                    target += negative_infinity();
                }
            }
        }
    }
    void enforceLowerBound_V_lp(vector y, real lower)
    {
        int length = num_elements(y);
        for (i in 1:length) {
            if (y[i] < lower) {
                target += negative_infinity();
            }
        }
    }

    // Upper

    void enforceUpperBound_R_lp(real y, real upper)
    {
        if (y > upper) {
            target += negative_infinity();
        }
    }
    void enforceUpperBound_A_lp(real[] y, real upper)
    {
        int length = num_elements(y);
        for (i in 1:length) {
            if (y[i] > upper) {
                target += negative_infinity();
            }
        }
    }
    void enforceUpperBound_2_lp(real[,] y, real upper)
    {
        int dimensions[2] = dims(y);
        int nrows = dimensions[1];
        int ncols = dimensions[2];
        for (i in 1:nrows) {
            for (j in 1:ncols) {
                if (y[i, j] > upper) {
                    target += negative_infinity();
                }
            }
        }
    }
    void enforceUpperBound_V_lp(vector y, real upper)
    {
        int length = num_elements(y);
        for (i in 1:length) {
            if (y[i] > upper) {
                target += negative_infinity();
            }
        }
    }

    // Range

    void enforceRangeBounds_R_lp(real y, real lower, real upper)
    {
        if (y < lower || y > upper) {
            target += negative_infinity();
        }
    }
    void enforceRangeBounds_A_lp(real[] y, real lower, real upper)
    {
        int length = num_elements(y);
        for (i in 1:length) {
            if (y[i] < lower || y[i] > upper) {
                target += negative_infinity();
            }
        }
    }
    void enforceRangeBounds_2_lp(real[,] y, real lower, real upper)
    {
        int dimensions[2] = dims(y);
        int nrows = dimensions[1];
        int ncols = dimensions[2];
        for (i in 1:nrows) {
            for (j in 1:ncols) {
                if (y[i, j] < lower || y[i, j] > upper) {
                    target += negative_infinity();
                }
            }
        }
    }
    void enforceRangeBounds_V_lp(vector y, real lower, real upper)
    {
        int length = num_elements(y);
        for (i in 1:length) {
            if (y[i] < lower || y[i] > upper) {
                target += negative_infinity();
            }
        }
    }
"""

# Stan banner comment introducing the section of log-probability functions
# that use non-centered reparameterization.
REPARAM_HEADER = """
    // ------------------------------------------------------------------------
    // LOG PROBABILITY FUNCTIONS FOR BRIDGE SAMPLING WITH NON-CENTERED
    // REPARAMETERIZATION
    // ------------------------------------------------------------------------
"""


# =============================================================================
# Generic distribution
# =============================================================================

def sample_generic(name_caps: str,
                   name_lower: str,
                   y: VarDescriptor,
                   distribution_params: List[VarDescriptor],
                   method: SampleMethod) -> str:
    """
    Generate the Stan source for one ``sample<Name>[<Bound>]_<types>_lp``
    function, which adds the log density of ``y`` to ``target``.

    Args:
        name_caps: distribution name as it appears in the generated function
            name, e.g. "Normal".
        name_lower: prefix of the Stan density functions to call, e.g.
            "normal" for normal_lpdf / normal_lcdf / normal_lccdf.
        y: descriptor of the quantity being sampled. It is not modified; a
            copy named "y" is used.
        distribution_params: descriptors of the distribution's parameters,
            in call order, each with ``name`` already set by the caller.
        method: plain sampling, or sampling with a lower, upper, or
            lower-and-upper bound.

    Returns:
        The text of the Stan function definition.

    Raises:
        NotImplementedError: if ``y`` is two-dimensional and any parameter
            has more than one dimension.
        AssertionError: if ``method`` is not a known SampleMethod.

    Truncation correction:
        For a bounded distribution every sampled value needs the log of the
        probability mass inside the bound(s) subtracted from its log density.
        A vectorized ``*_lpdf`` call returns the SUM over its elements, so:

        - If all parameters are scalars, the correction is one number and it
          is multiplied by the number of elements in the lpdf call.
        - If a parameter is a collection, the vectorized ``*_lcdf`` /
          ``*_lccdf`` call already sums one term per element, which is what
          is wanted for a single bound. For a range, log_diff_exp() must be
          applied per element before summing, so a loop is generated.
    """
    if (y.dimensions == 2 and
            any(vd.dimensions > 1 for vd in distribution_params)):
        raise NotImplementedError("y={}, distribution_params={}".format(
            y, distribution_params))
    # Name a private copy, so that the caller's descriptor (possibly one of
    # the shared module-level constants) is left untouched.
    y = y.clone()
    y.name = "y"
    call_params = [y] + distribution_params
    lower = REAL.clone()
    lower.name = "lower"
    upper = REAL.clone()
    upper.name = "upper"
    lpdf_func = "{}_lpdf".format(name_lower)
    lcdf_func = "{}_lcdf".format(name_lower)
    lccdf_func = "{}_lccdf".format(name_lower)
    pdf_call_params = ", ".join(vd.name for vd in distribution_params)
    scalar_params = all(vd.singleton for vd in distribution_params)

    def params_at(index: str) -> str:
        """Parameter list with every non-scalar parameter subscripted."""
        return ", ".join(
            vd.name if vd.singleton else "{}[{}]".format(vd.name, index)
            for vd in distribution_params
        )

    def log_mass(params: str, column: int) -> str:
        """
        Stan expression for the log of the probability mass that the
        untruncated distribution places inside the bound(s). ``column`` is
        the column at which the expression starts; it is used only to align
        the continuation line of the log_diff_exp() call.
        """
        if method == SampleMethod.LOWER:
            return "{}(lower | {})".format(lccdf_func, params)
        if method == SampleMethod.UPPER:
            return "{}(upper | {})".format(lcdf_func, params)
        return "log_diff_exp({f}(upper | {p}),\n{pad}{f}(lower | {p}))".format(
            f=lcdf_func,
            p=params,
            pad=" " * (column + len("log_diff_exp("))
        )

    if method == SampleMethod.PLAIN:
        if y.dimensions == 2:
            # A 2D array cannot be passed to an lpdf function; go row by row.
            code = """
        int nrows = size(y);
        for (i in 1:nrows) {{
            target += {lpdf_func}(y[i] | {pdf_call_params});
        }}
            """.format(lpdf_func=lpdf_func,
                       pdf_call_params=pdf_call_params)
        else:
            code = """
        target += {lpdf_func}(y | {pdf_call_params});
            """.format(lpdf_func=lpdf_func,
                       pdf_call_params=pdf_call_params)
        funcname_extra = ""

    elif method in (SampleMethod.LOWER, SampleMethod.UPPER,
                    SampleMethod.RANGE):
        if method == SampleMethod.LOWER:
            funcname_extra = "LowerBound"
            enforce = "enforceLowerBound_{}_lp(y, lower);"
            call_params += [lower]
        elif method == SampleMethod.UPPER:
            funcname_extra = "UpperBound"
            enforce = "enforceUpperBound_{}_lp(y, upper);"
            call_params += [upper]
        else:
            funcname_extra = "RangeBound"
            enforce = "enforceRangeBounds_{}_lp(y, lower, upper);"
            call_params += [lower, upper]
        fmt = dict(lpdf_func=lpdf_func,
                   pdf_call_params=pdf_call_params,
                   enforce=enforce.format(y.abbreviation))

        # One scalar correction shared by several values: scale it up.
        needs_scaling = scalar_params and not y.singleton
        # Range bounds with per-element parameters: correct element by
        # element, since log_diff_exp() of two summed lcdfs is not the sum
        # of the per-element corrections.
        needs_loop = (method == SampleMethod.RANGE and
                      not scalar_params and
                      not y.singleton)

        if needs_scaling and y.dimensions == 2:
            code = """
        int nrows = size(y);
        real correction_per_value = {log_mass};
        for (i in 1:nrows) {{
            target += {lpdf_func}(y[i] | {pdf_call_params}) -
                      num_elements(y[i]) * correction_per_value;
        }}
        {enforce}
            """.format(log_mass=log_mass(pdf_call_params, 36), **fmt)
        elif needs_scaling:
            code = """
        target += {lpdf_func}(y | {pdf_call_params}) -
                  num_elements(y) *
                  {log_mass};
        {enforce}
            """.format(log_mass=log_mass(pdf_call_params, 18), **fmt)
        elif needs_loop and y.dimensions == 2:
            # Each row of y is sampled against the same parameter
            # collection, so the per-row correction is computed once.
            code = """
        int dimensions[2] = dims(y);
        int nrows = dimensions[1];
        int ncols = dimensions[2];
        real correction = 0.0;
        for (j in 1:ncols) {{
            correction = correction +
                         {log_mass};
        }}
        for (i in 1:nrows) {{
            target += {lpdf_func}(y[i] | {pdf_call_params}) -
                      correction;
        }}
        {enforce}
            """.format(log_mass=log_mass(params_at("j"), 25), **fmt)
        elif needs_loop:
            code = """
        int length = num_elements(y);
        real correction = 0.0;
        for (i in 1:length) {{
            correction = correction +
                         {log_mass};
        }}
        target += {lpdf_func}(y | {pdf_call_params}) - correction;
        {enforce}
            """.format(log_mass=log_mass(params_at("i"), 25), **fmt)
        elif y.dimensions == 2:
            # Single bound, per-element parameters: the vectorized
            # lcdf/lccdf already sums one term per element of a row.
            code = """
        int nrows = size(y);
        real correction = {log_mass};
        for (i in 1:nrows) {{
            target += {lpdf_func}(y[i] | {pdf_call_params}) -
                      correction;
        }}
        {enforce}
            """.format(log_mass=log_mass(pdf_call_params, 26), **fmt)
        else:
            # Scalar y, or a single bound with per-element parameters: one
            # vectorized correction matches the vectorized lpdf.
            code = """
        target += {lpdf_func}(y | {pdf_call_params}) -
                  {log_mass};
        {enforce}
            """.format(log_mass=log_mass(pdf_call_params, 18), **fmt)
    else:
        raise AssertionError("bug")

    funcname = "sample{name_caps}{funcname_extra}_{types}_lp".format(
        name_caps=name_caps,
        funcname_extra=funcname_extra,
        types="".join(vd.abbreviation for vd in [y] + distribution_params)
    )
    param_defs = ", ".join(
        "{} {}".format(vd.typedef, vd.name)
        for vd in call_params
    )

    return """
    void {funcname}({param_defs})
    {{
        {code}
    }}
    """.format(
        funcname=funcname,
        param_defs=param_defs,
        code=code.strip(),
    )


def sample_uniform(y: VarDescriptor, lower: VarDescriptor,
                   upper: VarDescriptor) -> str:
    """
    Generate the Stan source for one ``sampleUniform_<types>_lp`` function,
    which adds ``uniform_lpdf(y | lower, upper)`` to ``target``.

    Args:
        y: descriptor of the quantity being sampled.
        lower: descriptor of the lower limit of the uniform distribution.
        upper: descriptor of the upper limit of the uniform distribution.

    Returns:
        The text of the Stan function definition.

    Raises:
        NotImplementedError: if ``y`` is two-dimensional and either limit
            has more than one dimension.

    The descriptors passed in are not modified, so shared descriptors --
    including the same object passed for both ``lower`` and ``upper`` -- are
    safe to use.
    """
    distribution_params = [lower, upper]
    if (y.dimensions == 2 and
            any(vd.dimensions > 1 for vd in distribution_params)):
        raise NotImplementedError("y={}, distribution_params={}".format(
            y, distribution_params))

    if y.dimensions == 2:
        # A 2D array cannot be passed to uniform_lpdf(); go row by row.
        code = """
        int nrows = size(y);
        for (i in 1:nrows) {
            target += uniform_lpdf(y[i] | lower, upper);
        }
        """
    else:
        code = """
        target += uniform_lpdf(y | lower, upper);
        """

    # Pair each descriptor with its Stan argument name here, rather than
    # writing the name onto the descriptor: if one object were passed for two
    # arguments, the second name would overwrite the first and the generated
    # signature would repeat an argument name.
    named_params = [(y, "y"), (lower, "lower"), (upper, "upper")]
    funcname = "sampleUniform_{types}_lp".format(
        types="".join(vd.abbreviation for vd, _ in named_params)
    )
    param_defs = ", ".join(
        "{} {}".format(vd.typedef, argname)
        for vd, argname in named_params
    )

    return """
    void {funcname}({param_defs})
    {{
        {code}
    }}
    """.format(
        funcname=funcname,
        param_defs=param_defs,
        code=code.strip(),
    )


# =============================================================================
# Normal distribution
# =============================================================================

def get_normal_distribution() -> str:
    """
    Return Stan source for the family of normal-distribution sampling
    functions.

    The result is a banner comment followed by four sections -- plain
    sampling, lower bound, upper bound, and range bounds -- each holding one
    sample_generic() function per supported (y, mu, sigma) type combination.
    """
    banner = """
    // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    // Normal distribution
    // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    """

    def supported(y_type: VarDescriptor,
                  mu_type: VarDescriptor,
                  sigma_type: VarDescriptor) -> bool:
        # A real or 2D-array y is generated only with real parameters, and a
        # 2D array is never used as a distribution parameter.
        real_params = mu_type == REAL and sigma_type == REAL
        if (y_type == REAL or y_type == ARRAY_2D) and not real_params:
            return False
        return mu_type.dimensions != 2 and sigma_type.dimensions != 2

    combinations = [
        (y, mu, sigma)
        for y in ALL_TYPES
        for mu in ALL_TYPES
        for sigma in ALL_TYPES
        if supported(y, mu, sigma)
    ]  # type: List[Tuple[VarDescriptor, VarDescriptor, VarDescriptor]]  # noqa

    sections = (
        ("Sampling", SampleMethod.PLAIN),
        ("Sampling with lower bound", SampleMethod.LOWER),
        ("Sampling with upper bound", SampleMethod.UPPER),
        ("Sampling with range (lower and upper) bounds", SampleMethod.RANGE),
    )

    chunks = [banner]
    for title, method in sections:
        chunks.append(comment(title))
        for y, mu, sigma in combinations:
            # Name fresh copies; naming the shared type constants directly
            # would overwrite their names.
            y_arg = y.clone()
            mu_arg = mu.clone()
            mu_arg.name = "mu"
            sigma_arg = sigma.clone()
            sigma_arg.name = "sigma"
            chunks.append(sample_generic(
                name_caps="Normal",
                name_lower="normal",
                y=y_arg,
                distribution_params=[mu_arg, sigma_arg],
                method=method
            ))
    return "".join(chunks)


# =============================================================================
# Cauchy distribution
# =============================================================================

def get_cauchy_distribution() -> str:
    """
    Return Stan source for the family of Cauchy-distribution sampling
    functions.

    The result is a banner comment followed by four sections -- plain
    sampling, lower bound, upper bound, and range bounds -- each holding one
    sample_generic() function per supported (y, mu, sigma) type combination.
    """
    banner = """
    // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    // Cauchy distribution
    // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    """

    def supported(y_type: VarDescriptor,
                  mu_type: VarDescriptor,
                  sigma_type: VarDescriptor) -> bool:
        # A real or 2D-array y is generated only with real parameters, and a
        # 2D array is never used as a distribution parameter.
        real_params = mu_type == REAL and sigma_type == REAL
        if (y_type == REAL or y_type == ARRAY_2D) and not real_params:
            return False
        return mu_type.dimensions != 2 and sigma_type.dimensions != 2

    combinations = [
        (y, mu, sigma)
        for y in ALL_TYPES
        for mu in ALL_TYPES
        for sigma in ALL_TYPES
        if supported(y, mu, sigma)
    ]  # type: List[Tuple[VarDescriptor, VarDescriptor, VarDescriptor]]  # noqa

    sections = (
        ("Sampling", SampleMethod.PLAIN),
        ("Sampling with lower bound", SampleMethod.LOWER),
        ("Sampling with upper bound", SampleMethod.UPPER),
        ("Sampling with range (lower and upper) bounds", SampleMethod.RANGE),
    )

    chunks = [banner]
    for title, method in sections:
        chunks.append(comment(title))
        for y, mu, sigma in combinations:
            # Name fresh copies; naming the shared type constants directly
            # would overwrite their names.
            y_arg = y.clone()
            mu_arg = mu.clone()
            mu_arg.name = "mu"
            sigma_arg = sigma.clone()
            sigma_arg.name = "sigma"
            chunks.append(sample_generic(
                name_caps="Cauchy",
                name_lower="cauchy",
                y=y_arg,
                distribution_params=[mu_arg, sigma_arg],
                method=method
            ))
    return "".join(chunks)


# =============================================================================
# Beta distribution
# =============================================================================

def get_beta_distribution() -> str:
    """
    Return Stan source for the family of beta-distribution sampling
    functions.

    The result is a banner comment followed by four sections -- plain
    sampling, lower bound, upper bound, and range bounds -- each holding one
    sample_generic() function per supported (y, alpha, beta) type
    combination.
    """
    banner = """
    // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    // Beta distribution
    // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    """

    def supported(y_type: VarDescriptor,
                  alpha_type: VarDescriptor,
                  beta_type: VarDescriptor) -> bool:
        # A real or 2D-array y is generated only with real parameters, and a
        # 2D array is never used as a distribution parameter.
        real_params = alpha_type == REAL and beta_type == REAL
        if (y_type == REAL or y_type == ARRAY_2D) and not real_params:
            return False
        return alpha_type.dimensions != 2 and beta_type.dimensions != 2

    combinations = [
        (y, alpha, beta)
        for y in ALL_TYPES
        for alpha in ALL_TYPES
        for beta in ALL_TYPES
        if supported(y, alpha, beta)
    ]  # type: List[Tuple[VarDescriptor, VarDescriptor, VarDescriptor]]  # noqa

    sections = (
        ("Sampling", SampleMethod.PLAIN),
        ("Sampling with lower bound", SampleMethod.LOWER),
        ("Sampling with upper bound", SampleMethod.UPPER),
        ("Sampling with range (lower and upper) bounds", SampleMethod.RANGE),
    )

    chunks = [banner]
    for title, method in sections:
        chunks.append(comment(title))
        for y, alpha, beta in combinations:
            # Name fresh copies; naming the shared type constants directly
            # would overwrite their names.
            y_arg = y.clone()
            alpha_arg = alpha.clone()
            alpha_arg.name = "alpha"
            beta_arg = beta.clone()
            beta_arg.name = "beta"
            chunks.append(sample_generic(
                name_caps="Beta",
                name_lower="beta",
                y=y_arg,
                distribution_params=[alpha_arg, beta_arg],
                method=method
            ))
    return "".join(chunks)


# =============================================================================
# Gamma distribution
# =============================================================================

def get_gamma_distribution() -> str:
    """
    Generate the Stan code for the gamma-distribution sampling functions.

    Every supported (y, alpha, beta) type combination is handed to
    ``sample_generic`` once per sampling method (plain, lower bound, upper
    bound, range); each group of functions is preceded by a comment heading.

    A combination is skipped when:

    - y is ``REAL`` or ``ARRAY_2D`` and alpha/beta are not both ``REAL``; or
    - alpha or beta has two dimensions.

    Returns:
        the generated Stan source, beginning with a section banner
    """
    chunks = ["""
    // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    // Gamma distribution
    // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    """]

    supported_combinations = [
        (y, alpha, beta)
        for y in ALL_TYPES
        for alpha in ALL_TYPES
        for beta in ALL_TYPES
        if not (
            ((y == REAL or y == ARRAY_2D) and
             (alpha != REAL or beta != REAL)) or
            alpha.dimensions == 2 or
            beta.dimensions == 2
        )
    ]  # type: List[Tuple[VarDescriptor, VarDescriptor, VarDescriptor]]  # noqa

    sections = [
        ("Sampling", SampleMethod.PLAIN),
        ("Sampling with lower bound", SampleMethod.LOWER),
        ("Sampling with upper bound", SampleMethod.UPPER),
        ("Sampling with range (lower and upper) bounds", SampleMethod.RANGE),
    ]
    for heading, method in sections:
        chunks.append(comment(heading))
        for y, alpha, beta in supported_combinations:
            # Work on clones so that naming the parameters does not alter
            # the shared type descriptors.
            alpha_param = alpha.clone()
            beta_param = beta.clone()
            y_param = y.clone()
            alpha_param.name = "alpha"
            beta_param.name = "beta"
            chunks.append(sample_generic(
                name_caps="Gamma",
                name_lower="gamma",
                y=y_param,
                distribution_params=[alpha_param, beta_param],
                method=method
            ))
    return "".join(chunks)


# =============================================================================
# Uniform distribution
# =============================================================================

def get_uniform_distribution() -> str:
    """
    Generate the Stan code for the uniform-distribution sampling functions.

    One function is produced by ``sample_uniform`` for every supported
    (y, lower, upper) type combination. A combination is skipped when:

    - y is ``REAL`` or ``ARRAY_2D`` and lower/upper are not both ``REAL``; or
    - lower or upper has two dimensions.

    Returns:
        the generated Stan source, beginning with a section banner
    """
    chunks = ["""
    // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    // Uniform distribution
    // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    // Always constrained with both a lower and an upper bound.
    // Simple; no extra work for the bridge sampler.
    """]

    supported_combinations = [
        (y, lower, upper)
        for y in ALL_TYPES
        for lower in ALL_TYPES
        for upper in ALL_TYPES
        if not (
            ((y == REAL or y == ARRAY_2D) and
             (lower != REAL or upper != REAL)) or
            lower.dimensions == 2 or
            upper.dimensions == 2
        )
    ]  # type: List[Tuple[VarDescriptor, VarDescriptor, VarDescriptor]]  # noqa

    chunks.append(comment("Sampling"))
    for y, lower, upper in supported_combinations:
        # Work on clones so that naming the parameters does not alter the
        # shared type descriptors.
        lower_param = lower.clone()
        upper_param = upper.clone()
        y_param = y.clone()
        lower_param.name = "lower"
        upper_param.name = "upper"
        chunks.append(
            sample_uniform(y=y_param, lower=lower_param, upper=upper_param))
    return "".join(chunks)


# =============================================================================
# Bernoulli distribution
# =============================================================================

# Fixed (not generated) Stan functions, each adding bernoulli_lpmf(y | theta)
# to the target. The two-letter suffix gives the argument types of
# (y, theta): I = int, A = array, R = real, V = vector.
SAMPLE_BERNOULLI = """
    // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    // Bernoulli distribution
    // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    // y is in {0, 1} and theta is in the range [0, 1].

    void sampleBernoulli_IR_lp(int y, real theta)
    {
        target += bernoulli_lpmf(y | theta);
    }
    void sampleBernoulli_AR_lp(int[] y, real theta)
    {
        target += bernoulli_lpmf(y | theta);
    }
    void sampleBernoulli_AA_lp(int[] y, real[] theta)
    {
        target += bernoulli_lpmf(y | theta);
    }
    void sampleBernoulli_AV_lp(int[] y, vector theta)
    {
        target += bernoulli_lpmf(y | theta);
    }
"""


# =============================================================================
# Reparameterized normal distribution
# =============================================================================

def make_reparam_normal(y: VarDescriptor,
                        mu: VarDescriptor,
                        sigma: VarDescriptor,
                        method: SampleMethod) -> str:
    """
    Build the Stan source of one ``getReparameterizedNormal..._lp`` function.

    The generated function receives ``y_unit_normal``, calls the matching
    ``sampleNormal..._lp`` function on it with parameters 0 and 1, and
    returns ``mu + sigma * y_unit_normal`` (element by element when y is not
    a singleton). Any bounds are accepted in the (mu, sigma) space and
    converted with ``(bound - mu) / sigma`` before being passed on.

    Args:
        y: descriptor of the unit-normal quantity; also the return type
        mu: descriptor of the mean parameter
        sigma: descriptor of the standard deviation parameter
        method: selects which of the lower/upper bounds the function takes

    Returns:
        Stan source code for the function

    Raises:
        NotImplementedError: if y is two-dimensional, or if y is a singleton
            while mu or sigma is not

    Note:
        The ``name`` attribute of ``y``, ``mu`` and ``sigma`` is overwritten,
        so callers should pass clones.
    """
    dist_params_all_single = mu.singleton and sigma.singleton
    if y.dimensions == 2 or (y.singleton and not dist_params_all_single):
        raise NotImplementedError(
            "y={}, mu={}, sigma={}".format(y, mu, sigma))

    # These names appear verbatim in the generated Stan code.
    y.name, mu.name, sigma.name = "y_unit_normal", "mu", "sigma"
    core_params = [y, mu, sigma]
    lower = REAL.clone()
    lower.name = "lower"
    upper = REAL.clone()
    upper.name = "upper"

    # Which bounds are in play, and the resulting function-name infix.
    with_lower = method in (SampleMethod.LOWER, SampleMethod.RANGE)
    with_upper = method in (SampleMethod.UPPER, SampleMethod.RANGE)
    if with_lower and with_upper:
        funcname_extra = "RangeBound"
    elif with_lower:
        funcname_extra = "LowerBound"
    elif with_upper:
        funcname_extra = "UpperBound"
    else:
        funcname_extra = ""

    # Non-singleton distribution parameters are indexed inside the loop.
    mu_i = "" if mu.singleton else "[i]"
    sigma_i = "" if sigma.singleton else "[i]"

    bound_stmt = ("{b}_transformed = ({b} - mu{mu_i}) / "
                  "sigma{sigma_i};")
    bound_params = []  # type: List[VarDescriptor]
    constraints = ""
    calc_transformed_1 = ""
    calc_transformed_2 = ""
    if with_lower:
        bound_params.append(lower)
        constraints += ", lower_transformed"
        calc_transformed_1 = bound_stmt.format(
            b="lower", mu_i=mu_i, sigma_i=sigma_i)
    if with_upper:
        bound_params.append(upper)
        constraints += ", upper_transformed"
        calc_transformed_2 = bound_stmt.format(
            b="upper", mu_i=mu_i, sigma_i=sigma_i)

    pieces = []  # type: List[str]

    # Variable declarations
    if y.singleton:
        pieces.append("""
        real result;
        """)
    elif y.vector:
        pieces.append("""
        int length = num_elements(y_unit_normal);
        vector[length] result;
        """)
    else:
        pieces.append("""
        int length = num_elements(y_unit_normal);
        real result[length];
        """)
    if with_lower:
        pieces.append("""
        real lower_transformed;
        """)
    if with_upper:
        pieces.append("""
        real upper_transformed;
        """)

    # Size checks: every non-singleton parameter must match y's length.
    if not y.singleton:
        sized_dist_params = [p for p in (mu, sigma) if not p.singleton]
        if sized_dist_params:
            conditions = " || ".join(
                "num_elements({}) != length".format(p.name)
                for p in sized_dist_params
            )
            pieces.append("""
        if ({conditions}) {{
            reject("Incompatible arguments");
        }}
            """.format(conditions=conditions))

    # Sample, calculate result, etc.
    # One mapping serves both templates; str.format ignores unused keys.
    substitutions = dict(
        calc_transformed_1=calc_transformed_1,
        calc_transformed_2=calc_transformed_2,
        fe=funcname_extra,
        ya=y.abbreviation,
        constraints=constraints,
        mu_i=mu_i,
        sigma_i=sigma_i,
    )
    if y.singleton:
        sampling_template = """
        {calc_transformed_1}
        {calc_transformed_2}
        sampleNormal{fe}_{ya}RR_lp(y_unit_normal, 0, 1{constraints});
        result = mu + sigma * y_unit_normal;
        """
    else:
        sampling_template = """
        for (i in 1:length) {{
            {calc_transformed_1}
            {calc_transformed_2}
            sampleNormal{fe}_RRR_lp(y_unit_normal[i], 0, 1{constraints});
            result[i] = mu{mu_i} + sigma{sigma_i} * y_unit_normal[i];
        }}
        """
    pieces.append(sampling_template.format(**substitutions))

    # Return value
    pieces.append("""
        return result;
    """)

    # The type suffix reflects only (y, mu, sigma), never the bounds.
    type_codes = "".join(vd.abbreviation for vd in core_params)
    funcname = "getReparameterizedNormal{funcname_extra}_{types}_lp".format(
        funcname_extra=funcname_extra,
        types=type_codes
    )
    param_defs = ", ".join("{} {}".format(vd.typedef, vd.name)
                           for vd in core_params + bound_params)
    code = "".join(pieces)

    return """
    {rettype} {funcname}({param_defs})
    {{
        {code}
    }}
    """.format(
        rettype=y.typedef,
        funcname=funcname,
        param_defs=param_defs,
        code=remove_blank_lines(code.strip()),
    )


def get_reparamaterized_normal() -> str:
    """
    Generate the Stan code for the reparameterized-normal functions.

    The output starts with a fixed ``reparameterizedNormalBoundary`` helper,
    followed by one function from ``make_reparam_normal`` for every supported
    (y, mu, sigma) type combination and each sampling method (plain, lower
    bound, upper bound, range). A combination is skipped when:

    - y is ``REAL`` and mu/sigma are not both ``REAL``;
    - y is ``ARRAY_2D``; or
    - mu or sigma has two dimensions.

    Returns:
        the generated Stan source, beginning with a section banner
    """
    chunks = ["""
    // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    // Normal distribution, reparameterized to the unit normal distribution
    // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    // Compare Stan (2017) manual p299, but we use a bridgesampling version.

    real reparameterizedNormalBoundary(real boundary, real mu, real sigma)
    {
        // boundary: in real-world N(mu, sigma) space
        // return value: equivalent in the reparameterized N(0, 1) space
        return (boundary - mu) / sigma;
    }
    """]

    supported_combinations = [
        (y, mu, sigma)
        for y in ALL_TYPES
        for mu in ALL_TYPES
        for sigma in ALL_TYPES
        if not (
            (y == REAL and (mu != REAL or sigma != REAL)) or
            y == ARRAY_2D or
            mu.dimensions == 2 or
            sigma.dimensions == 2
        )
    ]  # type: List[Tuple[VarDescriptor, VarDescriptor, VarDescriptor]]  # noqa

    sections = [
        ("Plain", SampleMethod.PLAIN),
        ("With lower bound", SampleMethod.LOWER),
        ("With upper bound", SampleMethod.UPPER),
        ("With range (lower and upper) bounds", SampleMethod.RANGE),
    ]
    for heading, method in sections:
        chunks.append(comment(heading))
        for y, mu, sigma in supported_combinations:
            # make_reparam_normal() renames its arguments, so pass clones to
            # keep the shared type descriptors untouched.
            mu_param = mu.clone()
            sigma_param = sigma.clone()
            y_param = y.clone()
            chunks.append(
                make_reparam_normal(y_param, mu_param, sigma_param, method))
    return "".join(chunks)


# =============================================================================
# Reparameterized Cauchy distribution
# =============================================================================

def make_reparam_cauchy(y: VarDescriptor,
                        mu: VarDescriptor,
                        sigma: VarDescriptor,
                        method: SampleMethod) -> str:
    """
    Build the Stan source of one ``getReparameterizedCauchy..._lp`` function.

    The generated function receives ``y_uniform``, calls the matching
    ``sampleUniform_..._lp`` function on it, and returns
    ``mu + sigma * tan(y_uniform)`` (element by element when y is not a
    singleton). The uniform limits default to ``-pi()/2`` and ``pi()/2``;
    a supplied bound is accepted in the (mu, sigma) space and converted with
    ``atan((bound - mu) / sigma)``.

    Args:
        y: descriptor of the uniform quantity; also the return type
        mu: descriptor of the location parameter
        sigma: descriptor of the scale parameter
        method: selects which of the lower/upper bounds the function takes

    Returns:
        Stan source code for the function

    Raises:
        NotImplementedError: if y is two-dimensional, or if y is a singleton
            while mu or sigma is not

    Note:
        The ``name`` attribute of ``y``, ``mu`` and ``sigma`` is overwritten,
        so callers should pass clones.
    """
    dist_params_all_single = mu.singleton and sigma.singleton
    if y.dimensions == 2 or (y.singleton and not dist_params_all_single):
        raise NotImplementedError(
            "y={}, mu={}, sigma={}".format(y, mu, sigma))

    # These names appear verbatim in the generated Stan code.
    y.name, mu.name, sigma.name = "y_uniform", "mu", "sigma"
    core_params = [y, mu, sigma]
    lower = REAL.clone()
    lower.name = "lower"
    upper = REAL.clone()
    upper.name = "upper"

    # Which bounds are in play, and the resulting function-name infix.
    with_lower = method in (SampleMethod.LOWER, SampleMethod.RANGE)
    with_upper = method in (SampleMethod.UPPER, SampleMethod.RANGE)
    if with_lower and with_upper:
        funcname_extra = "RangeBound"
    elif with_lower:
        funcname_extra = "LowerBound"
    elif with_upper:
        funcname_extra = "UpperBound"
    else:
        funcname_extra = ""

    # Non-singleton distribution parameters are indexed inside the loop.
    mu_i = "" if mu.singleton else "[i]"
    sigma_i = "" if sigma.singleton else "[i]"

    bound_stmt = ("{b}_transformed = atan(({b} - mu{mu_i}) / "
                  "sigma{sigma_i});")
    bound_params = []  # type: List[VarDescriptor]
    calc_transformed_1 = ""
    calc_transformed_2 = ""
    if with_lower:
        bound_params.append(lower)
        calc_transformed_1 = bound_stmt.format(
            b="lower", mu_i=mu_i, sigma_i=sigma_i)
    if with_upper:
        bound_params.append(upper)
        calc_transformed_2 = bound_stmt.format(
            b="upper", mu_i=mu_i, sigma_i=sigma_i)

    pieces = []  # type: List[str]

    # Variable declarations
    if y.singleton:
        pieces.append("""
        real result;
        """)
    elif y.vector:
        pieces.append("""
        int length = num_elements(y_uniform);
        vector[length] result;
        """)
    else:
        pieces.append("""
        int length = num_elements(y_uniform);
        real result[length];
        """)
    if with_lower:
        pieces.append("""
        real lower_transformed;
        """)
    if with_upper:
        pieces.append("""
        real upper_transformed;
        """)

    # Size checks: every non-singleton parameter must match y's length.
    if not y.singleton:
        sized_dist_params = [p for p in (mu, sigma) if not p.singleton]
        if sized_dist_params:
            conditions = " || ".join(
                "num_elements({}) != length".format(p.name)
                for p in sized_dist_params
            )
            pieces.append("""
        if ({conditions}) {{
            reject("Incompatible arguments");
        }}
            """.format(conditions=conditions))

    # Sample, calculate result, etc.
    # One mapping serves both templates; str.format ignores unused keys.
    substitutions = dict(
        calc_transformed_1=calc_transformed_1,
        calc_transformed_2=calc_transformed_2,
        ya=y.abbreviation,
        lp="lower_transformed" if with_lower else "-pi()/2",
        up="upper_transformed" if with_upper else "pi()/2",
        mu_i=mu_i,
        sigma_i=sigma_i,
    )
    if y.singleton:
        sampling_template = """
        {calc_transformed_1}
        {calc_transformed_2}
        sampleUniform_{ya}RR_lp(y_uniform, {lp}, {up});
        result = mu + sigma * tan(y_uniform);
        """
    else:
        sampling_template = """
        for (i in 1:length) {{
            {calc_transformed_1}
            {calc_transformed_2}
            sampleUniform_RRR_lp(y_uniform[i], {lp}, {up});
            result[i] = mu{mu_i} + sigma{sigma_i} * tan(y_uniform[i]);
        }}
        """
    pieces.append(sampling_template.format(**substitutions))

    # Return value
    pieces.append("""
        return result;
    """)

    # The type suffix reflects only (y, mu, sigma), never the bounds.
    type_codes = "".join(vd.abbreviation for vd in core_params)
    funcname = "getReparameterizedCauchy{funcname_extra}_{types}_lp".format(
        funcname_extra=funcname_extra,
        types=type_codes
    )
    param_defs = ", ".join("{} {}".format(vd.typedef, vd.name)
                           for vd in core_params + bound_params)
    code = "".join(pieces)

    return """
    {rettype} {funcname}({param_defs})
    {{
        {code}
    }}
    """.format(
        rettype=y.typedef,
        funcname=funcname,
        param_defs=param_defs,
        code=remove_blank_lines(code.strip()),
    )


def get_reparamaterized_cauchy() -> str:
    """
    Generate the Stan code for the reparameterized-Cauchy functions.

    The output starts with an explanatory Stan comment and a fixed
    ``reparameterizedCauchyBoundary`` helper, followed by one function from
    ``make_reparam_cauchy`` for every supported (y, mu, sigma) type
    combination and each sampling method (plain, lower bound, upper bound,
    range). A combination is skipped when:

    - y is ``REAL`` and mu/sigma are not both ``REAL``;
    - y is ``ARRAY_2D``; or
    - mu or sigma has two dimensions.

    Returns:
        the generated Stan source, beginning with a section banner
    """
    # The emitted comment states the transformation with tan(), matching the
    # "result = mu + sigma * tan(y_uniform)" code that make_reparam_cauchy()
    # generates and the arctan inverse used for boundaries.
    code = """
    // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    // Cauchy distribution, reparameterized to the uniform distribution
    // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    /*
    See p339 of the Stan (2017) manual.
    The transformation is

        y_cauchy(mu, sigma) = mu + sigma * tan(y_uniform(-pi/2, pi/2))

    so the boundary transformation is the reverse, namely

        uniform_boundary = arctan((boundary - mu) / sigma)

    Note that
        arctan(-infinity) is -pi/2; arctan(0) is 0; arctan(infinity) is pi/2
        ... so for lower == 0, lower_transformed == 0

    We can do the range constraints like this:
         real<lower=-pi()/2, upper=pi()/2> y_uniform;  // Cauchy
         real<lower=0, upper=pi()/2> y_uniform;  // positive half-Cauchy
    and the sampling statement would be unnecessary, but we're going to
    do the sampling using "target +=" so that bridgesampling works.

    You might think that because of that, the range constraint is unnecessary,
    but it IS STILL NECESSARY or Stan will explore invalid ranges.

    */

    real reparameterizedCauchyBoundary(real boundary, real mu, real sigma)
    {
        // boundary: in real-world Cauchy(mu, sigma) space
        // return value: equivalent in the reparameterized uniform [-pi/2, +pi/2] space
        return atan((boundary - mu) / sigma);
    }
    """  # noqa

    # The second and third slots are the distribution parameters mu and
    # sigma (not bounds), so they are named accordingly here.
    supported_combinations = []  # type: List[Tuple[VarDescriptor, VarDescriptor, VarDescriptor]]  # noqa
    for y in ALL_TYPES:
        for mu in ALL_TYPES:
            for sigma in ALL_TYPES:
                if y == REAL and (mu != REAL or sigma != REAL):
                    continue
                if y == ARRAY_2D:
                    continue
                if mu.dimensions == 2 or sigma.dimensions == 2:
                    continue
                supported_combinations.append((y, mu, sigma))

    sections = [
        ("Plain", SampleMethod.PLAIN),
        ("With lower bound", SampleMethod.LOWER),
        ("With upper bound", SampleMethod.UPPER),
        ("With range (lower and upper) bounds", SampleMethod.RANGE),
    ]
    for heading, method in sections:
        code += comment(heading)
        for y, mu, sigma in supported_combinations:
            # make_reparam_cauchy() renames its arguments, so pass clones to
            # keep the shared type descriptors untouched.
            mu_param = mu.clone()
            sigma_param = sigma.clone()
            y_param = y.clone()
            code += make_reparam_cauchy(y_param, mu_param, sigma_param,
                                        method)
    return code


# =============================================================================
# Main
# =============================================================================

def get_code() -> str:
    """
    Assemble the complete Stan source.

    Concatenates, in output order, the fixed text blocks and the text
    returned by each per-distribution generator function.

    Returns:
        the full contents of the Stan file
    """
    parts = [
        HEADER,
        SIMPLE_FUNCTIONS,
        LOG_PROB_HEADER,
        LOG_PROB_HELPERS,
        get_normal_distribution(),
        get_cauchy_distribution(),
        get_beta_distribution(),
        get_gamma_distribution(),
        get_uniform_distribution(),
        SAMPLE_BERNOULLI,
        REPARAM_HEADER,
        get_reparamaterized_normal(),
        get_reparamaterized_cauchy(),
        DUFF_ANOVA_FUNCTIONS,
    ]
    return "".join(parts)


def main() -> None:
    """
    Command-line entry point: generate the Stan code and write it to a file.

    The output path comes from ``--filename`` (default ``commonfunc.stan``);
    a confirmation message is printed once the file has been written.
    """
    description = """
Make a set of common functions for Stan programs.
By Rudolf Cardinal. Created 2018-02-09.
        """
    parser = argparse.ArgumentParser(
        description=description,
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument("--filename", type=str, default="commonfunc.stan",
                        help="Output filename")
    output_filename = parser.parse_args().filename

    stan_source = get_code()
    with open(output_filename, "w") as outfile:
        outfile.write(stan_source)
    print("Written to {}".format(output_filename))


# Run the generator only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == '__main__':
    main()