-
Notifications
You must be signed in to change notification settings - Fork 2
/
utility_functions.py
254 lines (212 loc) · 8.86 KB
/
utility_functions.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
import numpy as np
from numpy.random import binomial
from scipy.stats import multivariate_normal, norm
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import math
import os
import errno
import seaborn as sns
from matplotlib.colors import ListedColormap
from mpl_toolkits.mplot3d import Axes3D
blue_cmap = ListedColormap(cm.Blues(np.linspace(0, 1, 20))[10:, :-1])
oran_cmap = ListedColormap(cm.Oranges(np.linspace(0, 1, 20))[10:, :-1])
green_cmap = ListedColormap(cm.Greens(np.linspace(0, 1, 20))[10:, :-1])
def setup_plotting():
"""This function sets up Latex-like fonts, font sizes and other
parameter to make plots look prettier."""
sns.set_style('darkgrid')
plt.rcParams['mathtext.fontset'] = 'stix'
plt.rcParams['font.family'] = 'STIXGeneral'
plt.rcParams['font.size'] = 14
plt.rcParams['figure.figsize'] = (12, 6)
plt.rc("savefig", dpi=300)
plt.rc('figure', titlesize=16)
def mkdir_p(path):
"""Creates a directory in the specified path."""
try:
os.makedirs(path)
except OSError as exc: # Python >2.5
if exc.errno == errno.EEXIST and os.path.isdir(path):
pass
else:
raise
def generate_parameters(p):
"""Simple utility function to generate a list of the form
[1.0, 0.5, ... , 0.5] with length p."""
return np.insert(np.repeat(0.5, p-1), 0, 1.0)
def sigmoid(x):
"""
Sigmoid function that can handle arrays. For a thorough discussion on
speeding up sigmoid calculations, see:
https://stackoverflow.com/a/25164452/6435921
:param x: Argument for sigmoid function. Can be float, list or numpy array.
:type x: float or iterable.
:return: Evaluation of sigmoid at x.
:rtype: np.array
"""
return 1.0 / (1.0 + np.exp(-x))
def logit(x):
"""
Logit function. Inverse of sigmoid function. It is optimized using the same
tricks as in sigmoid().
:param x: Argument for logit.
:return: Value of logit at x.
"""
return np.log(x / (1.0 - x))
def lambda_func(x):
"""
Lambda function as defined for variational inference.
:param x: Argument of lambda function. Usually xi_i
:return: Value of lambda function at xi_i = x.
"""
return (sigmoid(x) - 0.5) / (2.0*x)
def log_normal_kernel(t, m, v, multivariate=False):
"""
This is basically the argument in the exponential of a normal distribution
with mean=m and variance=v.
"""
if multivariate:
return -0.5*np.dot(t - m, np.dot(np.linalg.inv(v), t - m))
else:
return - (0.5*(t - m) ** 2.0) / v
def generate_bernoulli(size, params):
"""
Simulates a Bernoulli data set. Parameters used to generate probabilities.
params has to be a numpy array in order b0, b1, b2 ...
"""
# Design matrix X of dimension (n x p)
nx = len(params) - 1 # n of params - 1 = n of explanatory vars
design = np.random.normal(loc=0, scale=1, size=(size, nx))
design = np.hstack((np.ones((size, 1)), design)) # intercept column
# Bernoulli RVs with inverse logit (sigmoid) of linear predictor as prob
bin_samples = binomial(n=1, p=sigmoid(np.dot(design, params)), size=size)
return design, bin_samples
def surface_plot(f, xmin, xmax, ymin, ymax, n):
"""
Given a R2 -> R function "func" which takes an array of dimension 2 and
returns a scalar, this function will plot a surface plot over the
specified region.
"""
# Get grid data
x, y, z = prepare_surface_plot(f, xmin, xmax, ymin, ymax, n)
# Plot 3D surface
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.plot_surface(x, y, z)
ax.update({'xlabel': 'x', 'ylabel': 'y', 'zlabel': 'z'})
return fig, ax, x, y, z
def prepare_surface_plot(f, xmin, xmax, ymin, ymax, n):
"""This function creates the three X, Y and Z arrays that are needed to
plot surfaces, but it does not plot them. It can be used to put multiple
plots into one figure."""
x, y = np.meshgrid(np.linspace(xmin, xmax, n), np.linspace(ymin, ymax, n))
z = np.array([f(xy) for xy in zip(x.ravel(), y.ravel())]).reshape(x.shape)
return x, y, z
def row_outer(A):
"""
Computes the row-wise outer product a_i a_i^T where each row is a_i.
"""
return np.einsum('ni,nj->nij', A, A)
def round_matrix_to_symmetry(x, max_digits=7):
"""This function takes a matrix and tries to keep the largest number of
digits such that the matrix is symmetric. This is useful when using the
inverse hessian matrix coming from the minimization routine used
to find the mode of the log-posterior. The inverse hessian matrix of
-log_posterior will be used as variance-covariance matrix of proposal
distribution."""
q_max = 1
# Allow only up to 6 digits. This should be more than enough for a vcov.
for q in range(1, max_digits):
# Round the matrix
rounded = x.round(q)
# Check for symmetry
if np.allclose(rounded, rounded.T, rtol=1e-10, atol=1e-10):
q_max = q
return x.round(q_max), q_max
def metropolis(p, z0, cov, n_samples=100, burn_in=0, thinning=1, scale=1.0):
"""
Random-Walk Metropolis algorithm for a multivariate probability density.
:param p: Probability distribution we want to sample from.
:type p: callable
:param z0: Initial guess for Metropolis algorithm.
:type z0: np.array
:param cov: Variance-Covariance matrix for normal proposal distribution.
:type cov: np.array
:param n_samples: Number of total samples we want to obtain. This is the
number of samples created after applying burn-in and
thinning. Basically `samples[burn_in::thinning]`.
:type n_samples: int
:param burn_in: Number of samples to burn initially.
:type: burn_in: int
:param thinning: If `thinning` > 1 then after applying burn_in we get
every <<thinning>> samples.
:type thinning: int
:param scale: Extra parameter that can be used to scale the standard
deviation that is found with grg rule. This can be useful
when grg rule does not yield great results.
:type scale: float
:return: `n_samples` samples from `p`.
:rtype: np.array
"""
# Initialize algorithm. Calculate num iterations. Acceptance counter.
z, n_params, pz = z0.copy(), len(z0), p(z0)
tot = burn_in + (n_samples - 1) * thinning + 1
accepted = 0
# Init list storing all samples. Generate random numbers.
sample_list = np.zeros((tot, n_params))
logu = np.log(np.random.uniform(size=tot))
# Optimal scale
a = (2.38**2 / n_params)*scale
if n_params >= 2:
normal_shift = multivariate_normal.rvs(mean=np.zeros(n_params),
cov=a*cov, size=tot)
else:
normal_shift = norm.rvs(loc=0, scale=np.sqrt(a*cov), size=tot)
for i in range(tot):
# Sample a candidate from Normal(mu, sigma)
cand = z + normal_shift[i]
try:
# Store values to save computations. logu[i] <= 0 for u \in (0, 1)
p_cand = p(cand)
if p_cand - pz > logu[i]:
z, pz, accepted = cand.copy(), p_cand, accepted + 1
except (OverflowError, ValueError, RuntimeWarning):
continue
sample_list[i, :] = z.copy()
return sample_list[burn_in::thinning], accepted / tot
def choose_subplot_dimensions(k):
"""This function will determine the number or rows and columns of a
subplot based on the number of parameters that we want to plot."""
if k < 4:
return k, 1
elif k < 11:
return math.ceil(k/2), 2
else:
# I've chosen to have a maximum of 3 columns
return math.ceil(k/3), 3
def generate_subplots(k, row_wise=False, suptitle=None, fontsize=20):
"""This function generates subplots in the dimension specified by
choose_subplot_dimensions(). """
nrows, ncols = choose_subplot_dimensions(k)
# Choose your share X and share Y parameters as you wish:
figure, axes = plt.subplots(nrows=nrows, ncols=ncols, figsize=(13, 5*nrows))
figure.suptitle(suptitle, fontsize=fontsize)
# Check if it's an array. If there's only one plot, it's just an Axes obj
if not isinstance(axes, np.ndarray):
return figure, [axes]
else:
# Choose the traversal you'd like: 'F' is col-wise, 'C' is row-wise
axes = axes.flatten(order=('C' if row_wise else 'F'))
# Delete any unused axes from the figure, so that they don't show
# blank x- and y-axis lines
for idx, ax in enumerate(axes[k:]):
figure.delaxes(ax)
# Turn ticks on for the last ax in each column, wherever it lands
idx_to_turn_on_ticks = idx + k - ncols if row_wise else idx + k - 1
for tk in axes[idx_to_turn_on_ticks].get_xticklabels():
tk.set_visible(True)
axes = axes[:k]
return figure, axes
if __name__ == "__main__":
pass