-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathsvgd.py
63 lines (48 loc) · 2.11 KB
/
svgd.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
import numpy as np
from scipy.spatial.distance import pdist, squareform
import matplotlib.pyplot as plt
class SVGD():
def __init__(self):
pass
def svgd_kernel(self, theta, h = -1):
sq_dist = pdist(theta)
pairwise_dists = squareform(sq_dist)**2
if h < 0: # if h < 0, using median trick
h = np.median(pairwise_dists)
h = np.sqrt(0.5 * h / np.log(theta.shape[0]+1))
# compute the rbf kernel
Kxy = np.exp( -pairwise_dists / h**2 / 2)
dxkxy = -np.matmul(Kxy, theta)
sumkxy = np.sum(Kxy, axis=1)
for i in range(theta.shape[1]):
dxkxy[:, i] = dxkxy[:,i] + np.multiply(theta[:,i],sumkxy)
dxkxy = dxkxy / (h**2)
return (Kxy, dxkxy)
def update(self, x0, lnprob, obj= None, n_iter = 1000, stepsize = 1e-3, bandwidth = -1, alpha = 0.9, debug = False):
# Check input
if x0 is None or lnprob is None:
raise ValueError('x0 or lnprob cannot be None!')
theta = np.copy(x0)
# adagrad with momentum
fudge_factor = 1e-6
historical_grad = 0
for iter in range(n_iter):
if debug and (iter+1) % 1000 == 0:
print ('iter ' + str(iter+1) )
lnpgrad = lnprob(theta)
# calculating the kernel matrix
kxy, dxkxy = self.svgd_kernel(theta, h = -1)
grad_theta = (np.matmul(kxy, lnpgrad) + dxkxy) / x0.shape[0]
# adagrad
if iter == 0:
historical_grad = historical_grad + grad_theta ** 2
else:
historical_grad = alpha * historical_grad + (1 - alpha) * (grad_theta ** 2)
adj_grad = np.divide(grad_theta, fudge_factor+np.sqrt(historical_grad))
if iter%50 == 0 and debug:
print(str(iter)+':'+str(obj(theta)))
plt.figure()
plt.plot(theta[:, 0], theta[:, 1], 'ro')
plt.savefig(str(iter)+'.jpg')
theta = theta + stepsize * adj_grad
return theta