-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgenerator.py
171 lines (146 loc) · 5.91 KB
/
generator.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
# This code is based on gEDMD theory from Klus et al. (https://arxiv.org/pdf/1909.10638.pdf)
# Throughout, l (letter ell) will be used to refer to x_l, or the lth snapshot of the state observables, X
if __name__ == '__main__':
import pymp
import math
import numpy as np
import observables
import pandas as pd
from sympy import symbols
from sympy.polys.monomials import itermonomials, monomial_count
from sympy.polys.orderings import monomial_key
# Construct B matrix as seen in 3.1.2 of the reference paper
def constructB(d, n):
Bt = np.zeros((d, n))
if Bt.shape[1] == 1:
Bt[0,0] = 1
else:
row = 0
for i in range(1, d+1):
Bt[row,i] = 1
row += 1
B = np.transpose(Bt)
return B
# Construct similar B matrix as above, but for second order monomials
def constructSecondOrderB(s, n):
Bt = np.zeros((s, n))
if Bt.shape[1] == 1:
Bt[0,0] = 1
else:
row = 0
for i in range(d+1, d+1+s):
Bt[row,i] = 1
row += 1
B = np.transpose(Bt)
return B
# Get dataframe from CSV file
coin_data = pd.read_csv('coindatav4.csv')
coin_data = coin_data.drop(columns=['Unnamed: 0', 'coin', 'price']) # returns
g = coin_data.groupby('datetime').cumcount()
X = np.transpose(np.array((coin_data.set_index(['datetime',g])
.unstack(fill_value=0)
.stack().groupby(level=0)
.apply(lambda x: np.array(x.values.tolist()).reshape(len(x)))
.tolist())))
X = X[:, int(X.shape[1]*0.9):] # last 10% of snapshots
# Various setup variables and definitions
d = X.shape[0]
m = X.shape[1]
s = int(d*(d+1)/2) # number of second order poly terms
psi = observables.monomials(4)
Psi_X = psi(X)
nablaPsi = psi.diff(X)
nabla2Psi = psi.ddiff(X)
n = Psi_X.shape[0]
# This computes dpsi_k(x) exactly as in the paper
# t = 1 is a placeholder time step, not really sure what it should be
def dpsi(k, l, t=1):
term_1 = (1/t) * ((X[:, l+1]-X[:, l]).reshape(1, -1))
term_2 = nablaPsi[k, :, l].reshape(-1, 1)
term_3 = (1/(2*t)) * ((X[:, l+1]-X[:, l]).reshape(-1, 1) * (X[:, l+1]-X[:, l]).reshape(1, -1))
term_4 = nabla2Psi[k, :, :, l]
return np.dot(term_1,term_2) + np.sum(np.multiply(term_3,term_4))
# Construct \text{d}\Psi_X matrix
dPsi_X = np.zeros((n, m))
for row in range(n):
for column in range(m-1):
dPsi_X[row, column] = dpsi(row, column)
# Calculate Koopman generator approximation
train = int(m * 0.8)
test = m - train
M = dPsi_X[:, :train] @ np.linalg.pinv(Psi_X[:, :train]) # \widehat{L}^\top
L = M.T # estimate of Koopman generator
# Construct B matrix (selects first-order monomials except 1)
B = constructB(d, n)
# Computed b function (sometimes called \mu)
def b(l):
return (L @ B).T @ Psi_X[:, l].reshape(-1, 1)
# Eigen decomposition
eig_vals, eig_vecs = np.linalg.eigh(L)
# Calculate Koopman modes
V = np.transpose(B) @ np.linalg.inv((eig_vecs).T)
# Compute eigenfunction matrix
eig_funcs = (eig_vecs).T @ Psi_X
# This b function allows for heavy dimension reduction!
# default is reducing by 90% (taking the first n/10 eigen-parts)
def b_v2(l, num_dims=n//10):
res = 0
for ell in range(n-1, n-num_dims, -1):
res += eig_vals[ell].reshape(-1, 1) * eig_funcs[ell, l].reshape(-1, 1) * V[:, ell].reshape(-1, 1)
return res
# Construct second order B matrix (selects second-order monomials)
second_orderB = constructSecondOrderB(s, n)
# the a function
# this was calculated in a weird way, so could have issues...
def a(l):
return (np.transpose(L @ second_orderB) @ Psi_X[:, l].reshape(-1, 1)) - ((np.transpose(second_orderB) @ nablaPsi[:, :, l]) @ b_v2(l))
x_str = ""
for i in range(d):
x_str += 'x_' + str(i) + ', '
x_syms = symbols(x_str)
M = itermonomials(x_syms, 4)
sortedM = sorted(M, key=monomial_key('grlex', np.flip(x_syms)))
# Function to compute the a matrix at a specific snapshot index
def evalAMatrix(l):
a_matrix = np.zeros((d, d))
for p in range(d+1, d+1+s):
monomial = str(sortedM[p])
i = 0
j = 0
split_mon = monomial.split('**')
if len(split_mon) > 1:
i = int(split_mon[0][-1])
j = int(split_mon[0][-1])
else:
split_mon = monomial.split('*')
i = int(split_mon[0][-1])
j = int(split_mon[1][-1])
a_matrix[i,j] = a(l)[p-d-1]
a_matrix[j,i] = a(l)[p-d-1]
return a_matrix
# Oh no... it's not positive definite
# Some calculation must be wrong, darn...
# decomp = np.linalg.cholesky(evalAMatrix(0))
def sigma(l):
# Attempt at work around without Cholesky
U, S, V = np.linalg.svd(evalAMatrix(l))
square_S = np.diag(S**(1/2))
sigma = V @ square_S @ V.T
return sigma
# snapshots by coins
# rows are snapshots
epsilons = pymp.shared.array((d, m, 1))
with pymp.Parallel(8) as p:
def epsilon_t(l):
return np.linalg.inv(sigma(l-1).T @ sigma(l-1)) @ sigma(l-1).T @ (X[:, l].reshape(-1, 1) - b_v2(l-1))
for snapshot_index in p.range(1, epsilons.shape[1], 1):
p.print("snapshot ", snapshot_index)
epsilons[:, snapshot_index] = epsilon_t(snapshot_index)
# Epsilons produced make no sense...
# We are looking for numbers that follow a
# normal distribution but these are way off
epsilons = np.array(epsilons)
print(epsilons)
print(epsilons.shape)
np.save('gedmd_epsilons_fixed', epsilons)
np.savetxt("gedmd_epsilons_fixed.csv", epsilons.reshape(-1, epsilons.shape[1]), delimiter=",")