-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathaccuracyAlgo.py
132 lines (112 loc) · 4.17 KB
/
accuracyAlgo.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
import numpy as np
import time
from keras.datasets import mnist
from LSVD_s import lanczosSVD
from LSVD_p import lanczosSVDp
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import cupy as cp
# Number of leading rows of the data matrix used for the dense full-SVD timing.
SVD_SAMPLES = 30000


def projection_errors(approx, reference):
    """Return the (infinity-norm, Frobenius-norm) error between two projections.

    The matrices are compared on their element-wise absolute values, so an
    entry that differs from the reference only by its sign contributes no
    error (presumably to tolerate sign-flipped components -- confirm against
    the Lanczos implementations).

    Args:
        approx: 2-D array holding the approximate projection.
        reference: 2-D array of the same shape holding the reference projection.

    Returns:
        Tuple ``(inf_error, fro_error)``: the matrix infinity norm (maximum
        absolute row sum) and the Frobenius norm of ``|approx| - |reference|``.
    """
    diff = np.abs(approx) - np.abs(reference)
    return np.linalg.norm(diff, np.inf), np.linalg.norm(diff)


def save_line_plot(x, y, ylabel, filename, log_scale=False):
    """Plot ``y`` against ``x`` on a new figure and save it to ``filename``.

    Args:
        x: Values for the horizontal axis (the tested k values).
        y: Values for the vertical axis.
        ylabel: Label for the vertical axis.
        filename: Path of the image file to write.
        log_scale: When true, use a logarithmic y axis clipped to
            ``[1e-13, 5000]`` (the range used for all error plots).
    """
    fig = plt.figure()
    plt.plot(x, y)
    plt.xlabel('K Values')
    plt.ylabel(ylabel)
    if log_scale:
        plt.yscale('log')
        plt.ylim([1e-13, 5000])
    fig.savefig(filename)
    # Release the figure once it is on disk; six figures are created per run.
    plt.close(fig)


def main():
    """Benchmark serial and parallel Lanczos SVD against sklearn PCA on MNIST.

    For each tested k the script records the runtime of ``lanczosSVD`` and
    ``lanczosSVDp`` and the error of their projections relative to the PCA
    projection, then writes six plots to the working directory and prints the
    collected timings.
    """
    # Load MNIST and flatten every image into one row; train and test images
    # are stacked into a single data matrix (labels are not needed here).
    (train_X, _), (test_X, _) = mnist.load_data()
    train_samples = train_X.shape[0]
    test_samples = test_X.shape[0]
    pixels = train_X.shape[1] * train_X.shape[2]
    X = np.concatenate((train_X.reshape(train_samples, pixels),
                        test_X.reshape(test_samples, pixels)))

    trunc = 3  # truncation argument passed to both Lanczos routines
    dim = 3    # number of PCA components in the reference projection

    # Compare values of k (to determine how varying k affects accuracy):
    # 1, 10, 20, ..., 150 (the leading 0 from linspace is replaced by 1).
    k_vals = np.linspace(0, 150, 16).astype(int)
    k_vals[0] = 1
    num_k = k_vals.shape[0]

    times_s = np.zeros((num_k, 1))
    times_p = np.zeros((num_k, 1))
    errors_inf = np.zeros((num_k, 1))
    errors_2norm = np.zeros((num_k, 1))
    errors_inf_p = np.zeros((num_k, 1))
    errors_2norm_p = np.zeros((num_k, 1))

    X = StandardScaler().fit_transform(X)

    # Reference projection via sklearn PCA; fit and transform are both timed.
    # perf_counter is used for all timings because it is monotonic and is not
    # affected by system clock adjustments.
    pca_start = time.perf_counter()
    pca = PCA(n_components=dim, svd_solver='arpack')
    pca.fit(X)
    trueProjX = pca.transform(X)
    elapsed_pca = time.perf_counter() - pca_start
    print(trueProjX.shape)

    # Serial full SVD time on the first SVD_SAMPLES rows. The truncated
    # projection is formed inside the timed region but is not used afterwards.
    # NOTE(review): with the default full_matrices=True NumPy builds a square U
    # with one row/column per sample; full_matrices=False may be sufficient
    # here, but it would change what is being timed -- confirm intent.
    svd_start = time.perf_counter()
    Ux, Sx, _ = np.linalg.svd(X[0:SVD_SAMPLES, :])
    _ = Ux[:, 0:trunc] * Sx[0:trunc]
    elapsed_svd = time.perf_counter() - svd_start
    del Ux, Sx

    # Flush out parallel overhead by running once. The result of lanczosSVDp
    # is later converted with cp.asnumpy, so it is assumed to live on the GPU;
    # wait for the device so warm-up work cannot leak into the first timing.
    lanczosSVDp(X, 1, trunc)
    cp.cuda.Stream.null.synchronize()

    for idx, k in enumerate(k_vals):
        # Compute serial time
        start = time.perf_counter()
        projX, _, _, _ = lanczosSVD(X, k, trunc)
        times_s[idx] = time.perf_counter() - start

        # Compute parallel time. GPU work may still be in flight when the
        # call returns, so synchronize before stopping the timer.
        start = time.perf_counter()
        projXp, _, _, _ = lanczosSVDp(X, k.item(), trunc)
        cp.cuda.Stream.null.synchronize()
        times_p[idx] = time.perf_counter() - start
        print('Run for k = %d is Complete!' % k)

        # Compute and store error relative to the PCA projection
        errors_inf[idx], errors_2norm[idx] = projection_errors(
            projX, trueProjX)
        errors_inf_p[idx], errors_2norm_p[idx] = projection_errors(
            cp.asnumpy(projXp), trueProjX)

    inf_label = 'Low Rank Approximation Error (Infinity Norm)'
    fro_label = 'Low Rank Approximation Error (Frobenius Norm)'
    time_label = 'Runtime (seconds)'

    save_line_plot(k_vals, errors_inf, inf_label,
                   'inf_error_serial.png', log_scale=True)
    save_line_plot(k_vals, errors_2norm, fro_label,
                   '2norm_error_serial.png', log_scale=True)
    save_line_plot(k_vals, times_s, time_label, 'runtime_serial.png')
    save_line_plot(k_vals, errors_inf_p, inf_label,
                   'inf_error_parallel.png', log_scale=True)
    save_line_plot(k_vals, errors_2norm_p, fro_label,
                   '2norm_error_parallel.png', log_scale=True)
    save_line_plot(k_vals, times_p, time_label, 'runtime_parallel.png')

    print('Full SVD Serial Runtime on {:,} MNIST Samples:'.format(SVD_SAMPLES))
    print(elapsed_svd)
    # PCA runs on the stacked train+test matrix, so report its actual row
    # count rather than a hard-coded training-set size.
    print('Sklearn Runtime on {:,} MNIST Samples:'.format(X.shape[0]))
    print(elapsed_pca)
    print('Values of k Tested:')
    print(k_vals)
    print('Serial Run-times (s):')
    print(times_s)
    print('Parallel Run-times (s):')
    print(times_p)


if __name__ == '__main__':
    main()