-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathconv.py
300 lines (210 loc) · 8.61 KB
/
conv.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
import numpy as np
from numba import jit
from numba.typed import List
from functions import *
import math
import matplotlib.pyplot as plt
#initialize filters with random values
def init_filters(x, y, num_filters):
    """Create `num_filters` random filters of shape (x, y).

    x and y must be odd so every filter has a well-defined centre pixel.

    Returns an ndarray of shape (num_filters, x, y).
    Raises ValueError when x or y is even.
    """
    # raise instead of assert: asserts are stripped when Python runs with -O,
    # which would silently disable this validation
    if x % 2 != 1 or y % 2 != 1:
        raise ValueError("Filter must have an odd shape (3x3,5x5,..)")
    ## TODO: Check xavier init https://www.quora.com/What-is-an-intuitive-explanation-of-the-Xavier-Initialization-for-Deep-Neural-Networks
    # divide by 9 to reduce the variance of our initial values
    filters = np.random.randn(num_filters, x, y) / 9
    return filters
#apply a stack of filters to a single image
@jit(nopython=True)
def apply_filter(image, filters, stride=1, bias=0):
    """Slide every filter over `image` (cross-correlation) and collect the maps.

    image   : 2d array (one grayscale image)
    filters : 3d array (num_filters, fx, fy) with odd fx, fy
    stride  : step of the sliding window
    bias    : scalar added to every response value

    Returns a numba typed List with one 2d response map per filter.
    Raises ValueError when the output size would not be an integer.
    """
    # BOTH dimensions must fit the filter — the original `or` let an image
    # that is too small in one dimension pass and crash later while slicing
    assert image.shape[0] >= filters[0].shape[0] and \
        image.shape[1] >= filters[0].shape[1], "Image size must be greater than the filter"
    #get outputSize (kept as float so the divisibility check below works)
    outXF = (image.shape[0] - filters[0].shape[0])/stride + 1
    outYF = (image.shape[1] - filters[0].shape[1])/stride + 1
    #get half size of filter to substract this value from the image edges
    fhX = int(filters[0].shape[0] / 2) #filterHalfX
    fhY = int(filters[0].shape[1] / 2) #filterHalfY
    outX = int(outXF)
    outY = int(outYF)
    #check if outX and outY have decimals
    if outX - outXF != 0 or outY - outYF != 0:
        raise ValueError("filtering cannot be adapted to this image. Adapt size or stride")
    #create empty list to save output
    outputs = []
    for filter in filters:
        #create empty output
        output = np.empty((outX, outY))
        oX, oY = 0, 0
        #iterate over image, window centred on (i, j)
        for i in range(fhX, image.shape[0] - fhX, stride):
            for j in range(fhY, image.shape[1] - fhY, stride):
                #create subset from the image (+1 as np slicing excludes the end)
                subset = image[i-fhX:i+fhX+1, j-fhY:j+fhY+1]
                #elementwise product summed over the window, plus bias
                output[oX, oY] = np.sum(filter*subset) + bias
                oY += 1
            oX += 1
            oY = 0
        #append the (unflattened) 2d map to the list
        outputs.append(output)
    #typed List avoids numba's reflected-list deprecation warning
    typed_outputs = List()
    [typed_outputs.append(x) for x in outputs]
    return typed_outputs
@jit(nopython=True)
def downsample(filtered, kSize=2, stride=2):
    """Max-pool every image in `filtered` with a kSize x kSize window.

    filtered : sequence of 2d arrays (output of apply_filter)
    kSize    : pooling window size (even values behave as expected)
    stride   : pooling stride

    Returns a list with one pooled 2d array per input image.
    Raises ValueError when the output size would not be an integer.
    """
    #get size for the output image — kept as FLOAT so the divisibility check
    #below can detect fractions (the original int() here made the check dead code)
    outXF = (filtered[0].shape[0] - kSize)/stride + 1
    outYF = (filtered[0].shape[1] - kSize)/stride + 1
    #get half size of pooling to substract this value from the image edges
    phX = int(kSize / 2) #poolingHalfX
    phY = int(kSize / 2) #poolingHalfY
    outX = int(outXF)
    outY = int(outYF)
    #check if outX and outY have decimals
    if outX - outXF != 0 or outY - outYF != 0:
        raise ValueError("pooling cannot be adapted to this image. Adapt kSize or stride")
    #create empty list to save output
    outputs = []
    #iterate all filtered images
    for image in filtered:
        output = np.empty((outX, outY))
        oX, oY = 0, 0
        #iterate one image
        for i in range(phX, image.shape[0], stride):
            for j in range(phY, image.shape[1], stride):
                #window [i-phX, i+phX) is 2*phX wide (== kSize for even kSize)
                #NOTE(review): for odd kSize the window is kSize-1 wide — confirm
                #only even pooling sizes are used by callers
                subset = image[i-phX:i+phX, j-phY:j+phY]
                #only transfer the maximum value of this subset
                output[oX, oY] = np.max(subset)
                oY += 1
            oX += 1
            oY = 0
        outputs.append(output)
    return outputs
#forward propagation for cnn
def cnn_forward_prop(input):
    """Run filtering + max pooling over a batch of flattened grayscale images.

    input = [X, filters, imgShape, fStride, fBias, pStride, pSize] where X
    holds one flattened image per COLUMN and imgShape is its 2d (rows, cols).

    Returns [output, filteredCache]:
      output        — one column per image, the flattened pooled maps
      filteredCache — the unflattened filtered images (needed for backprop)
    Raises ValueError when an image cannot be reshaped to imgShape.
    """
    X = input[0]
    filters = input[1]
    imgShape = input[2]
    fStride = input[3]
    fBias = input[4]
    pStride = input[5]
    pSize = input[6]
    output = []
    filteredCache = []
    #iterate all images (one per column of X)
    for img in X.T:
        #check if image is in grayscale
        try:
            img = img.reshape(imgShape[0], imgShape[1])
        except ValueError as e:
            # only catch the reshape failure — the original bare `except:`
            # also swallowed KeyboardInterrupt and unrelated bugs
            raise ValueError("The input image has too many pixels for its shape (Can be caused by non grayscale images)") from e
        #apply filter
        filtered = apply_filter(img, filters, stride=fStride, bias=fBias)
        filteredCache.append(filtered)
        #downsample
        pooled = downsample(filtered, kSize = pSize, stride=pStride)
        #convert list of images into an 1d array
        images = np.asarray(pooled).flatten()
        #append to the outputlist
        output.append(images)
    #convert list to array, transpose so columns are samples again
    output = np.asarray(output)
    output = output.T
    #output = array with each element the flattened results of all pooling images
    #filteredCache = unflattened filtered images
    return [output, filteredCache]
@jit(nopython=True)
def deapply_filter(img, cnnGrad, filters, stride=1):
    """Backward pass of the convolution: gradient w.r.t. each filter.

    img     : the original 2d input image
    cnnGrad : per-filter gradient maps (output of upsample)
    filters : 3d array (num_filters, fx, fy); values unused, only shapes
    stride  : stride used in the forward pass

    Returns an array shaped like `filters`, each filter gradient averaged
    over all window positions.
    """
    #get half size of filter to substract this value from the image edges
    fhX = int(filters[0].shape[0] / 2) #filterHalfX
    fhY = int(filters[0].shape[1] / 2) #filterHalfY
    #initialize empty filter gradients
    filterGrads = np.zeros(filters.shape)
    for f, filter in enumerate(filters):
        #needed for averaging the filter
        counter = 0
        #iterate over image and apply filter
        for i in range(fhX, img.shape[0] - fhX, stride):
            for j in range(fhY, img.shape[1] - fhY, stride):
                #accumulate upstream gradient times the input window
                #NOTE(review): the [i-1, j-1] index only matches the window
                #position for 3x3 filters with stride 1 (fhX=fhY=1); for larger
                #filters or strides it looks off-by-(fhX-1) — confirm with callers
                filterGrads[f] += cnnGrad[f][i-1,j-1] * img[i-fhX:i+fhX+1, j-fhY:j+fhY+1]
                counter += 1
        #create the average per filter
        filterGrads[f] = filterGrads[f]/counter
    return filterGrads
#backward propagation for pooling
@jit(nopython=True)
def upsample(grads, cache, kSize=2, stride=2):
    """Reverse max pooling: route each gradient back into its pooling window.

    grads  : 2d array, one row of flattened pooled-output gradients per filter
    cache  : the filtered (pre-pooling) images from the forward pass
    kSize  : pooling window size used in the forward pass
    stride : pooling stride used in the forward pass

    Returns a numba typed List of 2d arrays shaped like the cache entries,
    zero everywhere except at the max position(s) of each window.
    """
    #get size for the output image
    outX = int((cache[0].shape[0] - kSize)/stride + 1)
    outY = int((cache[0].shape[1] - kSize)/stride + 1)
    #get half size of pooling to substract this value from the image edges
    phX = int(kSize / 2) #poolingHalfX
    phY = int(kSize / 2) #poolingHalfY
    #create empty list to save output
    outputs = []
    #iterate all filters
    for k, image in enumerate(cache):
        #make gradient in 2d
        grad = grads[k].copy() #copy needed that reshape can run in numba
        grad = grad.reshape(outX,outY)
        #create np.array where everything is zero
        output = np.zeros((image.shape[0], image.shape[1]))
        oX, oY = 0, 0
        #iterate one image
        for i in range(phX, image.shape[0], stride):
            for j in range(phY, image.shape[1], stride):
                #window [i-phX, i+phX) of this image (2*phX wide — kSize for even kSize)
                subset = image[i-phX:i+phX, j-phY:j+phY] #+1 as at slicing np exclude last number
                #replace the max value(s) with the gradient value; note every
                #position tied for the max receives the gradient
                subset = subset.flatten()
                subset[subset == np.amax(subset)] = grad[oX, oY]
                subset = subset.reshape((kSize,kSize))
                #write the modified window back (flatten returned a copy)
                output[i-phX:i+phX, j-phY:j+phY] = subset
                oY += 1
            oX += 1
            oY = 0
        outputs.append(output)
    #typed List avoids numba's reflected-list deprecation warning
    typed_outputs = List()
    [typed_outputs.append(x) for x in outputs]
    return typed_outputs
#backward propagation for cnn
def cnn_backward_prop(input):
    """Propagate gradients back through pooling and filtering.

    input = [grads, X, imgShape, filteredCache, filters] where grads has one
    column per image and X one flattened image per column.

    Returns [filter gradients averaged over the whole batch].
    """
    upstream, X, imgShape = input[0], input[1], input[2]
    cacheF = input[3] #cacheF = filteredCache
    filters = input[4]
    #transpose so one ROW of the gradients belongs to one image
    upstream = upstream.T
    perImageGrads = []
    #iterate all images
    for idx, flatImg in enumerate(X.T):
        #gradient slice for this image, one row per filtered map
        g = np.reshape(upstream[idx], (len(cacheF[idx]), -1))
        #undo the max pooling
        g = upsample(g, cacheF[idx])
        #restore the 2d image from its flattened form
        img2d = flatImg.reshape(imgShape[0], imgShape[1])
        #gradient w.r.t. the filters for this single image
        perImageGrads.append(deapply_filter(img2d, g, filters))
    #average the per-image filter gradients over the batch
    return [np.asarray(perImageGrads).mean(axis=0)]
#update the filters based on gradient and learning rate
def update_filters(input):
    """Take one gradient-descent step on the filters.

    input = [filters, gradient, learning_rate]

    The filters array is updated IN PLACE (callers holding a reference see
    the new values) and returned wrapped in a list.
    """
    weights, grad, lr = input[0], input[1], input[2]
    weights -= lr * grad
    return [weights]
#change input layer to the new inputsize
def changeInputLayer(layer_dims, inputSize):
    """Set the network's input-layer size.

    Mutates layer_dims in place (first element becomes inputSize) and
    returns the same list for convenience.
    """
    layer_dims[0] = inputSize
    return layer_dims