functions.py
import numpy as np
import pandas as pd
from numba import jit
import time
from sklearn.metrics import accuracy_score
from sklearn.metrics import recall_score
from sklearn.metrics import log_loss
#add the input and output layer sizes to layer_dims and the output activation to activation
def adapt_layer(layer_dims, activation, inputSize, numClasses):
    #prepend the input layer
    layer_dims.insert(0, inputSize)
    #append the output layer
    layer_dims.append(numClasses)
    #the output activation depends on the number of classes
    if numClasses == 1:
        activation.append("sigmoid")
    else:
        activation.append("softmax")
    return layer_dims
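#illustrative example (assumed values, not part of the original module):
#   adapt_layer([5, 3], ["relu", "relu"], inputSize=4, numClasses=1) returns [4, 5, 3, 1]
#   and appends "sigmoid" to the activation list in place.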
#initialize weights and biases for every layer of the network
def init_params(layer_dims):
    #weights and biases are stored in a dict, keyed by layer index
    parameters = {}
    #get the number of layers (including the input layer)
    L = len(layer_dims)
    #iterate over all layers that carry parameters
    for i in range(L-1):
        idx = i + 1
        parameters['W' + str(idx)] = np.random.randn(layer_dims[idx], layer_dims[idx-1]) * 0.1
        parameters['b' + str(idx)] = np.random.randn(layer_dims[idx], 1) * 0.1
    return parameters
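#illustrative example (assumed values): init_params([4, 5, 1]) returns W1 with shape (5, 4),
#b1 with shape (5, 1), W2 with shape (1, 5) and b2 with shape (1, 1).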
#applies the activation function (or its derivative) of the given type to Z
def activation_func(Z, type, direction, Y=0, alpha=0.01):
    #clip Z so that np.exp does not overflow
    Z = np.clip(Z, -500, 500)
if type == "sigmoid":
if direction == "forward":
output = 1/(1+np.exp(-Z))
if direction == "backward":
sig = activation_func(Z, "sigmoid", "forward")
output = sig * (1 - sig)
if type == "tanH":
if direction == "forward":
output = np.tanh(Z)
if direction == "backward":
output = 1 - (np.tanh(Z) * np.tanh(Z))
if type == "elu":
if direction == "forward":
pass
if direction == "backward":
pass
if type == "relu":
if direction == "forward":
output = np.maximum(0,Z)
if direction == "backward":
output = np.ones_like(Z)
output[Z <= 0] = 0
if type == "leakyRelu":
if direction == "forward":
y1 = ((Z >= 0) * Z)
y2 = ((Z < 0) * Z * alpha)
output = y1 + y2
if direction == "backward":
output = np.ones_like(Z)
output[Z < 0] = alpha
if type == "softmax":
if direction == "forward":
shiftz = Z - np.max(Z) # this makes softmax more stable
temp = np.exp(Z)
output = temp/np.sum(temp, axis = 0)
if direction == "backward":
temp = np.zeros((Y.shape[1],(int(np.amax(Y)) + 1)))
for i, elem in enumerate(Y.T):
temp[i, int(elem)] = 1
output = dA - temp.T
return output
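#illustrative example (assumed values):
#   activation_func(np.array([[-1.0, 2.0]]), "relu", "forward")  -> array([[0., 2.]])
#   activation_func(np.array([[-1.0, 2.0]]), "relu", "backward") -> array([[0., 1.]])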
#execute the forward propagation
def forward_prop(input):
    #in this function the values of the nodes are calculated
    X = input[0]
    params = input[1]
    activation = input[2]
    #save intermediate steps in this dict
    cache = {}
    #A_curr holds the activations of the current layer and is renewed every iteration;
    #start with the input data
    A_curr = X
#iterate through the layers from left to right
for i, layer in enumerate(activation):
        #shift the index by 1 because weights and biases are numbered starting at 1
idx = i + 1
A_prev = A_curr
#get weights and bias
W_curr = params["W" + str(idx)]
b_curr = params["b" + str(idx)]
#linear forward
Z_curr = np.dot(W_curr, A_prev) + b_curr
#apply activation func
A_curr = activation_func(Z_curr, layer, "forward")
        #save intermediate results in the cache
        cache["A" + str(i)] = A_prev #<- important: i is used here, not idx
        cache["Z" + str(idx)] = Z_curr
    #A_curr is the output of the final layer
    #cache contains all intermediate values; A[n] is the input of layer n+1, Z[n+1] its pre-activation output
return [A_curr, cache]
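#illustrative example (assumed shapes): for X of shape (4, m), layer_dims [4, 5, 1] and
#activation ["relu", "sigmoid"], forward_prop([X, params, activation]) returns AL of shape
#(1, m) and a cache with keys A0, Z1, A1, Z2.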
#execute backward propagation
def backward_prop(input):
AL = input[0]
Y = input[1]
params = input[2]
activation = input[3]
cache = input[4]
numClasses = input[5]
costType = input[6]
costParams = input[7]
#this dictionary will save the gradients
grads = {}
#get number of entries
m = Y.shape[1]
    #assure that Y has the same shape as AL
    #Y = Y.reshape(AL.shape)
    #the base gradient is created differently depending on the number of classes
    if costType == "crossEntropy":
        if numClasses == 1:
            #use the derivative of the binary cross entropy
            dA_prev = -(np.divide(Y, AL) - np.divide(1 - Y, 1 - AL))
        else:
            #the softmax backward pass already returns softmax(Z) - one_hot(Y),
            #so the seed gradient for the last layer is simply one
            dA_prev = np.ones_like(AL)
if costType == "focalLoss":
#check if loss params are defined
if len(costParams) == 0:
gamma = 2.0
alpha = 0.25
else:
gamma = costParams[0]
alpha = costParams[1]
if numClasses == 1:
pt_0 = np.where(np.equal(Y, 0), AL, np.zeros_like(AL))
pt_1 = np.where(np.equal(Y, 1), AL, np.ones_like(AL))
dA_prev = Y * np.power(1. - pt_1, gamma) * (Y * np.log(pt_1) + pt_0 - 1)
#iterate through the layers from right to left
for i, layer in reversed(list(enumerate(activation))):
        #shift the index by 1 because weights and biases are numbered starting at 1
idx = i + 1
dA_curr = dA_prev
#get the values from the cache and params
A_prev = cache["A" + str(i)]
Z_curr = cache["Z" + str(idx)]
W_curr = params["W" + str(idx)]
b_curr = params["b" + str(idx)]
        #calculate all gradients
dZ_curr = dA_curr * activation_func(Z_curr, layer, "backward", Y)
dW_curr = np.dot(dZ_curr, A_prev.T) / m
db_curr = np.sum(dZ_curr, axis=1, keepdims=True) / m
dA_prev = np.dot(W_curr.T, dZ_curr)
#save the gradient of input, weight and bias
grads["dA" + str(i)] = dA_prev
grads["dW" + str(idx)] = dW_curr
grads["db" + str(idx)] = db_curr
return [grads]
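#illustrative note (assumed shapes): for the same two-layer setup as in the forward_prop
#example, backward_prop returns grads with keys dA0, dA1, dW1, db1, dW2, db2, where each
#dW/db has the same shape as the corresponding W/b in params.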
#compute cost for current model
def compute_cost(input):
AL = input[0]
Y = input[1]
numClasses = input[2]
costType = input[3]
epsilon=input[4]
costParams=input[5]
#get number of entries
m = AL.shape[1]
    #use a tiny value to prevent log(0) from producing -inf and consequently NaN in the cost
if epsilon:
e = np.nextafter(0, 1)
else:
e = 0
if costType == "crossEntropy": #aka log loss in ml
        #the cost is calculated differently depending on the number of classes
if numClasses == 1:
#binary cross entropy
cost = -1 / m * (np.dot(Y, np.log(AL + e).T) + np.dot(1 - Y, np.log(1 - AL + e).T))
cost = cost[0][0] # get single value from cost
else:
#cross entropy
cost = log_loss(Y.flatten(), AL.T)
elif costType == "focalLoss":
#check if loss params are defined
if len(costParams) == 0:
gamma = 2.0
alpha = 0.25
else:
gamma = costParams[0]
alpha = costParams[1]
if numClasses == 1:
pt_0 = np.where(np.equal(Y, 0), AL, np.zeros_like(AL))
pt_1 = np.where(np.equal(Y, 1), AL, np.ones_like(AL))
#cost = -np.sum(alpha * np.power(1. - pt_1, gamma) * np.log(pt_1))-np.sum((1-alpha) * np.power( pt_0, gamma) * np.log(1. - pt_0))
            cost = -np.mean(alpha * np.power(1. - pt_1, gamma) * np.log(pt_1)) - np.mean((1 - alpha) * np.power(pt_0, gamma) * np.log(1. - pt_0))
        else:
            raise NotImplementedError("Focal loss is only implemented for binary classification")
    else:
        raise ValueError("Cost function not supported")
#convert np float to regular float
cost = float(cost)
return [cost]
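#illustrative example (assumed values): for near-perfect binary predictions,
#compute_cost([np.array([[1.0, 0.0]]), np.array([[1.0, 0.0]]), 1, "crossEntropy", True, []])
#returns a cost close to 0.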
#compute the requested evaluation scores during training
def compute_scores(input):
types = input[0]
AL = input[1]
Y = input[2]
numClasses = input[3]
threshold = input[4]
    #predictions are derived from the probabilities differently depending on the number of classes
if numClasses == 1:
#convert prob to classes
Y_pred = np.zeros(AL.shape)
Y_pred[AL >= threshold] = 1
else:
Y_pred = np.argmax(AL, axis=0)
Y_pred = np.reshape(Y_pred, (1, -1))
scores = []
    #check which scores should be calculated
    for elem in types:
        if elem == "accuracy":
            #calc score
            score = accuracy_score(Y.flatten(), Y_pred.flatten())
        elif elem == "pr":
            #recall of the positive class
            score = recall_score(Y.flatten(), Y_pred.flatten())
        else:
            raise ValueError("Unknown score type: " + str(elem))
#convert np float to regular float
score = float(score)
scores.append(score)
return [scores]
#update weights and biases
def update_params(input):
params = input[0]
activation = input[1]
grads = input[2]
learning_rate = input[3]
for i, layer in enumerate(activation):
idx = i + 1
params["W" + str(idx)] -= learning_rate * grads["dW" + str(idx)]
params["b" + str(idx)] -= learning_rate * grads["db" + str(idx)]
return [params]
#create batches
def createBatches(data, batch_size):
#no batches should be created
if batch_size == 0:
return [data]
#no need to split if batch size exceeds data size
if batch_size >= data.shape[1]:
        return [data]
#get number of batches
num_batches = int(data.shape[1] / batch_size)
#split
batches = np.array_split(data, num_batches, axis=1)
return batches
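#illustrative example (assumed shapes): for data of shape (5, 100) and batch_size 32,
#createBatches returns 3 arrays split along axis 1 (np.array_split keeps the pieces as
#even as possible, e.g. 34/33/33 columns).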
# reshape data so that it can be used by NN
def reshape(data):
#(209, 64, 64) -> (4096, 209) for 209 images with 64x64 resolution
_data = data.reshape(data.shape[0], -1).T
return _data
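#illustrative example (assumed shape): reshape(np.zeros((209, 64, 64))).shape == (4096, 209)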
#check if a numpy array contains NaN values
def hasNaN(data):
    #a NaN anywhere in the array makes the sum NaN
    total = np.sum(data)
    return np.isnan(total)
#check data and settings for consistency
def checkErrors(layer_activation, layer_dims, X, Y):
    #warn if the first hidden layer is bigger than the input layer
    if layer_dims[0] > X.shape[1]:
        print("Warning: Hidden layer is bigger than input layer")
    if len(layer_activation) != len(layer_dims):
        raise ValueError("Layer dimensions (layer_dims) and activation types (layer_activation) must have the same size")
if X.shape[0] != Y.shape[0]:
raise ValueError("X and Y have a different number of entries")
#check for NaN
if pd.isnull(X).any():
raise ValueError("The training data contains NaN values. Please clean the data")
    #check for strings by coercing every value to a number; strings become NaN
    temp = pd.DataFrame(X.flatten())
    temp = temp.apply(lambda x: pd.to_numeric(x, errors='coerce'))
if pd.isnull(temp).values.any():
raise ValueError("The training data contains string values. Please clean the data")
#run func(args); in debug mode, also time the call and print its duration
def debug(debugMode, func, args):
if debugMode:
start = time.time()
output = func(args)
end = time.time()
dura = str(round(end - start, 4)) + "s"
print(" " + func.__name__ + " finished (" + dura + ")")
else:
output = func(args)
return output
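#minimal end-to-end smoke test (illustrative sketch only; the tiny random dataset,
#layer sizes, learning rate and epoch count below are assumptions, not part of the module)
if __name__ == "__main__":
    np.random.seed(0)
    X = np.random.randn(4, 10)                          #4 features, 10 samples
    Y = np.random.randint(0, 2, (1, 10)).astype(float)  #binary labels
    activation = ["relu"]
    layer_dims = adapt_layer([5], activation, 4, 1)     #layer_dims -> [4, 5, 1]
    params = init_params(layer_dims)
    for epoch in range(5):
        AL, cache = forward_prop([X, params, activation])
        cost = compute_cost([AL, Y, 1, "crossEntropy", True, []])[0]
        grads = backward_prop([AL, Y, params, activation, cache, 1, "crossEntropy", []])[0]
        params = update_params([params, activation, grads, 0.1])[0]
        print("epoch", epoch, "cost", round(cost, 4))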