-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmachinelearning.py
84 lines (68 loc) · 2.45 KB
/
machinelearning.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
import pickle
import numpy as np
from keras.models import Sequential
from keras.layers import LSTM, TimeDistributed, RepeatVector, Dense
from keras.callbacks import EarlyStopping
from random import shuffle
# Load the pickled list of (input, target) training pairs from disk.
# NOTE(review): pickle.load executes arbitrary code if the file is untrusted —
# only ever load data.bin from a trusted source.
filename = 'data.bin'
with open(filename, 'rb') as infile:
    data = pickle.load(infile)
print(data)
def prepare(data):
    """Shuffle the (input, target) pairs in place and build a char-code table.

    Returns ``(inputs, targets, char_to_code, next_code)``: the shuffled
    input and target sequences, a dict assigning every distinct character an
    integer code starting at 1 (0 stays free for padding), and the first
    unused code (i.e. the number of classes including the padding class).
    """
    shuffle(data)  # in place: the caller's list is reordered too
    inputs, targets = [], []
    char_to_code = {}
    next_code = 1
    for src, tgt in data:
        inputs.append(src)
        targets.append(tgt)
        # Codes are handed out in encounter order: src chars first, then tgt.
        for seq in (src, tgt):
            for ch in seq:
                if ch not in char_to_code:
                    char_to_code[ch] = next_code
                    next_code += 1
    return inputs, targets, char_to_code, next_code
def one_hot_encode(rows, max_len, mapping=None, num_classes=None):
    """One-hot encode sequences into a (len(rows), max_len, num_classes) array.

    Each character of each sequence is looked up in ``mapping`` and its code
    position is set; positions past the end of a sequence are padded with
    class 0.

    Args:
        rows: sequences of characters to encode.
        max_len: padded length of every encoded sequence.
        mapping: char -> code dict; defaults to the module-level encode_dict.
        num_classes: size of the one-hot axis; defaults to the module-level
            encode_next_value.

    Fix: ``np.bool`` was removed in NumPy 1.24 — use the builtin ``bool``
    dtype instead.  The former implicit globals are now overridable keyword
    arguments (backward compatible: existing two-argument calls still work).
    """
    if mapping is None:
        mapping = encode_dict
    if num_classes is None:
        num_classes = encode_next_value
    encoding = np.zeros((len(rows), max_len, num_classes), dtype=bool)
    for i, s in enumerate(rows):
        for j, c in enumerate(s):
            encoding[i, j, mapping[c]] = 1
        for j in range(len(s), max_len):  # padding: class 0
            encoding[i, j, 0] = 1
    return encoding
# Build the shuffled dataset and the character <-> code tables.
data_x, data_y, encode_dict, encode_next_value = prepare(data)
# Longest input / output sequences fix the padded tensor widths.
max_len_x = max(len(x) for x in data_x)
max_len_y = max(len(y) for y in data_y)
# Inverse table for decoding model output; code 0 is the padding marker "*".
decode_dict = {value: key for key, value in encode_dict.items()}
decode_dict[0] = "*"
# One-hot tensors of shape (samples, max_len, encode_next_value).
x = one_hot_encode(data_x, max_len_x)
y = one_hot_encode(data_y, max_len_y)
# Sequence-to-sequence model: encoder LSTM -> repeated context -> decoder LSTM.
hidden_size = 16
batch_size = 128
model = Sequential()
# Encoder: compress the padded one-hot input into a single hidden vector.
model.add(LSTM(hidden_size, input_shape=(max_len_x, encode_next_value)))
# Feed the context vector to the decoder once per output timestep.
model.add(RepeatVector(max_len_y))
model.add(LSTM(hidden_size, return_sequences=True))
# Per-timestep softmax over the character classes (incl. padding class 0).
model.add(TimeDistributed(Dense(encode_next_value, activation='softmax')))
model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])
# Fix: restore_best_weights=True keeps the weights from the best val_loss
# epoch; without it the model ends up with the weights from the stopping
# epoch, which is `patience` (20) epochs past the best one.
es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=20,
                   restore_best_weights=True)
# epochs is an upper bound only — EarlyStopping ends training in practice.
history = model.fit(x, y,
                    epochs=10000,
                    batch_size=batch_size,
                    validation_split=0.2,
                    callbacks=[es]
                    )
# Persist everything needed to encode inputs / decode predictions at
# inference time, alongside the trained model itself.
save_file = [decode_dict, encode_next_value, encode_dict, max_len_x]
filename = 'machinelearning.bin'
# Context manager guarantees the file is flushed and closed even on error.
with open(filename, 'wb') as outfile:
    pickle.dump(save_file, outfile)
model.save("my_model")