-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrbm_with_random_forest.py
67 lines (54 loc) · 2.16 KB
/
rbm_with_random_forest.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
import itertools
import numpy as np
import pandas as pd
import evaluation as e
import read_dataset as rd
from sklearn.pipeline import Pipeline
from sklearn.neural_network import BernoulliRBM
from sklearn.ensemble import RandomForestClassifier
def evaluate_parameters():
    """Evaluate the RBM + random-forest pipeline over a hyper-parameter grid.

    Loads the training data with ``limit=25``, builds one classifier per
    combination of the candidate values listed below, and hands each one to
    ``e.evaluate_classifier`` together with the path of the plot to produce.
    """
    X, y = get_train_data(limit=25)
    print('Start learning...')

    # Candidate values for each hyper-parameter; every combination is tried.
    forests = [70]
    rbm_components = [1100]
    rbm_learning_rate = [0.06]
    rbm_n_iter = [20]

    grid = itertools.product(
        forests, rbm_components, rbm_learning_rate, rbm_n_iter)
    for trees, components, learning_rate, n_iter in grid:
        classifier = get_classifier(trees, components, learning_rate, n_iter)
        # NOTE(review): the file name only encodes the number of trees, so
        # combinations that differ in the other parameters would write to the
        # same plot path if the grid is ever extended.
        name = "plots_pipeline/pipeline_{}.png".format(trees)
        e.evaluate_classifier(classifier, X, y, name=name)
def submission(trees=70, components=1100, learning_rate=0.06, n_iter=20):
    """Train the pipeline and write test-set predictions to a CSV file.

    Args:
        trees: forwarded to ``get_classifier`` (random-forest size).
        components: forwarded to ``get_classifier`` (RBM components).
        learning_rate: forwarded to ``get_classifier`` (RBM learning rate).
        n_iter: forwarded to ``get_classifier`` (RBM iterations).

    The predictions are saved to ``submit_rbm.csv`` with two columns:
    ``ImageId`` (1-based row number) and ``Label`` (predicted class).
    """
    train_X, train_y, test_X = get_train_and_test_data()

    print("Defining classifiers")
    model = get_classifier(trees, components, learning_rate, n_iter)

    print("Training classifier")
    model.fit(train_X, train_y)
    labels = model.predict(test_X)

    # Most submissions are made with a CSV file, so write one here.
    image_ids = range(1, len(labels) + 1)
    frame = pd.DataFrame({"ImageId": image_ids, "Label": labels})
    frame.to_csv('submit_rbm.csv', index=False, header=True)
def get_classifier(trees, components, learning_rate, n_iter):
    """Build an unfitted RBM -> random-forest pipeline.

    Args:
        trees: number of estimators for the ``RandomForestClassifier``.
        components: ``n_components`` of the ``BernoulliRBM``.
        learning_rate: ``learning_rate`` of the ``BernoulliRBM``.
        n_iter: ``n_iter`` of the ``BernoulliRBM``.

    Returns:
        A ``Pipeline`` with the steps ``'rbm'`` and ``'forest'``, in that
        order.
    """
    feature_extractor = BernoulliRBM(
        n_components=components,
        learning_rate=learning_rate,
        n_iter=n_iter,
        verbose=True,
    )
    forest = RandomForestClassifier(n_estimators=trees)
    stages = [('rbm', feature_extractor), ('forest', forest)]
    return Pipeline(steps=stages)
def scale(X):
    """Rescale every column of ``X`` into the range [0, 1].

    Each column is shifted by its minimum and divided by its value range
    (max - min). Dividing by the range rather than by the maximum alone keeps
    the result inside [0, 1] even when a column's minimum is not zero or when
    its values are negative.

    Args:
        X: 2-D array-like of numeric values; statistics are taken along
            axis 0 (one min/max per column).

    Returns:
        Array of the same shape as ``X`` with values in [0, 1].
    """
    col_min = np.min(X, 0)
    col_range = np.max(X, 0) - col_min
    # The small constant avoids a division by zero for constant columns,
    # which are mapped to 0.
    return (X - col_min) / (col_range + 0.0001)  # 0-1 scaling
def get_train_data(limit=-1):
    """Load, augment and scale the training set.

    Args:
        limit: forwarded unchanged to ``rd.read_train`` (presumably caps the
            number of samples read, with -1 meaning no cap; confirm in
            ``read_dataset``).

    Returns:
        Tuple ``(X, y)``: the scaled feature matrix and the labels returned
        by ``rd.nudge_dataset``.
    """
    print('Loading train data')
    features, labels = rd.read_train(limit=limit)

    print('Augmenting data set')
    features, labels = rd.nudge_dataset(features, labels)

    print('Scaling data')
    return scale(features), labels
def get_train_and_test_data(train_limit=-1, test_limit=-1):
    """Return the prepared training set together with the scaled test set.

    Args:
        train_limit: passed to ``get_train_data`` as its ``limit``.
        test_limit: passed to ``rd.read_test`` as its ``limit``.

    Returns:
        Tuple ``(X, y, test_X)``. Note that the test features are scaled by
        a separate ``scale`` call, i.e. with their own column statistics
        rather than those of the training set.
    """
    train_X, train_y = get_train_data(train_limit)

    print('Loading test data')
    scaled_test = scale(rd.read_test(limit=test_limit))

    return train_X, train_y, scaled_test
# Script entry point: this call runs at module level, so executing (or
# importing) the file trains the pipeline and writes the submission CSV.
# To run the hyper-parameter evaluation instead, uncomment the line below.
#evaluate_parameters()
submission()