-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathLesson7-Regression.py
81 lines (55 loc) · 2.43 KB
/
Lesson7-Regression.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
#!/usr/bin/python3
"""
Starter code for the regression mini-project.
Loads up/formats a modified version of the dataset
(why modified? we've removed some trouble points
that you'll find yourself in the outliers mini-project).
Draws a little scatterplot of the training/testing data
You fill in the regression code where indicated:
"""
import sys
import joblib
sys.path.append("../tools/")
from feature_format import featureFormat, targetFeatureSplit
dictionary = joblib.load( open("../final_project/final_project_dataset_modified.pkl", "rb") )
### list the features you want to look at--first item in the
### list will be the "target" feature
features_list = ["bonus", "salary"]
data = featureFormat( dictionary, features_list, remove_any_zeroes=True, sort_keys = '../tools/python2_lesson06_keys.pkl')
target, features = targetFeatureSplit( data )
### training-testing split needed in regression, just like classification
from sklearn.model_selection import train_test_split
feature_train, feature_test, target_train, target_test = train_test_split(features, target, test_size=0.5, random_state=42)
train_color = "b"
test_color = "r"
### Your regression goes here!
### Please name it reg, so that the plotting code below picks it up and
### plots it correctly. Don't forget to change the test_color above from "b" to
### "r" to differentiate training points from test points.
from sklearn import linear_model
reg = linear_model.LinearRegression()
reg.fit(feature_train,target_train)
### draw the scatterplot, with color-coded training and testing points
import matplotlib.pyplot as plt
for feature, target in zip(feature_test, target_test):
plt.scatter( feature, target, color=test_color )
for feature, target in zip(feature_train, target_train):
plt.scatter( feature, target, color=train_color )
### labels for the legend
plt.scatter(feature_test[0], target_test[0], color=test_color, label="test")
plt.scatter(feature_test[0], target_test[0], color=train_color, label="train")
### draw the regression line, once it's coded
try:
plt.plot( feature_test, reg.predict(feature_test) )
except NameError:
pass
#reg.fit(feature_test, target_test)
#plt.plot(feature_train, reg.predict(feature_train), color="b")
plt.xlabel(features_list[1])
plt.ylabel(features_list[0])
plt.legend()
plt.show()
print(reg.coef_)
print(reg.intercept_)
print(reg.score(feature_train,target_train))
print(reg.score(feature_test,target_test))