-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathPolynomialRegression(1).py
97 lines (77 loc) · 3.06 KB
/
PolynomialRegression(1).py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
import math
import pandas as pd
import matplotlib.pyplot as plt
from scipy.stats import pearsonr
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import PolynomialFeatures
# Polynomial regression of "Rings" on the single attribute that is most
# correlated with it: fit degrees 2..5, compare train/test RMSE, and plot the
# best-fitting models.

# Read the dataset from the CSV file.
df = pd.read_csv("C:/Users/manta/OneDrive/Desktop/Semester/Semester 3/DATA SCIENCE 3/B21015_Lab_5_Ds3/abalone.csv")

# Split the rows 70/30 into train and test sets (fixed seed, shuffled).
X_train, X_test = train_test_split(df, test_size=0.3, random_state=42, shuffle=True)

# Separate the target column "Rings" from the input attributes.
X_label_train = X_train["Rings"]
X_train = X_train.drop(["Rings"], axis="columns")
X_label_test = X_test["Rings"]
X_test = X_test.drop(["Rings"], axis="columns")

# Select the attribute with the highest (signed) Pearson correlation with
# "Rings".
# NOTE(review): the correlation is computed on the full dataframe (train and
# test rows together), so the test split influences feature selection --
# confirm this is intended.
maxcorr = None
m = float("-inf")
for column in X_train.columns:
    corr = pearsonr(df[column], df["Rings"])[0]
    if m < corr:
        m = corr
        maxcorr = column
if maxcorr is None:
    # Without this guard the script would fail later with an unrelated
    # NameError/KeyError when no correlation compares greater than the start
    # value (e.g. every coefficient is NaN).
    raise ValueError("no attribute has a defined Pearson correlation with 'Rings'")

# Reshape everything to column vectors, as expected by scikit-learn.
X = X_train[maxcorr].to_numpy().reshape(-1, 1)
Y = X_label_train.to_numpy().reshape(-1, 1)
T = X_test[maxcorr].to_numpy().reshape(-1, 1)
S = X_label_test.to_numpy().reshape(-1, 1)

# Fit one polynomial model per degree and record the train/test RMSE.
p = [2, 3, 4, 5]
rmse_train = []
rmse_test = []
# degree -> (predictions on train, predictions on test); kept so the best
# models can be plotted below without fitting them a second time.
predictions = {}
for degree in p:
    poly_features = PolynomialFeatures(degree)
    # Fit the feature expansion on the training data only ...
    x_poly = poly_features.fit_transform(X)
    # ... and only *apply* it to the test data (no re-fitting on test data).
    t_poly = poly_features.transform(T)
    regressor = LinearRegression().fit(x_poly, Y)

    Y_predict_training = regressor.predict(x_poly)
    Y_predict_testing = regressor.predict(t_poly)
    predictions[degree] = (Y_predict_training, Y_predict_testing)

    rmse_train.append(math.sqrt(mean_squared_error(Y, Y_predict_training)))
    rmse_test.append(math.sqrt(mean_squared_error(S, Y_predict_testing)))

# Bar plot of RMSE against polynomial degree: training data first, then test.
plt.bar(p, rmse_train)
plt.show()
plt.bar(p, rmse_test)
plt.show()

# Best-fit curve on the training data: the degree with the lowest train RMSE.
best_train_degree = p[rmse_train.index(min(rmse_train))]
Y_predict_training = predictions[best_train_degree][0]
plt.scatter(X, Y, color="Red")
plt.scatter(X, Y_predict_training, color="Green")
# Label the axis with the attribute that was actually selected above instead
# of a hard-coded column name.
plt.xlabel(str(maxcorr))
plt.ylabel("Rings")
plt.title("Best fit curve on the training data")
plt.show()

# Actual vs. predicted rings on the test data for the degree with the lowest
# test RMSE.
# NOTE(review): the degree is chosen using the test set itself, so this plot
# is an optimistic view of generalisation -- confirm this is intended.
best_test_degree = p[rmse_test.index(min(rmse_test))]
Y_predict_testing = predictions[best_test_degree][1]
plt.scatter(S, Y_predict_testing)
plt.xlabel("Actual Number of rings")
plt.ylabel("Predicted Number of Rings")
plt.title("Actual no.of Rings Vs Predicted no.of Rings")
plt.show()