-
Notifications
You must be signed in to change notification settings - Fork 11
/
Copy pathUnit2_Recitation.R
79 lines (53 loc) · 1.5 KB
/
Unit2_Recitation.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
# VIDEO 1

# Load the training data from "NBA_train.csv" (resolved against the current
# working directory) into the data frame `NBA`.
NBA <- read.csv(file = "NBA_train.csv")

# Print a compact overview of the data frame: its columns and their types.
str(NBA)
# VIDEO 2

# How many wins does it take to make the playoffs?
# Cross-tabulate the W column (rows) against the Playoffs column (columns).
table(NBA$W, NBA$Playoffs)

# Add a points-difference column: points scored minus opponent points.
NBA$PTSdiff <- NBA$PTS - NBA$oppPTS

# Scatter plot of wins against points difference, to eyeball whether the
# relationship looks linear before fitting a line to it.
plot(NBA$PTSdiff, NBA$W)

# Simple linear regression: wins explained by points difference.
WinsReg <- lm(formula = W ~ PTSdiff, data = NBA)
summary(WinsReg)
# VIDEO 3

# Multiple linear regression for points scored, using nine predictors.
PointsReg <- lm(
  formula = PTS ~ X2PA + X3PA + FTA + AST + ORB + DRB + TOV + STL + BLK,
  data = NBA
)
summary(PointsReg)

# Residuals of the full model (printed), then the sum of squared errors.
PointsReg$residuals
(SSE <- sum(PointsReg$residuals^2))

# Root mean squared error: square root of SSE divided by the number of rows.
(RMSE <- sqrt(SSE / nrow(NBA)))

# Average value of PTS in the training data, for a sense of scale.
mean(NBA$PTS)

# Remove insignificant variables, one per refit, inspecting the summary
# after each step.
summary(PointsReg)

# Step 1: drop TOV.
PointsReg2 <- lm(
  formula = PTS ~ X2PA + X3PA + FTA + AST + ORB + DRB + STL + BLK,
  data = NBA
)
summary(PointsReg2)

# Step 2: drop DRB as well.
PointsReg3 <- lm(
  formula = PTS ~ X2PA + X3PA + FTA + AST + ORB + STL + BLK,
  data = NBA
)
summary(PointsReg3)

# Step 3: drop BLK as well.
PointsReg4 <- lm(
  formula = PTS ~ X2PA + X3PA + FTA + AST + ORB + STL,
  data = NBA
)
summary(PointsReg4)

# SSE and RMSE of the reduced model, for comparison with the full model.
SSE_4 <- sum(PointsReg4$residuals^2)
RMSE_4 <- sqrt(SSE_4 / nrow(NBA))
SSE_4
RMSE_4
# VIDEO 4

# Load the test data from "NBA_test.csv" into the data frame `NBA_test`.
NBA_test <- read.csv(file = "NBA_test.csv")

# Predict PTS for every test row with the reduced model PointsReg4.
PointsPredictions <- predict(PointsReg4, newdata = NBA_test)

# Out-of-sample R^2.
# SSE: squared error of the model's predictions on the test set.
# SST: squared error of the baseline that always predicts the mean of PTS
#      from the training data (NBA), evaluated on the test set.
# Note: SSE is reassigned here, replacing any earlier value of that name.
SSE <- sum((PointsPredictions - NBA_test$PTS)^2)
SST <- sum((mean(NBA$PTS) - NBA_test$PTS)^2)
(R2 <- 1 - SSE / SST)

# Test-set RMSE: square root of SSE divided by the number of test rows.
# Note: RMSE is reassigned here as well.
(RMSE <- sqrt(SSE / nrow(NBA_test)))