-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathLogistic regression model
44 lines (31 loc) · 1.47 KB
/
Logistic regression model
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
# Load the diabetes data set from a CSV file chosen interactively.
# choose.files() is only available in Windows builds of R, so fall back to
# the portable file.choose() on every other platform.
csv_path <- if (.Platform$OS.type == "windows") {
  choose.files(caption = "Select csv", multi = FALSE)
} else {
  file.choose()
}
# choose.files() returns character(0) when the dialog is cancelled; stop with
# a clear message instead of letting read.csv() fail on an empty path.
if (length(csv_path) != 1L || !nzchar(csv_path)) {
  stop("No CSV file selected", call. = FALSE)
}
DM <- read.csv(csv_path)
head(DM)

# Outcome is treated as 0 = not diabetic, anything else = diabetic further
# down in this script. It is stored as a factor so that glm() models it as a
# binary response.
DM$Outcome <- as.factor(DM$Outcome)
str(DM)
# is.na(DM)  # optional: inspect missing values
nrow(DM)
# xtabs(~ Outcome + Age, data = DM)  # optional: outcome counts by age
# Logistic regression of the diabetes outcome on five predictors.
DM_reg <- glm(
  Outcome ~ Pregnancies + Glucose + BloodPressure + BMI +
    DiabetesPedigreeFunction,
  data = DM,
  family = "binomial"
)
summary(DM_reg)

# Convert the null and residual deviances to log-likelihoods (deviance / -2).
null <- DM_reg[["null.deviance"]] / -2
proposed <- DM_reg[["deviance"]] / -2

## McFadden's Pseudo R^2 = [ LL(Null) - LL(Proposed) ] / LL(Null)
(null - proposed) / null
# Pair each fitted probability with the outcome that was actually observed.
# The observed outcome is read from the model frame rather than from DM:
# glm() drops rows with missing values by default, in which case DM$Outcome
# would be longer than the fitted values and data.frame() would either fail
# or silently misalign the two columns.
predicted_data <- data.frame(
  predicted_diabetes = DM_reg$fitted.values,
  Original_outcome = model.frame(DM_reg)$Outcome
)
str(predicted_data)

# Replace the 0/1 factor with readable labels for the plot legend.
predicted_data$Original_outcome <- ifelse(
  test = predicted_data$Original_outcome == 0,
  yes = "Not Diabetic",
  no = "diabetic"
)

# Sort from lowest to highest predicted probability and number the rows so
# the rank can be used as the x axis.
predicted_data <- predicted_data[
  order(predicted_data$predicted_diabetes, decreasing = FALSE),
]
# seq_len() stays empty for a zero-row frame, whereas 1:nrow() gives c(1, 0).
predicted_data$rank <- seq_len(nrow(predicted_data))
# ggplot(), aes(), geom_point(), xlab() and ylab() all come from ggplot2,
# which this script never attached; load it before plotting.
library(ggplot2)

# Plot the predicted probability for every observation, ordered by rank and
# coloured by the outcome that was actually observed.
ggplot(data = predicted_data, aes(x = rank, y = predicted_diabetes)) +
  geom_point(aes(color = Original_outcome), alpha = 1, shape = 4, stroke = 2) +
  xlab("Ranks") +
  ylab("Predicted probability of getting diabetes")