-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathCODE_STAIML.R
109 lines (94 loc) · 6.47 KB
/
CODE_STAIML.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
# Load necessary libraries
library(ggplot2)
library(readr)
# Read the Communities and Crime data straight from the UCI repository.
# The file is comma separated, has no header row and marks missing values
# with "?".
url <- "https://archive.ics.uci.edu/ml/machine-learning-databases/communities/communities.data"
data <- read.table(
  url,
  header = FALSE,
  sep = ",",
  na.strings = "?",
  # read.table() treats "'" as a quote character and "#" as a comment start
  # by default. The community-name field is free text, so both are switched
  # off to keep such characters from swallowing fields or truncating rows.
  quote = "\"",
  comment.char = ""
)
# Assign column names following the UCI attribute list (communities.names).
# communities.data is documented as having 128 attributes: 5 identifiers,
# 122 predictors and the ViolentCrimesPerPop target. The raw crime columns
# (murders, murdPerPop, ..., arsonsPerPop) exist only in the separate
# unnormalized dataset; listing them here made the name vector longer than
# the data, so the assignment failed.
column_names <- c(
  "state", "county", "community", "communityname", "fold", "population", "householdsize",
  "racepctblack", "racePctWhite", "racePctAsian", "racePctHisp", "agePct12t21", "agePct12t29",
  "agePct16t24", "agePct65up", "numbUrban", "pctUrban", "medIncome", "pctWWage", "pctWFarmSelf",
  "pctWInvInc", "pctWSocSec", "pctWPubAsst", "pctWRetire", "medFamInc", "perCapInc", "whitePerCap",
  "blackPerCap", "indianPerCap", "AsianPerCap", "OtherPerCap", "HispPerCap", "NumUnderPov",
  "PctPopUnderPov", "PctLess9thGrade", "PctNotHSGrad", "PctBSorMore", "PctUnemployed",
  "PctEmploy", "PctEmplManu", "PctEmplProfServ", "PctOccupManu", "PctOccupMgmtProf",
  "MalePctDivorce", "MalePctNevMarr", "FemalePctDiv", "TotalPctDiv", "PersPerFam", "PctFam2Par",
  "PctKids2Par", "PctYoungKids2Par", "PctTeen2Par", "PctWorkMomYoungKids", "PctWorkMom",
  "NumIlleg", "PctIlleg", "NumImmig", "PctImmigRecent", "PctImmigRec5", "PctImmigRec8",
  "PctImmigRec10", "PctRecentImmig", "PctRecImmig5", "PctRecImmig8", "PctRecImmig10",
  "PctSpeakEnglOnly", "PctNotSpeakEnglWell", "PctLargHouseFam", "PctLargHouseOccup",
  "PersPerOccupHous", "PersPerOwnOccHous", "PersPerRentOccHous", "PctPersOwnOccup",
  "PctPersDenseHous", "PctHousLess3BR", "MedNumBR", "HousVacant", "PctHousOccup",
  "PctHousOwnOcc", "PctVacantBoarded", "PctVacMore6Mos", "MedYrHousBuilt", "PctHousNoPhone",
  "PctWOFullPlumb", "OwnOccLowQuart", "OwnOccMedVal", "OwnOccHiQuart", "RentLowQ",
  "RentMedian", "RentHighQ", "MedRent", "MedRentPctHousInc", "MedOwnCostPctInc",
  "MedOwnCostPctIncNoMtg", "NumInShelters", "NumStreet", "PctForeignBorn", "PctBornSameState",
  "PctSameHouse85", "PctSameCity85", "PctSameState85", "LemasSwornFT", "LemasSwFTPerPop",
  "LemasSwFTFieldOps", "LemasSwFTFieldPerPop", "LemasTotalReq", "LemasTotReqPerPop",
  "PolicReqPerOffic", "PolicPerPop", "RacialMatchCommPol", "PctPolicWhite", "PctPolicBlack",
  "PctPolicHisp", "PctPolicAsian", "PctPolicMinor", "OfficAssgnDrugUnits", "NumKindsDrugsSeiz",
  "PolicAveOTWorked", "LandArea", "PopDens", "PctUsePubTrans", "PolicCars", "PolicOperBudg",
  "LemasPctPolicOnPatr", "LemasGangUnitDeploy", "LemasPctOfficDrugUn", "PolicBudgPerPop",
  "ViolentCrimesPerPop"
)
# Fail early, with a readable message, if the file layout and the name list
# ever disagree.
if (length(column_names) != ncol(data)) {
  stop(
    "Expected ", length(column_names), " columns but the data has ",
    ncol(data), ".",
    call. = FALSE
  )
}
colnames(data) <- column_names
# a) Distribution of violent crime across states.
# `state` is a numeric code (per the UCI attribute list), so it is converted
# to a factor: with a continuous x, geom_boxplot() draws a single box for
# all states instead of one box per state.
ggplot(data, aes(x = factor(state), y = ViolentCrimesPerPop)) +
  geom_boxplot() +
  labs(
    title = "Distribution of Violent Crimes per Population Across States",
    x = "State",
    y = "Violent Crimes per Population"
  )
# b) Median household income vs. violent crime: scatter plot overlaid with
# a fitted straight line (confidence band switched off).
ggplot(data = data, mapping = aes(medIncome, ViolentCrimesPerPop)) +
  geom_point() +
  geom_smooth(se = FALSE, method = "lm") +
  ggtitle("Correlation between Household Income and Violent Crimes") +
  xlab("Median Income") +
  ylab("Violent Crimes per Population")
# c) Unemployment vs. violent crime: scatter plot overlaid with a fitted
# straight line (confidence band switched off).
ggplot(data = data, mapping = aes(PctUnemployed, ViolentCrimesPerPop)) +
  geom_point() +
  geom_smooth(se = FALSE, method = "lm") +
  ggtitle("Relationship between Unemployment Rate and Violent Crimes") +
  xlab("Percentage Unemployed") +
  ylab("Violent Crimes per Population")
# d) Violent crime in urban vs. rural communities.
# pctUrban is a continuous share, so converting it directly to a factor
# draws one box per distinct value rather than an urban/rural comparison.
# Communities are instead split into two groups at `urban_cutoff`.
# NOTE(review): the 0.5 cutoff assumes pctUrban is scaled to [0, 1] and is an
# analyst's choice - confirm against the dataset documentation.
urban_cutoff <- 0.5
ggplot(
  data,
  aes(
    x = factor(
      pctUrban >= urban_cutoff,
      levels = c(FALSE, TRUE),
      labels = c("Rural", "Urban")
    ),
    y = ViolentCrimesPerPop
  )
) +
  geom_boxplot() +
  labs(
    title = "Difference in Violent Crime Rates between Urban and Rural Areas",
    x = "Urban/Rural",
    y = "Violent Crimes per Population"
  )
# e) Racial composition vs. violent crime: one scatter plot per group, each
# overlaid with a fitted straight line (confidence band switched off).

# African American share
ggplot(data = data, mapping = aes(racepctblack, ViolentCrimesPerPop)) +
  geom_point() +
  geom_smooth(se = FALSE, method = "lm") +
  ggtitle("Relationship between Percentage of African American Population and Violent Crimes") +
  xlab("Percentage African American") +
  ylab("Violent Crimes per Population")

# Caucasian share
ggplot(data = data, mapping = aes(racePctWhite, ViolentCrimesPerPop)) +
  geom_point() +
  geom_smooth(se = FALSE, method = "lm") +
  ggtitle("Relationship between Percentage of Caucasian Population and Violent Crimes") +
  xlab("Percentage Caucasian") +
  ylab("Violent Crimes per Population")

# Hispanic share
ggplot(data = data, mapping = aes(racePctHisp, ViolentCrimesPerPop)) +
  geom_point() +
  geom_smooth(se = FALSE, method = "lm") +
  ggtitle("Relationship between Percentage of Hispanic Population and Violent Crimes") +
  xlab("Percentage Hispanic") +
  ylab("Violent Crimes per Population")
# f) Police presence vs. violent crime.
# The column list has no `PolicRate`, so the original test received NULL and
# the plot could not find the variable. PolicPerPop is used as the
# police-presence measure instead.
# NOTE(review): PolicPerPop is assumed to be police officers per population
# (per the UCI attribute list) - confirm it is the intended measure.
# Rows missing either value are dropped up front so that the correlation
# test and the plot are based on the same communities.
police_cols <- c("PolicPerPop", "ViolentCrimesPerPop")
police_data <- data[complete.cases(data[, police_cols]), ]
cor.test(police_data$PolicPerPop, police_data$ViolentCrimesPerPop)
ggplot(police_data, aes(x = PolicPerPop, y = ViolentCrimesPerPop)) +
  geom_point() +
  geom_smooth(method = "lm") +
  labs(
    title = "Relationship between Police Presence and Violent Crime Rate",
    x = "Police Rate",
    y = "Violent Crimes per Population"
  )
# g) Median year housing units were built vs. violent crime: scatter plot
# overlaid with a fitted straight line (confidence band switched off).
ggplot(data = data, mapping = aes(MedYrHousBuilt, ViolentCrimesPerPop)) +
  geom_point() +
  geom_smooth(se = FALSE, method = "lm") +
  ggtitle("Correlation between Median Year of Housing Units Built and Violent Crimes") +
  xlab("Median Year of Housing Units Built") +
  ylab("Violent Crimes per Population")
# i) Share of households with public assistance income vs. violent crime:
# scatter plot overlaid with a fitted straight line (confidence band
# switched off).
ggplot(data = data, mapping = aes(pctWPubAsst, ViolentCrimesPerPop)) +
  geom_point() +
  geom_smooth(se = FALSE, method = "lm") +
  ggtitle("Relationship between Percentage of Households with Public Assistance Income and Violent Crimes") +
  xlab("Percentage of Households with Public Assistance Income") +
  ylab("Violent Crimes per Population")
# j) Population density vs. violent crime.
# The section title is about population density, but the original code
# analysed pctUrban (already covered in d). The PopDens column is used here
# so that the test, the plot and its labels all describe the same quantity.
# NOTE(review): confirm PopDens is the intended density measure.
cor.test(data$PopDens, data$ViolentCrimesPerPop)
ggplot(data, aes(x = PopDens, y = ViolentCrimesPerPop)) +
  geom_point() +
  geom_smooth(method = "lm") +
  labs(
    title = "Relationship between Population Density and Violent Crime Rate",
    x = "Population Density",
    y = "Violent Crimes per Population"
  )
# h) Gang unit deployment vs. violent crime.
# The original plotted PctLargHouseFam under the label "Presence of gang
# units"; LemasGangUnitDeploy is the column that matches that label.
# NOTE(review): the UCI attribute list describes this column as ordinal
# codes (no / part time / yes) - confirm. It is therefore shown as a factor
# with one box per level instead of a scatter plot.
# Communities without a recorded value are left out rather than being drawn
# as a separate NA group.
gang_data <- data[!is.na(data$LemasGangUnitDeploy), ]
ggplot(
  gang_data,
  aes(x = factor(LemasGangUnitDeploy), y = ViolentCrimesPerPop)
) +
  geom_boxplot() +
  labs(
    title = "Violent Crime Rate by Gang Unit Deployment",
    x = "Presence of gang units",
    y = "Violent Crimes per Population"
  )