-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path1-analysis.R
180 lines (144 loc) · 6.27 KB
/
1-analysis.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
library(tidyverse)
library(corrplot)
library(magrittr)
players <- readRDS("data/raw/players.Rds")
rank <- readRDS("data/raw/rank.Rds")
# ------------------------------------------------------------------------------
# two mutations to dataset
players$cume_dist = cume_dist(players$wins) # cumulative distribution column based on wins
players = players %>% mutate(goal_pct = goals / shots) # goal efficiency metric
players = players %>% arrange(desc(wins), desc(goals)) # arrange dataset by wins and then goals
# ------------------------------------------------------------------------------
# distribution similar for all numeric vars, as shown by corrplot
corrplot(cor(players[,5:10]), type = "lower", method = "number")
# ------------------------------------------------------------------------------
# cumulative distribution function plots of numeric variables
# show that many players have similar metrics below 80th-90th percentile rank
plot(ecdf(players$shots))
plot(ecdf(players$saves))
plot(ecdf(players$mvps))
plot(ecdf(players$goals))
plot(ecdf(players$assists))
plot(ecdf(players$wins))
# ------------------------------------------------------------------------------
# looking at win distribution for the top 10% of players
players %>%
# filter for top 10% of players based on wins
filter(cume_dist >= 0.90) %>%
ggplot(aes(x = wins)) +
geom_histogram(bins = 30) +
ggtitle("Win Distribution for Top 10% of Players") +
theme_bw()
# More goals per player are scored in 1v1 than 3v3. Goal cannot be
# calculated because all game_type wins/goals are grouped together
# ------------------------------------------------------------------------------
players %>%
# filter out cases where goals > shots, 158 cases of this in dataset
filter(goal_pct < 1) %>%
ggplot(aes(x = goals, y = goal_pct)) +
geom_point(alpha = 0.1) +
geom_smooth(method = lm) +
ggtitle("Goals vs Goal Percent") +
theme_bw()
# Check out the count of wins. The data is not wrong. RL's system for
# identifying shots is not perfect.
players %>% filter(goal_pct >= 1) %>% arrange(wins)
# ------------------------------------------------------------------------------
# Tier distributions for each ranked game type for season 5
head(rank)
# Re-label and re-type tier as factor for plots (index 1:19)
tier_labels <- c('Unranked',
'Bronze I', 'Bronze II', 'Bronze III',
'Silver I', 'Silver II', 'Silver III',
'Gold I', 'Gold II', 'Gold III',
'Platinum I', 'Platinum II', 'Platinum III',
'Diamond I', 'Diamond II', 'Diamond III',
'Champion I', 'Champion II', 'Champion III',
'Grand Champion')
rank$tier <- factor(rank$tier, labels = tier_labels)
# For each game type in season 5...
# 1. Create chart-ready data frame to simplify plotting
# 2. Plot a bar chart to match the distributions plots
# on the Global page of rocketleaguestats.com
################# Season 5
################# Ranked Duel
s5_duel <- rank %>%
filter(season == 5, tier != "Unranked", game_type == 10) %>%
group_by(tier, game_type) %>%
summarise(freq = n(), dist = freq/nrow(.)) %>%
mutate(bar_labels = paste(freq, "(", round(dist * 100, 2), "%)"))
s5_duel %>%
ggplot() +
geom_bar(aes(tier, dist), stat="identity") +
geom_text(aes(tier, dist, label = bar_labels), hjust=0) +
scale_y_continuous(limits = c(0, .13), labels = scales::percent) +
ggtitle("Season 5 Player Distribution", "Ranked Duel") +
ylab(paste("Percent of ", sum(s5_duel$freq), " players")) +
coord_flip()
################# Season 5
################# Ranked Doubles
s5_doubles <- rank %>%
filter(season == 5, tier != "Unranked", game_type == 11) %>%
group_by(tier, game_type) %>%
summarise(freq = n(), dist = freq/nrow(.)) %>%
mutate(bar_labels = paste(freq, "(", round(dist * 100, 2), "%)"))
s5_doubles %>%
ggplot() +
geom_bar(aes(tier, dist), stat="identity") +
geom_text(aes(tier, dist, label = bar_labels), hjust=0) +
scale_y_continuous(limits = c(0, .13), labels = scales::percent) +
ggtitle("Season 5 Player Distribution", "Ranked Doubles") +
ylab(paste("Percent of ", sum(s5_doubles$freq), " players")) +
coord_flip()
################# Season 5
################# Ranked Solo Standard
s5_ss <- rank %>%
filter(season == 5, tier != "Unranked", game_type == 12) %>%
group_by(tier, game_type) %>%
summarise(freq = n(), dist = freq/nrow(.)) %>%
mutate(bar_labels = paste(freq, "(", round(dist * 100, 2), "%)"))
s5_ss %>%
ggplot() +
geom_bar(aes(tier, dist), stat="identity") +
geom_text(aes(tier, dist, label = bar_labels), hjust=0) +
scale_y_continuous(limits = c(0, .13), labels = scales::percent) +
ggtitle("Season 5 Player Distribution", "Ranked Solo Standard") +
ylab(paste("Percent of ", sum(s5_ss$freq), " players")) +
coord_flip()
################# Season 5
################# Ranked Standard
s5_std <- rank %>%
filter(season == 5, tier != "Unranked", game_type == 13) %>%
group_by(tier, game_type) %>%
summarise(freq = n(), dist = freq/nrow(.)) %>%
mutate(bar_labels = paste(freq, "(", round(dist * 100, 2), "%)"))
s5_std %>%
ggplot() +
geom_bar(aes(tier, dist), stat="identity") +
geom_text(aes(tier, dist, label = bar_labels), hjust=0) +
scale_y_continuous(limits = c(0, .13), labels = scales::percent) +
ggtitle("Season 5 Player Distribution", "Ranked Standard") +
ylab(paste("Percent of ", sum(s5_std$freq), " players")) +
coord_flip()
# Although counts differ for rank dist for each ranked game type for season 4,
# the density distributions look very similar
# the most apparent difference is high density of tier 0 players in game type 10
# and higher density of higher tier players in game type 11
rank %>%
filter(season == 4) %>%
group_by(tier, game_type) %>%
mutate(freq = n()) %>%
ggplot() +
geom_density(aes(tier)) +
facet_wrap(~game_type)
# Rank distribution for each ranked game type and division for season 4
# (note from James: not sure if this even makes intuitive sense)
# division 0 has the most newbs across all game types
rank %>%
filter(season == 4) %>%
group_by(division, tier, game_type) %>%
mutate(freq = n()) %>%
ggplot() +
geom_density(aes(tier)) +
facet_wrap(c("game_type","division")) +
ggtitle("tier density distribution for each game type and division")