-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathstat_model.R
145 lines (122 loc) · 4.86 KB
/
stat_model.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
##pkgs = c("lubridate", "ciTools", "tidyverse", "broom", "stringr", "doMC", "splines")
##install.packages(pkgs)
library(lubridate)
library(splines)
library(datacovidbr)
library(tidyverse)
library(broom)
library(stringr)
library(doMC)
registerDoMC(4)
eweekdata = brasilio() %>%
filter(place_type == "city", city != "Importados/Indefinidos") %>%
select(date, state, city, confirmed, deaths, estimated_population_2019, city_ibge_code) %>%
group_by(state, city) %>%
arrange(date) %>%
mutate(eweek = epiweek(date),
daily_cases = confirmed - lag(confirmed, n=1, default=0),
daily_deaths = deaths - lag(deaths, n=1, default=0),
cidade = paste(city, state, sep="-")) %>%
select(-confirmed, -deaths) %>%
group_by(cidade, estimated_population_2019, city_ibge_code, eweek) %>%
summarise(wcases = sum(daily_cases, na.rm=TRUE),
wdeaths = sum(daily_deaths, na.rm=TRUE)) %>%
ungroup() %>%
filter(eweek < lubridate::epiweek(lubridate::today())) %>%
group_by(cidade, estimated_population_2019, city_ibge_code) %>%
mutate(myweek = eweek - lag(eweek, 1, 0),
myweek = ifelse(myweek > 8, 1, myweek),
myweek = cumsum(myweek)) %>%
ungroup()
rm_few = eweekdata %>%
group_by(cidade, estimated_population_2019, city_ibge_code) %>%
summarise(tcases = sum(wcases), n=n()) %>%
filter(n < 2 | tcases < 100) %>%
ungroup()
rm_neg = eweekdata %>%
filter(wcases < 0) %>%
select(cidade, estimated_population_2019, city_ibge_code) %>%
distinct()
eweekdata = eweekdata %>% anti_join(
rm_few %>% bind_rows(rm_neg),
by = "city_ibge_code"
)
eweekdata$cidade = relevel(factor(eweekdata$cidade), "São Paulo-SP")
fit = glm(wcases ~ ns(myweek, 3) + cidade + offset(log(estimated_population_2019)),
family=quasipoisson, data = eweekdata)
model = tidy(fit)
mycoefs = model %>%
filter(str_detect(term, "cidade")) %>%
mutate(term = str_remove(term, "cidade"))
## automatizar predicoes (não utilizar intervalos de confianca e sim de predicao)
newdata = eweekdata %>%
group_by(cidade, estimated_population_2019, city_ibge_code) %>%
top_n(2, eweek) %>%
mutate(eweek = eweek + 2L,
wcases = NA,
wdeaths = NA,
myweek = myweek + 2L) %>%
ungroup() %>%
select(-wcases)
rodpoisson = function(n, lambda, disp){
rnbinom(n, size=(lambda/(disp-1)), mu=lambda)
}
getPI = function(data4pred, inputdata, model, nSim=10000, confs = c(.80, .90, .95)){
wcases_expected = predict(model, type='response')
preds = foreach(i=1:nSim, .combine = cbind) %dopar% {
eweekdata_sim = inputdata %>%
mutate(wcases = rodpoisson(length(wcases_expected), lambda = wcases_expected,
disp = summary(model)$dispersion))
##mutate(wcases = rpois(length(wcases_expected), lambda = wcases_expected))
fit_sim = glm(wcases ~ ns(myweek, 3) + cidade + offset(log(estimated_population_2019)),
family=quasipoisson, data = eweekdata_sim)
wcases_pred = predict(fit_sim, newdata=data4pred, type='response')
##rpois(length(wcases_pred), lambda = wcases_pred)
rodpoisson(length(wcases_pred), lambda = wcases_pred, disp = summary(fit_sim)$dispersion)
}
lwr = (1-confs)/2
names(lwr) = sprintf("lwr%.3f", confs)
upr = confs + lwr
names(upr) = sprintf("upr%.3f", confs)
PI = t(apply(preds, 1, quantile, probs = c(lwr, upr)))
colnames(PI) = names(c(lwr, upr))
cbind(pred = predict(model, newdata=data4pred, type='response'), PI) %>%
as_tibble()
}
out = newdata %>% bind_cols(newdata %>% getPI(eweekdata, fit))
### se for adicionar IDH
library(readxl)
IDH = read_excel("dados_fixos/atlas2013_dadosbrutos_pt.xlsx", sheet=2) %>%
filter(ANO == 2010) %>%
select(ANO:Município, IDHM) %>%
mutate(QIDHM = case_when(
IDHM < .5 ~ "Muito Baixo",
IDHM >= .5 & IDHM < .6 ~ "Baixo",
IDHM >= .6 & IDHM < .7 ~ "Médio",
IDHM >= .7 & IDHM < .8 ~ "Alto",
IDHM >= .8 ~ "Muito Alto"
),
QIDHM = factor(QIDHM,
levels = c("Muito Alto", "Alto", "Médio", "Baixo", "Muito Baixo")))
eweekdata2 = eweekdata %>%
inner_join(IDH, by=c('city_ibge_code'='Codmun7')) %>%
select(-ANO, -UF, -Codmun6, -Município)
fit2 = glm(wcases ~ ns(myweek, 3) + QIDHM + cidade + offset(log(estimated_population_2019)),
family=poisson(link="log"), data = eweekdata2)
model2 = tidy(fit2)
mycoefs2 = model2 %>%
filter(str_detect(term, "cidade")) %>%
mutate(term = str_remove(term, "cidade"))
newdata = eweekdata2 %>%
group_by(cidade, estimated_population_2019, city_ibge_code) %>%
top_n(2, eweek) %>%
mutate(eweek = eweek + 2L,
wcases = NA,
wdeaths = NA,
myweek = myweek + 2L) %>%
ungroup() %>%
select(-wcases)
newdata %>%
add_ci(fit2, yhatName = "wcases", names = c("lci", "uci")) %>%
add_pi(fit2, names = c("lpi", "upi")) %>%
select(cidade, eweek, wcases, lci, uci, pred, lpi, upi)