-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathphe_data.R
58 lines (50 loc) · 1.71 KB
/
phe_data.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
library(data.table)
library(stringr)
library(lubridate)
library(readxl)
# Assemble the path of a PHE data file.
#
# Arguments:
#   basename  file-name template; every literal "$" in it is replaced by
#             `dateid`
#   dateid    identifier of the data drop; used both as the sub-directory
#             name and as the value substituted into `basename`
#   root      directory that holds one sub-directory per `dateid`; defaults
#             to the location the original script hard-coded
#
# Returns a character vector "<root>/<dateid>/<basename with $ replaced>".
phe_file = function(basename, dateid, root = "~/Documents/uk_covid_data_sensitive/phe")
{
    dateid = as.character(dateid)

    # Substitute the placeholder literally (fixed = TRUE) so that neither "$"
    # nor the contents of `dateid` are interpreted as regex syntax. mapply()
    # pairs templates with ids so vector inputs are still handled.
    filename = mapply(
        function(template, id) gsub("$", id, template, fixed = TRUE),
        basename, dateid,
        USE.NAMES = FALSE
    )

    # Accept `root` with or without trailing separators.
    file.path(sub("/+$", "", root), dateid, as.character(filename))
}
# Load various PHE data files
# Read the "<dateid> COVID19 Deaths.xlsx" workbook of one data drop.
#
# Arguments:
#   dateid  identifier of the data drop, forwarded to phe_file()
#
# Returns the sheet contents as a data.table.
phe_deaths = function(dateid)
{
    path = phe_file("$ COVID19 Deaths.xlsx", dateid)

    # guess_max raises the number of rows readxl inspects when guessing
    # column types.
    deaths = read_excel(path, guess_max = 5000)

    # Convert to data.table by reference (no copy).
    setDT(deaths)
    deaths
}
# Read "<dateid> Negatives pillar1.csv" of one data drop.
#
# Arguments:
#   dateid  identifier of the data drop, forwarded to phe_file()
#
# Returns the table produced by fread() for that file.
phe_negatives1 = function(dateid)
{
    path = phe_file("$ Negatives pillar1.csv", dateid)
    fread(path)
}
# Read "<dateid> Negatives pillar2.csv" of one data drop.
#
# Arguments:
#   dateid  identifier of the data drop, forwarded to phe_file()
#
# Returns the table produced by fread() for that file.
phe_negatives2 = function(dateid)
{
    path = phe_file("$ Negatives pillar2.csv", dateid)
    fread(path)
}
# Read "Anonymised Combined Line List <dateid>.csv" of one data drop.
#
# Arguments:
#   dateid  identifier of the data drop, forwarded to phe_file()
#
# Returns the table produced by fread() for that file.
phe_positives = function(dateid)
{
    path = phe_file("Anonymised Combined Line List $.csv", dateid)
    fread(path)
}
# Read "SGTF_linelist_<dateid>.csv" of one data drop.
#
# Arguments:
#   dateid  identifier of the data drop, forwarded to phe_file()
#
# Returns the table produced by fread() for that file.
phe_sgtf = function(dateid)
{
    path = phe_file("SGTF_linelist_$.csv", dateid)
    fread(path)
}
# Count SGTF and non-SGTF records per specimen date and group for one data
# drop.
#
# Arguments:
#   dateid      identifier of the data drop, forwarded to phe_positives() and
#               phe_sgtf()
#   group_cols  names of the columns pasted together with "|" to form the
#               `group` key; the derived column `age5` is available here
#   criterion   "under30CT" (default) counts on the sgtf_under30CT flag;
#               "ONS" derives the split from P2CH1CQ / P2CH2CQ / P2CH3CQ;
#               any other value counts on the sgtf flag
#
# Returns a data.table keyed by specimen_date and group, with count columns
# `other` and `sgtf`.
sgtf_counts = function(dateid, group_cols, criterion = "under30CT")
{
    positives = phe_positives(dateid)
    # NOTE(review): this local shares its name with the `sgtf` column used in
    # the fallback branch below; inside the data.table call the column takes
    # precedence when it exists.
    sgtf = phe_sgtf(dateid)

    # all = TRUE keeps records that appear in only one of the two tables.
    linked = merge(positives, sgtf, by = "FINALID", all = TRUE)

    # Five-year age bands, with everything from 90 upwards in a single band.
    linked[, age5 := pmin(90, (age %/% 5) * 5)]

    # One "|"-separated label per row built from the requested columns.
    linked[, group := do.call(paste, c(.SD, sep = "|")), .SDcols = group_cols]

    # In every branch the date is parsed from specimen_date.x, i.e. the copy
    # of the column that merge() took from the positives table.
    if (criterion == "under30CT") {
        return (linked[
            !is.na(sgtf_under30CT),
            .(
                other = sum(sgtf_under30CT == 0),
                sgtf = sum(sgtf_under30CT == 1)
            ),
            keyby = .(specimen_date = dmy(specimen_date.x), group)
        ])
    }

    if (criterion == "ONS") {
        # other: all three P2CH*CQ values non-zero;
        # sgtf: first two non-zero and the third equal to zero.
        return (linked[
            !is.na(P2CH1CQ),
            .(
                other = sum(P2CH1CQ != 0 & P2CH2CQ != 0 & P2CH3CQ != 0),
                sgtf = sum(P2CH1CQ != 0 & P2CH2CQ != 0 & P2CH3CQ == 0)
            ),
            keyby = .(specimen_date = dmy(specimen_date.x), group)
        ])
    }

    # Fallback for any other criterion: use the sgtf flag as given.
    linked[
        !is.na(sgtf),
        .(
            other = sum(sgtf == 0),
            sgtf = sum(sgtf == 1)
        ),
        keyby = .(specimen_date = dmy(specimen_date.x), group)
    ]
}