forked from r-forge/surrosurv
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathpoissonize.R
220 lines (201 loc) · 8.97 KB
/
poissonize.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
################################################################################
# Survival data tranformation for fitting a Poisson model #
################################################################################
# #
# This function tranform survival data into a format compatible with #
# the glm() function for fitting a Poisson model. #
# #
# Its parameters are #
# - data : a data frame with columns: #
# - id : the patient identifyier #
# - time : the event/censoring time #
# - status : the event(1) or censoring(0) indicator #
# - ... : other factors such like the covariables #
# needed in the regression model #
# - all.breaks : the breakpoints between time intervals #
# - interval.width: the width of the time intervals on wich the risks #
# will be assumed consant. #
# To be used if intervals of the same length #
# This parameter is ignored if all.breaks is specified #
# - nInts : the number of intervals containing the same expected #
# number of events #
# This parameter is ignored if either all.breaks or #
# interval.width is specified #
# - factors : a vector of characters, containing the names of the #
# factors to be kept in the transformed data set #
# - compress : a logical, indicating whether the record with the same #
# factor profile should be summarized into one record, #
# i.e. whether the data should be expressed in a short form#
# #
################################################################################
# #
# Author: Federico Rotolo <[email protected]> #
# Original code by Stephanie Kovalchik #
# http://r.789695.n4.nabble.com/exponential-proportional-hazard-model-td805536.html #
# #
# Date: June 22, 2015 #
# Last modification on: June 15, 2016 #
#################################################################################
poissonize <- function(data,
all.breaks = NULL,
interval.width = NULL,
nInts = 8,
factors = NULL,
compress = TRUE) {
if (is.null(all.breaks)) {
if (!is.null(interval.width)) {
all.breaks <- seq(0, max(data$time),
interval.width)
} else {
# library('survival')
smod <- survfit(Surv(time, status) ~ 1, data = data)
smin <- min(smod$surv)
all.breaks <- c(0, sapply(1 - 1:nInts / nInts, function(p)
smod$time[(smod$surv - smin) / (1 - smin) <= p][1]))
}
}
tol <- max(data$time) / 1e4
all.breaks <- sort(unique(setdiff(round(
c(0, all.breaks, max(data$time)) / tol
),
NA))) * tol
# THE FOLLOWING FUNCTION COMPUTES ALL THE TIME BREAKS GIVEN THE EXIT TIME
person.breaks <- Vectorize(function(stopT)
unique(c(all.breaks[all.breaks < stopT], stopT) / tol) * tol, SIMPLIFY = FALSE)
# NEXT WE GET A LIST OF EACH SUBJECT'S TIME PERIODS
the.breaks <- person.breaks(data$time)
# NOW WE OBTAIN THE EXPANDED PERIOD OF OBSERVATION
startT <-
lapply(the.breaks, function(x)
x[-length(x)]) # LEFT TIME POINT
stopT <-
lapply(the.breaks, function(x)
x[-1]) # RIGHT TIME POINTS
# THE FOLLOWING ARE NEEDED TO COMPLETE THE LONG VERSION OF THE DATA SET
count.per.id <- sapply(startT, length)
index <- tapply(data$id, data$id, length)
index <-
cumsum(index) # INDEX OF LAST OBSERVATION FOR EACH PATIENT
event <- rep(0, sum(count.per.id))
event[cumsum(count.per.id)] <- data$status[index]
# BRING ALL OF THIS TOGETHER TO CREATE THE EXPANDED DATASET
expData <- cbind(data.frame(
id = rep(data$id[index], count.per.id),
startT = unlist(startT),
stopT = unlist(stopT),
event = event
),
data[sapply(rep(data$id[index], count.per.id),
function(x)
which(data$id == x)), factors, drop = FALSE])
# CREATE TIME VARIABLE WHICH INDICATES THE PERIOD OF OBSERVATION
# THIS WILL BE THE OFFSET IN THE POISSON MODEL FIT
expData$time <-
expData$stopT - expData$startT # LENGTH OF OBSERVATION
# NEXT WE CREATE A FACTOR FOR EACH INTERVAL THAT WILL ALLOW US
# TO HAVE A DIFFERENT RATE FOR EACH PERIOD
expData$interval <-
factor(expData$start, levels = unique(expData$start))
expData <- subset(expData, select = -c(startT, stopT))
#expData$time <- expData$time
if (compress) {
m <- aggregate(x = expData$event,
by = expData[, c('interval', factors), drop = FALSE],
sum)
colnames(m)[ncol(m)] <- 'm'
Rt <- aggregate(x = expData$time,
by = expData[, c('interval', factors), drop = FALSE],
sum)
colnames(Rt)[ncol(Rt)] <- 'Rt'
N <- aggregate(x = expData$event,
by = expData[, c('interval', factors), drop = FALSE],
function(x)
sum(!is.na(x)))
colnames(N)[ncol(N)] <- 'N'
expData <- merge(m, merge(Rt, N, sort = FALSE), sort = FALSE)
}
attr(expData, 'interval.width') <- interval.width
attr(expData, 'all.breaks') <- all.breaks
return(expData)
}
################################################################################
# Plots the hazard function and the survival curve #
# estimated via a Poisson model #
################################################################################
# #
# Parameters #
# - x : the fitted Poisson model #
# - type : the type of plot, either 'haz' for the hazard function #
# or 'Surv', for the survival curve #
# #
################################################################################
# #
# Author: Federico Rotolo <[email protected]> #
# #
# Date: June 22, 2015 #
# Last modification on: November 14, 2016 #
################################################################################
plotsson <- function(x,
type = c('survival', 'hazard'),
add = FALSE,
xscale = 1,
by,
col,
...) {
type <- tolower(type)
type <- match.arg(type)
breaks <- attr(x$data, 'all.breaks')
risks <- exp(coef(x)[grep('interval', names(coef(x)))]) *
diff(breaks)
if (type == 'survival') {
risks <- exp(-cumsum(risks))
Ylims <- 0:1
Ylab <- 'Survival probability'
} else {
Ylims <- c(0, 1.1 * max(risks))
Ylab <- 'Hazard'
}
startT <- breaks[-length(breaks)] / xscale
stopT <- breaks[-1] / xscale
if (!add)
plot(
0,
xlim = c(0, 1.1 * max(stopT)),
ylim = Ylims,
xlab = 'time',
ylab = Ylab,
type = 'n',
...
)
byVals <- if (missing(by)) {
0
} else {
unique(x$data[, by])
}
beta <- if (missing(by)) {
0
} else {
coef(x)[by]
}
if (type == 'survival') {
for (v in 1:length(byVals))
segments(
startT,
c(1, risks[2:length(risks) - 1] ^ exp(beta * byVals[v])),
stopT,
risks ^ exp(beta * byVals[v]),
col = ifelse(missing(col), v, col),
...
)
} else {
for (v in 1:length(byVals))
segments(
startT,
risks * exp(beta * byVals[v]),
stopT,
risks * exp(beta * byVals[v]),
col = ifelse(missing(col), v, col),
...
)
}
}