-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathbatch.R
65 lines (60 loc) · 2.27 KB
/
batch.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
# An implementation of batch gradient descent for GLMs.
batch <- function(data, sequence=1:nrow(data$X), intercept=F, slope=T) {
# Find the optimal parameter values using batch gradient descent for a
# generalized linear model.
#
# Args:
# data: DATA object created through sample.data(..) (see functions.R)
# sequence: Vector of iterate indices to calculate batch for. This is in
# case you only aim to run batch on a subset of indices.
# Default is all indices.
# intercept: boolean specifying whether to include intercept term for regression
# slope: boolean specifying whether to include slope term for regression
#
# Returns:
# A p x length(sequence) matrix where the jth column is the theta update
# with iterate index sequence[j].
# Check input.
stopifnot(
all(is.element(c("X", "Y", "model"), names(data))),
length(sequence) > 0, sequence[1] > 1
)
p <- ncol(data$X)
glm.model <- data$model
# Return the mean if one desires to fit with a zero slope.
# NOTE(ptoulis): This was a bit confusing. Where are we using no-slope?
#if (slope == FALSE) {
# theta.batch <- t(apply(data$X, 2, function(x) {
# cumsum(x)/(1:length(x))
# }))
# theta.batch <- theta.batch[, sequence]
# colnames(theta.batch) <- sequence
# return(theta.batch)
#}
# Initialize parameter matrix for batch (p x length(sequence)).
# Will return this matrix.
theta.batch <- matrix(0, nrow=p, ncol=length(sequence))
colnames(theta.batch) <- sequence
# Include or exclude the intercept term in the regression formula.
if (intercept == FALSE) {
glm.formula <- "y ~ X+0"
} else {
glm.formula <- "y ~ X"
}
# Main iteration: idx = index to store the ith update
for (idx in 1:length(sequence)) {
i <- sequence[idx]
X <- data$X[1:i, ]
y <- data$Y[1:i]
# Make the update.
if (glm.model$name == "gaussian") {
theta.new <- glm(as.formula(glm.formula), family=gaussian)$coefficients
} else if (glm.model$name == "poisson") {
theta.new <- glm(as.formula(glm.formula), family=poisson)$coefficients
} else if (glm.model$name == "logistic") {
theta.new <- glm(as.formula(glm.formula), family=binomial)$coefficients
}
theta.batch[, idx] <- theta.new
}
return(theta.batch)
}