-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy patheda.Rmd
381 lines (294 loc) · 10 KB
/
eda.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
---
title: "Exploratory Analysis"
date: "`r format(Sys.time(), '%d %B, %Y')`"
output: html_document
---
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE,
message=FALSE,
warning=FALSE)
```
```{r}
lc <- read.csv("datasets/processed/lc.csv")
```
## First moment descriptive statistics
Plot Canada Lynx within BC
```{r}
plot(decimalLatitude ~ decimalLongitude,
pch = 16,
col = "#046C9A",
data = lc,
xlab = "Longtitude",
ylab = "Latitude",
main = "Lynx Locations")
```
Visualizing the observation window
```{r}
library(spatstat)
library(sf)
load("datasets/raw/BC_Covariates.Rda")
# Create a SpatialPolygons object for the window
bc_window_sf <- st_as_sf(DATA$Window)
bc_window_owin <- as.owin(bc_window_sf)
# Visualize the window
plot(bc_window_owin, main = "Observation Window")
```
Plot observations in BC window
```{r}
# Convert to a ppp object
lc_ppp <- ppp(x = lc$decimalLongitude, # X coordinates
y = lc$decimalLatitude, # Y coordinates
window = bc_window_owin, # Observation window
)
# Access the rejected points
# attr(lc_ppp, "rejects")
# Remove rejected/out of window points
lc_ppp <- as.ppp(lc_ppp)
# Plot the ppp object
plot(lc_ppp,
pch = 16,
cex = 0.5,
cols = "#046C9A",
main = "Marked Canada Lynx")
```
```{r}
#Get units
unitname(lc_ppp)
```
```{r}
# summary(lc_ppp)
intensity(lc_ppp)
```
### Intensity
```{r}
#Split into a 3 by 3 quadrat and count points
Q <- quadratcount(lc_ppp,
nx = 2,
ny = 4)
#Plot the output
plot(lc_ppp,
pch = 16,
cex = 0.5,
cols = "#046C9A",
main = "Lynx locations")
plot(Q, cex = 2, col = "red", add = T)
```
```{r}
#Plot the output Note the use of image = TRUE
plot(intensity(Q, image = T),
main = "Lynx intensity")
plot(lc_ppp, pch = 16, cex = 0.6, cols = "white", add = T)
plot(lc_ppp, pch = 16, cex = 0.5, cols = "black", add = T)
```
From the above plot it can be seen that the assumption of homogeneity is not appropriate for this dataset as the lynx tend to be clustered in certain areas of the study site, whereas others have no lynx at all. We are further doing quadrat test to verify the inhomogeneity.
Quadrat test of homogeneity
```{r}
quadrat.test(Q)
```
The small p-value suggests that there is a significant deviation from homogeneity. So, the assumption of homogeneity is not met.
### Relationships with covariates
We are usually interested in determining whether the intensity depends on a covariate(s). One simple approach to check for a relationship between inhomogeneous $\lambda(u)$ and a spatial covariate $Z(u)$ is via quadrat counting.
```{r}
# Create 5 elevation classes with equal width
elev_classes <- cut(DATA$Elevation, breaks = 5, labels = c("low", "low-medium", "medium", "high-medium", "high"))
table(elev_classes[lc_ppp])
```
```{r}
library(viridis)
cols = terrain.colors(5)
# Plot the elevation class image and overlay the lynx locations
plot(elev_classes, col = cols, main = "Elevation Classes", par(bg="grey50", cex.main = 2, cex = 0.6))
points(lc_ppp, pch = 16, cex = 0.6, col = "black")
```
```{r}
# Create 5 forest classes with equal width
forest_classes <- cut(DATA$Forest, breaks = 5, labels = c("low", "low-medium", "medium", "high-medium", "high"))
table(forest_classes[lc_ppp])
```
```{r}
# Plot the forest class image and overlay the lynx locations
plot(forest_classes, col = cols, main = "Forest Classes", par(bg="grey50", cex.main = 2, cex = 0.6))
points(lc_ppp, pch = 16, cex = 0.6, col = "black")
```
```{r}
# Create 5 human footprint index classes with equal width
hfi_classes <- cut(DATA$HFI, breaks = 5, labels = c("low", "low-medium", "medium", "high-medium", "high"))
table(hfi_classes[lc_ppp])
```
```{r}
# Plot the human footprint index class image and overlay the lynx locations
plot(hfi_classes, main = "Footprint Index Classes", par(bg="grey50", cex.main = 2, cex = 0.6))
points(lc_ppp, pch = 16, cex = 0.5, col = "white")
```
```{r}
# Create 5 dist water classes with equal width
dist_water_classes <- cut(DATA$Dist_Water, breaks = 5, labels = c("low", "low-medium", "medium", "high-medium", "high"))
table(dist_water_classes[lc_ppp])
```
```{r}
# Plot the dist wanter class image and overlay the lynx locations
plot(dist_water_classes, main = "Dist Water Classes", par(bg="grey50", cex.main = 2, cex = 0.6))
points(lc_ppp, pch = 16, cex = 0.5, col = "white")
```
Based on a visual inspection, it appears that the heterogeneity observed in the data may be associated with a preference for certain elevations and distances from water.
More formally, in testing for relationships with covariates we are assuming that $λ$ is a function of $Z$, such that $$\lambda(u)=\rho(Z(u))$$
A non-parametric estimate of $\rho$ can be obtained via kernel estimation, available via the rhohat() function.
```{r}
#Estimate Rho
rho <- rhohat(lc_ppp, DATA$Elevation)
```
```{r}
plot(rho,
xlim = c(0,3000),
xlab = "Elevation",
main = "Rho vs elevation")
```
There’s a non-linear relationship between elevation and lynx intensity. Majority of the lynx are at low elevations than would be expected by chance, fewer lynx at intermediate elevations and no lynx at high elevations.
```{r}
#Estimate Rho
rho_forest <- rhohat(lc_ppp, DATA$Forest)
```
```{r}
plot(rho_forest,
#xlim = c(0,3000),
xlab = "Forest",
main = "Rho vs Forest")
```
```{r}
#Estimate Rho
rho_water <- rhohat(lc_ppp, DATA$Dist_Water)
```
```{r}
plot(rho_water,
xlim = c(0,15000),
xlab = "Water Dist",
main = "Rho vs Water Dist")
```
```{r}
rho_hfi <- rhohat(lc_ppp, DATA$HFI)
```
```{r}
plot(rho_hfi,
#xlim = c(0,3000),
xlab = "Human Footprint Index",
main = "Rho vs HFI")
```
### Plot on BC Map
```{r}
library(raster)
dmap1 <- density.ppp(lc_ppp, sigma = bw.ppl(lc_ppp),edge=T)
r1 <- raster(dmap1)
```
```{r}
library(leaflet)
#make sure we have right CRS, which in this case is British National Grid
bc_crs <- "+proj=aea +lat_0=45 +lon_0=-126 +lat_1=50 +lat_2=58.5 +x_0=1000000 +y_0=0 +datum=NAD83 +units=m +no_defs"
crs(r1) <- sp::CRS(bc_crs)
```
```{r}
# create a colour palet
pal <- colorNumeric(c("#41B6C4", "red"), values(r1),
na.color = "transparent")
#and then make map!
leaflet() %>%
addTiles() %>%
addRasterImage(r1, colors = pal, opacity = 0.8) %>%
addLegend(pal = pal, values = values(r1),
title = "Canada Lynx in BC")
```
### Nearest neighbour distances
```{r}
col_pal <- colorRampPalette(c('orange', 'green'))
# Calculate nearest neighbor distances
nn_dist <- data.frame(nndist(lc_ppp))
# Add distance to nearest neighbor mark
marks(lc_ppp) <- nn_dist
# Plot point pattern with distance to nearest neighbor mark
plot(lc_ppp, main = "Seperation Distance",
which.marks = NULL,
cols = col_pal(nrow(nn_dist)), #The colours of the points
pch = 16)
```
## Second Moment Descriptives
### K-Function
```{r}
# Estimate the k- function
k_lc <- Kest(lc_ppp)
# visualise the results
plot (k_lc, main = "Homogeneous K-Function", lwd = 2)
```
Here we can see that theoretical k-function $k_{pois}(r)$ deviates from other k-function corrections indicates clustering but these estimates assume homogeneity.
```{r}
# Bootstrapped CIs
# rank = 1 means the max and min
# values will be used for CI
E_lc <- envelope(lc_ppp , Kest , rank = 1, nsim = 19, fix.n = T)
# visualise the results
plot (E_lc , main = "Homogeneous K-function")
```
Now we have evidence that suggests significant clustering, but these estimates assume homogeneity.
Relaxing homogeneity assumption.
```{r}
#Estimate intensity
lambda_lc <- density(lc_ppp, bw.ppl)
Kinhom_lc <- Kinhom(lc_ppp, lambda_lc)
# Estimate a strictly positive density
lambda_lc_pos <- density(lc_ppp, sigma=bw.ppl, positive=TRUE)
# Simulation envelope (with points drawn from the estimated intensity)
E_lc_inhom <- envelope(lc_ppp,
Kinhom,
simulate = expression(rpoispp(lambda_lc_pos)),
correction="border",
rank = 1,
nsim = 19,
fix.n = TRUE)
# visualise the results
plot(E_lc_inhom, xlim = c(0,320000), main = "Inhomogeneous K-function", lwd = 2)
```
When correcting for inhomogeneity, the clustering is not as strong homogeneous k-function. Clustering appears to exist in and around 0 to 125000 units.
### Pair Correlation Function
```{r}
#Simulation envelope (with points drawn from the estimated intensity)
pcf_lc_inhom <- envelope(lc_ppp,
pcfinhom,
simulate = expression(rpoispp(lambda_lc_pos)),
rank = 1,
nsim = 19)
# visualise the results
par(mfrow = c(1,2))
plot(pcf_lc_inhom, main = "Inhomogeneous g-function")
# Zoom in on range where significant deviations appear
plot(pcf_lc_inhom, xlim = c(0,20000), main = "", lwd = 2)
```
There appear to be more lynx than expected by random chance between 0 - 13500 as $g(r) > 1$. Beyond that, the locations of lynx appear not to exhibit any significant correlations.
### Collinearity
Collinearity among elevation, forest, human footprint index and water distance.
```{r}
cor.im(DATA$Elevation, DATA$Forest, DATA$HFI, DATA$Dist_Water, use = "complete.obs")
```
The correlation coefficients are relatively weak. We can proceed without too much worry.
## Fit Model
```{r}
# mean centering and scaling the elevation and distance to water variables
mu <- mean(DATA$Elevation)
stdev <- sd(DATA$Elevation)
DATA$Elevation_scaled <- eval.im((Elevation - mu)/stdev, DATA)
mu <- mean(DATA$Dist_Water)
stdev <- sd(DATA$Dist_Water)
DATA$Dist_Water_scaled <- eval.im((Dist_Water - mu)/stdev, DATA)
```
```{r}
fit_1 <- ppm(lc_ppp ~ Elevation_scaled + I(Elevation_scaled^2) + Forest + I(Forest^2) + Dist_Water_scaled + I(Dist_Water_scaled^2) + HFI, data = na.omit(DATA))
fit_1
```
```{r}
quadrat.test(fit_1, nx = 2, ny =4)
```
Significant deviation from model's predictions.
```{r}
# Calculate the residuals
res <- residuals(fit_1)
plot(res,
cols = "transparent",
main = "Model Residuals")
```