-
Notifications
You must be signed in to change notification settings - Fork 0
/
LS_HEI_QA_v2.R
152 lines (119 loc) · 7.23 KB
/
LS_HEI_QA_v2.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
# purpose: calculates categorical accuracy and contributor agreement # Geneva version
# in: geojson output from laco-wiki (requiers currently reformating downloaded zip shp to geojson)
# out: statistics and graphs of accuracys and agreement
# 05.11.2018, [email protected]
LocationOfThisScript = function() # Function LocationOfThisScript returns the location of this .R script (may be needed to source other files in same dir)
{
this.file = NULL
# This file may be 'sourced'
for (i in -(1:sys.nframe())) {
if (identical(sys.function(i), base::source)) this.file = (normalizePath(sys.frame(i)$ofile))
}
if (!is.null(this.file)) return(dirname(this.file))
# But it may also be called from the command line
cmd.args = commandArgs(trailingOnly = FALSE)
cmd.args.trailing = commandArgs(trailingOnly = TRUE)
cmd.args = cmd.args[seq.int(from=1, length.out=length(cmd.args) - length(cmd.args.trailing))]
res = gsub("^(?:--file=(.*)|.*)$", "\\1", cmd.args)
# If multiple --file arguments are given, R uses the last one
res = tail(res[res != ""], 1)
if (0 < length(res)) return(dirname(res))
# Both are not the case. Maybe we are in an R GUI?
return(NULL)
}
LandUse_qa_v1=function(In, Out, a, n, s){ # arguments: In = Input data, Out = Output data, a = campaign prefix, n = redundancy depth if 0 or 1 than no redundancy (needs implementation), s = sampling population
# dependencies
x <- c('jsonlite', 'rgdal', 'reshape2', 'ggplot2', 'raster') # list of libraries
lapply(x, library, character.only = T); rm(x) # load libraries and clean
fd <- LocationOfThisScript()
#print(paste0('working dir ', fd))
source(paste(fd, 'ScottsPi_v1.R', sep="/")) # Scotts Pi, https://en.wikipedia.org/wiki/Fleiss%27_kappa # worked example
source(paste(fd, 'fun_accuracy_stratified.R', sep="/")) # accuracy Card 1982
# /home/michi/Dropbox/LandSense_QA/LandUse_qa/in/ /home/michi/Dropbox/LandSense_QA/LandUse_qa/out/ GENEVA 5 150
#In <- '/home/michi/Dropbox/LandSense_QA/LandUse_qa/in/'
#Out <- '/home/michi/Dropbox/LandSense_QA/LandUse_qa/out/'
#a <- 'GENEVA'
#n <- 5
#s <- 150
# C:/Users/ezzjfr/Documents/lucas_git/landsenseR/LandSense_QA/LandUse_qa/In/ C:/Users/ezzjfr/Documents/lucas_git/landsenseR/LandSense_QA/LandUse_qa/Out/ GENEVAJ 5 150
# In <-'C:/Users/ezzjfr/Documents/lucas_git/landsenseR/LandSense_QA/LandUse_qa/In/'
# Out <- 'C:/Users/ezzjfr/Documents/lucas_git/landsenseR/LandSense_QA/LandUse_qa/Out/'
# a <- 'GENEVAJules'
# n <- 5
# s <- 150
# program start
setwd(In) # Input data
fl <- list.files(pattern = paste0('*', '.geojson')) # geojson list
r <- raster(list.files(pattern = paste0('*', '.tif'))) # tif raster file
# agreement calculation
co <- matrix(0, ncol = 0, nrow = s) # initiate matrix
co <- data.frame(co) # as data frame
for (i in 1:length(fl)){
x <- fromJSON(txt=fl[i]) # call json
x <- as.data.frame(cbind(as.numeric(as.character(x$features$properties$ValValue)), x$features$properties$SampleID)) # extract relevant data
colnames(x) <- c('r' , 'id') # column names r = reference , id = of samples needed for comparison
x <- x[order(x$id),] # sort data
co <- cbind(co, x$r) # populate data frame
}; rm(i, x) # combine data sets and clean
mv <- as.numeric(apply(co,1,function(x) names(which.max(table(x))))) # majority response (will be compared with the map)
si <- ScottsPi(co, n) # agreement calculation
# fetch map response
x <- fromJSON(txt=fl[1]) # call geojson
x <- as.data.frame(cbind(as.numeric(as.character(x$features$properties$DataValue)), x$features$properties$SampleID)) # extract relevant data
colnames(x) <- c('m' , 'id') # column names m = map , id = of samples needed for comparison
x <- x[order(as.numeric(as.character(x$id))),] # sort data
# format data
x <- as.data.frame(cbind(si, as.numeric(as.character(x$m)), mv, as.numeric(as.character(x$id)))) # combine ScottsPi, map response and majority vote
colnames(x) <- c('s', 'm', 'r', 'id') # colnames: ScottsPi, map response, and majority vote as reference
# ouput raw QA
write.csv(x, file = paste0(Out, a, '_raw', '.csv')) # output raw results
jpeg(paste0(Out, a, '_agreementDist', '.jpg')) # figure name
plot(table(x$s)/sum(table(x$s)), main="data user agreement", xlab="Scotts PI", ylab="percentage") # agreement distribution
dev.off() # save plot
# calculate accuracy
# class proportions
f <- as.data.frame(freq(r)) # class frequencies
f <- f[-nrow(f),] # remove NA
p <- f$count/sum(f$count) # area proportions
# calculate accuracy
x <- x[ which(x$r!=99),]
con <- table(x$m, x$r) # map, reference
acc <- accuracy(con, p) # accuracy
# format data for plotting
oa <- c(acc$overall.area.weighted[1], acc$overall.area.w.upper[1]) # overall acccuracy and confidence
d <- cbind(acc$user.accuracy, acc$producer.accuracy) # add accuracy measures
lc <- rownames(d); d <- cbind(d, lc) # land cover classes
dc <- as.data.frame(cbind(d$uac, d$pac, as.numeric(as.character(d$lc)))); colnames(dc) <- c('uc', 'pc', 'lc'); dc <- melt(dc, id = 'lc') # accuracy
da <- as.data.frame(cbind(d$uaw, d$paw, as.numeric(as.character(d$lc)))); colnames(da) <- c('ua', 'pa', 'lc'); da <- melt(da, id = 'lc') # accuracy
d <- data.frame((da$value)*100, (dc$value)*100 , dc$variable, da$lc); colnames(d) <- c('acc', 'con', 'type', 'lc') # area bias, area bias confidence, land cover, site, method
d$con[abs(d$acc - d$con) > 100] <- abs(abs(d$acc[d$acc - d$con < -100]) -100) # fix exceding confidence
# graph data regulations
d$acc <- round(d$acc, digits = 1); d$con <- round(d$con, digits = 2) # round
d$con[(100 - d$acc) < d$con] <- abs(d$acc[(100 - d$acc) < d$con] - 100) # 1st fix exceding confidence
d$con[d$acc < d$con] <- d$acc[d$acc < d$con] # 2nd fix exceding confidence
d$con <- round(d$con, digits = 2) # additional round
# create accuracy comparison figure
ggplot(d, aes(x = lc, y = acc, fill = type)) + # call plot
geom_bar(position=position_dodge(), stat="identity")+ # basic bar plot
geom_errorbar(aes(ymin=acc-con, ymax=acc+con), position=position_dodge()) + # errorbars
geom_hline(yintercept = 85, color="green", linetype="dashed", size = 1)+
labs(list(title = '', x = "", y = "Accuracy [%]", color = ""))+ # captions
geom_text(label = '',fontface = "bold", colour = "red", position = position_dodge(width = 0.9), angle = 90, hjust = 1.2)+
# theme
theme(axis.text = element_text(size=15, colour = 'black'),
axis.text.x = element_text(size=15, colour = 'black', angle = 90),# font axis text
axis.title = element_text(size=15, colour = 'black'), # font axis caption
legend.text = element_text(size=15), # font legend caption
legend.title = element_text(size=15), # font legend title
axis.ticks = element_line(colour = 'black', size = 1), legend.position="bottom",
strip.background = element_blank(),
strip.text = element_text(size = 15)) # axis ticks
ggsave(paste0(Out, a, '_Accuracy', '.jpg'), width = 4, height = 4)
}
args <- commandArgs(T) # parse arguments
In <- as.character(args[1])
Out <- as.character(args[2])
a <- as.character(args[3])
n <- as.numeric(args[4])
s <- as.numeric(args[5])
result = LandUse_qa_v1(In, Out, a, n, s) # execute