-
Notifications
You must be signed in to change notification settings - Fork 7
/
Copy pathplot_uncertainty.r
393 lines (341 loc) · 23.5 KB
/
plot_uncertainty.r
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
# plot_uncertainty.r
####
# California Natural and Working Lands Carbon and Greenhouse Gas
# Model (CALAND) Copyright (c) 2020, The Regents of the University of
# California, through Lawrence Berkeley National Laboratory (subject to
# receipt of any required approvals from the U.S. Dept. of Energy). All
# rights reserved.
# If you have questions about your rights to use or distribute this software,
# please contact Berkeley Lab's Intellectual Property Office at
# [email protected].
#
# NOTICE. This Software was developed under funding from the U.S. Department
# of Energy and the U.S. Government consequently retains certain rights. As
# such, the U.S. Government has been granted for itself and others acting on
# its behalf a paid-up, nonexclusive, irrevocable, worldwide license in the
# Software to reproduce, distribute copies to the public, prepare derivative
# works, and perform publicly and display publicly, and to permit others to do so.
####
# This software and its associated input data are licensed under a modified BSD open source license
# Please see license.txt for details
# plot the uncertainty range (low and high emsissions) around average values for chosen variable over time for each scenario
# variables are plotted using the .csv outputs from `plot_caland()`
############################################# Overview of `plot_uncertainty()`#############################################
# The `plot_uncertainty()` function is designed to plot shaded uncertainty bounds around average values for a single
# variable over time for each scenario using .csv outputs from `plot_caland()`. This requires having run `CALAND()` and
# `plot_caland()` three times each for each desired scenario; once for the mean inputs, and the other two times for lower
# uncertainty bounds, and upper uncertainty bounds. For example, using the following combination of inputs to `CALAND()`
# will generate minimum and maximum emissions:
# Low emissions: low initial carbon density (i.e., mean-SD) and high carbon fluxes (i.e., mean+SD)
# High emissions: high initial carbon density (i.e., mean+SD) and low carbon fluxes (i.e., mean-SD)
################################################ Inputs to `plot_uncertainty()`################################################
# Inputs to `plot_uncertainty()` are .csv output files from `plot_caland()`. Within each .csv file, there can be any number of
# scenarios. Each group of scenarios must have a corresponding .csv file for the mean, low, and high emissions. It is possible
# to plot up to three groupings (a, b, c) with `plot_uncertainty()`. Three scenario groups ammounts to a total of nine .csv
# input files to `plot_uncertainty()`; three input .csv files (mean, low, and high emissions) for each group. All .csv files
# have matching formats. For a single group ("group a"), they are assumed to be in caland/outputs/mean, caland/outputs/low,
# caland/outputs/high, respectively, unless `figdir` is specified differently than the default:
# `figdir = c("mean","low","high")`.
############################################### Arguments in `plot_uncertainty()`##############################################
# 1. `start_year`: year to start plotting; default is `start_year = 2010`.
# 2. `end_year`: year to end plotting; default is `end_year = 2051`.
# 3. `varname`: name of a single variable to plot (see the .csv output filenames from `plot_caland()`); the variable name is
# between the land type and "_output" in these file names. However, do not include the surrounding "\_" characters.
# 4. `ylabel`: label for y-axis corresponding to the units of your selected output variable, and whether they are changes from
# baseline (i.e., `varname` ending in "diff") or absolute values. Note that this function does not convert units so the
# output units of your desired plotting variable must be correctly matched with the .csv file.
# - Here are some exmaples:
# - `ylabel = "Change from Baseline (MMT CO2eq)"`
# - `ylabel = "MMT CO2eq"`
# - `ylabel = "Change from Baseline (MMT C)"`
# - `ylabel = "MMT C"`
# - `ylabel = "Change from Baseline (Mg C/ha)"`
# - `ylabel = "Mg C/ha"`
# - `ylabel = "Change from Baseline (Mg C/ac)"`
# - `ylabel = "Mg C/ac"`
# 5. `file_tag`: tag to add to end of the new file names created by `plot_uncertainty()` (e.g., to note what time period is
# plotted); default is `file_tag = ""` (nothing added).
# 6. `data_dir_a`: the path to the directory containing the three folders of `plot_caland()` outputs (mean, low, and high
# emissions) for scenario group a; do not include the "/" character at the end; default is `data_dir_a = "./outputs"`.
# 7. `figdirs_a`: a vector of three folder names within `data_dir_a` containing the .csv data files to plot. The folder names
# must be assigned in order of mean, low, and high; do not include the "/" character at the end of each folder name. The
# default is `figdirs_a = c("mean", "low", "high")`; thus, the .csv files for the mean and lower and upper uncertainty
# bounds are assumed to be in `data_dir_a`/mean, `data_dir_a`/low, and `data_dir_a`/high, respectively, and in the
# appropriate region, land type, and ownership directories. The figures will be written to the folder representing the
# mean.
# 8. `scenarios_a`: a vector of one or more scenario names for group 'a' that are listed in the Scenario column in the
# .csv file containing `varname` for the mean.
# 9. `scen_labs_a`: a vector of one or more scenario labels; must match the same number of elements in `scenarios_a`, and
# correspond directly to the order of elements in `scenarios_a`.
# 10. `data_dir_b`: the path to the directory containing the three folders of `plot_caland()` outputs (mean, low, and
# high emissions) for scenario group b; do not include the "/" character at the end; default is `data_dir_b = NA` (i.e.,
# no group b), which will plot only scenario group a.
# 11. `figdirs_b`: a vector of three folder names within `data_dir_b` containing the .csv data files to plot. The folder
# names must be assigned in order of mean, low, and high; do not include the "/" character at the end of each folder
# name. The default is `figdirs_b = NA` (i.e., no group b). However if you choose to plot a group b, the location of
# the .csv files for the mean and lower and upper uncertainty bounds should follow the same logic as group a. The
# figures will be written to the folder representing the mean.
# 12. `scenarios_b`: a vector of one or more scenario names for group 'b' that are listed in the Scenario column in the
# .csv file containing `varname` for the mean.
# 13. `scen_labs_b`: a vector of one or more scenario labels; must match the same number of elements in `scenarios_b`,
# and correspond directly to the order of elements in `scenarios_b`.
# 14. `data_dir_c`: the path to the directory containing the three folders of `plot_caland()` outputs (mean, low, and
# high emissions) for scenario group c; do not include the "/" character at the end; default is `data_dir_c = NA`
# (i.e., no group c), which will only plot scenario group a, or group a and b.
# 15. `figdirs_c`: a vector of three folder names within `data_dir_c` containing the .csv data files to plot. The
# folder names must be assigned in order of mean, low, and high; do not include the "/" character at the end of each
# folder name. The default is `figdirs_c = NA` (i.e., no group c). However if you choose to plot a group c, the
# location of the .csv files for the mean and lower and upper uncertainty bounds should follow the same logic as
# group a. The figures will be written to the folder representing the mean.
# 16. `scenarios_c`: a vector of one or more scenario names for group 'c' that are listed in the Scenario column in the
# .csv file containing `varname` for the mean.
# 17. `scen_labs_c`: a vector of one or more scenario labels; must match the same number of elements in `scenarios_c`,
# and correspond directly to the order of elements in `scenarios_c`.
# 18. `reg`: a vector of one or more region names to plot; default is `reg = "All_region"` (i.e., aggregated across all
# regions), but can be any number of available regions: "All_region", "Central_Coast", "Central_Valley", "Delta",
# "Deserts", "Eastside", "Klamath", "North_Coast", "Sierra_Cascades", "South_Coast".
# 19. `lt`: a vector of one or more land types to plot; default is `lt = "All_land"` (i.e., aggregated across all land
# types), but can be any number of available land types
# 20. `own`: a vector of one or more ownerships to plot; default = "All_own" (i.e., aggregated across all ownerships),
# but can be any number of available ownerships.
# Notes on `plot_uncertainty()`:
# plotting the ocean region does not provide any comparison with land types because only seagrass exists in the ocean
# Seagrass has only a subset of all the .csv files output from `plot_caland()`; thus, if including Seagrass make sure
# that the desired `varname` is available
############################################### Outputs from `plot_uncertainty()` ###############################################
# Output figures (.pdf) and corresponding data (.csv) will be saved to the mean folder for scenario group a.
####################################################### Start script ############################################################
# make sure that the working directory is caland/
# setwd("<your_path>/caland/")
setwd("./")
# this enables java to use up to 4GB of memory for reading and writing excel files
options(java.parameters = "-Xmx4g" )
# Load all the required packages
libs <- c( "ggplot2", "grid", "RColorBrewer" )
for( i in libs ) {
if( !require( i, character.only=T ) ) {
cat( "Couldn't load", i, "\n" )
stop( "Use install.packages() to download this library\nOr use the GUI Package Installer\nInclude dependencies, and install it for local user if you do not have root access\n" )
}
library( i, character.only=T )
}
###################################################################################################################################################
################################################### Assign function to wrap text in plot titles ###################################################
###################################################################################################################################################
wrapper <- function(x, ...)
{
paste(strwrap(x, ...), collapse = "\n")
}
###################################################################################################################################################
########################### Assign function to plot variables over time with shaded uncertainty range for each scenario ###########################
###################################################################################################################################################
plot_uncertainty <- function(start_year=2010, end_year=2051, varname, ylabel, file_tag="",
data_dir_a = "./outputs", figdirs_a=c("mean","low","high"), scenarios_a, scen_labs_a,
data_dir_b = NA, figdirs_b = NA, scenarios_b = NA, scen_labs_b = NA,
data_dir_c = NA, figdirs_c = NA, scenarios_c = NA, scen_labs_c = NA,
reg = "All_region", lt = "All_land", own="All_own") {
outputdir_a = paste0(data_dir_a, "/")
outputdir_b = paste0(data_dir_b, "/")
outputdir_c = paste0(data_dir_c, "/")
num_reg = length(reg)
num_lt = length(lt)
num_own = length(own)
num_scen_a = length(scenarios_a)
num_scen_b = length(scenarios_b)
num_scen_c = length(scenarios_c)
if(num_scen_a != length(scen_labs_a)) {
stop( "Ensure that the length and content of scen_labs_a matches scenarios_a\n" )
}
if(num_scen_b != length(scen_labs_b)) {
stop( "Ensure that the length and content of scen_labs_b matches scenarios_b\n" )
}
if(num_scen_c != length(scen_labs_c)) {
stop( "Ensure that the length and content of scen_labs_c matches scenarios_c\n" )
}
# determine how many scenario groups to plot
if(is.na(data_dir_b) & is.na(data_dir_c)) { num_groups = 1
} else if ( (!is.na(data_dir_b) & is.na(data_dir_c)) | (is.na(data_dir_b) & !is.na(data_dir_c)) ) { num_groups = 2
} else {num_groups = 3}
# need to add an underscore if an optional file tag is input
if (nchar(file_tag) > 0) { added_file_tag = paste0("_", file_tag)
} else {added_file_tag = file_tag}
for (r in 1:num_reg) {
reg_lab = reg[r]
# loop over the ownerships
for (o in 1:num_own) {
own_lab = own[o]
# loop over land types to get data
for (l in 1:num_lt) {
lt_lab = lt[l]
# create empty all_df table to fill with the mean, low and hi outputs below
all_df = NULL
# loop over the scenario groups for read in
for (g in 1:num_groups) {
# determine which group variables to use
if(g==1) {
outputdir = outputdir_a
figdirs = figdirs_a
scenarios = scenarios_a
num_scen = num_scen_a
group_label = figdirs_a[1]
} else if (g==2) {
outputdir = outputdir_b
figdirs = figdirs_b
scenarios = scenarios_b
num_scen = num_scen_b
group_label = figdirs_b[1]
} else {
outputdir = outputdir_c
figdirs = figdirs_c
scenarios = scenarios_c
num_scen = num_scen_c
group_label = figdirs_c[1]
}
# create empty group_df table to fill with the mean, low and hi outputs below
group_df = NULL
# read in the 3 files for scenario group a (mean, lo, hi) and select scenarios and rows in start:end_year
for (f in 1:3) {
fig_lab <- figdirs[f]
fname = paste0(outputdir, fig_lab, "/", reg_lab, "/", lt_lab, "/", own_lab, "/", reg_lab, "_", lt_lab, "_", own_lab, "_", varname, "_output.csv")
in_df_pre = read.csv(fname)
# the mean scenario name is input, so need to find the low and high scenario names
# for carbon input uncertainty the different levels will be in different figdirs, so these are the basis for which is which
# for cultivated soil conservation uncertainty, the different levels can be in the same file, so need to account for this
# save each (mean, lo, hi) as their own column
if (f==1) {
# mean
# extract the years specified between start_year and end_year for each desired scenario
in_df <- in_df_pre[in_df_pre$Scenario %in% scenarios & in_df_pre$Year %in% start_year:end_year,]
# subset the current region and ownership for given land type
in_df = in_df[in_df$Region == reg_lab & in_df$Ownership == own_lab,]
# add these to the group df
group_df <- rbind(group_df, in_df)
colnames(group_df)[ncol(group_df)] <- "Mean"
} else if (f==2) {
# low
if(figdirs[2] == figdirs[1]) { # soil conservation uncertainty
# need to get the low soil conservation cases
snames_all = NULL
for (n in 1:num_scen) {
pre <- substr(scenarios[n], 1, regexpr("_output", scenarios[n])-1)
post <- substr(scenarios[n], regexpr("_BC", scenarios[n]), nchar(scenarios[n]))
snames = grep(pre, in_df_pre$Scenario, value = TRUE)
snames = grep(post, snames, value = TRUE)
snames = grep("S-", snames, value = TRUE, fixed = TRUE)
snames = unique(snames)
snames_all = c(snames_all, snames)
} # end n loop over desired scenarios
# extract the years specified between start_year and end_year for each desired scenario
in_df <- in_df_pre[in_df_pre$Scenario %in% snames_all & in_df_pre$Year %in% start_year:end_year,]
} else { # carbon input uncertainty
# just match the scenario name, the soil tag, the BC tag, and the NR tag
snames_all = NULL
for (n in 1:num_scen) {
pre <- substr(scenarios[n], 1, regexpr("_output", scenarios[n])-1)
post <- substr(scenarios[n], regexpr("_BC", scenarios[n]), nchar(scenarios[n]))
soil <- substr(scenarios[n], regexpr("_S", scenarios[n]), regexpr("_BC", scenarios[n])-1)
if(regexpr("_S", scenarios[n]) < 0) {soil = "_S=mean"}
snames = grep(pre, in_df_pre$Scenario, value = TRUE)
snames = grep(post, snames, value = TRUE)
snames = grep(soil, snames, value = TRUE, fixed = TRUE)
snames = unique(snames)
snames_all = c(snames_all, snames)
} # end n loop over desired scenarios
# extract the years specified between start_year and end_year for each desired scenario
in_df <- in_df_pre[in_df_pre$Scenario %in% snames_all & in_df_pre$Year %in% start_year:end_year,]
} # end if-else soil cons vs carbon uncertainty
# subset the current region and ownership for given land type
in_df = in_df[in_df$Region == reg_lab & in_df$Ownership == own_lab,]
# add these to the group df
group_df <- cbind(group_df,in_df$Value)
colnames(group_df)[ncol(group_df)] <- "Min"
} else {
# high
if(figdirs[3] == figdirs[1]) { # soil conservation uncertainty
# need to get the high soil conservation cases
snames_all = NULL
for (n in 1:num_scen) {
pre <- substr(scenarios[n], 1, regexpr("_output", scenarios[n])-1)
post <- substr(scenarios[n], regexpr("_BC", scenarios[n]), nchar(scenarios[n]))
snames = grep(pre, in_df_pre$Scenario, value = TRUE)
snames = grep(post, snames, value = TRUE)
snames = grep("S+", snames, value = TRUE, fixed = TRUE)
snames = unique(snames)
snames_all = c(snames_all, snames)
} # end n loop ove desired scenarios
# extract the years specified between start_year and end_year for each desired scenario
in_df <- in_df_pre[in_df_pre$Scenario %in% snames_all & in_df_pre$Year %in% start_year:end_year,]
} else { # carbon input uncertainty
# just match the scenario name, the soil tag, the BC tag, and the NR tag
snames_all = NULL
for (n in 1:num_scen) {
pre <- substr(scenarios[n], 1, regexpr("_output", scenarios[n])-1)
post <- substr(scenarios[n], regexpr("_BC", scenarios[n]), nchar(scenarios[n]))
soil <- substr(scenarios[n], regexpr("_S", scenarios[n]), regexpr("_BC", scenarios[n])-1)
if(regexpr("_S", scenarios[n]) < 0) {soil = "_S=mean"}
snames = grep(pre, in_df_pre$Scenario, value = TRUE)
snames = grep(post, snames, value = TRUE)
snames = grep(soil, snames, value = TRUE, fixed = TRUE)
snames = unique(snames)
snames_all = c(snames_all, snames)
} # end n loop over desired scenarios
# extract the years specified between start_year and end_year for each desired scenario
in_df <- in_df_pre[in_df_pre$Scenario %in% snames_all & in_df_pre$Year %in% start_year:end_year,]
} # end if-else soil cons vs carbon uncertainty
# subset the current region and ownership for given land type
in_df = in_df[in_df$Region == reg_lab & in_df$Ownership == own_lab,]
# add these to the group df
group_df <- cbind(group_df,in_df$Value)
colnames(group_df)[ncol(group_df)] <- "Max"
} # end if-else for figdirs
} # end f loop reading in each file for a scenario group (mean, lo, hi)
# add group label column
group_df$Group = group_label
# add group_df to all_df
all_df <- rbind(all_df, group_df)
} # end g loop over scenario groups
# get full scenario names
scen_names <- as.character(unique(all_df$Scenario))
# get number of scenarios
num_scen_all <- length(scen_names)
# extract beginning of scenario full name (use first scenario as sample which is not to be included in the plot title)
scen_lab <- substr(scen_names[1], 1, regexpr("_output", scen_names[1])-1)
# extract NWL scenario version, NR setting (default=default), and climate
file_lab <- paste0("v",gsub("^.*?_v", "", scen_lab))
# create plot title combining the geography, version/assumptions, and variable
title <- paste(reg_lab, own_lab, lt_lab, file_lab, varname)
# replace the names of scenarios for plotting
all_df$Scenario <- as.character(all_df$Scenario)
for(n in 1:num_scen_all) {
all_df$Scenario[all_df$Scenario==scenarios_a[n]] = scen_labs_a[n]
all_df$Scenario[all_df$Scenario==scenarios_b[n]] = scen_labs_b[n]
all_df$Scenario[all_df$Scenario==scenarios_c[n]] = scen_labs_c[n]
}
# get list of scenario labels
scen_labels = as.character(unique(all_df$Scenario))
### plot scenarios on one plot
p <- ggplot(all_df) + geom_line(aes(x=Year, y=Mean, linetype=Scenario, color=Scenario)) + ylab(ylabel) +
geom_ribbon(data=all_df,aes(x=Year, y=Mean, ymin=Min,ymax=Max, group=Scenario, linetype=Scenario),alpha=0.075) +
ggtitle(wrapper(title, width=40)) + theme_bw() + theme(legend.text=element_text(size=12),
plot.title = element_text(size=8),
axis.text=element_text(size=12),
axis.title.y=element_text(size=12),
axis.title.x=element_text(size=12),
legend.title=element_text(size=12))
# save plot as pdf
out_file <- paste0(outputdir_a, figdirs_a[1], "/", reg_lab, "/", lt_lab , "/", own_lab, "/", reg_lab, "_", lt_lab, "_", own_lab, "_", varname, "_scen_comp_uncert_bands", added_file_tag, ".pdf")
ggsave(out_file,dpi=300, width=2560/300, height=1540/300)
# save df as csv
out_csv <- gsub(".pdf", ".csv", out_file)
write.csv(all_df, out_csv, row.names=FALSE)
#### plot scenarios individually
for ( s in 1:num_scen_all) {
# create plot title combining the geography, scenario, version/assumptions, and variable
title <- paste(reg_lab, own_lab, lt_lab, scen_labels[s], file_lab, varname)
p <- ggplot(all_df[all_df$Scenario==scen_labels[s],]) + geom_line(aes(x=Year, y=Mean, linetype=Scenario)) + ylab(ylabel) +
geom_ribbon(data=all_df[all_df$Scenario==scen_labels[s],],
aes(x=Year, y=Mean, ymin=Min,ymax=Max, group=Scenario),alpha=0.2) +
ggtitle(wrapper(title, width=40)) + theme_bw() + theme(legend.text=element_text(size=12),
plot.title = element_text(size=8),
axis.text=element_text(size=12),
axis.title.y=element_text(size=12),
axis.title.x=element_text(size=12),
legend.title=element_text(size=12))
out_file = paste0(outputdir_a, figdirs_a[1], "/", reg_lab, "/", lt_lab , "/", own_lab, "/", reg_lab, "_", lt_lab, "_", own_lab, "_", varname, "_scen_comp_uncert_bands", added_file_tag, "_", scen_labels[s], ".pdf")
ggsave(out_file,dpi=300, width=2560/300, height=1540/300)
} # end s loop plot individual scenarios
} # end for l loop over reading land types
} # end o loop over ownerships
} # end r loop over regions
} # end plot_uncertainty()