Skip to content
This repository has been archived by the owner on Jun 2, 2023. It is now read-only.

Commit

Permalink
Merge pull request #19 from lekoenig/add-nhd-met-data
Browse files Browse the repository at this point in the history
Add nhd met data
  • Loading branch information
lkoenig-usgs authored Sep 6, 2022
2 parents f328c58 + 0cb3652 commit f4ab37a
Show file tree
Hide file tree
Showing 6 changed files with 314 additions and 64 deletions.
103 changes: 75 additions & 28 deletions 1_fetch.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,22 +7,18 @@ source("1_fetch/src/read_netcdf.R")

p1_targets_list <- list(

# Read in the NHM - NHDv2 crosswalk file
# Important! This pipeline uses two versions of the NHM-NHDv2 crosswalk table
# for different purposes. All targets pertaining to the "dendritic" version
# of the crosswalk table will include the string "dendritic" in the name.
# Crosswalk #1: Read in the NHM - NHDv2 crosswalk file that contains all
# NHDPlusv2 COMIDs in the DRB (including divergent reaches and reaches
# without catchments)
tar_target(
p1_GFv1_NHDv2_xwalk,
read_csv(GFv1_NHDv2_xwalk_url,col_types = cols(.default = "c"))
),

# Reshape crosswalk table to return all COMIDs that overlap each NHM segment
tar_target(
p1_drb_comids_segs,
p1_GFv1_NHDv2_xwalk %>%
select(PRMS_segid, segidnat, comid_seg) %>%
tidyr::separate_rows(comid_seg,sep=";") %>%
rename(COMID = comid_seg)
),

# Reshape crosswalk table to return all COMIDs that drain to each NHM segment
# Reshape crosswalk table to return all COMIDs that drain to each NHM segment.
tar_target(
p1_drb_comids_all_tribs,
p1_GFv1_NHDv2_xwalk %>%
Expand All @@ -31,25 +27,38 @@ p1_targets_list <- list(
rename(COMID = comid_cat)
),

# Use crosswalk table to fetch just the NHDv2 reaches that overlap the NHM network
# Reshape crosswalk table to return all COMIDs that overlap each NHM segment.
tar_target(
p1_nhd_reaches_along_NHM,
download_nhdplus_flowlines(p1_drb_comids_segs$COMID)
p1_drb_comids_segs,
p1_GFv1_NHDv2_xwalk %>%
select(PRMS_segid, segidnat, comid_seg) %>%
tidyr::separate_rows(comid_seg,sep=";") %>%
rename(COMID = comid_seg)
),

# Use crosswalk table to fetch all NHDv2 reaches in the DRB
# Use crosswalk table to fetch all NHDv2 reaches in the DRB. These COMIDs
# should be used for preparing feature data, including aggregating feature
# values from the NHD-scale to the NHM-scale and/or for deriving feature
# values from raster data.
tar_target(
p1_nhd_reaches,
download_nhdplus_flowlines(p1_drb_comids_all_tribs$COMID)
),

## May take a while to run
# Use crosswalk table to fetch just the NHDv2 reaches that overlap the NHM network.
tar_target(
p1_nhd_reaches_along_NHM,
download_nhdplus_flowlines(p1_drb_comids_segs$COMID)
),

# Download all NHDPlusv2 catchments in the DRB (may take awhile to run).
tar_target(
p1_nhd_catchments,
get_nhdplusv2_catchments(comid = p1_nhd_reaches$comid)
),

## polygons are analogous to HRU
# Dissolve NHDPlusv2 catchments to create a single catchment polygon for
# each NHM segment (analogous to HRU).
tar_target(
p1_nhm_catchments_dissolved,
{sf_use_s2(FALSE)
Expand All @@ -61,13 +70,32 @@ p1_targets_list <- list(
}
),

# Track depth to bedrock raster dataset in 1_fetch/in
# Crosswalk #2: Read in the NHM - NHDv2 crosswalk file that corresponds to
# the *dendritic* network only (i.e., divergent reaches have been omitted).
# This version of the crosswalk table was used to build the network distance
# matrix in drb-network-prep, and so includes the COMIDs that we will make
# predictions on in the NHD-downscaling set of experiments.
tar_target(
p1_depth_to_bedrock_tif,
Shangguan_dtb_cm_250m_clip_path,
format = "file"
p1_GFv1_NHDv2_xwalk_dendritic,
read_csv(GFv1_NHDv2_xwalk_dendritic_url,col_types = cols(.default = "c"))
),

# Reshape dendritic crosswalk table to return all COMIDs that overlap each
# NHM segment
tar_target(
p1_drb_comids_dendritic_segs,
p1_GFv1_NHDv2_xwalk_dendritic %>%
select(PRMS_segid, segidnat, comid_seg) %>%
tidyr::separate_rows(comid_seg,sep=";") %>%
rename(COMID = comid_seg)
),

# Use crosswalk table to fetch just the dendritic NHDv2 reaches that overlap
# the NHM network
tar_target(
p1_dendritic_nhd_reaches_along_NHM,
download_nhdplus_flowlines(p1_drb_comids_dendritic_segs$COMID)
),

# Manually download temperature site locations from ScienceBase using the
# commented-out code below and place the downloaded zip file in 1_fetch/in.
Expand All @@ -78,7 +106,6 @@ p1_targets_list <- list(
# download_sb_file(sb_id = "623e54c4d34e915b67d83580",
# file_name = "study_monitoring_sites.zip",
# out_dir = "1_fetch/in")

tar_target(
p1_drb_temp_sites_shp,
{
Expand Down Expand Up @@ -147,6 +174,17 @@ p1_targets_list <- list(
read_netcdf(p1_sntemp_input_output_nc)
),

# Read in meteorological data aggregated to NHDPlusV2 catchments for the
# DRB (prepped in https://github.com/USGS-R/drb_gridmet_tools). Note that
# the DRB met data file must be stored in 1_fetch/in. If working outside
# of tallgrass/caldera, this file will need to be downloaded from the
# PGDL-DO project's S3 bucket and manually placed in 1_fetch/in.
tar_target(
p1_drb_nhd_gridmet,
"1_fetch/in/drb_climate_2022_06_14.nc",
format = "file"
),

# Download ref-gages v0.6 to help QC matching NWIS sites to NHDv2 flowlines
tar_target(
p1_ref_gages_geojson,
Expand All @@ -163,16 +201,16 @@ p1_targets_list <- list(
),

# STATSGO SOIL Characteristics
## get selected child items nhdv2 STATSGO Soil Characteristics
## 1) Text attributes and 2) Layer attributes
# get selected child items nhdv2 STATSGO Soil Characteristics,
# including 1) texture and 2) layer attributes.
tar_target(
p1_selected_statsgo_sbid_children,
sbtools::item_list_children(sb_id = nhd_statsgo_parent_sbid) %>%
Filter(function(x){str_detect(x[['title']],'Text|Layer')},
.)
),
),

## download selected CONUS STATSGO datasets from Science base
# Download selected CONUS STATSGO datasets from Science base
tar_target(
p1_download_statsgo_text_layer_zip,
lapply(p1_selected_statsgo_sbid_children,
Expand All @@ -182,9 +220,10 @@ p1_targets_list <- list(
overwrite_file = TRUE)}
) %>%
unlist(),
format = 'file'),
format = 'file'
),

## Combine statsgo TEXT and Layer Attributes for CAT and TOT and filter to drb
# Combine statsgo TEXT and Layer Attributes for CAT and TOT and filter to drb
tar_target(
p1_statsgo_soil_df,
sb_read_filter_by_comids(data_path = '1_fetch/out/statsgo',
Expand All @@ -194,5 +233,13 @@ p1_targets_list <- list(
"NO4AVE","SILTAVE","CLAYAVE",
"SANDAVE",'WTDEP'),
cbind = TRUE)
),

# Track depth to bedrock raster dataset in 1_fetch/in
tar_target(
p1_depth_to_bedrock_tif,
Shangguan_dtb_cm_250m_clip_path,
format = "file"
)

)
116 changes: 84 additions & 32 deletions 2_process.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,18 @@ source("2_process/src/subset_closest_nhd.R")
source('2_process/src/raster_in_polygon_weighted_mean.R')
source("2_process/src/estimate_mean_width.R")
source("2_process/src/write_data.R")
source("2_process/src/combine_nhd_input_drivers.R")

p2_targets_list <- list(

# Subset NHDv2 reaches that overlap the NHM network to only include those
# that have a corresponding catchment (and meteorological data)
tar_target(
p2_dendritic_nhd_reaches_along_NHM_w_cats,
p1_dendritic_nhd_reaches_along_NHM %>%
filter(areasqkm > 0)
),

# Match temperature monitoring locations to "mainstem" NHDPlusv2 flowline
# reaches, i.e. those that NHD reaches that intersect the NHM river network.
# Note that this site-to-segment matching procedure emulates the process
Expand All @@ -13,7 +22,7 @@ p2_targets_list <- list(
# for which the downstream vertex (endpoint) is close to the site point.
tar_target(
p2_drb_temp_sites_w_segs,
subset_closest_nhd(nhd_lines = p1_nhd_reaches_along_NHM,
subset_closest_nhd(nhd_lines = p2_dendritic_nhd_reaches_along_NHM_w_cats,
sites = p1_drb_temp_sites_sf)
),

Expand Down Expand Up @@ -60,15 +69,15 @@ p2_targets_list <- list(
# Estimate mean width for each "mainstem" NHDv2 reach
tar_target(
p2_nhd_mainstem_reaches_w_width,
estimate_mean_width(p1_nhd_reaches_along_NHM,
estimate_mean_width(p2_dendritic_nhd_reaches_along_NHM_w_cats,
estimation_method = 'nwis',
network_pos_variable = 'arbolate_sum',
ref_gages = p1_ref_gages_sf)
),

# Pull static segment attributes from PRMS SNTemp model driver data
tar_target(
p2_input_drivers_prms,
p2_static_input_drivers_prms,
p1_sntemp_input_output %>%
group_by(seg_id_nat) %>%
summarize(seg_elev = unique(seg_elev),
Expand All @@ -78,42 +87,85 @@ p2_targets_list <- list(
select(segidnat, seg_elev, seg_slope, seg_width)
),

# Compile river-dl input drivers at NHDv2 resolution, including river width
# (meters), slope (unitless), and min/max elevation (transformed to meters
# from cm). Note that these input drivers represent "mainstem" NHDv2 reaches
# only (i.e., those reaches that intersect the NHM fabric).
# Pull dynamic segment attributes from PRMS SNTemp model driver data
tar_target(
p2_dynamic_input_drivers_prms,
p1_sntemp_input_output %>%
mutate(segidnat = as.character(seg_id_nat)) %>%
select(segidnat, date, seginc_potet)
),

# Subset the DRB meteorological data to only include the NHDPlusv2 catchments
# (COMID) that intersect the NHM segments. `subset_nc_to_comid()` originally
# developed by Jeff Sadler as part of the PGDL-DO project:
# https://github.com/USGS-R/drb-do-ml/blob/main/2_process/src/subset_nc_to_comid.py
# The resulting target is ~0.6 GB.
tar_target(
p2_met_data_nhd_mainstem_reaches,
{
reticulate::source_python("2_process/src/subset_nc_to_comid.py")
subset_nc_to_comids(p1_drb_nhd_gridmet,
p2_nhd_mainstem_reaches_w_width$comid) %>%
as_tibble() %>%
relocate(c(COMID,time), .before = "tmmx") %>%
# format dates
mutate(date = lubridate::as_date(time, tz = "UTC")) %>%
# convert gridmet precip units from inches to meters, and temperature
# units from degrees Farenheit to degrees Celsius.
# Note that we average the daily minimum and maximum temperatures from
# gridmet and call that "seg_tave_air", which we assume corresponds
# approximately to the PRMS-SNTemp variable with the same name.
mutate(tmmean = rowMeans(select(., c(tmmn,tmmx))),
seg_tave_air = ((tmmean - 32) * (5/9)),
seg_rain = pr * 0.0254) %>%
# rename gridmet columns to conform to PRMS-SNTemp names used in river-dl
select(COMID, date, seg_tave_air, srad, seg_rain) %>%
rename(seginc_swrad = srad)
}
),

# Compile river-dl static input drivers at NHDv2 resolution, including river
# width (m) slope (unitless), and min/max elevation (m). Note that these input
# drivers represent "mainstem" NHDv2 reaches only (i.e., those reaches that
# intersect the NHM fabric). Some of the columns in p2_static_input_drivers_nhd
# are meant to facilitate comparison/EDA between segment attributes at the NHM and
# NHDPlusv2 scales (i.e., seg_slope ~ slope_len_wtd_mean; seg_elev ~ seg_elev_min;
# seg_width ~ seg_width_max).
tar_target(
p2_static_input_drivers_nhd,
prepare_nhd_static_inputs(nhd_flowlines = p2_nhd_mainstem_reaches_w_width,
prms_inputs = p2_static_input_drivers_prms,
nhd_nhm_xwalk = p1_drb_comids_all_tribs)
),

# Combine NHD-scale static input drivers with dynamic climate drivers.
# Note that there is currently no pot ET analog variable at the NHD-scale,
# so we are using seginc_potet from PRMS-SNTemp and assuming that there is
# not much intra-segment variation in potential ET among NHD reaches that
# contribute to a given NHM segment. The resulting target contains 15,800
# days of climate data across each of 3,173 COMIDs = 50,133,400 total rows.
tar_target(
p2_input_drivers_nhd,
p2_nhd_mainstem_reaches_w_width %>%
sf::st_drop_geometry() %>%
mutate(COMID = as.character(comid),
min_elev_m = minelevsmo/100,
max_elev_m = maxelevsmo/100,
slope = if_else(slope == -9998, NA_real_, slope)) %>%
# add NHM segment identifier segidnat
left_join(y = p1_drb_comids_all_tribs, by = "COMID") %>%
rename(subsegid = PRMS_segid) %>%
# calculate length-weighted average slope for NHDv2 reaches associated
# with each NHM reach. For simplicity, weight by the reach length rather
# than another value-added attribute, slopelenkm, which represents the
# length over which the NHDv2 attribute slope was computed.
group_by(subsegid) %>%
mutate(slope_len_wtd_mean = weighted.mean(x = slope, w = lengthkm, na.rm = TRUE),
seg_width_max = max(est_width_m, na.rm = TRUE),
seg_elev_min = min(min_elev_m, na.rm = TRUE)) %>%
ungroup() %>%
# join select attributes from PRMS-SNTemp
left_join(y = p2_input_drivers_prms, by = "segidnat") %>%
select(COMID, segidnat, subsegid, est_width_m, slope, slopelenkm, slope_len_wtd_mean,
lengthkm, min_elev_m, max_elev_m, seg_elev, seg_slope, seg_width, seg_width_max,
seg_elev_min)
{
static_inputs <- p2_static_input_drivers_nhd %>%
select(COMID, segidnat, subsegid,
est_width_m, min_elev_m, slope) %>%
rename(seg_width_mean = est_width_m,
seg_elev = min_elev_m,
seg_slope = slope)
combine_nhd_input_drivers(nhd_static_inputs = static_inputs,
climate_inputs = p2_met_data_nhd_mainstem_reaches,
prms_dynamic_inputs = p2_dynamic_input_drivers_prms,
earliest_date = "1979-01-01",
latest_date = "2022-04-04")
}
),

# Save river-dl input drivers at NHDv2 resolution as a feather file
# Save river-dl input drivers at NHDv2 resolution as a zarr data store
tar_target(
p2_input_drivers_nhd_zarr,
write_df_to_zarr(p2_input_drivers_nhd,
index_cols = c("COMID"),
index_cols = c("date", "COMID"),
"2_process/out/nhdv2_inputs_io.zarr"),
format = "file"
)
Expand Down
Loading

0 comments on commit f4ab37a

Please sign in to comment.