diff --git a/.gitignore b/.gitignore index 752c28f..f6b39ce 100644 --- a/.gitignore +++ b/.gitignore @@ -12,4 +12,4 @@ vignettes/tutorial check runners/secret runners/data -in-progress +in-progress \ No newline at end of file diff --git a/runners/cs_runner/01_transects.R b/runners/cs_runner/01_transects.R index 0880d33..ee609aa 100644 --- a/runners/cs_runner/01_transects.R +++ b/runners/cs_runner/01_transects.R @@ -1,104 +1,113 @@ # Generate the flowlines layer for the final cross_sections_.gpkg for each VPU -# source("runners/cs_runner/config.R") +source("runners/cs_runner/config.R") +source("runners/cs_runner/utils.R") -# # load libraries -# library(terrainSliceR) +# # # # load libraries +# library(hydrofabric3D) # library(dplyr) # library(sf) +# install.packages("devtools") -# name of S3 bucket -s3_bucket <- "s3://lynker-spatial/" - -# transect bucket prefix -transects_prefix <- paste0(s3_bucket, "v20/3D/transects/") +# # transect bucket prefix +# S3_TRANSECTS_DIR <- paste0(LYNKER_SPATIAL_HF_S3_URI, VERSION, "/3D/transects/") # paths to nextgen datasets and model attribute parquet files -nextgen_files <- list.files(nextgen_dir, full.names = FALSE) -model_attr_files <- list.files(model_attr_dir, full.names = FALSE) - -# string to fill in "cs_source" column in output datasets -net_source <- "terrainSliceR" +NEXTGEN_FILES <- list.files(NEXTGEN_DIR, full.names = FALSE) +# model_attr_files <- list.files(MODEL_ATTR_DIR, full.names = FALSE) # ensure the files are in the same order and matched up by VPU path_df <- align_files_by_vpu( - x = nextgen_files, - y = model_attr_files, - base = base_dir + x = NEXTGEN_FILES, + y = NEXTGEN_FILES, + base = BASE_DIR ) # loop over each VPU and generate cross sections, then save locally and upload to S3 bucket for(i in 1:nrow(path_df)) { - + # nextgen file and full path nextgen_file <- path_df$x[i] - nextgen_path <- paste0(nextgen_dir, nextgen_file) + nextgen_path <- paste0(NEXTGEN_DIR, nextgen_file) - # model attributes file and full path - model_attr_file <- path_df$y[i] - model_attr_path <- paste0(model_attr_dir, model_attr_file) + vpu <- path_df$vpu[i] - message("Creating VPU ", path_df$vpu[i], " transects:\n - flowpaths: '", nextgen_file, "'\n - model attributes: '", model_attr_file, "'") + # Get FEMA by VPU directory and files for current VPU + fema_vpu_dir <- paste0(FEMA_VPU_SUBFOLDERS[grepl(paste0("VPU_", vpu), basename(FEMA_VPU_SUBFOLDERS))]) + # fema_vpu_dir <- paste0(FEMA_VPU_SUBFOLDERS[grepl(paste0("VPU_", vpu), basename(FEMA_VPU_SUBFOLDERS))], "/merged") + + vpu_fema_files <- list.files(fema_vpu_dir, full.names = TRUE) + vpu_fema_file <- vpu_fema_files[grepl(paste0(vpu, "_output.gpkg"), vpu_fema_files)] + + message("Creating VPU ", vpu, " transects:", + "\n - flowpaths: '", + nextgen_file, "'", + "\n - FEMA polygons: '", + basename(vpu_fema_file), "'" + ) + + # message("Creating VPU ", path_df$vpu[i], " transects:\n - flowpaths: '", nextgen_file, "'\n - model attributes: '", model_attr_file, "'") + # sf::write_sf( + # dplyr::slice(dplyr::filter(flines, order == 2), 2), + # "/Users/anguswatters/Desktop/example_flowline.gpkg" + # ) # read in nextgen data flines <- sf::read_sf(nextgen_path, layer = "flowpaths") - - # model attributes - model_attrs <- arrow::read_parquet(model_attr_path) - - # join flowlines with model atttributes - flines <- dplyr::left_join( - flines, - dplyr::select( - model_attrs, - id, eTW - ), - by = "id" - ) # calculate bankfull width flines <- flines %>% - dplyr::mutate( - bf_width = 11 * eTW - ) %>% - dplyr::mutate( # if 
there are any NAs, use exp(0.700 + 0.365* log(tot_drainage_areasqkm)) equation to calculate bf_width - bf_width = dplyr::case_when( - is.na(bf_width) ~ exp(0.700 + 0.365* log(tot_drainage_areasqkm)), - TRUE ~ bf_width - ) - ) %>% + hydrofabric3D::add_powerlaw_bankful_width( + total_drainage_area_sqkm_col = "tot_drainage_areasqkm", + min_bf_width = 50 + ) %>% dplyr::select( hy_id = id, lengthkm, tot_drainage_areasqkm, bf_width, + mainstem, geometry = geom ) - + # dplyr::mutate( + # bf_width = exp(0.700 + 0.365* log(tot_drainage_areasqkm)) + # ) %>% + # dplyr::select( + # hy_id = id, + # lengthkm, + # tot_drainage_areasqkm, + # bf_width, + # mainstem, + # geometry = geom + # ) + # flines$bf_width <- ifelse(is.na(flines$bf_width), exp(0.700 + 0.365* log(flines$tot_drainage_areasqkm)), flines$bf_width) time1 <- Sys.time() - # system.time({ - # create transect lines - transects <- terrainSliceR::cut_cross_sections( - net = flines, # flowlines network - id = "hy_id", # Unique feature ID - cs_widths = pmax(50, flines$bf_width), # cross section width of each "id" linestring ("hy_id") - num = 10, # number of cross sections per "id" linestring ("hy_id") - smooth = TRUE, # smooth lines - densify = 3, # densify linestring points - rm_self_intersect = TRUE, # remove self intersecting transects - fix_braids = FALSE, # whether to fix braided flowlines or not - #### Arguments used for when fix_braids = TRUE - # terminal_id = NULL, - # braid_threshold = NULL, - # version = 2, - # braid_method = "comid", - # precision = 1, - add = TRUE # whether to add back the original data - ) - # }) - + + # create transect lines + transects <- hydrofabric3D::cut_cross_sections( + net = flines, # flowlines network + crosswalk_id = "hy_id", # Unique feature ID + cs_widths = flines$bf_width, # cross section width of each "id" linestring ("hy_id") + # cs_widths = pmax(50, flines$bf_width * 11), # cross section width of each "id" linestring ("hy_id") + # cs_widths = pmax(50, flines$bf_width), # cross section width of each "id" linestring ("hy_id") + num = 10, # number of cross sections per "id" linestring ("hy_id") + smooth = TRUE, # smooth lines + densify = 3, # densify linestring points + rm_self_intersect = TRUE, # remove self intersecting transects + fix_braids = FALSE, # whether to fix braided flowlines or not + #### Arguments used for when fix_braids = TRUE # TODO: these methods need revision in hydrofabric3D to allow for more flexible processing for data that is NOT COMID based (i.e. 
hy_id)
+    # terminal_id = NULL,
+    # braid_threshold = NULL,
+    # version = 2,
+    # braid_method = "comid",
+    # precision = 1,
+    add = TRUE                       # whether to add back the original data
+  )
+  
+  gc()
+  
   time2 <- Sys.time()
   time_diff <- round(as.numeric(time2 - time1 ), 2)
@@ -106,43 +115,126 @@ for(i in 1:nrow(path_df)) {
   # name of file and path to save transects gpkg too
   out_file <- paste0("nextgen_", path_df$vpu[i], "_transects.gpkg")
-  out_path <- paste0(transects_dir, out_file)
-  
-  message("Saving transects to:\n - filepath: '", out_path, "'")
+  out_path <- paste0(TRANSECTS_DIR, out_file)
   
-  # add cs_source column and keep just the desired columns to save and upload to S3
-  transects <-
+  # add cs_source column and rename cs_widths to cs_lengthm
+  transects <- 
     transects %>%
     dplyr::mutate(
-      cs_source = net_source
-    ) %>%
+      cs_source = CS_SOURCE
+    )
+  
+  # ---------------------------------------------------------------------
+  # --- Extend transects out to FEMA 100yr floodplains
+  # ---------------------------------------------------------------------
+  message("Reading in FEMA polygons...")
+  
+  # fema polygons and transect lines
+  fema <- sf::read_sf(vpu_fema_file)
+  
+  message("Simplifying FEMA polygons...")
+  message(" - Number of geoms BEFORE simplifying: ", nrow(fema))
+  
+  # TODO: this should be a function argument OR removed; we probably shouldn't forcibly and silently simplify the input polygons without the user knowing...
+  # keep 1% of the original points for speed
+  fema <- rmapshaper::ms_simplify(fema, keep_shapes = T, keep = 0.01, sys = TRUE, sys_mem = 16)
+  # fema <- rmapshaper::ms_simplify(fema, keep_shapes = T, keep = 0.1, sys = TRUE, sys_mem = 16)
+  
+  message(" - Number of geoms AFTER simplifying: ", nrow(fema))
+  message("Extending transects out to FEMA 100yr floodplain polygon boundaries - (", Sys.time(), ")")
+  
+  transects <- 
+    transects %>% 
+    dplyr::left_join(
+      dplyr::select(sf::st_drop_geometry(flines), 
+                    hy_id, 
+                    mainstem
+      ), 
+      by = "hy_id"
+    ) 
+  
+  # TODO: make sure this 3000m extension distance is appropriate across VPUs 
+  # TODO: also need to make sure this will be feasible memory-wise on the larger VPUs...
+  transects <- hydrofabric3D::extend_transects_to_polygons(
+    transect_lines = transects, 
+    polygons = fema, 
+    flowlines = flines, 
+    crosswalk_id = "hy_id",
+    grouping_id = "mainstem", 
+    max_extension_distance = 3000 
+  )
+  
+  message("FEMA extensions complete! 
- ( ", Sys.time(), " )")
+  
+  transects <- dplyr::select(transects, -tmp_id)
+  transects <- hydrofabric3D::add_tmp_id(transects)
+  
+  transects <-
+    transects %>%
+    # dplyr::select(-cs_lengthm) %>%
+    # dplyr::mutate(is_fema_extended = left_is_extended | right_is_extended) %>%
     dplyr::select(
-      hy_id,
+      hy_id, 
+      cs_id, 
+      cs_lengthm,
+      # cs_lengthm = new_cs_lengthm,
       cs_source,
-      cs_id,
       cs_measure,
-      cs_lengthm = cs_widths,
       geometry
+      # is_extended,
+      # is_fema_extended,
+      # geometry = geom
     )
-  
-  # save flowlines to out_path (lynker-spatial/01_transects/transects_.gpkg)
+  
+  gc()
+  
+  # # ---------------------------------------------------------------------
+  message("Saving transects to:\n - filepath: '", out_path, "'")
+  
+  # save transects with only columns to be uploaded to S3 (lynker-spatial/01_transects/transects_.gpkg)
   sf::write_sf(
-    transects,
+    # save dataset with only subset of columns to upload to S3
+    dplyr::select(transects, 
+                  hy_id,
+                  cs_source,
+                  cs_id,
+                  cs_measure,
+                  cs_lengthm,
+                  # sinuosity,
+                  geometry
+    ),
    out_path
-  )
+  )
   
   # command to copy transects geopackage to S3
-  if (!is.null(aws_profile)) {
-    copy_to_s3 <- paste0("aws s3 cp ", out_path, " ", transects_prefix, out_file,
-                         ifelse(is.null(aws_profile), "", paste0(" --profile ", aws_profile)))
-  } else {
-    copy_to_s3 <- paste0("aws s3 cp ", out_path, " ", transects_prefix, out_file)
-  }
+  copy_to_s3 <- paste0("aws s3 cp ", out_path, " ", S3_TRANSECTS_DIR, out_file,
+                       ifelse(is.null(AWS_PROFILE), "", paste0(" --profile ", AWS_PROFILE))
+  )
   
-  message("Copy VPU ", path_df$vpu[i], " transects to S3:\n - S3 copy command:\n'", copy_to_s3, "'\n==========================")
   
   system(copy_to_s3, intern = TRUE)
+  
+  message("Overwriting local copy of transects to include 'is_extended' column...\n==========================")
+  
+  # Overwrite the local copy of the transects with an additional 'is_extended' column (kept for development purposes)
+  sf::write_sf(
+    dplyr::select(
+      dplyr::mutate(transects, is_extended = FALSE),
+      hy_id,
+      cs_source,
+      cs_id,
+      cs_measure,
+      cs_lengthm,
+      # sinuosity,
+      is_extended,
+      geometry
+    ),
+    out_path
+  )
+  
+  rm(fema, transects, flines)
+  gc()
 }
diff --git a/runners/cs_runner/02_cs_pts.R b/runners/cs_runner/02_cs_pts.R
index da6e3e8..1614e10 100644
--- a/runners/cs_runner/02_cs_pts.R
+++ b/runners/cs_runner/02_cs_pts.R
@@ -1,137 +1,509 @@
 # Generate the flowlines layer for the final cross_sections_.gpkg for each VPU
-# source("runners/cs_runner/config.R")
+source("runners/cs_runner/config.R")
 
 # # load libraries
-# library(terrainSliceR)
-# library(dplyr)
-# library(sf)
-
-# name of S3 bucket
-s3_bucket <- "s3://lynker-spatial/"
-
-# cross section bucket prefix
-cs_pts_prefix <- paste0(s3_bucket, "v20/3D/dem-cross-sections/")
+library(hydrofabric3D)
+library(dplyr)
+library(sf)
 
 # paths to nextgen datasets
-nextgen_files <- list.files(nextgen_dir, full.names = FALSE)
+NEXTGEN_FILES <- list.files(NEXTGEN_DIR, full.names = FALSE)
 
 # paths to nextgen datasets
-transect_files <- list.files(transects_dir, full.names = FALSE)
+transect_files <- list.files(TRANSECTS_DIR, full.names = FALSE)
+transect_files <- transect_files[!grepl("updated_", transect_files)]
+
+REF_FEATURES <- list.files(REF_FEATURES_GPKG_DIR, full.names = FALSE)
 
-# string to fill in "cs_source" column in output datasets
-cs_source <- "terrainSliceR"
+# reference features dataframe
+ref_df <- data.frame(
+  vpu      = sapply(strsplit(REF_FEATURES, "_", fixed = TRUE), function(i) { i[1] }),
+  ref_file = REF_FEATURES
+) # ensure the files are in the same order and matched up by VPU path_df <- align_files_by_vpu( - x = nextgen_files, - y = transect_files, - base = base_dir - ) + x = NEXTGEN_FILES, + y = transect_files, + base = BASE_DIR +) %>% + dplyr::left_join( + ref_df, + by = "vpu" + ) # loop over the nextgen and transect datasets (by VPU) and extract point elevations across points on each transect line, # then classify the points, and create a parquet file with hy_id, cs_id, pt_id, X, Y, Z data. # Save parquet locally and upload to specified S3 bucket -for (i in 1:nrow(path_df)) { - +for (i in 20:nrow(path_df)) { + + start <- Sys.time() + # nextgen file and full path nextgen_file <- path_df$x[i] - nextgen_path <- paste0(nextgen_dir, nextgen_file) + nextgen_path <- paste0(NEXTGEN_DIR, nextgen_file) # model attributes file and full path transect_file <- path_df$y[i] - transect_path <- paste0(transects_dir, transect_file) - message("Creating VPU ", path_df$vpu[i], " cross section points:\n - flowpaths: '", nextgen_file, "'\n - transects: '", transect_file, "'") + transect_path <- paste0(TRANSECTS_DIR, transect_file) + + # model attributes file and full path + ref_file <- path_df$ref_file[i] + ref_path <- paste0(REF_FEATURES_DIR, "gpkg/", ref_file) + + # current VPU being processed + VPU <- path_df$vpu[i] + + start <- Sys.time() + + message("Creating VPU ", VPU, + " cross section points:\n - flowpaths: '", nextgen_file, + "'\n - transects: '", transect_file, "'", + "\n - waterbodies: '", ref_file, "'", + "'\n - start time: '", start, "'" + ) ################### + message("Reading in transects...\n > ", transect_file) + # read in transects data + transects <- sf::read_sf(transect_path) - # read in transects data - transects <- sf::read_sf(transect_path) + message("Reading in flowlines... \n > ", nextgen_file) + # read in nextgen data + flines <- sf::read_sf(nextgen_path, layer = "flowpaths") - # read in nextgen data - flines <- sf::read_sf(nextgen_path, layer = "flowpaths") + message("Reading in waterbodies... 
\n > ", ref_file) + # read in waterbodies reference features layer + waterbodies <- sf::read_sf(ref_path, layer = "waterbodies") + + # Update flowlines and transects to remove flowlines and transects that intersect with reference_features waterbodies + feature_subsets <- wb_intersects(flines, transects, waterbodies) + + # replace flowlines and transects objects with updated versions in "updated_features" + flines <- flines[feature_subsets$valid_flowlines, ] + transects <- transects[feature_subsets$valid_transects, ] + + rm(waterbodies) + gc() - transects <- - transects %>% - dplyr::rename(lengthm = cs_lengthm) + start_cs_pts <- Sys.time() + # # ------------------------------------------------------------------------ + # # ------ TESTING DATA ------- + # # ------------------------------------------------------------------------ + # flines <- + # flines %>% + # dplyr::slice(1:3500) + # + # transects <- + # transects %>% + # dplyr::filter(hy_id %in% flines$id) - # get start time for log messages - time1 <- Sys.time() - - # get cross section point elevations - cs_pts <- terrainSliceR::cross_section_pts( - cs = transects[lengths(sf::st_intersects(transects, flines)) == 1, ], - points_per_cs = NULL, - min_pts_per_cs = 10, - dem = DEM_URL - ) - - # get end time for log messages - time2 <- Sys.time() - time_diff <- round(as.numeric(time2 - time1 ), 2) + # ------------------------------------------------------------------------ + + message("Extracting cross section points (", start_cs_pts, ")") + # ---------------------------------------------------------------------------------------------------------------- + # ---- STEP 1: Extract cs points from DEM ---- + # ---------------------------------------------------------------------------------------------------------------- + # system.time({ + + # get cross section point elevations + cs_pts <- hydrofabric3D::cross_section_pts( - message("\n\n ---> Cross section point elevations processed in ", time_diff) + cs = transects, + crosswalk_id = "hy_id", + points_per_cs = NULL, + min_pts_per_cs = 10, + dem = DEM_PATH + ) + + # }) + + # ---------------------------------------------------------------------------------------------------------------- + # ---- STEP 2: Remove any cross section that has ANY missing (NA) Z values, and classify the points ---- + # ---------------------------------------------------------------------------------------------------------------- + + # sf::write_sf(cs_pts, "/Users/anguswatters/Desktop/test_improve_cs_pts_11.gpkg") + # sf::write_sf(flines, "/Users/anguswatters/Desktop/test_improve_flines_11.gpkg") + # sf::write_sf(transects, "/Users/anguswatters/Desktop/test_improve_transects_11.gpkg") + + # sf::write_sf(flines, "/Users/anguswatters/Desktop/test_improve_flines_11_2.gpkg") + # sf::write_sf(transects, "/Users/anguswatters/Desktop/test_improve_transects_11_2.gpkg") + # cs_pts %>% + # dplyr::group_by(hy_id, cs_id) %>% + # dplyr::filter(!any(is.na(Z))) %>% + # dplyr::ungroup() + # + # cs_pts %>% + # hydrofabric3D::drop_incomplete_cs_pts("hy_id") + + # system.time({ + + # STEP 2: Remove any cross section that has ANY missing (NA) Z values, and classify the points + cs_pts <- + # cs_pts2 <- + cs_pts %>% + # dplyr::group_by(hy_id, cs_id) %>% + # dplyr::filter(!any(is.na(Z))) %>% + # dplyr::ungroup() %>% + hydrofabric3D::drop_incomplete_cs_pts("hy_id") %>% + hydrofabric3D::classify_points( + crosswalk_id = "hy_id", + pct_of_length_for_relief = PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF + ) + + # }) + + ids_original_cs_pts <- 
hydrofabric3D::add_tmp_id(cs_pts)$tmp_id + # ids_original_cs_pts <- hydrofabric3D::add_tmp_id(cs_pts2)$tmp_id + + # sf::write_sf(cs_pts2, "/Users/anguswatters/Desktop/test_improve_cs_pts_classified_11.gpkg") + # sf::write_sf(cs_pts, "/Users/anguswatters/Desktop/test_improve_cs_pts_classified_11_2.gpkg") + + + # ---------------------------------------------------------------------------------------------------------------- + # ---- STEP 3: Try to rectify any no relief and invalid banks cross sections ---- + # ---------------------------------------------------------------------------------------------------------------- + # dplyr::rename(flines, hy_id = id) + # profvis::profvis({ + # system.time({ - # Remove any cross section that has ANY missing (NA) Z values. - cs_pts <- - cs_pts %>% - # dplyr::filter(hy_id %in% c("wb-849054", "wb-845736")) %>% - dplyr::group_by(hy_id, cs_id) %>% - dplyr::filter(!any(is.na(Z))) %>% - dplyr::ungroup() + # # NOTE: new inplace method for improving (rectifying) any invalid cross sections where we dont have banks and relief + # fixed_pts <- hydrofabric3D::improve_invalid_cs2( + # cs_pts = cs_pts, # cross section points generated from hydrofabric3D::cross_section_pts() + # net = dplyr::rename(flines, hy_id = id), # original flowline network + # # net = flines, # original flowline network + # transects = transects, # original transect lines + # points_per_cs = NULL, + # min_pts_per_cs = 10, # number of points per cross sections + # dem = DEM_PATH, # DEM to extract points from + # scale = EXTENSION_PCT, # How far to extend transects if the points need to be rechecked + # pct_of_length_for_relief = PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF, # percent of cross sections length to be needed in relief calculation to consider cross section to "have relief" + # fix_ids = FALSE, + # verbose = TRUE + # ) + + + # system.time({ + fixed_pts <- hydrofabric3D::get_improved_cs_pts( + cs_pts = cs_pts, # cross section points generated from hydrofabric3D::cross_section_pts() + net = dplyr::rename(flines, hy_id = id), # original flowline network + # net = flines, # original flowline network + transects = transects, # original transect lines + crosswalk_id = "hy_id", + points_per_cs = NULL, + min_pts_per_cs = 10, # number of points per cross sections + dem = DEM_PATH, # DEM to extract points from + scale = EXTENSION_PCT, # How far to extend transects if the points need to be rechecked + pct_of_length_for_relief = PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF, # percent of cross sections length to be needed in relief calculation to consider cross section to "have relief" + fix_ids = FALSE, + verbose = TRUE + ) + # }) + + # transects %>% + # sf::st_drop_geometry() %>% + # dplyr::group_by(id, cs_id) + # fixed_pts2$is_extended %>% sum() + ids_after_fixed_pts <- hydrofabric3D::add_tmp_id(fixed_pts)$tmp_id + + # # TODO: This is taking A LOT time to process as inputs get larger, an improvement should be looked into more + # fixed_pts <- hydrofabric3D::rectify_cs( + # # cs_pts <- hydrofabric3D::rectify_flat_cs( + # cs_pts = cs_pts, # cross section points generated from hydrofabric3D::cross_section_pts() + # net = flines, # original flowline network + # transects = transects, # original transect lines + # points_per_cs = NULL, + # min_pts_per_cs = 10, # number of points per cross sections + # dem = DEM_PATH, # DEM to extract points from + # scale = EXTENSION_PCT, # How far to extend transects if the points need to be rechecked + # pct_of_length_for_relief = PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF, # 
percent of cross sections length to be needed in relief calculation to consider cross section to "have relief" + # fix_ids = FALSE, + # verbose = TRUE + # ) + + # get a summary dataframe and print out details message + rectify_summary <- hydrofabric3D::rectify_summary(cs_pts, fixed_pts, verbose = TRUE) + rectify_summary <- + rectify_summary %>% + dplyr::mutate( + vpu = VPU + ) %>% + dplyr::relocate(vpu, metric, value) + + readr::write_csv(rectify_summary, paste0(META_PATH, "nextgen_", VPU, "_cross_sections_metadata.csv")) + + # ---------------------------------------------------------------------------------------------------------------- + # ---- STEP 4: Update transects with extended transects (if exists) ---- + # ---------------------------------------------------------------------------------------------------------------- + + # get the counts of each point type to add this data to the transects dataset + point_type_counts <- hydrofabric3D::get_point_type_counts(fixed_pts) + # point_type_counts <- hydrofabric3D::get_point_type_counts(fixed_pts, crosswalk_id = "hy_id") + + # # check the number of cross sections that were extended + # fixed_pts$is_extended %>% table() + message("Subsetting cross section points generated after extending transects...") + + # extract cross section points that have an "is_extended" value of TRUE + extended_pts <- + fixed_pts %>% + dplyr::filter(is_extended) %>% + hydrofabric3D::add_tmp_id() + + # extract transects that have a "hy_id" in the "extended_pts" dataset + update_transects <- + transects %>% + hydrofabric3D::add_tmp_id() %>% + dplyr::filter(tmp_id %in% unique(extended_pts$tmp_id)) + + # if any transects were extended, update the transects dataset, and overwrite local and S3 transects geopackages + if (nrow(update_transects) > 0) { + message("Updating ", nrow(update_transects), " transects") - # classify the cross section points - cs_pts <- - cs_pts %>% - dplyr::rename(cs_widths = lengthm) %>% - terrainSliceR::classify_points() %>% - dplyr::mutate( - X = sf::st_coordinates(.)[,1], - Y = sf::st_coordinates(.)[,2] - ) %>% - dplyr::select( - hy_id, cs_id, pt_id, - cs_lengthm = cs_widths, - relative_distance, - X, Y, Z, - class + update_transects <- + update_transects %>% + # apply extend_by_percent function to each transect line: + hydrofabric3D:::extend_by_percent( + pct = EXTENSION_PCT, + length_col = "cs_lengthm" + ) + + # Filter down to ONLY points that were finalized and rectified from rectify_cs_pts() + # remove old transects that have "tmp_id" in "extended_pts" (transects that were unchanged and are "good_to_go") + # and then replace with old transects with the "update_transects" + out_transects <- + transects %>% + hydrofabric3D::add_tmp_id() %>% + # dplyr::filter(!tmp_id %in% unique(extended_pts$tmp_id)) %>% + dplyr::filter(tmp_id %in% unique(hydrofabric3D::add_tmp_id(fixed_pts)$tmp_id)) %>% # Subset down to the remaining tmp_ids in the fixed points + dplyr::filter(!tmp_id %in% unique(extended_pts$tmp_id)) %>% # remove the tmp_ids that we are going add back in with the extended versions of those tmp_ids + dplyr::bind_rows( # bring in the new updated extended transects + dplyr::mutate( + update_transects, + is_extended = TRUE ) + ) - # Drop point geometries, leaving just X, Y, Z values - cs_pts <- sf::st_drop_geometry(cs_pts) + } else { + + out_transects <- + transects %>% + hydrofabric3D::add_tmp_id() %>% + dplyr::filter(tmp_id %in% unique(hydrofabric3D::add_tmp_id(fixed_pts)$tmp_id)) %>% + dplyr::filter(!tmp_id %in% 
unique(extended_pts$tmp_id)) + } + + # finalize new transects + out_transects <- + out_transects %>% + dplyr::left_join( + point_type_counts, + by = c("hy_id", "cs_id") + ) %>% + dplyr::left_join( + dplyr::ungroup( + dplyr::slice( + dplyr::group_by( + dplyr::select(sf::st_drop_geometry(fixed_pts), + hy_id, cs_id, bottom, left_bank, right_bank, valid_banks, has_relief), + hy_id, cs_id), + 1) + ), + by = c("hy_id", "cs_id") + ) %>% + dplyr::select(hy_id, cs_source, cs_id, cs_measure, cs_lengthm, + # sinuosity, + is_extended, + left_bank_count, right_bank_count, channel_count, bottom_count, + bottom, left_bank, right_bank, valid_banks, has_relief, + geom + ) + + # ---------------------------------------------------------------------------------------------------------------- + # ---- Re enumerate the transects & cross section points "cs_id" ---- + # ---------------------------------------------------------------------------------------------------------------- + + # make a dataframe that has a new_cs_id column that has + # the cs_id renumbered to fill in any missing IDs, + # so each hy_id has cs_ids that go from 1 - number of cross sections on hy_id + # The dataframe below will be used to join the "new_cs_id" with + # the original "cs_ids" in the final cross section POINTS and UPDATED TRANSECTS output datasets + renumbered_ids <- + fixed_pts %>% + sf::st_drop_geometry() %>% + # dplyr::filter(hy_id %in% c("wb-2402800", "wb-2398282", "wb-2400351")) %>% + dplyr::select(hy_id, cs_id, pt_id, cs_measure) %>% + dplyr::group_by(hy_id, cs_id) %>% + dplyr::slice(1) %>% + dplyr::ungroup() %>% + hydrofabric3D::add_tmp_id() %>% + dplyr::group_by(hy_id) %>% + dplyr::mutate( + new_cs_id = 1:dplyr::n() + ) %>% + dplyr::ungroup() %>% + dplyr::select(new_cs_id, tmp_id) + + # Renumber the transects to have correct CS IDs + out_transects <- dplyr::left_join( + hydrofabric3D::add_tmp_id(out_transects), + renumbered_ids, + by = "tmp_id" + ) %>% + dplyr::select(-cs_id, -tmp_id) %>% + dplyr::select(hy_id, cs_source, + cs_id = new_cs_id, + cs_measure, cs_lengthm, + # sinuosity, + is_extended, + left_bank_count, right_bank_count, channel_count, bottom_count, + bottom, left_bank, right_bank, valid_banks, has_relief, + geometry = geom + ) + + # Renumber the cross sections points to have correct CS IDs + fixed_pts <- dplyr::left_join( + hydrofabric3D::add_tmp_id(fixed_pts), + renumbered_ids, + by = "tmp_id" + ) %>% + dplyr::select(-cs_id, -tmp_id) %>% + dplyr::rename(cs_id = new_cs_id) + # ---------------------------------------------------------------------------------------------------------------- + # ---- Cross section points parquet to S3 ---- + # ---------------------------------------------------------------------------------------------------------------- + + # classify the cross section points + fixed_pts <- + fixed_pts %>% + dplyr::mutate( + X = sf::st_coordinates(.)[,1], + Y = sf::st_coordinates(.)[,2] + ) %>% + sf::st_drop_geometry() %>% + dplyr::select( + hy_id, cs_id, pt_id, + cs_lengthm, + relative_distance, + X, Y, Z, + class, point_type, + bottom, left_bank, right_bank, valid_banks, has_relief # newly added columns (03/06/2024) + ) + + # # Drop point geometries, leaving just X, Y, Z values + # fixed_pts <- sf::st_drop_geometry(fixed_pts) + # add Z_source column for source of elevation data - cs_pts <- - cs_pts %>% + fixed_pts <- + fixed_pts %>% dplyr::mutate( - Z_source = cs_source - ) %>% - dplyr::relocate(hy_id, cs_id, pt_id, cs_lengthm, relative_distance, X, Y, Z, Z_source, class) + Z_source = 
CS_SOURCE
+    ) %>%
+    dplyr::relocate(hy_id, cs_id, pt_id, cs_lengthm, relative_distance, X, Y, Z, Z_source,
+                    class, point_type,
+                    bottom, left_bank, right_bank, valid_banks, has_relief)
-  ###################
+  ids_before_align <- hydrofabric3D::add_tmp_id(fixed_pts)$tmp_id
+  
+  message("Aligning banks and smoothing bottoms...")
+  fixed_pts <- hydrofabric3D::align_banks_and_bottoms(cs_pts = fixed_pts, crosswalk_id = "hy_id")
+  # fixed_pts <- hydrofabric3D::align_banks_and_bottoms(cs_pts = fixed_pts, crosswalk_id = "hy_id")
+  
+  ids_after_align <- hydrofabric3D::add_tmp_id(fixed_pts)$tmp_id
+  
+  message("Reclassifying cross section points...")
+  fixed_pts <- hydrofabric3D::classify_points(
+    cs_pts = fixed_pts, 
+    crosswalk_id = "hy_id",
+    pct_of_length_for_relief = PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF
+  )
+  
+  ids_after_reclassify <- hydrofabric3D::add_tmp_id(fixed_pts)$tmp_id
+  
+  if(all(ids_original_cs_pts %in% ids_after_fixed_pts)) {
+    message("All hy_id/cs_ids in ORIGINAL DEM point extraction were found in the FIXED points")
+  } else {
+    message(" >>> Missing hy_id/cs_ids in ORIGINAL DEM point extraction compared to the FIXED points")
+  }
+  
+  if(all(ids_before_align %in% ids_after_align)) {
+    message("All hy_id/cs_ids are kept intact after bank alignment and bottom smoothing")
+  } else {
+    message(" >>> Missing hy_id/cs_ids after bank alignment and bottom smoothing")
+  }
+  
+  if(all(ids_after_align %in% ids_after_reclassify)) {
+    message("All hy_id/cs_ids are kept intact after RECLASSIFICATION")
+  } else {
+    message(" >>> Missing hy_id/cs_ids after RECLASSIFICATION")
+  }
+  
+  # all(hydrofabric3D::add_tmp_id(fixed_pts2)$tmp_id %in% hydrofabric3D::add_tmp_id(fixed_pts)$tmp_id)
+  # all(hydrofabric3D::add_tmp_id(fixed_pts4)$tmp_id %in% hydrofabric3D::add_tmp_id(fixed_pts)$tmp_id)
+  
+  ##############################################################################
+  
+  # ----------------------------------------------------------------------------------------------------------------
+  # ---- Re-upload the updated transects geopackage to S3 ----
+  # ----------------------------------------------------------------------------------------------------------------
+  updated_path <- gsub(transect_file, paste0("updated_", transect_file), transect_path)
+  
+  ## Save local and REUPLOAD TRANSECTS to S3 to update for any extended cross sections
+  message("Saving updated transects to:\n - filepath: '", updated_path, "'")
+  
+  # save updated transects to updated_path (lynker-spatial/01_transects/transects_.gpkg)
+  sf::write_sf(
+    out_transects,
+    # transect_path
+    updated_path
+  )
+  
+  # command to copy transects geopackage to S3
+  trans_to_s3 <- paste0("aws s3 cp ", updated_path, " ", S3_TRANSECTS_DIR, transect_file,
+                        ifelse(is.null(AWS_PROFILE), "", paste0(" --profile ", AWS_PROFILE)))
+  
+  message("Copy VPU ", path_df$vpu[i], " transects to S3:\n - S3 copy command:\n'", 
+          trans_to_s3, 
+          "'\n==========================")
+  
+  system(trans_to_s3, intern = TRUE)
+  
+  ##############################################################################
+  
+  # ----------------------------------------------------------------------------------------------------------------
+  # ---- Upload the cross section points parquet to S3 ----
+  # ----------------------------------------------------------------------------------------------------------------
 
   # name of file and path to save transects gpkg too
   out_file <- paste0("nextgen_", path_df$vpu[i], "_cross_sections.parquet")
-  out_path <- paste0(cs_pts_dir, out_file)
+  out_path <- 
paste0(CS_PTS_DIR, out_file) message("Saving cross section points to:\n - filepath: '", out_path, "'") # save cross section points as a parquet to out_path (lynker-spatial/02_cs_pts/cs_pts_.parquet) - arrow::write_parquet(cs_pts, out_path) + arrow::write_parquet(fixed_pts, out_path) # command to copy cross section points parquet to S3 - if (!is.null(aws_profile)) { - copy_cs_pts_to_s3 <- paste0("aws s3 cp ", out_path, " ", cs_pts_prefix, out_file, - ifelse(is.null(aws_profile), "", paste0(" --profile ", aws_profile))) - } else { - copy_cs_pts_to_s3 <- paste0("aws s3 cp ", out_path, " ", cs_pts_prefix, out_file) - - } - + copy_cs_pts_to_s3 <- paste0("aws s3 cp ", out_path, " ", S3_CS_PTS_DIR, out_file, + ifelse(is.null(AWS_PROFILE), "", paste0(" --profile ", AWS_PROFILE))) + message("Copy VPU ", path_df$vpu[i], " cross sections to S3:\n - S3 copy command:\n'", - paste0("aws s3 cp ", out_path, " ", cs_pts_prefix, out_file, - ifelse(is.null(aws_profile), "", paste0(" --profile ", aws_profile))), + paste0("aws s3 cp ", out_path, " ", S3_CS_PTS_DIR, out_file, + ifelse(is.null(AWS_PROFILE), "", paste0(" --profile ", AWS_PROFILE))), "'\n==========================") system(copy_cs_pts_to_s3, intern = TRUE) - + + end <- Sys.time() + + message("Finished cross section point generation for VPU ", VPU) + message("- Completed at: ", end) + message("==========================") + + rm(fixed_pts) + gc() + gc() } + +# ########################################################################################################################################### diff --git a/runners/cs_runner/03_driver.R b/runners/cs_runner/03_driver.R deleted file mode 100644 index cce576d..0000000 --- a/runners/cs_runner/03_driver.R +++ /dev/null @@ -1,13 +0,0 @@ -### Run this file to have all runner scripts run in order - -# downloads nextgen datasets -source("runners/cs_runner/config.R") - -# downloads nextgen datasets -source("runners/cs_runner/download_nextgen.R") - -# generate and upload transects datasets -source("runners/cs_runner/01_transects.R") - -# generate and upload cross sections points datasets -source("runners/cs_runner/02_cs_pts.R") diff --git a/runners/cs_runner/03_inject_ml.R b/runners/cs_runner/03_inject_ml.R new file mode 100644 index 0000000..f29efc7 --- /dev/null +++ b/runners/cs_runner/03_inject_ml.R @@ -0,0 +1,390 @@ +# ---------------------------------------------------------------------------------------------------------------- +# ---- data paths ----- +# ---------------------------------------------------------------------------------------------------------------- +library(dplyr) +library(hydrofabric3D) +library(sf) +library(patchwork) + +# Generate the flowlines layer for the final cross_sections_.gpkg for each VPU +source("runners/cs_runner/config.R") +source("runners/cs_runner/utils.R") + +# cross section bucket prefix +S3_CS_ML_PTS_DIR <- paste0(LYNKER_SPATIAL_HF_S3_URI, VERSION, "/3D/cross-sections/") +# S3_CS_PTS_DIR <- paste0(LYNKER_SPATIAL_HF_S3_URI, "v20/3D/dem-cross-sections/") + +ML_OUTPUTS_PATH <- list.files(ML_OUTPUTS_DIR, full.names = TRUE) + +# paths to nextgen datasets +NEXTGEN_FILES <- list.files(NEXTGEN_DIR, full.names = FALSE) + +# paths to nextgen datasets +cs_files <- list.files(CS_PTS_DIR, full.names = FALSE) + +# ensure the files are in the same order and matched up by VPU +path_df <- align_files_by_vpu( + x = NEXTGEN_FILES, + y = cs_files, + base = BASE_DIR +) +# dplyr::left_join( +# ref_df, +# by = "vpu" +# ) + +# ML Outputs +ml_output <- 
arrow::read_parquet(ML_OUTPUTS_PATH) + +# loop over the nextgen and transect datasets (by VPU) and extract point elevations across points on each transect line, +# then classify the points, and create a parquet file with hy_id, cs_id, pt_id, X, Y, Z data. +# Save parquet locally and upload to specified S3 bucket +for (i in 1:nrow(path_df)) { + # i = 8 + start <- round(Sys.time()) + + # nextgen file and full path + nextgen_file <- path_df$x[i] + nextgen_path <- paste0(NEXTGEN_DIR, nextgen_file) + + # model attributes file and full path + cs_file <- path_df$y[i] + cs_pts_path <- paste0(CS_PTS_DIR, cs_file) + + # current VPU being processed + VPU = path_df$vpu[i] + + message("Augmenting DEM cross sections with ML estimated widths/depths: ", VPU, + " cross section points:", + "'\n - cross section points: '", cs_file, "'", + "'\n - ML estimated widths/depths: '", ML_OUTPUTS_S3_FILE, "'", + # "'\n - ML estimated widths/depths: '", ML_OUTPUTS_S3_URI, "'", + "\n - CONUS network file: '", CONUS_NETWORK_URI, "'", + "\n - flowpaths: '", nextgen_file, + # "\n - waterbodies: '", ref_file, "'", + "'\n - start time: '", start, "'" + ) + + # ---------------------------------------------------------------------------------------------------------------- + # ---- Read in data ----- + # ---------------------------------------------------------------------------------------------------------------- + message("Loading data...") + + # CONUS network parquet + net <- + CONUS_NETWORK_URI %>% + arrow::open_dataset() %>% + dplyr::filter(vpu == VPU) %>% + dplyr::collect() + + # Cross section points parquet + cs_pts <- arrow::read_parquet(cs_pts_path) + + # TODO: Not needed + # fline_net <- sf::read_sf(nextgen_path, layer = "flowpaths") + + # ---------------------------------------------------------------------------------------------------------------- + # ---- Extract the max hydroseq "hy_id" for each flowline in the CONUS network parquet ----- + # Use this to join "hy_id" to "hf_id" in ML outputs data + # ---------------------------------------------------------------------------------------------------------------- + + net_subset <- + net %>% + dplyr::select(id, hf_id, hf_hydroseq) %>% + # dplyr::filter(id %in% unique(cs$hy_id)) %>% + dplyr::filter(stringr::str_detect(id, "^wb-") & !is.na(id)) %>% + dplyr::group_by(id) %>% + dplyr::slice_max(hf_hydroseq, with_ties = FALSE) %>% + dplyr::ungroup() %>% + dplyr::select(hy_id = id, + hf_id, + hf_hydroseq + ) + + number_hyids_in_cs_pts <- length(unique(cs_pts$hy_id )) + number_hyids_in_conus_net <- length(unique(net_subset$hy_id)) + + message("VPU ", VPU, ": ", + "\n - Number of CONUS network hy_ids: '", number_hyids_in_conus_net, "'", + "\n - Number of cross section points hy_ids: '", number_hyids_in_cs_pts, "'" + ) + # ---------------------------------------------------------------------------------------------------------------- + # ---- Find the stream orders for the flowlines in the network ----- + # ---------------------------------------------------------------------------------------------------------------- + + # stream_order <- + # fline_net %>% + # sf::st_drop_geometry() %>% + # dplyr::select(hy_id = id, + # order, + # # hydroseq, + # tot_drainage_areasqkm + # ) %>% + # dplyr::filter(hy_id %in% net_subset$hy_id) + + # ---------------------------------------------------------------------------------------------------------------- + # ---- Subset ML data to specific VPU and add "hy_id" column to ML data ----- + # 
---------------------------------------------------------------------------------------------------------------- + + # Join hy_id onto the ML outputs and then remove the rows WITHOUT matches in hy_id + # this should give us a (nearly) one-to-one cross walk between "hy_id" in the cross section points + # and "hf_id" in the ML outputs dataset + ml_subset <- + ml_output %>% + dplyr::left_join( + net_subset, + by = "hf_id" + ) %>% + dplyr::filter(!is.na(hy_id)) %>% + dplyr::relocate(hy_id, hf_id) + + message(round(Sys.time()), " - Joining ML width/depths estimates to cross section points...") + + # join the ML outputs data to the cross section points + cs_pts <- + cs_pts %>% + dplyr::left_join( + dplyr::select(ml_subset, + hy_id, + hf_id, + owp_tw_inchan, + owp_y_inchan, + owp_tw_bf, + owp_y_bf, + owp_dingman_r), + by = "hy_id" + ) + + message(round(Sys.time()), " - Replacing any negative width/depth estimates with cross section bottom lengths...") + + cs_bottom_lengths <- hydrofabric3D::get_cs_bottom_length(cross_section_pts = cs_pts, crosswalk_id = "hy_id") + + # TODO: for now we replace any negative TW values with the length of the bottom of the cross section + # TODO: This method + the negative model output values both need to be looked into (04/05/2024) + cs_pts <- + cs_pts %>% + dplyr::left_join( + cs_bottom_lengths, + by = c("hy_id", "cs_id") + ) %>% + dplyr::mutate( + owp_tw_inchan = dplyr::case_when( + owp_tw_inchan <= 0 ~ bottom_length, + TRUE ~ owp_tw_inchan + ), + owp_tw_bf = dplyr::case_when( + owp_tw_bf <= 0 ~ bottom_length, + TRUE ~ owp_tw_bf + ) + ) %>% + dplyr::select(-bottom_length) + + # extract any cross sections that didn't get matched with a "hf_id" and (or?) no ML data + # TODO: look at this stuff with Arash (04/09/2024) + missing_cs <- + cs_pts %>% + dplyr::filter(is.na(hf_id) | + is.na(owp_tw_inchan) | is.na(owp_y_inchan) | + is.na(owp_tw_bf) | is.na(owp_y_bf) | + is.na(owp_dingman_r)) %>% + hydrofabric3D::add_tmp_id() + + # TODO: Delete this, but time being keeping this to inspect mismatch in between "hy_id" and "hf_id" + readr::write_csv( + dplyr::select(missing_cs, -tmp_id), + paste0(META_PATH, "nextgen_", path_df$vpu[i], "_cross_sections_missing_hf_ids.csv") + ) + + # Split the cross sections into 2 groups: + # - "Inchannel cs" group are points with BOTH valid banks AND relief --> These get the INCHANNEL TW and Y values from the ML model + # - "Bankful cs" group are points WITHOUT valid banks OR any relief --> These get the BANKFUL TW and Y values from the ML model + inchannel_cs <- + cs_pts %>% + hydrofabric3D::add_tmp_id() %>% + dplyr::filter(!tmp_id %in% unique(missing_cs$tmp_id)) %>% # NOTE: makes sure to remove any of the "missing" cross sections without data + dplyr::select(-tmp_id) %>% + dplyr::filter(valid_banks & has_relief) %>% + # NOTE: temporarily rename the top widths, depths, and dingman's R columns so they + # work nicely with the "hydrofabric3D::add_cs_bathymetry()" function which takes a dataframe of cross section points + # with "TW", "DEPTH", and "DINGMAN_R" columns for each cross section + dplyr::rename( + TW = owp_tw_inchan, + DEPTH = owp_y_inchan, + DINGMAN_R = owp_dingman_r + ) + + bankful_cs <- + cs_pts %>% + hydrofabric3D::add_tmp_id() %>% + dplyr::filter(!tmp_id %in% unique(missing_cs$tmp_id)) %>% + dplyr::select(-tmp_id) %>% + dplyr::filter(!valid_banks | !has_relief) %>% + dplyr::rename( + TW = owp_tw_bf, + DEPTH = owp_y_bf, + DINGMAN_R = owp_dingman_r + ) + + # bankful_cs %>% + # dplyr::slice(1000:1200) %>% + # 
hydrofabric3D::plot_cs_pts(color = "point_type") + + # sanity check that all rows are accounted for after splitting up data + split_kept_all_rows <- nrow(cs_pts) == nrow(bankful_cs) + nrow(inchannel_cs) + nrow(missing_cs) + # split_kept_all_rows <- nrow(cs_pts) == nrow(bankful_cs) + nrow(inchannel_cs) + + if (!split_kept_all_rows) { + warning(paste0("When splitting cross section points into 'bankful' and 'inchannel' groups,", + "\nsome points were not put in either group.", + "\nLikely due to 'valid_banks' and/or 'has_relief' columns have either missing ", + "values or contain values other than TRUE/FALSE") + ) + } + message(round(Sys.time()), " - Adding cross section bathymetry using inchannel widths/depths estimates...") + + # system.time({ + + # Add bathymetry using "inchannel" estimates + inchannel_cs <- hydrofabric3D::add_cs_bathymetry( + cross_section_pts = inchannel_cs, + crosswalk_id = "hy_id" + ) + + # }) + # arrow::write_parquet(inchannel_cs, "/Users/anguswatters/Desktop/test_ml_cs_pts_06.parquet") + # ml_subset %>% + # dplyr::filter(hy_id == "wb-1005207") %>% + # dplyr::select(owp_y_inchan, owp_tw_inchan) %>% + # .$owp_y_inchan + message(round(Sys.time()), " - Adding cross section bathymetry using bankful widths/depths estimates...") + + # system.time({ + # Add bathymetry using "bankful" estimates + bankful_cs <- hydrofabric3D::add_cs_bathymetry( + cross_section_pts = bankful_cs, + crosswalk_id = "hy_id" + ) + # }) + + # combine the inchannel and bankful cross section points back together, fill out missing values and reclassify the points + final_cs <- dplyr::bind_rows( + dplyr::select( + inchannel_cs, + -hf_id, -TW, -DEPTH, -DINGMAN_R, -dplyr::starts_with("owp"), -is_dem_point + ), + dplyr::select( + bankful_cs, + -hf_id, -TW, -DEPTH, -DINGMAN_R, -dplyr::starts_with("owp"), -is_dem_point + ), + dplyr::select( + missing_cs, + -hf_id, -dplyr::starts_with("owp"), -tmp_id + ) + ) %>% + dplyr::group_by(hy_id, cs_id) %>% + tidyr::fill( + c(cs_lengthm, Z_source) + ) %>% + dplyr::ungroup() %>% + dplyr::select( + -point_type, + -class, + -bottom, -left_bank, -right_bank, + -has_relief, -valid_banks + ) + + message(round(Sys.time()), " - Reclassifying cross section point types...") + + # system.time({ + # reclassify + final_cs <- hydrofabric3D::classify_points(cs_pts = final_cs, + crosswalk_id = "hy_id") + # }) + + # final_uids <- final_cs %>% hydrofabric3D::get_unique_tmp_ids() + # random_uids <- sample(x=final_uids, size=12) + # cs_subset <- dplyr::filter(hydrofabric3D::add_tmp_id(final_cs), + # tmp_id %in% random_uids) + # hydrofabric3D::classify_points(cs_subset) %>% hydrofabric3D::plot_cs_pts(color = "point_type") + + starting_uids <- hydrofabric3D::get_unique_tmp_ids(cs_pts, x = "hy_id") + ending_uids <- hydrofabric3D::get_unique_tmp_ids(final_cs, x = "hy_id") + + has_same_number_of_uids <- length(starting_uids) == length(ending_uids) + all_starting_uids_in_ending_uids <- all(starting_uids %in% ending_uids) + all_ending_uids_in_starting_uids <- all(ending_uids %in% starting_uids) + + # throw some warnings if: + # - the number of uids in the input is different from the output + # - there are missing hy_id/cs_id + if (!has_same_number_of_uids) { + warning(paste0("The number of unique hy_id/cs_id is different in the ", + "starting cross section points from the final cross section points:", + "\n > Starting number of unique hy_id/cs_id: ", length(starting_uids), + "\n > Ending number of unique hy_id/cs_id: ", length(ending_uids) + )) + } + + if (!all_starting_uids_in_ending_uids) { + 
number_uids_not_in_ending_uids <- length(starting_uids[!starting_uids %in% ending_uids]) + + # starting_uids %in% ending_uids + warning( + paste0("Missing hy_id/cs_id in output that are in the starting input cross section points: ", + "\n > Number of hy_id/cs_id missing: ", number_uids_not_in_ending_uids + ) + ) + + # warning(paste0(number_uids_not_in_ending_uids, " hy_id/cs_id from the input cross section points ", + # "is missing from the output cross section points")) + + } + + # ---------------------------------------------------------------------------------------------------------------- + # ---- Upload the cross section points parquet to S3 ---- + # ---------------------------------------------------------------------------------------------------------------- + + # name of file and path to save transects gpkg too + out_file <- paste0("nextgen_", path_df$vpu[i], "_cross_sections.parquet") + out_path <- paste0(CS_OUTPUT_DIR, out_file) + + message(round(Sys.time()), " - Saving ML augmented cross section points to:\n - filepath: '", out_path, "'") + + # save cross section points as a parquet to out_path (lynker-spatial/02_cs_pts/cs_pts_.parquet) + arrow::write_parquet(final_cs, out_path) + + s3_save_uri <- paste0(S3_CS_ML_PTS_DIR, out_file) + + # command to copy cross section points parquet to S3 + copy_cs_pts_to_s3 <- paste0("aws s3 cp ", + out_path, + " ", + s3_save_uri, + ifelse(is.null(AWS_PROFILE), "", paste0(" --profile ", AWS_PROFILE)) + ) + + message( + "Copy VPU ", path_df$vpu[i], " ML augmented cross sections to S3:\n - S3 copy command:\n'", + copy_cs_pts_to_s3, "'", + "'\n==========================" + ) + + system(copy_cs_pts_to_s3, intern = TRUE) + + end <- round(Sys.time()) + + message("Finished augmenting cross section points with ML for VPU ", VPU) + message("- Completed at: ", end) + message("==========================") + + rm(net, net_subset, + final_cs, cs_pts, + inchannel_cs, bankful_cs) + gc() + gc() + +} + + + + \ No newline at end of file diff --git a/runners/cs_runner/04_driver.R b/runners/cs_runner/04_driver.R new file mode 100644 index 0000000..212e1ac --- /dev/null +++ b/runners/cs_runner/04_driver.R @@ -0,0 +1,26 @@ +### Run this file to have all runner scripts run in order + +# downloads nextgen datasets +source("runners/cs_runner/config.R") + +# downloads datasets +# - Nextgen data +# - Reference features (for waterbody filtering) +# - ML outputs +# - FEMA 100 year floodplain polygons (FGBs) +source("runners/cs_runner/download_nextgen.R") +source("runners/cs_runner/download_ref_features.R") +source("runners/cs_runner/download_ml_outputs.R") +source("runners/cs_runner/download_fema100.R") + +# simplify, dissolve, FEMA polygons and partition FEMA polygons by VPU +source("runners/cs_runner/partition_fema_by_vpu.R") + +# generate and upload transects datasets +source("runners/cs_runner/01_transects.R") + +# generate and upload cross sections points datasets +source("runners/cs_runner/02_cs_pts.R") + +# Apply machine learning topwidths and depths estimates to DEM cross section points +source("runners/cs_runner/03_inject_ml.R") diff --git a/runners/cs_runner/bathy.R b/runners/cs_runner/bathy.R new file mode 100644 index 0000000..7a550f9 --- /dev/null +++ b/runners/cs_runner/bathy.R @@ -0,0 +1,337 @@ +# Generate the transects + cs_pts + cross sections layers for a single flowlines domain file and DEM file +source("runners/cs_runner/config.R") +source("runners/cs_runner/utils.R") + +# load libraries +# library(hydrofabric3D) +# library(dplyr) +# library(sf) + +# 
Unique Flowline ID column name
+CROSSWALK_ID <- "id"
+
+VPU_ML_BATHYMETRY_PATHS <- list.files(DOMAIN_WITH_FEMA_ML_DIR, full.names = T)
+
+ML_CROSSWALK_ID <- "id"
+
+# ml_outputs <- lapply(VPU_ML_BATHYMETRY_PATHS, function(prq) {
+#   vpu_id <- gsub(".*ml/([a-zA-Z0-9]+).*", "\\1", prq)
+#   arrow::read_parquet(prq) %>%
+#     dplyr::mutate(vpu_id = vpu_id)
+#   }
+# ) %>%
+#   dplyr::bind_rows() %>%
+#   dplyr::select(
+#     dplyr::any_of(ML_CROSSWALK_ID),
+#     vpu_id,
+#     owp_y_bf, owp_y_inchan,
+#     owp_tw_bf, owp_tw_inchan,
+#     owp_dingman_r
+#   )
+# 
+# # rename ML_CROSSWALK_ID (unique ID) to match the CROSSWALK_ID in CS PTS 
+# # TODO: This assumes the IDs do correspond with each other... (built from the same flowlines network)
+# names(ml_outputs)[names(ml_outputs) == ML_CROSSWALK_ID] = CROSSWALK_ID
+# 
+# # Keep only distinct ID rows 
+# ml_outputs <- 
+#   ml_outputs %>% 
+#   dplyr::distinct( 
+#     dplyr::across(dplyr::any_of(CROSSWALK_ID)), 
+#     vpu_id, 
+#     owp_y_bf, owp_y_inchan, 
+#     owp_tw_bf, owp_tw_inchan, 
+#     owp_dingman_r
+#   )
+
+# sf::st_layers(DOMAIN_WITH_FEMA_FLOWLINES_PATH)
+# rm(ml_outputs, ml)
+ml_outputs <- sf::read_sf(DOMAIN_WITH_FEMA_FLOWLINES_PATH, layer = "flowpath-attributes-ml")
+
+ml_outputs <- 
+  ml_outputs %>% 
+  dplyr::select(
+    dplyr::any_of(ML_CROSSWALK_ID),
+    vpu_id = vpuid,   # rename "vpuid" so the dplyr::distinct() call below can reference "vpu_id"
+    owp_y_bf = YCC,
+    owp_y_inchan = Y,  
+    owp_tw_bf = TopWdthCC, 
+    owp_tw_inchan = TopWdth, 
+    owp_dingman_r = dingman_r
+  )
+# 
+# # rename ML_CROSSWALK_ID (unique ID) to match the CROSSWALK_ID in CS PTS 
+# # TODO: This assumes the IDs do correspond with each other... (built from the same flowlines network)
+names(ml_outputs)[names(ml_outputs) == ML_CROSSWALK_ID] = CROSSWALK_ID
+# 
+# # Keep only distinct ID rows 
+ml_outputs <- 
+  ml_outputs %>% 
+  dplyr::distinct( 
+    dplyr::across(dplyr::any_of(CROSSWALK_ID)), 
+    vpu_id, 
+    owp_y_bf, owp_y_inchan, 
+    owp_tw_bf, owp_tw_inchan, 
+    owp_dingman_r
+  )
+
+# ---------------------------------------------------------------------------------
+# ---- Read in CS PTS data ----
+# ---------------------------------------------------------------------------------
+CS_PTS_OUTPUT_PATH <- paste0(DOMAIN_WITH_FEMA_OUTPUT_DIR, "/cs_pts.parquet")
+
+cs_pts <- arrow::read_parquet(CS_PTS_OUTPUT_PATH)
+
+# ---------------------------------------------------------------------------------
+# ---- Join CS PTS with ML data ---
+# ---------------------------------------------------------------------------------
+message(round(Sys.time()), " - Joining ML width/depths estimates to cross section points...")
+
+# ml_outputs %>% 
+#   dplyr::group_by(id) %>% 
+#   dplyr::count(id) %>% 
+#   dplyr::arrange(-n)
+
+# join the ML outputs data to the cross section points
+cs_pts <- 
+  cs_pts %>% 
+  
dplyr::left_join( + cs_bottom_lengths, + by = c(CROSSWALK_ID, "cs_id") + # by = c("hy_id", "cs_id") + ) %>% + dplyr::mutate( + owp_tw_inchan = dplyr::case_when( + owp_tw_inchan <= 0 ~ bottom_length, + TRUE ~ owp_tw_inchan + ), + owp_tw_bf = dplyr::case_when( + owp_tw_bf <= 0 ~ bottom_length, + TRUE ~ owp_tw_bf + ) + ) %>% + dplyr::select(-bottom_length) + +# extract any cross sections that didn't get matched with a "hf_id" and (or?) no ML data +# TODO: look at this stuff with Arash (04/09/2024) +missing_cs <- + cs_pts %>% + dplyr::filter( + is.na(.data[[CROSSWALK_ID]]) | + is.na(owp_tw_inchan) | is.na(owp_y_inchan) | + is.na(owp_tw_bf) | is.na(owp_y_bf) | + is.na(owp_dingman_r) + ) %>% + hydrofabric3D::add_tmp_id(x = CROSSWALK_ID) + +# TODO: Delete this, but time being keeping this to inspect mismatch in between "hy_id" and "hf_id" +# readr::write_csv( +# dplyr::select(missing_cs, -tmp_id), +# paste0(META_PATH, "nextgen_", path_df$vpu[i], "_cross_sections_missing_hf_ids.csv") +# ) + +# Split the cross sections into 2 groups: +# - "Inchannel cs" group are points with BOTH valid banks AND relief --> These get the INCHANNEL TW and Y values from the ML model +# - "Bankful cs" group are points WITHOUT valid banks OR any relief --> These get the BANKFUL TW and Y values from the ML model +inchannel_cs <- + cs_pts %>% + hydrofabric3D::add_tmp_id(x = CROSSWALK_ID) %>% + dplyr::filter(!tmp_id %in% unique(missing_cs$tmp_id)) %>% # NOTE: makes sure to remove any of the "missing" cross sections without data + dplyr::select(-tmp_id) %>% + dplyr::filter(valid_banks & has_relief) %>% + # NOTE: temporarily rename the top widths, depths, and dingman's R columns so they + # work nicely with the "hydrofabric3D::add_cs_bathymetry()" function which takes a dataframe of cross section points + # with "TW", "DEPTH", and "DINGMAN_R" columns for each cross section + dplyr::rename( + TW = owp_tw_inchan, + DEPTH = owp_y_inchan, + DINGMAN_R = owp_dingman_r + ) + # dplyr::slice(1:1000) + +bankful_cs <- + cs_pts %>% + hydrofabric3D::add_tmp_id(x = CROSSWALK_ID) %>% + dplyr::filter(!tmp_id %in% unique(missing_cs$tmp_id)) %>% + dplyr::select(-tmp_id) %>% + dplyr::filter(!valid_banks | !has_relief) %>% + dplyr::rename( + TW = owp_tw_bf, + DEPTH = owp_y_bf, + DINGMAN_R = owp_dingman_r + ) + # dplyr::slice(1:1000) + +# # Replace any topwidth values that are GREATER than the actual cross section length (meters) +# bankful_cs2 <- hydrofabric3D:::fix_oversized_topwidths( +# cross_section_pts = bankful_cs2, +# crosswalk_id = CROSSWALK_ID +# ) +# bankful_cs %>% +# dplyr::filter(TW >= cs_lengthm) %>% +# dplyr::select(TW) + +# sanity check that all rows are accounted for after splitting up data +split_kept_all_rows <- nrow(cs_pts) == (nrow(bankful_cs) + nrow(inchannel_cs) + nrow(missing_cs)) +# split_kept_all_rows <- nrow(cs_pts) == nrow(bankful_cs) + nrow(inchannel_cs) + +if (!split_kept_all_rows) { + warning(paste0("When splitting cross section points into 'bankful' and 'inchannel' groups,", + "\nsome points were not put in either group.", + "\nLikely due to 'valid_banks' and/or 'has_relief' columns have either missing ", + "values or contain values other than TRUE/FALSE") + ) +} + +message(round(Sys.time()), " - Adding cross section bathymetry using inchannel widths/depths estimates...") +# tmp <- +# inchannel_cs %>% +# dplyr::slice(1:10000) +# system.time({ + + # Add bathymetry using "inchannel" estimates + inchannel_cs <- hydrofabric3D::add_cs_bathymetry( + cross_section_pts = inchannel_cs, + # cross_section_pts = tmp, + 
crosswalk_id = CROSSWALK_ID + ) + +# }) + + gc() + + +# arrow::write_parquet(inchannel_cs, "/Users/anguswatters/Desktop/test_ml_cs_pts_06.parquet") +# ml_subset %>% +# dplyr::filter(hy_id == "wb-1005207") %>% +# dplyr::select(owp_y_inchan, owp_tw_inchan) %>% +# .$owp_y_inchan +message(round(Sys.time()), " - Adding cross section bathymetry using bankful widths/depths estimates...") +# system.time({ + + # Add bathymetry using "bankful" estimates + bankful_cs <- hydrofabric3D::add_cs_bathymetry( + cross_section_pts = bankful_cs, + # cross_section_pts = dplyr::slice(bankful_cs, 1:10000), + crosswalk_id = CROSSWALK_ID + ) + +# }) + +# combine the inchannel and bankful cross section points back together, fill out missing values and reclassify the points +final_cs <- dplyr::bind_rows( + dplyr::select( + inchannel_cs, + # inchannel_cs2, + # -hf_id, + -TW, -DEPTH, -DINGMAN_R, + # -is_dem_point, + -dplyr::starts_with("owp") + ), + dplyr::select( + bankful_cs, + # bankful_cs2, + # -hf_id, + -TW, -DEPTH, -DINGMAN_R, + # -is_dem_point, + -dplyr::starts_with("owp") + ), + dplyr::select( + dplyr::mutate( + missing_cs, + is_dem_point = FALSE + ), + # -hf_id, + # -is_dem_point, + -dplyr::starts_with("owp"), + -tmp_id + ) +) %>% + dplyr::group_by(dplyr::across(dplyr::any_of(c(CROSSWALK_ID, "cs_id")))) %>% + # dplyr::group_by(hy_id, cs_id) %>% + tidyr::fill( + c(cs_lengthm, Z_source) + ) %>% + dplyr::ungroup() %>% + dplyr::select( + -point_type, + -class, + -bottom, -left_bank, -right_bank, + -has_relief, -valid_banks + ) + +# arrow::write_parquet(final_cs, "/Users/anguswatters/Desktop/tmp.parquet") +# final_cs <- arrow::read_parquet("/Users/anguswatters/Desktop/tmp.parquet") + + +message(round(Sys.time()), " - Reclassifying cross section point types...") + +# reclassify +final_cs <- hydrofabric3D::classify_points(cs_pts = final_cs, + crosswalk_id = CROSSWALK_ID, + pct_of_length_for_relief = PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF +) + +starting_uids <- hydrofabric3D::get_unique_tmp_ids(cs_pts, x = CROSSWALK_ID) +ending_uids <- hydrofabric3D::get_unique_tmp_ids(final_cs, x = CROSSWALK_ID) + +has_same_number_of_uids <- length(starting_uids) == length(ending_uids) +all_starting_uids_in_ending_uids <- all(starting_uids %in% ending_uids) +all_ending_uids_in_starting_uids <- all(ending_uids %in% starting_uids) + +# throw some warnings if: +# - the number of uids in the input is different from the output +# - there are missing hy_id/cs_id +if (!has_same_number_of_uids) { + warning(paste0("The number of unique hy_id/cs_id is different in the ", + "starting cross section points from the final cross section points:", + "\n > Starting number of unique hy_id/cs_id: ", length(starting_uids), + "\n > Ending number of unique hy_id/cs_id: ", length(ending_uids) + )) +} + +if (!all_starting_uids_in_ending_uids) { + number_uids_not_in_ending_uids <- length(starting_uids[!starting_uids %in% ending_uids]) + + # starting_uids %in% ending_uids + warning( + paste0("Missing hy_id/cs_id in output that are in the starting input cross section points: ", + "\n > Number of hy_id/cs_id missing: ", number_uids_not_in_ending_uids + ) + ) + + # warning(paste0(number_uids_not_in_ending_uids, " hy_id/cs_id from the input cross section points ", + # "is missing from the output cross section points")) + +} +tmp_path <- paste0(DOMAIN_WITH_FEMA_OUTPUT_DIR, "/final_cs.parquet") +message("saving file", tmp_path) + +arrow::write_parquet( + final_cs, + tmp_path +) + diff --git a/runners/cs_runner/config.R b/runners/cs_runner/config.R index 
855cd9b..b70ef37 100644 --- a/runners/cs_runner/config.R +++ b/runners/cs_runner/config.R @@ -2,77 +2,284 @@ pacman::p_load( archive, hydrofabric, - terrainSliceR + hydrofabric3D ) +# # install.packages("devtools") +# devtools::install_github("anguswg-ucsb/hydrofabric3D") + # load root directory source("runners/cs_runner/config_vars.R") +source("runners/cs_runner/utils.R") sf::sf_use_s2(FALSE) -# name of bucket with nextgen data -nextgen_bucket <- "lynker-spatial" +### Cross section point + +# # ------------------------------------------------------------------------------------- +# # ----- S3 names ------ +# # ------------------------------------------------------------------------------------- + +# # AWS S3 bucket URI +# LYNKER_SPATIAL_HF_S3_URI <- "s3://lynker-spatial/" + +# # name of bucket with nextgen data +# LYNKER_SPATIAL_S3_BUCKET_NAME <- "lynker-spatial" + +# # the name of the folder in the S3 bucket with the nextgen data +# S3_BUCKET_NEXTGEN_DIR <- "v20.1/gpkg/" + +# # full URI to the S3 bucket folder with the nextgen data +# S3_BUCKET_NEXTGEN_DIR_URI <- paste0(LYNKER_SPATIAL_HF_S3_URI, S3_BUCKET_NEXTGEN_DIR) + +# # reference features S3 bucket prefix +# S3_BUCKET_REF_FEATURES_URI <- "s3://lynker-spatial/00_reference_features/gpkg/" + +# # S3 prefix/folder of version run +# VERSION <- "v20.1" + +# # ------------------------------------------------------------------------------------- + +# # ------------------------------------------------------------------------------------- +# # ----- Local directories ------ +# # ------------------------------------------------------------------------------------- -# directory to copy nextgen bucket data too -nextgen_dir <- paste0(base_dir, "/pre-release/") +# ### LOCAL DIRS -# model attributes directory -model_attr_dir <- paste0(base_dir, "/model_attributes/") +# # directory to copy nextgen bucket data too +# NEXTGEN_DIR <- paste0(BASE_DIR, "/", S3_BUCKET_NEXTGEN_DIR) +# # NEXTGEN_DIR <- paste0(BASE_DIR, "/pre-release/") -# cross-section data model data directories -transects_dir <- paste0(base_dir, "/01_transects/") -cs_pts_dir <- paste0(base_dir, "/02_cs_pts/") +# # model attributes directory +# MODEL_ATTR_DIR <- paste0(BASE_DIR, "/model_attributes/") -# final output directory with geopackages per VPU -final_dir <- paste0(base_dir, "/cross_sections/") +# # cross-section data model data directories +# TRANSECTS_DIR <- paste0(BASE_DIR, "/01_transects/") +# CS_PTS_DIR <- paste0(BASE_DIR, "/02_cs_pts/") +# # final output directory with geopackages per VPU +# CS_OUTPUT_DIR <- paste0(BASE_DIR, "/cross_sections/") + +# # directory to copy nextgen bucket data too +# REF_FEATURES_DIR <- paste0(BASE_DIR, "/00_reference_features/") + +# # make a directory for the ML outputs data +# ML_OUTPUTS_DIR <- paste0(BASE_DIR, "/ml-outputs/") + +# ------------------------------------------------------------------------------------- +# ----- Create local directories ------ +# ------------------------------------------------------------------------------------- # create directories -dir.create(transects_dir, showWarnings = FALSE) -dir.create(cs_pts_dir, showWarnings = FALSE) -dir.create(final_dir, showWarnings = FALSE) -# dir.create(model_attr_dir, showWarnings = FALSE) - - -##### UTILITY FUNCTION FOR MATCHING FILES BASED ON VPU STRING ###### -# Given 2 character vectors of filenames both including VPU strings after a "nextgen_" string, match them together to -# make sure they are aligned and in the same order -# x is a character vector of file paths with a VPU ID 
preceeded by a "nextgen_" string -# y is a character vector of file paths with a VPU ID preceeded by a "nextgen_" string -# base is a character vector of the base directory of the files. Defaults to NULL -# Returns a dataframe with VPU, x, and y columns -align_files_by_vpu <- function( - x, - y, - base = NULL -) { - - # Regular expression pattern to match numeric pattern after "nextgen_" and remove everything after the ending period - regex_pattern <- "nextgen_(\\d+[A-Za-z]?).*" +dir.create(TRANSECTS_DIR, showWarnings = FALSE) +dir.create(CS_PTS_DIR, showWarnings = FALSE) +dir.create(REF_FEATURES_DIR, showWarnings = FALSE) +dir.create(paste0(REF_FEATURES_DIR, "gpkg/"), showWarnings = FALSE) +dir.create(CS_OUTPUT_DIR, showWarnings = FALSE) +dir.create(ML_OUTPUTS_DIR, showWarnings = FALSE) + +# create the directory if it does NOT exist +if(!dir.exists(NEXTGEN_DIR)) { + message("Directory does not exist at: \n\t'", NEXTGEN_DIR, "'\nCreating directory at: \n\t'", NEXTGEN_DIR, "'") - # path dataframe for X filepaths - x_paths <- data.frame(x = x) + dir.create(NEXTGEN_DIR) +} + +# # create the directory if it does NOT exist +# if(!dir.exists(MODEL_ATTR_DIR)) { +# message("Directory does not exist at: \n\t'", MODEL_ATTR_DIR, "'\nCreating directory at: \n\t'", MODEL_ATTR_DIR, "'") +# dir.create(MODEL_ATTR_DIR) +# } +# dir.create(MODEL_ATTR_DIR, showWarnings = FALSE) + +# ------------------------------------------------------------------------------------- + +# ------------------------------------------------------------------------------------- +# ----- Get the paths / locations of reference_features data ------ +# ------------------------------------------------------------------------------------- + +## Go get a list of the reference features geopackages from S3 and create a save path using the S3 file names to save reference features to local directory + +# list objects in S3 bucket, and regular expression match to nextgen_.gpkg pattern +list_ref_features <- paste0('#!/bin/bash + # AWS S3 Bucket and Directory information + S3_BUCKET="', S3_BUCKET_REF_FEATURES_URI , '" + + # Regular expression pattern to match object keys + PATTERN="reference_features.gpkg" + + S3_OBJECTS=$(aws s3 ls "$S3_BUCKET" | awk \'{print $4}\' | grep -E "$PATTERN") + + echo "$S3_OBJECTS"' +) + +# -------------------------------------------------------------------------- +# ---- Get a list of reference features geopackages ---- +# -------------------------------------------------------------------------- + +# Run the script to get a list of the nextgen geopackages that matched the regular expression above +ref_features <- system(list_ref_features, intern = TRUE) + +# ref features datasets +ref_features_keys <- paste0(S3_BUCKET_REF_FEATURES_URI, ref_features) +ref_features_files <- paste0(REF_FEATURES_DIR, "gpkg/", ref_features) + +# -------------------------------------------------------------------------- +# ---- Create empty file structure for a "new_domain" ---- +# -------------------------------------------------------------------------- + +create_new_domain_dirs(BASE_DIR, NEW_DOMAIN_DIRNAME, with_output = TRUE) + +# -------------------------------------------------------------------------- +# ---- Create empty file structure for a "domain_with_fema" ---- +# -------------------------------------------------------------------------- + +create_new_domain_dirs(BASE_DIR, DOMAIN_WITH_FEMA_DIRNAME, with_output = TRUE) + +# -------------------------------------------------------------------------- +# ---- Get locations of ML 
parquet files in S3 --- +# -------------------------------------------------------------------------- + +VPU_ML_BATHYMETRY_URIS <- unlist( + lapply(VPU_ML_BATHYMETRY_S3_DIRS, function(s3_dir) { + s3_file <- list_s3_objects(s3_dir, ".parquet$", AWS_PROFILE) + paste0(s3_dir, s3_file) + }) +) + +# ------------------------------------------------------------------------------------- +# ---- Download ML parquets for DOMAIN_WITH_FEMA ---- +# ------------------------------------------------------------------------------------- + +# Parse the selected S3 objects keys from the FEMA100 bucket directory copy them to the local destination directory if the file does NOT exist yet +for (s3_uri in VPU_ML_BATHYMETRY_URIS) { + # message(s3_uri) + # s3_uri <- "s3://lynker-hydrofabric/hydrofabric/nextgen/bathymetry/multisource_river_attributes/vpuid=18/part-0.parquet" + # s3_uri <- "s3://lynker-hydrofabric/hydrofabric/nextgen/bathymetry/multisource_river_attributes/vpuid=03W/part-0.parquet" + # s3_uri <- "s3://lynker-hydrofabric/hydrofabric/nextgen/bathymetry/multisource_river_attributes/vpuid=21/" - # path dataframe for Y filepaths - y_paths <- data.frame(y = y) + is_parquet <- endsWith(s3_uri, ".parquet") + vpu_id <- gsub(".*vpuid=([a-zA-Z0-9]+).*", "\\1", s3_uri) - # generate VPU IDs based on file path regular expression matching with "regex_pattern" above - x_paths$vpu <- gsub(regex_pattern, "\\1", x_paths$x) - y_paths$vpu <- gsub(regex_pattern, "\\1", y_paths$y) + message("Checking S3 bucket for VPU ", vpu_id, " ML data...") - # match paths based on VPU column - matched_paths <- dplyr::left_join( - x_paths, - y_paths, - by = "vpu" - ) + if (is_parquet) { + s3_file <- basename(s3_uri) + # vpu_id <- gsub(".*vpuid=([a-zA-Z0-9]+).*", "\\1", s3_uri) + new_file_name <- paste0(vpu_id, "_ml.parquet") + # new_file_name <- gsub("-", "_", paste0(vpu_id, "_", s3_file)) + + local_save_path <- paste0(DOMAIN_WITH_FEMA_ML_DIR, "/", new_file_name) + + if(!file.exists(local_save_path)) { + copy_cmd <- paste0('aws s3 cp ', s3_uri, + " ", local_save_path, " --profile ", AWS_PROFILE) + + message("S3 object:\n > '", s3_uri, "'") + message("Downloading S3 object to:\n > '", local_save_path, "'") + # message("Copy command:\n > '", copy_cmd, "'") + + system(copy_cmd) + + message(" > '", new_file_name, "' download complete!") + message("----------------------------------") + } else { + message("File already exists at:\n > '", local_save_path, "'") + } + + } else { + message("No S3 bucket ML data for VPU ", vpu_id, "...") + } + +} + +# VPU_ML_BATHYMETRY_PATHS <- list.files(DOMAIN_WITH_FEMA_ML_DIR, full.names = T) +# +# ml_outputs <- lapply(VPU_ML_BATHYMETRY_PATHS, function(prq) { +# vpu_id <- gsub(".*ml/([a-zA-Z0-9]+).*", "\\1", prq) +# arrow::read_parquet(prq) %>% +# dplyr::mutate(vpu_id = vpu_id) +# } +# ) %>% +# dplyr::bind_rows() + +# -------------------------------------------------------------------------- +# ---- Get locations of diffusive domain DEM files in S3 ---- +# -------------------------------------------------------------------------- + +COASTAL_BATHY_DEM_S3_URIS <- paste0(COASTAL_BATHY_DEM_S3_DIR_URI, + list_s3_objects(COASTAL_BATHY_DEM_S3_DIR_URI, ".tif$", AWS_PROFILE) + ) + +# ------------------------------------------------------------------------------------- +# ---- Download diffusive domain DEM files from S3 for DOMAIN_WITH_FEMA ---- +# ------------------------------------------------------------------------------------- + +# Parse the selected S3 objects keys from the FEMA100 bucket directory copy them to 
the local destination directory if the file does NOT exist yet +for (s3_uri in COASTAL_BATHY_DEM_S3_URIS) { + message(s3_uri) - # reorder columns - matched_paths <- dplyr::relocate(matched_paths, vpu, x, y) + is_tif <- endsWith(s3_uri, ".tif") + # vpu_id <- gsub(".*vpuid=([a-zA-Z0-9]+).*", "\\1", s3_uri) - if(!is.null(base)) { - matched_paths$base_dir <- base - } + message("Checking S3 bucket for DEM data...") - return(matched_paths) + if (is_tif) { + + s3_file <- basename(s3_uri) + # vpu_id <- gsub(".*vpuid=([a-zA-Z0-9]+).*", "\\1", s3_uri) + # new_file_name <- paste0(vpu_id, "_ml.parquet") + # new_file_name <- gsub("-", "_", paste0(vpu_id, "_", s3_file)) + + local_save_path <- paste0(DOMAIN_WITH_FEMA_DEM_DIR, "/", s3_file) + + if (!file.exists(local_save_path)) { + copy_cmd <- paste0('aws s3 cp ', s3_uri, + " ", local_save_path, " --profile ", AWS_PROFILE) + + message("S3 object:\n > '", s3_uri, "'") + message("Downloading S3 object to:\n > '", local_save_path, "'") + # message("Copy command:\n > '", copy_cmd, "'") + + system(copy_cmd) + + message(" > '", s3_file, "' download complete!") + message("----------------------------------") + } else { + message("File already exists at:\n > '", local_save_path, "'") + } + + } else { + message("No S3 bucket DEM data... ") + } } + +COASTAL_BATHY_DEM_PATHS <- list.files(DOMAIN_WITH_FEMA_DEM_DIR, full.names = TRUE) + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/runners/cs_runner/config_vars.R b/runners/cs_runner/config_vars.R index 9738869..bee5b45 100644 --- a/runners/cs_runner/config_vars.R +++ b/runners/cs_runner/config_vars.R @@ -1,16 +1,293 @@ -### EDIT base_dir, aws_profile, and DEM_URL ### -base_dir <- '/Users/anguswatters/Desktop/lynker-spatial' +### EDIT BASE_DIR, AWS_PROFILE, and DEM_PATH ### + +# --------------------------------------------------------------------------------- +# ---- General paths and constants variables ---- +# - edit to match your local environment +# - BASE_DIR: base directory for local file storage +# - AWS_PROFILE: AWS profile to run CLI commands +# - VERSION: S3 prefix/folder of version to run / generate hydrofabric data for +# --------------------------------------------------------------------------------- +# Base directory for local file storage +BASE_DIR <- '/Users/anguswatters/Desktop/lynker-spatial' # AWS profile to run CLI commands -aws_profile <- "angus-lynker" +AWS_PROFILE <- "angus-lynker" + +# S3 prefix/folder of version run +VERSION <- "v20.1" + +# string to fill in "CS_SOURCE" column in output datasets +CS_SOURCE <- "hydrofabric3D" + +# name of bucket with nextgen data +LYNKER_SPATIAL_S3_BUCKET_NAME <- "lynker-spatial" +LYNKER_SPATIAL_HF_S3_PREFIX <- "hydrofabric" + +# AWS S3 bucket URI +LYNKER_SPATIAL_BASE_S3_URI <- paste0("s3://", LYNKER_SPATIAL_S3_BUCKET_NAME, "/") +LYNKER_SPATIAL_HF_S3_URI <- paste0(LYNKER_SPATIAL_BASE_S3_URI, LYNKER_SPATIAL_HF_S3_PREFIX, "/") +# LYNKER_SPATIAL_HF_S3_URI <- "s3://lynker-spatial/" + +# ------------------------------------------------------------------------------------- +# ---- S3 output directories ----- +# - transects +# - cross section points +# - ML cross section points +# ------------------------------------------------------------------------------------- + +# transect bucket prefix +S3_TRANSECTS_DIR <- paste0(LYNKER_SPATIAL_HF_S3_URI, VERSION, "/3D/transects/") + +# cross section bucket prefix +S3_CS_PTS_DIR <- paste0(LYNKER_SPATIAL_HF_S3_URI, VERSION, "/3D/dem-cross-sections/") + +# cross section bucket prefix +S3_CS_ML_PTS_DIR <- 
paste0(LYNKER_SPATIAL_HF_S3_URI, VERSION, "/3D/cross-sections/") + +# ------------------------------------------------------------------------------------- +# ---- S3 nextgen data paths / directories ----- +# ------------------------------------------------------------------------------------- + +# the name of the folder in the S3 bucket with the nextgen data +S3_BUCKET_NEXTGEN_DIR <- paste0(VERSION, "/gpkg/") +# S3_BUCKET_NEXTGEN_DIR <- "v20.1/gpkg/" + +# full URI to the S3 bucket folder with the nextgen data +S3_BUCKET_NEXTGEN_DIR_URI <- paste0(LYNKER_SPATIAL_HF_S3_URI, S3_BUCKET_NEXTGEN_DIR) + +# reference features S3 bucket prefix +S3_BUCKET_REF_FEATURES_URI <- paste0("s3://", LYNKER_SPATIAL_S3_BUCKET_NAME, "/00_reference_features/gpkg/") +# S3_BUCKET_REF_FEATURES_URI <- "s3://lynker-spatial/00_reference_features/gpkg/" + +# ---------------------------------------------------------------------------- +# ---- Machine learning data path variables ---- +# ---------------------------------------------------------------------------- + +ML_OUTPUTS_S3_FILE <- "channel_ml_outputs.parquet" + +# ML_OUTPUTS_S3_DIR <- paste0(VERSION, "/3D/ml-outputs/") +# ML_OUTPUTS_S3_DIR <- "v20.1/3D/ml-outputs/" + +ML_OUTPUTS_S3_URI <- paste0(LYNKER_SPATIAL_HF_S3_URI, VERSION, "/3D/ml-outputs/", ML_OUTPUTS_S3_FILE) +# ML_OUTPUTS_S3_URI <- paste0(LYNKER_SPATIAL_HF_S3_URI, ML_OUTPUTS_S3_DIR, ML_OUTPUTS_S3_FILE) + +ML_OUTPUTS_PATH <- paste0(BASE_DIR, "/ml-outputs/", ML_OUTPUTS_S3_FILE) + +# path to the remote CONUS net parquet file +CONUS_NETWORK_FILE <- "conus_net.parquet" +CONUS_NETWORK_URI <- paste0(LYNKER_SPATIAL_HF_S3_URI, VERSION, "/", CONUS_NETWORK_FILE) + +# ---------------------------------------------------------------------------- + + +# ------------------------------------------------------------------------------------- +# ---- Local directory / path variables ---- +# ------------------------------------------------------------------------------------- + +# directory to copy nextgen bucket data too +NEXTGEN_DIR <- paste0(BASE_DIR, "/", S3_BUCKET_NEXTGEN_DIR) +# NEXTGEN_DIR <- paste0(BASE_DIR, "/pre-release/") + +# # model attributes directory +# MODEL_ATTR_DIR <- paste0(BASE_DIR, "/model_attributes/") + +# cross-section data model data directories +TRANSECTS_DIR <- paste0(BASE_DIR, "/01_transects/") +CS_PTS_DIR <- paste0(BASE_DIR, "/02_cs_pts/") + +# final output directory with geopackages per VPU +CS_OUTPUT_DIR <- paste0(BASE_DIR, "/cross_sections/") + +# directory to copy nextgen bucket data too +REF_FEATURES_DIR <- paste0(BASE_DIR, "/00_reference_features/") +REF_FEATURES_GPKG_DIR <- paste0(REF_FEATURES_DIR, "gpkg/") + +# make a directory for the ML outputs data +ML_OUTPUTS_DIR <- paste0(BASE_DIR, "/ml-outputs/") + +DEM_DIR <- paste0(BASE_DIR, "/dem") +DEM_VRT_DIR <- paste0(DEM_DIR, "/vrt") +DEM_TIF_DIR <- paste0(DEM_DIR, "/tif") + +# ------------------------------------------------------------------------------------- +# ---- Create local directory / path variables (FEMA data) ---- +# ------------------------------------------------------------------------------------- + +# location of FEMA 100 year flood plain FGB files +FEMA_S3_BUCKET <- "s3://lynker-hydrofabric/" +LYNKER_HF_FEMA_S3_PREFIX <- "FEMA100/" +LYNKER_HF_FEMA_S3_URI <- paste0(FEMA_S3_BUCKET, LYNKER_HF_FEMA_S3_PREFIX) + +# FEMA100 year flood map FGB save location (temporary, will be deleted after processing) +FEMA_FGB_PATH <- paste0(BASE_DIR, "/FEMA100") +FEMA_GEOJSON_PATH <- paste0(BASE_DIR, "/FEMA100_geojson") +FEMA_CLEAN_PATH <- 
paste0(BASE_DIR, "/FEMA100_clean") +FEMA_GPKG_PATH <- paste0(BASE_DIR, "/FEMA100_gpkg") +FEMA_GPKG_BB_PATH <- paste0(BASE_DIR, "/FEMA100_bounding_box") # TODO: Probably can be deleted too, not sure yet + +FEMA_BY_VPU_PATH <- paste0(BASE_DIR, "/FEMA_BY_VPU") +VPU_IDS <- sf::st_drop_geometry(nhdplusTools::get_boundaries())$VPUID + +FEMA_VPU_SUBFOLDERS <- paste0(FEMA_BY_VPU_PATH, "/VPU_", VPU_IDS) +# FEMA_VPU_SUBFOLDERS <- paste0( +# FEMA_BY_VPU_PATH, "/VPU_", +# unlist( +# lapply(list.files(NEXTGEN_DIR, full.names = FALSE), function(vpu_file_names) { +# unlist(regmatches(vpu_file_names, gregexpr("\\d+[A-Za-z]*", vpu_file_names)))}) +# ) +# ) + +CS_EXTENSION_POLYGONS_DIR <- paste0(BASE_DIR, "/cs-extension-polygons") +CONUS_FEMA_GPKG_PATH <- file.path(CS_EXTENSION_POLYGONS_DIR, 'conus_fema.gpkg') + +# paste0(BASE_DIR/ +# ------------------------------------------------------------------------------------- +# ---- OVERWRITE_FEMA_FILES constant logicals---- +# ---- > if TRUE, processing steps will be run again +# and overwrite existing previously processed files +# TODO: Describe these variables +# ------------------------------------------------------------------------------------- + +# Default is TRUE (i.e. a fresh processing run is done from start to finish) +OVERWRITE_FEMA_FILES <- TRUE +DELETE_STAGING_GPKGS <- TRUE # remove intermediary files from the main output folder + +# ---------------------------------------------------------------------------- +# ---- Cross section point extraction constant variables ---- +# ---------------------------------------------------------------------------- # DEM URL -DEM_URL <- "/vsicurl/https://prd-tnm.s3.amazonaws.com/StagedProducts/Elevation/1/TIFF/USGS_Seamless_DEM_1.vrt" +DEM_PATH <- "/vsicurl/https://prd-tnm.s3.amazonaws.com/StagedProducts/Elevation/1/TIFF/USGS_Seamless_DEM_1.vrt" + +# scale argument for cross_section_pts() function. +# The percentage of the length of the transect line to try and extend a transect to see if viable Z values can be found by extending transect line +# Default setting is 50% of the original transect lines length (0.5) +EXTENSION_PCT <- 0.5 + +# percentage of the length each cross section that should be used as a threshold for classifying a cross section as having relief or not +# 1% of the cross sections length is the default value we are using +# (i.e. a 100m long cross section needs a minimum of 1 meter (1%) of relief in its cross section points to be classified as "having relief") +PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF <- 0.01 + +# Whether to collect meta data from runs to generate an output CSV (currently only being created in 02_cs_pts.R) +# TODO: Probably delete this +COLLECT_META <- TRUE + +# Where should meta data CSVs be saved to? 
+# Local path to save CSVs of cross section meta data during each iteration +# TODO: Probably delete this +META_PATH <- '/Users/anguswatters/Desktop/cs_meta/' +# META_PATH <- "/local/path/to/save/cross_section_meta_data/" + +# ------------------------------------------------------------------------------------- +# ---- (New single domain) Local directory / path variables ---- +# ------------------------------------------------------------------------------------- + +# directory for new domain data +NEW_DOMAIN_DIRNAME <- "new_domain" +NEW_DOMAIN_DIR <- paste0(BASE_DIR, "/", NEW_DOMAIN_DIRNAME) + +NEW_DOMAIN_FLOWLINES_DIRNAME <- "flowlines" +NEW_DOMAIN_FLOWLINES_DIR <- paste0(NEW_DOMAIN_DIR, "/", NEW_DOMAIN_FLOWLINES_DIRNAME) + +NEW_DOMAIN_DEM_DIRNAME <- "dem" +NEW_DOMAIN_DEM_DIR <- paste0(NEW_DOMAIN_DIR, "/", NEW_DOMAIN_DEM_DIRNAME) + +NEW_DOMAIN_FLOWLINES_FILE <- "AllDiffusiveCombined.gpkg" +NEW_DOMAIN_FLOWLINES_PATH <- paste0(NEW_DOMAIN_FLOWLINES_DIR, "/", NEW_DOMAIN_FLOWLINES_FILE) + +# # Local DEM file +# NEW_DOMAIN_DEM_FILE <- "hi_dem.tif" +# NEW_DOMAIN_DEM_PATH <- paste0(NEW_DOMAIN_DEM_DIR, "/", NEW_DOMAIN_DEM_FILE) + +# Remote DEM file +NEW_DOMAIN_DEM_PATH <- "/vsicurl/https://prd-tnm.s3.amazonaws.com/StagedProducts/Elevation/1/TIFF/USGS_Seamless_DEM_1.vrt" + +NEW_DOMAIN_TRANSECTS_DIRNAME <- "transects" +NEW_DOMAIN_CS_PTS_DIRNAME <- "cs_pts" +NEW_DOMAIN_CROSS_SECTIONS_DIRNAME <- "cross_sections" + +NEW_DOMAIN_TRANSECTS_DIR <- paste0(NEW_DOMAIN_DIR, "/", NEW_DOMAIN_TRANSECTS_DIRNAME) +NEW_DOMAIN_CS_PTS_DIR <- paste0(NEW_DOMAIN_DIR, "/", NEW_DOMAIN_CS_PTS_DIRNAME) +NEW_DOMAIN_CROSS_SECTIONS_DIR <- paste0(NEW_DOMAIN_DIR, "/", NEW_DOMAIN_CROSS_SECTIONS_DIRNAME) + +# ------------------------------------------------------------------------------------- +# ---- (New single domain) Local directory / path variables ---- +# ------------------------------------------------------------------------------------- + +# directory for new domain data +DOMAIN_WITH_FEMA_DIRNAME <- "domain_with_fema" +DOMAIN_WITH_FEMA_DIR <- paste0(BASE_DIR, "/", DOMAIN_WITH_FEMA_DIRNAME) + +DOMAIN_WITH_FEMA_FLOWLINES_DIRNAME <- "flowlines" +DOMAIN_WITH_FEMA_FLOWLINES_DIR <- paste0(DOMAIN_WITH_FEMA_DIR, "/", DOMAIN_WITH_FEMA_FLOWLINES_DIRNAME) + +DOMAIN_WITH_FEMA_SUBSET_DIRNAME <- "domain_subset" +DOMAIN_WITH_FEMA_SUBSET_DIR <- paste0(DOMAIN_WITH_FEMA_DIR, "/", DOMAIN_WITH_FEMA_SUBSET_DIRNAME) + +DOMAIN_WITH_FEMA_DEM_DIRNAME <- "dem" +DOMAIN_WITH_FEMA_DEM_DIR <- paste0(DOMAIN_WITH_FEMA_DIR, "/", DOMAIN_WITH_FEMA_DEM_DIRNAME) + +DOMAIN_WITH_FEMA_FLOWLINES_FILE <- "ls_conus.gpkg" +DOMAIN_WITH_FEMA_FLOWLINES_PATH <- paste0(DOMAIN_WITH_FEMA_FLOWLINES_DIR, "/", DOMAIN_WITH_FEMA_FLOWLINES_FILE) + +# Geopackage containing area to subset flowlines to before processing +DOMAIN_WITH_FEMA_SUBSET_FILE <- "AllDiffusiveCombined.gpkg" +DOMAIN_WITH_FEMA_SUBSET_PATH <- paste0(DOMAIN_WITH_FEMA_SUBSET_DIR, "/", DOMAIN_WITH_FEMA_SUBSET_FILE) + +# # Local DEM file +# DOMAIN_WITH_FEMA_DEM_FILE <- "hi_dem.tif" +# DOMAIN_WITH_FEMA_DEM_PATH <- paste0(DOMAIN_WITH_FEMA_DEM_DIR, "/", DOMAIN_WITH_FEMA_DEM_FILE) + +# Remote DEM file +DOMAIN_WITH_FEMA_DEM_PATH <- "/vsicurl/https://prd-tnm.s3.amazonaws.com/StagedProducts/Elevation/1/TIFF/USGS_Seamless_DEM_1.vrt" + +DOMAIN_WITH_FEMA_TRANSECTS_DIRNAME <- "transects" +DOMAIN_WITH_FEMA_CS_PTS_DIRNAME <- "cs_pts" +DOMAIN_WITH_FEMA_CROSS_SECTIONS_DIRNAME <- "cross_sections" +DOMAIN_WITH_FEMA_OUTPUT_DIRNAME <- "outputs" +DOMAIN_WITH_FEMA_VPU_SUBSETS_DIRNAME <- "vpu-subsets" +DOMAIN_WITH_FEMA_ML_DIRNAME <- 
"ml" +# aws s3 cp s3://prd-tnm/StagedProducts/Elevation/1/TIFF/USGS_Seamless_DEM_1.vrt /Users/anguswatters/Desktop/3DEP/3DEP.vrt +DOMAIN_WITH_FEMA_TRANSECTS_DIR <- paste0(DOMAIN_WITH_FEMA_DIR, "/", DOMAIN_WITH_FEMA_TRANSECTS_DIRNAME) +DOMAIN_WITH_FEMA_CS_PTS_DIR <- paste0(DOMAIN_WITH_FEMA_DIR, "/", DOMAIN_WITH_FEMA_CS_PTS_DIRNAME) +DOMAIN_WITH_FEMA_CROSS_SECTIONS_DIR <- paste0(DOMAIN_WITH_FEMA_DIR, "/", DOMAIN_WITH_FEMA_CROSS_SECTIONS_DIRNAME) +DOMAIN_WITH_FEMA_OUTPUT_DIR <- paste0(DOMAIN_WITH_FEMA_DIR, "/", DOMAIN_WITH_FEMA_OUTPUT_DIRNAME) +DOMAIN_WITH_FEMA_VPU_SUBSETS_DIR <- paste0(DOMAIN_WITH_FEMA_DIR, "/", DOMAIN_WITH_FEMA_VPU_SUBSETS_DIRNAME) +DOMAIN_WITH_FEMA_ML_DIR <- paste0(DOMAIN_WITH_FEMA_DIR, "/", DOMAIN_WITH_FEMA_ML_DIRNAME) + +ML_AUXILIARY_DATA_S3_URI <- paste0(LYNKER_SPATIAL_BASE_S3_URI, "bathymetry/ml_auxiliary_data") +# LYNKER_SPATIAL_BASE_S3_URI +LYNKER_HYDROFABRIC_S3_BUCKET_BASE_URI <- "s3://lynker-hydrofabric/" +ML_BATHYMETRY_S3_DATA_DIR <- "hydrofabric/nextgen/bathymetry/multisource_river_attributes/" +ML_BATHYMETRY_S3_URI <- paste0(LYNKER_HYDROFABRIC_S3_BUCKET_BASE_URI, ML_BATHYMETRY_S3_DATA_DIR) +VPU_ML_BATHYMETRY_S3_DIRS <- paste0(ML_BATHYMETRY_S3_URI, "vpuid=", nhdplusTools::vpu_boundaries$VPUID, "/") + +COASTAL_BATHY_DEM_S3_DIR <- "coastal_bathy/diffusive_domain/" +COASTAL_BATHY_DEM_S3_DIR_URI <- paste0(LYNKER_HYDROFABRIC_S3_BUCKET_BASE_URI, COASTAL_BATHY_DEM_S3_DIR) +# COASTAL_BATHY_DEM_S3_URIS <- paste0(COASTAL_BATHY_DEM_S3_DIR_URI, list_s3_objects(COASTAL_BATHY_DEM_S3_DIR_URI, ".tif$", AWS_PROFILE)) + + +# # # -------------------------------------------------------------------------- +# # # ---- Get locations of diffusive domain DEM files in S3 ---- +# # # -------------------------------------------------------------------------- +# COASTAL_BATHY_DEM_S3_URIS <- paste0(COASTAL_BATHY_DEM_S3_DIR_URI, list_s3_objects(COASTAL_BATHY_DEM_S3_DIR_URI, ".tif$", AWS_PROFILE)) +# +# # COASTAL_BATHY_DEM_S3_URIS <- paste0(COASTAL_BATHY_DEM_S3_DIR_URI, +# # list_s3_objects(COASTAL_BATHY_DEM_S3_DIR_URI, ".tif$", AWS_PROFILE) +# # ) + + + + + + + + + + + + + -# # create the directory if it does NOT exist -# if(!dir.exists(base_dir)) { -# message(glue::glue('Base directory does not exist...\nCreating directory: {base_dir}')) -# dir.create(base_dir) -# } -### EDIT ### diff --git a/runners/cs_runner/domain_with_fema.R b/runners/cs_runner/domain_with_fema.R new file mode 100644 index 0000000..f673ffb --- /dev/null +++ b/runners/cs_runner/domain_with_fema.R @@ -0,0 +1,1561 @@ +# Generate the transects + cs_pts + cross sections layers for a single flowlines domain file and DEM file +source("runners/cs_runner/config.R") +source("runners/cs_runner/utils.R") + +# load libraries +# library(hydrofabric3D) +# library(dplyr) +# library(sf) + +# Unique Flowline ID column name +CROSSWALK_ID <- "id" + +# --------------------------------------------------------------------- +# --- Read in flowlines +# --------------------------------------------------------------------- +# sf::st_layers(DOMAIN_WITH_FEMA_SUBSET_PATH) + +# Subsetting area +aoi <- sf::read_sf(DOMAIN_WITH_FEMA_SUBSET_PATH, layer = "divides") +aoi <- rmapshaper::ms_simplify(aoi, keep = 0.05) + +id_col <- "id" +# id_col <- "divide_id" + +# query the conus.gpkg for matching IDs +ids <- unique(aoi[[id_col]]) +query_ids <- paste0(paste0("'", ids, "'"), collapse= ", ") + +gpkg_layers <- sf::st_layers(DOMAIN_WITH_FEMA_FLOWLINES_PATH) +layer <- gpkg_layers$name[gpkg_layers$name == "flowpaths"] + +wkt <- + aoi %>% + rmapshaper::ms_dissolve() 
%>% + # rmapshaper::ms_explode() %>% + # sf::st_as_sfc() %>% + sf::st_sf() %>% + sf::st_geometry() %>% + sf::st_as_text() + +# wkt <- sf::st_as_text(sf::st_geometry(aoi)) +# wkt <- +# aoi %>% +# # sf::st_bbox() %>% +# sf::st_as_sfc() %>% +# sf::st_sf() %>% +# sf::st_geometry() %>% +# sf::st_as_text() + +# length(wkt) + +# read in flowlines based on IDs in AOI +flines <- sf::read_sf(DOMAIN_WITH_FEMA_FLOWLINES_PATH, layer = "flowpaths", + wkt_filter = wkt + # query = sprintf("SELECT * FROM \"%s\" WHERE %s IN (%s)", layer, id_col, query_ids), + # query = query + ) + +# bad_ids <- c("wb-14538", "wb-14686", "wb-14687") +# flines <- +# flines %>% +# dplyr::filter(id %in% bad_ids) +# --------------------------------------------------------------------- +# --- Split flowlines by VPU +# --------------------------------------------------------------------- + +# VPUs polygons +VPU_boundaries <- sf::st_transform(nhdplusTools::vpu_boundaries, sf::st_crs(flines)) + +# add a VPU ID column to each flowline +flines <- add_intersects_ids(x = flines, y = VPU_boundaries, id_col = "VPUID") + +# TODO: improve this, manual remap some VPUIDs so that there are +# TODO: less small subsets of flowlines because a small bit of a different VPU is intersected +flines <- + flines %>% + dplyr::mutate( + VPUID = dplyr::case_when( + VPUID == "12" ~ "11, 12, 13", + VPUID == "12, 13" ~ "11, 12, 13", + VPUID == "11, 12" ~ "11, 12, 13", + VPUID == "01" ~ "01, 02", + TRUE ~ VPUID + ) + ) +# rm(flines2) +# unnest_ids(flines2$VPUID2) + +# set of unique VPUs + +VPU_IDS <- unnest_ids(flines$VPUID) +# VPU_IDS <- unnest_ids(flines2$VPUID2) +VPU_IDS + +# all possible FEMA dirs +AOI_FEMA_DIRS <- FEMA_VPU_SUBFOLDERS[basename(FEMA_VPU_SUBFOLDERS) %in% paste0("VPU_", VPU_IDS) ] + +flines %>% + sf::st_drop_geometry() %>% + dplyr::group_by(VPUID) %>% + dplyr::count() +# unique(flines$VPUID) + +# calculate bankfull width +flines <- + flines %>% + hydrofabric3D::add_powerlaw_bankful_width("tot_drainage_areasqkm", 50) %>% + dplyr::select( + dplyr::any_of(CROSSWALK_ID), + VPUID, + # hy_id = id, + lengthkm, + tot_drainage_areasqkm, + bf_width, + mainstem + ) %>% + hydroloom::rename_geometry("geometry") + +transects <- hydrofabric3D::cut_cross_sections( + net = flines, # flowlines network + crosswalk_id = CROSSWALK_ID, # Unique feature ID + cs_widths = flines$bf_width, + # cs_widths = pmax(50, flowlines$bf_width * 11), # cross section width of each "id" linestring ("hy_id") + # cs_widths = pmax(50, flowlines$bf_width), # cross section width of each "id" linestring ("hy_id") + num = 10, # number of cross sections per "id" linestring ("hy_id") + smooth = TRUE, # smooth lines + densify = 3, # densify linestring points + rm_self_intersect = TRUE, # remove self intersecting transects + fix_braids = FALSE, # whether to fix braided flowlines or not + #### Arguments used for when fix_braids = TRUE # TODO: these methods need revision in hydrofabric3D to allow for more flexible processing for data that is NOT COMID based (i.e. 
hy_id) + # terminal_id = NULL, + # braid_threshold = NULL, + # version = 2, + # braid_method = "comid", + # precision = 1, + add = TRUE # whether to add back the original data +) + +sf::write_sf( + transects, + "/Users/anguswatters/Desktop/tmp.gpkg" +) + +`# flines <- +# flines %>% +# dplyr::mutate( +# bf_width = hydrofabric3D::calc_powerlaw_bankful_width(tot_drainage_areasqkm) +# # bf_width = exp(0.700 + 0.365* log(tot_drainage_areasqkm)) +# ) %>% +# # hydrofabric3D::add_powerlaw_bankful_width("tot_drainage_areasqkm", 50) %>% +# dplyr::select( +# dplyr::any_of(CROSSWALK_ID), +# VPUID, +# # hy_id = id, +# lengthkm, +# tot_drainage_areasqkm, +# bf_width, +# mainstem +# ) %>% +# hydroloom::rename_geometry("geometry") + + +transects <- + transects %>% + dplyr::left_join( + flines %>% + dplyr::select(id, VPUID, tot_drainage_areasqkm) %>% + sf::st_drop_geometry(), + by = "id" + ) + +# read each FEMA geopackage into a list +fema <- lapply(GROUP_FEMA_FILES, function(gpkg) sf::read_sf(gpkg)) +transects$VPUID %>% unique() + +GROUP_VPU_IDS <- unnest_ids(transects$VPUID) + +# all FEMA dirs for the current area +GROUP_FEMA_DIRS <- FEMA_VPU_SUBFOLDERS[basename(FEMA_VPU_SUBFOLDERS) %in% paste0("VPU_", GROUP_VPU_IDS) ] +GROUP_FEMA_FILES <- list.files(GROUP_FEMA_DIRS, full.names = T)[grepl("_output.gpkg", list.files(GROUP_FEMA_DIRS, full.names = T))] + +fema <- lapply(GROUP_FEMA_FILES, function(gpkg) sf::read_sf(gpkg)) + +fema <- + fema %>% + dplyr::bind_rows() %>% + dplyr::mutate( + fema_id = 1:dplyr::n() + ) + +message("Simplifying FEMA polygons...") +message(" - Number of geoms BEFORE simplifying: ", nrow(fema)) + +# TODO: this should be a function argument OR removed, shouldn't probably forcibly and silently simplify the input polygons without user knowing.. +# keep 1% of the original points for speed +fema <- rmapshaper::ms_simplify(fema, keep_shapes = T, keep = 0.01, sys = TRUE, sys_mem = 16) +# fema <- rmapshaper::ms_simplify(fema, keep_shapes = T, keep = 0.1, sys = TRUE, sys_mem = 16) + +message(" - Number of geoms AFTER simplifying: ", nrow(fema)) +message("Extending transects out to FEMA 100yr floodplain polygon boundaries - (", Sys.time(), ")") + +transects <- + transects %>% + dplyr::left_join( + dplyr::select(sf::st_drop_geometry(flines), + dplyr::any_of(CROSSWALK_ID), + mainstem + ), + by = CROSSWALK_ID + ) + +# TODO: make sure this 3000m extension distance is appropriate across VPUs +# TODO: also got to make sure that this will be feasible on memory on the larger VPUs... 
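# --- Editor's note: optional diagnostic sketch, not part of the original pipeline ------
# Before the memory-heavy extension call below, a cheap pre-check of how many transects
# actually touch a FEMA polygon can help judge whether the 3000 m cap (and available
# memory) is reasonable for this domain. Only `transects` and `fema` from above are
# assumed to exist; the sample size and the reprojection guard are arbitrary choices.
transect_sample <- dplyr::slice_sample(transects, n = min(5000, nrow(transects)))
fema_same_crs   <- sf::st_transform(fema, sf::st_crs(transect_sample))
touches_fema    <- lengths(sf::st_intersects(transect_sample, fema_same_crs)) > 0

message("Sampled transects touching a FEMA polygon: ", sum(touches_fema),
        " of ", length(touches_fema),
        " (", round(100 * mean(touches_fema), 1), "%)")
# ----------------------------------------------------------------------------------------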
+ext_transects <- hydrofabric3D::extend_transects_to_polygons( + transect_lines = transects, + polygons = fema, + flowlines = flines, + crosswalk_id = CROSSWALK_ID, + grouping_id = "mainstem", + max_extension_distance = 3000 +) + +sf::write_sf( + ext_transects, + "/Users/anguswatters/Desktop/tmp_ext.gpkg" +) + +ext_transects <- + ext_transects %>% + dplyr::select(id, cs_id, cs_lengthm, cs_measure, ds_distance, lengthm, sinuosity, geometry) + +# get cross section point elevations +cs_pts <- hydrofabric3D::cross_section_pts( + cs = ext_transects, + crosswalk_id = CROSSWALK_ID, + points_per_cs = NULL, + min_pts_per_cs = 10, + dem = DEM_PATH +) + +sf::write_sf( + cs_pts, + "/Users/anguswatters/Desktop/tmp_cs_pts.gpkg" +) +# ---------------------------------------------------------------------------------------------------------------- +# ---- STEP 2: Remove any cross section that has ANY missing (NA) Z values, and classify the points ---- +# ---------------------------------------------------------------------------------------------------------------- +# cs_pts2 %>% +# dplyr::slice(1:200) %>% +# dplyr::rename(hy_id = id) %>% +# hydrofabric3D::plot_cs_pts(x = "pt_id", color = "point_type") + +cs_pts <- + # cs_pts2 <- + cs_pts %>% + hydrofabric3D::drop_incomplete_cs_pts(CROSSWALK_ID) %>% + hydrofabric3D::classify_points( + crosswalk_id = CROSSWALK_ID, + pct_of_length_for_relief = PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF + ) + +# }) + +ids_original_cs_pts <- hydrofabric3D::add_tmp_id(cs_pts, x = CROSSWALK_ID)$tmp_id +# ids_original_cs_pts <- hydrofabric3D::add_tmp_id(cs_pts2)$tmp_id + +# sf::write_sf(cs_pts2, "/Users/anguswatters/Desktop/test_improve_cs_pts_classified_11.gpkg") +# sf::write_sf(cs_pts, "/Users/anguswatters/Desktop/test_improve_cs_pts_classified_11_2.gpkg") + + +# ---------------------------------------------------------------------------------------------------------------- +# ---- STEP 3: Try to rectify any no relief and invalid banks cross sections ---- +# ---------------------------------------------------------------------------------------------------------------- + +# system.time({ +fixed_pts <- hydrofabric3D::get_improved_cs_pts( + cs_pts = cs_pts, # cross section points generated from hydrofabric3D::cross_section_pts() + net = flines, # original flowline network + # net = flowlines, # original flowline network + transects = ext_transects, # original transect lines + crosswalk_id = CROSSWALK_ID, + points_per_cs = NULL, + min_pts_per_cs = 10, # number of points per cross sections + dem = DEM_PATH, # DEM to extract points from + scale = EXTENSION_PCT, # How far to extend transects if the points need to be rechecked + pct_of_length_for_relief = PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF, # percent of cross sections length to be needed in relief calculation to consider cross section to "have relief" + fix_ids = FALSE, + verbose = TRUE +) +# }) + +ids_after_fixed_pts <- hydrofabric3D::add_tmp_id(cs_pts, x = CROSSWALK_ID)$tmp_id + +# ---------------------------------------------------------------------------------------------------------------- +# ---- Update transects with extended transects (if exists) ---- +# ---------------------------------------------------------------------------------------------------------------- +ext_transects <- + ext_transects %>% + dplyr::mutate(cs_source = CS_SOURCE) + +out_transects <- match_transects_to_extended_cs_pts( + transect_lines = ext_transects, + fixed_cs_pts = fixed_pts, + crosswalk_id = CROSSWALK_ID, + extension_pct = EXTENSION_PCT +) + 
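# --- Editor's note: optional diagnostic sketch, not part of the original pipeline ------
# A rough answer to the TODO above about whether max_extension_distance = 3000 is
# appropriate: compare geometric transect lengths before and after the FEMA extension.
# Assumes the pre-extension `transects` and the post-extension `ext_transects` are still
# in memory and share "id" / "cs_id" keys.
pre_len           <- sf::st_drop_geometry(transects)[, c("id", "cs_id")]
pre_len$len_pre   <- as.numeric(sf::st_length(transects))

post_len          <- sf::st_drop_geometry(ext_transects)[, c("id", "cs_id")]
post_len$len_post <- as.numeric(sf::st_length(ext_transects))

extension_growth          <- dplyr::inner_join(pre_len, post_len, by = c("id", "cs_id"))
extension_growth$growth_m <- extension_growth$len_post - extension_growth$len_pre

message("Transect length growth from FEMA extension (m), min/median/95th/max: ",
        paste(round(stats::quantile(extension_growth$growth_m, c(0, 0.5, 0.95, 1)), 1),
              collapse = " / "))
# ----------------------------------------------------------------------------------------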
+sf::write_sf( + out_transects, + "/Users/anguswatters/Desktop/tmp_trans_improved.gpkg" +) + +trans_uids <- hydrofabric3D::get_unique_tmp_ids(out_transects, "id") +ext_trans_uids <- hydrofabric3D::get_unique_tmp_ids(ext_transects, "id") +fixed_pts_uids <- hydrofabric3D::get_unique_tmp_ids(fixed_pts, "id") + +all(trans_uids %in% ext_trans_uids) +all(ext_trans_uids %in% trans_uids) + +all(trans_uids %in% fixed_pts_uids) +all(fixed_pts_uids %in% trans_uids) + +all(ext_trans_uids %in% fixed_pts_uids) +all(fixed_pts_uids %in% ext_trans_uids) + +# ---------------------------------------------------------------------------------------------------------------- +# ---- Re enumerate the transects & cross section points "cs_id" ---- +# ---------------------------------------------------------------------------------------------------------------- + +# fixed_pts <- hydrofabric3D:::renumber_cs_ids(df = fixed_pts, crosswalk_id = "hy_id") +# out_transects <- hydrofabric3D:::renumber_cs_ids( +# df = dplyr::mutate(out_transects, pt_id = 1), +# crosswalk_id = "hy_id" +# ) %>% +# dplyr::select(-pt_id) + +fixed_pts2 <- hydrofabric3D:::renumber_cs_ids(df = fixed_pts, crosswalk_id = CROSSWALK_ID) +out_transects2 <- hydrofabric3D:::renumber_cs_ids(df = out_transects, crosswalk_id = CROSSWALK_ID) +sf::write_sf( + out_transects2, + "/Users/anguswatters/Desktop/tmp_trans_improved2.gpkg" +) +object.size(out_transects2) +# classify the cross section points +fixed_pts <- + fixed_pts %>% + dplyr::mutate( + X = sf::st_coordinates(.)[,1], + Y = sf::st_coordinates(.)[,2] + ) %>% + sf::st_drop_geometry() %>% + dplyr::select( + dplyr::any_of(CROSSWALK_ID), + cs_id, + pt_id, + cs_lengthm, + relative_distance, + X, Y, Z, + class, point_type, + bottom, left_bank, right_bank, valid_banks, has_relief # newly added columns (03/06/2024) + ) + +# add Z_source column for source of elevation data +fixed_pts <- + fixed_pts %>% + dplyr::mutate( + Z_source = CS_SOURCE + ) %>% + dplyr::relocate( + dplyr::any_of(CROSSWALK_ID), + cs_id, pt_id, cs_lengthm, relative_distance, X, Y, Z, Z_source, + class, point_type, + bottom, left_bank, right_bank, valid_banks, has_relief) + +ids_before_align <- hydrofabric3D::add_tmp_id(fixed_pts, x = CROSSWALK_ID)$tmp_id + +message("Aligning banks and smoothing bottoms...") +fixed_pts <- hydrofabric3D::align_banks_and_bottoms(cs_pts = fixed_pts, crosswalk_id = CROSSWALK_ID) + +ids_after_align <- hydrofabric3D::add_tmp_id(fixed_pts, x = CROSSWALK_ID)$tmp_id + +message("Reclassifying cross section points...") + +fixed_pts <- hydrofabric3D::classify_points( + cs_pts = fixed_pts, + crosswalk_id = CROSSWALK_ID, + pct_of_length_for_relief = PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF +) + +ids_after_reclassify <- hydrofabric3D::add_tmp_id(fixed_pts, x = CROSSWALK_ID)$tmp_id + +if(all(ids_original_cs_pts %in% ids_after_fixed_pts)) { + message("All hy_id/cs_ids in ORIGINAL DEM point extraction were found in the FIXED points") +} else { + message(" >>> Missing hy_id/cs_ids in ORIGINAL DEM point extraction compared to the FIXED points") +} + +if(all(ids_before_align %in% ids_after_align)) { + message("All hy_id/cs_ids are kept in tact after bank alignment and bottom smoothing") +} else { + message(" >>> Missing hy_id/cs_ids after bank alignment and bottom smoothing") +} + +if(all(ids_after_align %in% ids_after_reclassify)) { + message("All hy_id/cs_ids are kept in tact after RECLASSIFICATION") +} else { + message(" >>> Missing hy_id/cs_ids after RECLASSIFICATION") +} + +sf::write_sf( + out_transects, + 
paste0(DOMAIN_WITH_FEMA_VPU_SUBSETS_DIR, "/", VPU, "_transects.gpkg") +) + +arrow::write_parquet( + fixed_pts, + paste0(DOMAIN_WITH_FEMA_VPU_SUBSETS_DIR, "/", VPU, "_cs_pts.parquet") +) +# save the flowlines subset +DOMAIN_WITH_FEMA_FLOWLINE_SUBSET_PATH <- paste0(DOMAIN_WITH_FEMA_FLOWLINES_DIR, "/flowlines_subset.gpkg") + +sf::write_sf( + flines, + DOMAIN_WITH_FEMA_FLOWLINE_SUBSET_PATH +) + +# split the flowlines into groups by VPU +fline_groups <- dplyr::group_split(flines, VPUID) + +# loop through each VPU group of flowlines and save a local copy +for (i in seq_along(fline_groups)) { + + flowlines <- fline_groups[[i]] + VPU <- unique(flowlines$VPUID) + + # save the flowlines subset + vpu_flowlines_path <- paste0(DOMAIN_WITH_FEMA_VPU_SUBSETS_DIR, "/", gsub(", ", "_", VPU), "_flowlines.gpkg") + + sf::write_sf( + flowlines, + vpu_flowlines_path + ) +} + +for (i in seq_along(fline_groups)) { + + # i = 3 + flowlines <- fline_groups[[i]] + VPU <- unique(flowlines$VPUID) + + message("(", i, "/", length(fline_groups), ")\n", + " > Processing flowlines in VPU group: '", VPU, "'") + + # unique VPUs for group + GROUP_VPU_IDS <- unnest_ids(flowlines$VPUID) + + # all FEMA dirs for the current area + GROUP_FEMA_DIRS <- FEMA_VPU_SUBFOLDERS[basename(FEMA_VPU_SUBFOLDERS) %in% paste0("VPU_", GROUP_VPU_IDS) ] + GROUP_FEMA_FILES <- list.files(GROUP_FEMA_DIRS, full.names = T)[grepl("_output.gpkg", list.files(GROUP_FEMA_DIRS, full.names = T))] + # GROUP_FEMA_FILES <- list.files(GROUP_FEMA_DIRS, full.names = T) + # GROUP_FEMA_FILES <- GROUP_FEMA_FILES[grepl("_output.gpkg", GROUP_FEMA_FILES)] + + # ((flowlines$bf_width) / 11)[1] * 11 + + # create transect lines + transects <- hydrofabric3D::cut_cross_sections( + net = flowlines, # flowlines network + crosswalk_id = CROSSWALK_ID, # Unique feature ID + cs_widths = flowlines$bf_width, + # cs_widths = pmax(50, flowlines$bf_width * 11), # cross section width of each "id" linestring ("hy_id") + # cs_widths = pmax(50, flowlines$bf_width), # cross section width of each "id" linestring ("hy_id") + num = 10, # number of cross sections per "id" linestring ("hy_id") + smooth = TRUE, # smooth lines + densify = 3, # densify linestring points + rm_self_intersect = TRUE, # remove self intersecting transects + fix_braids = FALSE, # whether to fix braided flowlines or not + #### Arguments used for when fix_braids = TRUE # TODO: these methods need revision in hydrofabric3D to allow for more flexible processing for data that is NOT COMID based (i.e. 
hy_id) + # terminal_id = NULL, + # braid_threshold = NULL, + # version = 2, + # braid_method = "comid", + # precision = 1, + add = TRUE # whether to add back the original data + ) +# } + + # add cs_source column and rename cs_widths to cs_lengthm + transects <- + transects %>% + dplyr::mutate( + cs_source = CS_SOURCE + ) + + # --------------------------------------------------------------------- + # --- Extend transects out to FEMA 100yr floodplains + # --------------------------------------------------------------------- + message("Reading in FEMA polygons...") + + # unique(flowlines$VPUID) + + # fema polygons and transect lines + # fema <- sf::read_sf(vpu_fema_file) + + # read each FEMA geopackage into a list + fema <- lapply(GROUP_FEMA_FILES, function(gpkg) sf::read_sf(gpkg)) + + fema <- + fema %>% + dplyr::bind_rows() %>% + dplyr::mutate( + fema_id = 1:dplyr::n() + ) + + message("Simplifying FEMA polygons...") + message(" - Number of geoms BEFORE simplifying: ", nrow(fema)) + + # TODO: this should be a function argument OR removed, shouldn't probably forcibly and silently simplify the input polygons without user knowing.. + # keep 1% of the original points for speed + fema <- rmapshaper::ms_simplify(fema, keep_shapes = T, keep = 0.01, sys = TRUE, sys_mem = 16) + # fema <- rmapshaper::ms_simplify(fema, keep_shapes = T, keep = 0.1, sys = TRUE, sys_mem = 16) + + message(" - Number of geoms AFTER simplifying: ", nrow(fema)) + message("Extending transects out to FEMA 100yr floodplain polygon boundaries - (", Sys.time(), ")") + + transects <- + transects %>% + dplyr::left_join( + dplyr::select(sf::st_drop_geometry(flowlines), + dplyr::any_of(CROSSWALK_ID), + mainstem + ), + by = CROSSWALK_ID + ) + + # TODO: make sure this 3000m extension distance is appropriate across VPUs + # TODO: also got to make sure that this will be feasible on memory on the larger VPUs... 
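  # --- Editor's note: optional diagnostic, not part of the original pipeline ------------
  # The TODO above flags memory use on the larger VPUs. A cheap guard, in the spirit of
  # the object.size() checks used elsewhere in this script, is to log the in-memory size
  # of the extension inputs for this VPU group before the call below.
  message("VPU group '", VPU, "' extension inputs: transects = ",
          format(utils::object.size(transects), units = "MB"),
          ", FEMA polygons = ", format(utils::object.size(fema), units = "MB"))
  # ----------------------------------------------------------------------------------------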
+ ext_transects <- hydrofabric3D::extend_transects_to_polygons( + transect_lines = transects, + polygons = fema, + flowlines = flowlines, + crosswalk_id = CROSSWALK_ID, + grouping_id = "mainstem", + max_extension_distance = 3000 + ) + + # mapview::mapview(transects, color = "green") + + # mapview::mapview(transects2, color = "red") + + # transects <- + # transects %>% + ext_transects <- + ext_transects %>% + hydrofabric3D::add_tmp_id(x = CROSSWALK_ID) %>% + dplyr::mutate(is_extended = FALSE) %>% + dplyr::select( + dplyr::any_of(CROSSWALK_ID), + cs_id, + cs_lengthm, + cs_source, + cs_measure, + is_extended, + geometry + ) + + out_path <- paste0(DOMAIN_WITH_FEMA_VPU_SUBSETS_DIR, "/", + paste(GROUP_VPU_IDS, collapse = "_"), "_transects.gpkg" + ) + + # ext_transects %>% + # dplyr::filter(id == "wb-2414904") %>% + # .$geometry %>% + # mapview::mapview() + + # ext_transects %>% + # dplyr::slice(90000:100000) %>% + # .$geometry %>% + # mapview::mapview() + + # wb-2414904 + + # length_data <- + # ext_transects %>% + # dplyr::mutate( + # line_len = as.numeric(sf::st_length(geometry)) + # ) + + message("Writting transect lines for VPU group: '", VPU, "'", + "\n > '", out_path, "'") + + sf::write_sf(ext_transects, out_path) + + message("Finished writting transects!") + +} + +# --------------------------------------------------------------------- +# --- Bind together transects into single dataset +# --------------------------------------------------------------------- +vpu_subset_paths <- list.files(DOMAIN_WITH_FEMA_VPU_SUBSETS_DIR, full.names = T) + +transect_paths <- vpu_subset_paths[grepl("_transects.gpkg", vpu_subset_paths)] +flowline_paths <- vpu_subset_paths[grepl("_flowlines.gpkg", vpu_subset_paths)] + +# read each transect geopackages into a list +transects <- lapply(transect_paths, function(gpkg) sf::read_sf(gpkg)) +# transects[[5]] + +transects <- + transects %>% + dplyr::bind_rows() + +paths_df <- data.frame( + t = transect_paths, + f = flowline_paths +) %>% + dplyr::mutate( + vpu = gsub("_transects.gpkg", "", basename(t)) + ) + +for (i in 1:nrow(paths_df)) { + # i =3 + # i = 2 + VPU <- paths_df$vpu[i] + t_path <- paths_df$t[i] + f_path <- paths_df$f[i] + + message("Creating VPU ", VPU, " cross section points:", + "\n - flowpaths: '", f_path, "'", + "\n - transects: '", t_path, "'" + ) + + ################### + message("Reading in transects...\n > ", t_path) + # read in transects data + transects <- sf::read_sf(t_path) + + message("Reading in flowlines... 
\n > ", f_path) + # read in nextgen data + flowlines <- sf::read_sf(f_path) + + message("Extracting cross section points...") + # ---------------------------------------------------------------------------------------------------------------- + # ---- STEP 1: Extract cs points from DEM ---- + # ---------------------------------------------------------------------------------------------------------------- + # system.time({ + + # get cross section point elevations + cs_pts <- hydrofabric3D::cross_section_pts( + cs = transects, + crosswalk_id = CROSSWALK_ID, + points_per_cs = NULL, + min_pts_per_cs = 10, + dem = DEM_PATH + ) + + # ---------------------------------------------------------------------------------------------------------------- + # ---- STEP 2: Remove any cross section that has ANY missing (NA) Z values, and classify the points ---- + # ---------------------------------------------------------------------------------------------------------------- + # cs_pts2 %>% + # dplyr::slice(1:200) %>% + # dplyr::rename(hy_id = id) %>% + # hydrofabric3D::plot_cs_pts(x = "pt_id", color = "point_type") + + cs_pts <- + # cs_pts2 <- + cs_pts %>% + hydrofabric3D::drop_incomplete_cs_pts(CROSSWALK_ID) %>% + hydrofabric3D::classify_points( + crosswalk_id = CROSSWALK_ID, + pct_of_length_for_relief = PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF + ) + + # }) + + ids_original_cs_pts <- hydrofabric3D::add_tmp_id(cs_pts, x = CROSSWALK_ID)$tmp_id + # ids_original_cs_pts <- hydrofabric3D::add_tmp_id(cs_pts2)$tmp_id + + # sf::write_sf(cs_pts2, "/Users/anguswatters/Desktop/test_improve_cs_pts_classified_11.gpkg") + # sf::write_sf(cs_pts, "/Users/anguswatters/Desktop/test_improve_cs_pts_classified_11_2.gpkg") + + + # ---------------------------------------------------------------------------------------------------------------- + # ---- STEP 3: Try to rectify any no relief and invalid banks cross sections ---- + # ---------------------------------------------------------------------------------------------------------------- + + # system.time({ + fixed_pts <- hydrofabric3D::get_improved_cs_pts( + cs_pts = cs_pts, # cross section points generated from hydrofabric3D::cross_section_pts() + net = flowlines, # original flowline network + # net = flowlines, # original flowline network + transects = transects, # original transect lines + crosswalk_id = CROSSWALK_ID, + points_per_cs = NULL, + min_pts_per_cs = 10, # number of points per cross sections + dem = DEM_PATH, # DEM to extract points from + scale = EXTENSION_PCT, # How far to extend transects if the points need to be rechecked + pct_of_length_for_relief = PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF, # percent of cross sections length to be needed in relief calculation to consider cross section to "have relief" + fix_ids = FALSE, + verbose = TRUE + ) + # }) + + ids_after_fixed_pts <- hydrofabric3D::add_tmp_id(cs_pts, x = CROSSWALK_ID)$tmp_id + + # ---------------------------------------------------------------------------------------------------------------- + # ---- Update transects with extended transects (if exists) ---- + # ---------------------------------------------------------------------------------------------------------------- + + out_transects <- match_transects_to_extended_cs_pts( + transect_lines = transects, + fixed_cs_pts = fixed_pts, + crosswalk_id = CROSSWALK_ID, + extension_pct = EXTENSION_PCT + ) + + # ---------------------------------------------------------------------------------------------------------------- + # ---- Re enumerate the transects 
& cross section points "cs_id" ---- + # ---------------------------------------------------------------------------------------------------------------- + + # fixed_pts <- hydrofabric3D:::renumber_cs_ids(df = fixed_pts, crosswalk_id = "hy_id") + # out_transects <- hydrofabric3D:::renumber_cs_ids( + # df = dplyr::mutate(out_transects, pt_id = 1), + # crosswalk_id = "hy_id" + # ) %>% + # dplyr::select(-pt_id) + + fixed_pts <- hydrofabric3D:::renumber_cs_ids(df = fixed_pts, crosswalk_id = CROSSWALK_ID) + out_transects <- hydrofabric3D:::renumber_cs_ids(df = out_transects, crosswalk_id = CROSSWALK_ID) + + # out_transects %>% + # dplyr::filter(id == "wb-2425607") %>% .$geometry %>% plot() + # ---------------------------------------------------------------------------------------------------------------- + # ---- STEP 4: Update transects with extended transects (if exists) ---- + # ---------------------------------------------------------------------------------------------------------------- + + # classify the cross section points + fixed_pts <- + fixed_pts %>% + dplyr::mutate( + X = sf::st_coordinates(.)[,1], + Y = sf::st_coordinates(.)[,2] + ) %>% + sf::st_drop_geometry() %>% + dplyr::select( + dplyr::any_of(CROSSWALK_ID), + cs_id, + pt_id, + cs_lengthm, + relative_distance, + X, Y, Z, + class, point_type, + bottom, left_bank, right_bank, valid_banks, has_relief # newly added columns (03/06/2024) + ) + + # add Z_source column for source of elevation data + fixed_pts <- + fixed_pts %>% + dplyr::mutate( + Z_source = CS_SOURCE + ) %>% + dplyr::relocate( + dplyr::any_of(CROSSWALK_ID), + cs_id, pt_id, cs_lengthm, relative_distance, X, Y, Z, Z_source, + class, point_type, + bottom, left_bank, right_bank, valid_banks, has_relief) + + ids_before_align <- hydrofabric3D::add_tmp_id(fixed_pts, x = CROSSWALK_ID)$tmp_id + + message("Aligning banks and smoothing bottoms...") + fixed_pts <- hydrofabric3D::align_banks_and_bottoms(cs_pts = fixed_pts, crosswalk_id = CROSSWALK_ID) + + ids_after_align <- hydrofabric3D::add_tmp_id(fixed_pts, x = CROSSWALK_ID)$tmp_id + + message("Reclassifying cross section points...") + + fixed_pts <- hydrofabric3D::classify_points( + cs_pts = fixed_pts, + crosswalk_id = CROSSWALK_ID, + pct_of_length_for_relief = PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF + ) + + ids_after_reclassify <- hydrofabric3D::add_tmp_id(fixed_pts, x = CROSSWALK_ID)$tmp_id + + if(all(ids_original_cs_pts %in% ids_after_fixed_pts)) { + message("All hy_id/cs_ids in ORIGINAL DEM point extraction were found in the FIXED points") + } else { + message(" >>> Missing hy_id/cs_ids in ORIGINAL DEM point extraction compared to the FIXED points") + } + + if(all(ids_before_align %in% ids_after_align)) { + message("All hy_id/cs_ids are kept in tact after bank alignment and bottom smoothing") + } else { + message(" >>> Missing hy_id/cs_ids after bank alignment and bottom smoothing") + } + + if(all(ids_after_align %in% ids_after_reclassify)) { + message("All hy_id/cs_ids are kept in tact after RECLASSIFICATION") + } else { + message(" >>> Missing hy_id/cs_ids after RECLASSIFICATION") + } + + sf::write_sf( + out_transects, + paste0(DOMAIN_WITH_FEMA_VPU_SUBSETS_DIR, "/", VPU, "_transects.gpkg") + ) + + arrow::write_parquet( + fixed_pts, + paste0(DOMAIN_WITH_FEMA_VPU_SUBSETS_DIR, "/", VPU, "_cs_pts.parquet") + ) + +} + +# TODO: Save all cross section points + transects into single geopackages (transects.gpkg, cs_pts.gpkg) + +# --------------------------------------------------------------------------------- +# ---- Merge all the 
transects / cs points into single files +# --------------------------------------------------------------------------------- + +vpu_subset_paths <- list.files(DOMAIN_WITH_FEMA_VPU_SUBSETS_DIR, full.names = T) + +# --------------------------------------------------------------------------------- +# ---- Read all transects and save to single gpkg ---- +# --------------------------------------------------------------------------------- + +transect_paths <- vpu_subset_paths[grepl("_transects.gpkg", vpu_subset_paths)] + +transects <- lapply(transect_paths, function(gpkg) {sf::read_sf(gpkg) }) %>% + dplyr::bind_rows() %>% + hydroloom::rename_geometry("geometry") + +TRANSECTS_OUTPUT_PATH <- paste0(DOMAIN_WITH_FEMA_OUTPUT_DIR, "/transects.gpkg") + +sf::write_sf( + transects, + TRANSECTS_OUTPUT_PATH +) + +# --------------------------------------------------------------------------------- +# ---- Read all CS PTs and save to single parquet ---- +# --------------------------------------------------------------------------------- + +cs_pts_paths <- vpu_subset_paths[grepl("_cs_pts.parquet", vpu_subset_paths)] +cs_pts <- lapply(cs_pts_paths, function(prq) arrow::read_parquet(prq)) %>% + dplyr::bind_rows() + +CS_PTS_OUTPUT_PATH <- paste0(DOMAIN_WITH_FEMA_OUTPUT_DIR, "/cs_pts.parquet") + +arrow::write_parquet( + cs_pts, + CS_PTS_OUTPUT_PATH +) + +# --------------------------------------------------------------------------------- +# ---- Inject ML predicted top widths / Dingman's R ---- +# --------------------------------------------------------------------------------- + +# --------------------------------------------------------------------------------- +# ---- Read in ML data ---- +# --------------------------------------------------------------------------------- +VPU_ML_BATHYMETRY_PATHS <- list.files(DOMAIN_WITH_FEMA_ML_DIR, full.names = T) + +ML_CROSSWALK_ID <- "id" + +# ml_outputs <- lapply(VPU_ML_BATHYMETRY_PATHS, function(prq) { +# vpu_id <- gsub(".*ml/([a-zA-Z0-9]+).*", "\\1", prq) +# arrow::read_parquet(prq) %>% +# dplyr::mutate(vpu_id = vpu_id) +# } +# ) %>% +# dplyr::bind_rows() %>% +# dplyr::select( +# dplyr::any_of(ML_CROSSWALK_ID), +# vpu_id, +# owp_y_bf, owp_y_inchan, +# owp_tw_bf, owp_tw_inchan, +# owp_dingman_r +# ) +# +# # rename ML_CROSSWALK_ID (unique ID) to match the CROSSWALK_ID in CS PTS +# # TODO: This assumes the IDs do correspond with eachother... (built from same flowlines network) +# names(ml_outputs)[names(ml_outputs) == ML_CROSSWALK_ID] = CROSSWALK_ID +# +# # Keep only distinct ID rows +# ml_outputs <- + # ml_outputs %>% + # dplyr::distinct( + # dplyr::across(dplyr::any_of(CROSSWALK_ID)), + # vpu_id, + # owp_y_bf, owp_y_inchan, + # owp_tw_bf, owp_tw_inchan, + # owp_dingman_r + # ) + +# sf::st_layers(DOMAIN_WITH_FEMA_FLOWLINES_PATH) +# rm(ml_outputs, ml) +ml_outputs <- sf::read_sf(DOMAIN_WITH_FEMA_FLOWLINES_PATH, layer = "flowpath-attributes-ml") + +ml_outputs <- + ml_outputs %>% + dplyr::select( + dplyr::any_of(ML_CROSSWALK_ID), + vpuid, + owp_y_bf = YCC, + owp_y_inchan = Y, + owp_tw_bf = TopWdthCC, + owp_tw_inchan = TopWdth, + owp_dingman_r = dingman_r + ) +# +# # rename ML_CROSSWALK_ID (unique ID) to match the CROSSWALK_ID in CS PTS +# # TODO: This assumes the IDs do correspond with eachother... 
(built from same flowlines network) +names(ml_outputs)[names(ml_outputs) == ML_CROSSWALK_ID] = CROSSWALK_ID +# +# # Keep only distinct ID rows +ml_outputs <- + ml_outputs %>% + dplyr::distinct( + dplyr::across(dplyr::any_of(CROSSWALK_ID)), + vpu_id, + owp_y_bf, owp_y_inchan, + owp_tw_bf, owp_tw_inchan, + owp_dingman_r + ) + +# --------------------------------------------------------------------------------- +# ---- Read in CS PTS data ---- +# --------------------------------------------------------------------------------- +CS_PTS_OUTPUT_PATH <- paste0(DOMAIN_WITH_FEMA_OUTPUT_DIR, "/cs_pts.parquet") + +cs_pts <- arrow::read_parquet(CS_PTS_OUTPUT_PATH) + +# --------------------------------------------------------------------------------- +# ---- Join CS PTS with ML data --- +# --------------------------------------------------------------------------------- +message(round(Sys.time()), " - Joining ML width/depths estimates to cross section points...") + +# ml_outputs %>% +# dplyr::group_by(id) %>% +# dplyr::count(id) %>% +# dplyr::arrange(-n) + +# join the ML outputs data to the cross section points +cs_pts <- + cs_pts %>% + dplyr::left_join( + dplyr::select(ml_outputs, + dplyr::any_of(CROSSWALK_ID), + owp_tw_inchan, + owp_y_inchan, + owp_tw_bf, + owp_y_bf, + owp_dingman_r + ), + by = CROSSWALK_ID + ) + +# --------------------------------------------------------------------------------- +# ---- Fixing negative depths/widths estimates ---- +# --------------------------------------------------------------------------------- +message(round(Sys.time()), " - Replacing any negative width/depth estimates with cross section bottom lengths...") + +cs_bottom_lengths <- hydrofabric3D::get_cs_bottom_length(cross_section_pts = cs_pts, crosswalk_id = CROSSWALK_ID) + +# TODO: for now we replace any negative TW values with the length of the bottom of the cross section +# TODO: This method + the negative model output values both need to be looked into (04/05/2024) +cs_pts <- + cs_pts %>% + dplyr::left_join( + cs_bottom_lengths, + by = c(CROSSWALK_ID, "cs_id") + # by = c("hy_id", "cs_id") + ) %>% + dplyr::mutate( + owp_tw_inchan = dplyr::case_when( + owp_tw_inchan <= 0 ~ bottom_length, + TRUE ~ owp_tw_inchan + ), + owp_tw_bf = dplyr::case_when( + owp_tw_bf <= 0 ~ bottom_length, + TRUE ~ owp_tw_bf + ) + ) %>% + dplyr::select(-bottom_length) + +# extract any cross sections that didn't get matched with a "hf_id" and (or?) 
no ML data +# TODO: look at this stuff with Arash (04/09/2024) +missing_cs <- + cs_pts %>% + dplyr::filter( + is.na(.data[[CROSSWALK_ID]]) | + is.na(owp_tw_inchan) | is.na(owp_y_inchan) | + is.na(owp_tw_bf) | is.na(owp_y_bf) | + is.na(owp_dingman_r) + ) %>% + hydrofabric3D::add_tmp_id(x = CROSSWALK_ID) + +# TODO: Delete this, but time being keeping this to inspect mismatch in between "hy_id" and "hf_id" +# readr::write_csv( +# dplyr::select(missing_cs, -tmp_id), +# paste0(META_PATH, "nextgen_", path_df$vpu[i], "_cross_sections_missing_hf_ids.csv") +# ) + +# Split the cross sections into 2 groups: +# - "Inchannel cs" group are points with BOTH valid banks AND relief --> These get the INCHANNEL TW and Y values from the ML model +# - "Bankful cs" group are points WITHOUT valid banks OR any relief --> These get the BANKFUL TW and Y values from the ML model +inchannel_cs <- + cs_pts %>% + hydrofabric3D::add_tmp_id(x = CROSSWALK_ID) %>% + dplyr::filter(!tmp_id %in% unique(missing_cs$tmp_id)) %>% # NOTE: makes sure to remove any of the "missing" cross sections without data + dplyr::select(-tmp_id) %>% + dplyr::filter(valid_banks & has_relief) %>% + # NOTE: temporarily rename the top widths, depths, and dingman's R columns so they + # work nicely with the "hydrofabric3D::add_cs_bathymetry()" function which takes a dataframe of cross section points + # with "TW", "DEPTH", and "DINGMAN_R" columns for each cross section + dplyr::rename( + TW = owp_tw_inchan, + DEPTH = owp_y_inchan, + DINGMAN_R = owp_dingman_r + ) + +bankful_cs <- + cs_pts %>% + hydrofabric3D::add_tmp_id(x = CROSSWALK_ID) %>% + dplyr::filter(!tmp_id %in% unique(missing_cs$tmp_id)) %>% + dplyr::select(-tmp_id) %>% + dplyr::filter(!valid_banks | !has_relief) %>% + dplyr::rename( + TW = owp_tw_bf, + DEPTH = owp_y_bf, + DINGMAN_R = owp_dingman_r + ) + + +# sanity check that all rows are accounted for after splitting up data +split_kept_all_rows <- nrow(cs_pts) == (nrow(bankful_cs) + nrow(inchannel_cs) + nrow(missing_cs)) +# split_kept_all_rows <- nrow(cs_pts) == nrow(bankful_cs) + nrow(inchannel_cs) + +if (!split_kept_all_rows) { + warning(paste0("When splitting cross section points into 'bankful' and 'inchannel' groups,", + "\nsome points were not put in either group.", + "\nLikely due to 'valid_banks' and/or 'has_relief' columns have either missing ", + "values or contain values other than TRUE/FALSE") + ) +} + +message(round(Sys.time()), " - Adding cross section bathymetry using inchannel widths/depths estimates...") +# tmp <- +# inchannel_cs %>% +# dplyr::slice(1:10000) +system.time({ + +# Add bathymetry using "inchannel" estimates +inchannel_cs <- hydrofabric3D::add_cs_bathymetry( + cross_section_pts = inchannel_cs, + # cross_section_pts = tmp, + crosswalk_id = CROSSWALK_ID +) + +}) + + + +# arrow::write_parquet(inchannel_cs, "/Users/anguswatters/Desktop/test_ml_cs_pts_06.parquet") +# ml_subset %>% +# dplyr::filter(hy_id == "wb-1005207") %>% +# dplyr::select(owp_y_inchan, owp_tw_inchan) %>% +# .$owp_y_inchan +message(round(Sys.time()), " - Adding cross section bathymetry using bankful widths/depths estimates...") +system.time({ + +# Add bathymetry using "bankful" estimates +bankful_cs <- hydrofabric3D::add_cs_bathymetry( + cross_section_pts = bankful_cs, + # cross_section_pts = dplyr::slice(bankful_cs, 1:10000), + crosswalk_id = CROSSWALK_ID +) + +}) + +# combine the inchannel and bankful cross section points back together, fill out missing values and reclassify the points +final_cs <- dplyr::bind_rows( + dplyr::select( + 
inchannel_cs, + # inchannel_cs2, + # -hf_id, + -TW, -DEPTH, -DINGMAN_R, + # -is_dem_point, + -dplyr::starts_with("owp") + ), + dplyr::select( + bankful_cs, + # bankful_cs2, + # -hf_id, + -TW, -DEPTH, -DINGMAN_R, + # -is_dem_point, + -dplyr::starts_with("owp") + ), + dplyr::select( + dplyr::mutate( + missing_cs, + is_dem_point = FALSE + ), + # -hf_id, + # -is_dem_point, + -dplyr::starts_with("owp"), + -tmp_id + ) + ) %>% + dplyr::group_by(dplyr::across(dplyr::any_of(c(CROSSWALK_ID, "cs_id")))) %>% + # dplyr::group_by(hy_id, cs_id) %>% + tidyr::fill( + c(cs_lengthm, Z_source) + ) %>% + dplyr::ungroup() %>% + dplyr::select( + -point_type, + -class, + -bottom, -left_bank, -right_bank, + -has_relief, -valid_banks + ) + +# arrow::write_parquet(final_cs, "/Users/anguswatters/Desktop/tmp.parquet") +# final_cs <- arrow::read_parquet("/Users/anguswatters/Desktop/tmp.parquet") + + +message(round(Sys.time()), " - Reclassifying cross section point types...") + +# reclassify +final_cs <- hydrofabric3D::classify_points(cs_pts = final_cs, + crosswalk_id = CROSSWALK_ID, + pct_of_length_for_relief = PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF + ) + +starting_uids <- hydrofabric3D::get_unique_tmp_ids(cs_pts, x = CROSSWALK_ID) +ending_uids <- hydrofabric3D::get_unique_tmp_ids(final_cs, x = CROSSWALK_ID) + +has_same_number_of_uids <- length(starting_uids) == length(ending_uids) +all_starting_uids_in_ending_uids <- all(starting_uids %in% ending_uids) +all_ending_uids_in_starting_uids <- all(ending_uids %in% starting_uids) + +# throw some warnings if: +# - the number of uids in the input is different from the output +# - there are missing hy_id/cs_id +if (!has_same_number_of_uids) { + warning(paste0("The number of unique hy_id/cs_id is different in the ", + "starting cross section points from the final cross section points:", + "\n > Starting number of unique hy_id/cs_id: ", length(starting_uids), + "\n > Ending number of unique hy_id/cs_id: ", length(ending_uids) + )) +} + +if (!all_starting_uids_in_ending_uids) { + number_uids_not_in_ending_uids <- length(starting_uids[!starting_uids %in% ending_uids]) + + # starting_uids %in% ending_uids + warning( + paste0("Missing hy_id/cs_id in output that are in the starting input cross section points: ", + "\n > Number of hy_id/cs_id missing: ", number_uids_not_in_ending_uids + ) + ) + + # warning(paste0(number_uids_not_in_ending_uids, " hy_id/cs_id from the input cross section points ", + # "is missing from the output cross section points")) + +} + +# tmp_path <- paste0(DOMAIN_WITH_FEMA_OUTPUT_DIR, "/final_cs2.parquet") +# message("saving file", tmp_path) +# +# final_cs <- arrow::read_parquet(tmp_path) +# +# # save cross section points as a parquet to out_path (domain/outputs/cross_sections.parquet) +# arrow::write_parquet( +# dplyr::select(final_cs, +# -is_dem_point +# ), +# paste0(DOMAIN_WITH_FEMA_OUTPUT_DIR, "/final_cs2.parquet") +# ) + +# --------------------------------------------------------------------------------- +# ---- Write final cross section points data ---- +# --------------------------------------------------------------------------------- + +CROSS_SECTIONS_OUTPUT_PATH <- paste0(DOMAIN_WITH_FEMA_OUTPUT_DIR, "/cross_sections.parquet") +message(round(Sys.time()), " - Saving ML augmented cross section points to:\n - filepath: '", CROSS_SECTIONS_OUTPUT_PATH, "'") + +# save cross section points as a parquet to out_path (domain/outputs/cross_sections.parquet) +arrow::write_parquet( + dplyr::select(final_cs, + -is_dem_point + ), + CROSS_SECTIONS_OUTPUT_PATH +) + 
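+
+# Optional diagnostic (a hedged sketch, not part of the original pipeline): if either
+# warning above fires, base R set operations on the uid vectors computed earlier show
+# exactly which hy_id/cs_id combinations changed between the input and the final points.
+missing_uids <- setdiff(starting_uids, ending_uids)   # uids present in the input but not the output
+new_uids     <- setdiff(ending_uids, starting_uids)   # uids that only appear in the output
+message("Missing hy_id/cs_id: ", length(missing_uids), " | New hy_id/cs_id: ", length(new_uids))
+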
+INTERNAL_CROSS_SECTIONS_PATH <- paste0("/Users/anguswatters/Desktop/cross_sections_is_dem_point.parquet") +arrow::write_parquet( + final_cs, + INTERNAL_CROSS_SECTIONS_PATH +) + +# --------------------------------------------------------------------------------- +# ---- Substitue diffusive domain DEMs Z values ---- +# --------------------------------------------------------------------------------- +final_cs <- arrow::read_parquet(CROSS_SECTIONS_OUTPUT_PATH) +# INTERNAL_CROSS_SECTIONS_PATH <- paste0("/Users/anguswatters/Desktop/cross_sections2.parquet") + +bb_df <- lapply(COASTAL_BATHY_DEM_PATHS, function(path) { + r <- terra::rast(path) + extent <- terra::ext(r) + # r <- terra::project(r, "EPSG:5070") + # terra::set.crs(r, "EPSG:5070") + ext_df <- data.frame(lapply(extent, function(i) {i})) + ext_df$crs <- terra::crs(r) + ext_df$file <- basename(path) + ext_df$path <- path + + return(ext_df) + }) %>% + dplyr::bind_rows() +# final_cs + +# INTERNAL_CROSS_SECTIONS_PATH <- paste0("/Users/anguswatters/Desktop/cross_sections_is_dem_point.parquet") +# arrow::write_parquet( +# final_cs, +# INTERNAL_CROSS_SECTIONS_PATH +# ) + +# CROSS_SECTIONS_OUTPUT_PATH <- paste0(DOMAIN_WITH_FEMA_OUTPUT_DIR, "/cross_sections.parquet") +# final_cs <- arrow::read_parquet(CROSS_SECTIONS_OUTPUT_PATH) +# START_PATH <- paste0("/Users/anguswatters/Desktop/cross_sections2.parquet") +# +# arrow::write_parquet( +# final_cs, +# START_PATH +# ) + +# rm(i, cs, df, pts_subset, has_pts_in_bb, bb, updated_depths) + +for (i in 1:nrow(bb_df)) { + + # i = 1 + + message(i, " - Checking DEM '", bb_df$file[i], "' for CS PTS...") + + df <- bb_df[i, ] + + START_EPSG_CODE <- 5070 + + cs <- arrow::read_parquet(START_PATH) + cs <- + cs %>% + # dplyr::slice(1:10000) %>% + sf::st_as_sf(coords = c("X", "Y"), crs = START_EPSG_CODE) + + # convert to bounding box CRS + cs <- + cs %>% + sf::st_transform(df$crs[1]) + + # create bounding box shape + bb <- sf::st_as_sf( + sf::st_as_sfc( + sf::st_bbox( + c(xmin = df$xmin, + xmax = df$xmax, + ymax = df$ymax, + ymin = df$ymin + ), + crs = sf::st_crs(df$crs) + ) + ) + ) + + # get pts that are in the bounding box + pts_subset <- sf::st_filter( + cs, + bb + ) + + has_pts_in_bb <- nrow(pts_subset) > 0 + + message(nrow(pts_subset), " cs points found within '", df$file, "' DEMs bounding box") + + if(!has_pts_in_bb) { + message(" > No points to update!") + next + } + + if(has_pts_in_bb) { + + message(" > Loading Raster") + + # load DEM + dem <- terra::rast(df$path) + + message(" > Extracting new cross section depth values from DEM...") + + updated_depths <- + pts_subset %>% + dplyr::mutate( + Z2 = hydrofabric3D:::extract_pt_val(dem, .), + Z_source2 = df$file + # Z = extract_pt_val(terra::rast(dem), .) 
+ ) %>% + sf::st_drop_geometry() %>% + dplyr::select( + dplyr::any_of(CROSSWALK_ID), + cs_id, pt_id, + # Z, + Z2, + # Z_source, + Z_source2 + ) + + message(" > Replacing old depth values with new depth values...") + cs <- + cs %>% + dplyr::left_join( + updated_depths, + by = c(CROSSWALK_ID, "cs_id", "pt_id") + ) %>% + dplyr::mutate( + Z = dplyr::case_when( + !is.na(Z2) ~ Z2, + TRUE ~ Z + ), + Z_source = dplyr::case_when( + !is.na(Z_source2) ~ Z_source2, + TRUE ~ Z_source + ) + ) %>% + dplyr::select(-Z2, -Z_source2) + + message(" > Projecting CS PTs back to starting CRS (", START_EPSG_CODE, ") ...") + cs <- + cs %>% + sf::st_transform(START_EPSG_CODE) + + message(" > Dropping point geometries and preserving X / Y coordinates...") + + cs <- + cs %>% + dplyr::mutate( + X = sf::st_coordinates(.)[,1], + Y = sf::st_coordinates(.)[,2] + ) %>% + sf::st_drop_geometry() %>% + dplyr::select( + # hy_id, + dplyr::any_of(CROSSWALK_ID), + cs_id, pt_id, + cs_lengthm, + relative_distance, + X, Y, + Z, + class, point_type, + Z_source, + bottom, left_bank, right_bank, valid_banks, + has_relief + + # newly added columns (03/06/2024) + ) + + message(" > Overwritting original cross section points parquet with updated depth values ...") + message(" > '", START_PATH, "'") + + arrow::write_parquet(cs, START_PATH) + + } + + } + +cross_sections <- arrow::read_parquet(START_PATH) + +cross_sections <- + cross_sections %>% + dplyr::select( + -point_type, + -class, + -bottom, -left_bank, -right_bank, + -has_relief, -valid_banks + ) + +# reclassify +# system.time({ +cross_sections <- hydrofabric3D::classify_points(cs_pts = cross_sections, + crosswalk_id = CROSSWALK_ID, + pct_of_length_for_relief = PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF + ) +# }) + +# final_cross_sections %>% +# dplyr::filter(id == "wb-1000") %>% +# dplyr::rename(hy_id = id) %>% +# hydrofabric3D::plot_cs_pts(color = "point_type") + +# --------------------------------------------------------------------------------- +# ---- Write final cross section points data ---- +# ---- Diffusive Domain DEM + FEMA + ML +# --------------------------------------------------------------------------------- + +CROSS_SECTIONS_ML_OUTPUT_PATH <- paste0(DOMAIN_WITH_FEMA_OUTPUT_DIR, "/cross_sections.parquet") + +message(round(Sys.time()), " - Saving Diffusive DEM + FEMA + ML augmented cross section points to:\n - filepath: '", CROSS_SECTIONS_ML_OUTPUT_PATH, "'") + +# sum(is.na(final_cross_sections$id)) +# sum(is.na(final_cross_sections$cs_id)) +# sum(is.na(final_cross_sections$pt_id)) +# sum(is.na(final_cross_sections$X)) + +# save cross section points as a parquet to out_path (domain/outputs/cross_sections.parquet) +arrow::write_parquet( + # dplyr::select(final_cs, + # -is_dem_point + # ), + cross_sections, + CROSS_SECTIONS_ML_OUTPUT_PATH +) + +# cross_sections %>% +# dplyr::select(id, Z_source) %>% +# dplyr::group_by(Z_source) %>% +# dplyr::count(Z_source) %>% +# dplyr::arrange(-n) + +# CROSS_SECTIONS_ML_OUTPUT_PATH +# +# CROSS_SECTIONS_OUTPUT_PATH +# +# CROSS_SECTIONS_ML_OUTPUT_PATH <- paste0(DOMAIN_WITH_FEMA_OUTPUT_DIR, "/cross_sections.parquet") +# +# CROSS_SECTIONS_PATH <- paste0(DOMAIN_WITH_FEMA_OUTPUT_DIR, "/cross_sections.parquet") +# DEM_CROSS_SECTIONS_PATH <- paste0(DOMAIN_WITH_FEMA_OUTPUT_DIR, "/dem_cross_sections.parquet") +# +# cs <- arrow::read_parquet(CROSS_SECTIONS_PATH) %>% +# dplyr::rename(hy_id = id) +# +# dem_cs <- arrow::read_parquet(DEM_CROSS_SECTIONS_PATH) %>% +# dplyr::rename(hy_id = id) +# +# z_ids <- +# cs %>% +# dplyr::group_by(Z_source) %>% +# 
dplyr::slice(1:4) %>% +# dplyr::pull(hy_id) +# +# # subset_ids <- z_ids[1:1] +# # z_ids <- z_ids[1:2] +# # for (i in seq_along(z_ids)) { +# for (i in seq_along(z_ids)) { +# message(i) +# # i = 1 +# subset_id <- z_ids[i] +# +# new_cs <- +# cs %>% +# dplyr::filter(hy_id %in% subset_id) +# +# old_cs <- +# dem_cs %>% +# dplyr::filter(hy_id %in% subset_id) +# +# title_str <- paste0( +# unique(new_cs$Z_source)[1], " vs. ", +# unique(old_cs$Z_source)[1] +# ) +# +# save_path = paste0( +# "/Users/anguswatters/Desktop/z_source_plots/", +# gsub(".tif", "", unique(new_cs$Z_source)[1]), +# "_vs_", +# unique(old_cs$Z_source)[1], "_", i, ".png" +# ) +# +# message("making plot at \n > '", save_path, "'") +# +# comp_plot <- dplyr::bind_rows( +# new_cs, +# old_cs +# ) %>% +# ggplot2::ggplot() + +# ggplot2::geom_point(ggplot2::aes(x = pt_id, +# y = Z, +# color = Z_source +# ), +# size = 3, +# alpha = 0.9 +# ) + +# ggplot2::labs(title = title_str) + +# ggplot2::facet_wrap(hy_id~cs_id) + +# ggplot2::theme_bw() + +# ggplot2::theme( +# plot.title = ggplot2::element_text(size = 14), +# legend.text = ggplot2::element_text(size = 14), +# legend.title = ggplot2::element_text(size = 14, face = "bold") +# ) +# +# ggplot2::ggsave(plot = comp_plot, +# filename = save_path, +# scale = 1 +# ) +# +# } +# +# +# dplyr::bind_rows( +# cs %>% +# dplyr::filter(hy_id %in% subset_ids), +# dem_cs %>% +# dplyr::filter(hy_id %in% subset_ids) +# +# ) %>% +# ggplot2::ggplot() + +# ggplot2::geom_point(ggplot2::aes(x = pt_id, +# y = Z, +# color = Z_source +# +# )) + +# ggplot2::labs(title = "al_nwfl_ncei_1 vs hydrofabric3D") + +# ggplot2::facet_wrap(hy_id~cs_id) +# +# cs %>% +# dplyr::filter(hy_id %in% subset_ids) %>% +# ggplot2::ggplot() + +# ggplot2::geom_point(ggplot2::aes(x = pt_id, +# y = Z, +# color = point_type +# +# )) + +# ggplot2::facet_wrap(hy_id~cs_id) +# +# +# dem_cs %>% +# dplyr::filter(hy_id %in% subset_ids) %>% +# ggplot2::ggplot() + +# ggplot2::geom_point(ggplot2::aes(x = pt_id, +# y = Z, +# color = point_type +# +# )) + +# ggplot2::facet_wrap(hy_id~cs_id) +# +# +# +# +# +# + + + + + + + + + + + + + diff --git a/runners/cs_runner/domain_with_fema2.R b/runners/cs_runner/domain_with_fema2.R new file mode 100644 index 0000000..3d574e0 --- /dev/null +++ b/runners/cs_runner/domain_with_fema2.R @@ -0,0 +1,2245 @@ +# Generate the transects + cs_pts + cross sections layers for a single flowlines domain file and DEM file +source("runners/cs_runner/config.R") +source("runners/cs_runner/utils.R") + +# load libraries +# library(hydrofabric3D) +# library(dplyr) +# library(sf) + +# Unique Flowline ID column name +CROSSWALK_ID <- "id" + +# --------------------------------------------------------------------- +# --- Read in flowlines +# --------------------------------------------------------------------- + +# Subsetting area +aoi <- sf::read_sf(DOMAIN_WITH_FEMA_SUBSET_PATH, layer = "divides") +aoi <- rmapshaper::ms_simplify(aoi, keep = 0.05) + +id_col <- "id" +# id_col <- "divide_id" + +# query the conus.gpkg for matching IDs +ids <- unique(aoi[[id_col]]) +query_ids <- paste0(paste0("'", ids, "'"), collapse= ", ") + +gpkg_layers <- sf::st_layers(DOMAIN_WITH_FEMA_FLOWLINES_PATH) +layer <- gpkg_layers$name[gpkg_layers$name == "flowpaths"] + +wkt <- + aoi %>% + rmapshaper::ms_dissolve() %>% + # rmapshaper::ms_explode() %>% + # sf::st_as_sfc() %>% + sf::st_sf() %>% + sf::st_geometry() %>% + sf::st_as_text() + +# read in flowlines based on IDs in AOI +flines <- sf::read_sf(DOMAIN_WITH_FEMA_FLOWLINES_PATH, layer = "flowpaths", + wkt_filter = 
wkt + # query = sprintf("SELECT * FROM \"%s\" WHERE %s IN (%s)", layer, id_col, query_ids), + # query = query + ) + +# flines <- +# flines %>% +# dplyr::slice(1:1000) + +# bad_ids <- c("wb-14538", "wb-14686", "wb-14687") +# flines <- +# flines %>% +# dplyr::filter(id %in% bad_ids) +# --------------------------------------------------------------------- +# --- Split flowlines by VPU +# --------------------------------------------------------------------- + +# VPUs polygons +VPU_boundaries <- sf::st_transform(nhdplusTools::vpu_boundaries, sf::st_crs(flines)) + +# add a VPU ID column to each flowline +flines <- add_intersects_ids(x = flines, y = VPU_boundaries, id_col = "VPUID") + +# TODO: improve this, manual remap some VPUIDs so that there are +# TODO: less small subsets of flowlines because a small bit of a different VPU is intersected +flines <- + flines %>% + dplyr::mutate( + VPUID = dplyr::case_when( + VPUID == "12" ~ "11, 12, 13", + VPUID == "12, 13" ~ "11, 12, 13", + VPUID == "11, 12" ~ "11, 12, 13", + VPUID == "01" ~ "01, 02", + TRUE ~ VPUID + ) + ) + +# rm(flines2) +# unnest_ids(flines2$VPUID2) + +# set of unique VPUs + +VPU_IDS <- unnest_ids(flines$VPUID) +# VPU_IDS <- unnest_ids(flines2$VPUID2) +VPU_IDS + +# all possible FEMA dirs +AOI_FEMA_DIRS <- FEMA_VPU_SUBFOLDERS[basename(FEMA_VPU_SUBFOLDERS) %in% paste0("VPU_", VPU_IDS) ] + +flines %>% + sf::st_drop_geometry() %>% + dplyr::group_by(VPUID) %>% + dplyr::count() + +flines %>% + sf::st_drop_geometry() %>% + dplyr::group_by(order) %>% + dplyr::count() +# unique(flines$VPUID) + +# calculate bankfull width +flines <- + flines %>% + hydrofabric3D::add_powerlaw_bankful_width("tot_drainage_areasqkm", 50) %>% + dplyr::select( + dplyr::any_of(CROSSWALK_ID), + VPUID, + # hy_id = id, + lengthkm, + tot_drainage_areasqkm, + bf_width, + order, + mainstem + ) %>% + hydroloom::rename_geometry("geometry") + + +sf::write_sf( + flines, + paste0(DOMAIN_WITH_FEMA_FLOWLINES_DIR, "/flowlines_subset.gpkg") +) + +transects <- hydrofabric3D::cut_cross_sections( + net = flines, # flowlines network + crosswalk_id = CROSSWALK_ID, # Unique feature ID + cs_widths = flines$bf_width, + # cs_widths = pmax(50, flowlines$bf_width * 11), # cross section width of each "id" linestring ("hy_id") + # cs_widths = pmax(50, flowlines$bf_width), # cross section width of each "id" linestring ("hy_id") + num = 10, # number of cross sections per "id" linestring ("hy_id") + smooth = TRUE, # smooth lines + densify = 3, # densify linestring points + rm_self_intersect = TRUE, # remove self intersecting transects + fix_braids = FALSE, # whether to fix braided flowlines or not + #### Arguments used for when fix_braids = TRUE # TODO: these methods need revision in hydrofabric3D to allow for more flexible processing for data that is NOT COMID based (i.e. 
hy_id) + # terminal_id = NULL, + # braid_threshold = NULL, + # version = 2, + # braid_method = "comid", + # precision = 1, + add = TRUE # whether to add back the original data +) + +transects <- + transects %>% + dplyr::mutate(cs_source = CS_SOURCE) %>% + dplyr::select(id, + cs_source, + cs_id, + cs_measure, + ds_distance, + cs_lengthm, + sinuosity, + geometry + ) + +# paste0(DOMAIN_WITH_FEMA_TRANSECTS_DIR, "/pre_extension_transects.gpkg") + +sf::write_sf( + transects, + paste0(DOMAIN_WITH_FEMA_TRANSECTS_DIR, "/pre-extension-transects.gpkg") +) + +# [1] "wb-2416492" "wb-2414869" "wb-2415765" "wb-2416657" "wb-2415479" "wb-2420307" "wb-2425952" "wb-2421269" "wb-2425750" +# [10] "wb-2422028" "wb-2419212" "wb-2426092" "wb-2414971" "wb-2416120" "wb-2414960" "wb-2433259" "wb-14381" "wb-18022" +# [19] "wb-15173" "wb-15160" "wb-571" "wb-2413739" "wb-508015" "wb-507770" "wb-2424752" "wb-2424545" "wb-2422849" +# [28] "wb-2417939" "wb-2425090" "wb-2422212" "wb-2422714" "wb-2419650" "wb-2419650" "wb-2416492" "wb-2414869" "wb-2415765" +# [37] "wb-2416657" "wb-2415479" "wb-2420307" "wb-2419694" "wb-2419694" "wb-2419974" "wb-2419974" "wb-2420065" "wb-2420065" +# [46] "wb-2425952" "wb-2420678" "wb-2420678" "wb-2421269" "wb-2419695" "wb-2419695" "wb-2425750" "wb-2422028" "wb-2419212" +# [55] "wb-2426091" "wb-2414971" "wb-2407999" "wb-2407999" "wb-2426131" "wb-2426131" "wb-2416120" "wb-2433525" "wb-2433525" +# [64] "wb-2433259" "wb-14381" "wb-14576" "wb-14576" "wb-18022" "wb-15173" "wb-15160" "wb-11097" "wb-11097" +# [73] "wb-14804" "wb-14804" "wb-571" "wb-2408123" "wb-2408123" "wb-2413739" "wb-508015" "wb-507770" "wb-2424752" +# [82] "wb-2435791" "wb-2435791" "wb-2435798" "wb-2435798" "wb-2435779" "wb-2435779" "wb-2435781" "wb-2435781" "wb-2424545" +# [91] "wb-2422849" "wb-2417939" "wb-2425090" "wb-2422212" "wb-2422714" + +# transects <- sf::read_sf(paste0(DOMAIN_WITH_FEMA_TRANSECTS_DIR, "/pre-extension-transects.gpkg")) + +# out_transects +# out_transects2 + +transects <- + transects %>% + dplyr::left_join( + flines %>% + dplyr::select(id, VPUID, mainstem, tot_drainage_areasqkm) %>% + sf::st_drop_geometry(), + by = "id" + ) + +# sf::write_sf( +# transects, +# paste0("/Users/anguswatters/Desktop/tmp_trans.gpkg") +# ) + +GROUP_VPU_IDS <- unnest_ids(transects$VPUID) + +# all FEMA dirs for the current area +GROUP_FEMA_DIRS <- FEMA_VPU_SUBFOLDERS[basename(FEMA_VPU_SUBFOLDERS) %in% paste0("VPU_", GROUP_VPU_IDS) ] +GROUP_FEMA_FILES <- list.files(GROUP_FEMA_DIRS, full.names = T)[grepl("_output.gpkg", list.files(GROUP_FEMA_DIRS, full.names = T))] + + +# read each FEMA geopackage into a list +fema <- lapply(GROUP_FEMA_FILES, function(gpkg) sf::read_sf(gpkg)) +# transects$VPUID %>% unique() + +fema <- + fema %>% + dplyr::bind_rows() %>% + dplyr::mutate( + fema_id = 1:dplyr::n() + ) + +message("Simplifying FEMA polygons...") +message(" - Number of geoms BEFORE simplifying: ", nrow(fema)) + +# TODO: this should be a function argument OR removed, shouldn't probably forcibly and silently simplify the input polygons without user knowing.. 
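+# (For reference, assuming current rmapshaper behaviour: `keep` is the proportion of
+# vertices retained per geometry, `keep_shapes = TRUE` prevents small polygons from being
+# dropped entirely, and `sys = TRUE` shells out to a system-installed mapshaper, with
+# `sys_mem` setting how much memory that external process may use.)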
+# keep 1% of the original points for speed +fema <- rmapshaper::ms_simplify(fema, keep_shapes = T, keep = 0.01, sys = TRUE, sys_mem = 16) +# fema <- rmapshaper::ms_simplify(fema, keep_shapes = T, keep = 0.1, sys = TRUE, sys_mem = 16) + +# sf::write_sf( +# fema, +# paste0("/Users/anguswatters/Desktop/tmp_fema.gpkg") +# ) + +message(" - Number of geoms AFTER simplifying: ", nrow(fema)) +message("Extending transects out to FEMA 100yr floodplain polygon boundaries - (", Sys.time(), ")") + +# transects <- +# transects %>% +# dplyr::left_join( +# dplyr::select(sf::st_drop_geometry(flines), +# dplyr::any_of(CROSSWALK_ID), +# mainstem +# ), +# by = CROSSWALK_ID +# ) + +# test_ids <- c("wb-2416492", "wb-2414869", "wb-2415765" , "wb-2416657" , "wb-2415479", +# "wb-2420307" , "wb-2425952" , "wb-2421269" , "wb-2425750", +# "wb-14804", "wb-14804", "wb-571", "wb-2408123", "wb-2408123" +# ) +# test_trans <- +# transects %>% +# dplyr::filter(id %in% test_ids) +# +# test_flines <- +# flines %>% +# dplyr::filter(id %in% test_ids) + +# test_fema +# test_trans$VPUID + +# TODO: make sure this 3000m extension distance is appropriate across VPUs +# TODO: also got to make sure that this will be feasible on memory on the larger VPUs... +# transects <- hydrofabric3D::extend_transects_to_polygons( +ext_transects <- hydrofabric3D::extend_transects_to_polygons( + transect_lines = transects, + polygons = fema, + flowlines = flines, + # transect_lines = test_trans, + # polygons = fema, + # flowlines = test_flines, + crosswalk_id = CROSSWALK_ID, + grouping_id = "mainstem", + max_extension_distance = 3000 +) + +# sf::write_sf( +# flines, +# paste0("/Users/anguswatters/Desktop/tmp_flines.gpkg") +# ) + +# flines <- sf::read_sf( paste0("/Users/anguswatters/Desktop/tmp_flines.gpkg")) +# transects <- sf::read_sf(paste0("/Users/anguswatters/Desktop/tmp_trans.gpkg")) +# fema <- sf::read_sf( paste0("/Users/anguswatters/Desktop/tmp_fema.gpkg")) +# +# transect_lines <- transects +# polygons <- fema +# flowlines <- flines +# bad_id <- "wb-10813" +# mainstem <- "1977479" +# crosswalk_id = "id" +# grouping_id = "mainstem" +# max_extension_distance = 3000 + + +# hydrofabric3D:::rm_self_intersections(ext_transects) +# transects <- +# transects %>% +ext_transects <- + ext_transects %>% + dplyr::mutate(cs_source = CS_SOURCE) %>% + dplyr::select(id, + cs_source, + cs_id, + cs_measure, + ds_distance, + cs_lengthm, + sinuosity, + geometry + ) + +# ext_transects <- +# ext_transects %>% +# dplyr::select(id, cs_id, cs_lengthm, cs_measure, ds_distance, sinuosity, +# # tot_drainage_areasqkm, +# geometry) + +sf::write_sf( + # transects, + ext_transects, + paste0(DOMAIN_WITH_FEMA_TRANSECTS_DIR, "/post-extension-transects.gpkg") +) + +# get cross section point elevations +cs_pts <- hydrofabric3D::cross_section_pts( + # cs = transects, + cs = ext_transects, + crosswalk_id = CROSSWALK_ID, + points_per_cs = NULL, + min_pts_per_cs = 10, + dem = DEM_PATH +) + +# mapview::mapview(cs_pts) + test_trans + +sf::write_sf( + cs_pts, + paste0(DOMAIN_WITH_FEMA_CS_PTS_DIR, "/raw-cs-pts.gpkg") +) + +# DOMAIN_WITH_FEMA_CS_PTS_DIR +# ---------------------------------------------------------------------------------------------------------------- +# ---- STEP 2: Remove any cross section that has ANY missing (NA) Z values, and classify the points ---- +# ---------------------------------------------------------------------------------------------------------------- +# cs_pts2 %>% +# dplyr::slice(1:200) %>% +# dplyr::rename(hy_id = id) %>% +# 
hydrofabric3D::plot_cs_pts(x = "pt_id", color = "point_type") + +cs_pts <- + # cs_pts2 <- + cs_pts %>% + hydrofabric3D::drop_incomplete_cs_pts(CROSSWALK_ID) %>% + hydrofabric3D::classify_points( + crosswalk_id = CROSSWALK_ID, + pct_of_length_for_relief = PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF + ) + +# cs_pts %>% +# dplyr::slice(150:250) %>% +# hydrofabric3D::plot_cs_pts("id", color = "point_type", size = 4) + +# }) + +ids_original_cs_pts <- hydrofabric3D::add_tmp_id(cs_pts, x = CROSSWALK_ID)$tmp_id +# ids_original_cs_pts <- hydrofabric3D::add_tmp_id(cs_pts2)$tmp_id + +# sf::write_sf(cs_pts2, "/Users/anguswatters/Desktop/test_improve_cs_pts_classified_11.gpkg") +# sf::write_sf(cs_pts, "/Users/anguswatters/Desktop/test_improve_cs_pts_classified_11_2.gpkg") + + +# ---------------------------------------------------------------------------------------------------------------- +# ---- STEP 3: Try to rectify any no relief and invalid banks cross sections ---- +# ---------------------------------------------------------------------------------------------------------------- + +system.time({ +fixed_pts <- hydrofabric3D::get_improved_cs_pts( + cs_pts = cs_pts, # cross section points generated from hydrofabric3D::cross_section_pts() + net = flines, # original flowline network + # net = test_flines, + # net = flowlines, # original flowline network + # transects = transects, # original transect lines + transects = ext_transects, # original transect lines + crosswalk_id = CROSSWALK_ID, + points_per_cs = NULL, + min_pts_per_cs = 10, # number of points per cross sections + dem = DEM_PATH, # DEM to extract points from + scale = EXTENSION_PCT, # How far to extend transects if the points need to be rechecked + pct_of_length_for_relief = PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF, # percent of cross sections length to be needed in relief calculation to consider cross section to "have relief" + fix_ids = FALSE, + verbose = TRUE +) +}) + +# sf::write_sf( +# test_flines, +# "/Users/anguswatters/Desktop/test_flines.gpkg" +# ) +# +# sf::write_sf( +# test_trans, +# "/Users/anguswatters/Desktop/test_start_trans.gpkg" +# ) +# +# sf::write_sf( +# ext_transects, +# "/Users/anguswatters/Desktop/test_ext_trans.gpkg" +# ) +# +# sf::write_sf( +# cs_pts, +# "/Users/anguswatters/Desktop/test_cs_pts.gpkg" +# ) + +# mapview::mapview(fixed_pts) + +# mapview::mapview(ext_transects, color = "green") + +# mapview::mapview(out_transects, color = "red") + +fixed_pts$Z %>% is.null() %>% any() +fixed_pts$Z %>% is.na() %>% any() + +ids_after_fixed_pts <- hydrofabric3D::add_tmp_id(fixed_pts, x = CROSSWALK_ID)$tmp_id + +# ---------------------------------------------------------------------------------------------------------------- +# ---- Update transects with extended transects (if exists) ---- +# ---------------------------------------------------------------------------------------------------------------- +# ext_transects <- +# ext_transects %>% +# dplyr::mutate(cs_source = CS_SOURCE) + +# end_pts <- +# fixed_pts %>% +# dplyr::filter(id == "wb-2435034", cs_id == 1) %>% +# # hydrofabric3D::plot_cs_pts("id") +# dplyr::slice( +# which.min(pt_id), +# which.max(pt_id) +# ) +# mapview::mapview(end_pts) + tt +# tt <- +# ext_transects %>% +# dplyr::filter(id == "wb-2435034", cs_id == 1) +# hydrofabric3D:::match_transects_to_extended_cs_pts() + +out_transects <- hydrofabric3D:::match_transects_to_extended_cs_pts( + # transect_lines = transects, + transect_lines = ext_transects, + fixed_cs_pts = fixed_pts, + crosswalk_id = CROSSWALK_ID + # 
extension_pct = EXTENSION_PCT +) + +# sf::write_sf( +# out_transects, +# paste0(DOMAIN_WITH_FEMA_TRANSECTS_DIR, "/cs-point-extension-transects.gpkg") +# # "/Users/anguswatters/Desktop/tmp_trans_improved.gpkg" +# ) + +# fixed_pts2 <- hydrofabric3D:::renumber_cs_ids(df = fixed_pts, crosswalk_id = CROSSWALK_ID) +# out_transects2 <- hydrofabric3D:::renumber_cs_ids(df = out_transects, crosswalk_id = CROSSWALK_ID) + +fixed_pts <- + fixed_pts %>% +# fixed_pts2 <- + # fixed_pts %>% + hydrofabric3D:::renumber_cs_ids(crosswalk_id = CROSSWALK_ID) %>% + dplyr::group_by(id) %>% + dplyr::arrange(cs_id, .by_group = TRUE) %>% + dplyr::ungroup() %>% + dplyr::group_by(id, cs_id) %>% + dplyr::arrange(pt_id, .by_group = TRUE) %>% + dplyr::ungroup() + + +out_transects <- + out_transects %>% +# out_transects2 <- +# out_transects %>% + hydrofabric3D:::renumber_cs_ids(crosswalk_id = CROSSWALK_ID) %>% + dplyr::group_by(id) %>% + dplyr::arrange(cs_id, .by_group = TRUE) %>% + dplyr::ungroup() %>% + dplyr::select( + -left_bank_count, + -right_bank_count, + -channel_count, + -bottom_count, + -bottom, + -left_bank, + -right_bank, + -valid_banks, + -has_relief, + -is_extended + ) + +sf::write_sf( + out_transects, + # out_transects2, + paste0(DOMAIN_WITH_FEMA_TRANSECTS_DIR, "/cs-point-extension-transects.gpkg") + # "/Users/anguswatters/Desktop/tmp_trans_improved.gpkg" +) + +# sf::write_sf( +# fixed_pts2, +# paste0(DOMAIN_WITH_FEMA_CS_PTS_DIR, "/fixed_cs_pts.gpkg") +# ) + +# transects %>% +# dplyr::group_by(id) %>% +# dplyr::arrange(cs_id, .by_group = TRUE) %>% +# dplyr::ungroup() +# +# out_transects2 %>% +# # dplyr::filter(id %in% c("wb-2435034", "wb-2435045")) %>% +# dplyr::group_by(id) %>% +# dplyr::arrange(cs_id, .by_group = TRUE) %>% +# dplyr::ungroup() + +# sf::write_sf( +# out_transects2, +# paste0(DOMAIN_WITH_FEMA_TRANSECTS_DIR, "/cs-point-extension-transects.gpkg") +# # "/Users/anguswatters/Desktop/tmp_trans_improved.gpkg" +# ) +# +# sf::write_sf( +# fixed_pts2, +# paste0(DOMAIN_WITH_FEMA_CS_PTS_DIR, "/fixed_cs_pts.gpkg") +# ) + +# trans_uids <- hydrofabric3D::get_unique_tmp_ids(out_transects, "id") +# ext_trans_uids <- hydrofabric3D::get_unique_tmp_ids(ext_transects, "id") +# fixed_pts_uids <- hydrofabric3D::get_unique_tmp_ids(fixed_pts, "id") + +# all(trans_uids %in% ext_trans_uids) +# all(ext_trans_uids %in% trans_uids) +# all(trans_uids %in% fixed_pts_uids) +# all(fixed_pts_uids %in% trans_uids) +# all(ext_trans_uids %in% fixed_pts_uids) +# all(fixed_pts_uids %in% ext_trans_uids) + +# ---------------------------------------------------------------------------------------------------------------- +# ---- Re enumerate the transects & cross section points "cs_id" ---- +# ---------------------------------------------------------------------------------------------------------------- + +# fixed_pts <- hydrofabric3D:::renumber_cs_ids(df = fixed_pts, crosswalk_id = "hy_id") +# out_transects <- hydrofabric3D:::renumber_cs_ids( +# df = dplyr::mutate(out_transects, pt_id = 1), +# crosswalk_id = "hy_id" +# ) %>% +# dplyr::select(-pt_id) +# length(out_transects_uids) +# length(fixed_pts_uids) +# +# out_transects_uids <- hydrofabric3D::get_unique_tmp_ids(out_transects, "id") +# fixed_pts_uids <- hydrofabric3D::get_unique_tmp_ids(fixed_pts, "id") +# +# length(out_transects_uids) == length(fixed_pts_uids) & all(out_transects_uids %in% fixed_pts_uids) & all(fixed_pts_uids %in% out_transects_uids) +# +# fixed_pts2 <- hydrofabric3D:::renumber_cs_ids(df = fixed_pts, crosswalk_id = CROSSWALK_ID) +# out_transects2 <- 
hydrofabric3D:::renumber_cs_ids(df = out_transects, crosswalk_id = CROSSWALK_ID) +# +# out_transects2_uids <- hydrofabric3D::get_unique_tmp_ids(out_transects2, "id") +# fixed_pts2_uids <- hydrofabric3D::get_unique_tmp_ids(fixed_pts2, "id") +# +# length(out_transects2_uids) == length(fixed_pts2_uids) & all(out_transects2_uids %in% fixed_pts2_uids) & all(fixed_pts2_uids %in% out_transects2_uids) + + # paste0(DOMAIN_WITH_FEMA_TRANSECTS_DIR, "/extended_transects.gpkg") + +# sf::write_sf( +# out_transects2, +# # "/Users/anguswatters/Desktop/tmp_trans_improved2.gpkg" +# paste0(DOMAIN_WITH_FEMA_TRANSECTS_DIR, "/final_transects.gpkg") +# ) + +# object.size(out_transects2) + +# classify the cross section points +fixed_pts <- + fixed_pts %>% +# fixed_pts2 <- + # fixed_pts2 %>% + dplyr::mutate( + X = sf::st_coordinates(.)[,1], + Y = sf::st_coordinates(.)[,2] + ) %>% + sf::st_drop_geometry() %>% + dplyr::select( + dplyr::any_of(CROSSWALK_ID), + cs_id, + pt_id, + cs_lengthm, + relative_distance, + X, Y, Z, + class, point_type, + bottom, left_bank, right_bank, valid_banks, has_relief # newly added columns (03/06/2024) + ) %>% + dplyr::mutate( + Z_source = CS_SOURCE + ) %>% + dplyr::relocate( + dplyr::any_of(CROSSWALK_ID), + cs_id, pt_id, cs_lengthm, relative_distance, X, Y, Z, Z_source, + class, point_type, + bottom, left_bank, right_bank, valid_banks, has_relief + ) + +# # add Z_source column for source of elevation data +# fixed_pts2 <- +# fixed_pts2 %>% +# dplyr::mutate( +# Z_source = CS_SOURCE +# ) %>% +# dplyr::relocate( +# dplyr::any_of(CROSSWALK_ID), +# cs_id, pt_id, cs_lengthm, relative_distance, X, Y, Z, Z_source +# # class, point_type, +# # bottom, left_bank, right_bank, valid_banks, has_relief +# ) + +ids_before_align <- hydrofabric3D::add_tmp_id(fixed_pts, x = CROSSWALK_ID)$tmp_id + +message("Aligning banks and smoothing bottoms...") +fixed_pts <- hydrofabric3D::align_banks_and_bottoms(cs_pts = fixed_pts, crosswalk_id = CROSSWALK_ID) + +ids_after_align <- hydrofabric3D::add_tmp_id(fixed_pts, x = CROSSWALK_ID)$tmp_id + +message("Reclassifying cross section points...") + +fixed_pts <- hydrofabric3D::classify_points( + cs_pts = fixed_pts, + crosswalk_id = CROSSWALK_ID, + pct_of_length_for_relief = PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF +) + +ids_after_reclassify <- hydrofabric3D::add_tmp_id(fixed_pts, x = CROSSWALK_ID)$tmp_id + +if(all(ids_original_cs_pts %in% ids_after_fixed_pts)) { + message("All hy_id/cs_ids in ORIGINAL DEM point extraction were found in the FIXED points") +} else { + message(" >>> Missing hy_id/cs_ids in ORIGINAL DEM point extraction compared to the FIXED points") +} + +if(all(ids_before_align %in% ids_after_align)) { + message("All hy_id/cs_ids are kept in tact after bank alignment and bottom smoothing") +} else { + message(" >>> Missing hy_id/cs_ids after bank alignment and bottom smoothing") +} + +if(all(ids_after_align %in% ids_after_reclassify)) { + message("All hy_id/cs_ids are kept in tact after RECLASSIFICATION") +} else { + message(" >>> Missing hy_id/cs_ids after RECLASSIFICATION") +} + +# sf::write_sf( +# out_transects, +# paste0(DOMAIN_WITH_FEMA_VPU_SUBSETS_DIR, "/", VPU, "_transects.gpkg") +# ) +# + +arrow::write_parquet( + fixed_pts, + paste0(DOMAIN_WITH_FEMA_CS_PTS_DIR, "/final-cs-pts.parquet") +) +# paste0(DOMAIN_WITH_FEMA_CS_PTS_DIR, "/final-cs-pts.parquet") + + +# --------------------------------------------------------------------------------- +# ---- Inject ML predicted top widths / Dingman's R ---- +# 
--------------------------------------------------------------------------------- + +# --------------------------------------------------------------------------------- +# ---- Read in ML data ---- +# --------------------------------------------------------------------------------- + +VPU_ML_BATHYMETRY_PATHS <- list.files(DOMAIN_WITH_FEMA_ML_DIR, full.names = T) + +ML_CROSSWALK_ID <- "id" + +# ml_outputs <- lapply(VPU_ML_BATHYMETRY_PATHS, function(prq) { +# vpu_id <- gsub(".*ml/([a-zA-Z0-9]+).*", "\\1", prq) +# arrow::read_parquet(prq) %>% +# dplyr::mutate(vpu_id = vpu_id) +# } +# ) %>% +# dplyr::bind_rows() %>% +# dplyr::select( +# dplyr::any_of(ML_CROSSWALK_ID), +# vpu_id, +# owp_y_bf, owp_y_inchan, +# owp_tw_bf, owp_tw_inchan, +# owp_dingman_r +# ) +# +# # rename ML_CROSSWALK_ID (unique ID) to match the CROSSWALK_ID in CS PTS +# # TODO: This assumes the IDs do correspond with eachother... (built from same flowlines network) +# names(ml_outputs)[names(ml_outputs) == ML_CROSSWALK_ID] = CROSSWALK_ID +# +# # Keep only distinct ID rows +# ml_outputs <- +# ml_outputs %>% +# dplyr::distinct( +# dplyr::across(dplyr::any_of(CROSSWALK_ID)), +# vpu_id, +# owp_y_bf, owp_y_inchan, +# owp_tw_bf, owp_tw_inchan, +# owp_dingman_r +# ) + +# sf::st_layers(DOMAIN_WITH_FEMA_FLOWLINES_PATH) +# rm(ml_outputs, ml) +ml_outputs <- sf::read_sf(DOMAIN_WITH_FEMA_FLOWLINES_PATH, layer = "flowpath-attributes-ml") + +ml_outputs <- + ml_outputs %>% + dplyr::select( + dplyr::any_of(ML_CROSSWALK_ID), + vpuid, + owp_y_bf = YCC, + owp_y_inchan = Y, + owp_tw_bf = TopWdthCC, + owp_tw_inchan = TopWdth, + owp_dingman_r = dingman_r + ) + + +# # rename ML_CROSSWALK_ID (unique ID) to match the CROSSWALK_ID in CS PTS +# # TODO: This assumes the IDs do correspond with eachother... (built from same flowlines network) +names(ml_outputs)[names(ml_outputs) == ML_CROSSWALK_ID] = CROSSWALK_ID +# +# # Keep only distinct ID rows +ml_outputs <- + ml_outputs %>% + dplyr::distinct( + dplyr::across(dplyr::any_of(CROSSWALK_ID)), + vpu_id, + owp_y_bf, owp_y_inchan, + owp_tw_bf, owp_tw_inchan, + owp_dingman_r + ) + +# --------------------------------------------------------------------------------- +# ---- Read in CS PTS data ---- +# --------------------------------------------------------------------------------- + +# CS_PTS_OUTPUT_PATH <- paste0(DOMAIN_WITH_FEMA_OUTPUT_DIR, "/cs_pts.parquet") +CS_PTS_OUTPUT_PATH <- paste0(DOMAIN_WITH_FEMA_CS_PTS_DIR, "/final-cs-pts.parquet") + +cs_pts <- arrow::read_parquet(CS_PTS_OUTPUT_PATH) + +# --------------------------------------------------------------------------------- +# ---- Join CS PTS with ML data --- +# --------------------------------------------------------------------------------- +message(round(Sys.time()), " - Joining ML width/depths estimates to cross section points...") + +# ml_outputs %>% +# dplyr::group_by(id) %>% +# dplyr::count(id) %>% +# dplyr::arrange(-n) + +# join the ML outputs data to the cross section points +cs_pts <- + cs_pts %>% + dplyr::left_join( + dplyr::select(ml_outputs, + dplyr::any_of(CROSSWALK_ID), + owp_tw_inchan, + owp_y_inchan, + owp_tw_bf, + owp_y_bf, + owp_dingman_r + ), + by = CROSSWALK_ID + ) + +# --------------------------------------------------------------------------------- +# ---- Fixing negative depths/widths estimates ---- +# --------------------------------------------------------------------------------- +message(round(Sys.time()), " - Replacing any negative width/depth estimates with cross section bottom lengths...") + +cs_bottom_lengths <- 
hydrofabric3D::get_cs_bottom_length(cross_section_pts = cs_pts, crosswalk_id = CROSSWALK_ID) + +# TODO: for now we replace any negative TW values with the length of the bottom of the cross section +# TODO: This method + the negative model output values both need to be looked into (04/05/2024) +cs_pts <- + cs_pts %>% + dplyr::left_join( + cs_bottom_lengths, + by = c(CROSSWALK_ID, "cs_id") + # by = c("hy_id", "cs_id") + ) %>% + dplyr::mutate( + owp_tw_inchan = dplyr::case_when( + owp_tw_inchan <= 0 ~ bottom_length, + TRUE ~ owp_tw_inchan + ), + owp_tw_bf = dplyr::case_when( + owp_tw_bf <= 0 ~ bottom_length, + TRUE ~ owp_tw_bf + ) + ) %>% + dplyr::select(-bottom_length) + +# extract any cross sections that didn't get matched with a "hf_id" and (or?) no ML data +# TODO: look at this stuff with Arash (04/09/2024) +missing_cs <- + cs_pts %>% + dplyr::filter( + is.na(.data[[CROSSWALK_ID]]) | + is.na(owp_tw_inchan) | is.na(owp_y_inchan) | + is.na(owp_tw_bf) | is.na(owp_y_bf) | + is.na(owp_dingman_r) + ) %>% + hydrofabric3D::add_tmp_id(x = CROSSWALK_ID) + +# TODO: Delete this, but time being keeping this to inspect mismatch in between "hy_id" and "hf_id" +# readr::write_csv( +# dplyr::select(missing_cs, -tmp_id), +# paste0(META_PATH, "nextgen_", path_df$vpu[i], "_cross_sections_missing_hf_ids.csv") +# ) + +# Split the cross sections into 2 groups: +# - "Inchannel cs" group are points with BOTH valid banks AND relief --> These get the INCHANNEL TW and Y values from the ML model +# - "Bankful cs" group are points WITHOUT valid banks OR any relief --> These get the BANKFUL TW and Y values from the ML model +inchannel_cs <- + cs_pts %>% + hydrofabric3D::add_tmp_id(x = CROSSWALK_ID) %>% + dplyr::filter(!tmp_id %in% unique(missing_cs$tmp_id)) %>% # NOTE: makes sure to remove any of the "missing" cross sections without data + dplyr::select(-tmp_id) %>% + dplyr::filter(valid_banks & has_relief) %>% + # NOTE: temporarily rename the top widths, depths, and dingman's R columns so they + # work nicely with the "hydrofabric3D::add_cs_bathymetry()" function which takes a dataframe of cross section points + # with "TW", "DEPTH", and "DINGMAN_R" columns for each cross section + dplyr::rename( + TW = owp_tw_inchan, + DEPTH = owp_y_inchan, + DINGMAN_R = owp_dingman_r + ) + +bankful_cs <- + cs_pts %>% + hydrofabric3D::add_tmp_id(x = CROSSWALK_ID) %>% + dplyr::filter(!tmp_id %in% unique(missing_cs$tmp_id)) %>% + dplyr::select(-tmp_id) %>% + dplyr::filter(!valid_banks | !has_relief) %>% + dplyr::rename( + TW = owp_tw_bf, + DEPTH = owp_y_bf, + DINGMAN_R = owp_dingman_r + ) + + +# sanity check that all rows are accounted for after splitting up data +split_kept_all_rows <- nrow(cs_pts) == (nrow(bankful_cs) + nrow(inchannel_cs) + nrow(missing_cs)) +# split_kept_all_rows <- nrow(cs_pts) == nrow(bankful_cs) + nrow(inchannel_cs) + +if (!split_kept_all_rows) { + warning(paste0("When splitting cross section points into 'bankful' and 'inchannel' groups,", + "\nsome points were not put in either group.", + "\nLikely due to 'valid_banks' and/or 'has_relief' columns have either missing ", + "values or contain values other than TRUE/FALSE") + ) +} + +message(round(Sys.time()), " - Adding cross section bathymetry using inchannel widths/depths estimates...") +# tmp <- +# inchannel_cs %>% +# dplyr::slice(1:10000) +system.time({ + + # Add bathymetry using "inchannel" estimates + inchannel_cs <- hydrofabric3D::add_cs_bathymetry( + cross_section_pts = inchannel_cs, + # cross_section_pts = tmp, + crosswalk_id = CROSSWALK_ID + ) + +}) + + + 
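+# Hedged sanity check (an illustration, not part of the original workflow): confirm the
+# bathymetry injection above did not introduce missing depths and report how many
+# cross sections remain; Z, cs_id and the CROSSWALK_ID column all exist in `inchannel_cs`.
+stopifnot(!any(is.na(inchannel_cs$Z)))
+message("inchannel cross sections with bathymetry: ",
+        dplyr::n_distinct(inchannel_cs[[CROSSWALK_ID]], inchannel_cs$cs_id))
+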
+# arrow::write_parquet(inchannel_cs, "/Users/anguswatters/Desktop/test_ml_cs_pts_06.parquet") +# ml_subset %>% +# dplyr::filter(hy_id == "wb-1005207") %>% +# dplyr::select(owp_y_inchan, owp_tw_inchan) %>% +# .$owp_y_inchan +message(round(Sys.time()), " - Adding cross section bathymetry using bankful widths/depths estimates...") + +system.time({ + + # Add bathymetry using "bankful" estimates + bankful_cs <- hydrofabric3D::add_cs_bathymetry( + cross_section_pts = bankful_cs, + # cross_section_pts = dplyr::slice(bankful_cs, 1:10000), + crosswalk_id = CROSSWALK_ID + ) + +}) + +# combine the inchannel and bankful cross section points back together, fill out missing values and reclassify the points +final_cs <- dplyr::bind_rows( + dplyr::select( + inchannel_cs, + # inchannel_cs2, + # -hf_id, + -TW, -DEPTH, -DINGMAN_R, + # -is_dem_point, + -dplyr::starts_with("owp") + ), + dplyr::select( + bankful_cs, + # bankful_cs2, + # -hf_id, + -TW, -DEPTH, -DINGMAN_R, + # -is_dem_point, + -dplyr::starts_with("owp") + ), + dplyr::select( + dplyr::mutate( + missing_cs, + is_dem_point = FALSE + ), + # -hf_id, + # -is_dem_point, + -dplyr::starts_with("owp"), + -tmp_id + ) + ) %>% + dplyr::group_by(dplyr::across(dplyr::any_of(c(CROSSWALK_ID, "cs_id")))) %>% + # dplyr::group_by(hy_id, cs_id) %>% + tidyr::fill( + c(cs_lengthm, Z_source) + ) %>% + dplyr::ungroup() %>% + dplyr::select( + -point_type, + -class, + -bottom, -left_bank, -right_bank, + -has_relief, -valid_banks + ) + +# arrow::write_parquet(final_cs, "/Users/anguswatters/Desktop/tmp.parquet") +# final_cs <- arrow::read_parquet("/Users/anguswatters/Desktop/tmp.parquet") + + +message(round(Sys.time()), " - Reclassifying cross section point types...") + +# reclassify +final_cs <- hydrofabric3D::classify_points(cs_pts = final_cs, + crosswalk_id = CROSSWALK_ID, + pct_of_length_for_relief = PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF +) + +starting_uids <- hydrofabric3D::get_unique_tmp_ids(cs_pts, x = CROSSWALK_ID) +ending_uids <- hydrofabric3D::get_unique_tmp_ids(final_cs, x = CROSSWALK_ID) + +has_same_number_of_uids <- length(starting_uids) == length(ending_uids) +all_starting_uids_in_ending_uids <- all(starting_uids %in% ending_uids) +all_ending_uids_in_starting_uids <- all(ending_uids %in% starting_uids) + +# throw some warnings if: +# - the number of uids in the input is different from the output +# - there are missing hy_id/cs_id +if (!has_same_number_of_uids) { + warning(paste0("The number of unique hy_id/cs_id is different in the ", + "starting cross section points from the final cross section points:", + "\n > Starting number of unique hy_id/cs_id: ", length(starting_uids), + "\n > Ending number of unique hy_id/cs_id: ", length(ending_uids) + )) +} + +if (!all_starting_uids_in_ending_uids) { + number_uids_not_in_ending_uids <- length(starting_uids[!starting_uids %in% ending_uids]) + + # starting_uids %in% ending_uids + warning( + paste0("Missing hy_id/cs_id in output that are in the starting input cross section points: ", + "\n > Number of hy_id/cs_id missing: ", number_uids_not_in_ending_uids + ) + ) + + # warning(paste0(number_uids_not_in_ending_uids, " hy_id/cs_id from the input cross section points ", + # "is missing from the output cross section points")) + +} + +# tmp_path <- paste0(DOMAIN_WITH_FEMA_OUTPUT_DIR, "/final_cs2.parquet") +# message("saving file", tmp_path) +# +# final_cs <- arrow::read_parquet(tmp_path) +# +# # save cross section points as a parquet to out_path (domain/outputs/cross-sections.parquet) +# arrow::write_parquet( +# 
dplyr::select(final_cs, +# -is_dem_point +# ), +# paste0(DOMAIN_WITH_FEMA_OUTPUT_DIR, "/final_cs2.parquet") +# ) + +# --------------------------------------------------------------------------------- +# ---- Write final cross section points data ---- +# --------------------------------------------------------------------------------- + +# CROSS_SECTIONS_OUTPUT_PATH <- paste0(DOMAIN_WITH_FEMA_OUTPUT_DIR, "/cross-sections.parquet") +CROSS_SECTIONS_OUTPUT_PATH <- paste0(DOMAIN_WITH_FEMA_CROSS_SECTIONS_DIR, "/cross-sections.parquet") +message(round(Sys.time()), " - Saving ML augmented cross section points to:\n - filepath: '", CROSS_SECTIONS_OUTPUT_PATH, "'") + +# save cross section points as a parquet to out_path (domain/outputs/cross-sections.parquet) +arrow::write_parquet( + dplyr::select(final_cs, + -is_dem_point + ), + CROSS_SECTIONS_OUTPUT_PATH +) + +INTERNAL_CROSS_SECTIONS_PATH <- paste0(DOMAIN_WITH_FEMA_CROSS_SECTIONS_DIR, "/cross-sections-is-dem-point.parquet") + +arrow::write_parquet( + final_cs, + INTERNAL_CROSS_SECTIONS_PATH +) + +# --------------------------------------------------------------------------------- +# ---- Substitue diffusive domain DEMs Z values ---- +# --------------------------------------------------------------------------------- + +final_cs <- arrow::read_parquet(CROSS_SECTIONS_OUTPUT_PATH) +# INTERNAL_CROSS_SECTIONS_PATH <- paste0("/Users/anguswatters/Desktop/cross_sections2.parquet") + +bb_df <- lapply(COASTAL_BATHY_DEM_PATHS, function(path) { + r <- terra::rast(path) + extent <- terra::ext(r) + # r <- terra::project(r, "EPSG:5070") + # terra::set.crs(r, "EPSG:5070") + ext_df <- data.frame(lapply(extent, function(i) {i})) + ext_df$crs <- terra::crs(r) + ext_df$file <- basename(path) + ext_df$path <- path + + return(ext_df) +}) %>% + dplyr::bind_rows() + +# final_cs + +# INTERNAL_CROSS_SECTIONS_PATH <- paste0("/Users/anguswatters/Desktop/cross-sections-is-dem-point.parquet") +# arrow::write_parquet( +# final_cs, +# INTERNAL_CROSS_SECTIONS_PATH +# ) + +# CROSS_SECTIONS_OUTPUT_PATH <- paste0(DOMAIN_WITH_FEMA_OUTPUT_DIR, "/cross-sections.parquet") +# final_cs <- arrow::read_parquet(CROSS_SECTIONS_OUTPUT_PATH) +# START_PATH <- paste0("/Users/anguswatters/Desktop/cross_sections2.parquet") +# +# arrow::write_parquet( +# final_cs, +# START_PATH +# ) + +# rm(i, count, cs, df, tmp, aoi, new_tmp, pts_subset, dem, has_pts_in_bb, bb, updated_depths) + + +# CROSS_SECTIONS_OUTPUT_PATH +# START_CROSS_SECTIONS_OUTPUT_PATH <- paste0(DOMAIN_WITH_FEMA_CROSS_SECTIONS_DIR, "/cross-sections.parquet") +START_PATH <- paste0(DOMAIN_WITH_FEMA_CROSS_SECTIONS_DIR, "/cross-sections.parquet") +UPDATED_PATH <- paste0(DOMAIN_WITH_FEMA_CROSS_SECTIONS_DIR, "/coastal-bathy_cross-sections.parquet") + +count <- 0 + +for (i in 1:nrow(bb_df)) { + + # i = 1 + + is_first_iter <- count == 0 + count <- count + 1 + CURR_PATH <- ifelse(is_first_iter, START_PATH, UPDATED_PATH) + + # CURR_PATH + + message(i, " - Checking DEM '", bb_df$file[i], "' for CS PTS...") + + df <- bb_df[i, ] + + START_EPSG_CODE <- 5070 + + cs <- arrow::read_parquet(CURR_PATH) + + cs <- + cs %>% + # dplyr::slice(1:10000) %>% + sf::st_as_sf(coords = c("X", "Y"), crs = START_EPSG_CODE) + + # convert to bounding box CRS + cs <- + cs %>% + sf::st_transform(df$crs[1]) + + # create bounding box shape + bb <- sf::st_as_sf( + sf::st_as_sfc( + sf::st_bbox( + c(xmin = df$xmin, + xmax = df$xmax, + ymax = df$ymax, + ymin = df$ymin + ), + crs = sf::st_crs(df$crs) + ) + ) + ) + + # get pts that are in the bounding box + pts_subset <- 
sf::st_filter( + cs, + bb + ) + + has_pts_in_bb <- nrow(pts_subset) > 0 + + message(nrow(pts_subset), " cs points found within '", df$file, "' DEMs bounding box") + + if(!has_pts_in_bb) { + + message(" > No points to update!") + message(" > Overwritting original cross section points parquet with updated depth values ...") + message(" > '", UPDATED_PATH, "'") + + cs <- + cs %>% + sf::st_transform(START_EPSG_CODE) %>% + dplyr::mutate( + X = sf::st_coordinates(.)[,1], + Y = sf::st_coordinates(.)[,2] + ) %>% + sf::st_drop_geometry() %>% + dplyr::select( + # hy_id, + dplyr::any_of(CROSSWALK_ID), + cs_id, pt_id, + cs_lengthm, + relative_distance, + X, Y, + Z, + class, point_type, + Z_source, + bottom, left_bank, right_bank, valid_banks, + has_relief + ) + + arrow::write_parquet(cs, UPDATED_PATH) + + next + } + + if(has_pts_in_bb) { + + message(" > Loading Raster") + + # load DEM + dem <- terra::rast(df$path) + + message(" > Extracting new cross section depth values from DEM...") + + updated_depths <- + pts_subset %>% + dplyr::mutate( + Z2 = hydrofabric3D:::extract_pt_val(dem, .), + Z_source2 = df$file + # Z = extract_pt_val(terra::rast(dem), .) + ) %>% + sf::st_drop_geometry() %>% + dplyr::select( + dplyr::any_of(CROSSWALK_ID), + cs_id, pt_id, + # Z, + Z2, + # Z_source, + Z_source2 + ) + + message(" > Replacing old depth values with new depth values...") + cs <- + cs %>% + dplyr::left_join( + updated_depths, + by = c(CROSSWALK_ID, "cs_id", "pt_id") + ) %>% + dplyr::mutate( + Z = dplyr::case_when( + !is.na(Z2) ~ Z2, + TRUE ~ Z + ), + Z_source = dplyr::case_when( + !is.na(Z_source2) ~ Z_source2, + TRUE ~ Z_source + ) + ) %>% + dplyr::select(-Z2, -Z_source2) + + message(" > Projecting CS PTs back to starting CRS (", START_EPSG_CODE, ") ...") + cs <- + cs %>% + sf::st_transform(START_EPSG_CODE) + + message(" > Dropping point geometries and preserving X / Y coordinates...") + + cs <- + cs %>% + dplyr::mutate( + X = sf::st_coordinates(.)[,1], + Y = sf::st_coordinates(.)[,2] + ) %>% + sf::st_drop_geometry() %>% + dplyr::select( + # hy_id, + dplyr::any_of(CROSSWALK_ID), + cs_id, pt_id, + cs_lengthm, + relative_distance, + X, Y, + Z, + class, point_type, + Z_source, + bottom, left_bank, right_bank, valid_banks, + has_relief + + # newly added columns (03/06/2024) + ) + + + # cs %>% + # dplyr::slice(1:27) %>% + # hydrofabric3D::plot_cs_pts(crosswalk_id = "id", color = "point_type", size = 4) + # hydrofabric3D::classify_points(crosswalk_id = "id") + # hydrofabric3D::plot_cs_pts(crosswalk_id = "id", color = "point_type", size = 4) + + # cs <- + # cs %>% + # # dplyr::slice(1:27) %>% + # dplyr::select(-class, -point_type, + # -valid_banks, -has_relief, + # -bottom, -left_bank, -right_bank + # ) %>% + # hydrofabric3D::classify_points(crosswalk_id = "id") %>% + # dplyr::select( + # # hy_id, + # dplyr::any_of(CROSSWALK_ID), + # cs_id, pt_id, + # Z, + # relative_distance, + # cs_lengthm, + # class, + # point_type, + # X, Y, + # Z_source, + # bottom, left_bank, right_bank, + # valid_banks, has_relief + # # newly added columns (03/06/2024) + # ) + # hydrofabric3D::plot_cs_pts(crosswalk_id = "id", color = "point_type", size = 4) + + # tmp <- final_cs %>% dplyr::filter(id == "wb-507785", cs_id == 4) + # new_tmp <- cs %>% dplyr::filter(id == "wb-507785", cs_id == 4) + + # hydrofabric3D::plot_cs_pts(tmp, crosswalk_id = "id", color = "point_type", size = 4) + # hydrofabric3D::plot_cs_pts(new_tmp, crosswalk_id = "id", color = "point_type", size = 4) + + message(" > Overwritting original cross section points parquet with 
updated depth values ...") + message(" > '", UPDATED_PATH, "'") + + arrow::write_parquet(cs, UPDATED_PATH) + + } + +} + +cross_sections <- arrow::read_parquet(UPDATED_PATH) + +# cross_sections %>% +# dplyr::select(Z_source) %>% +# dplyr::group_by(Z_source) %>% +# dplyr::count() + +cross_sections <- + cross_sections %>% + dplyr::select( + -point_type, + -class, + -bottom, -left_bank, -right_bank, + -has_relief, -valid_banks + ) + +# reclassify +# system.time({ +cross_sections <- hydrofabric3D::classify_points(cs_pts = cross_sections, + crosswalk_id = CROSSWALK_ID, + pct_of_length_for_relief = PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF +) + +cross_sections <- + cross_sections %>% + dplyr::select( + # hy_id, + dplyr::any_of(CROSSWALK_ID), + cs_id, pt_id, + Z, + relative_distance, + cs_lengthm, + class, + point_type, + X, Y, + Z_source, + bottom, left_bank, right_bank, + valid_banks, has_relief + # newly added columns (03/06/2024) + ) +# }) + +# START_PATH <- paste0(DOMAIN_WITH_FEMA_CROSS_SECTIONS_DIR, "/cross-sections.parquet") +# UPDATED_PATH <- paste0(DOMAIN_WITH_FEMA_CROSS_SECTIONS_DIR, "/coastal-bathy_cross-sections.parquet") + +arrow::write_parquet( + cross_sections, + paste0(DOMAIN_WITH_FEMA_CROSS_SECTIONS_DIR, "/coastal-bathy_cross-sections.parquet") + ) + +# ------------------------------------------------- +# ---- Move final datasets to outputs/ ---- +# ------------------------------------------------- + +select_cs_pts <- function(cs_pts, crosswalk_id = NULL) { + + if(is.null(crosswalk_id)) { + # crosswalk_id <- 'hydrofabric_id' + stop("Please provide a valid 'crosswalk_id' which uniquely identifies each cross section in 'cs_pts'") + } + + cs_pts <- + cs_pts %>% + dplyr::select( + dplyr::any_of(c( + crosswalk_id, + "cs_id", + "pt_id", + "relative_distance", + "cs_lengthm", + "X", + "Y", + "Z", + "Z_source", + "class", + "point_type", + "valid_banks", + "has_relief" + ) + ) + ) + + return(cs_pts) +} + +select_transects <- function(transects, crosswalk_id = NULL) { + + if(is.null(crosswalk_id)) { + # crosswalk_id <- 'hydrofabric_id' + stop("Please provide a valid 'crosswalk_id' which uniquely identifies the flowline associated with each transect in 'transects'") + } + + transects <- hydroloom::rename_geometry(transects, "geometry") + + transects <- + transects %>% + dplyr::select( + dplyr::any_of(c( + crosswalk_id, + "cs_source", + "cs_id", + "cs_measure", + "cs_lengthm", + "geometry" + ) + ) + ) + + return(transects) +} + + +# arrow::read_parquet(paste0(DOMAIN_WITH_FEMA_CS_PTS_DIR, "/final-cs-pts.parquet")) %>% +# select_cs_pts("id") + +# DEM based cross section points parquet +# - FEMA extended +# - CS based extensions +arrow::write_parquet( + arrow::read_parquet(paste0(DOMAIN_WITH_FEMA_CS_PTS_DIR, "/final-cs-pts.parquet")) %>% + select_cs_pts("id"), + # arrow::read_parquet(paste0(DOMAIN_WITH_FEMA_CS_PTS_DIR, "/final-cs-pts.parquet")), + paste0(DOMAIN_WITH_FEMA_OUTPUT_DIR, "/dem_cross-sections.parquet") +) + +# DEM based transect geometry gpkg +# - FEMA extended +# - CS based extensions +sf::write_sf( + sf::read_sf(paste0(DOMAIN_WITH_FEMA_TRANSECTS_DIR, "/cs-point-extension-transects.gpkg")) %>% + select_transects("id"), + # dplyr::rename(geometry = geom), + paste0(DOMAIN_WITH_FEMA_OUTPUT_DIR, "/transects.gpkg") +) + + +# DEM + ML cross section points parquet +# - FEMA extended +# - CS based extensions +# - ML based Z value updates +arrow::write_parquet( + arrow::read_parquet(paste0(DOMAIN_WITH_FEMA_CROSS_SECTIONS_DIR, "/cross-sections.parquet")) %>% + select_cs_pts("id"), + 
paste0(DOMAIN_WITH_FEMA_OUTPUT_DIR, "/ml_cross-sections.parquet") +) + +# DEM + COASTAL BATHY cross section points parquet +# - FEMA extended +# - CS based extensions +# - ML based Z value updates +# - Coastal bathy DEM Z value updates (where applicable) +arrow::write_parquet( + arrow::read_parquet(paste0(DOMAIN_WITH_FEMA_CROSS_SECTIONS_DIR, "/coastal-bathy_cross-sections.parquet")) %>% + select_cs_pts("id"), + paste0(DOMAIN_WITH_FEMA_OUTPUT_DIR, "/coastal-bathy_cross-sections.parquet") +) + +# ------------------------------------------------- +# ---- Data validation ---- +# ------------------------------------------------- + +# paste0(DOMAIN_WITH_FEMA_TRANSECTS_DIR, "/cs-point-extension-transects.gpkg") +# new_trans_path <- paste0(DOMAIN_WITH_FEMA_OUTPUT_DIR, "/transects.gpkg") + +# sf::st_layers(paste0(DOMAIN_WITH_FEMA_TRANSECTS_DIR, "/cs-point-extension-transects.gpkg")) +# sf::st_layers(paste0(DOMAIN_WITH_FEMA_OUTPUT_DIR, "/transects.gpkg")) + +# og_transects <- sf::read_sf(paste0(DOMAIN_WITH_FEMA_TRANSECTS_DIR, "/cs-point-extension-transects.gpkg")) +# new_transects <- sf::read_sf(paste0(DOMAIN_WITH_FEMA_OUTPUT_DIR, "/transects.gpkg")) + +# DOMAIN_WITH_FEMA_CROSS_SECTIONS_DIR + +output_files <- list.files(DOMAIN_WITH_FEMA_OUTPUT_DIR, full.names = T) +# rm(transects, dropped_trans, cs_pts, flowlines, lengths_check, transect_lengths, rel_dist_check, wrong_lengths, mismatches, duplicate_ids, cs_pt_lengths, cs_pts_ids) + +tmp_ids_list <- lapply(output_files, function(i) { + is_gpkg <- endsWith(i, ".gpkg") + is_parquet <- endsWith(i, ".parquet") + + if(is_gpkg) { + x <- + i %>% + sf::read_sf() %>% + hydrofabric3D::get_unique_tmp_ids(x = "id") + return(x) + } + + if(is_parquet) { + x <- + i %>% + arrow::read_parquet() %>% + hydrofabric3D::get_unique_tmp_ids(x = "id") + return(x) + } + return(NULL) + + + }) + + +tmp_ids_list + +# tmp_ids_list[[1]] %in% tmp_ids_list[[2]] + +res <- list() +idxs <- seq_along(tmp_ids_list) + +for (i in seq_along(tmp_ids_list)) { + # idxs <- seq_along(tmp_ids_list) + # i = 1 + curr <- tmp_ids_list[[i]] + + other_idxs <- idxs[idxs != i] + + in_all_others <- lapply(tmp_ids_list[other_idxs], function(k) { + all(k %in% curr) & all(curr %in% k) + }) %>% + unlist() %>% + all() + res[[i]] <- in_all_others +} + +all_ids_are_matching <- + res %>% + unlist() %>% + all() + +# validate_transects(transects, "id") +if (!all_ids_are_matching) { + warning("Not all id/cs_id are included in all transects / cross section point datasets") +} else { + + message("All id / cs_id are correctly within all transects / cross section points datasets") +} + +trans_path <- paste0(DOMAIN_WITH_FEMA_OUTPUT_DIR, "/transects.gpkg") +transects <- sf::read_sf(trans_path) %>% + dplyr::rename(geometry = geom) + +is_validated_transects <- validate_transects(transects, "id") + +is_flowline_validated_transects <- validate_transects_against_flowlines(transects, flines, "id") + +if (!(is_validated_transects && is_flowline_validated_transects)) { + stop(trans_path, " transects failed validity check:", + "\n > is_validated_transects: ", is_validated_transects, + "\n > is_flowline_validated_transects: ", is_flowline_validated_transects + ) +} else { + message("Transects look good to go!") +} + +output_files <- list.files(DOMAIN_WITH_FEMA_OUTPUT_DIR, full.names = T) +cs_pts_files <- output_files[grepl(".parquet", output_files)] + +for (i in seq_along(cs_pts_files)) { + path <- cs_pts_files[i] + message(i, " - Checking cross section points validity for > '", basename(path), "'") + + cs_pts 
<-arrow::read_parquet(path) + + is_validated_cs_pts <- validate_cs_pts(cs_pts, crosswalk_id = "id") + is_transect_validated_cs_pts <- validate_cs_pts_against_transects(cs_pts, transects, crosswalk_id = "id") + + is_valid <- is_validated_cs_pts & is_transect_validated_cs_pts + + if(!is_valid) { + stop(path, " cs_pts failed validity check:", + "\n > is_validated_cs_pts: ", is_validated_cs_pts, + "\n > is_transect_validated_cs_pts: ", is_transect_validated_cs_pts + ) + } else { + message("'", basename(path), "' is valid and is good to go!") + } + + # message("'", basename(path), "' is validated") + + } + +# og_trans <- sf::read_sf(paste0(DOMAIN_WITH_FEMA_TRANSECTS_DIR, "/pre-extension-transects.gpkg")) +# post_trans <- sf::read_sf(paste0(DOMAIN_WITH_FEMA_TRANSECTS_DIR, "/post-extension-transects.gpkg")) +# dplyr::filter(transects, id == "wb-10813") %>% +# sf::st_buffer(2500) +# +# bb <- +# dplyr::filter(transects, id == "wb-10813") %>% +# sf::st_buffer(5000) %>% +# sf::st_bbox() %>% +# sf::st_as_sfc() %>% +# sf::st_sf() +# +# sf::st_filter(transects, bb) +# sf::st_filter(transects, bb) +# sf::st_filter(transects, bb) +# +# mapview::mapview(dplyr::filter(flines, id == "wb-10813")) + +# mapview::mapview( sf::st_filter(transects, bb) , color = "green") + +# mapview::mapview( sf::st_filter(og_trans, bb) , color = "red") + +# mapview::mapview( sf::st_filter(post_trans, bb) , color = "gold") +# +# transects %>% +# sf::st_filter( +# bb +# ) %>% mapview::mapview() +# +# mapview::mapview() + mapview::mapview(dplyr::filter(flines, id == "wb-10813")) + +# mapview::mapview( dplyr::filter(transects, id == "wb-10813"), color = "green") + +# mapview::mapview(dplyr::filter(og_trans, id == "wb-10813"), color = "red") +# +# +# dplyr::filter(og_trans, id == "wb-10813") +# dplyr::filter(post_trans, id == "wb-10813") +# dplyr::filter(transects, id == "wb-10813") +# +# mapview::mapview(dplyr::filter(flines, id == "wb-10813")) + +# mapview::mapview( dplyr::filter(transects, id == "wb-10813"), color = "green") + +# mapview::mapview(dplyr::filter(og_trans, id == "wb-10813"), color = "red") +# +# transects %>% +# dplyr::group_by(id) %>% +# dplyr::filter(cs_id == max(cs_id)) %>% +# dplyr::arrange(cs_id) +# +# # cs_pts_files <- +# cs_pts <-arrow::read_parquet(paste0(DOMAIN_WITH_FEMA_OUTPUT_DIR, "/dem_cross-sections.parquet")) +# +# is_validated_transects <- validate_transects(transects, "id") +# is_flowline_validated_transects <- validate_transects_against_flowlines(transects, flines, "id") +# +# is_validated_cs_pts <- validate_cs_pts(cs_pts, crosswalk_id = "id") +# is_transect_validated_cs_pts <- validate_cs_pts_against_transects(cs_pts, transects, crosswalk_id = "id") + +# sf::st_layers(paste0(DOMAIN_WITH_FEMA_OUTPUT_DIR, "/transects.gpkg"))_ + +validate_transects_self_intersections <- function(transects) { + return( + nrow(transects) == nrow(hydrofabric3D:::rm_self_intersections(transects)) + ) +} + +validate_transects_cs_id_enumeration <- function(transects, crosswalk_id = NULL) { + + # reenumerate the cs_ids for each transect based on cs_measure sorting, and make sure all cross sections are correctly numbered + mismatches <- + transects %>% + sf::st_drop_geometry() %>% + dplyr::group_by(dplyr::across(dplyr::any_of(c(crosswalk_id)))) %>% + dplyr::arrange(cs_measure, .by_group = TRUE) %>% + dplyr::mutate( + new_cs_id = 1:dplyr::n() + ) %>% + dplyr::ungroup() %>% + dplyr::filter(cs_id != new_cs_id) + + # FALSE if there are any transects with different cs_ids to the newly created cs_id + # Otherwise TRUE + 
has_valid_cs_ids <- !(nrow(mismatches) > 0) + + return( + has_valid_cs_ids + ) + +} + +validate_transects_cs_length <- function(transects, crosswalk_id = NULL) { + + # re calculate transect geometry length and compare to cs_lengthm column + wrong_lengths <- + transects %>% + dplyr::mutate( + new_cs_length = as.numeric(sf::st_length(.)) + ) %>% + dplyr::filter(cs_lengthm != new_cs_length) + + # FALSE if there are any transects with different cs_lengthm than the freshly calculated new_cs_length + has_correct_lengths <- !(nrow(wrong_lengths) > 0) + + return( + has_correct_lengths + ) + +} + +validate_transects_unique_ids <- function(transects, crosswalk_id = NULL) { + + duplicate_ids <- + transects %>% + sf::st_drop_geometry() %>% + hydrofabric3D::add_tmp_id(x = crosswalk_id) %>% + dplyr::select(tmp_id) %>% + dplyr::group_by(tmp_id) %>% + dplyr::count() %>% + dplyr::ungroup() %>% + dplyr::filter(n > 1) + + # FALSE if there are ANY rows in the duplicate_ids dataframe above + # (i.e. a count of greater than 1 for any tmp_id (_cs_id)) + has_unique_ids <- !(nrow(duplicate_ids) > 0) + + return( + has_unique_ids + ) + +} + +validate_transects_cs_measure <- function(transects) { + + min_cs_measure <- + transects %>% + sf::st_drop_geometry() %>% + dplyr::pull(cs_measure) %>% + min() + + max_cs_measure <- + transects %>% + sf::st_drop_geometry() %>% + dplyr::pull(cs_measure) %>% + max() + + # cs_measure should always be: + # greater than or equal to 0 AND + # less than or equal to 100 as its a percentage along a flowline + has_valid_cs_measure <- min_cs_measure >= 0 & max_cs_measure <= 100 + + return( + has_valid_cs_measure + ) + +} + +validate_transects_has_complete_geometries <- function(transects, crosswalk_id = NULL) { + + has_empty_geoms <- + transects %>% + sf::st_is_empty() %>% + any() + + return( + !has_empty_geoms + ) + +} + +validate_transects_has_crs <- function(transects) { + + missing_crs <- + transects %>% + sf::st_crs() %>% + is.na() + + return( + !missing_crs + ) + +} + +validate_transects <- function(transects, + crosswalk_id = NULL + ) { + + # # standardize geometry name + # transects <- hydroloom::rename_geometry(transects, "geometry") + + REQUIRED_COLS <- c(crosswalk_id, "cs_id", "cs_source", "cs_measure", "cs_lengthm", "geometry") + + # validate dataframe has all correct columns + has_all_valid_cols <- hydrofabric3D:::validate_df( + x = transects, + cols = REQUIRED_COLS, + obj_name = "transects" + ) + + # Validate every flowline (id) has a cs_id of 1:number of transects + has_valid_cs_ids <- validate_transects_cs_id_enumeration(transects, crosswalk_id = crosswalk_id) + + # Validate there are no self intersections + has_no_self_intersections <- validate_transects_self_intersections(transects) + + # validate the cs_lengthm column equals the actual transect geometry length + has_correct_lengths <- validate_transects_cs_length(transects, crosswalk_id = crosswalk_id) + + # validate no duplicate id / cs_id combos + has_unique_ids <- validate_transects_unique_ids(transects, crosswalk_id = crosswalk_id) + + # validate cs measure is never greater than 100 (i think) + has_valid_cs_measure <- validate_transects_cs_measure(transects) + + # make sure transects have no empty geometries + has_complete_geoemetries <- validate_transects_has_complete_geometries(transects) + + # make sure transects have a CRS + has_crs <- validate_transects_has_crs(transects) + + # if everything is TRUE, return true, otherwise return FALSE (or throw an error...?) 
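+  # (Optional debugging sketch, not part of the original checks: collecting the flags in a named
+  # vector makes it easier to report which specific transect check failed. The "checks" name is
+  # illustrative only and the snippet is left commented out.)
+  # checks <- c(
+  #   cols               = has_all_valid_cols,
+  #   cs_id_enumeration  = has_valid_cs_ids,
+  #   self_intersections = has_no_self_intersections,
+  #   lengths            = has_correct_lengths,
+  #   unique_ids         = has_unique_ids,
+  #   cs_measure         = has_valid_cs_measure,
+  #   geometries         = has_complete_geoemetries,
+  #   crs                = has_crs
+  # )
+  # if (!all(checks)) { message("Failed transect checks: ", paste(names(checks)[!checks], collapse = ", ")) }
+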
+ is_validated_transects <- all( + c( + has_all_valid_cols, + has_valid_cs_ids, + has_no_self_intersections, + has_correct_lengths, + has_unique_ids, + has_valid_cs_measure, + has_complete_geoemetries, + has_crs + ) + ) + + return(is_validated_transects) + +} + +# all ids in transects are in flowlines +validate_transects_ids_in_flowlines <- function(transects, flowlines, crosswalk_id = NULL) { + + # flowlines <- flines + + transect_ids <- + transects %>% + sf::st_drop_geometry() %>% + dplyr::select(dplyr::any_of(crosswalk_id)) %>% + dplyr::pull(dplyr::any_of(crosswalk_id)) %>% + unique() + + flowline_ids <- + flowlines %>% + sf::st_drop_geometry() %>% + dplyr::select(dplyr::any_of(crosswalk_id)) %>% + dplyr::pull(dplyr::any_of(crosswalk_id)) %>% + unique() + + all_transect_ids_in_flowline_ids <- all(transect_ids %in% flowline_ids) + + return( + all_transect_ids_in_flowline_ids + ) + +} + +# make sure no transect crosses more than a single flowline, a single time +validate_transects_flowline_intersections <- function(transects, flowlines) { + + # flowlines <- flines + return ( + nrow(transects) == nrow(hydrofabric3D:::rm_multiflowline_intersections(transects, flowlines)) + ) +} + +# validate 2 SF objects have the same CRS +validate_same_crs <- function(x, y) { + + return ( + sf::st_crs(x) == sf::st_crs(y) + ) + +} + +validate_transects_against_flowlines <- function(transects, + flowlines, + crosswalk_id = NULL + ) { + + # # standardize geometry name + # transects <- hydroloom::rename_geometry(transects, "geometry") + + REQUIRED_COLS <- c(crosswalk_id, "cs_id", "cs_source", "cs_measure", "cs_lengthm", "geometry") + + # validate dataframe has all correct columns + has_all_valid_cols <- hydrofabric3D:::validate_df( + x = transects, + cols = REQUIRED_COLS, + obj_name = "transects" + ) + + # all ids in transects are in flowlines + all_transect_ids_in_flowline_ids <- validate_transects_ids_in_flowlines(transects, flowlines, crosswalk_id = crosswalk_id) + + # transects only intersects a single flowline, a single time + has_valid_flowline_intersects <- validate_transects_flowline_intersections(transects, flowlines) + + # transects and flowlines have the same CRS + has_same_crs <- validate_same_crs(transects, flowlines) + + + # if everything is TRUE, return true, otherwise return FALSE (or throw an error...?) 
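+  # (Optional debugging sketch: if the ID check fails, it can help to list which transect ids have
+  # no matching flowline. "missing_ids" is illustrative only and the snippet is left commented out.)
+  # missing_ids <- setdiff(
+  #   unique(sf::st_drop_geometry(transects)[[crosswalk_id]]),
+  #   unique(sf::st_drop_geometry(flowlines)[[crosswalk_id]])
+  # )
+  # if (length(missing_ids) > 0) { message("Transect ids missing from flowlines: ", paste(missing_ids, collapse = ", ")) }
+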
+ is_flowline_validated_transects <- all( + c( + has_all_valid_cols, + all_transect_ids_in_flowline_ids, + has_valid_flowline_intersects, + has_same_crs + ) + ) + + return(is_flowline_validated_transects) + +} + +# validate_transects_against_cs_pts <- function( +# transects, +# cs_pts, +# crosswalk_id = NULL +# ) { +# +# # # standardize geometry name +# # transects <- hydroloom::rename_geometry(transects, "geometry") +# +# REQUIRED_COLS <- c(crosswalk_id, "cs_id", "cs_source", "cs_measure", "cs_lengthm", "geometry") +# +# # validate dataframe has all correct columns +# has_all_valid_cols <- hydrofabric3D:::validate_df( +# x = transects, +# cols = REQUIRED_COLS, +# obj_name = "transects" +# ) +# } + +validate_cs_pts_cs_id_enumeration <- function(cs_pts, crosswalk_id = NULL) { + + # reenumerate the cs_ids for each transect based on cs_measure sorting, and make sure all cross sections are correctly numbered + mismatches <- + cs_pts %>% + # dplyr::slice(1:150) %>% + sf::st_drop_geometry() %>% + dplyr::select(dplyr::any_of(crosswalk_id), cs_id) %>% + dplyr::group_by(dplyr::across(dplyr::any_of(c(crosswalk_id, "cs_id")))) %>% + dplyr::slice(1) %>% + dplyr::ungroup() %>% + dplyr::group_by(dplyr::across(dplyr::any_of(c(crosswalk_id)))) %>% + dplyr::arrange(cs_id, .by_group = TRUE) %>% + dplyr::mutate( + new_cs_id = 1:dplyr::n() + ) %>% + dplyr::ungroup() %>% + dplyr::filter(cs_id != new_cs_id) + + # FALSE if there are any transects with different cs_ids to the newly created cs_id + # Otherwise TRUE + has_valid_cs_ids <- !(nrow(mismatches) > 0) + + return( + has_valid_cs_ids + ) + +} + +validate_cs_pts_pt_id_enumeration <- function(cs_pts, crosswalk_id = NULL) { + + # reenumerate the pt_ids to make sure the pt_ids are valid values of 1:number of points in cross section + mismatches <- + cs_pts %>% + sf::st_drop_geometry() %>% + dplyr::select(dplyr::any_of(crosswalk_id), cs_id, pt_id) %>% + dplyr::group_by(dplyr::across(dplyr::any_of(c(crosswalk_id, "cs_id")))) %>% + dplyr::arrange(pt_id, .by_group = TRUE) %>% + dplyr::mutate( + new_pt_id = 1:dplyr::n() + ) %>% + dplyr::ungroup() %>% + dplyr::filter(pt_id != new_pt_id) + + # FALSE if there are any cs_pts with pt_ids different from the newly created new_pt_id + # Otherwise TRUE + has_valid_pt_ids <- !(nrow(mismatches) > 0) + + return( + has_valid_pt_ids + ) + +} + +validate_cs_pts_relative_distance <- function(cs_pts, crosswalk_id = NULL) { + + # make sure relative distance is greater than or equal to 0 + min_relative_distance <- + cs_pts %>% + # dplyr::slice(1:50) %>% + sf::st_drop_geometry() %>% + dplyr::pull(relative_distance) %>% + min() + + + # reenumerate the pt_ids to make sure the pt_ids are valid values of 1:number of points in cross section + rel_dist_check <- + cs_pts %>% + # dplyr::slice(1:50) %>% + sf::st_drop_geometry() %>% + dplyr::select(dplyr::any_of(crosswalk_id), cs_id, pt_id, relative_distance, cs_lengthm) %>% + dplyr::group_by(dplyr::across(dplyr::any_of(c(crosswalk_id, "cs_id")))) %>% + dplyr::summarise( + cs_lengthm = max(cs_lengthm), + # min_rel_dist = min(relative_distance), + max_rel_dist = max(relative_distance) + ) %>% + dplyr::ungroup() %>% + dplyr::mutate( + # TODO: as long as the lengths are within 1 meter, thats equal + is_valid_relative_dist = abs(cs_lengthm - max_rel_dist) <= 1 + # approx_equal_lengths = all.equal(cs_lengthm, cs_pts_lengthm, tolerance = 0.01) + ) + + # dplyr::filter(max_rel_dist > cs_lengthm) + + # relative distance is always greater than or equal to 0 and less than the cross sections length + 
has_valid_relative_dist_min <- min_relative_distance >= 0 + has_valid_relative_dist_maximums <- all(rel_dist_check$is_valid_relative_dist) + + return( + has_valid_relative_dist_min && has_valid_relative_dist_maximums + ) + +} + +validate_cs_pts_point_types <- function(cs_pts) { + + # make sure only "left_bank", "right_bank", "channel", and "bottom" values exist in cs_pts point_type column + valid_point_types <- c("left_bank", "right_bank", "channel", "bottom") + + # unique point types in cs_pts + unique_point_types <- unique(cs_pts$point_type) + + has_only_valid_point_types <- all(unique_point_types %in% valid_point_types) + + return( + has_only_valid_point_types + ) + +} + +validate_cs_pts <- function( + cs_pts, + crosswalk_id = NULL +) { + + # # standardize geometry name + # transects <- hydroloom::rename_geometry(transects, "geometry") + + REQUIRED_COLS <- c(crosswalk_id, "cs_id", "pt_id", + "relative_distance", "cs_lengthm", "X", "Y", "Z", "Z_source", + "class", "point_type", "valid_banks", "has_relief" + ) + + # validate dataframe has all correct columns + has_all_valid_cols <- hydrofabric3D:::validate_df( + x = cs_pts, + cols = REQUIRED_COLS, + obj_name = "cs_pts" + ) + + # make sure valid cs_ids + has_valid_cs_pts_cs_ids <- validate_cs_pts_cs_id_enumeration(cs_pts, crosswalk_id = crosswalk_id) + + # make sure valid pt_ids + has_valid_cs_pts_pt_ids <- validate_cs_pts_pt_id_enumeration(cs_pts, crosswalk_id = crosswalk_id) + + # check cs_pts have only valid relative_distance values + has_valid_relative_distances <- validate_cs_pts_relative_distance(cs_pts, crosswalk_id = crosswalk_id) + + has_valid_point_types <- validate_cs_pts_point_types(cs_pts) + + # if everything is TRUE, return true, otherwise return FALSE (or throw an error...?) + is_validated_cs_pts <- all( + c( + has_all_valid_cols, + has_valid_cs_pts_cs_ids, + has_valid_cs_pts_pt_ids, + has_valid_relative_distances, + has_valid_point_types + ) + ) + + return(is_validated_cs_pts) + +} + +# validate all cs_pts id/cs_ids are in the transects +validate_cs_pt_ids_in_transects <- function(cs_pts, transects, crosswalk_id = NULL) { + + # flowlines <- flines + cs_pts_ids <- + cs_pts %>% + sf::st_drop_geometry() %>% + hydrofabric3D::get_unique_tmp_ids(x = crosswalk_id) + + transect_ids <- + transects %>% + sf::st_drop_geometry() %>% + hydrofabric3D::get_unique_tmp_ids(x = crosswalk_id) + + + all_cs_pts_ids_in_transects <- all(cs_pts_ids %in% transect_ids) + all_transect_ids_in_cs_pts <- all(transect_ids %in% cs_pts_ids) + same_number_of_ids <- length(cs_pts_ids) == length(transect_ids) + + is_valid_cs_pts_ids <- all( + c( + all_cs_pts_ids_in_transects, + all_transect_ids_in_cs_pts, + same_number_of_ids + ) + ) + + return( + is_valid_cs_pts_ids + ) + +} + +validate_cs_pts_length_against_transects <- function(cs_pts, transects, crosswalk_id = NULL) { + + cs_pt_lengths <- + cs_pts %>% + sf::st_drop_geometry() %>% + dplyr::select(dplyr::any_of(crosswalk_id), cs_id, cs_lengthm) %>% + dplyr::group_by(dplyr::across(dplyr::any_of(c(crosswalk_id, "cs_id")))) %>% + dplyr::slice(1) %>% + dplyr::ungroup() %>% + dplyr::rename( + cs_pts_lengthm = cs_lengthm + ) + + transect_lengths <- + transects %>% + sf::st_drop_geometry() %>% + dplyr::select(dplyr::any_of(crosswalk_id), cs_id, cs_lengthm) %>% + dplyr::group_by(dplyr::across(dplyr::any_of(c(crosswalk_id, "cs_id")))) %>% + dplyr::slice(1) %>% + dplyr::ungroup() + + lengths_check <- + dplyr::left_join( + transect_lengths, + cs_pt_lengths, + by = c(crosswalk_id, "cs_id") + ) %>% + dplyr::mutate( + # 
TODO: as long as the lengths are within 1 meter, treat them as equal
+      approx_equal_lengths = abs(cs_lengthm - cs_pts_lengthm) <= 1
+      # approx_equal_lengths = all.equal(cs_lengthm, cs_pts_lengthm, tolerance = 0.01)
+    )
+
+  all_lengths_are_equal <- all(lengths_check$approx_equal_lengths)
+
+  return(
+    all_lengths_are_equal
+  )
+
+}
+
+validate_cs_pts_against_transects <- function(
+    cs_pts,
+    transects,
+    crosswalk_id = NULL
+) {
+
+  # # standardize geometry name
+  # transects <- hydroloom::rename_geometry(transects, "geometry")
+
+  # make sure all id/cs_id combos are in both transects and cs_pts
+  has_valid_cs_pts_ids <- validate_cs_pt_ids_in_transects(cs_pts, transects, crosswalk_id = crosswalk_id)
+
+  # make sure cs_lengthm matches from transects to cs_pts
+  has_matching_lengths <- validate_cs_pts_length_against_transects(cs_pts, transects, crosswalk_id = crosswalk_id)
+
+  # if everything is TRUE, return true, otherwise return FALSE (or throw an error...?)
+  is_transect_validated_cs_pts <- all(
+    c(
+      has_valid_cs_pts_ids,
+      has_matching_lengths
+    )
+  )
+
+  return(is_transect_validated_cs_pts)
+
+}
+
+# og_transects <- sf::read_sf(paste0(DOMAIN_WITH_FEMA_TRANSECTS_DIR, "/cs-point-extension-transects.gpkg"))
+# new_transects <- sf::read_sf(paste0(DOMAIN_WITH_FEMA_OUTPUT_DIR, "/transects.gpkg"))
+
+# new_transects %>%
+#   dplyr::arrange(cs_lengthm) %>%
+#   dplyr::slice(1:1000) %>%
+#   mapview::mapview()
+#
+# hydrofabric3D:::rm_self_intersections(new_transects)
+
+# --------------------------------------------------------------------------------------------------------------------------
+# ---- Scratch checks: compare transects before / after removing self intersections ----
+# --------------------------------------------------------------------------------------------------------------------------
+#
+og_trans <- sf::read_sf( "/Users/anguswatters/Desktop/lynker-spatial/domain_with_fema/transects/pre_extension_transects.gpkg")
+
+transects <- sf::read_sf(output_files[[4]])
+
+transects2 <- hydrofabric3D:::rm_self_intersections(transects)
+
+old_ids      <- transects %>% hydrofabric3D::get_unique_tmp_ids("id")
+new_ids      <- transects2 %>% hydrofabric3D::get_unique_tmp_ids("id")
+diff_tmp_ids <- old_ids[!old_ids %in% new_ids]
+
+diff_ids <- lapply(diff_tmp_ids, function(i) {
+  strsplit(i, "_")[[1]][1]
+}) %>% unlist()
+
+diff_trans <-
+  transects %>%
+  # hydrofabric3D::add_tmp_id(x = "id") %>%
+  dplyr::filter(id %in% diff_ids)
+  # sf::st_buffer(50) %>%
+  # sf::st_bbox() %>%
+  # sf::st_as_sfc() %>%
+  # sf::st_sf()
+
+og_diff_trans <-
+  og_trans %>%
+  # hydrofabric3D::add_tmp_id(x = "id") %>%
+  dplyr::filter(id %in% diff_ids)
+
+mapview::mapview(diff_trans, color = "green") +
+  mapview::mapview(og_diff_trans, color = "red")
+
+# bounding box around the differing transects (used to subset the original transects below)
+diff_bb <-
+  diff_trans %>%
+  sf::st_buffer(50) %>%
+  sf::st_bbox() %>%
+  sf::st_as_sfc() %>%
+  sf::st_sf()
+
+old_trans <- transects[lengths(sf::st_intersects(transects, diff_bb)) > 0, ]
+
+# final_cross_sections %>%
+#   dplyr::filter(id == "wb-1000") %>%
+#   dplyr::rename(hy_id = id) %>%
+#   hydrofabric3D::plot_cs_pts(color = "point_type")
+
+# ---------------------------------------------------------------------------------
+# ---- Write final cross section points data ----
+# ---- Diffusive Domain DEM + FEMA + ML
+# ---------------------------------------------------------------------------------
+
+CROSS_SECTIONS_ML_OUTPUT_PATH <- paste0(DOMAIN_WITH_FEMA_OUTPUT_DIR, "/cross-sections.parquet")
+
+message(round(Sys.time()), " - Saving Diffusive DEM + FEMA + ML augmented cross section points to:\n - filepath: '", 
CROSS_SECTIONS_ML_OUTPUT_PATH, "'") +# transects <- +# sum(is.na(final_cross_sections$id)) +# sum(is.na(final_cross_sections$cs_id)) +# sum(is.na(final_cross_sections$pt_id)) +# sum(is.na(final_cross_sections$X)) + +# save cross section points as a parquet to out_path (domain/outputs/cross-sections.parquet) +arrow::write_parquet( + # dplyr::select(final_cs, + # -is_dem_point + # ), + cross_sections, + CROSS_SECTIONS_ML_OUTPUT_PATH +) + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/runners/cs_runner/download_ml_outputs.R b/runners/cs_runner/download_ml_outputs.R new file mode 100644 index 0000000..a3751cc --- /dev/null +++ b/runners/cs_runner/download_ml_outputs.R @@ -0,0 +1,16 @@ +# download_ml_outputs.R +# This script pulls the ML outputs data from http://www.lynker-spatial.com/, saves them into a directory within "BASE_DIR" + +# load config variables +source("runners/cs_runner/config_vars.R") + +# --------------------------------------------------------------------------- +# ---- Get ML outputs data from S3 bucket ---- +# --------------------------------------------------------------------------- + +ml_copy_cmd <- paste0('aws s3 cp ', ML_OUTPUTS_S3_URI, ' ', paste0(ML_OUTPUTS_DIR, basename(ML_OUTPUTS_S3_URI))) + +message("Copying S3 object:\n", ML_OUTPUTS_S3_URI) +system(ml_copy_cmd) +message("Download '", paste0(ML_OUTPUTS_DIR, basename(ML_OUTPUTS_S3_URI)), "' complete!") +message("------------------") diff --git a/runners/cs_runner/download_nextgen.R b/runners/cs_runner/download_nextgen.R index a1aa30a..9c455ee 100644 --- a/runners/cs_runner/download_nextgen.R +++ b/runners/cs_runner/download_nextgen.R @@ -1,42 +1,28 @@ -# Running this script goes and pulls the desired NextGen geopackage datasets from http://www.lynker-spatial.com/, saves them into a directory within "base_dir" -# base_dir is defined within runners/workflow/root_dir.R +# Running this script goes and pulls the desired NextGen geopackage datasets from http://www.lynker-spatial.com/, saves them into a directory within "BASE_DIR" +# BASE_DIR is defined within runners/workflow/root_dir.R # load config variables source("runners/cs_runner/config_vars.R") -# name of S3 bucket -s3_bucket <- "s3://lynker-spatial/" - -# nextgen bucket name -prerelease_prefix <- "s3://lynker-spatial/pre-release/" - -# nextgen model attributes folder in S3 bucket with parquet files -model_attr_prefix <- paste0(s3_bucket, "v20/3D/model_attributes/") - # directory to copy nextgen bucket data too -nextgen_dir <- paste0(base_dir, "/pre-release/") +NEXTGEN_DIR <- paste0(BASE_DIR, "/", S3_BUCKET_NEXTGEN_DIR) # create the directory if it does NOT exist -if(!dir.exists(nextgen_dir)) { - message("Directory does not exist at: \n\t'", nextgen_dir, "'\nCreating directory at: \n\t'", nextgen_dir, "'") +if(!dir.exists(NEXTGEN_DIR)) { + message("Directory does not exist at: \n\t'", NEXTGEN_DIR, "'\nCreating directory at: \n\t'", NEXTGEN_DIR, "'") - dir.create(nextgen_dir) + dir.create(NEXTGEN_DIR) } -# model attributes directory -model_attr_dir <- paste0(base_dir, "/model_attributes/") - -# create the directory if it does NOT exist -if(!dir.exists(model_attr_dir)) { - message("Directory does not exist at: \n\t'", model_attr_dir, "'\nCreating directory at: \n\t'", model_attr_dir, "'") - dir.create(model_attr_dir) -} +# --------------------------------------------------------------------------- +# ---- List/Get the nextgen gpkgs from S3 bucket ---- +# --------------------------------------------------------------------------- # list objects in S3 
bucket, and regular expression match to nextgen_.gpkg pattern command <- paste0('#!/bin/bash # AWS S3 Bucket and Directory information - S3_BUCKET="', prerelease_prefix, '" - DESTINATION_DIR=', nextgen_dir, ' + S3_BUCKET="', S3_BUCKET_NEXTGEN_DIR_URI, '" + DESTINATION_DIR=', NEXTGEN_DIR, ' # Regular expression pattern to match object keys PATTERN="^nextgen_[0-9][0-9][A-Za-z]*\\.gpkg$" @@ -54,8 +40,8 @@ bucket_keys <- system(command, intern = TRUE) # Parse the selected S3 objects keys and copy them to the destination directory for (key in bucket_keys) { - copy_cmd <- paste0('aws s3 cp ', prerelease_prefix, key, " ", nextgen_dir, key) - message("Copying S3 object:\n", paste0(prerelease_prefix, key)) + copy_cmd <- paste0('aws s3 cp ', S3_BUCKET_NEXTGEN_DIR_URI, key, " ", NEXTGEN_DIR, key) + message("Copying S3 object:\n", paste0(S3_BUCKET_NEXTGEN_DIR_URI, key)) system(copy_cmd) @@ -63,37 +49,51 @@ for (key in bucket_keys) { message("------------------") } -# ---- Get nextgen model attributes parquets ---- -# aws s3 ls s3://lynker-spatial/v20/3D/model_attributes/ - -# list objects in S3 bucket, and regular expression match to nextgen_.gpkg pattern -list_model_attr_cmd <- paste0('#!/bin/bash - # AWS S3 Bucket and Directory information - S3_BUCKET="', model_attr_prefix, '" - - # Regular expression pattern to match object keys - PATTERN="^nextgen_[0-9][0-9][A-Za-z]*\\_model_attributes.parquet$" - - # AWS CLI command to list objects in the S3 bucket and use grep to filter them - S3_OBJECTS=$(aws s3 ls "$S3_BUCKET" | awk \'{print $4}\' | grep -E "$PATTERN") - - echo "$S3_OBJECTS"' -) - -# get a list of the model attributes objects in S3 -model_attr_keys <- system(list_model_attr_cmd, intern = TRUE) - -# Parse the selected S3 objects keys and copy them to the destination directory -for (key in model_attr_keys) { - - copy_cmd <- paste0('aws s3 cp ', model_attr_prefix, key, ' ', model_attr_dir, key) - - message("Copying S3 object:\n", paste0(model_attr_prefix, key)) - - system(copy_cmd) - - message("Download '", paste0(model_attr_prefix, key), "' complete!") - message("------------------") -} - - +# # --------------------------------------------------------------------------- +# # ---- List/Get reference features from S3 bucket ---- +# # --------------------------------------------------------------------------- +# +# ## Go get a list of the reference features geopackages from S3 and create a save path using the S3 file names to save reference features to local directory +# +# # list objects in S3 bucket, and regular expression match to nextgen_.gpkg pattern +# list_ref_features <- paste0('#!/bin/bash +# # AWS S3 Bucket and Directory information +# S3_BUCKET="', S3_BUCKET_REF_FEATURES_URI , '" +# +# # Regular expression pattern to match object keys +# PATTERN="reference_features.gpkg" +# +# S3_OBJECTS=$(aws s3 ls "$S3_BUCKET" | awk \'{print $4}\' | grep -E "$PATTERN") +# +# echo "$S3_OBJECTS"' +# ) +# +# # ---- Get a list of reference features geopackages geopackages ---- +# # Run the script to get a list of the nextgen geopackages that matched the regular expression above +# ref_features <- system(list_ref_features, intern = TRUE) +# +# ## Download reference features geopackages and save them to a local directory +# # Parse the selected S3 objects keys and copy them to the destination directory +# for (key in ref_features) { +# # paste0(REF_FEATURES_DIR, "gpkg/") +# copy_cmd <- paste0('aws s3 cp ', S3_BUCKET_REF_FEATURES_URI, key, ' ', paste0(REF_FEATURES_DIR, "gpkg/"), key) +# +# message("Copying S3 
object:\n", paste0(S3_BUCKET_REF_FEATURES_URI, key)) +# system(copy_cmd) +# +# message("Download '", paste0(REF_FEATURES_DIR, "gpkg/", key), "' complete!") +# message("------------------") +# } +# +# # --------------------------------------------------------------------------- +# # ---- Get ML outputs data from S3 bucket ---- +# # --------------------------------------------------------------------------- +# +# ml_copy_cmd <- paste0('aws s3 cp ', ML_OUTPUTS_S3_URI, ' ', paste0(ML_OUTPUTS_DIR, basename(ML_OUTPUTS_S3_URI))) +# +# message("Copying S3 object:\n", ML_OUTPUTS_S3_URI) +# system(ml_copy_cmd) +# +# message("Download '", paste0(ML_OUTPUTS_DIR, basename(ML_OUTPUTS_S3_URI)), "' complete!") +# message("------------------") +# diff --git a/runners/cs_runner/download_ref_features.R b/runners/cs_runner/download_ref_features.R new file mode 100644 index 0000000..fc10e80 --- /dev/null +++ b/runners/cs_runner/download_ref_features.R @@ -0,0 +1,35 @@ +# download_ref_features.R +# This script pulls the reference features geopackage datasets from http://www.lynker-spatial.com/, saves them into a directory within "BASE_DIR" + +# load config variables +source("runners/cs_runner/config_vars.R") + +# --------------------------------------------------------------------------- +# ---- List/Get reference features from S3 bucket ---- +# --------------------------------------------------------------------------- + +# list objects in S3 bucket, and regular expression match to reference_features.gpkg pattern +list_ref_features <- paste0('#!/bin/bash + # AWS S3 Bucket and Directory information + S3_BUCKET="', S3_BUCKET_REF_FEATURES_URI, '" + + # Regular expression pattern to match object keys + PATTERN="reference_features.gpkg" + + S3_OBJECTS=$(aws s3 ls "$S3_BUCKET" | awk \'{print $4}\' | grep -E "$PATTERN") + + echo "$S3_OBJECTS"' +) + +# ---- Get a list of reference features geopackages ---- +# Run the script to get a list of the reference features geopackages that matched the regular expression above +ref_features <- system(list_ref_features, intern = TRUE) + +# Parse the selected S3 objects keys and copy them to the destination directory +for (key in ref_features) { + copy_cmd <- paste0('aws s3 cp ', S3_BUCKET_REF_FEATURES_URI, key, ' ', paste0(REF_FEATURES_DIR, "gpkg/"), key) + message("Copying S3 object:\n", paste0(S3_BUCKET_REF_FEATURES_URI, key)) + system(copy_cmd) + message("Download '", paste0(REF_FEATURES_DIR, "gpkg/", key), "' complete!") + message("------------------") +} diff --git a/runners/cs_runner/new_domain.R b/runners/cs_runner/new_domain.R new file mode 100644 index 0000000..9dda641 --- /dev/null +++ b/runners/cs_runner/new_domain.R @@ -0,0 +1,293 @@ +# Generate the transects + cs_pts + cross sections layers for a single flowlines domain file and DEM file +source("runners/cs_runner/config.R") +source("runners/cs_runner/utils.R") + +# # # # load libraries +# library(hydrofabric3D) +# library(dplyr) +# library(sf) +# install.packages("devtools") + +# # transect bucket prefix +# S3_TRANSECTS_DIR <- paste0(LYNKER_SPATIAL_HF_S3_URI, VERSION, "/3D/transects/") + +# paths to NEW DOMAIN datasets +# NEW_DOMAIN_FILES <- list.files(NEW_DOMAIN_FLOWLINES_DIR, full.names = TRUE) + +# --------------------------------------------------------------------- +# --- Read in flowlines +# --------------------------------------------------------------------- +# read in nextgen data +flines <- sf::read_sf(NEW_DOMAIN_FLOWLINES_PATH, layer = "flowpaths") + +VPUS <- sf::st_transform(nhdplusTools::vpu_boundaries, 
sf::st_crs(flines)) +vpu_flines_int <- sf::st_intersects(VPUS, flines) +vpu_aoi <- VPUS[lengths(vpu_flines_int) > 0, ] + +# VPU IDs of interest +VPU_IDS <- vpu_aoi$VPUID + +# sf::write_sf(aoi, "all_diffusive_combined_vpus.gpkg") +# VPUS[lengths(vpu_fl_int) > 0, ] %>% mapview::mapview() +# nhdplusTools::vpu_boundaries + +# flines %>% +# dplyr::filter(order >= 3) %>% +# # flines %>% +# sf::st_transform(4326) %>% +# mapview::mapview() +# # ggplot2::ggplot() + +# # ggplot2::geom_sf() +# +# +# flines %>% +# sf::st_transform(4326) %>% +# sf::st_bbox() %>% +# sf::st_as_sfc() %>% +# sf::st_as_sf() %>% +# mapview::mapview() + +# --------------------------------------------------------------------- +# --- Add bankful width estimate +# --------------------------------------------------------------------- + +# calculate bankfull width +flines <- + flines %>% + sf::st_transform(26904) %>% + dplyr::mutate( + bf_width = exp(0.700 + 0.365* log(tot_drainage_areasqkm)) + ) %>% + dplyr::select( + hy_id = id, + lengthkm, + tot_drainage_areasqkm, + bf_width, + mainstem, + geometry = geom + ) + +# --------------------------------------------------------------------- +# --- Create transect lines +# --------------------------------------------------------------------- +system.time({ + +# create transect lines +transects <- hydrofabric3D::cut_cross_sections( + net = flines, # flowlines network + id = "hy_id", # Unique feature ID + cs_widths = pmax(50, flines$bf_width * 11), # cross section width of each "id" linestring ("hy_id") + # cs_widths = pmax(50, flines$bf_width), # cross section width of each "id" linestring ("hy_id") + num = 10, # number of cross sections per "id" linestring ("hy_id") + smooth = TRUE, # smooth lines + densify = 3, # densify linestring points + rm_self_intersect = TRUE, # remove self intersecting transects + fix_braids = FALSE, # whether to fix braided flowlines or not + #### Arguments used for when fix_braids = TRUE # TODO: these methods need revision in hydrofabric3D to allow for more flexible processing for data that is NOT COMID based (i.e. 
hy_id) + # terminal_id = NULL, + # braid_threshold = NULL, + # version = 2, + # braid_method = "comid", + # precision = 1, + add = TRUE # whether to add back the original data +) + +}) + +# add cs_source column and rename cs_widths to cs_lengthm +transects <- + transects %>% + dplyr::mutate( + cs_source = CS_SOURCE + ) + +# --------------------------------------------------------------------- +# --- Extract cross section points from DEM +# --------------------------------------------------------------------- +# dem <- terra::rast(NEW_DOMAIN_DEM_PATH) + +system.time({ + +# get cross section point elevations +cs_pts <- hydrofabric3D::cross_section_pts( + + cs = transects, + crosswalk_id = "hy_id", + points_per_cs = NULL, + min_pts_per_cs = 10, + dem = NEW_DOMAIN_DEM_PATH +) + +}) + + +# --------------------------------------------------------------------- +# --- Classify cross section points +# --------------------------------------------------------------------- +# STEP 2: Remove any cross section that has ANY missing (NA) Z values, and classify the points + +cs_pts <- +# cs_pts2 <- + cs_pts %>% + # dplyr::group_by(hy_id, cs_id) %>% + # dplyr::filter(!any(is.na(Z))) %>% + # dplyr::ungroup() %>% + hydrofabric3D::drop_incomplete_cs_pts("hy_id") %>% + hydrofabric3D::classify_points( + crosswalk_id = "hy_id", + pct_of_length_for_relief = PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF + ) + +# cs_pts3 <- +# cs_pts %>% +# # dplyr::group_by(hy_id, cs_id) %>% +# # dplyr::filter(!any(is.na(Z))) %>% +# # dplyr::ungroup() %>% +# hydrofabric3D::drop_incomplete_cs_pts("hy_id") %>% +# hydrofabric3D::classify_points2( +# crosswalk_id = "hy_id", +# pct_of_length_for_relief = PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF +# ) +# +# old_plot <- +# cs_pts2 %>% +# dplyr::slice(1:100) %>% +# hydrofabric3D::plot_cs_pts(color = 'point_type', size = 4)+ +# ggplot2::labs(title = "OLD CLASSES") +# +# new_plot <- +# cs_pts3 %>% +# dplyr::slice(1:100) %>% +# hydrofabric3D::plot_cs_pts(color = 'point_type', size = 4) + +# ggplot2::labs(title = "NEW CLASSES") +# +# library(patchwork) +# old_plot / new_plot +# }) + +ids_original_cs_pts <- hydrofabric3D::add_tmp_id(cs_pts)$tmp_id + +# ---------------------------------------------------------------------------------------------------------------- +# ---- Improve cross section points based on the bank validity & amount of relief in a cross section +# ---------------------------------------------------------------------------------------------------------------- + +# system.time({ +fixed_pts <- hydrofabric3D::get_improved_cs_pts( + cs_pts = cs_pts, # cross section points generated from hydrofabric3D::cross_section_pts() + net = flines, # original flowline network + transects = transects, # original transect lines + crosswalk_id = "hy_id", + points_per_cs = NULL, + min_pts_per_cs = 10, # number of points per cross sections + dem = NEW_DOMAIN_DEM_PATH, # DEM to extract points from + scale = EXTENSION_PCT, # How far to extend transects if the points need to be rechecked + pct_of_length_for_relief = PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF, # percent of cross sections length to be needed in relief calculation to consider cross section to "have relief" + fix_ids = FALSE, + verbose = TRUE +) +# }) + +# ---------------------------------------------------------------------------------------------------------------- +# ---- Update transects with extended transects (if exists) ---- +# ---------------------------------------------------------------------------------------------------------------- + 
+# capture hy_id/cs_ids present after the improvement step (used in the ID checks below)
+ids_after_fixed_pts <- hydrofabric3D::add_tmp_id(fixed_pts)$tmp_id
+
+out_transects <- match_transects_to_extended_cs_pts(
+  transect_lines = transects,
+  fixed_cs_pts   = fixed_pts,
+  crosswalk_id   = "hy_id"
+)
+
+# ----------------------------------------------------------------------------------------------------------------
+# ---- Re-enumerate the transects & cross section points "cs_id" ----
+# ----------------------------------------------------------------------------------------------------------------
+
+# fixed_pts <- hydrofabric3D:::renumber_cs_ids(df = fixed_pts, crosswalk_id = "hy_id")
+# out_transects <- hydrofabric3D:::renumber_cs_ids(
+#   df = dplyr::mutate(out_transects, pt_id = 1),
+#   crosswalk_id = "hy_id"
+# ) %>%
+#   dplyr::select(-pt_id)
+
+fixed_pts     <- hydrofabric3D:::renumber_cs_ids(df = fixed_pts, crosswalk_id = "hy_id")
+out_transects <- hydrofabric3D:::renumber_cs_ids(df = out_transects, crosswalk_id = "hy_id")
+
+# ----------------------------------------------------------------------------------------------------------------
+# ---- STEP 4: Update transects with extended transects (if exists) ----
+# ----------------------------------------------------------------------------------------------------------------
+
+# drop point geometries, keep X / Y coordinate columns, and order the output columns
+fixed_pts <-
+  fixed_pts %>%
+  dplyr::mutate(
+    X = sf::st_coordinates(.)[,1],
+    Y = sf::st_coordinates(.)[,2]
+  ) %>%
+  sf::st_drop_geometry() %>%
+  dplyr::select(
+    hy_id,
+    cs_id,
+    pt_id,
+    cs_lengthm,
+    relative_distance,
+    X, Y, Z,
+    class, point_type,
+    bottom, left_bank, right_bank, valid_banks, has_relief # newly added columns (03/06/2024)
+  )
+
+# add Z_source column for source of elevation data
+fixed_pts <-
+  fixed_pts %>%
+  dplyr::mutate(
+    Z_source = CS_SOURCE
+  ) %>%
+  dplyr::relocate(hy_id, cs_id, pt_id, cs_lengthm, relative_distance, X, Y, Z, Z_source,
+                  class, point_type,
+                  bottom, left_bank, right_bank, valid_banks, has_relief)
+
+ids_before_align <- hydrofabric3D::add_tmp_id(fixed_pts)$tmp_id
+
+message("Aligning banks and smoothing bottoms...")
+fixed_pts <- hydrofabric3D::align_banks_and_bottoms(fixed_pts)
+
+ids_after_align <- hydrofabric3D::add_tmp_id(fixed_pts)$tmp_id
+
+message("Reclassifying cross section points...")
+fixed_pts <- hydrofabric3D::classify_points(
+  cs_pts                   = fixed_pts,
+  crosswalk_id             = "hy_id",
+  pct_of_length_for_relief = PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF
+)
+
+ids_after_reclassify <- hydrofabric3D::add_tmp_id(fixed_pts)$tmp_id
+
+if(all(ids_original_cs_pts %in% ids_after_fixed_pts)) {
+  message("All hy_id/cs_ids in ORIGINAL DEM point extraction were found in the FIXED points")
+} else {
+  message(" >>> Missing hy_id/cs_ids in ORIGINAL DEM point extraction compared to the FIXED points")
+}
+
+if(all(ids_before_align %in% ids_after_align)) {
+  message("All hy_id/cs_ids are kept intact after bank alignment and bottom smoothing")
+} else {
+  message(" >>> Missing hy_id/cs_ids after bank alignment and bottom smoothing")
+}
+
+if(all(ids_after_align %in% ids_after_reclassify)) {
+  message("All hy_id/cs_ids are kept intact after RECLASSIFICATION")
+} else {
+  message(" >>> Missing hy_id/cs_ids after RECLASSIFICATION")
+}
+
+sf::write_sf(
+  out_transects,
+  paste0(NEW_DOMAIN_TRANSECTS_DIR, "/hi_transects.gpkg")
+)
+
+arrow::write_parquet(
+  fixed_pts,
+  paste0(NEW_DOMAIN_CS_PTS_DIR, "/hi_cs_pts.parquet")
+)
+
+# ----------------------------------------------------------------------------------------------------------------
+# ----------------------------------------------------------------------------------------------------------------
+# 
---------------------------------------------------------------------------------------------------------------- \ No newline at end of file diff --git a/runners/cs_runner/set_path_variables.R b/runners/cs_runner/set_path_variables.R new file mode 100644 index 0000000..b590024 --- /dev/null +++ b/runners/cs_runner/set_path_variables.R @@ -0,0 +1,10 @@ +# Generate the list of file paths for locally stored nextgen datasets: +# - NEXTGEN_FILES --> (list NEXTGEN_DIR files that were downloaded via download_nextgen.R) + + +# load config variables +source("runners/cs_runner/config_vars.R") + +# NOTE: SET VARIABLE FOR REST OF PROCESSING +# paths to nextgen datasets +NEXTGEN_FILES <- list.files(NEXTGEN_DIR, full.names = FALSE) diff --git a/runners/cs_runner/test_cs_pts.R b/runners/cs_runner/test_cs_pts.R new file mode 100644 index 0000000..7031bb6 --- /dev/null +++ b/runners/cs_runner/test_cs_pts.R @@ -0,0 +1,256 @@ +# Generate the flowlines layer for the final cross_sections_.gpkg for each VPU +source("runners/cs_runner/config.R") + +# # load libraries +library(hydrofabric3D) +library(dplyr) +library(sf) + +# paths to nextgen datasets +NEXTGEN_FILES <- list.files(NEXTGEN_DIR, full.names = FALSE) + +# paths to nextgen datasets +transect_files <- list.files(TRANSECTS_DIR, full.names = FALSE) +transect_files <- transect_files[!grepl("updated_", transect_files)] + +REF_FEATURES <- list.files(REF_FEATURES_GPKG_DIR, full.names = FALSE) + +# reference features dataframe +ref_df <- data.frame( + vpu = sapply(strsplit(REF_FEATURES, "_", fixed = TRUE), function(i) { i[1] }), + ref_file = REF_FEATURES +) + +# ensure the files are in the same order and matched up by VPU +path_df <- align_files_by_vpu( + x = NEXTGEN_FILES, + y = transect_files, + base = BASE_DIR +) %>% + dplyr::left_join( + ref_df, + by = "vpu" + ) + +# loop over the nextgen and transect datasets (by VPU) and extract point elevations across points on each transect line, +# then classify the points, and create a parquet file with hy_id, cs_id, pt_id, X, Y, Z data. +# Save parquet locally and upload to specified S3 bucket + +# output_path <- paste0(BASE_DIR, "/test_out/") + +for (i in 20:nrow(path_df)) { + + # i = 8 + + start <- Sys.time() + + # nextgen file and full path + nextgen_file <- path_df$x[i] + nextgen_path <- paste0(NEXTGEN_DIR, nextgen_file) + + # model attributes file and full path + transect_file <- path_df$y[i] + transect_path <- paste0(TRANSECTS_DIR, transect_file) + + # model attributes file and full path + ref_file <- path_df$ref_file[i] + ref_path <- paste0(REF_FEATURES_DIR, "gpkg/", ref_file) + + # current VPU being processed + VPU <- path_df$vpu[i] + + start <- Sys.time() + + message("Creating VPU ", VPU, + " cross section points:\n - flowpaths: '", nextgen_file, + "'\n - transects: '", transect_file, "'", + "\n - waterbodies: '", ref_file, "'", + "'\n - start time: '", start, "'" + ) + + ################### + message("Reading in transects...\n > ", transect_file) + # read in transects data + transects <- sf::read_sf(transect_path) + + message("Reading in flowlines... \n > ", nextgen_file) + # read in nextgen data + flines <- sf::read_sf(nextgen_path, layer = "flowpaths") + + message("Reading in waterbodies... 
\n > ", ref_file) + # read in waterbodies reference features layer + waterbodies <- sf::read_sf(ref_path, layer = "waterbodies") + + # Update flowlines and transects to remove flowlines and transects that intersect with reference_features waterbodies + feature_subsets <- wb_intersects(flines, transects, waterbodies) + + # replace flowlines and transects objects with updated versions in "updated_features" + flines <- flines[feature_subsets$valid_flowlines, ] + transects <- transects[feature_subsets$valid_transects, ] + + rm(waterbodies) + gc() + + start_cs_pts <- Sys.time() + + # # ------------------------------------------------------------------------ + # # ------ TESTING DATA ------- + # # ------------------------------------------------------------------------ + # flines <- + # flines %>% + # dplyr::slice(1:3500) + # + # transects <- + # transects %>% + # dplyr::filter(hy_id %in% flines$id) + + # ------------------------------------------------------------------------ + + message("Extracting cross section points (", start_cs_pts, ")") + # ---------------------------------------------------------------------------------------------------------------- + # ---- STEP 1: Extract cs points from DEM ---- + # ---------------------------------------------------------------------------------------------------------------- + # system.time({ + # tmp <- transects[1:10,] + # + # get cross section point elevations + cs_pts <- hydrofabric3D::cross_section_pts( + + cs = transects, + crosswalk_id = "hy_id", + points_per_cs = NULL, + min_pts_per_cs = 10, + dem = DEM_PATH + ) + + # }) + + # ---------------------------------------------------------------------------------------------------------------- + # ---- STEP 2: Remove any cross section that has ANY missing (NA) Z values, and classify the points ---- + # ---------------------------------------------------------------------------------------------------------------- + + # sf::write_sf(cs_pts, "/Users/anguswatters/Desktop/test_improve_cs_pts_11.gpkg") + # sf::write_sf(flines, "/Users/anguswatters/Desktop/test_improve_flines_11.gpkg") + # sf::write_sf(transects, "/Users/anguswatters/Desktop/test_improve_transects_11.gpkg") + + # sf::write_sf(flines, "/Users/anguswatters/Desktop/test_improve_flines_11_2.gpkg") + # sf::write_sf(transects, "/Users/anguswatters/Desktop/test_improve_transects_11_2.gpkg") + # cs_pts %>% + # dplyr::group_by(hy_id, cs_id) %>% + # dplyr::filter(!any(is.na(Z))) %>% + # dplyr::ungroup() + # + # cs_pts %>% + # hydrofabric3D::drop_incomplete_cs_pts("hy_id") + + # system.time({ + + # STEP 2: Remove any cross section that has ANY missing (NA) Z values, and classify the points + cs_pts <- + # cs_pts2 <- + cs_pts %>% + # dplyr::group_by(hy_id, cs_id) %>% + # dplyr::filter(!any(is.na(Z))) %>% + # dplyr::ungroup() %>% + hydrofabric3D::drop_incomplete_cs_pts("hy_id") %>% + hydrofabric3D::classify_points( + crosswalk_id = "hy_id", + pct_of_length_for_relief = PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF + ) + + # }) + + ids_original_cs_pts <- hydrofabric3D::add_tmp_id(cs_pts)$tmp_id + + # ids_original_cs_pts <- hydrofabric3D::add_tmp_id(cs_pts2)$tmp_id + + # output_path <- paste0(BASE_DIR, "/test_out/") + # ---------------------------------------------------------------------------------------------------------------- + # ---- Cross section points parquet to S3 ---- + # ---------------------------------------------------------------------------------------------------------------- + + # classify the cross section points + # fixed_pts <- + cs_pts 
<- + cs_pts %>% + hydrofabric3D:::pts_to_XY() %>% + dplyr::select( + hy_id, cs_id, pt_id, + cs_lengthm, + relative_distance, + X, Y, Z, + class, point_type, + bottom, left_bank, right_bank, valid_banks, has_relief # newly added columns (03/06/2024) + ) %>% + dplyr::mutate( + Z_source = CS_SOURCE + ) %>% + dplyr::relocate(hy_id, cs_id, pt_id, cs_lengthm, relative_distance, X, Y, Z, Z_source, + class, point_type, + bottom, left_bank, right_bank, valid_banks, has_relief) + + ids_before_align <- hydrofabric3D::add_tmp_id(cs_pts)$tmp_id + + message("Aligning banks and smoothing bottoms...") + cs_pts <- hydrofabric3D::align_banks_and_bottoms(cs_pts = cs_pts, crosswalk_id = "hy_id") + # fixed_pts <- hydrofabric3D::align_banks_and_bottoms(cs_pts = fixed_pts, crosswalk_id = "hy_id") + + ids_after_align <- hydrofabric3D::add_tmp_id(cs_pts)$tmp_id + + message("Reclassifying cross section points...") + cs_pts <- hydrofabric3D::classify_points( + cs_pts = cs_pts, + crosswalk_id = "hy_id", + pct_of_length_for_relief = PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF + ) + + ids_after_reclassify <- hydrofabric3D::add_tmp_id(cs_pts)$tmp_id + + # if(all(ids_original_cs_pts %in% ids_after_fixed_pts)) { + # message("All hy_id/cs_ids in ORIGINAL DEM point extraction were found in the FIXED points") + # } else { + # message(" >>> Missing hy_id/cs_ids in ORIGINAL DEM point extraction compared to the FIXED points") + # } + # + if(all(ids_before_align %in% ids_after_align)) { + message("All hy_id/cs_ids are kept in tact after bank alignment and bottom smoothing") + } else { + message(" >>> Missing hy_id/cs_ids after bank alignment and bottom smoothing") + } + + if(all(ids_after_align %in% ids_after_reclassify)) { + message("All hy_id/cs_ids are kept in tact after RECLASSIFICATION") + } else { + message(" >>> Missing hy_id/cs_ids after RECLASSIFICATION") + } + + # all(hydrofabric3D::add_tmp_id(fixed_pts2)$tmp_id %in% hydrofabric3D::add_tmp_id(fixed_pts)$tmp_id) + # all(hydrofabric3D::add_tmp_id(fixed_pts4)$tmp_id %in% hydrofabric3D::add_tmp_id(fixed_pts)$tmp_id) + + ############################################################################## + + # ---------------------------------------------------------------------------------------------------------------- + # ---- Upload the cross section points parquet to S3 ---- + # ---------------------------------------------------------------------------------------------------------------- + + # # name of file and path to save transects gpkg too + out_file <- paste0("nextgen_", path_df$vpu[i], "_cross_sections.parquet") + out_path <- paste0(BASE_DIR, "/test_out/", out_file) + + message("Saving cross section points to:\n - filepath: '", out_path, "'") + + # save cross section points as a parquet to out_path (lynker-spatial/02_cs_pts/cs_pts_.parquet) + arrow::write_parquet(cs_pts, out_path) + # sf::write_sf(cs_pts2, "/Users/anguswatters/Desktop/test_improve_cs_pts_classified_11.gpkg") + # sf::write_sf(cs_pts, "/Users/anguswatters/Desktop/test_improve_cs_pts_classified_11_2.gpkg") + + end <- Sys.time() + + message("Finished cross section point generation for VPU ", VPU) + message("- Completed at: ", end) + message("==========================") + + rm(cs_pts) + gc() + gc() +} diff --git a/runners/cs_runner/test_fix_cs_pts.R b/runners/cs_runner/test_fix_cs_pts.R new file mode 100644 index 0000000..cbfaae8 --- /dev/null +++ b/runners/cs_runner/test_fix_cs_pts.R @@ -0,0 +1,278 @@ +# Generate the flowlines layer for the final cross_sections_.gpkg for each VPU 
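+# test_fix_cs_pts.R
+# This script reads previously generated cross section points (from START_CS_PTS_DIR) along with the
+# matching transects, flowlines, and waterbody reference features for each VPU, drops features that
+# intersect waterbodies, and re-runs hydrofabric3D::get_improved_cs_pts() to improve cross sections
+# flagged with invalid banks or missing relief.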
+source("runners/cs_runner/config.R") + +# # load libraries +library(hydrofabric3D) +library(dplyr) +library(sf) + +# paths to nextgen datasets +NEXTGEN_FILES <- list.files(NEXTGEN_DIR, full.names = FALSE) + +# paths to nextgen datasets +transect_files <- list.files(TRANSECTS_DIR, full.names = FALSE) +transect_files <- transect_files[!grepl("updated_", transect_files)] + +REF_FEATURES <- list.files(REF_FEATURES_GPKG_DIR, full.names = FALSE) + +START_CS_PTS_DIR <- '/Users/anguswatters/Desktop/lynker-spatial/test_out' +start_cs_pts_paths <- list.files(START_CS_PTS_DIR, full.names = T) + +END_CS_PTS_DIR <- '/Users/anguswatters/Desktop/lynker-spatial/test_fix_out' + +# reference features dataframe +ref_df <- data.frame( + vpu = sapply(strsplit(REF_FEATURES, "_", fixed = TRUE), function(i) { i[1] }), + ref_file = REF_FEATURES +) + +# ensure the files are in the same order and matched up by VPU +path_df <- align_files_by_vpu( + x = NEXTGEN_FILES, + y = transect_files, + base = BASE_DIR +) %>% + dplyr::left_join( + ref_df, + by = "vpu" + ) +path_df$cs_pts_path <- start_cs_pts_paths + +# path_df + +# loop over the nextgen and transect datasets (by VPU) and extract point elevations across points on each transect line, +# then classify the points, and create a parquet file with hy_id, cs_id, pt_id, X, Y, Z data. +# Save parquet locally and upload to specified S3 bucket + +# output_path <- paste0(BASE_DIR, "/test_out/") + +for (i in 15:nrow(path_df)) { + # i = 8 + + start <- Sys.time() + + start_cs_pts_path <- path_df$cs_pts_path[i] + + # nextgen file and full path + nextgen_file <- path_df$x[i] + nextgen_path <- paste0(NEXTGEN_DIR, nextgen_file) + + # model attributes file and full path + transect_file <- path_df$y[i] + transect_path <- paste0(TRANSECTS_DIR, transect_file) + + # model attributes file and full path + ref_file <- path_df$ref_file[i] + ref_path <- paste0(REF_FEATURES_DIR, "gpkg/", ref_file) + + # current VPU being processed + VPU <- path_df$vpu[i] + + start <- Sys.time() + + message("Creating VPU ", VPU, + " cross section points:\n - flowpaths: '", nextgen_file, + "'\n - start cs pts: '", basename(start_cs_pts_path), "'", + "'\n - transects: '", transect_file, "'", + "\n - waterbodies: '", ref_file, "'", + "'\n - start time: '", start, "'" + ) + + ################### + message("Reading in starting CS points...\n > ", basename(start_cs_pts_path)) + cs_pts <- arrow::read_parquet(start_cs_pts_path) + + message("Reading in transects...\n > ", transect_file) + # read in transects data + transects <- sf::read_sf(transect_path) + + message("Reading in flowlines... \n > ", nextgen_file) + # read in nextgen data + flines <- sf::read_sf(nextgen_path, layer = "flowpaths") + + message("Reading in waterbodies... 
\n > ", ref_file) + # read in waterbodies reference features layer + waterbodies <- sf::read_sf(ref_path, layer = "waterbodies") + + # Update flowlines and transects to remove flowlines and transects that intersect with reference_features waterbodies + feature_subsets <- wb_intersects(flines, transects, waterbodies) + + # replace flowlines and transects objects with updated versions in "updated_features" + flines <- flines[feature_subsets$valid_flowlines, ] + transects <- transects[feature_subsets$valid_transects, ] + + rm(waterbodies) + gc() + + start_cs_pts <- Sys.time() + flines <- dplyr::rename(flines, hy_id = id) + + # test_ids <- + # flines %>% + # dplyr::slice(1:100) %>% + # .$hy_id %>% + # unique() + # flines <- + # flines %>% + # dplyr::filter( + # hy_id %in% test_ids + # ) + # transects <- + # transects %>% + # dplyr::filter( + # hy_id %in% test_ids + # ) + # cs_pts <- + # cs_pts %>% + # dplyr::filter( + # hy_id %in% test_ids + # ) + + ids_original_cs_pts <- hydrofabric3D::add_tmp_id(cs_pts)$tmp_id + + # # ------------------------------------------------------------------------ + # # ------ TESTING DATA ------- + # # ------------------------------------------------------------------------ + # system.time({ + fixed_pts <- hydrofabric3D::get_improved_cs_pts( + cs_pts = cs_pts, # cross section points generated from hydrofabric3D::cross_section_pts() + net = flines, # original flowline network + # net = flines, # original flowline network + transects = transects, # original transect lines + crosswalk_id = "hy_id", + points_per_cs = NULL, + min_pts_per_cs = 10, # number of points per cross sections + dem = DEM_PATH, # DEM to extract points from + scale = EXTENSION_PCT, # How far to extend transects if the points need to be rechecked + pct_of_length_for_relief = PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF, # percent of cross sections length to be needed in relief calculation to consider cross section to "have relief" + fix_ids = FALSE, + verbose = TRUE + ) + # }) + # fixed_pts + + # fixed_pts %>% + # # cs_pts %>% + # sf::st_drop_geometry() %>% + # # dplyr::left_join( + # # stream_order, + # # by = "hy_id" + # # ) %>% + # dplyr::select(dplyr::any_of("hy_id"), cs_id, + # # vpu, order, + # valid_banks, has_relief) %>% + # dplyr::group_by( + # dplyr::across(dplyr::any_of(c("hy_id", "cs_id"))) + # ) %>% + # dplyr::slice(1) %>% + # dplyr::ungroup() %>% + # # dplyr::group_by( + # # dplyr::across(dplyr::any_of(c("vpu", "order"))) + # # ) %>% + # dplyr::count(valid_banks, has_relief) + + + # transects %>% + # sf::st_drop_geometry() %>% + # dplyr::group_by(id, cs_id) + # fixed_pts2$is_extended %>% sum() + ids_after_fixed_pts <- hydrofabric3D::add_tmp_id(fixed_pts)$tmp_id + # ---------------------------------------------------------------------------------------------------------------- + # ---- Cross section points parquet to S3 ---- + # ---------------------------------------------------------------------------------------------------------------- + + # classify the cross section points + fixed_pts <- + fixed_pts %>% + # dplyr::mutate( + # X = sf::st_coordinates(.)[,1], + # Y = sf::st_coordinates(.)[,2] + # ) %>% + sf::st_drop_geometry() %>% + dplyr::select( + hy_id, cs_id, pt_id, + cs_lengthm, + relative_distance, + X, Y, Z, + class, point_type, + bottom, left_bank, right_bank, valid_banks, has_relief # newly added columns (03/06/2024) + ) + + # # Drop point geometries, leaving just X, Y, Z values + # fixed_pts <- sf::st_drop_geometry(fixed_pts) + + # add Z_source column for source of elevation 
data + fixed_pts <- + fixed_pts %>% + dplyr::mutate( + Z_source = CS_SOURCE + ) %>% + dplyr::relocate(hy_id, cs_id, pt_id, cs_lengthm, relative_distance, X, Y, Z, Z_source, + class, point_type, + bottom, left_bank, right_bank, valid_banks, has_relief) + + ids_before_align <- hydrofabric3D::add_tmp_id(fixed_pts)$tmp_id + + message("Aligning banks and smoothing bottoms...") + fixed_pts <- hydrofabric3D::align_banks_and_bottoms(cs_pts = fixed_pts, crosswalk_id = "hy_id") + # fixed_pts <- hydrofabric3D::align_banks_and_bottoms(cs_pts = fixed_pts, crosswalk_id = "hy_id") + + ids_after_align <- hydrofabric3D::add_tmp_id(fixed_pts)$tmp_id + + message("Reclassifying cross section points...") + fixed_pts <- hydrofabric3D::classify_points( + cs_pts = fixed_pts, + crosswalk_id = "hy_id", + pct_of_length_for_relief = PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF + ) + + ids_after_reclassify <- hydrofabric3D::add_tmp_id(fixed_pts)$tmp_id + + if(all(ids_original_cs_pts %in% ids_after_fixed_pts)) { + message("All hy_id/cs_ids in ORIGINAL DEM point extraction were found in the FIXED points") + } else { + message(" >>> Missing hy_id/cs_ids in ORIGINAL DEM point extraction compared to the FIXED points") + } + + if(all(ids_before_align %in% ids_after_align)) { + message("All hy_id/cs_ids are kept intact after bank alignment and bottom smoothing") + } else { + message(" >>> Missing hy_id/cs_ids after bank alignment and bottom smoothing") + } + + if(all(ids_after_align %in% ids_after_reclassify)) { + message("All hy_id/cs_ids are kept intact after RECLASSIFICATION") + } else { + message(" >>> Missing hy_id/cs_ids after RECLASSIFICATION") + } + + # all(hydrofabric3D::add_tmp_id(fixed_pts2)$tmp_id %in% hydrofabric3D::add_tmp_id(fixed_pts)$tmp_id) + # all(hydrofabric3D::add_tmp_id(fixed_pts4)$tmp_id %in% hydrofabric3D::add_tmp_id(fixed_pts)$tmp_id) + + ############################################################################## + + # ---------------------------------------------------------------------------------------------------------------- + # ---- Upload the cross section points parquet to S3 ---- + # ---------------------------------------------------------------------------------------------------------------- + + # # name of file and path to save the cross section points parquet to + out_file <- paste0("nextgen_fixed_", path_df$vpu[i], "_cross_sections.parquet") + out_path <- paste0(BASE_DIR, "/test_fix_out/", out_file) + + message("Saving cross section points to:\n - filepath: '", out_path, "'") + + # save cross section points as a parquet to out_path (lynker-spatial/02_cs_pts/cs_pts_.parquet) + arrow::write_parquet(fixed_pts, out_path) + # sf::write_sf(cs_pts2, "/Users/anguswatters/Desktop/test_improve_cs_pts_classified_11.gpkg") + # sf::write_sf(cs_pts, "/Users/anguswatters/Desktop/test_improve_cs_pts_classified_11_2.gpkg") + + end <- Sys.time() + + message("Finished cross section point generation for VPU ", VPU) + message("- Completed at: ", end) + message("==========================") + + rm(cs_pts, fixed_pts) + gc() + gc() +} diff --git a/runners/cs_runner/utils.R b/runners/cs_runner/utils.R new file mode 100644 index 0000000..47423c1 --- /dev/null +++ b/runners/cs_runner/utils.R @@ -0,0 +1,2547 @@ +# Create an empty file structure +# base_dir: character, top level directory path +# Directory tree: +# base_dir/ +# └── lynker-spatial/ +# ├── hydrofabric/ +# ├── dem/ +# ├── vrt/ +# ├── tif/ +# ├── cs-extension-polygons/ +create_local_hydrofabric_base_dirs <- function(base_dir) { + + + # build paths + hydrofabric_dir <-
paste0(base_dir, "/hydrofabric") + + # DEM dirs + dem_dir <- file.path(base_dir, "dem") + dem_vrt_dir <- file.path(dem_dir, "vrt") + dem_tif_dir <- file.path(dem_dir, "tif") + + # polygons for transect extensions + cs_extension_polygons_dir <- file.path(base_dir, "cs-extension-polygons") + + # FEMA data + fema_dir <- file.path(base_dir, "fema") + + fema_fgb_dir <- file.path(fema_dir, "fema-fgb") + fema_geojson_dir <- file.path(fema_dir, "fema-geojson") + fema_clean_dir <- file.path(fema_dir, "fema-clean") + fema_gpkg_dir <- file.path(fema_dir, "fema-gpkg") + + # BY VPU folders + VPU_IDS <- get_vpu_ids() + fema_by_vpu_dir <- file.path(fema_dir, "fema-by-vpu") + fema_by_vpu_subdirs <- paste0(fema_by_vpu_dir, "/vpu-", VPU_IDS) + + # create base directories + create_if_not_exists(base_dir) + create_if_not_exists(hydrofabric_dir) + + # DEM dirs + create_if_not_exists(dem_dir) + create_if_not_exists(dem_vrt_dir) + create_if_not_exists(dem_tif_dir) + + # extension polygons + create_if_not_exists(cs_extension_polygons_dir) + + + # Create FEMA folders + create_if_not_exists(fema_dir) + create_if_not_exists(fema_fgb_dir) + create_if_not_exists(fema_geojson_dir) + create_if_not_exists(fema_clean_dir) + create_if_not_exists(fema_gpkg_dir) + create_if_not_exists(fema_by_vpu_dir) + + for (path in fema_by_vpu_subdirs) { + create_if_not_exists(path) + } + +} + +get_vpu_ids <- function() { + VPU_IDS <- c('01', '02', '03N', '03S', '03W', '04', '05', '06', '07', '08', '09', + '10L', '10U', '11', '12', '13', '14', '15', '16', '17', '18', '20', '21') + # VPU_IDS <- sf::st_drop_geometry(nhdplusTools::get_boundaries())$VPUID + + return(VPU_IDS) + +} + +# # Base directory for local file storage +# BASE_DIR <- '/Volumes/T7SSD/lynker-spatial' +# base_dir <- BASE_DIR +# # FEMA100 year flood map FGB save location (temporary, will be deleted after processing) +# FEMA_FGB_PATH <- file.path(BASE_DIRS_LIST$fema_dir, "fema_fgb") +# FEMA_GEOJSON_PATH <- file.path(BASE_DIRS_LIST$fema_dir, "fema_geojson") +# FEMA_CLEAN_PATH <- file.path(BASE_DIRS_LIST$fema_dir, "fema_clean") +# FEMA_GPKG_PATH <- file.path(BASE_DIRS_LIST$fema_dir, "fema_gpkg") +# FEMA_BY_VPU_PATH <- file.path(BASE_DIRS_LIST$fema_dir, "FEMA_BY_VPU") +# +# +# VPU_IDS <- c('01', '02', '03N', '03S', '03W', '04', '05', '06', '07', '08', '09', +# '10L', '10U', '11', '12', '13', '14', '15', '16', '17', '18', '20', '21') +# # VPU_IDS <- sf::st_drop_geometry(nhdplusTools::get_boundaries())$VPUID +# +# paste0("'", sf::st_drop_geometry(nhdplusTools::get_boundaries())$VPUID, "'", collapse = ", ") +# FEMA_VPU_SUBFOLDERS <- paste0(FEMA_BY_VPU_PATH, "/VPU_", VPU_IDS) + +# Create an empty file structure for a new version within a specified base_dir +# base_dir: character, top level directory path +# Directory tree: +# base_dir/ +# └── lynker-spatial/ +# ├── hydrofabric/ + # ├── version_number/ + # ├── network/ + # ├── transects/ + # ├── cross-sections/ + # ├── dem/ + # ├── dem-ml/ + # ├── dem-coastal-bathy/ + # ├── dem-points/ +create_new_version_dirs <- function(base_dir, version, with_output = FALSE) { + # version = "v3.0" + # base_dir <- BASE_DIR + + # build paths + hydrofabric_dir <- paste0(base_dir, "/hydrofabric") + version_base_dir <- paste0(hydrofabric_dir, "/", version) + + # polygons for transect extensions + ml_dir <- paste0(version_base_dir, "/ml") + + # reference features + ref_features_dir <- paste0(version_base_dir, "/reference-features") + + # conus network gpkg + network_dir <- paste0(version_base_dir, "/network") + + # transects + transects_dir <- 
paste0(version_base_dir, "/transects") + transects_base_dir <- paste0(transects_dir, "/base") + transects_fema_extended_dir <- paste0(transects_dir, "/extended-by-fema") + transects_cs_extended_dir <- paste0(transects_dir, "/extended-by-cs-attributes") + transects_output_dir <- paste0(transects_dir, "/output") + + # cross sections dirs + cross_sections_dir <- paste0(version_base_dir, "/cross-sections") + cross_sections_dem_dir <- paste0(cross_sections_dir, "/dem") + cross_sections_ml_dir <- paste0(cross_sections_dir, "/dem-ml") + cross_sections_coastal_bathy_dir <- paste0(cross_sections_dir, "/dem-coastal-bathy") + cross_sections_dem_pts_dir <- paste0(cross_sections_dir, "/dem-points") + cross_sections_output_dir <- paste0(cross_sections_dir, "/output") + + + if(with_output) { + output_dir <- paste0(version_base_dir, "/outputs") + } + + # create version BASE dir + create_if_not_exists(version_base_dir) + + # CONUS dir + create_if_not_exists(network_dir) + + # ML data + create_if_not_exists(ml_dir) + + # reference features data + create_if_not_exists(ref_features_dir) + + # transects + create_if_not_exists(transects_dir) + create_if_not_exists(transects_base_dir) + create_if_not_exists(transects_fema_extended_dir) + create_if_not_exists(transects_cs_extended_dir) + create_if_not_exists(transects_output_dir) + + # CS pts + create_if_not_exists(cross_sections_dir) + create_if_not_exists(cross_sections_dem_dir) + create_if_not_exists(cross_sections_ml_dir) + create_if_not_exists(cross_sections_coastal_bathy_dir) + create_if_not_exists(cross_sections_dem_pts_dir) + create_if_not_exists(cross_sections_output_dir) + + + if(with_output) { + create_if_not_exists(output_dir) + } + +} + +create_if_not_exists <- function(dir_path) { + if (!dir.exists(dir_path)) { + dir.create(dir_path, recursive = TRUE) + message("Created directory: '", dir_path, "'\n") + } +} + +# get a list of top level directories for main directory +get_base_dir_paths <- function(base_dir) { + # base_dir = BASE_DIR + # version = "v3.0" + + hydrofabric_dir <- file.path(base_dir, "hydrofabric") + + dem_dir <- file.path(base_dir, "dem") + dem_vrt_dir <- file.path(base_dir, "dem", "vrt") + dem_tif_dir <- file.path(base_dir, "dem", "tif") + + cs_extension_polygons_dir <- file.path(base_dir, "cs-extension-polygons") + + # FEMA data + + fema_dir <- file.path(base_dir, "fema") + + fema_fgb_dir <- file.path(fema_dir, "fema-fgb") + fema_geojson_dir <- file.path(fema_dir, "fema-geojson") + fema_clean_dir <- file.path(fema_dir, "fema-clean") + fema_gpkg_dir <- file.path(fema_dir, "fema-gpkg") + + # BY VPU folders + VPU_IDS <- get_vpu_ids() + fema_by_vpu_dir <- file.path(fema_dir, "fema-by-vpu") + fema_by_vpu_subdirs <- paste0(fema_by_vpu_dir, "/vpu-", VPU_IDS) + + return( + list( + hydrofabric_dir = hydrofabric_dir, + dem_dir = dem_dir, + dem_vrt_dir = dem_vrt_dir, + dem_tif_dir = dem_tif_dir, + cs_extension_polygons_dir = cs_extension_polygons_dir, + fema_dir = fema_dir, + fema_fgb_dir = fema_fgb_dir, + fema_geojson_dir = fema_geojson_dir, + fema_clean_dir = fema_clean_dir, + fema_gpkg_dir = fema_gpkg_dir, + fema_by_vpu_dir = fema_by_vpu_dir, + fema_by_vpu_subdirs = fema_by_vpu_subdirs + ) + ) +} + +# get list of a specific directories in a version directory +get_version_base_dir_paths <- function(base_dir, version) { + # base_dir = BASE_DIR + # version = "v3.0" + + hydrofabric_dir <- file.path(base_dir, "hydrofabric") + + version_base_dir <- file.path(hydrofabric_dir, version) + + # polygons for transect extensions + ml_dir <- 
file.path(version_base_dir, "ml") + + # reference features + ref_features_dir <- file.path(version_base_dir, "reference-features") + + # conus network gpkg + network_dir <- file.path(version_base_dir, "network") + + # transects + transects_dir <- file.path(version_base_dir, "transects") + transects_base_dir <- paste0(transects_dir, "/base") + transects_fema_extended_dir <- paste0(transects_dir, "/extended-by-fema") + transects_cs_extended_dir <- paste0(transects_dir, "/extended-by-cs-attributes") + transects_output_dir <- paste0(transects_dir, "/output") + + # cross sections dirs + cross_sections_dir <- file.path(version_base_dir, "cross-sections") + cross_sections_dem_dir <- file.path(cross_sections_dir, "dem") + cross_sections_ml_dir <- file.path(cross_sections_dir, "dem-ml") + cross_sections_coastal_bathy_dir <- file.path(cross_sections_dir, "dem-coastal-bathy") + cross_sections_dem_pts_dir <- file.path(cross_sections_dir, "dem-points") + cross_sections_output_dir <- paste0(cross_sections_dir, "/output") + + return( + list( + hydrofabric_dir = hydrofabric_dir, + version_base_dir = version_base_dir, + + ref_features_dir = ref_features_dir, + + network_dir = network_dir, + + ml_dir = ml_dir, + + transects_dir = transects_dir, + transects_base_dir = transects_base_dir, + transects_fema_extended_dir = transects_fema_extended_dir, + transects_cs_extended_dir = transects_cs_extended_dir, + transects_output_dir = transects_output_dir, + + cross_sections_dir = cross_sections_dir, + cross_sections_dem_dir = cross_sections_dem_dir, + cross_sections_dem_pts_dir = cross_sections_dem_pts_dir, + cross_sections_ml_dir = cross_sections_ml_dir, + cross_sections_coastal_bathy_dir = cross_sections_coastal_bathy_dir, + cross_sections_output_dir = cross_sections_output_dir + + ) + ) +} + +list_s3_objects <- function(s3_bucket, pattern = NULL, aws_profile = NULL) { + + profile_option <- if (!is.null(aws_profile)) paste0("--profile ", aws_profile) else "" + + if (is.null(pattern) || pattern == "") { + grep_command <- "" # no filtering if empty or NULL + } else { + grep_command <- paste0(" | grep -E \"", pattern, "\"") # grep if a pattern is given + } + + cmd <- paste0( + '#!/bin/bash\n', + 'S3_BUCKET="', s3_bucket, '"\n', + 'PATTERN="', pattern, '"\n', + 'S3_OBJECTS=$(aws s3 ls "$S3_BUCKET" ', profile_option, ' | awk \'{print $4}\' | grep -E "$PATTERN")\n', + 'echo "$S3_OBJECTS"' + ) + # cmd <- paste0( + # '#!/bin/bash\n', + # 'S3_BUCKET="', s3_bucket, '"\n', + # 'S3_OBJECTS=$(aws s3 ls "$S3_BUCKET" ', profile_option, ' | awk \'{print $4}\'', grep_command, ')\n', + # 'echo "$S3_OBJECTS"' + # ) + ls_output <- system(cmd, intern = TRUE) + return(ls_output) +} + +download_tiles <- function(tile_paths, output_dir) { + # output_dir <- DEM_TIF_DIR + # tile_paths + # error_tiles <- + tif_save_paths <- paste0(output_dir, "/", basename(tile_paths)) + + error_tiles <- data.frame( + tile = basename(tile_paths), + status = TRUE + ) + + for (i in seq_along(tile_paths)) { + # i = 1 + tile_path <- tile_paths[i] + tif_save_path <- tif_save_paths[i] + + message("[", i, "]", + "\n > Tile: ", basename(tile_path), + "\n > Output path: ", tif_save_path + ) + + download_tif_cmd <- paste0("curl -o ", tif_save_path, " ", tile_path) + + tryCatch({ + + tif_download_output <- system(download_tif_cmd, intern = TRUE) + message(" > Successfully downloaded tile: ", basename(tile_path)) + + }, error = function(e) { + + message("Error downloading tile: ", basename(tile_path)) + message("ERROR below: \n ", e) + + # mark this tile as failed ("<<-" so the status update reaches the data.frame outside the error handler) + error_tiles$status[error_tiles$tile == basename(tile_path)] <<- FALSE
+ + }) + + } + + return(error_tiles) + +} + +# Given 2 character vectors of filenames both including VPU strings after a "nextgen_" string, match them together to +# make sure they are aligned and in the same order +# x is a character vector of file paths with a VPU ID preceded by a "nextgen_" string +# y is a character vector of file paths with a VPU ID preceded by a "nextgen_" string +# base is a character vector of the base directory of the files. Defaults to NULL +# Returns a dataframe with VPU, x, and y columns +align_files_by_vpu <- function( + x, + y, + base = NULL +) { + + # Regular expression pattern to match numeric pattern after "nextgen_" and remove everything after the ending period + regex_pattern <- "nextgen_(\\d+[A-Za-z]?).*" + + # path dataframe for X filepaths + x_paths <- data.frame(x = x) + + # path dataframe for Y filepaths + y_paths <- data.frame(y = y) + + # generate VPU IDs based on file path regular expression matching with "regex_pattern" above + x_paths$vpu <- gsub(regex_pattern, "\\1", x_paths$x) + y_paths$vpu <- gsub(regex_pattern, "\\1", y_paths$y) + + # match paths based on VPU column + matched_paths <- dplyr::left_join( + x_paths, + y_paths, + by = "vpu" + ) + + # reorder columns + matched_paths <- dplyr::relocate(matched_paths, vpu, x, y) + + if(!is.null(base)) { + matched_paths$base_dir <- base + } + + return(matched_paths) + +} + +# Update flowlines and transects to remove flowlines and transects that intersect with reference_features waterbodies +# flowlines: flowlines linestring sf object +# trans: transects linestring sf object +# waterbodies: waterbodies polygon sf object +# Returns a list of length 2 with logical vectors that subset the "flowlines" and "transects" sf objects to remove flowlines and transects that intersect waterbodies +### Returns a list of length 2 with updated "flowlines" and "transects" sf objects +wb_intersects <- function(flowlines, trans, waterbodies) { + + ######## ######## ######## ######## ######## ######## + + flowlines_geos <- geos::as_geos_geometry(flowlines) + wbs_geos <- geos::as_geos_geometry(waterbodies) + + # temporary ID for transects that is the "hy_id", underscore, "cs_id", used for subsetting in future steps + trans$tmp_id <- paste0(trans$hy_id, "_", trans$cs_id) + + message("Checking flowlines against waterbodies...") + + # create an index between flowlines and waterbodies + wb_index <- geos::geos_intersects_matrix(flowlines_geos, wbs_geos) + + # remove any flowlines that cross 1 or more waterbodies + to_keep <- flowlines[lengths(wb_index) == 0, ] + to_check <- flowlines[lengths(wb_index) != 0, ] + + # subset transects to the hy_ids in "to_check" set of flowlines + trans_check <- trans[trans$hy_id %in% unique(to_check$id), ] + # trans_check <- trans_geos[trans$hy_id %in% unique(to_check$id)] + + # check where the transect linestrings intersect with the waterbodies + trans_geos_check <- geos::as_geos_geometry(trans_check) + + message("Checking transects against waterbodies (v2) ...") + wb_trans_index <- geos::geos_intersects_matrix(trans_geos_check, wbs_geos) # (NEW METHOD) + # wb_trans_index <- geos::geos_intersects_any(trans_geos_check, wbs_geos[unlist(wb_index)]) # (OLD METHOD) + + # sum(lengths(wb_trans_index) == 0) + # length(wb_trans_index) + + # within the transect lines that are on a flowline that crosses a waterbody, + # check if any of these transect lines DO NOT CROSS A WATERBODY AT ALL + trans_keep <- trans_check[lengths(wb_trans_index) == 0, ] # (NEW METHOD) + 
# trans_keep <- trans_check[!wb_trans_index, ] # (OLD METHOD) + + # preserve any flowlines that CROSS A WATERBODY BUT ALSO HAVE A TRANSECT LINE that does NOT cross any waterbodies + to_check <- to_check[to_check$id %in% unique(trans_keep$hy_id), ] + + # update flowlines to keep with flowlines that intersect a waterbody BUT STILL, + # have transects that are NOT in the waterbody + to_keep <- dplyr::bind_rows(to_keep, to_check) + + # 'tmp_ids' of transects that are being checked and also the transects within trans_check + # that were determined to be valid (are being kept) + check_ids <- unique(trans_check$tmp_id) + keep_ids <- unique(trans_keep$tmp_id) + + # logical vectors of which flowlines/transects to keep (KEEP == TRUE) + # - Remove any transects that are on flowlines that cross a waterbody AND the transect crosses the waterbody too. + # - Keep original transects that are not on flowlines that intersect waterbodies AND + # also the transects that do NOT intersect waterbodies but are on a flowline that DOES intersect a waterbody + valid_flowlines <- flowlines$id %in% to_keep$id + valid_transects <- trans$tmp_id %in% dplyr::filter(trans, + !tmp_id %in% check_ids[!check_ids %in% keep_ids])$tmp_id + + # return alist of updated flowlines and transects + return( + list( + "valid_flowlines" = valid_flowlines, + "valid_transects" = valid_transects + ) + ) + + # # within the transects lines that are on a flowline that crosses a waterbody, + # # check if any of these transects line DO NOT CROSS A WATERBODY AT ALL + # trans_keep <- trans_check[!trans_wb_index, ] + # # trans_keep <- trans_check[lengths(trans_wb_index2) == 0, ] + # + # # preserve any flowlines that CROSS A WATERBODY BUT ALSO HAVE A TRANSECT LINE that does NOT cross any waterbodies + # to_check <- to_check[to_check$id %in% unique(trans_keep$hy_id), ] + # + # # update flowlines to keep with flowlines that intersect a waterbody BUT STILL, + # # have transects that are NOT in the waterbody + # to_keep <- dplyr::bind_rows(to_keep, to_check) + # + # # 'tmp_ids' of transects that are being checked and also the transects within trans_check + # # that were determined to be valid (are being kept) + # check_ids <- unique(trans_check$tmp_id) + # keep_ids <- unique(trans_keep$tmp_id) + # + # # logical vectors of which flowlines/transects to keep (KEEP == TRUE) + # # - Remove any transects that are on flowlines that cross a waterbody AND the transect crosses the waterbody too. 
+ # # - Keep original transects that are not on flowlines that intersect waterbodies AND + # # also the transects that do NOT intersect waterbodies but are on a flowline that DOES intersect a waterbody + # valid_flowlines <- flowlines$id %in% to_keep$id + # valid_transects <- trans$tmp_id %in% dplyr::filter(trans, + # !tmp_id %in% check_ids[!check_ids %in% keep_ids])$tmp_id + # + # # return alist of updated flowlines and transects + # return( + # list( + # "valid_flowlines" = valid_flowlines, + # "valid_transects" = valid_transects + # ) + # ) +} + +add_intersects_ids <- function(x, y, id_col) { + # make sure the crs are tjhe same + y <- sf::st_transform(y, sf::st_crs(x)) + + # Perform the intersection + intersections <- sf::st_intersects(x, y) + + # add the intersected values to the first dataframe + x[[id_col]] <- unlist(lapply(intersections, function(idx) { + if (length(idx) > 0) { + paste0(unlist(y[[id_col]][idx]), collapse = ", ") + } else { + NA + } + })) + + return(x) +} + +unnest_ids <- function(ids) { + return( + unique(unlist(strsplit(unique(ids), ", "))) + ) +} + +#' Get the polygons that interesect with any of the linestring geometries +#' This is just a wrapper around geos::geos_intersects_matrix. Takes in sf dataframes, uses geos, then outputs sf dataframes +#' @param polygons polygon sf object. Default is NULL +#' @param lines linestring sf object. Default is NULL. +#' +#' @return sf dataframe of polygons that intersect with the linestrings +polygons_with_line_intersects <- function(polygons = NULL, lines = NULL) { + + if (is.null(polygons)) { + stop("NULL 'polygons' argument, provide an sf dataframe of POLYGON or MULTIPOLYGON geometries") + } + + if (is.null(lines)) { + stop("NULL 'lines' argument, provide an sf dataframe of LINESTRING or MULTILINESTRING geometries") + } + + # Convert the SF geometries to geos geometries + polygons_geos <- geos::as_geos_geometry(polygons) + lines_geos <- geos::as_geos_geometry(lines) + + # create an index between the polygons and linestrings + lines_index <- geos::geos_intersects_matrix(polygons_geos, lines_geos) + + # get the polygons that have atleast 1 intersection with the 'lines' + polygons_with_lines <- polygons[lengths(lines_index) != 0, ] + + return(polygons_with_lines) +} + +add_predicate_group_id <- function(polys, predicate) { + # GROUP BY SPATIAL PREDICATES + # ----------------------------------------- + # predicate = sf::st_touches + # polys <- sf_df + # ----------------------------------------- + + + relations <- predicate(polys) + + relations <- lapply(seq_along(relations), function(i) { as.character(sort(unique(c(relations[i][[1]], i)))) }) + + group_ids_map <- fastmap::fastmap() + ids_to_groups <- fastmap::fastmap() + + group_id <- 0 + + for (i in seq_along(relations)) { + + predicate_ids <- relations[i][[1]] + + # message("(", i, ") - ", predicate_ids) + # message("Start Group ID: ", group_id) + + id_group_check <- ids_to_groups$has(predicate_ids) + + if(any(id_group_check)) { + + known_groups <- ids_to_groups$mget(predicate_ids) + known_group <- known_groups[unname(sapply(known_groups , function(kg) { + !is.null(kg) + }))][[1]] + + # message("IDs part of past group ID > '", known_group, "'") + + past_group_ids <- group_ids_map$get(known_group)[[1]] + updated_group_ids <- as.character( + sort(as.numeric(unique(c(past_group_ids, predicate_ids)))) + ) + + group_ids_map$set(known_group, list(updated_group_ids)) + + new_ids <- predicate_ids[!predicate_ids %in% past_group_ids] + + # message("Adding ", new_ids, " to seen 
set...") + + # add any newly added IDs to the seen map + for (seen_id in new_ids) { + # message(seen_id) + ids_to_groups$set(as.character(seen_id), as.character(group_id)) + } + + } else { + # get a new group ID number + group_id <- group_id + 1 + # message("IDs form NEW group > '", group_id, "'") + + # create a new key in the map with the predicate IDs list as the value + group_ids_map$set(as.character(group_id), list(predicate_ids)) + + # message("Adding ", predicate_ids, " to seen set...") + + # add each predicate ID to the map storing the seen indexes and their respecitve group IDs + for (seen_id in predicate_ids) { + # message(seen_id) + ids_to_groups$set(as.character(seen_id), as.character(group_id)) + } + } + # message("End group ID: ", group_id, "\n") + } + + group_ids <- group_ids_map$as_list() + + grouping_df <- lapply(seq_along(group_ids), function(i) { + # i = 2 + grouping <- group_ids[i] + group_id <- names(grouping) + indices <- grouping[[1]][[1]] + + data.frame( + index = as.numeric(indices), + group_id = rep(group_id, length(indices)) + ) + + }) %>% + dplyr::bind_rows() %>% + dplyr::arrange(i) + + # count up the number of IDs for each group, well use this to determine which group + # to put any indices that had MULTIPLE groups they were apart of (use the group with the most other members) + group_id_counts <- + grouping_df %>% + dplyr::group_by(group_id) %>% + dplyr::count() %>% + # dplyr::arrange(-n) %>% + dplyr::ungroup() + + # select the IDs with the most other members + grouping_df <- + grouping_df %>% + dplyr::left_join( + group_id_counts, + by = 'group_id' + ) %>% + dplyr::group_by(index) %>% + dplyr::slice_max(n, with_ties = FALSE) %>% + dplyr::ungroup() %>% + dplyr::select(-n) %>% + dplyr::arrange(-index) + + polys$group_id <- grouping_df$group_id + + return(polys) + +} + +# TODO: DElete these NEW DOMAIN functions... 
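The directory helpers defined above (create_if_not_exists(), create_local_hydrofabric_base_dirs(), create_new_version_dirs(), get_base_dir_paths(), get_version_base_dir_paths()) are intended to be run once against a local storage root before the runners are used. The snippet below is a minimal usage sketch only: the scratch path is illustrative (the real runners point BASE_DIR at dedicated local storage) and it assumes this utils.R file has already been sourced.

# illustrative scratch root, not the project's actual BASE_DIR
base_dir <- file.path(tempdir(), "lynker-spatial")

# build the top-level tree (dem/, fema/, cs-extension-polygons/, ...)
create_local_hydrofabric_base_dirs(base_dir = base_dir)

# build a versioned hydrofabric tree underneath it
create_new_version_dirs(base_dir = base_dir, version = "v3.0", with_output = TRUE)

# look paths up later instead of rebuilding strings by hand
base_paths    <- get_base_dir_paths(base_dir)
version_paths <- get_version_base_dir_paths(base_dir, "v3.0")
base_paths$fema_gpkg_dir
version_paths$cross_sections_dem_dir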
+# Create an empty file structure +# base_dir: character, top level directory path +# domain_dirname: character, name of the intended new domain directory, if folder exists, then the required subdirectories are created (if they DO NOT exist) + +# Directory tree: +# base_dir/ +# └── domain_dirname/ +# ├── flowlines/ +# ├── dem/ +# ├── transects/ +# ├── cross_sections/ +# └── cs_pts/ +create_new_domain_dirs <- function(base_dir, domain_dirname, with_output = FALSE) { + + # build paths + domain_dir <- paste0(base_dir, "/", domain_dirname) + flowlines_dir <- paste0(domain_dir, "/flowlines") + domain_subset_dir <- paste0(domain_dir, "/domain_subset") + dem_dir <- paste0(domain_dir, "/dem") + transects_dir <- paste0(domain_dir, "/transects") + cross_sections_dir <- paste0(domain_dir, "/cross_sections") + cs_pts_dir <- paste0(domain_dir, "/cs_pts") + vpu_subsets_dir <- paste0(domain_dir, "/vpu-subsets") + + if(with_output) { + output_dir <- paste0(domain_dir, "/outputs") + } + + # create directories + create_if_not_exists(domain_dir) + create_if_not_exists(flowlines_dir) + create_if_not_exists(domain_subset_dir) + create_if_not_exists(dem_dir) + create_if_not_exists(transects_dir) + create_if_not_exists(cross_sections_dir) + create_if_not_exists(cs_pts_dir) + create_if_not_exists(vpu_subsets_dir) + + if(with_output) { + create_if_not_exists(output_dir) + } + +} + +# get path strings for a domain dir (based of a base dir and domain dirname) +# NOTE: this does NOT guarentee that these folders exist, +# NOTE: it just gets the paths if they were created by create_new_domain_dirs() +get_new_domain_paths <- function(base_dir, domain_dirname, with_output = FALSE) { + + # build paths + domain_dir <- paste0(base_dir, "/", domain_dirname) + flowlines_dir <- paste0(domain_dir, "/flowlines") + domain_subset_dir <- paste0(domain_dir, "/domain_subset") + dem_dir <- paste0(domain_dir, "/dem") + transects_dir <- paste0(domain_dir, "/transects") + cross_sections_dir <- paste0(domain_dir, "/cross_sections") + cs_pts_dir <- paste0(domain_dir, "/cs_pts") + vpu_subsets_dir <- paste0(domain_dir, "/vpu-subsets") + output_dir <- ifelse(with_output, paste0(domain_dir, "/outputs"), NA) + + + # named list of file paths + return( + list( + base_dir = base_dir, + domain_dir = domain_dir, + flowlines_dir = flowlines_dir, + domain_subset_dir = domain_subset_dir, + dem_dir = dem_dir, + transects_dir = transects_dir, + cross_sections_dir = cross_sections_dir, + cs_pts_dir = cs_pts_dir, + vpu_subsets_dir = vpu_subsets_dir, + output_dir = output_dir + ) + ) + +} + +download_3dep_vrt <- function(base_dir) { + + ## Cmd+A/Cmd+C from: http://prd-tnm.s3.amazonaws.com/index.html?prefix=StagedProducts/Elevation/13/TIFF/current/ + ### paste w/ `datapasta::vector_paste_vertical()` + ### Some manual cleaning of header and footer mess... + ### Reason? Un-scrapable page, and no index.gpkg... 
+ + t <- c( + "0 n06e162/", + "0 n06e163/", + "0 n07e134/", + "0 n07e151/", + "0 n07e152/", + "0 n07e158/", + "0 n08e134/", + "0 n08e151/", + "0 n08e152/", + "0 n08e158/", + "0 n09e134/", + "0 n10e138/", + "0 n14e144/", + "0 n15e145/", + "0 n16e145/", + "0 n18w065/", + "0 n18w066/", + "0 n18w067/", + "0 n18w068/", + "0 n19w065/", + "0 n19w066/", + "0 n19w067/", + "0 n19w068/", + "0 n19w156/", + "0 n20w155/", + "0 n20w156/", + "0 n20w157/", + "0 n21w156/", + "0 n21w157/", + "0 n21w158/", + "0 n22w157/", + "0 n22w158/", + "0 n22w159/", + "0 n22w160/", + "0 n22w161/", + "0 n23w160/", + "0 n23w161/", + "0 n25w081/", + "0 n25w082/", + "0 n25w083/", + "0 n26w081/", + "0 n26w082/", + "0 n26w098/", + "0 n26w099/", + "0 n27w081/", + "0 n27w082/", + "0 n27w083/", + "0 n27w098/", + "0 n27w099/", + "0 n27w100/", + "0 n28w081/", + "0 n28w082/", + "0 n28w083/", + "0 n28w097/", + "0 n28w098/", + "0 n28w099/", + "0 n28w100/", + "0 n28w101/", + "0 n29w081/", + "0 n29w082/", + "0 n29w083/", + "0 n29w090/", + "0 n29w096/", + "0 n29w097/", + "0 n29w098/", + "0 n29w099/", + "0 n29w100/", + "0 n29w101/", + "0 n29w104/", + "0 n30w081/", + "0 n30w082/", + "0 n30w083/", + "0 n30w084/", + "0 n30w085/", + "0 n30w086/", + "0 n30w089/", + "0 n30w090/", + "0 n30w091/", + "0 n30w092/", + "0 n30w093/", + "0 n30w094/", + "0 n30w095/", + "0 n30w096/", + "0 n30w097/", + "0 n30w098/", + "0 n30w099/", + "0 n30w100/", + "0 n30w101/", + "0 n30w102/", + "0 n30w103/", + "0 n30w104/", + "0 n30w105/", + "0 n31w082/", + "0 n31w083/", + "0 n31w084/", + "0 n31w085/", + "0 n31w086/", + "0 n31w087/", + "0 n31w088/", + "0 n31w089/", + "0 n31w090/", + "0 n31w091/", + "0 n31w092/", + "0 n31w093/", + "0 n31w094/", + "0 n31w095/", + "0 n31w096/", + "0 n31w097/", + "0 n31w098/", + "0 n31w099/", + "0 n31w100/", + "0 n31w101/", + "0 n31w102/", + "0 n31w103/", + "0 n31w104/", + "0 n31w105/", + "0 n31w106/", + "0 n31w107/", + "0 n32w081/", + "0 n32w082/", + "0 n32w083/", + "0 n32w084/", + "0 n32w085/", + "0 n32w086/", + "0 n32w087/", + "0 n32w088/", + "0 n32w089/", + "0 n32w090/", + "0 n32w091/", + "0 n32w092/", + "0 n32w093/", + "0 n32w094/", + "0 n32w095/", + "0 n32w096/", + "0 n32w097/", + "0 n32w098/", + "0 n32w099/", + "0 n32w100/", + "0 n32w101/", + "0 n32w102/", + "0 n32w103/", + "0 n32w104/", + "0 n32w105/", + "0 n32w106/", + "0 n32w107/", + "0 n32w108/", + "0 n32w109/", + "0 n32w110/", + "0 n32w111/", + "0 n32w112/", + "0 n32w113/", + "0 n32w114/", + "0 n33w080/", + "0 n33w081/", + "0 n33w082/", + "0 n33w083/", + "0 n33w084/", + "0 n33w085/", + "0 n33w086/", + "0 n33w087/", + "0 n33w088/", + "0 n33w089/", + "0 n33w090/", + "0 n33w091/", + "0 n33w092/", + "0 n33w093/", + "0 n33w094/", + "0 n33w095/", + "0 n33w096/", + "0 n33w097/", + "0 n33w098/", + "0 n33w099/", + "0 n33w100/", + "0 n33w101/", + "0 n33w102/", + "0 n33w103/", + "0 n33w104/", + "0 n33w105/", + "0 n33w106/", + "0 n33w107/", + "0 n33w108/", + "0 n33w109/", + "0 n33w110/", + "0 n33w111/", + "0 n33w112/", + "0 n33w113/", + "0 n33w114/", + "0 n33w115/", + "0 n33w116/", + "0 n33w117/", + "0 n33w118/", + "0 n33w119/", + "0 n34w078/", + "0 n34w079/", + "0 n34w080/", + "0 n34w081/", + "0 n34w082/", + "0 n34w083/", + "0 n34w084/", + "0 n34w085/", + "0 n34w086/", + "0 n34w087/", + "0 n34w088/", + "0 n34w089/", + "0 n34w090/", + "0 n34w091/", + "0 n34w092/", + "0 n34w093/", + "0 n34w094/", + "0 n34w095/", + "0 n34w096/", + "0 n34w097/", + "0 n34w098/", + "0 n34w099/", + "0 n34w100/", + "0 n34w101/", + "0 n34w102/", + "0 n34w103/", + "0 n34w104/", + "0 n34w105/", + "0 n34w106/", + "0 
n34w107/", + "0 n34w108/", + "0 n34w109/", + "0 n34w110/", + "0 n34w111/", + "0 n34w112/", + "0 n34w113/", + "0 n34w114/", + "0 n34w115/", + "0 n34w116/", + "0 n34w117/", + "0 n34w118/", + "0 n34w119/", + "0 n34w120/", + "0 n34w121/", + "0 n35w076/", + "0 n35w077/", + "0 n35w078/", + "0 n35w079/", + "0 n35w080/", + "0 n35w081/", + "0 n35w082/", + "0 n35w083/", + "0 n35w084/", + "0 n35w085/", + "0 n35w086/", + "0 n35w087/", + "0 n35w088/", + "0 n35w089/", + "0 n35w090/", + "0 n35w091/", + "0 n35w092/", + "0 n35w093/", + "0 n35w094/", + "0 n35w095/", + "0 n35w096/", + "0 n35w097/", + "0 n35w098/", + "0 n35w099/", + "0 n35w100/", + "0 n35w101/", + "0 n35w102/", + "0 n35w103/", + "0 n35w104/", + "0 n35w105/", + "0 n35w106/", + "0 n35w107/", + "0 n35w108/", + "0 n35w109/", + "0 n35w110/", + "0 n35w111/", + "0 n35w112/", + "0 n35w113/", + "0 n35w114/", + "0 n35w115/", + "0 n35w116/", + "0 n35w117/", + "0 n35w118/", + "0 n35w119/", + "0 n35w120/", + "0 n35w121/", + "0 n36w076/", + "0 n36w077/", + "0 n36w078/", + "0 n36w079/", + "0 n36w080/", + "0 n36w081/", + "0 n36w082/", + "0 n36w083/", + "0 n36w084/", + "0 n36w085/", + "0 n36w086/", + "0 n36w087/", + "0 n36w088/", + "0 n36w089/", + "0 n36w090/", + "0 n36w091/", + "0 n36w092/", + "0 n36w093/", + "0 n36w094/", + "0 n36w095/", + "0 n36w096/", + "0 n36w097/", + "0 n36w098/", + "0 n36w099/", + "0 n36w100/", + "0 n36w101/", + "0 n36w102/", + "0 n36w103/", + "0 n36w104/", + "0 n36w105/", + "0 n36w106/", + "0 n36w107/", + "0 n36w108/", + "0 n36w109/", + "0 n36w110/", + "0 n36w111/", + "0 n36w112/", + "0 n36w113/", + "0 n36w114/", + "0 n36w115/", + "0 n36w116/", + "0 n36w117/", + "0 n36w118/", + "0 n36w119/", + "0 n36w120/", + "0 n36w121/", + "0 n36w122/", + "0 n37w076/", + "0 n37w077/", + "0 n37w078/", + "0 n37w079/", + "0 n37w080/", + "0 n37w081/", + "0 n37w082/", + "0 n37w083/", + "0 n37w084/", + "0 n37w085/", + "0 n37w086/", + "0 n37w087/", + "0 n37w088/", + "0 n37w089/", + "0 n37w090/", + "0 n37w091/", + "0 n37w092/", + "0 n37w093/", + "0 n37w094/", + "0 n37w095/", + "0 n37w096/", + "0 n37w097/", + "0 n37w098/", + "0 n37w099/", + "0 n37w100/", + "0 n37w101/", + "0 n37w102/", + "0 n37w103/", + "0 n37w104/", + "0 n37w105/", + "0 n37w106/", + "0 n37w107/", + "0 n37w108/", + "0 n37w109/", + "0 n37w110/", + "0 n37w111/", + "0 n37w112/", + "0 n37w113/", + "0 n37w114/", + "0 n37w115/", + "0 n37w116/", + "0 n37w117/", + "0 n37w118/", + "0 n37w119/", + "0 n37w120/", + "0 n37w121/", + "0 n37w122/", + "0 n37w123/", + "0 n38w076/", + "0 n38w077/", + "0 n38w078/", + "0 n38w079/", + "0 n38w080/", + "0 n38w081/", + "0 n38w082/", + "0 n38w083/", + "0 n38w084/", + "0 n38w085/", + "0 n38w086/", + "0 n38w087/", + "0 n38w088/", + "0 n38w089/", + "0 n38w090/", + "0 n38w091/", + "0 n38w092/", + "0 n38w093/", + "0 n38w094/", + "0 n38w095/", + "0 n38w096/", + "0 n38w097/", + "0 n38w098/", + "0 n38w099/", + "0 n38w100/", + "0 n38w101/", + "0 n38w102/", + "0 n38w103/", + "0 n38w104/", + "0 n38w105/", + "0 n38w106/", + "0 n38w107/", + "0 n38w108/", + "0 n38w109/", + "0 n38w110/", + "0 n38w111/", + "0 n38w112/", + "0 n38w113/", + "0 n38w114/", + "0 n38w115/", + "0 n38w116/", + "0 n38w117/", + "0 n38w118/", + "0 n38w119/", + "0 n38w120/", + "0 n38w121/", + "0 n38w122/", + "0 n38w123/", + "0 n38w124/", + "0 n39w075/", + "0 n39w076/", + "0 n39w077/", + "0 n39w078/", + "0 n39w079/", + "0 n39w080/", + "0 n39w081/", + "0 n39w082/", + "0 n39w083/", + "0 n39w084/", + "0 n39w085/", + "0 n39w086/", + "0 n39w087/", + "0 n39w088/", + "0 n39w089/", + "0 n39w090/", + "0 n39w091/", + "0 
n39w092/", + "0 n39w093/", + "0 n39w094/", + "0 n39w095/", + "0 n39w096/", + "0 n39w097/", + "0 n39w098/", + "0 n39w099/", + "0 n39w100/", + "0 n39w101/", + "0 n39w102/", + "0 n39w103/", + "0 n39w104/", + "0 n39w105/", + "0 n39w106/", + "0 n39w107/", + "0 n39w108/", + "0 n39w109/", + "0 n39w110/", + "0 n39w111/", + "0 n39w112/", + "0 n39w113/", + "0 n39w114/", + "0 n39w115/", + "0 n39w116/", + "0 n39w117/", + "0 n39w118/", + "0 n39w119/", + "0 n39w120/", + "0 n39w121/", + "0 n39w122/", + "0 n39w123/", + "0 n39w124/", + "0 n40w075/", + "0 n40w076/", + "0 n40w077/", + "0 n40w078/", + "0 n40w079/", + "0 n40w080/", + "0 n40w081/", + "0 n40w082/", + "0 n40w083/", + "0 n40w084/", + "0 n40w085/", + "0 n40w086/", + "0 n40w087/", + "0 n40w088/", + "0 n40w089/", + "0 n40w090/", + "0 n40w091/", + "0 n40w092/", + "0 n40w093/", + "0 n40w094/", + "0 n40w095/", + "0 n40w096/", + "0 n40w097/", + "0 n40w098/", + "0 n40w099/", + "0 n40w100/", + "0 n40w101/", + "0 n40w102/", + "0 n40w103/", + "0 n40w104/", + "0 n40w105/", + "0 n40w106/", + "0 n40w107/", + "0 n40w108/", + "0 n40w109/", + "0 n40w110/", + "0 n40w111/", + "0 n40w112/", + "0 n40w113/", + "0 n40w114/", + "0 n40w115/", + "0 n40w116/", + "0 n40w117/", + "0 n40w118/", + "0 n40w119/", + "0 n40w120/", + "0 n40w121/", + "0 n40w122/", + "0 n40w123/", + "0 n40w124/", + "0 n40w125/", + "0 n41w073/", + "0 n41w074/", + "0 n41w075/", + "0 n41w076/", + "0 n41w077/", + "0 n41w078/", + "0 n41w079/", + "0 n41w080/", + "0 n41w081/", + "0 n41w082/", + "0 n41w083/", + "0 n41w084/", + "0 n41w085/", + "0 n41w086/", + "0 n41w087/", + "0 n41w088/", + "0 n41w089/", + "0 n41w090/", + "0 n41w091/", + "0 n41w092/", + "0 n41w093/", + "0 n41w094/", + "0 n41w095/", + "0 n41w096/", + "0 n41w097/", + "0 n41w098/", + "0 n41w099/", + "0 n41w100/", + "0 n41w101/", + "0 n41w102/", + "0 n41w103/", + "0 n41w104/", + "0 n41w105/", + "0 n41w106/", + "0 n41w107/", + "0 n41w108/", + "0 n41w109/", + "0 n41w110/", + "0 n41w111/", + "0 n41w112/", + "0 n41w113/", + "0 n41w114/", + "0 n41w115/", + "0 n41w116/", + "0 n41w117/", + "0 n41w118/", + "0 n41w119/", + "0 n41w120/", + "0 n41w121/", + "0 n41w122/", + "0 n41w123/", + "0 n41w124/", + "0 n41w125/", + "0 n42w070/", + "0 n42w071/", + "0 n42w072/", + "0 n42w073/", + "0 n42w074/", + "0 n42w075/", + "0 n42w076/", + "0 n42w077/", + "0 n42w078/", + "0 n42w079/", + "0 n42w080/", + "0 n42w081/", + "0 n42w082/", + "0 n42w083/", + "0 n42w084/", + "0 n42w085/", + "0 n42w086/", + "0 n42w087/", + "0 n42w088/", + "0 n42w089/", + "0 n42w090/", + "0 n42w091/", + "0 n42w092/", + "0 n42w093/", + "0 n42w094/", + "0 n42w095/", + "0 n42w096/", + "0 n42w097/", + "0 n42w098/", + "0 n42w099/", + "0 n42w100/", + "0 n42w101/", + "0 n42w102/", + "0 n42w103/", + "0 n42w104/", + "0 n42w105/", + "0 n42w106/", + "0 n42w107/", + "0 n42w108/", + "0 n42w109/", + "0 n42w110/", + "0 n42w111/", + "0 n42w112/", + "0 n42w113/", + "0 n42w114/", + "0 n42w115/", + "0 n42w116/", + "0 n42w117/", + "0 n42w118/", + "0 n42w119/", + "0 n42w120/", + "0 n42w121/", + "0 n42w122/", + "0 n42w123/", + "0 n42w124/", + "0 n42w125/", + "0 n43w071/", + "0 n43w072/", + "0 n43w073/", + "0 n43w074/", + "0 n43w075/", + "0 n43w076/", + "0 n43w077/", + "0 n43w078/", + "0 n43w079/", + "0 n43w080/", + "0 n43w081/", + "0 n43w082/", + "0 n43w083/", + "0 n43w084/", + "0 n43w085/", + "0 n43w086/", + "0 n43w087/", + "0 n43w088/", + "0 n43w089/", + "0 n43w090/", + "0 n43w091/", + "0 n43w092/", + "0 n43w093/", + "0 n43w094/", + "0 n43w095/", + "0 n43w096/", + "0 n43w097/", + "0 n43w098/", + "0 n43w099/", + "0 
n43w100/", + "0 n43w101/", + "0 n43w102/", + "0 n43w103/", + "0 n43w104/", + "0 n43w105/", + "0 n43w106/", + "0 n43w107/", + "0 n43w108/", + "0 n43w109/", + "0 n43w110/", + "0 n43w111/", + "0 n43w112/", + "0 n43w113/", + "0 n43w114/", + "0 n43w115/", + "0 n43w116/", + "0 n43w117/", + "0 n43w118/", + "0 n43w119/", + "0 n43w120/", + "0 n43w121/", + "0 n43w122/", + "0 n43w123/", + "0 n43w124/", + "0 n43w125/", + "0 n44w069/", + "0 n44w070/", + "0 n44w071/", + "0 n44w072/", + "0 n44w073/", + "0 n44w074/", + "0 n44w075/", + "0 n44w076/", + "0 n44w077/", + "0 n44w078/", + "0 n44w079/", + "0 n44w080/", + "0 n44w081/", + "0 n44w083/", + "0 n44w084/", + "0 n44w085/", + "0 n44w086/", + "0 n44w087/", + "0 n44w088/", + "0 n44w089/", + "0 n44w090/", + "0 n44w091/", + "0 n44w092/", + "0 n44w093/", + "0 n44w094/", + "0 n44w095/", + "0 n44w096/", + "0 n44w097/", + "0 n44w098/", + "0 n44w099/", + "0 n44w100/", + "0 n44w101/", + "0 n44w102/", + "0 n44w103/", + "0 n44w104/", + "0 n44w105/", + "0 n44w106/", + "0 n44w107/", + "0 n44w108/", + "0 n44w109/", + "0 n44w110/", + "0 n44w111/", + "0 n44w112/", + "0 n44w113/", + "0 n44w114/", + "0 n44w115/", + "0 n44w116/", + "0 n44w117/", + "0 n44w118/", + "0 n44w119/", + "0 n44w120/", + "0 n44w121/", + "0 n44w122/", + "0 n44w123/", + "0 n44w124/", + "0 n44w125/", + "0 n45w067/", + "0 n45w068/", + "0 n45w069/", + "0 n45w070/", + "0 n45w071/", + "0 n45w072/", + "0 n45w073/", + "0 n45w074/", + "0 n45w075/", + "0 n45w076/", + "0 n45w077/", + "0 n45w083/", + "0 n45w084/", + "0 n45w085/", + "0 n45w086/", + "0 n45w087/", + "0 n45w088/", + "0 n45w089/", + "0 n45w090/", + "0 n45w091/", + "0 n45w092/", + "0 n45w093/", + "0 n45w094/", + "0 n45w095/", + "0 n45w096/", + "0 n45w097/", + "0 n45w098/", + "0 n45w099/", + "0 n45w100/", + "0 n45w101/", + "0 n45w102/", + "0 n45w103/", + "0 n45w104/", + "0 n45w105/", + "0 n45w106/", + "0 n45w107/", + "0 n45w108/", + "0 n45w109/", + "0 n45w110/", + "0 n45w111/", + "0 n45w112/", + "0 n45w113/", + "0 n45w114/", + "0 n45w115/", + "0 n45w116/", + "0 n45w117/", + "0 n45w118/", + "0 n45w119/", + "0 n45w120/", + "0 n45w121/", + "0 n45w122/", + "0 n45w123/", + "0 n45w124/", + "0 n45w125/", + "0 n46w068/", + "0 n46w069/", + "0 n46w070/", + "0 n46w071/", + "0 n46w072/", + "0 n46w073/", + "0 n46w074/", + "0 n46w075/", + "0 n46w084/", + "0 n46w085/", + "0 n46w086/", + "0 n46w087/", + "0 n46w088/", + "0 n46w089/", + "0 n46w090/", + "0 n46w091/", + "0 n46w092/", + "0 n46w093/", + "0 n46w094/", + "0 n46w095/", + "0 n46w096/", + "0 n46w097/", + "0 n46w098/", + "0 n46w099/", + "0 n46w100/", + "0 n46w101/", + "0 n46w102/", + "0 n46w103/", + "0 n46w104/", + "0 n46w105/", + "0 n46w106/", + "0 n46w107/", + "0 n46w108/", + "0 n46w109/", + "0 n46w110/", + "0 n46w111/", + "0 n46w112/", + "0 n46w113/", + "0 n46w114/", + "0 n46w115/", + "0 n46w116/", + "0 n46w117/", + "0 n46w118/", + "0 n46w119/", + "0 n46w120/", + "0 n46w121/", + "0 n46w122/", + "0 n46w123/", + "0 n46w124/", + "0 n46w125/", + "0 n47w068/", + "0 n47w069/", + "0 n47w070/", + "0 n47w071/", + "0 n47w084/", + "0 n47w085/", + "0 n47w086/", + "0 n47w087/", + "0 n47w088/", + "0 n47w089/", + "0 n47w090/", + "0 n47w091/", + "0 n47w092/", + "0 n47w093/", + "0 n47w094/", + "0 n47w095/", + "0 n47w096/", + "0 n47w097/", + "0 n47w098/", + "0 n47w099/", + "0 n47w100/", + "0 n47w101/", + "0 n47w102/", + "0 n47w103/", + "0 n47w104/", + "0 n47w105/", + "0 n47w106/", + "0 n47w107/", + "0 n47w108/", + "0 n47w109/", + "0 n47w110/", + "0 n47w111/", + "0 n47w112/", + "0 n47w113/", + "0 n47w114/", + "0 n47w115/", + "0 
n47w116/", + "0 n47w117/", + "0 n47w118/", + "0 n47w119/", + "0 n47w120/", + "0 n47w121/", + "0 n47w122/", + "0 n47w123/", + "0 n47w124/", + "0 n47w125/", + "0 n48w068/", + "0 n48w069/", + "0 n48w070/", + "0 n48w087/", + "0 n48w088/", + "0 n48w089/", + "0 n48w090/", + "0 n48w091/", + "0 n48w092/", + "0 n48w093/", + "0 n48w094/", + "0 n48w095/", + "0 n48w096/", + "0 n48w097/", + "0 n48w098/", + "0 n48w099/", + "0 n48w100/", + "0 n48w101/", + "0 n48w102/", + "0 n48w103/", + "0 n48w104/", + "0 n48w105/", + "0 n48w106/", + "0 n48w107/", + "0 n48w108/", + "0 n48w109/", + "0 n48w110/", + "0 n48w111/", + "0 n48w112/", + "0 n48w113/", + "0 n48w114/", + "0 n48w115/", + "0 n48w116/", + "0 n48w117/", + "0 n48w118/", + "0 n48w119/", + "0 n48w120/", + "0 n48w121/", + "0 n48w122/", + "0 n48w123/", + "0 n48w124/", + "0 n48w125/", + "0 n49w089/", + "0 n49w090/", + "0 n49w091/", + "0 n49w092/", + "0 n49w093/", + "0 n49w094/", + "0 n49w095/", + "0 n49w096/", + "0 n49w097/", + "0 n49w098/", + "0 n49w099/", + "0 n49w100/", + "0 n49w101/", + "0 n49w102/", + "0 n49w103/", + "0 n49w104/", + "0 n49w105/", + "0 n49w106/", + "0 n49w107/", + "0 n49w108/", + "0 n49w109/", + "0 n49w110/", + "0 n49w111/", + "0 n49w112/", + "0 n49w113/", + "0 n49w114/", + "0 n49w115/", + "0 n49w116/", + "0 n49w117/", + "0 n49w118/", + "0 n49w119/", + "0 n49w120/", + "0 n49w121/", + "0 n49w122/", + "0 n49w123/", + "0 n49w124/", + "0 n49w125/", + "0 n50w095/", + "0 n50w096/", + "0 n50w097/", + "0 n50w098/", + "0 n50w099/", + "0 n50w100/", + "0 n50w101/", + "0 n50w107/", + "0 n50w108/", + "0 n50w122/", + "0 n50w123/", + "0 n50w124/", + "0 n52e177/", + "0 n52e178/", + "0 n52e179/", + "0 n52w174/", + "0 n52w176/", + "0 n52w177/", + "0 n52w178/", + "0 n52w179/", + "0 n52w180/", + "0 n53e172/", + "0 n53e173/", + "0 n53e174/", + "0 n53e175/", + "0 n53e177/", + "0 n53e178/", + "0 n53e179/", + "0 n53w169/", + "0 n53w170/", + "0 n53w171/", + "0 n53w172/", + "0 n53w173/", + "0 n53w174/", + "0 n53w175/", + "0 n53w176/", + "0 n53w177/", + "0 n54e172/", + "0 n54e173/", + "0 n54w167/", + "0 n54w168/", + "0 n54w169/", + "0 n54w170/", + "0 n55w131/", + "0 n55w132/", + "0 n55w133/", + "0 n55w134/", + "0 n55w160/", + "0 n55w161/", + "0 n55w162/", + "0 n55w163/", + "0 n55w164/", + "0 n55w165/", + "0 n55w166/", + "0 n55w167/", + "0 n56w130/", + "0 n56w131/", + "0 n56w132/", + "0 n56w133/", + "0 n56w134/", + "0 n56w135/", + "0 n56w156/", + "0 n56w157/", + "0 n56w159/", + "0 n56w160/", + "0 n56w161/", + "0 n56w162/", + "0 n56w163/", + "0 n56w164/", + "0 n57w131/", + "0 n57w132/", + "0 n57w133/", + "0 n57w134/", + "0 n57w135/", + "0 n57w136/", + "0 n57w153/", + "0 n57w154/", + "0 n57w155/", + "0 n57w156/", + "0 n57w157/", + "0 n57w158/", + "0 n57w159/", + "0 n57w160/", + "0 n57w161/", + "0 n57w162/", + "0 n57w170/", + "0 n57w171/", + "0 n58w133/", + "0 n58w134/", + "0 n58w135/", + "0 n58w136/", + "0 n58w137/", + "0 n58w153/", + "0 n58w154/", + "0 n58w155/", + "0 n58w156/", + "0 n58w157/", + "0 n58w158/", + "0 n58w159/", + "0 n58w170/", + "0 n58w171/", + "0 n59w134/", + "0 n59w135/", + "0 n59w136/", + "0 n59w137/", + "0 n59w138/", + "0 n59w139/", + "0 n59w152/", + "0 n59w153/", + "0 n59w154/", + "0 n59w155/", + "0 n59w156/", + "0 n59w157/", + "0 n59w158/", + "0 n59w159/", + "0 n59w160/", + "0 n59w161/", + "0 n59w162/", + "0 n59w163/", + "0 n60w135/", + "0 n60w136/", + "0 n60w137/", + "0 n60w138/", + "0 n60w139/", + "0 n60w140/", + "0 n60w141/", + "0 n60w142/", + "0 n60w143/", + "0 n60w144/", + "0 n60w145/", + "0 n60w146/", + "0 n60w147/", + "0 n60w148/", + "0 
n60w149/", + "0 n60w150/", + "0 n60w151/", + "0 n60w152/", + "0 n60w153/", + "0 n60w154/", + "0 n60w155/", + "0 n60w156/", + "0 n60w157/", + "0 n60w158/", + "0 n60w159/", + "0 n60w160/", + "0 n60w161/", + "0 n60w162/", + "0 n60w163/", + "0 n60w164/", + "0 n60w165/", + "0 n60w166/", + "0 n60w167/", + "0 n60w168/", + "0 n61w140/", + "0 n61w141/", + "0 n61w142/", + "0 n61w143/", + "0 n61w144/", + "0 n61w145/", + "0 n61w146/", + "0 n61w147/", + "0 n61w148/", + "0 n61w149/", + "0 n61w150/", + "0 n61w151/", + "0 n61w152/", + "0 n61w153/", + "0 n61w154/", + "0 n61w155/", + "0 n61w156/", + "0 n61w157/", + "0 n61w158/", + "0 n61w159/", + "0 n61w160/", + "0 n61w161/", + "0 n61w162/", + "0 n61w163/", + "0 n61w164/", + "0 n61w165/", + "0 n61w166/", + "0 n61w167/", + "0 n61w168/", + "0 n61w173/", + "0 n61w174/", + "0 n62w142/", + "0 n62w143/", + "0 n62w144/", + "0 n62w145/", + "0 n62w146/", + "0 n62w147/", + "0 n62w148/", + "0 n62w149/", + "0 n62w150/", + "0 n62w151/", + "0 n62w152/", + "0 n62w153/", + "0 n62w154/", + "0 n62w155/", + "0 n62w156/", + "0 n62w157/", + "0 n62w158/", + "0 n62w159/", + "0 n62w160/", + "0 n62w161/", + "0 n62w162/", + "0 n62w163/", + "0 n62w164/", + "0 n62w165/", + "0 n62w166/", + "0 n62w167/", + "0 n63w142/", + "0 n63w143/", + "0 n63w144/", + "0 n63w145/", + "0 n63w146/", + "0 n63w147/", + "0 n63w148/", + "0 n63w149/", + "0 n63w150/", + "0 n63w151/", + "0 n63w152/", + "0 n63w153/", + "0 n63w154/", + "0 n63w155/", + "0 n63w156/", + "0 n63w157/", + "0 n63w158/", + "0 n63w159/", + "0 n63w160/", + "0 n63w161/", + "0 n63w162/", + "0 n63w163/", + "0 n63w164/", + "0 n63w165/", + "0 n63w166/", + "0 n63w167/", + "0 n63w170/", + "0 n64w141/", + "0 n64w142/", + "0 n64w143/", + "0 n64w144/", + "0 n64w145/", + "0 n64w146/", + "0 n64w147/", + "0 n64w148/", + "0 n64w149/", + "0 n64w150/", + "0 n64w151/", + "0 n64w152/", + "0 n64w153/", + "0 n64w154/", + "0 n64w155/", + "0 n64w156/", + "0 n64w157/", + "0 n64w158/", + "0 n64w159/", + "0 n64w160/", + "0 n64w161/", + "0 n64w162/", + "0 n64w163/", + "0 n64w164/", + "0 n64w165/", + "0 n64w169/", + "0 n64w170/", + "0 n64w171/", + "0 n64w172/", + "0 n65w141/", + "0 n65w142/", + "0 n65w143/", + "0 n65w144/", + "0 n65w145/", + "0 n65w146/", + "0 n65w147/", + "0 n65w148/", + "0 n65w149/", + "0 n65w150/", + "0 n65w151/", + "0 n65w152/", + "0 n65w153/", + "0 n65w154/", + "0 n65w155/", + "0 n65w156/", + "0 n65w157/", + "0 n65w158/", + "0 n65w159/", + "0 n65w160/", + "0 n65w161/", + "0 n65w162/", + "0 n65w163/", + "0 n65w164/", + "0 n65w165/", + "0 n65w166/", + "0 n65w167/", + "0 n66w141/", + "0 n66w142/", + "0 n66w143/", + "0 n66w144/", + "0 n66w145/", + "0 n66w146/", + "0 n66w147/", + "0 n66w148/", + "0 n66w149/", + "0 n66w150/", + "0 n66w151/", + "0 n66w152/", + "0 n66w153/", + "0 n66w154/", + "0 n66w155/", + "0 n66w156/", + "0 n66w157/", + "0 n66w158/", + "0 n66w159/", + "0 n66w160/", + "0 n66w161/", + "0 n66w162/", + "0 n66w163/", + "0 n66w164/", + "0 n66w165/", + "0 n66w166/", + "0 n66w167/", + "0 n66w168/", + "0 n66w169/", + "0 n67w141/", + "0 n67w142/", + "0 n67w143/", + "0 n67w144/", + "0 n67w145/", + "0 n67w146/", + "0 n67w147/", + "0 n67w148/", + "0 n67w149/", + "0 n67w150/", + "0 n67w151/", + "0 n67w152/", + "0 n67w153/", + "0 n67w154/", + "0 n67w155/", + "0 n67w156/", + "0 n67w157/", + "0 n67w158/", + "0 n67w159/", + "0 n67w160/", + "0 n67w161/", + "0 n67w162/", + "0 n67w163/", + "0 n67w164/", + "0 n67w165/", + "0 n67w166/", + "0 n67w167/", + "0 n67w168/", + "0 n68w141/", + "0 n68w142/", + "0 n68w143/", + "0 n68w144/", + "0 n68w145/", + "0 
n68w146/", + "0 n68w147/", + "0 n68w148/", + "0 n68w149/", + "0 n68w150/", + "0 n68w151/", + "0 n68w152/", + "0 n68w153/", + "0 n68w154/", + "0 n68w155/", + "0 n68w156/", + "0 n68w157/", + "0 n68w158/", + "0 n68w159/", + "0 n68w160/", + "0 n68w161/", + "0 n68w162/", + "0 n68w163/", + "0 n68w164/", + "0 n68w165/", + "0 n68w166/", + "0 n69w141/", + "0 n69w142/", + "0 n69w143/", + "0 n69w144/", + "0 n69w145/", + "0 n69w146/", + "0 n69w147/", + "0 n69w148/", + "0 n69w149/", + "0 n69w150/", + "0 n69w151/", + "0 n69w152/", + "0 n69w153/", + "0 n69w154/", + "0 n69w155/", + "0 n69w156/", + "0 n69w157/", + "0 n69w158/", + "0 n69w159/", + "0 n69w160/", + "0 n69w161/", + "0 n69w162/", + "0 n69w163/", + "0 n69w164/", + "0 n69w165/", + "0 n69w166/", + "0 n69w167/", + "0 n70w141/", + "0 n70w142/", + "0 n70w143/", + "0 n70w144/", + "0 n70w145/", + "0 n70w146/", + "0 n70w147/", + "0 n70w148/", + "0 n70w149/", + "0 n70w150/", + "0 n70w151/", + "0 n70w152/", + "0 n70w153/", + "0 n70w154/", + "0 n70w155/", + "0 n70w156/", + "0 n70w157/", + "0 n70w158/", + "0 n70w159/", + "0 n70w160/", + "0 n70w161/", + "0 n70w162/", + "0 n70w163/", + "0 n70w164/", + "0 n70w165/", + "0 n71w143/", + "0 n71w144/", + "0 n71w145/", + "0 n71w146/", + "0 n71w147/", + "0 n71w148/", + "0 n71w149/", + "0 n71w150/", + "0 n71w151/", + "0 n71w152/", + "0 n71w153/", + "0 n71w154/", + "0 n71w155/", + "0 n71w156/", + "0 n71w157/", + "0 n71w158/", + "0 n71w159/", + "0 n71w160/", + "0 n71w161/", + "0 n71w162/", + "0 n71w163/", + "0 n71w164/", + "0 n72w155/", + "0 n72w156/", + "0 n72w157/", + "0 n72w158/", + "0 s14w170/", + "0 s14w171/" + ) + + + # sub out HTML copy pattern with vsi URL + t2 <- gsub("0 ", + "/vsicurl/https://prd-tnm.s3.amazonaws.com/StagedProducts/Elevation/13/TIFF/current/", + t) + + # add file paths following NED scheme + t3 <- paste0(t2, + "USGS_13_", + basename(t2), + ".tif") + + # Write table to data-raw + write.table(t3, + paste0(base_dir, "/ned_list_USGS_13.txt"), + row.names = FALSE, + col.names = FALSE, + quote = FALSE) + + +} +# dem_base_dir <- "/Users/anguswatters/Desktop/transects_paper/data/dem" +# download_3dep_vrt(dem_base_dir) +# +# # Create meta data object of three NED resoruces +# ned <- data.frame(rbind( +# # c(id = "USGS_1", +# # URL = "/vsicurl/https://prd-tnm.s3.amazonaws.com/StagedProducts/Elevation/1", +# # varname = "30m elevation", +# # long_name = "30m (1 arcsec) National Elevation Dataset", +# # units = "m"), +# # +# # c(id = "USGS_2", +# # URL = "/vsicurl/https://prd-tnm.s3.amazonaws.com/StagedProducts/Elevation/2", +# # varname = "60m elevation", +# # long_name = "60m (2 arcsec) National Elevation Dataset Alaska", +# # units = "m"), +# +# c(id = "USGS_13", +# URL = "/vsicurl/https://prd-tnm.s3.amazonaws.com/StagedProducts/Elevation/13", +# varname = "10m elevation", +# long_name = "10m (1/3th arcsec) National Elevation Dataset", +# units = "m") +# )) +# +# +# +# +# # Loop over the three resolutions +# for (i in 1:length(ned)) { +# i = 1 +# +# # Define output text file path +# txt_file <- paste0(dem_base_dir, "/ned_list_", ned$id[i], "_2.txt") +# +# # Define output VRT path +# vrt_file <- paste0(paste0(dem_base_dir, "/ned_", ned$id[i], ".vrt")) +# +# # If VRT does NOT exist, build VRT +# if (!file.exists(vrt_file)) { +# +# # read the corresponding index.gpkg +# files <- sf::read_sf(ned$domain_url[i]) +# DEM_UR +# DEM_PATH <- "/vsicurl/https://prd-tnm.s3.amazonaws.com/StagedProducts/Elevation/1/TIFF/USGS_Seamless_DEM_1.vrt" +# # Build full HTTPS paths to "./current/" +# files <- 
c(file.path(ned$URL[i], "TIFF/current", gsub("[.]/", "", files$location))) +# +# files <- "/vsicurl/https://prd-tnm.s3.amazonaws.com/StagedProducts/Elevation/1/TIFF/USGS_Seamless_DEM_1.vrt" +# +# # write list of files to text file +# write.table(files, txt_file, row.names = FALSE, col.names = FALSE, quote = FALSE) +# +# # build VRT from text file input using GDAL system call ... +# system(paste("gdalbuildvrt -input_file_list", txt_file, vrt_file)) +# } +# +# logger::log_info("Finished ", ned$id[i], "...") +# } + +# +# match_transects_to_extended_cs_pts <- function(transect_lines, fixed_cs_pts, crosswalk_id, extension_pct = 0.5 ) { +# +# # transect_lines = transects +# # fixed_cs_pts = fixed_pts +# # crosswalk_id = CROSSWALK_ID +# +# fixed_cs_pts <- nhdplusTools::rename_geometry(fixed_cs_pts, "geometry") +# transect_lines <- nhdplusTools::rename_geometry(transect_lines, "geometry") +# +# # get the counts of each point type to add this data to the transect_lines dataset +# point_type_counts <- hydrofabric3D::get_point_type_counts(classified_pts = fixed_cs_pts, +# crosswalk_id = crosswalk_id) +# # Check the number of cross sections that were extended +# message("Subsetting cross section points generated after extending transect_lines...") +# +# # extract cross section points that have an "is_extended" value of TRUE +# extended_pts <- +# fixed_cs_pts %>% +# dplyr::filter(is_extended) %>% +# hydrofabric3D::add_tmp_id(x = crosswalk_id) +# +# # extended_pts %>% +# # get_unique_tmp_ids() %>% +# # length() +# +# # extract transect_lines that have a "crosswalk_id" in the "extended_pts" dataset +# update_transect_lines <- +# transect_lines %>% +# hydrofabric3D::add_tmp_id(x = crosswalk_id) %>% +# dplyr::filter(tmp_id %in% unique(extended_pts$tmp_id)) +# +# cs_pt_uids <- unique(hydrofabric3D::add_tmp_id(fixed_cs_pts, x = crosswalk_id)$tmp_id) +# +# # If any transect_lines were extended, update the transect_lines dataset, and overwrite local and S3 transect_lines geopackages +# if (nrow(update_transect_lines) > 0) { +# message("Updating ", nrow(update_transect_lines), " transect_lines") +# +# +# # update_transect_lines <- +# # update_transect_lines %>% +# # dplyr::rename(hy_id := !!sym(crosswalk_id)) +# # +# update_transect_lines <- +# update_transect_lines %>% +# # apply extend_by_percent function to each transect line: +# hydrofabric3D:::extend_by_percent( +# crosswalk_id = crosswalk_id, +# pct = extension_pct, +# length_col = "cs_lengthm" +# ) +# +# update_transect_lines <- hydroloom::rename_geometry(update_transect_lines, "geometry") +# +# # update_transect_lines <- +# # update_transect_lines %>% +# # dplyr::rename(!!sym(crosswalk_id) := hy_id) +# +# # cs_pt_uids <- unique(hydrofabric3D::add_tmp_id(fixed_cs_pts, x = get(crosswalk_id))$tmp_id) +# # transect_uids <- unique(hydrofabric3D::add_tmp_id(transect_lines, x = get(crosswalk_id))$tmp_id) +# +# # Filter down to ONLY points that were finalized and rectified from rectify_cs_pts() +# # Remove old transect_lines that have "tmp_id" in "extended_pts" (transect_lines that were unchanged and are "good_to_go") +# # and then replace with old transect_lines with the "update_transect_lines" +# out_transect_lines <- +# transect_lines %>% +# hydrofabric3D::add_tmp_id(x = crosswalk_id) %>% +# dplyr::filter(tmp_id %in% cs_pt_uids) %>% +# dplyr::filter(!tmp_id %in% unique(extended_pts$tmp_id)) %>% +# dplyr::bind_rows( +# dplyr::mutate(update_transect_lines, is_extended = TRUE) +# ) +# +# # transect_lines %>% +# # hydrofabric3D::add_tmp_id(x = "hy_id") %>% +# # 
# dplyr::filter(!tmp_id %in% unique(extended_pts$tmp_id)) %>% +# # dplyr::filter(tmp_id %in% unique(hydrofabric3D::add_tmp_id(fixed_pts, x = "hy_id")$tmp_id)) %>% # Subset down to the remaining tmp_ids in the fixed points +# # dplyr::filter(!tmp_id %in% unique(extended_pts$tmp_id)) %>% # remove the tmp_ids that we are going add back in with the extended versions of those tmp_ids +# # dplyr::bind_rows( # bring in the new updated extended transect_lines +# # dplyr::mutate( +# # update_transect_lines, +# # is_extended = TRUE +# # ) +# # ) +# } else { +# # If no transect_lines were extended +# out_transect_lines <- +# transect_lines %>% +# hydrofabric3D::add_tmp_id(x = crosswalk_id) %>% +# dplyr::filter(tmp_id %in% cs_pt_uids) %>% +# # dplyr::filter(tmp_id %in% unique(hydrofabric3D::add_tmp_id(fixed_cs_pts, x = get(crosswalk_id))$tmp_id)) %>% +# dplyr::filter(!tmp_id %in% unique(extended_pts$tmp_id)) +# } +# +# # Finalize new transect_lines +# out_transect_lines <- +# out_transect_lines %>% +# dplyr::left_join( +# point_type_counts, +# by = c(crosswalk_id, "cs_id") +# ) %>% +# dplyr::left_join( +# dplyr::ungroup( +# dplyr::slice( +# dplyr::group_by( +# dplyr::select(sf::st_drop_geometry(fixed_cs_pts), +# dplyr::any_of(crosswalk_id), +# cs_id, bottom, left_bank, right_bank, valid_banks, has_relief +# ), +# dplyr::across(dplyr::any_of(c(crosswalk_id, "cs_id"))) +# ), +# 1 +# ) +# ), +# by = c(crosswalk_id, "cs_id") +# ) %>% +# dplyr::select( +# dplyr::any_of(crosswalk_id), +# cs_source, cs_id, cs_measure, cs_lengthm, +# # sinuosity, +# is_extended, +# left_bank_count, right_bank_count, channel_count, bottom_count, +# bottom, left_bank, right_bank, valid_banks, has_relief, +# geometry +# ) %>% +# dplyr::mutate( +# is_extended = ifelse(is.na(is_extended), FALSE, is_extended) +# ) +# +# return(out_transect_lines) +# } + +# utility function for getting transects extended and +# matching cross section points that went through "get_improved_cs_pts()" and that were extended for improvement +# returns the extended version of the transects +# match_transects_to_extended_cs_pts <- function(transect_lines, fixed_cs_pts, crosswalk_id) { +# +# # transect_lines = transects +# # fixed_cs_pts = fixed_pts +# # crosswalk_id = CROSSWALK_ID +# +# fixed_cs_pts <- nhdplusTools::rename_geometry(fixed_cs_pts, "geometry") +# transect_lines <- nhdplusTools::rename_geometry(transect_lines, "geometry") +# +# # get the counts of each point type to add this data to the transect_lines dataset +# point_type_counts <- hydrofabric3D::get_point_type_counts(classified_pts = fixed_cs_pts, +# crosswalk_id = crosswalk_id) +# # Check the number of cross sections that were extended +# message("Subsetting cross section points generated after extending transect_lines...") +# +# # extract cross section points that have an "is_extended" value of TRUE +# extended_pts <- +# fixed_cs_pts %>% +# dplyr::filter(is_extended) %>% +# hydrofabric3D::add_tmp_id(x = crosswalk_id) +# +# # extended_pts %>% +# # get_unique_tmp_ids() %>% +# # length() +# +# # extract transect_lines that have a "crosswalk_id" in the "extended_pts" dataset +# update_transect_lines <- +# transect_lines %>% +# hydrofabric3D::add_tmp_id(x = crosswalk_id) %>% +# dplyr::filter(tmp_id %in% unique(extended_pts$tmp_id)) +# +# cs_pt_uids <- unique(hydrofabric3D::add_tmp_id(fixed_cs_pts, x = crosswalk_id)$tmp_id) +# +# # If any transect_lines were extended, update the transect_lines dataset, and overwrite local and S3 transect_lines geopackages +# if (nrow(update_transect_lines) > 0) 
{ +# message("Updating ", nrow(update_transect_lines), " transect_lines") +# +# +# update_transect_lines <- +# update_transect_lines %>% +# dplyr::rename(hy_id := !!sym(crosswalk_id)) +# +# update_transect_lines <- +# update_transect_lines %>% +# # apply extend_by_percent function to each transect line: +# hydrofabric3D:::extend_by_percent( +# pct = EXTENSION_PCT, +# length_col = "cs_lengthm" +# ) +# +# update_transect_lines <- hydroloom::rename_geometry(update_transect_lines, "geometry") +# +# update_transect_lines <- +# update_transect_lines %>% +# dplyr::rename(!!sym(crosswalk_id) := hy_id) +# +# # cs_pt_uids <- unique(hydrofabric3D::add_tmp_id(fixed_cs_pts, x = get(crosswalk_id))$tmp_id) +# # transect_uids <- unique(hydrofabric3D::add_tmp_id(transect_lines, x = get(crosswalk_id))$tmp_id) +# +# # Filter down to ONLY points that were finalized and rectified from rectify_cs_pts() +# # Remove old transect_lines that have "tmp_id" in "extended_pts" (transect_lines that were unchanged and are "good_to_go") +# # and then replace with old transect_lines with the "update_transect_lines" +# out_transect_lines <- +# transect_lines %>% +# hydrofabric3D::add_tmp_id(x = crosswalk_id) %>% +# dplyr::filter(tmp_id %in% cs_pt_uids) %>% +# dplyr::filter(!tmp_id %in% unique(extended_pts$tmp_id)) %>% +# dplyr::bind_rows( +# dplyr::mutate(update_transect_lines, is_extended = TRUE) +# ) +# +# # transect_lines %>% +# # hydrofabric3D::add_tmp_id(x = "hy_id") %>% +# # # dplyr::filter(!tmp_id %in% unique(extended_pts$tmp_id)) %>% +# # dplyr::filter(tmp_id %in% unique(hydrofabric3D::add_tmp_id(fixed_pts, x = "hy_id")$tmp_id)) %>% # Subset down to the remaining tmp_ids in the fixed points +# # dplyr::filter(!tmp_id %in% unique(extended_pts$tmp_id)) %>% # remove the tmp_ids that we are going add back in with the extended versions of those tmp_ids +# # dplyr::bind_rows( # bring in the new updated extended transect_lines +# # dplyr::mutate( +# # update_transect_lines, +# # is_extended = TRUE +# # ) +# # ) +# } else { +# # If no transect_lines were extended +# out_transect_lines <- +# transect_lines %>% +# hydrofabric3D::add_tmp_id(x = crosswalk_id) %>% +# dplyr::filter(tmp_id %in% cs_pt_uids) %>% +# # dplyr::filter(tmp_id %in% unique(hydrofabric3D::add_tmp_id(fixed_cs_pts, x = get(crosswalk_id))$tmp_id)) %>% +# dplyr::filter(!tmp_id %in% unique(extended_pts$tmp_id)) +# } +# +# # Finalize new transect_lines +# out_transect_lines <- +# out_transect_lines %>% +# dplyr::left_join( +# point_type_counts, +# by = c(crosswalk_id, "cs_id") +# ) %>% +# dplyr::left_join( +# dplyr::ungroup( +# dplyr::slice( +# dplyr::group_by( +# dplyr::select(sf::st_drop_geometry(fixed_cs_pts), +# dplyr::any_of(crosswalk_id), +# cs_id, bottom, left_bank, right_bank, valid_banks, has_relief +# ), +# dplyr::across(dplyr::any_of(c(crosswalk_id, "cs_id"))) +# ), +# 1 +# ) +# ), +# by = c(crosswalk_id, "cs_id") +# ) %>% +# dplyr::select( +# dplyr::any_of(crosswalk_id), +# cs_source, cs_id, cs_measure, cs_lengthm, +# # sinuosity, +# is_extended, +# left_bank_count, right_bank_count, channel_count, bottom_count, +# bottom, left_bank, right_bank, valid_banks, has_relief, +# geometry +# ) %>% +# dplyr::mutate( +# is_extended = ifelse(is.na(is_extended), FALSE, is_extended) +# ) +# +# return(out_transect_lines) +# } + + + diff --git a/runners/cs_runner2/00_driver.R b/runners/cs_runner2/00_driver.R new file mode 100644 index 0000000..9c6eaed --- /dev/null +++ b/runners/cs_runner2/00_driver.R @@ -0,0 +1,27 @@ +### Run this file to have all runner scripts 
run in order + +# downloads nextgen datasets +source("runners/cs_runner2/config_env.R") + +# downloads datasets +# - Nextgen data +# - Reference features (for waterbody filtering) +# - FEMA 100 year floodplain polygons (FGBs) +# - Process state level FEMA polygons to CONUS +# - ML outputs +# - 3DEP DEM VRT +source("runners/cs_runner2/download_conus_nextgen.R") +source("runners/cs_runner2/download_conus_ref_features.R") +source("runners/cs_runner2/download_fema.r") +source("runners/cs_runner2/process_fema.R") +source("runners/cs_runner2/download_ml_outputs.R") +source("runners/cs_runner2/download_dem_from_vrt.R") + +# generate and upload transects datasets +source("runners/cs_runner2/01_transects.R") + +# generate and upload cross sections points datasets +source("runners/cs_runner2/02_cs_pts.R") + +# Apply machine learning topwidths and depths estimates to DEM cross section points +source("runners/cs_runner2/03_inject_ml.R") diff --git a/runners/cs_runner2/01_transects.R b/runners/cs_runner2/01_transects.R new file mode 100644 index 0000000..e11f46c --- /dev/null +++ b/runners/cs_runner2/01_transects.R @@ -0,0 +1,190 @@ + +library(sf) +library(dplyr) + +source("runners/cs_runner2/base_variables.R") +# source("runners/cs_runner2/utils.R") + +VPU_IDS <- get_vpu_ids() + +for (i in seq_along(VPU_IDS)) { +# for (i in 21:length(VPU_IDS)) { + + # -------------------------------------------------------------- + # ---- Setup variables / paths ---- + # -------------------------------------------------------------- + # VERSION_DIRS_LIST + vpu <- VPU_IDS[i] + CROSSWALK_ID <- "id" + + transect_filenames <- get_transect_filenames(vpu, sep = "-") + base_output_path <- paste0(VERSION_DIRS_LIST$transects_base_dir, "/", transect_filenames$transects_base_path) + base_transect_file_exists <- file.exists(base_output_path) + + do_process_transects <- !base_transect_file_exists || REGENERATE_TRANSECTS + # do_not_reprocess_transects <- base_transect_file_exists && !REGENERATE_TRANSECTS + + if (do_process_transects) { + message("Creating VPU ", vpu, " transects:", + "\n - flowpaths: '", + basename(CONUS_NEXTGEN_GPKG_PATH), "'" + ) + } else { + message( + "VPU ", vpu, " transects file already exists at:\n - '", base_output_path, "'", + "\n\n >>> NOTE: To regenerate transects:\n\t\t- Set REGENERATE_TRANSECTS = TRUE in 'base_variables.R' \n\t\tOR\n\t\t- delete the '", + basename(base_output_path), "' file" + ) + next + + } + + # if we are going to create new transects and the transect file already exists, delete it as to not cause confusion or accidently append to an old dataset + if (do_process_transects && base_transect_file_exists) { + message("Deleting old transect file:\n > '", base_output_path, "'") + file.remove(base_output_path) + } + + # -------------------------------------------------------------- + # ---- Load flowpaths for VPU from conus_nextgen.gpkg ---- + # -------------------------------------------------------------- + + # read in nextgen flowlines + flines <- + CONUS_NEXTGEN_GPKG_PATH %>% + sf::read_sf(query = paste0("SELECT * FROM flowpaths WHERE vpuid = '", vpu, "'")) + + has_no_flowlines <- nrow(flines) == 0 + + if (has_no_flowlines) { + message("Skipping VPU ", vpu, " as no flowlines were found in '", basename(CONUS_NEXTGEN_GPKG_PATH), "'..." 
) + next + } + + + + # Add an estimate bankful width based on Total downsteam drainage area (sqkm, Power law equation) + flines <- + flines %>% + hydroloom::rename_geometry("geometry") %>% + hydrofabric3D::add_powerlaw_bankful_width( + total_drainage_area_sqkm_col = "tot_drainage_areasqkm", + min_bf_width = 50 + ) %>% + dplyr::group_by(order) %>% + dplyr::slice(1:10) %>% + dplyr::ungroup() %>% + dplyr::select( + id, + lengthkm, + tot_drainage_areasqkm, + bf_width, + mainstem, + geometry + ) + # dplyr::group_by(order) + + # -------------------------------------------------------------- + # ---- Generate transects ---- + # -------------------------------------------------------------- + # flines$geometry %>% sf::st_is_empty() %>% any() + + # sf::write_sf( + # flines, + # "/Users/anguswatters/Desktop/wrong_cs_ids_flines_error.gpkg" + # # "/Users/anguswatters/Desktop/empty_geom_flines_error.gpkg" + # ) + + # create transect lines + transects <- hydrofabric3D::cut_cross_sections( + net = flines, # flowlines network + crosswalk_id = CROSSWALK_ID, # Unique feature ID + cs_widths = flines$bf_width, # cross section width of each "id" linestring ("hy_id") + # cs_widths = 15, + num = 3, # number of cross sections per "id" linestring ("hy_id") + # smooth = FALSE, # smooth lines + # densify = NULL, + smooth = TRUE, # smooth lines + densify = 3, # densify linestring points + + rm_self_intersect = TRUE, # remove self intersecting transects + fix_braids = FALSE, # whether to fix braided flowlines or not + add = TRUE # whether to add back the original data + ) + # dplyr::mutate( + # cs_source = CS_SOURCE + # ) + # transects$cs_lengthm + # transects$geometry %>% sf::st_length() + + # transects %>% + # dplyr::mutate( + # new_cs_lengthm = as.numeric(sf::st_length(.)) + # ) %>% + # dplyr::filter(!dplyr::near(new_cs_lengthm, cs_lengthm, tol = 2)) + # .Machine$double.eps^1 + # crosswalk_id <- "id" + # # reenumerate the cs_ids for each transect based on cs_measure sorting, and make sure all cross sections are correctly numbered + # mismatches <- + # transects %>% + # sf::st_drop_geometry() %>% + # dplyr::group_by(dplyr::across(dplyr::any_of(c(crosswalk_id)))) %>% + # dplyr::arrange(cs_measure, .by_group = TRUE) %>% + # dplyr::mutate( + # new_cs_id = 1:dplyr::n() + # ) %>% + # dplyr::ungroup() %>% + # dplyr::filter(cs_id != new_cs_id) + # mismatches + # transects %>% + # dplyr::filter(id %in% c("wb-1023360", "wb-1023363")) + # + # transects %>% + # dplyr::filter(id %in% c("wb-1023360", "wb-1023363")) %>% + # mapview::mapview() + # + # # FALSE if there are any transects with different cs_ids to the newly created cs_id + # # Otherwise TRUE + # has_valid_cs_ids <- !(nrow(mismatches) > 0) + # + # # t2 <- transects %>% hydrofabric3D:::renumber_cs_ids("id") + # # hydrofabric3D::validate_transects(t2, "id") + # transects + is_valid_transects <- hydrofabric3D::validate_transects(transects, "id") + is_valid_transects_against_flowlines <- hydrofabric3D::validate_transects_against_flowlines(transects, flines, "id") + + # is_valid_transects <- hydrofabric3D::validate_transects(transects[c(1, 3, 55), ], "id") + # is_valid_transects_against_flowlines <- hydrofabric3D::validate_transects_against_flowlines(transects, flines[c(-1), ], "id") + # is_valid_transects_against_flowlines + # hydrofabric3D:::rm_multiflowline_intersections(transects, flines[c(-1), ]) + + # select relevent columns + transects <- + transects %>% + dplyr::mutate( + cs_source = CS_SOURCE + ) %>% + # hydrofabric3D:::select_transects(CROSSWALK_ID) + 
dplyr::select( + dplyr::any_of(CROSSWALK_ID), + cs_id, + cs_lengthm, + # cs_lengthm = new_cs_lengthm, + cs_measure, + sinuosity, + cs_source, + geometry + ) + + if (is_valid_transects && is_valid_transects_against_flowlines) { + message("Saving transects to:\n - filepath: '", base_output_path, "'") + sf::write_sf( + transects, + base_output_path + ) + } + + message(" --- VPU ", vpu, " transects generation complete! --- ") + message() + +} diff --git a/runners/cs_runner2/02_extend_transects_by_fema.R b/runners/cs_runner2/02_extend_transects_by_fema.R new file mode 100644 index 0000000..106dc44 --- /dev/null +++ b/runners/cs_runner2/02_extend_transects_by_fema.R @@ -0,0 +1,118 @@ + +library(sf) +library(dplyr) + +source("runners/cs_runner2/base_variables.R") +# source("runners/cs_runner2/utils.R") + +VPU_IDS <- get_vpu_ids() + +for (i in seq_along(VPU_IDS)) { + # for (i in 21:length(VPU_IDS)) { + + # -------------------------------------------------------------- + # ---- Setup variables / paths ---- + # -------------------------------------------------------------- + # VERSION_DIRS_LIST + # i = 8 + vpu <- VPU_IDS[i] + CROSSWALK_ID <- "id" + # cs_filesnames <- get_cross_section_filenames(vpu, sep = "-") + transect_filenames <- get_transect_filenames(vpu, sep = "-") + + # TODO: Add a catch for not allowing this all to run if the base_transects do NOT exist + base_transects_path <- paste0(VERSION_DIRS_LIST$transects_base_dir, "/", transect_filenames$transects_base_path) + fema_transects_output_path <- paste0(VERSION_DIRS_LIST$transects_fema_extended_dir, "/", transect_filenames$transects_fema_extended_path) + + base_transect_file_exists <- file.exists(base_transects_path) + fema_transects_file_exists <- file.exists(fema_transects_output_path) + + do_process_transects <- !fema_transects_file_exists || REGENERATE_TRANSECTS + + if (do_process_transects) { + message("Creating VPU ", vpu, " transects:", + "\n - flowpaths: '", + basename(CONUS_NEXTGEN_GPKG_PATH), "'", + "\n - base transects: '", + basename(base_transects_path), "'" + ) + } else { + message( + "VPU ", vpu, " transects file already exists at:\n - '", fema_transects_output_path, "'", + "\n\n >>> NOTE: To regenerate transects:\n\t\t- Set REGENERATE_TRANSECTS = TRUE in 'base_variables.R' \n\t\tOR\n\t\t- delete the '", + basename(fema_transects_output_path), "' file" + ) + next + + } + + if (do_process_transects && fema_transects_file_exists) { + message("Deleting old fema extended transect file:\n > '", fema_transects_output_path, "'") + file.remove(fema_transects_output_path) + } + + # read in nextgen flowlines + flines <- + CONUS_NEXTGEN_GPKG_PATH %>% + sf::read_sf(query = paste0("SELECT * FROM flowpaths WHERE vpuid = '", vpu, "'")) %>% + hydroloom::rename_geometry("geometry") + + # transects + transects <- sf::read_sf(base_transects_path) %>% + hydroloom::rename_geometry("geometry") + + # FEMA polygons + fema <- + CONUS_FEMA_GPKG_PATH %>% + sf::read_sf(layer = get_fema_conus_layer_name(vpu)) %>% + rmapshaper::ms_simplify(keep_shapes = T, keep = 0.01, sys = TRUE, sys_mem = 16) %>% + hydroloom::rename_geometry("geometry") + # fema <- rmapshaper::ms_simplify(fema, keep_shapes = T, keep = 0.1, sys = TRUE, sys_mem = 16) + + # # add mainstem to + # transects <- + # transects %>% + # dplyr::left_join( + # flines %>% + # sf::st_drop_geometry() %>% + # dplyr::select(dplyr::any_of(CROSSWALK_ID), mainstem), + # by = CROSSWALK_ID + # ) + + message("Extending transects out to FEMA 100yr floodplain polygon boundaries - (", Sys.time(), ")") + + # 
TODO: make sure this 3000m extension distance is appropriate across VPUs + # TODO: also got to make sure that this will be feasible on memory on the larger VPUs... + extended_transects <- hydrofabric3D::extend_transects_to_polygons( + transect_lines = transects, + polygons = fema, + flowlines = flines, + crosswalk_id = CROSSWALK_ID, + grouping_id = CROSSWALK_ID, + # grouping_id = "mainstem", + max_extension_distance = 3000, + reindex_cs_ids = TRUE + ) + + is_valid_transects <- hydrofabric3D::validate_transects(extended_transects, "id") + is_valid_transects_against_flowlines <- hydrofabric3D::validate_transects_against_flowlines(extended_transects, flines, "id") + + + + + +} + + + + + + + + + + + + + + \ No newline at end of file diff --git a/runners/cs_runner/.Rapp.history b/runners/cs_runner2/03_extract_initial_cs_pts.R similarity index 100% rename from runners/cs_runner/.Rapp.history rename to runners/cs_runner2/03_extract_initial_cs_pts.R diff --git a/runners/cs_runner2/04_extend_transects_by_cs_attributes.R b/runners/cs_runner2/04_extend_transects_by_cs_attributes.R new file mode 100644 index 0000000..e69de29 diff --git a/runners/cs_runner2/05_extract_output_cs_pts.R b/runners/cs_runner2/05_extract_output_cs_pts.R new file mode 100644 index 0000000..e69de29 diff --git a/runners/cs_runner2/06_inject_ml_bathymetry.R b/runners/cs_runner2/06_inject_ml_bathymetry.R new file mode 100644 index 0000000..e69de29 diff --git a/runners/cs_runner2/base_variables.R b/runners/cs_runner2/base_variables.R new file mode 100644 index 0000000..7588bb9 --- /dev/null +++ b/runners/cs_runner2/base_variables.R @@ -0,0 +1,435 @@ +### EDIT BASE_DIR, AWS_PROFILE, and DEM_PATH ### +# load utils +source("runners/cs_runner2/utils.R") + +# --------------------------------------------------------------------------------- +# ---- General paths and constants variables ---- +# - edit to match your local environment +# - BASE_DIR: base directory for local file storage +# - AWS_PROFILE: AWS profile to run CLI commands +# - VERSION: S3 prefix/folder of version to run / generate hydrofabric data for +# --------------------------------------------------------------------------------- +# Base directory for local file storage +BASE_DIR <- '/Volumes/T7SSD/lynker-spatial' +# BASE_DIR <- '/Users/anguswatters/Desktop/lynker-spatial' + +BASE_DIRS_LIST <- get_base_dir_paths(BASE_DIR) + +# AWS profile to run CLI commands +AWS_PROFILE <- "angus-lynker" + +# S3 prefix/folder of version run +VERSION <- "v3.0" +CONUS_VERSION <- "v2.2" + +VERSION_DIRS_LIST <- get_version_base_dir_paths(BASE_DIR, VERSION) + +# string to fill in "CS_SOURCE" column in output datasets +CS_SOURCE <- "hydrofabric3D" + +# ------------------------------------------------------------------------------------- +# ---- Processing flags (cache data) ---- +# ------------------------------------------------------------------------------------- + +# cache transects by setting REGENERATE_TRANSECTS to FALSE +REGENERATE_TRANSECTS <- TRUE + +# ------------------------------------------------------------------------------------- +# ---- S3 BUCKET NAMES ---- +# ------------------------------------------------------------------------------------- + +# name of bucket with nextgen data +LYNKER_SPATIAL_S3_BUCKET_NAME <- "lynker-spatial" +LYNKER_SPATIAL_HF_S3_PREFIX <- "hydrofabric" + +# AWS S3 bucket URI +LYNKER_SPATIAL_BASE_S3_URI <- paste0("s3://", LYNKER_SPATIAL_S3_BUCKET_NAME, "/") +LYNKER_SPATIAL_HF_S3_URI <- paste0(LYNKER_SPATIAL_BASE_S3_URI, LYNKER_SPATIAL_HF_S3_PREFIX, 
"/") + +# name of bucket with nextgen data +LYNKER_HF_S3_BUCKET_NAME <- "lynker-hydrofabric" + +# ------------------------------------------------------------------------------------- +# ---- VPU IDs ---- +# ------------------------------------------------------------------------------------- +VPU_IDS <- c('01', '02', '03N', '03S', '03W', + '04', '05', '06', '07', '08', '09', + '10L', '10U', '11', '12', '13', '14', + '15', '16', '17', '18', '20', '21') +# VPU_IDS <- sf::st_drop_geometry(nhdplusTools::get_boundaries())$VPUID + +# ------------------------------------------------------------------------------------- +# ---- CONUS NEXTGEN ---- +# ------------------------------------------------------------------------------------- + +CONUS_NEXTGEN_S3_URI <- paste0("s3://", LYNKER_SPATIAL_S3_BUCKET_NAME, "/", LYNKER_SPATIAL_HF_S3_PREFIX, "/", CONUS_VERSION, "/conus/conus_nextgen.gpkg") +CONUS_NEXTGEN_GPKG_PATH <- file.path(VERSION_DIRS_LIST$network_dir, "conus_nextgen.gpkg") + +# ------------------------------------------------------------------------------------- +# ---- CONUS REFEREMCE FEATURES ---- +# ------------------------------------------------------------------------------------- + +CONUS_REF_FEATURES_S3_URI <- paste0("s3://", LYNKER_SPATIAL_S3_BUCKET_NAME, "/", LYNKER_SPATIAL_HF_S3_PREFIX, "/", CONUS_VERSION, "/conus/conus_reference.gpkg") +CONUS_REF_FEATURES_GPKG_PATH <- file.path(VERSION_DIRS_LIST$ref_features_dir, "conus_reference.gpkg") + +# ------------------------------------------------------------------------------------- +# ---- CONUS ML DATA ---- +# ------------------------------------------------------------------------------------- + +CONUS_ML_S3_URI <- paste0("s3://", LYNKER_SPATIAL_S3_BUCKET_NAME, "/", LYNKER_SPATIAL_HF_S3_PREFIX, "/", CONUS_VERSION, "/conus/bathymetry/ml_auxiliary_data.parquet") +CONUS_ML_PARQUET_PATH <- file.path(VERSION_DIRS_LIST$ml_dir, "ml_auxiliary_data.parquet") + +# ------------------------------------------------------------------------------------- +# ---- FEMA 100 year flood plain data ---- +# ------------------------------------------------------------------------------------- + +LYNKER_HF_FEMA_S3_PREFIX <- "FEMA100" +LYNKER_HF_FEMA_S3_URI <- paste0("s3://", LYNKER_HF_S3_BUCKET_NAME, "/", LYNKER_HF_FEMA_S3_PREFIX, "/") + +FEMA_FGB_PATH = BASE_DIRS_LIST$fema_fgb_dir +FEMA_GEOJSON_PATH = BASE_DIRS_LIST$fema_geojson_dir +FEMA_CLEAN_PATH = BASE_DIRS_LIST$fema_clean_dir +FEMA_GPKG_PATH = BASE_DIRS_LIST$fema_gpkg_dir +FEMA_BY_VPU_PATH = BASE_DIRS_LIST$fema_by_vpu_dir +FEMA_VPU_SUBFOLDERS = BASE_DIRS_LIST$fema_by_vpu_subdirs + +# ------------------------------------------------------------------------------------- +# ---- CONUS CS extension polygons ---- +# Data derived from FEMA 100 year flood plain data, puts all FEMA VPU datasets into a +# single geopackage for CONUS +# ------------------------------------------------------------------------------------- + +CS_EXTENSION_POLYGONS_DIR <- BASE_DIRS_LIST$cs_extension_polygons_dir +CONUS_FEMA_GPKG_PATH <- file.path(CS_EXTENSION_POLYGONS_DIR, 'conus_fema.gpkg') + +# ------------------------------------------------------------------------------------- +# ---- CONUS CS extension polygons ---- +# ------------------------------------------------------------------------------------- + +# Meta data object of National Elevation Datasets (NED) +NED_META <- data.frame(rbind( + # c(id = "USGS_1", + # URL = "/vsicurl/https://prd-tnm.s3.amazonaws.com/StagedProducts/Elevation/1", + # varname = "30m elevation", + # 
long_name = "30m (1 arcsec) National Elevation Dataset", + # units = "m", + # s3_uri = "s3://prd-tnm/StagedProducts/Elevation/1/TIFF/", + # vrt_name = "USGS_Seamless_DEM_1.vrt" + # ) + # + # # c(id = "USGS_2", + # # URL = "/vsicurl/https://prd-tnm.s3.amazonaws.com/StagedProducts/Elevation/2", + # # varname = "60m elevation", + # # long_name = "60m (2 arcsec) National Elevation Dataset Alaska", + # # units = "m", + # # s3_uri = "s3://prd-tnm/StagedProducts/Elevation/2/TIFF/", + # # vrt_name = "USGS_Seamless_DEM_2.vrt" + # # ), + c(id = "USGS_13", + URL = "/vsicurl/https://prd-tnm.s3.amazonaws.com/StagedProducts/Elevation/13", + varname = "10m elevation", + long_name = "10m (1/3th arcsec) National Elevation Dataset", + units = "m", + s3_uri = "s3://prd-tnm/StagedProducts/Elevation/13/TIFF/", + vrt_name = "USGS_Seamless_DEM_13.vrt" + ) + ) +) + + +DEM_DIR <- BASE_DIRS_LIST$dem_dir +DEM_VRT_DIR <- BASE_DIRS_LIST$dem_vrt_dir +# DEM_TIF_DIR <- BASE_DIRS_LIST$dem_tif_dir + +# DEM URL +DEM_PATH <- "/vsicurl/https://prd-tnm.s3.amazonaws.com/StagedProducts/Elevation/1/TIFF/USGS_Seamless_DEM_1.vrt" +# DEM_PATH <- "/vsicurl/https://prd-tnm.s3.amazonaws.com/StagedProducts/Elevation/13/TIFF/USGS_Seamless_DEM_13.vrt" +# # /vsicurl/https://prd-tnm.s3.amazonaws.com/StagedProducts/Elevation/13/TIFF/current/n70w148/USGS_13_n70w148.tif +# ---------------------------------------------------------------------------- +# ---- Cross section point extraction constant variables ---- +# ---------------------------------------------------------------------------- + +# scale argument for cross_section_pts() function. +# The percentage of the length of the transect line to try and extend a transect to see if viable Z values can be found by extending transect line +# Default setting is 50% of the original transect lines length (0.5) +EXTENSION_PCT <- 0.5 + +# percentage of the length each cross section that should be used as a threshold for classifying a cross section as having relief or not +# 1% of the cross sections length is the default value we are using +# (i.e. 
a 100m long cross section needs a minimum of 1 meter (1%) of relief in its cross section points to be classified as "having relief") +PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF <- 0.01 + +# # ------------------------------------------------------------------------------------- +# # ---- Create local directory / path variables (FEMA data) ---- +# # ------------------------------------------------------------------------------------- + +# LYNKER_HF_FEMA_S3_PREFIX <- "FEMA100" +# LYNKER_HF_FEMA_S3_URI <- paste0("s3://", LYNKER_HF_S3_BUCKET_NAME, "/", LYNKER_HF_FEMA_S3_PREFIX, "/") + +# # location of FEMA 100 year flood plain FGB files +# FEMA_S3_BUCKET <- "s3://lynker-hydrofabric/" +# LYNKER_HF_FEMA_S3_PREFIX <- "FEMA100/" +# LYNKER_HF_FEMA_S3_URI <- paste0(FEMA_S3_BUCKET, LYNKER_HF_FEMA_S3_PREFIX) + +# # FEMA100 year flood map FGB save location (temporary, will be deleted after processing) +# FEMA_FGB_PATH <- file.path(BASE_DIRS_LIST$fema_dir, "fema_fgb") +# FEMA_GEOJSON_PATH <- file.path(BASE_DIRS_LIST$fema_dir, "fema_geojson") +# FEMA_CLEAN_PATH <- file.path(BASE_DIRS_LIST$fema_dir, "fema_clean") +# FEMA_GPKG_PATH <- file.path(BASE_DIRS_LIST$fema_dir, "fema_gpkg") +# FEMA_BY_VPU_PATH <- file.path(BASE_DIRS_LIST$fema_dir, "FEMA_BY_VPU") + +# VPU_IDS <- sf::st_drop_geometry(nhdplusTools::get_boundaries())$VPUID + +# FEMA_VPU_SUBFOLDERS <- paste0(FEMA_BY_VPU_PATH, "/VPU_", VPU_IDS) + +# CS_EXTENSION_POLYGONS_DIR <- paste0(BASE_DIR, "/cs-extension-polygons") +# CONUS_FEMA_GPKG_PATH <- file.path(CS_EXTENSION_POLYGONS_DIR, 'conus_fema.gpkg') + +# DEM_DIR <- BASE_DIRS_LIST$dem_dir +# DEM_VRT_DIR <- BASE_DIRS_LIST$dem_vrt_dir +# DEM_TIF_DIR <- BASE_DIRS_LIST$dem_tif_dir + +# # ---------------------------------------------------------------------------- +# # ---- Cross section point extraction constant variables ---- +# # ---------------------------------------------------------------------------- + +# # DEM URL +# DEM_PATH <- "/vsicurl/https://prd-tnm.s3.amazonaws.com/StagedProducts/Elevation/1/TIFF/USGS_Seamless_DEM_1.vrt" + +# # scale argument for cross_section_pts() function. +# # The percentage of the length of the transect line to try and extend a transect to see if viable Z values can be found by extending transect line +# # Default setting is 50% of the original transect lines length (0.5) +# EXTENSION_PCT <- 0.5 + +# # percentage of the length each cross section that should be used as a threshold for classifying a cross section as having relief or not +# # 1% of the cross sections length is the default value we are using +# # (i.e. 
a 100m long cross section needs a minimum of 1 meter (1%) of relief in its cross section points to be classified as "having relief") +# PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF <- 0.01 + + +# # ------------------------------------------------------------------------------------- +# # ---- S3 output directories ----- +# # - transects +# # - cross section points +# # - ML cross section points +# # ------------------------------------------------------------------------------------- +# +# # transect bucket prefix +# S3_TRANSECTS_DIR <- paste0(LYNKER_SPATIAL_HF_S3_URI, VERSION, "/3D/transects/") +# +# # cross section bucket prefix +# S3_CS_PTS_DIR <- paste0(LYNKER_SPATIAL_HF_S3_URI, VERSION, "/3D/dem-cross-sections/") +# +# # cross section bucket prefix +# S3_CS_ML_PTS_DIR <- paste0(LYNKER_SPATIAL_HF_S3_URI, VERSION, "/3D/cross-sections/") +# +# # ------------------------------------------------------------------------------------- +# # ---- S3 nextgen data paths / directories ----- +# # ------------------------------------------------------------------------------------- +# +# # the name of the folder in the S3 bucket with the nextgen data +# S3_BUCKET_NEXTGEN_DIR <- paste0(VERSION, "/gpkg/") +# # S3_BUCKET_NEXTGEN_DIR <- "v20.1/gpkg/" +# +# # full URI to the S3 bucket folder with the nextgen data +# S3_BUCKET_NEXTGEN_DIR_URI <- paste0(LYNKER_SPATIAL_HF_S3_URI, S3_BUCKET_NEXTGEN_DIR) +# +# # reference features S3 bucket prefix +# S3_BUCKET_REF_FEATURES_URI <- paste0("s3://", LYNKER_SPATIAL_S3_BUCKET_NAME, "/00_reference_features/gpkg/") +# # S3_BUCKET_REF_FEATURES_URI <- "s3://lynker-spatial/00_reference_features/gpkg/" +# +# # ---------------------------------------------------------------------------- +# # ---- Machine learning data path variables ---- +# # ---------------------------------------------------------------------------- +# +# ML_OUTPUTS_S3_FILE <- "channel_ml_outputs.parquet" +# +# # ML_OUTPUTS_S3_DIR <- paste0(VERSION, "/3D/ml-outputs/") +# # ML_OUTPUTS_S3_DIR <- "v20.1/3D/ml-outputs/" +# +# ML_OUTPUTS_S3_URI <- paste0(LYNKER_SPATIAL_HF_S3_URI, VERSION, "/3D/ml-outputs/", ML_OUTPUTS_S3_FILE) +# # ML_OUTPUTS_S3_URI <- paste0(LYNKER_SPATIAL_HF_S3_URI, ML_OUTPUTS_S3_DIR, ML_OUTPUTS_S3_FILE) +# +# ML_OUTPUTS_PATH <- paste0(BASE_DIR, "/ml-outputs/", ML_OUTPUTS_S3_FILE) +# +# # path to the remote CONUS net parquet file +# CONUS_NETWORK_FILE <- "conus_net.parquet" +# CONUS_NETWORK_URI <- paste0(LYNKER_SPATIAL_HF_S3_URI, VERSION, "/", CONUS_NETWORK_FILE) +# +# # ---------------------------------------------------------------------------- +# +# +# # ------------------------------------------------------------------------------------- +# # ---- Local directory / path variables ---- +# # ------------------------------------------------------------------------------------- +# +# # directory to copy nextgen bucket data too +# NEXTGEN_DIR <- paste0(BASE_DIR, "/", S3_BUCKET_NEXTGEN_DIR) +# # NEXTGEN_DIR <- paste0(BASE_DIR, "/pre-release/") +# +# # # model attributes directory +# # MODEL_ATTR_DIR <- paste0(BASE_DIR, "/model_attributes/") +# +# # cross-section data model data directories +# TRANSECTS_DIR <- paste0(BASE_DIR, "/01_transects/") +# CS_PTS_DIR <- paste0(BASE_DIR, "/02_cs_pts/") +# +# # final output directory with geopackages per VPU +# CS_OUTPUT_DIR <- paste0(BASE_DIR, "/cross_sections/") +# +# # directory to copy nextgen bucket data too +# REF_FEATURES_DIR <- paste0(BASE_DIR, "/00_reference_features/") +# REF_FEATURES_GPKG_DIR <- paste0(REF_FEATURES_DIR, "gpkg/") +# +# # make a 
directory for the ML outputs data +# ML_OUTPUTS_DIR <- paste0(BASE_DIR, "/ml-outputs/") +# +# DEM_DIR <- paste0(BASE_DIR, "/dem") +# DEM_VRT_DIR <- paste0(DEM_DIR, "/vrt") +# DEM_TIF_DIR <- paste0(DEM_DIR, "/tif") +# +# # ------------------------------------------------------------------------------------- +# # ---- Create local directory / path variables (FEMA data) ---- +# # ------------------------------------------------------------------------------------- +# +# # location of FEMA 100 year flood plain FGB files +# FEMA_S3_BUCKET <- "s3://lynker-hydrofabric/" +# LYNKER_HF_FEMA_S3_PREFIX <- "FEMA100/" +# LYNKER_HF_FEMA_S3_URI <- paste0(FEMA_S3_BUCKET, LYNKER_HF_FEMA_S3_PREFIX) +# +# # FEMA100 year flood map FGB save location (temporary, will be deleted after processing) +# FEMA_FGB_PATH <- paste0(BASE_DIR, "/FEMA100") +# FEMA_GEOJSON_PATH <- paste0(BASE_DIR, "/FEMA100_geojson") +# FEMA_CLEAN_PATH <- paste0(BASE_DIR, "/FEMA100_clean") +# FEMA_GPKG_PATH <- paste0(BASE_DIR, "/FEMA100_gpkg") +# FEMA_GPKG_BB_PATH <- paste0(BASE_DIR, "/FEMA100_bounding_box") # TODO: Probably can be deleted too, not sure yet +# +# FEMA_BY_VPU_PATH <- paste0(BASE_DIR, "/FEMA_BY_VPU") +# VPU_IDS <- sf::st_drop_geometry(nhdplusTools::get_boundaries())$VPUID +# +# FEMA_VPU_SUBFOLDERS <- paste0(FEMA_BY_VPU_PATH, "/VPU_", VPU_IDS) +# # FEMA_VPU_SUBFOLDERS <- paste0( +# # FEMA_BY_VPU_PATH, "/VPU_", +# # unlist( +# # lapply(list.files(NEXTGEN_DIR, full.names = FALSE), function(vpu_file_names) { +# # unlist(regmatches(vpu_file_names, gregexpr("\\d+[A-Za-z]*", vpu_file_names)))}) +# # ) +# # ) +# +# # ------------------------------------------------------------------------------------- +# # ---- OVERWRITE_FEMA_FILES constant logicals---- +# # ---- > if TRUE, processing steps will be run again +# # and overwrite existing previously processed files +# # TODO: Describe these variables +# # ------------------------------------------------------------------------------------- +# +# # Default is TRUE (i.e. 
a fresh processing run is done from start to finish) +# OVERWRITE_FEMA_FILES <- TRUE +# DELETE_STAGING_GPKGS <- TRUE # remove intermediary files from the main output folder +# + +# # ------------------------------------------------------------------------------------- +# # ---- (New single domain) Local directory / path variables ---- +# # ------------------------------------------------------------------------------------- +# +# # directory for new domain data +# NEW_DOMAIN_DIRNAME <- "new_domain" +# NEW_DOMAIN_DIR <- paste0(BASE_DIR, "/", NEW_DOMAIN_DIRNAME) +# +# NEW_DOMAIN_FLOWLINES_DIRNAME <- "flowlines" +# NEW_DOMAIN_FLOWLINES_DIR <- paste0(NEW_DOMAIN_DIR, "/", NEW_DOMAIN_FLOWLINES_DIRNAME) +# +# NEW_DOMAIN_DEM_DIRNAME <- "dem" +# NEW_DOMAIN_DEM_DIR <- paste0(NEW_DOMAIN_DIR, "/", NEW_DOMAIN_DEM_DIRNAME) +# +# NEW_DOMAIN_FLOWLINES_FILE <- "AllDiffusiveCombined.gpkg" +# NEW_DOMAIN_FLOWLINES_PATH <- paste0(NEW_DOMAIN_FLOWLINES_DIR, "/", NEW_DOMAIN_FLOWLINES_FILE) +# +# # # Local DEM file +# # NEW_DOMAIN_DEM_FILE <- "hi_dem.tif" +# # NEW_DOMAIN_DEM_PATH <- paste0(NEW_DOMAIN_DEM_DIR, "/", NEW_DOMAIN_DEM_FILE) +# +# # Remote DEM file +# NEW_DOMAIN_DEM_PATH <- "/vsicurl/https://prd-tnm.s3.amazonaws.com/StagedProducts/Elevation/1/TIFF/USGS_Seamless_DEM_1.vrt" +# +# NEW_DOMAIN_TRANSECTS_DIRNAME <- "transects" +# NEW_DOMAIN_CS_PTS_DIRNAME <- "cs_pts" +# NEW_DOMAIN_CROSS_SECTIONS_DIRNAME <- "cross_sections" +# +# NEW_DOMAIN_TRANSECTS_DIR <- paste0(NEW_DOMAIN_DIR, "/", NEW_DOMAIN_TRANSECTS_DIRNAME) +# NEW_DOMAIN_CS_PTS_DIR <- paste0(NEW_DOMAIN_DIR, "/", NEW_DOMAIN_CS_PTS_DIRNAME) +# NEW_DOMAIN_CROSS_SECTIONS_DIR <- paste0(NEW_DOMAIN_DIR, "/", NEW_DOMAIN_CROSS_SECTIONS_DIRNAME) +# +# # ------------------------------------------------------------------------------------- +# # ---- (New single domain) Local directory / path variables ---- +# # ------------------------------------------------------------------------------------- +# +# # directory for new domain data +# DOMAIN_WITH_FEMA_DIRNAME <- "domain_with_fema" +# DOMAIN_WITH_FEMA_DIR <- paste0(BASE_DIR, "/", DOMAIN_WITH_FEMA_DIRNAME) +# +# DOMAIN_WITH_FEMA_FLOWLINES_DIRNAME <- "flowlines" +# DOMAIN_WITH_FEMA_FLOWLINES_DIR <- paste0(DOMAIN_WITH_FEMA_DIR, "/", DOMAIN_WITH_FEMA_FLOWLINES_DIRNAME) +# +# DOMAIN_WITH_FEMA_SUBSET_DIRNAME <- "domain_subset" +# DOMAIN_WITH_FEMA_SUBSET_DIR <- paste0(DOMAIN_WITH_FEMA_DIR, "/", DOMAIN_WITH_FEMA_SUBSET_DIRNAME) +# +# DOMAIN_WITH_FEMA_DEM_DIRNAME <- "dem" +# DOMAIN_WITH_FEMA_DEM_DIR <- paste0(DOMAIN_WITH_FEMA_DIR, "/", DOMAIN_WITH_FEMA_DEM_DIRNAME) +# +# DOMAIN_WITH_FEMA_FLOWLINES_FILE <- "ls_conus.gpkg" +# DOMAIN_WITH_FEMA_FLOWLINES_PATH <- paste0(DOMAIN_WITH_FEMA_FLOWLINES_DIR, "/", DOMAIN_WITH_FEMA_FLOWLINES_FILE) +# +# # Geopackage containing area to subset flowlines to before processing +# DOMAIN_WITH_FEMA_SUBSET_FILE <- "AllDiffusiveCombined.gpkg" +# DOMAIN_WITH_FEMA_SUBSET_PATH <- paste0(DOMAIN_WITH_FEMA_SUBSET_DIR, "/", DOMAIN_WITH_FEMA_SUBSET_FILE) +# +# # # Local DEM file +# # DOMAIN_WITH_FEMA_DEM_FILE <- "hi_dem.tif" +# # DOMAIN_WITH_FEMA_DEM_PATH <- paste0(DOMAIN_WITH_FEMA_DEM_DIR, "/", DOMAIN_WITH_FEMA_DEM_FILE) +# +# # Remote DEM file +# DOMAIN_WITH_FEMA_DEM_PATH <- "/vsicurl/https://prd-tnm.s3.amazonaws.com/StagedProducts/Elevation/1/TIFF/USGS_Seamless_DEM_1.vrt" +# +# DOMAIN_WITH_FEMA_TRANSECTS_DIRNAME <- "transects" +# DOMAIN_WITH_FEMA_CS_PTS_DIRNAME <- "cs_pts" +# DOMAIN_WITH_FEMA_CROSS_SECTIONS_DIRNAME <- "cross_sections" +# DOMAIN_WITH_FEMA_OUTPUT_DIRNAME <- "outputs" +# 
DOMAIN_WITH_FEMA_VPU_SUBSETS_DIRNAME <- "vpu-subsets" +# DOMAIN_WITH_FEMA_ML_DIRNAME <- "ml" +# # aws s3 cp s3://prd-tnm/StagedProducts/Elevation/1/TIFF/USGS_Seamless_DEM_1.vrt /Users/anguswatters/Desktop/3DEP/3DEP.vrt +# DOMAIN_WITH_FEMA_TRANSECTS_DIR <- paste0(DOMAIN_WITH_FEMA_DIR, "/", DOMAIN_WITH_FEMA_TRANSECTS_DIRNAME) +# DOMAIN_WITH_FEMA_CS_PTS_DIR <- paste0(DOMAIN_WITH_FEMA_DIR, "/", DOMAIN_WITH_FEMA_CS_PTS_DIRNAME) +# DOMAIN_WITH_FEMA_CROSS_SECTIONS_DIR <- paste0(DOMAIN_WITH_FEMA_DIR, "/", DOMAIN_WITH_FEMA_CROSS_SECTIONS_DIRNAME) +# DOMAIN_WITH_FEMA_OUTPUT_DIR <- paste0(DOMAIN_WITH_FEMA_DIR, "/", DOMAIN_WITH_FEMA_OUTPUT_DIRNAME) +# DOMAIN_WITH_FEMA_VPU_SUBSETS_DIR <- paste0(DOMAIN_WITH_FEMA_DIR, "/", DOMAIN_WITH_FEMA_VPU_SUBSETS_DIRNAME) +# DOMAIN_WITH_FEMA_ML_DIR <- paste0(DOMAIN_WITH_FEMA_DIR, "/", DOMAIN_WITH_FEMA_ML_DIRNAME) +# +# ML_AUXILIARY_DATA_S3_URI <- paste0(LYNKER_SPATIAL_BASE_S3_URI, "bathymetry/ml_auxiliary_data") +# # LYNKER_SPATIAL_BASE_S3_URI +# LYNKER_HYDROFABRIC_S3_BUCKET_BASE_URI <- "s3://lynker-hydrofabric/" +# ML_BATHYMETRY_S3_DATA_DIR <- "hydrofabric/nextgen/bathymetry/multisource_river_attributes/" +# ML_BATHYMETRY_S3_URI <- paste0(LYNKER_HYDROFABRIC_S3_BUCKET_BASE_URI, ML_BATHYMETRY_S3_DATA_DIR) +# VPU_ML_BATHYMETRY_S3_DIRS <- paste0(ML_BATHYMETRY_S3_URI, "vpuid=", nhdplusTools::vpu_boundaries$VPUID, "/") +# +# COASTAL_BATHY_DEM_S3_DIR <- "coastal_bathy/diffusive_domain/" +# COASTAL_BATHY_DEM_S3_DIR_URI <- paste0(LYNKER_HYDROFABRIC_S3_BUCKET_BASE_URI, COASTAL_BATHY_DEM_S3_DIR) +# # COASTAL_BATHY_DEM_S3_URIS <- paste0(COASTAL_BATHY_DEM_S3_DIR_URI, list_s3_objects(COASTAL_BATHY_DEM_S3_DIR_URI, ".tif$", AWS_PROFILE)) +# +# +# # # # -------------------------------------------------------------------------- +# # # # ---- Get locations of diffusive domain DEM files in S3 ---- +# # # # -------------------------------------------------------------------------- +# # COASTAL_BATHY_DEM_S3_URIS <- paste0(COASTAL_BATHY_DEM_S3_DIR_URI, list_s3_objects(COASTAL_BATHY_DEM_S3_DIR_URI, ".tif$", AWS_PROFILE)) +# # +# # # COASTAL_BATHY_DEM_S3_URIS <- paste0(COASTAL_BATHY_DEM_S3_DIR_URI, +# # # list_s3_objects(COASTAL_BATHY_DEM_S3_DIR_URI, ".tif$", AWS_PROFILE) +# # # ) +# +# + + + + + + diff --git a/runners/cs_runner2/config_env.R b/runners/cs_runner2/config_env.R new file mode 100644 index 0000000..a9b0b17 --- /dev/null +++ b/runners/cs_runner2/config_env.R @@ -0,0 +1,26 @@ +# load required packages +pacman::p_load( + archive, + hydrofabric, + hydrofabric3D, + dplyr, + sf +) + +# # install.packages("devtools") +# devtools::install_github("anguswg-ucsb/hydrofabric3D") + +# load root directory +source("runners/cs_runner2/base_variables.R") +source("runners/cs_runner2/utils.R") + +sf::sf_use_s2(FALSE) + +# create empty base directories +create_local_hydrofabric_base_dirs(base_dir = BASE_DIR) + +# create a new version directory +create_new_version_dirs(base_dir = BASE_DIR, + version = VERSION, + with_output = TRUE) + diff --git a/runners/cs_runner2/download_conus_nextgen.R b/runners/cs_runner2/download_conus_nextgen.R new file mode 100644 index 0000000..80de910 --- /dev/null +++ b/runners/cs_runner2/download_conus_nextgen.R @@ -0,0 +1,28 @@ +# Running this script goes and pulls the desired NextGen geopackage datasets from http://www.lynker-spatial.com/, saves them into a directory within "BASE_DIR" +# BASE_DIR is defined within runners/workflow/root_dir.R + +# load config variables +source("runners/cs_runner2/base_variables.R") +# source("runners/cs_runner/config_vars.R") + +# 
--------------------------------------------------------------------------- +# ---- Download conus_nextgen.gpkg +# --------------------------------------------------------------------------- + +copy_cmd <- paste0('aws s3 cp ', CONUS_NEXTGEN_S3_URI, " ", CONUS_NEXTGEN_GPKG_PATH) +message("Copying S3 object:\n", CONUS_NEXTGEN_S3_URI) + +if (!file.exists(CONUS_NEXTGEN_GPKG_PATH)) { + tryCatch({ + system(copy_cmd) + message("Download '", basename(CONUS_NEXTGEN_GPKG_PATH), "' complete!") + message("------------------") + }, error = function(e) { + message("Error downloading conus_nextgen.gpkg") + message(e) + stop() + }) + +} else { + message("conus_nextgen.gpkg file already exists at\n > '", CONUS_NEXTGEN_GPKG_PATH, "'") +} diff --git a/runners/cs_runner2/download_conus_ref_features.R b/runners/cs_runner2/download_conus_ref_features.R new file mode 100644 index 0000000..b6f6a06 --- /dev/null +++ b/runners/cs_runner2/download_conus_ref_features.R @@ -0,0 +1,31 @@ +# Running this script goes and pulls the desired CONUS NextGen geopackage dataset from http://www.lynker-spatial.com/, saves them into a directory within "BASE_DIR" +# BASE_DIR is defined within runners/cs_runner/base_variables.R + +# load config variables +source("runners/cs_runner2/base_variables.R") +# source("runners/cs_runner/config_vars.R") + +# --------------------------------------------------------------------------- +# ---- Download conus_reference.gpkg +# --------------------------------------------------------------------------- + +copy_cmd <- paste0('aws s3 cp ', CONUS_REF_FEATURES_S3_URI, " ", CONUS_REF_FEATURES_GPKG_PATH) +message("Copying S3 object:\n", CONUS_REF_FEATURES_S3_URI) + +if (!file.exists(CONUS_REF_FEATURES_GPKG_PATH)) { + tryCatch({ + system(copy_cmd) + message("Download '", basename(CONUS_REF_FEATURES_GPKG_PATH), "' complete!") + message("------------------") + }, error = function(e) { + message("Error downloading conus_nextgen.gpkg") + message(e) + stop() + }) + +} else { + message("conus_nextgen.gpkg file already exists at\n > '", CONUS_REF_FEATURES_GPKG_PATH, "'") +} + + +# } \ No newline at end of file diff --git a/runners/cs_runner2/download_dem_from_vrt.R b/runners/cs_runner2/download_dem_from_vrt.R new file mode 100644 index 0000000..fc18f2c --- /dev/null +++ b/runners/cs_runner2/download_dem_from_vrt.R @@ -0,0 +1,143 @@ +library(terra) +library(dplyr) + +# get the main config variables / paths +# source("runners/cs_runner/config_vars.R") +source("runners/cs_runner2/base_variables.R") +source("runners/cs_runner2/utils.R") + +# specify where VRT should go +DEM_VRT_DIR <- BASE_DIRS_LIST$dem_vrt_dir +# # DEM_TIF_DIR <- base_dirs$dem_tif_dir +# DEM_PATH <- "/vsicurl/https://prd-tnm.s3.amazonaws.com/StagedProducts/Elevation/13/TIFF/USGS_Seamless_DEM_13.vrt" +# # /vsicurl/https://prd-tnm.s3.amazonaws.com/StagedProducts/Elevation/13/TIFF/current/n70w148/USGS_13_n70w148.tif +# message("Syncing 3DEP elevation VRTs...") +# s3_cp_cmd <- paste0("aws s3 cp ", "s3://prd-tnm/StagedProducts/Elevation/13/TIFF/USGS_Seamless_DEM_13.vrt", " ", "/Users/anguswatters/Desktop/USGS_Seamless_DEM_13.vrt", " --profile ", AWS_PROFILE, " --no-sign-request") +# s3_cp_cmd +# system(s3_cp_cmd) +# "s3://prd-tnm/StagedProducts/Elevation/13/TIFF/USGS_Seamless_DEM_13.vrt" +# +# VRT_file <- "/Users/anguswatters/Desktop/USGS_Seamless_DEM_13.vrt" +# vrt_tiles <- terra::vrt_tiles(VRT_file) +# +# dem <- terra::rast(vrt_tiles[999]) +# dem +# terra::writeRaster(dem, "/Users/anguswatters/Desktop/tile1_USGS_Seamless_DEM_13.tif", overwrite = T) 
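# The loop below builds an `aws s3 sync` command for each NED resource in NED_META,
# mirroring its "current/" S3 prefix into a matching local folder
# (DEM_VRT_DIR/<id>/current) so the "Build VRTs" step further down can point
# gdalbuildvrt at local tiles instead of /vsicurl/ URLs. As a rough illustration
# (local paths are placeholders and the command is not run here), the call built
# for the USGS_13 row expands to roughly:
#   aws s3 sync s3://prd-tnm/StagedProducts/Elevation/13/TIFF/current/ \
#     <DEM_VRT_DIR>/USGS_13/current --profile <AWS_PROFILE> --no-sign-request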
+# length(vrt_tiles) +# s3://lynker-nypa-inputs/ryan_test/ + + +for (i in seq_len(nrow(NED_META))) { + + ned_uri <- NED_META$s3_uri[i] + ned_current_uri <- paste0(ned_uri, "current/") + + output_dir <- file.path(DEM_VRT_DIR, NED_META$id[i]) + current_output_dir <- file.path(DEM_VRT_DIR, NED_META$id[i], "current") + + # create specific VRT directory if it doesn't already exist + create_if_not_exists(output_dir) + create_if_not_exists(current_output_dir) + + message("[", i, "] - ", "Syncing\n '", ned_current_uri, "' > '", current_output_dir, "'") + + s3_sync_cmd <- paste0("aws s3 sync ", ned_current_uri, " ", current_output_dir, " --profile ", AWS_PROFILE, " --no-sign-request") + # s3_sync_cmd <- paste0("aws s3 sync s3://prd-tnm/StagedProducts/Elevation/1/TIFF/ ", DEM_VRT_DIR, " --profile ", AWS_PROFILE, " --no-sign-request") + + tryCatch({ + + # system(s3_sync_cmd) + + }, error = function(e) { + + message("Error syncing ", ned_uri, " to local directory...") + message(e) + + }) + +} + +# vrt_file <- file.path(DEM_VRT_DIR, "USGS_Seamless_DEM_1.vrt") +# +# # TODO: use AWS S3 sync command like below +# s3_sync_cmd <- paste0("aws s3 sync s3://prd-tnm/StagedProducts/Elevation/1/TIFF/ ", DEM_VRT_DIR, " --profile ", AWS_PROFILE, " --no-sign-request") +# # s3_sync_cmd <- paste0("aws s3 sync s3://prd-tnm/StagedProducts/Elevation/1/TIFF/ ", DEM_VRT_DIR, " --profile ", AWS_PROFILE, " --no-sign-request --only-show-errors") +# +# system(s3_sync_cmd) + +# ------------------------------------ +# ---- Build VRTs ---- +# ------------------------------------ + +message("Building 3DEP elevation '.vrt' files...") + +for (i in seq_len(nrow(NED_META))) { + + output_dir <- file.path(DEM_VRT_DIR, NED_META$id[i]) + # output_dir + + # Define output text file path + txt_file <- file.path(output_dir, paste0("ned_list_", NED_META$id[i], ".txt")) + + # Define output VRT path + vrt_file <- file.path(output_dir, paste0("ned_", NED_META$id[i], ".vrt")) + # vrt_file <- paste0(paste0("data-raw/ned_", NED_META$id[i], ".vrt")) + + # If VRT does NOT exist, build VRT + if (!file.exists(vrt_file)) { + + # txt_file + + current_dir <- file.path(output_dir, "current") + + # all the current/ directories that have the TIFs + tile_dirs <- list.files(current_dir, full.names = TRUE) + # list.files(tile_dirs[1:2], full.names = T) + # index_gpkg <- sf::read_sf(list.files(tile_dirs[999], full.names = T, pattern = ".gpkg")) + + # get list of all the .tif files in the "current/" dir + tif_paths <- list.files(tile_dirs, pattern = ".tif", full.names = T) + + # write list of files to text file + write.table(tif_paths, txt_file, row.names = FALSE, col.names = FALSE, quote = FALSE) + + # build VRT from text file input using GDAL system call ... 
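# (illustrative only; paths assume the defaults defined above) with NED_META$id = "USGS_13",
# the system call below expands to roughly:
#   gdalbuildvrt -input_file_list <DEM_VRT_DIR>/USGS_13/ned_list_USGS_13.txt <DEM_VRT_DIR>/USGS_13/ned_USGS_13.vrt
# i.e. GDAL reads every TIFF path listed in the text file and stitches them into a single
# virtual mosaic (.vrt) without copying the underlying raster data.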
+ system(paste("gdalbuildvrt -input_file_list", txt_file, vrt_file)) + + } + +} + + +# file.path(DEM_TIF_DIR, "USGS_Seamless_DEM_1.vrt") +# length(list.files(file.path(DEM_TIF_DIR, "current"), full.names = T)) +# vrt_tiles <- terra::vrt_tiles(file.path(DEM_TIF_DIR, "USGS_Seamless_DEM_1.vrt")) +# vrt_tiles[1] +# # "aws s3 sync s3://prd-tnm/StagedProducts/Elevation/1/TIFF/ /Volumes/T7SSD/lynker-spatial/dem/tif/ --no-sign-request --only-show-errors" +# +# # Parse the VRT file +# vrt_file <- list.files(DEM_VRT_DIR, full.names = TRUE) +# vrt_file <- list.files(DEM_VRT_DIR, full.names = TRUE) +# file.path(DEM_TIF_DIR, "USGS_Seamless_DEM_1.vrt") +# vrt_tiles <- terra::vrt_tiles(vrt_file) +# +# # tile_path <- vrt_tiles[500] +# tile_paths <- gsub("/vsicurl/", "", vrt_tiles) +# +# # TODO: use AWS S3 sync command like below +# s3_sync_cmd <- paste0("aws s3 sync s3://prd-tnm/StagedProducts/Elevation/1/TIFF/ ", DEM_VRT_DIR, " --profile ", AWS_PROFILE, " --no-sign-request --only-show-errors") +# # aws s3 sync s3://prd-tnm/StagedProducts/Elevation/1/TIFF/ /Volumes/T7SSD/lynker-spatial/dem/tif/ --no-sign-request --only-show-errors +# # "aws s3 sync s3://prd-tnm/StagedProducts/Elevation/1/TIFF/ /Volumes/T7SSD/lynker-spatial/dem/tif/ --no-sign-request --only-show-errors" +# +# # TODO: Old method curl request each file individually.... super slow +# # error_tiles <- download_tiles(tile_paths, DEM_TIF_DIR) \ No newline at end of file diff --git a/runners/cs_runner2/download_fema.R b/runners/cs_runner2/download_fema.R new file mode 100644 index 0000000..86b7e6d --- /dev/null +++ b/runners/cs_runner2/download_fema.R @@ -0,0 +1,123 @@ +# Running this script goes and pulls the desired FEMA100 flood fgb datasets from the lynker-hydrofabric S3 bucket then saves them into a directory within "BASE_DIR" +# BASE_DIR is defined within runners/workflow/root_dir.R + +# NOTE: The lynker-hydrofabric S3 bucket is private at the moment + +# load config variables +# source("runners/cs_runner/config_vars.R") +source("runners/cs_runner2/base_variables.R") + +# ------------------------------------------------------------------------------------- +# ---- Create FEMA100/ directory and bounding box dir (if it does NOT exist) ---- +# ------------------------------------------------------------------------------------- + +# if (!dir.exists(FEMA_FGB_PATH)) { +# message(paste0("FEMA100/ directory does not exist...\nCreating directory:\n > '", FEMA_FGB_PATH, "'")) +# dir.create(FEMA_FGB_PATH) +# } +# +# # create geojsons directory (if not exists) +# if (!dir.exists(FEMA_GEOJSON_PATH)) { +# message(paste0(FEMA_GEOJSON_PATH, " directory does not exist...\nCreating directory:\n > '", FEMA_GEOJSON_PATH, "'")) +# dir.create(FEMA_GEOJSON_PATH) +# } +# +# # create directory for cleaned FEMA geometries (if not exists) +# if (!dir.exists(FEMA_CLEAN_PATH)) { +# message(paste0(FEMA_CLEAN_PATH, " directory does not exist...\nCreating directory:\n > '", FEMA_CLEAN_PATH, "'")) +# dir.create(FEMA_CLEAN_PATH) +# } +# +# # create directory for cleaned FEMA geometries as geopackages (if not exists) +# if (!dir.exists(FEMA_GPKG_PATH)) { +# message(paste0(FEMA_GPKG_PATH, " directory does not exist...\nCreating directory:\n > '", FEMA_GPKG_PATH, "'")) +# dir.create(FEMA_GPKG_PATH) +# } +# +# # create simplified geojsons directory (if not exists) +# if (!dir.exists(FEMA_SIMPLIFIED_PATH)) { +# message(paste0(FEMA_SIMPLIFIED_PATH, " directory does not exist...\nCreating directory:\n > '", FEMA_SIMPLIFIED_PATH, "'")) +# dir.create(FEMA_SIMPLIFIED_PATH) +# } +# 
+# # create simplified geojsons directory (if not exists) +# if (!dir.exists(FEMA_DISSOLVED_PATH)) { +# message(paste0(FEMA_DISSOLVED_PATH, " directory does not exist...\nCreating directory:\n > '", FEMA_DISSOLVED_PATH, "'")) +# dir.create(FEMA_DISSOLVED_PATH) +# } +# +# # create exploded geojsons directory (if not exists) +# if (!dir.exists(FEMA_EXPLODED_PATH)) { +# message(paste0(FEMA_EXPLODED_PATH, " directory does not exist...\nCreating directory:\n > '", FEMA_EXPLODED_PATH, "'")) +# dir.create(FEMA_EXPLODED_PATH) +# } +# +# # create FEMA GPKG Bounding Boxes directory (if not exists) +# if (!dir.exists(FEMA_GPKG_BB_PATH)) { +# message(paste0(FEMA_GPKG_BB_PATH, " directory does not exist...\nCreating directory:\n > '", FEMA_GPKG_BB_PATH, "'")) +# dir.create(FEMA_GPKG_BB_PATH) +# } +# +# if (!dir.exists(FEMA_FGB_BB_PATH)) { +# message(paste0(FEMA_FGB_BB_PATH, " directory does not exist...\nCreating directory:\n > '", FEMA_FGB_BB_PATH, "'")) +# dir.create(FEMA_FGB_BB_PATH) +# } + +# ------------------------------------------------------------------------------------- +# ---- Get list of FEMA FGB files in S3 bucket ---- +# ------------------------------------------------------------------------------------- + +# list objects in S3 bucket, and regular expression match to nextgen_.gpkg pattern +fema_list_command <- paste0('#!/bin/bash + # AWS S3 Bucket and Directory information + S3_BUCKET="', LYNKER_HF_FEMA_S3_URI, '" + + # Regular expression pattern to match object keys + PATTERN=".fgb$" + + # AWS CLI command to list objects in the S3 bucket and use grep to filter them + S3_OBJECTS=$(aws s3 ls "$S3_BUCKET" --profile ', AWS_PROFILE, ' | awk \'{print $4}\' | grep -E "$PATTERN") + + echo "$S3_OBJECTS"' +) + +# ------------------------------------------------------------------------------------- +# ---- Get the S3 buckets object keys for FEMA 100 FGB files ---- +# ------------------------------------------------------------------------------------- + +# Run the script to get a list of the nextgen geopackages that matched the regular expression above +FEMA_BUCKET_KEYS <- system(fema_list_command, intern = TRUE) + +# create bucket object URIs +# FEMA_BUCKET_OBJECTS <- paste0(FEMA_S3_BUCKET, LYNKER_HF_FEMA_S3_PREFIX, FEMA_BUCKET_KEYS) + +# ------------------------------------------------------------------------------------- +# ---- Download FEMA 100 year FGB files from S3 ---- +# ------------------------------------------------------------------------------------- + +# Parse the selected S3 objects keys from the FEMA100 bucket directory copy them to the local destination directory if the file does NOT exist yet +for (key in FEMA_BUCKET_KEYS) { + local_save_path <- paste0(FEMA_FGB_PATH, "/", key) + + if (!file.exists(local_save_path)) { + copy_cmd <- paste0('aws s3 cp ', LYNKER_HF_FEMA_S3_URI, key, " ", local_save_path, " --profile ", AWS_PROFILE) + + message("S3 object:\n > '", LYNKER_HF_FEMA_S3_URI, key, "'") + message("Downloading S3 object to:\n > '", local_save_path, "'") + + system(copy_cmd) + + message(" > '", key, "' download complete!") + message("----------------------------------") + } else { + message("File already exists at:\n > '", local_save_path, "'") + } +} + + + + + + + + diff --git a/runners/cs_runner2/download_ml_outputs.R b/runners/cs_runner2/download_ml_outputs.R new file mode 100644 index 0000000..4d14634 --- /dev/null +++ b/runners/cs_runner2/download_ml_outputs.R @@ -0,0 +1,28 @@ +# Running this script goes and pulls the desired NextGen geopackage datasets from 
http://www.lynker-spatial.com/ and saves it into a directory within "BASE_DIR"
+# BASE_DIR is defined within runners/workflow/root_dir.R
+
+# load config variables
+source("runners/cs_runner2/base_variables.R")
+# source("runners/cs_runner/config_vars.R")
+
+# ---------------------------------------------------------------------------
+# ---- Download the CONUS ML outputs parquet
+# ---------------------------------------------------------------------------
+
+copy_cmd <- paste0('aws s3 cp ', CONUS_ML_S3_URI, " ", CONUS_ML_PARQUET_PATH)
+message("Copying S3 object:\n", CONUS_ML_S3_URI)
+
+if (!file.exists(CONUS_ML_PARQUET_PATH)) {
+  tryCatch({
+    system(copy_cmd)
+    message("Download '", basename(CONUS_ML_PARQUET_PATH), "' complete!")
+    message("------------------")
+  }, error = function(e) {
+    message("Error downloading '", basename(CONUS_ML_PARQUET_PATH), "'")
+    message(e)
+    stop()
+  })
+
+} else {
+  message("'", basename(CONUS_ML_PARQUET_PATH), "' file already exists at\n > '", CONUS_ML_PARQUET_PATH, "'")
+}
diff --git a/runners/cs_runner2/finalize_outputs.R b/runners/cs_runner2/finalize_outputs.R
new file mode 100644
index 0000000..e69de29
diff --git a/runners/cs_runner2/process_fema.R b/runners/cs_runner2/process_fema.R
new file mode 100644
index 0000000..50b2106
--- /dev/null
+++ b/runners/cs_runner2/process_fema.R
@@ -0,0 +1,737 @@
+# Script should be run AFTER download_fema100.R as the FEMA 100 year flood plain data needs to first be downloaded from S3
+# This file will take a directory of FEMA 100 year FGB files (FEMA_FGB_PATH) and run the below processes to generate a cleaned, simple set of geopackages
+
+# Processing steps:
+# - Convert FGBs to GEOJSON (via ogr2ogr)
+# - Simplifies
+# - Dissolves
+# - Explodes
+# - Convert cleaned GEOJSON to cleaned GPKGs (via ogr2ogr)
+# - Apply hydrofab::clean_geometry()
+# - Partition FEMA 100 geometries by VPU # TODO still
+# - Get FEMA bounding box geometries (maybe)
+
+# load config variables
+# source("runners/cs_runner/config_vars.R")
+# source("runners/cs_runner/config.R")
+source("runners/cs_runner2/base_variables.R")
+source("runners/cs_runner2/utils.R")
+
+library(dplyr)
+library(sf)
+library(geos)
+library(fastmap)
+library(nngeo)
+
+# ONLY runs if the main CONUS FEMA gpkg hasn't been created yet
+if (!file.exists(CONUS_FEMA_GPKG_PATH)) {
+
+# -------------------------------------------------------------------------------------
+# ---- OVERWRITE_FEMA_FILES constant logical ----
+# ----  > if TRUE, processing steps will be run again
+#          and overwrite existing previously processed files
+# -------------------------------------------------------------------------------------
+
+# Default is TRUE (i.e. 
a fresh processing run is done from start to finish) +OVERWRITE_FEMA_FILES <- TRUE +DELETE_STAGING_GPKGS <- TRUE +Sys.setenv(OGR_GEOJSON_MAX_OBJ_SIZE=0) + +# ------------------------------------------------------------------------------------- +# ---- Get paths to downloaded FEMA 100 FGBs ---- +# ------------------------------------------------------------------------------------- + +FEMA_FILENAMES <- list.files(FEMA_FGB_PATH, full.names = FALSE) +FEMA_FILE_PATHS <- paste0(FEMA_FGB_PATH, "/", FEMA_FILENAMES) + +for (file in FEMA_FILENAMES) { + # message(file) + + STAGING_FILES_TO_DELETE <- c() + + # ------------------------------------------------------------------------------------------------------------------- + # ---- Step 1: Convert FGB to GeoJSON + # ------------------------------------------------------------------------------------------------------------------- + + local_fema_path <- paste0(FEMA_FGB_PATH, "/", file) + geojson_filename <- gsub(".fgb", ".geojson", file) + geojson_save_path <- paste0(FEMA_GPKG_PATH, "/", geojson_filename) + + message("FEMA filename: '", file, "'") + message("Converting \n > '", file, "' to geojson '", geojson_filename, "'") + + geojson_exists <- file.exists(geojson_save_path) + + message(" >>> '", geojson_filename, "' already exists? ", geojson_exists) + message(" >>> Overwrite? ", OVERWRITE_FEMA_FILES) + + # Step 1.1 Run FGDB to GeoJSON conversion + ogr2ogr_command <- paste0("ogr2ogr ", geojson_save_path, " ", local_fema_path) + + if (OVERWRITE_FEMA_FILES || !geojson_exists) { + system(ogr2ogr_command) + message("Writing '", geojson_filename, "' to: \n > '", geojson_save_path, "'") + + STAGING_FILES_TO_DELETE <- c(STAGING_FILES_TO_DELETE, geojson_save_path) + } + + # ------------------------------------------------------------------------------------------------------------------- + # ---- # Step 2: Clean GeoJSON + # ------------------------------------------------------------------------------------------------------------------- + + message("Simplify, dissolve, explode > '", geojson_filename, "'") + output_clean_filename <- gsub(".geojson", "_clean.geojson", geojson_filename) + output_clean_geojson_path <- paste0(FEMA_GPKG_PATH, "/", output_clean_filename) + + clean_geojson_exists <- file.exists(output_clean_geojson_path) + message(" >>> '", output_clean_filename, "' already exists? ", clean_geojson_exists) + message(" >>> Overwrite? 
", OVERWRITE_FEMA_FILES) + + mapshaper_command = paste0('node --max-old-space-size=16000 /opt/homebrew/bin/mapshaper ', geojson_save_path, + ' -dissolve2 FLD_AR_ID \\', + ' -simplify 0.1 visvalingam \\', + ' -snap \\', + ' -o ', output_clean_geojson_path) + + # Step 2.1 Run simplify, dissolve, explode on cleaned GeoJSON + if (OVERWRITE_FEMA_FILES || !clean_geojson_exists) { + message("Running mapshaper 'simplify', 'dissolve', and 'explode' via CLI...") + system(mapshaper_command) + message("Writing '", output_clean_filename, "' to: \n > '", output_clean_geojson_path, "'") + + STAGING_FILES_TO_DELETE <- c(STAGING_FILES_TO_DELETE, output_clean_geojson_path) + } + + # ------------------------------------------------------------------------------------------------------------------- + # ---- # Step 3: Convert cleaned GeoJSON to GeoPackage + # ------------------------------------------------------------------------------------------------------------------- + + message("Fema 100 year flood plain:\n > '", output_clean_filename, "'") + output_gpkg_filename <- gsub("_clean.geojson", "_clean.gpkg", output_clean_filename) + output_gpkg_path <- paste0(FEMA_GPKG_PATH, "/", output_gpkg_filename) + + message("Converting GEOJSON file to GPKG:\n > '", output_clean_filename, "' > '", output_gpkg_filename, "'") + + clean_gpkg_exists <- file.exists(output_gpkg_path) + message(" >>> '", output_gpkg_filename, "' already exists? ", clean_gpkg_exists) + message(" >>> Overwrite? ", OVERWRITE_FEMA_FILES) + + ogr2ogr_command <- paste0("ogr2ogr -nlt MULTIPOLYGON ", output_gpkg_path, " ", output_clean_geojson_path) + + if (OVERWRITE_FEMA_FILES || !clean_gpkg_exists) { + system(ogr2ogr_command) + message("Writing '", output_gpkg_filename, "' to: \n > '", output_gpkg_path, "'") + } + + # ------------------------------------------------------------------------------------------------------------------- + # ---- Step 4: Apply final dissolve/snap and removal of internal boundaries in FEMA geometries ---- + # ------------------------------------------------------------------------------------------------------------------- + + message("Resolving internal boundaries, islands, and topology issues:\n > '", basename(output_gpkg_path), "'") + + fema <- sf::read_sf(output_gpkg_path) + fema <- resolve_internal_fema_boundaries(fema, output_gpkg_path) + + message("End time: ", Sys.time()) + + if (OVERWRITE_FEMA_FILES) { + message("Writting '", basename(output_gpkg_path), "' to: \n > '", output_gpkg_path, "'") + sf::write_sf( + fema, + # fema_clean, + output_gpkg_path + ) + } + + + + # ------------------------------------------------------------------------------------------------------------------- + # ---- Step 5: Delete intermediary files + # ------------------------------------------------------------------------------------------------------------------- + + message("Deleting intermediary files\n") + for (delete_file in STAGING_FILES_TO_DELETE) { + if (file.exists(delete_file)) { + message("Deleting >>> '", delete_file, "'") + file.remove(delete_file) + } + + } + + rm(fema) + + message() + +} + +# ------------------------------------------------------------------------------------------------------------------- +# ---- Apply final dissolve/snap and removal of internal boundaries in FEMA geometries ---- +# ------------------------------------------------------------------------------------------------------------------- + +# # paths to FEMA 100 year flood plain files +# FEMA_gpkg_paths <- list.files(FEMA_GPKG_PATH, full.names = 
TRUE)
+
+# for (file_path in FEMA_gpkg_paths) {
+#   message("Resolving internal boundaries, islands, and topology issues:\n > '", basename(file_path), "'")
+
+#   fema <- sf::read_sf(file_path)
+
+#   fema <- resolve_internal_fema_boundaries(fema)
+
+#   # fema <-
+#   #   fema[!sf::st_is_empty(fema), ] %>%
+#   #   sf::st_transform(5070)
+
+#   # fema <-
+#   #   fema %>%
+#   #   # fema[!sf::st_is_empty(fema), ] %>%
+#   #   dplyr::select(geometry = geom) %>%
+#   #   add_predicate_group_id(sf::st_intersects) %>%
+#   #   sf::st_make_valid() %>%
+#   #   dplyr::group_by(group_id) %>%
+#   #   dplyr::summarise(
+#   #     geometry = sf::st_combine(sf::st_union(geometry))
+#   #   ) %>%
+#   #   dplyr::ungroup() %>%
+#   #   dplyr::select(-group_id) %>%
+#   #   add_predicate_group_id(sf::st_intersects) %>%
+#   #   rmapshaper::ms_dissolve(sys = TRUE, sys_mem = 16) %>%
+#   #   rmapshaper::ms_explode(sys = TRUE, sys_mem = 16) %>%
+#   #   dplyr::mutate(
+#   #     fema_id = as.character(1:dplyr::n())
+#   #   ) %>%
+#   #   dplyr::select(fema_id, geometry)
+
+#   # # mapview::mapview(fema, color = 'cyan', col.regions = "cyan")
+
+#   # # mapview::mapview(end_fema, color = 'red', col.regions = "white")
+
+#   # fema <-
+#   #   fema %>%
+#   #   dplyr::mutate(
+#   #     source = basename(file_path),
+#   #     state = gsub("-100yr-flood_valid_clean.gpkg", "", source)
+#   #   ) %>%
+#   #   dplyr::select(fema_id, source, state,
+#   #                 # areasqkm,
+#   #                 geometry)
+
+#   message("End time: ", Sys.time())
+
+#   if (OVERWRITE_FEMA_FILES) {
+#     message("Writing '", basename(file_path), "' to: \n > '", file_path, "'")
+#     sf::write_sf(
+#       # fema_clean,
+#       fema,
+#       file_path
+#     )
+#   }
+#   message()
+
+# }
+
+# -------------------------------------------------------------------------------------
+# ---- Partition parts of each FEMA GPKG to a Nextgen VPU ----
+# -------------------------------------------------------------------------------------
+
+# Clean FEMA GPKG files
+FEMA_CLEAN_GPKG_PATHS <- list.files(FEMA_GPKG_PATH, full.names = TRUE)
+
+# paths to nextgen datasets and model attribute parquet files
+# CONUS_NEXTGEN_GPKG_PATH
+
+# layer_info = sf::st_layers(CONUS_NEXTGEN_GPKG_PATH)
+# layer_info[layer_info$name == "flowpaths", "fields"]
+# layer_inf
+
+# flines = sf::read_sf(CONUS_NEXTGEN_GPKG_PATH, layer = "flowpaths")
+# flines %>% names()
+# (flines$vpuid %>% unique()) %in% VPU_IDS
+# VPU_IDS
+# VPU_IDS %in% (flines$vpuid %>% unique())
+
+# query the unique VPU_IDS from the "flowpaths" layer of CONUS_NEXTGEN_GPKG_PATH
+CONUS_VPU_IDS <-
+  CONUS_NEXTGEN_GPKG_PATH %>%
+  sf::read_sf(query = "SELECT DISTINCT vpuid FROM flowpaths") %>%
+  dplyr::pull()
+
+
+# NEXTGEN_FILENAMES <- list.files(NEXTGEN_DIR, full.names = FALSE)
+# NEXTGEN_FILE_PATHS <- paste0(NEXTGEN_DIR, NEXTGEN_FILENAMES)
+
+
+for (file_path in FEMA_CLEAN_GPKG_PATHS) {
+  # i = 35
+  # file_path = FEMA_CLEAN_GPKG_PATHS[i]
+
+  fema_file <- basename(file_path)
+
+  message("Partitioning FEMA polygons by VPU: \n > FEMA gpkg: '", fema_file, "'")
+
+  # read in fema polygons
+  fema <- sf::read_sf(file_path)
+
+  for (vpu in CONUS_VPU_IDS) {
+    # j = 8
+    # vpu = CONUS_VPU_IDS[j]
+
+    # nextgen_basename <- basename(nextgen_path)
+    # vpu <- unlist(regmatches(nextgen_basename, gregexpr("\\d+[A-Za-z]*", nextgen_basename)))
+
+    message("VPU: ", vpu)
+    # message("- nextgen gpkg:\n > '", nextgen_path, "'")
+    message(" > Checking if '", fema_file, "' intersects with CONUS flowpaths in VPU '", vpu, "'")
+
+    # read in nextgen flowlines for just this VPU
+    flines <-
+      CONUS_NEXTGEN_GPKG_PATH %>%
+      sf::read_sf(query = paste0("SELECT * FROM flowpaths WHERE vpuid = '", vpu, "'"))
"'")) + + # flines <- sf::read_sf(nextgen_path, layer = "flowpaths") + + # get the FEMA polygons that intersect with the nextgen flowlines + fema_intersect <- polygons_with_line_intersects(fema, flines) + + fema_in_nextgen <- nrow(fema_intersect) != 0 + + message("FEMA intersects with nextgen flowlines? ", fema_in_nextgen) + + if(fema_in_nextgen) { + + # create filepaths + vpu_subfolder <- paste0("vpu-", vpu) + # vpu_subfolder_path <- paste0(FEMA_BY_VPU_PATH, "/", vpu_subfolder, "/states") + vpu_subfolder_path <- paste0(FEMA_BY_VPU_PATH, "/", vpu_subfolder) + + # vpu_subfolder_path <- FEMA_VPU_SUBFOLDERS[grepl(vpu_subfolder, FEMA_VPU_SUBFOLDERS)] + + fema_intersect <- + fema_intersect %>% + dplyr::mutate( + vpu = vpu + ) %>% + dplyr::select(vpu, fema_id, source, state, geom) + + # state <- gsub("-100yr-flood_valid_clean.gpkg", "", fema_file) + + # BASE_DIRS_LIST$fema_by_vpu_subsets_dirs + + fema_vpu_filename <- gsub(".gpkg", paste0("_", vpu, ".gpkg"), fema_file) + fema_vpu_path <- paste0(vpu_subfolder_path, "/subsets/", fema_vpu_filename) + + + if (OVERWRITE_FEMA_FILES) { + message("Writting '", basename(fema_vpu_filename), "' to: \n > '", fema_vpu_path, "'") + + sf::write_sf( + fema_intersect, + fema_vpu_path + ) + } + + + } + message() + } + + message( + "--------------------------------------------------------------\n", + "Completed all VPU intersections for: \n > '", fema_file, "'", + "\n--------------------------------------------------------------\n" + ) + +} + +# for (i in seq_along(BASE_DIRS_LIST$fema_by_vpu_subdirs)) { +# vpu_subdir <- BASE_DIRS_LIST$fema_by_vpu_subdirs[i] +# +# message(i, " - ", vpu_subdir) +# +# files_to_move = list.files(vpu_subdir, pattern = ".gpkg", full.names = T) +# +# for (file in files_to_move) { +# new_path <- paste0(vpu_subdir, "/subsets/", basename(file)) +# message(" > ", file, "\n > ", new_path, "\n") +# fs::file_move( +# file, +# new_path +# ) +# } +# } + +# ------------------------------------------------------------------------------------- +# ---- Loop through each VPU subfolder and merge all of the Geopackages into one---- +# ------------------------------------------------------------------------------------- +# BASE_DIRS_LIST$fema_by_vpu_subdirs +# BASE_DIRS_LIST$fema_by_vpu_subsets_dirs +# for (vpu_dir in FEMA_VPU_SUBFOLDERS) { +BASE_DIRS_LIST$fema_by_vpu_subdirs + +for (i in seq_along(BASE_DIRS_LIST$fema_by_vpu_subdirs)) { + # for (i in 1:4) { + # i = 8 + + vpu_dir <- BASE_DIRS_LIST$fema_by_vpu_subdirs[i] + # vpu_dir <- FEMA_VPU_SUBFOLDERS[i] + + vpu_subset_dir <- BASE_DIRS_LIST$fema_by_vpu_subsets_dirs[i] + + message("Merging files in '", basename(vpu_dir), "' directory...") + vpu_subdirs <- list.files(vpu_subset_dir, full.names = TRUE) + # vpu_subdirs <- list.files(vpu_dir, full.names = TRUE) + + # message(paste0("\n > ", basename(vpu_subdirs), collapse = "")) + + # path to the merged directory where the final merged geopackge will end up + master_name <- paste0("fema-", gsub("VPU", "vpu", basename(vpu_dir))) + master_gpkg_name <- paste0(master_name, ".gpkg") + master_filepath <- paste0(vpu_dir, "/merged/", master_gpkg_name) + + # if the file already exists, remove it so we dont OVER append data to the "master file" + if (file.exists(master_filepath)) { + file.remove(master_filepath) + } + + # fema state geopackages partioned for the specific VPU + fema_state_gpkgs <- list.files(vpu_subset_dir, full.names = TRUE) + # master_output_filepath <- paste0(vpu_dir, "/", gsub(".gpkg", "_output.gpkg", master_gpkg_name)) + # + # # make sure to 
ignore the master file if it already exists + # fema_state_gpkgs <- fema_state_gpkgs[fema_state_gpkgs != master_filepath & fema_state_gpkgs != master_output_filepath] + + for(gpkg_file in fema_state_gpkgs) { + # message(" - Appending '", basename(gpkg_file), "' to master FEMA VPU gpkg:\n > '", + # basename(gpkg_file), " > ", basename(master_filepath), + # "'") + message(" > '", + basename(gpkg_file), " > ", basename(master_filepath), + "'") + + ogr2ogr_merge_command <- paste0("ogr2ogr -f 'gpkg' -append -nln ", master_name, " ", + master_filepath, + " ", gpkg_file + ) + + # if (OVERWRITE_FEMA_FILES) { + system(ogr2ogr_merge_command) + # } + + } + + # has_fema_state_gpkgs <- length(fema_state_gpkgs) > 0 + # + # if(DELETE_STAGING_GPKGS && has_fema_state_gpkgs) { + # message(" - Deleting individual gpkgs from '", vpu_dir, "' directory...") + # remove_gpkg_cmds <- paste0("rm ", fema_state_gpkgs) + # + # for (remove_cmd in remove_gpkg_cmds) { + # message(" > '", remove_cmd, "'") + # system(remove_cmd) + # } + # } + + # message() + message("Merge complete!") + message("Merged '", basename(vpu_dir), "' FEMA output geopackage:\n --> '", master_filepath, "'") + message() + +} +# ------------------------------------------------------------------------------------- +# ----Apply simplify, dissolve, explode on the MERGED polygons ---- +# ------------------------------------------------------------------------------------- + +# # NOTE: remove past runs for testing... +# for (i in list.files(FEMA_VPU_SUBFOLDERS, full.names = T)[grepl("_output.gpkg", list.files(FEMA_VPU_SUBFOLDERS, full.names = T))]) { +# file.remove(i) +# } +# seq_along(BASE_DIRS_LIST$fema_by_vpu_subdirs) +# rm(STAGING_FILES_TO_DELETE, i, VPU, vpu_dir) + +for (i in 1:length(FEMA_VPU_SUBFOLDERS)) { + + # i = 8 + # FEMA_VPU_SUBFOLDERS + STAGING_FILES_TO_DELETE <- c() + + vpu_dir <- FEMA_VPU_SUBFOLDERS[i] + VPU <- basename(vpu_dir) + + message(i, " - Attempting to union FEMA polygons for '", VPU, "'...") + + # path to the merged directory where the final merged geopackage will end up + master_name <- paste0("fema-", gsub("VPU", "vpu", basename(vpu_dir))) + master_gpkg_name <- paste0(master_name, ".gpkg") + master_filepath <- paste0(vpu_dir, "/merged/", master_gpkg_name) + + master_geojson_name <- paste0(master_name, ".geojson") + master_geojson_filepath <- paste0(vpu_dir, "/merged/", master_geojson_name) + + # updated_gpkg_name <- gsub(".gpkg", "-output.gpkg", master_gpkg_name) + updated_gpkg_name <- master_gpkg_name + updated_filepath <- paste0(vpu_dir, "/output/", master_gpkg_name) + + updated_gpkg_exists <- file.exists(updated_filepath) + + # remove output file if it already exists + if (updated_gpkg_exists) { + file.remove(updated_filepath) + } + + message("VPU Merged FEMA filename: '", master_gpkg_name, "'") + message("> Simplifying, dissolve, exploding VPU aggregated FEMA polygons... '", basename(master_filepath), "'") + + if(!file.exists(master_filepath)) { + message("No FEMA geometries in '", VPU, "'") + message() + next + } + + message("Converting \n > '", basename(master_filepath), "' to geojson '", master_geojson_name, "'") + + geojson_exists <- file.exists(master_geojson_filepath) + + # message(" >>> '", geojson_filename, "' already exists? ", geojson_exists) + # message(" >>> Overwrite? 
", OVERWRITE_FEMA_FILES) + gpkg_to_geojson_cmd <- paste0("ogr2ogr -s_srs EPSG:5070 -t_srs EPSG:5070 ", master_geojson_filepath, " ", master_filepath) + # gpkg_to_geojson_cmd <- paste0("ogr2ogr -f GEOJSON -s_srs EPSG:5070 -t_srs EPSG:5070 ", master_geojson_filepath, " ", master_filepath) + # gpkg_to_geojson_cmd <- paste0("ogr2ogr ", master_geojson_filepath, " ", master_filepath) + # file.remove(master_geojson_filepath) + + # if (OVERWRITE_FEMA_FILES || !geojson_exists) { + system(gpkg_to_geojson_cmd) + message("Writing '", master_geojson_name, "' to: \n > '", master_geojson_filepath, "'") + + STAGING_FILES_TO_DELETE <- c(STAGING_FILES_TO_DELETE, master_geojson_filepath) + # } + + # master_gj <- sf::read_sf(master_geojson_filepath) + # master_gpkg <- sf::read_sf(master_filepath) + + # Clean GeoJSON + message("Simplify, dissolve, explode > '", master_geojson_name, "'") + output_clean_filename <- gsub(".geojson", "_clean.geojson", master_geojson_name) + output_clean_geojson_path <- paste0(vpu_dir, "/merged/", output_clean_filename) + + clean_geojson_exists <- file.exists(output_clean_geojson_path) + message(" >>> '", output_clean_filename, "' already exists? ", clean_geojson_exists) + # message(" >>> Overwrite? ", OVERWRITE_FEMA_FILES) + + if (clean_geojson_exists) { + file.remove(output_clean_geojson_path) + } + + mapshaper_command = paste0('node --max-old-space-size=16000 /opt/homebrew/bin/mapshaper ', + master_geojson_filepath, + # ' -clean \\', + # ' -explode \\', + # ' -dissolve2 \\', + ' -simplify 0.3 visvalingam \\', + ' -snap \\', + ' -explode \\', + ' -clean \\', + # ' -proj EPSG:5070 \\', + ' -o ', output_clean_geojson_path + ) + + system(mapshaper_command) + # message("Writing '", master_geojson_name, "' to: \n > '", master_geojson_filepath, "'") + + STAGING_FILES_TO_DELETE <- c(STAGING_FILES_TO_DELETE, output_clean_geojson_path) + # output_clean_gpkg_filename <- gsub(".geojson", ".gpkg", master_geojson_name) + # output_clean_gpkg_path <- paste0(vpu_dir, "/merged/", output_clean_gpkg_filename) + + # fema_vpu <- sf::read_sf(master_filepath) + # geojson_to_gpkg_cmd <- paste0("ogr2ogr -f GPKG ", updated_filepath, " ", output_clean_geojson_path) + geojson_to_gpkg_cmd <- paste0("ogr2ogr -nlt MULTIPOLYGON -s_srs EPSG:5070 -t_srs EPSG:5070 ", + updated_filepath, " ", + output_clean_geojson_path) + # geojson_to_gpkg_cmd <- paste0("ogr2ogr ", updated_filepath, " ", output_clean_geojson_path) + + # updated_gpkg_exists <- file.exists(updated_filepath) + # if (updated_gpkg_exists) { + # file.remove(updated_filepath) + # } + + + # if (OVERWRITE_FEMA_FILES || !updated_gpkg_exists) { + system(geojson_to_gpkg_cmd) + message("Writing '", updated_gpkg_name, "' to: \n > '", updated_filepath, "'") + # } + + # sf::st_layers(updated_filepath) + + # mapview::npts(fema) + fema <- + sf::read_sf(updated_filepath) %>% + # sf::read_sf(output_clean_geojson_path) %>% + # rmapshaper::ms_explode(sys=TRUE, sys_mem = 16) %>% + dplyr::mutate( + vpu = gsub("vpu-", "", VPU), + fema_id = as.character(1:dplyr::n()) + ) %>% + dplyr::select( + vpu, + fema_id, + geom + ) + + # fema %>% + # rmapshaper::ms_simplify(keep = 0.5, keep_shapes = T) %>% + # dplyr::group_by(fema_id) %>% + # dplyr::mutate(pts = mapview::npts(geom)) %>% + # dplyr::arrange(-pts) + + # remove before writting updated version + file.remove(updated_filepath) + + sf::write_sf( + fema, + updated_filepath, + append = FALSE + ) + + message("Deleting intermediary files\n") + for (delete_file in STAGING_FILES_TO_DELETE) { + if (file.exists(delete_file)) { + 
message("Deleting >>> '", delete_file, "'") + file.remove(delete_file) + } + + } +} + + +# ------------------------------------------------------------------------------------- +# ---- Store all FEMA layers in a single conus_fema.gpkg +# ------------------------------------------------------------------------------------- + +FEMA_VPU_SUBFOLDERS + +all_fema_vpu_layers <- list.files(BASE_DIRS_LIST$fema_by_vpu_output_dirs, full.names = TRUE) + +# fema_vpu_layers <- list.files(FEMA_VPU_SUBFOLDERS, full.names = T)[grepl("_output.gpkg", list.files(FEMA_VPU_SUBFOLDERS))] + +combine_gpkg_files(all_fema_vpu_layers, CONUS_FEMA_GPKG_PATH) + + +# ------------------------------------------------------------------------------------- + +} else { + message("'", basename(CONUS_FEMA_GPKG_PATH), "'already exists at \n> ", CONUS_FEMA_GPKG_PATH) +} + +# ------------------------------------------------------------------------------------- +# ------------------------------------------------------------------------------------- + +# # ------------------------------------------------------------------------------------- +# # ---- Union each VPU geopackage (either on state or just touching predicate) ---- +# # ------------------------------------------------------------------------------------- +# +# for (i in 1:length(FEMA_VPU_SUBFOLDERS)) { +# +# vpu_dir <- FEMA_VPU_SUBFOLDERS[i] +# VPU <- basename(vpu_dir) +# +# message(i, " - Attempting to union FEMA polygons for '", VPU, "'...") +# +# # path to the merged directory where the final merged geopackage will end up +# master_name <- paste0("fema_", gsub("VPU", "vpu", basename(vpu_dir))) +# master_gpkg_name <- paste0(master_name, ".gpkg") +# master_filepath <- paste0(vpu_dir, "/", master_gpkg_name) +# +# updated_gpkg_name <- gsub(".gpkg", "_output.gpkg", master_gpkg_name) +# updated_filepath <- paste0(vpu_dir, "/", updated_gpkg_name) +# +# message("> Re-unioning and re-exploding geometries in '", basename(master_filepath), "'") +# +# if(!file.exists(master_filepath)) { +# message("No FEMA geometries in '", VPU, "'") +# message() +# next +# } +# +# +# fema_vpu <- sf::read_sf(master_filepath) +# +# # fema_vpu %>% sf::st_geometry_type() %>% unique() +# # fema_vpu %>% mapview::npts() +# # fema_vpu %>% sf::st_is_valid() %>% all() +# # fema_vpu %>% +# # sf::st_make_valid() %>% +# # sf::st_geometry_type() %>% +# # unique() +# +# geom_type_counts <- table(sf::st_geometry_type(fema_vpu)) +# +# message("Geometry counts before casting all geometries to MULTIPOLYGON:") +# for (g in seq_along(geom_type_counts)) { +# message(" > ", names(geom_type_counts[g]), ": ", geom_type_counts[g]) +# } +# +# # mapview::mapview(fema_vpu, color = 'red', col.regions = 'white') + +# # mapview::mapview(fema_union, color = 'green', col.regions = 'white') +# +# # fema_vpu %>% +# # sf::st_make_valid() %>% +# # dplyr::filter(sf::st_geometry_type(geom) %in% c("POLYGON", "MULTIPOLYGON")) %>% +# # sf::st_is_valid() %>% +# # all() +# +# tryCatch({ +# +# fema_vpu <- +# fema_vpu %>% +# nngeo::st_remove_holes(max_area = 200) %>% +# # sf::st_make_valid() %>% +# # dplyr::filter(sf::st_geometry_type(geom) %in% c("POLYGON", "MULTIPOLYGON")) %>% +# add_predicate_group_id(sf::st_intersects) %>% +# dplyr::group_by(group_id) %>% +# rmapshaper::ms_dissolve(sys = TRUE, sys_mem = 16) %>% +# rmapshaper::ms_explode(sys = TRUE, sys_mem = 16) %>% +# dplyr::ungroup() %>% +# nngeo::st_remove_holes(max_area = 200) %>% +# dplyr::mutate( +# vpu = gsub("VPU_", "", VPU), +# fema_id = as.character(1:dplyr::n()) +# ) %>% +# 
dplyr::select( +# vpu, fema_id, +# # state, +# geom = geometry +# ) +# +# }, error = function(e) { +# message(VPU, " threw into the following error \n ", e) +# message(" > Cleaning ", VPU, " using a backup cleaning strategy...") +# +# fema_vpu <- +# fema_vpu %>% +# sf::st_make_valid() %>% +# dplyr::mutate( +# vpu = gsub("VPU_", "", VPU), +# fema_id = as.character(1:dplyr::n()) +# ) %>% +# dplyr::select( +# vpu, fema_id, +# # state, +# geom +# ) +# +# }) +# +# if (OVERWRITE_FEMA_FILES) { +# +# message("> Overwritting '", basename(master_filepath), "' with final clean version...") +# +# # union_file_path <- gsub(".gpkg", "_union.gpkg", fema_vpu_file) +# # message("> writting '", basename(union_file_path), "' (unioned and exploded version)") +# +# sf::write_sf( +# fema_vpu, +# updated_filepath +# # master_filepath +# # union_file_path +# ) +# } +# message() +# } diff --git a/runners/cs_runner2/utils.R b/runners/cs_runner2/utils.R new file mode 100644 index 0000000..5413d5c --- /dev/null +++ b/runners/cs_runner2/utils.R @@ -0,0 +1,2680 @@ +# Create an empty file structure +# base_dir: character, top level directory path +# Directory tree: +# base_dir/ +# └── lynker-spatial/ +# ├── hydrofabric/ +# ├── dem/ +# ├── vrt/ +# ├── tif/ +# ├── cs-extension-polygons/ +create_local_hydrofabric_base_dirs <- function(base_dir) { + # base_dir <- BASE_DIR + + # build paths + hydrofabric_dir <- paste0(base_dir, "/hydrofabric") + + # DEM dirs + dem_dir <- file.path(base_dir, "dem") + dem_vrt_dir <- file.path(dem_dir, "vrt") + # dem_tif_dir <- file.path(dem_dir, "tif") + + # polygons for transect extensions + cs_extension_polygons_dir <- file.path(base_dir, "cs-extension-polygons") + + # FEMA data + fema_dir <- file.path(base_dir, "fema") + + fema_fgb_dir <- file.path(fema_dir, "fema-fgb") + fema_geojson_dir <- file.path(fema_dir, "fema-geojson") + fema_clean_dir <- file.path(fema_dir, "fema-clean") + fema_gpkg_dir <- file.path(fema_dir, "fema-gpkg") + + # BY VPU folders + VPU_IDS <- get_vpu_ids() + fema_by_vpu_dir <- file.path(fema_dir, "fema-by-vpu") + fema_by_vpu_subdirs <- paste0(fema_by_vpu_dir, "/vpu-", VPU_IDS) + + # create base directories + create_if_not_exists(base_dir) + create_if_not_exists(hydrofabric_dir) + + # DEM dirs + create_if_not_exists(dem_dir) + create_if_not_exists(dem_vrt_dir) + # create_if_not_exists(dem_tif_dir) + + # extension polygons + create_if_not_exists(cs_extension_polygons_dir) + + + # Create FEMA folders + create_if_not_exists(fema_dir) + create_if_not_exists(fema_fgb_dir) + create_if_not_exists(fema_geojson_dir) + create_if_not_exists(fema_clean_dir) + create_if_not_exists(fema_gpkg_dir) + create_if_not_exists(fema_by_vpu_dir) + + for (path in fema_by_vpu_subdirs) { + create_if_not_exists(path) + create_if_not_exists(file.path(path, "subsets")) + create_if_not_exists(file.path(path, "merged")) + create_if_not_exists(file.path(path, "output")) + } + +} + +get_vpu_ids <- function() { + VPU_IDS <- c('01', '02', '03N', '03S', '03W', '04', '05', '06', '07', '08', '09', + '10L', '10U', '11', '12', '13', '14', '15', '16', '17', '18', '20', '21') + # VPU_IDS <- sf::st_drop_geometry(nhdplusTools::get_boundaries())$VPUID + + return(VPU_IDS) + +} + +# # Base directory for local file storage +# BASE_DIR <- '/Volumes/T7SSD/lynker-spatial' +# base_dir <- BASE_DIR +# # FEMA100 year flood map FGB save location (temporary, will be deleted after processing) +# FEMA_FGB_PATH <- file.path(BASE_DIRS_LIST$fema_dir, "fema_fgb") +# FEMA_GEOJSON_PATH <- file.path(BASE_DIRS_LIST$fema_dir, 
"fema_geojson") +# FEMA_CLEAN_PATH <- file.path(BASE_DIRS_LIST$fema_dir, "fema_clean") +# FEMA_GPKG_PATH <- file.path(BASE_DIRS_LIST$fema_dir, "fema_gpkg") +# FEMA_BY_VPU_PATH <- file.path(BASE_DIRS_LIST$fema_dir, "FEMA_BY_VPU") +# +# +# VPU_IDS <- c('01', '02', '03N', '03S', '03W', '04', '05', '06', '07', '08', '09', +# '10L', '10U', '11', '12', '13', '14', '15', '16', '17', '18', '20', '21') +# # VPU_IDS <- sf::st_drop_geometry(nhdplusTools::get_boundaries())$VPUID +# +# paste0("'", sf::st_drop_geometry(nhdplusTools::get_boundaries())$VPUID, "'", collapse = ", ") +# FEMA_VPU_SUBFOLDERS <- paste0(FEMA_BY_VPU_PATH, "/VPU_", VPU_IDS) + +# Create an empty file structure for a new version within a specified base_dir +# base_dir: character, top level directory path +# Directory tree: +# base_dir/ +# └── lynker-spatial/ +# ├── hydrofabric/ +# ├── version_number/ +# ├── network/ +# ├── transects/ +# ├── cross-sections/ +# ├── dem/ +# ├── dem-ml/ +# ├── dem-coastal-bathy/ +# ├── dem-points/ +create_new_version_dirs <- function(base_dir, version, with_output = FALSE) { + # version = "v3.0" + # base_dir <- BASE_DIR + + # build paths + hydrofabric_dir <- paste0(base_dir, "/hydrofabric") + version_base_dir <- paste0(hydrofabric_dir, "/", version) + + # polygons for transect extensions + ml_dir <- paste0(version_base_dir, "/ml") + + # reference features + ref_features_dir <- paste0(version_base_dir, "/reference-features") + + # conus network gpkg + network_dir <- paste0(version_base_dir, "/network") + + # transects + transects_dir <- paste0(version_base_dir, "/transects") + transects_base_dir <- paste0(transects_dir, "/base") + transects_fema_extended_dir <- paste0(transects_dir, "/extended-by-fema") + transects_cs_extended_dir <- paste0(transects_dir, "/extended-by-cs-attributes") + transects_output_dir <- paste0(transects_dir, "/output") + + # cross sections dirs + cross_sections_dir <- paste0(version_base_dir, "/cross-sections") + cross_sections_dem_dir <- paste0(cross_sections_dir, "/dem") + cross_sections_ml_dir <- paste0(cross_sections_dir, "/dem-ml") + cross_sections_coastal_bathy_dir <- paste0(cross_sections_dir, "/dem-coastal-bathy") + cross_sections_dem_pts_dir <- paste0(cross_sections_dir, "/dem-points") + cross_sections_output_dir <- paste0(cross_sections_dir, "/output") + + + if(with_output) { + output_dir <- paste0(version_base_dir, "/outputs") + } + + # create version BASE dir + create_if_not_exists(version_base_dir) + + # CONUS dir + create_if_not_exists(network_dir) + + # ML data + create_if_not_exists(ml_dir) + + # reference features data + create_if_not_exists(ref_features_dir) + + # transects + create_if_not_exists(transects_dir) + create_if_not_exists(transects_base_dir) + create_if_not_exists(transects_fema_extended_dir) + create_if_not_exists(transects_cs_extended_dir) + create_if_not_exists(transects_output_dir) + + # CS pts + create_if_not_exists(cross_sections_dir) + create_if_not_exists(cross_sections_dem_dir) + create_if_not_exists(cross_sections_ml_dir) + create_if_not_exists(cross_sections_coastal_bathy_dir) + create_if_not_exists(cross_sections_dem_pts_dir) + create_if_not_exists(cross_sections_output_dir) + + + if(with_output) { + create_if_not_exists(output_dir) + } + +} + +create_if_not_exists <- function(dir_path) { + if (!dir.exists(dir_path)) { + dir.create(dir_path, recursive = TRUE) + message("Created directory: '", dir_path, "'\n") + } +} + +# get a list of top level directories for main directory +get_base_dir_paths <- function(base_dir) { + # base_dir = 
BASE_DIR + # version = "v3.0" + + hydrofabric_dir <- file.path(base_dir, "hydrofabric") + + dem_dir <- file.path(base_dir, "dem") + dem_vrt_dir <- file.path(base_dir, "dem", "vrt") + # dem_tif_dir <- file.path(base_dir, "dem", "tif") + + cs_extension_polygons_dir <- file.path(base_dir, "cs-extension-polygons") + + # FEMA data + + fema_dir <- file.path(base_dir, "fema") + + fema_fgb_dir <- file.path(fema_dir, "fema-fgb") + fema_geojson_dir <- file.path(fema_dir, "fema-geojson") + fema_clean_dir <- file.path(fema_dir, "fema-clean") + fema_gpkg_dir <- file.path(fema_dir, "fema-gpkg") + + # BY VPU folders + VPU_IDS <- get_vpu_ids() + fema_by_vpu_dir <- file.path(fema_dir, "fema-by-vpu") + fema_by_vpu_subdirs <- paste0(fema_by_vpu_dir, "/vpu-", VPU_IDS) + + fema_by_vpu_subsets_dirs <- file.path(fema_by_vpu_subdirs, "subsets") + fema_by_vpu_merged_dirs <- file.path(fema_by_vpu_subdirs, "merged") + fema_by_vpu_output_dirs <- file.path(fema_by_vpu_subdirs, "output") + + return( + list( + hydrofabric_dir = hydrofabric_dir, + dem_dir = dem_dir, + dem_vrt_dir = dem_vrt_dir, + # dem_tif_dir = dem_tif_dir, + cs_extension_polygons_dir = cs_extension_polygons_dir, + fema_dir = fema_dir, + fema_fgb_dir = fema_fgb_dir, + fema_geojson_dir = fema_geojson_dir, + fema_clean_dir = fema_clean_dir, + fema_gpkg_dir = fema_gpkg_dir, + fema_by_vpu_dir = fema_by_vpu_dir, + fema_by_vpu_subdirs = fema_by_vpu_subdirs, + fema_by_vpu_subsets_dirs = fema_by_vpu_subsets_dirs, + fema_by_vpu_merged_dirs = fema_by_vpu_merged_dirs, + fema_by_vpu_output_dirs = fema_by_vpu_output_dirs + ) + ) +} + +# get list of a specific directories in a version directory +get_version_base_dir_paths <- function(base_dir, version) { + # base_dir = BASE_DIR + # version = "v3.0" + + hydrofabric_dir <- file.path(base_dir, "hydrofabric") + + version_base_dir <- file.path(hydrofabric_dir, version) + + # polygons for transect extensions + ml_dir <- file.path(version_base_dir, "ml") + + # reference features + ref_features_dir <- file.path(version_base_dir, "reference-features") + + # conus network gpkg + network_dir <- file.path(version_base_dir, "network") + + # transects + transects_dir <- file.path(version_base_dir, "transects") + transects_base_dir <- paste0(transects_dir, "/base") + transects_fema_extended_dir <- paste0(transects_dir, "/extended-by-fema") + transects_cs_extended_dir <- paste0(transects_dir, "/extended-by-cs-attributes") + transects_output_dir <- paste0(transects_dir, "/output") + + # cross sections dirs + cross_sections_dir <- file.path(version_base_dir, "cross-sections") + cross_sections_dem_dir <- file.path(cross_sections_dir, "dem") + cross_sections_ml_dir <- file.path(cross_sections_dir, "dem-ml") + cross_sections_coastal_bathy_dir <- file.path(cross_sections_dir, "dem-coastal-bathy") + cross_sections_dem_pts_dir <- file.path(cross_sections_dir, "dem-points") + cross_sections_output_dir <- paste0(cross_sections_dir, "/output") + + return( + list( + hydrofabric_dir = hydrofabric_dir, + version_base_dir = version_base_dir, + + ref_features_dir = ref_features_dir, + + network_dir = network_dir, + + ml_dir = ml_dir, + + transects_dir = transects_dir, + transects_base_dir = transects_base_dir, + transects_fema_extended_dir = transects_fema_extended_dir, + transects_cs_extended_dir = transects_cs_extended_dir, + transects_output_dir = transects_output_dir, + + cross_sections_dir = cross_sections_dir, + cross_sections_dem_dir = cross_sections_dem_dir, + cross_sections_dem_pts_dir = cross_sections_dem_pts_dir, + cross_sections_ml_dir 
= cross_sections_ml_dir, + cross_sections_coastal_bathy_dir = cross_sections_coastal_bathy_dir, + cross_sections_output_dir = cross_sections_output_dir + + ) + ) +} + + +combine_gpkg_files <- function(gpkg_paths, output_gpkg) { + + layer_counter <- list() + + for (gpkg_path in gpkg_paths) { + + base_name <- tools::file_path_sans_ext(basename(gpkg_path)) + # base_name <- gsub("_output.gpkg", "", basename(gpkg_path)) + + if (base_name %in% names(layer_counter)) { + layer_counter[[base_name]] <- layer_counter[[base_name]] + 1 + layer_name <- paste0(base_name, "_", layer_counter[[base_name]]) + } else { + + layer_counter[[base_name]] <- 1 + layer_name <- base_name + } + + tryCatch({ + sf_layer <- st_read(gpkg_path, quiet = TRUE) + + sf::st_write(sf_layer, + dsn = output_gpkg, + layer = layer_name, + append = TRUE, + quiet = TRUE) + + message("Successfully added '", basename(gpkg_path), "' as layer: '", layer_name, "' to\n > '", output_gpkg, "'") + + }, error = function(e) { + warning("Error processing: ", basename(gpkg_path)) + warning(e) + }) + } +} + +get_transect_filenames <- function(vpu, sep = "-", ext = ".gpkg") { + # ext <- ".gpkg" + # sep = "_" + transects_base_path <- paste0(paste0(c(vpu, "transects", "base"), collapse = sep) , ext) + transects_fema_extended_path <- paste0(paste0(c(vpu, "transects", "extended", "by", "fema"), collapse = sep) , ext) + transects_cs_extended_path <- paste0(paste0(c(vpu, "transects", "extended", "by", "cs", "attributes"), collapse = sep) , ext) + transects_output_path <- paste0(paste0(c(vpu, "transects"), collapse = sep) , ext) + + return( + list( + transects_base_path = transects_base_path, + transects_fema_extended_path = transects_fema_extended_path, + transects_cs_extended_path = transects_cs_extended_path, + transects_output_path = transects_output_path + ) + ) +} + +get_cross_section_filenames <- function(vpu, sep = "-", ext = ".parquet") { + # ext <- ".parquet" + # sep = "_" + cs_pts_base_path <- paste0(paste0(c(vpu, "cross", "sections", "base"), collapse = sep) , ext) + cs_pts_fema_extended_path <- paste0(paste0(c(vpu, "cross", "sections", "extended", "by", "fema"), collapse = sep) , ext) + cs_pts_cs_extended_path <- paste0(paste0(c(vpu, "cross", "sections", "extended", "by", "cs", "attributes"), collapse = sep) , ext) + cs_pts_output_path <- paste0(paste0(c(vpu, "cross", "sections"), collapse = sep) , ext) + + return( + list( + cs_pts_base_path = cs_pts_base_path, + cs_pts_fema_extended_path = cs_pts_fema_extended_path, + cs_pts_cs_extended_path = cs_pts_cs_extended_path, + cs_pts_output_path = cs_pts_output_path + ) + ) +} + +# derive the FEMA VPU layer name from the conus_fema.gpkg +get_fema_conus_layer_name <- function(vpu) { + + return(paste0("fema-vpu-", vpu)) + +} + +list_s3_objects <- function(s3_bucket, pattern = NULL, aws_profile = NULL) { + + profile_option <- if (!is.null(aws_profile)) paste0("--profile ", aws_profile) else "" + + if (is.null(pattern) || pattern == "") { + grep_command <- "" # no filtering if empty or NULL + } else { + grep_command <- paste0(" | grep -E \"", pattern, "\"") # grep if a pattern is given + } + + cmd <- paste0( + '#!/bin/bash\n', + 'S3_BUCKET="', s3_bucket, '"\n', + 'PATTERN="', pattern, '"\n', + 'S3_OBJECTS=$(aws s3 ls "$S3_BUCKET" ', profile_option, ' | awk \'{print $4}\' | grep -E "$PATTERN")\n', + 'echo "$S3_OBJECTS"' + ) + # cmd <- paste0( + # '#!/bin/bash\n', + # 'S3_BUCKET="', s3_bucket, '"\n', + # 'S3_OBJECTS=$(aws s3 ls "$S3_BUCKET" ', profile_option, ' | awk \'{print $4}\'', grep_command, ')\n', 
+  #   'echo "$S3_OBJECTS"'
+  # )
+  ls_output <- system(cmd, intern = TRUE)
+  return(ls_output)
+}
+
+download_tiles <- function(tile_paths, output_dir) {
+  # output_dir <- DEM_TIF_DIR
+  # tile_paths
+  # error_tiles <-
+  tif_save_paths <- paste0(output_dir, "/", basename(tile_paths))
+
+  error_tiles <- data.frame(
+    tile = basename(tile_paths),
+    status = TRUE
+  )
+
+  for (i in seq_along(tile_paths)) {
+    # i = 1
+    tile_path <- tile_paths[i]
+    tif_save_path <- tif_save_paths[i]
+
+    message("[", i, "]",
+            "\n > Tile: ", basename(tile_path),
+            "\n > Output path: ", tif_save_path
+    )
+
+    download_tif_cmd <- paste0("curl -o ", tif_save_path, " ", tile_path)
+
+    tryCatch({
+
+      tif_download_output <- system(download_tif_cmd, intern = TRUE)
+      message(" > Successfully downloaded tile: ", basename(tile_path))
+
+    }, error = function(e) {
+
+      message("Error downloading tile: ", basename(tile_path))
+      message("Error below: \n ", e)
+
+      error_tiles[error_tiles$tile == basename(tile_path), ] <- FALSE
+
+    })
+
+  }
+
+  return(error_tiles)
+
+}
+
+# Given 2 character vectors of filenames that both include VPU strings after a "nextgen_" string, match them together to
+# make sure they are aligned and in the same order
+# x is a character vector of file paths with a VPU ID preceded by a "nextgen_" string
+# y is a character vector of file paths with a VPU ID preceded by a "nextgen_" string
+# base is a character vector of the base directory of the files. Defaults to NULL
+# Returns a dataframe with VPU, x, and y columns
+align_files_by_vpu <- function(
+    x,
+    y,
+    base = NULL
+) {
+
+  # Regular expression pattern to match numeric pattern after "nextgen_" and remove everything after the ending period
+  regex_pattern <- "nextgen_(\\d+[A-Za-z]?).*"
+
+  # path dataframe for X filepaths
+  x_paths <- data.frame(x = x)
+
+  # path dataframe for Y filepaths
+  y_paths <- data.frame(y = y)
+
+  # generate VPU IDs based on file path regular expression matching with "regex_pattern" above
+  x_paths$vpu <- gsub(regex_pattern, "\\1", x_paths$x)
+  y_paths$vpu <- gsub(regex_pattern, "\\1", y_paths$y)
+
+  # match paths based on VPU column
+  matched_paths <- dplyr::left_join(
+    x_paths,
+    y_paths,
+    by = "vpu"
+  )
+
+  # reorder columns
+  matched_paths <- dplyr::relocate(matched_paths, vpu, x, y)
+
+  if(!is.null(base)) {
+    matched_paths$base_dir <- base
+  }
+
+  return(matched_paths)
+
+}
+
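+# --- Illustrative usage sketch (not part of the original utils) ---
+# Minimal example of align_files_by_vpu(); the filenames below are hypothetical and only
+# show how rows get matched on the VPU ID that follows the "nextgen_" prefix.
+# x_files <- c("nextgen_01.gpkg", "nextgen_03W.gpkg")
+# y_files <- c("nextgen_03W_ml_outputs.parquet", "nextgen_01_ml_outputs.parquet")
+# align_files_by_vpu(x = x_files, y = y_files, base = "/tmp")
+# # Expected result: a dataframe with columns vpu, x, y (plus base_dir), where each row
+# # pairs the files for the same VPU ("01" with "01", "03W" with "03W") regardless of input order.
+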
gsub("-100yr-flood_valid_clean.gpkg", "", source) + ) %>% + dplyr::select(fema_id, source, state, geometry) + + return(fema) +} + +add_predicate_group_id <- function(polys, predicate) { + # GROUP BY SPATIAL PREDICATES + # ----------------------------------------- + # predicate = sf::st_touches + # polys <- sf_df + # ----------------------------------------- + + + relations <- predicate(polys) + + relations <- lapply(seq_along(relations), function(i) { as.character(sort(unique(c(relations[i][[1]], i)))) }) + + group_ids_map <- fastmap::fastmap() + ids_to_groups <- fastmap::fastmap() + + group_id <- 0 + + for (i in seq_along(relations)) { + + predicate_ids <- relations[i][[1]] + + # message("(", i, ") - ", predicate_ids) + # message("Start Group ID: ", group_id) + + id_group_check <- ids_to_groups$has(predicate_ids) + + if(any(id_group_check)) { + + known_groups <- ids_to_groups$mget(predicate_ids) + known_group <- known_groups[unname(sapply(known_groups , function(kg) { + !is.null(kg) + }))][[1]] + + # message("IDs part of past group ID > '", known_group, "'") + + past_group_ids <- group_ids_map$get(known_group)[[1]] + updated_group_ids <- as.character( + sort(as.numeric(unique(c(past_group_ids, predicate_ids)))) + ) + + group_ids_map$set(known_group, list(updated_group_ids)) + + new_ids <- predicate_ids[!predicate_ids %in% past_group_ids] + + # message("Adding ", new_ids, " to seen set...") + + # add any newly added IDs to the seen map + for (seen_id in new_ids) { + # message(seen_id) + ids_to_groups$set(as.character(seen_id), as.character(group_id)) + } + + } else { + # get a new group ID number + group_id <- group_id + 1 + # message("IDs form NEW group > '", group_id, "'") + + # create a new key in the map with the predicate IDs list as the value + group_ids_map$set(as.character(group_id), list(predicate_ids)) + + # message("Adding ", predicate_ids, " to seen set...") + + # add each predicate ID to the map storing the seen indexes and their respecitve group IDs + for (seen_id in predicate_ids) { + # message(seen_id) + ids_to_groups$set(as.character(seen_id), as.character(group_id)) + } + } + # message("End group ID: ", group_id, "\n") + } + + group_ids <- group_ids_map$as_list() + + grouping_df <- lapply(seq_along(group_ids), function(i) { + # i = 2 + grouping <- group_ids[i] + group_id <- names(grouping) + indices <- grouping[[1]][[1]] + + data.frame( + index = as.numeric(indices), + group_id = rep(group_id, length(indices)) + ) + + }) %>% + dplyr::bind_rows() %>% + dplyr::arrange(i) + + # count up the number of IDs for each group, well use this to determine which group + # to put any indices that had MULTIPLE groups they were apart of (use the group with the most other members) + group_id_counts <- + grouping_df %>% + dplyr::group_by(group_id) %>% + dplyr::count() %>% + # dplyr::arrange(-n) %>% + dplyr::ungroup() + + # select the IDs with the most other members + grouping_df <- + grouping_df %>% + dplyr::left_join( + group_id_counts, + by = 'group_id' + ) %>% + dplyr::group_by(index) %>% + dplyr::slice_max(n, with_ties = FALSE) %>% + dplyr::ungroup() %>% + dplyr::select(-n) %>% + dplyr::arrange(-index) + + polys$group_id <- grouping_df$group_id + + return(polys) + +} + +# Update flowlines and transects to remove flowlines and transects that intersect with reference_features waterbodies +# flowlines: flowlines linestring sf object +# trans: transects linestring sf object +# waterbodies: waterbodies polygon sf object +# Returns a list of length 2 with logical vectors that subsets the 
"flowlines" and "transects" sf objects to remove flowlines and transects that intersect waterbodies +### Returns a list of length 2 with updated "flowlines" and "transects" sf objects +wb_intersects <- function(flowlines, trans, waterbodies) { + + ######## ######## ######## ######## ######## ######## + + flowlines_geos <- geos::as_geos_geometry(flowlines) + wbs_geos <- geos::as_geos_geometry(waterbodies) + + # temporary ID for transects that is the "hy_id", underscore, "cs_id", used for subsetting in future steps + trans$tmp_id <- paste0(trans$hy_id, "_", trans$cs_id) + + message("Checking flowlines against waterbodies...") + + # create an index between flowlines and waterbodies + wb_index <- geos::geos_intersects_matrix(flowlines_geos, wbs_geos) + + # remove any flowlines that cross more than 1 waterbody + to_keep <- flowlines[lengths(wb_index) == 0, ] + to_check <- flowlines[lengths(wb_index) != 0, ] + + # subset transects to the hy_ids in "to_check" set of flowlines + trans_check <- trans[trans$hy_id %in% unique(to_check$id), ] + # trans_check <- trans_geos[trans$hy_id %in% unique(to_check$id)] + + # check where the transects linestrings intersect with the waterbodies + trans_geos_check <- geos::as_geos_geometry(trans_check) + + message("Checking transects against waterbodies (v2) ...") + wb_trans_index <- geos::geos_intersects_matrix(trans_geos_check, wbs_geos) # (NEW METHOD) + # wb_trans_index <- geos::geos_intersects_any(trans_geos_check, wbs_geos[unlist(wb_index)]) # (OLD METHOD) + + # sum(lengths(wb_trans_index) == 0) + # length(wb_trans_index) + + # within the transects lines that are on a flowline that crosses a waterbody, + # check if any of these transects line DO NOT CROSS A WATERBODY AT ALL + trans_keep <- trans_check[lengths(wb_trans_index) == 0, ] # (NEW METHOD) + # trans_keep <- trans_check[!wb_trans_index, ] # (OLD METHOD) + + # preserve any flowlines that CROSS A WATERBODY BUT ALSO HAVE A TRANSECT LINE that does NOT cross any waterbodies + to_check <- to_check[to_check$id %in% unique(trans_keep$hy_id), ] + + # update flowlines to keep with flowlines that intersect a waterbody BUT STILL, + # have transects that are NOT in the waterbody + to_keep <- dplyr::bind_rows(to_keep, to_check) + + # 'tmp_ids' of transects that are being checked and also the transects within trans_check + # that were determined to be valid (are being kept) + check_ids <- unique(trans_check$tmp_id) + keep_ids <- unique(trans_keep$tmp_id) + + # logical vectors of which flowlines/transects to keep (KEEP == TRUE) + # - Remove any transects that are on flowlines that cross a waterbody AND the transect crosses the waterbody too. 
+  #  - Keep original transects that are not on flowlines that intersect waterbodies AND
+  #    also the transects that do NOT intersect waterbodies but are on a flowline that DOES intersect a waterbody
+  valid_flowlines <- flowlines$id %in% to_keep$id
+  valid_transects <- trans$tmp_id %in% dplyr::filter(trans,
+                                                     !tmp_id %in% check_ids[!check_ids %in% keep_ids])$tmp_id
+
+  # return a list of updated flowlines and transects
+  return(
+    list(
+      "valid_flowlines" = valid_flowlines,
+      "valid_transects" = valid_transects
+    )
+  )
+
+  # # within the transect lines that are on a flowline that crosses a waterbody,
+  # # check if any of these transect lines DO NOT CROSS A WATERBODY AT ALL
+  # trans_keep <- trans_check[!trans_wb_index, ]
+  # # trans_keep <- trans_check[lengths(trans_wb_index2) == 0, ]
+  #
+  # # preserve any flowlines that CROSS A WATERBODY BUT ALSO HAVE A TRANSECT LINE that does NOT cross any waterbodies
+  # to_check <- to_check[to_check$id %in% unique(trans_keep$hy_id), ]
+  #
+  # # update flowlines to keep with flowlines that intersect a waterbody BUT STILL,
+  # # have transects that are NOT in the waterbody
+  # to_keep <- dplyr::bind_rows(to_keep, to_check)
+  #
+  # # 'tmp_ids' of transects that are being checked and also the transects within trans_check
+  # # that were determined to be valid (are being kept)
+  # check_ids <- unique(trans_check$tmp_id)
+  # keep_ids <- unique(trans_keep$tmp_id)
+  #
+  # # logical vectors of which flowlines/transects to keep (KEEP == TRUE)
+  # #  - Remove any transects that are on flowlines that cross a waterbody AND the transect crosses the waterbody too.
+  # #  - Keep original transects that are not on flowlines that intersect waterbodies AND
+  # #    also the transects that do NOT intersect waterbodies but are on a flowline that DOES intersect a waterbody
+  # valid_flowlines <- flowlines$id %in% to_keep$id
+  # valid_transects <- trans$tmp_id %in% dplyr::filter(trans,
+  #                                                    !tmp_id %in% check_ids[!check_ids %in% keep_ids])$tmp_id
+  #
+  # # return a list of updated flowlines and transects
+  # return(
+  #   list(
+  #     "valid_flowlines" = valid_flowlines,
+  #     "valid_transects" = valid_transects
+  #   )
+  # )
+}
+
+add_intersects_ids <- function(x, y, id_col) {
+  # make sure the crs are the same
+  y <- sf::st_transform(y, sf::st_crs(x))
+
+  # Perform the intersection
+  intersections <- sf::st_intersects(x, y)
+
+  # add the intersected values to the first dataframe
+  x[[id_col]] <- unlist(lapply(intersections, function(idx) {
+    if (length(idx) > 0) {
+      paste0(unlist(y[[id_col]][idx]), collapse = ", ")
+    } else {
+      NA
+    }
+  }))
+
+  return(x)
+}
+
+unnest_ids <- function(ids) {
+  return(
+    unique(unlist(strsplit(unique(ids), ", ")))
+  )
+}
+
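+# --- Illustrative usage sketch (not part of the original utils) ---
+# add_intersects_ids() stores the IDs of every 'y' feature that intersects each 'x' feature
+# as one comma-separated string, and unnest_ids() recovers the unique IDs from that column.
+# The 'transects' and 'fema_polygons' objects below are hypothetical.
+# transects_with_fema <- add_intersects_ids(x = transects, y = fema_polygons, id_col = "fema_id")
+# # a transect crossing two FEMA polygons would get fema_id = "101, 205"
+# unnest_ids(transects_with_fema$fema_id)
+# # -> c("101", "205", ...) the unique FEMA IDs intersected by any transect
+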
+#' Get the polygons that intersect with any of the linestring geometries
+#' This is just a wrapper around geos::geos_intersects_matrix. Takes in sf dataframes, uses geos, then outputs sf dataframes
+#' @param polygons polygon sf object. Default is NULL
+#' @param lines linestring sf object. Default is NULL.
+#'
+#' @return sf dataframe of polygons that intersect with the linestrings
+polygons_with_line_intersects <- function(polygons = NULL, lines = NULL) {
+
+  if (is.null(polygons)) {
+    stop("NULL 'polygons' argument, provide an sf dataframe of POLYGON or MULTIPOLYGON geometries")
+  }
+
+  if (is.null(lines)) {
+    stop("NULL 'lines' argument, provide an sf dataframe of LINESTRING or MULTILINESTRING geometries")
+  }
+
+  # Convert the SF geometries to geos geometries
+  polygons_geos <- geos::as_geos_geometry(polygons)
+  lines_geos <- geos::as_geos_geometry(lines)
+
+  # create an index between the polygons and linestrings
+  lines_index <- geos::geos_intersects_matrix(polygons_geos, lines_geos)
+
+  # get the polygons that have at least 1 intersection with the 'lines'
+  polygons_with_lines <- polygons[lengths(lines_index) != 0, ]
+
+  return(polygons_with_lines)
+}
+
+# TODO: Delete these NEW DOMAIN functions...
+# Create an empty file structure
+# base_dir: character, top level directory path
+# domain_dirname: character, name of the intended new domain directory, if folder exists, then the required subdirectories are created (if they DO NOT exist)
+
+# Directory tree:
+# base_dir/
+#   └── domain_dirname/
+#     ├── flowlines/
+#     ├── dem/
+#     ├── transects/
+#     ├── cross_sections/
+#     └── cs_pts/
+create_new_domain_dirs <- function(base_dir, domain_dirname, with_output = FALSE) {
+
+  # build paths
+  domain_dir <- paste0(base_dir, "/", domain_dirname)
+  flowlines_dir <- paste0(domain_dir, "/flowlines")
+  domain_subset_dir <- paste0(domain_dir, "/domain_subset")
+  dem_dir <- paste0(domain_dir, "/dem")
+  transects_dir <- paste0(domain_dir, "/transects")
+  cross_sections_dir <- paste0(domain_dir, "/cross_sections")
+  cs_pts_dir <- paste0(domain_dir, "/cs_pts")
+  vpu_subsets_dir <- paste0(domain_dir, "/vpu-subsets")
+
+  if(with_output) {
+    output_dir <- paste0(domain_dir, "/outputs")
+  }
+
+  # create directories
+  create_if_not_exists(domain_dir)
+  create_if_not_exists(flowlines_dir)
+  create_if_not_exists(domain_subset_dir)
+  create_if_not_exists(dem_dir)
+  create_if_not_exists(transects_dir)
+  create_if_not_exists(cross_sections_dir)
+  create_if_not_exists(cs_pts_dir)
+  create_if_not_exists(vpu_subsets_dir)
+
+  if(with_output) {
+    create_if_not_exists(output_dir)
+  }
+
+}
+
+# get path strings for a domain dir (based on a base dir and domain dirname)
+# NOTE: this does NOT guarantee that these folders exist,
+# NOTE: it just gets the paths if they were created by create_new_domain_dirs()
+get_new_domain_paths <- function(base_dir, domain_dirname, with_output = FALSE) {
+
+  # build paths
+  domain_dir <- paste0(base_dir, "/", domain_dirname)
+  flowlines_dir <- paste0(domain_dir, "/flowlines")
+  domain_subset_dir <- paste0(domain_dir, "/domain_subset")
+  dem_dir <- paste0(domain_dir, "/dem")
+  transects_dir <- paste0(domain_dir, "/transects")
+  cross_sections_dir <- paste0(domain_dir, "/cross_sections")
+  cs_pts_dir <- paste0(domain_dir, "/cs_pts")
+  vpu_subsets_dir <- paste0(domain_dir, "/vpu-subsets")
+  output_dir <- ifelse(with_output, paste0(domain_dir, "/outputs"), NA)
+
+
+  # named list of file paths
+  return(
+    list(
+      base_dir = base_dir,
+      domain_dir = domain_dir,
+      flowlines_dir = flowlines_dir,
+      domain_subset_dir = domain_subset_dir,
+      dem_dir = dem_dir,
+      transects_dir = transects_dir,
+      cross_sections_dir = cross_sections_dir,
+      cs_pts_dir = cs_pts_dir,
+      vpu_subsets_dir = vpu_subsets_dir,
+      output_dir = output_dir
+    )
+  )
+
+}
+
+download_3dep_vrt <- function(base_dir) {
+
+  ## Cmd+A/Cmd+C from: 
http://prd-tnm.s3.amazonaws.com/index.html?prefix=StagedProducts/Elevation/13/TIFF/current/ + ### paste w/ `datapasta::vector_paste_vertical()` + ### Some manual cleaning of header and footer mess... + ### Reason? Un-scrapable page, and no index.gpkg... + + t <- c( + "0 n06e162/", + "0 n06e163/", + "0 n07e134/", + "0 n07e151/", + "0 n07e152/", + "0 n07e158/", + "0 n08e134/", + "0 n08e151/", + "0 n08e152/", + "0 n08e158/", + "0 n09e134/", + "0 n10e138/", + "0 n14e144/", + "0 n15e145/", + "0 n16e145/", + "0 n18w065/", + "0 n18w066/", + "0 n18w067/", + "0 n18w068/", + "0 n19w065/", + "0 n19w066/", + "0 n19w067/", + "0 n19w068/", + "0 n19w156/", + "0 n20w155/", + "0 n20w156/", + "0 n20w157/", + "0 n21w156/", + "0 n21w157/", + "0 n21w158/", + "0 n22w157/", + "0 n22w158/", + "0 n22w159/", + "0 n22w160/", + "0 n22w161/", + "0 n23w160/", + "0 n23w161/", + "0 n25w081/", + "0 n25w082/", + "0 n25w083/", + "0 n26w081/", + "0 n26w082/", + "0 n26w098/", + "0 n26w099/", + "0 n27w081/", + "0 n27w082/", + "0 n27w083/", + "0 n27w098/", + "0 n27w099/", + "0 n27w100/", + "0 n28w081/", + "0 n28w082/", + "0 n28w083/", + "0 n28w097/", + "0 n28w098/", + "0 n28w099/", + "0 n28w100/", + "0 n28w101/", + "0 n29w081/", + "0 n29w082/", + "0 n29w083/", + "0 n29w090/", + "0 n29w096/", + "0 n29w097/", + "0 n29w098/", + "0 n29w099/", + "0 n29w100/", + "0 n29w101/", + "0 n29w104/", + "0 n30w081/", + "0 n30w082/", + "0 n30w083/", + "0 n30w084/", + "0 n30w085/", + "0 n30w086/", + "0 n30w089/", + "0 n30w090/", + "0 n30w091/", + "0 n30w092/", + "0 n30w093/", + "0 n30w094/", + "0 n30w095/", + "0 n30w096/", + "0 n30w097/", + "0 n30w098/", + "0 n30w099/", + "0 n30w100/", + "0 n30w101/", + "0 n30w102/", + "0 n30w103/", + "0 n30w104/", + "0 n30w105/", + "0 n31w082/", + "0 n31w083/", + "0 n31w084/", + "0 n31w085/", + "0 n31w086/", + "0 n31w087/", + "0 n31w088/", + "0 n31w089/", + "0 n31w090/", + "0 n31w091/", + "0 n31w092/", + "0 n31w093/", + "0 n31w094/", + "0 n31w095/", + "0 n31w096/", + "0 n31w097/", + "0 n31w098/", + "0 n31w099/", + "0 n31w100/", + "0 n31w101/", + "0 n31w102/", + "0 n31w103/", + "0 n31w104/", + "0 n31w105/", + "0 n31w106/", + "0 n31w107/", + "0 n32w081/", + "0 n32w082/", + "0 n32w083/", + "0 n32w084/", + "0 n32w085/", + "0 n32w086/", + "0 n32w087/", + "0 n32w088/", + "0 n32w089/", + "0 n32w090/", + "0 n32w091/", + "0 n32w092/", + "0 n32w093/", + "0 n32w094/", + "0 n32w095/", + "0 n32w096/", + "0 n32w097/", + "0 n32w098/", + "0 n32w099/", + "0 n32w100/", + "0 n32w101/", + "0 n32w102/", + "0 n32w103/", + "0 n32w104/", + "0 n32w105/", + "0 n32w106/", + "0 n32w107/", + "0 n32w108/", + "0 n32w109/", + "0 n32w110/", + "0 n32w111/", + "0 n32w112/", + "0 n32w113/", + "0 n32w114/", + "0 n33w080/", + "0 n33w081/", + "0 n33w082/", + "0 n33w083/", + "0 n33w084/", + "0 n33w085/", + "0 n33w086/", + "0 n33w087/", + "0 n33w088/", + "0 n33w089/", + "0 n33w090/", + "0 n33w091/", + "0 n33w092/", + "0 n33w093/", + "0 n33w094/", + "0 n33w095/", + "0 n33w096/", + "0 n33w097/", + "0 n33w098/", + "0 n33w099/", + "0 n33w100/", + "0 n33w101/", + "0 n33w102/", + "0 n33w103/", + "0 n33w104/", + "0 n33w105/", + "0 n33w106/", + "0 n33w107/", + "0 n33w108/", + "0 n33w109/", + "0 n33w110/", + "0 n33w111/", + "0 n33w112/", + "0 n33w113/", + "0 n33w114/", + "0 n33w115/", + "0 n33w116/", + "0 n33w117/", + "0 n33w118/", + "0 n33w119/", + "0 n34w078/", + "0 n34w079/", + "0 n34w080/", + "0 n34w081/", + "0 n34w082/", + "0 n34w083/", + "0 n34w084/", + "0 n34w085/", + "0 n34w086/", + "0 n34w087/", + "0 n34w088/", + "0 n34w089/", + "0 n34w090/", + "0 
n34w091/", + "0 n34w092/", + "0 n34w093/", + "0 n34w094/", + "0 n34w095/", + "0 n34w096/", + "0 n34w097/", + "0 n34w098/", + "0 n34w099/", + "0 n34w100/", + "0 n34w101/", + "0 n34w102/", + "0 n34w103/", + "0 n34w104/", + "0 n34w105/", + "0 n34w106/", + "0 n34w107/", + "0 n34w108/", + "0 n34w109/", + "0 n34w110/", + "0 n34w111/", + "0 n34w112/", + "0 n34w113/", + "0 n34w114/", + "0 n34w115/", + "0 n34w116/", + "0 n34w117/", + "0 n34w118/", + "0 n34w119/", + "0 n34w120/", + "0 n34w121/", + "0 n35w076/", + "0 n35w077/", + "0 n35w078/", + "0 n35w079/", + "0 n35w080/", + "0 n35w081/", + "0 n35w082/", + "0 n35w083/", + "0 n35w084/", + "0 n35w085/", + "0 n35w086/", + "0 n35w087/", + "0 n35w088/", + "0 n35w089/", + "0 n35w090/", + "0 n35w091/", + "0 n35w092/", + "0 n35w093/", + "0 n35w094/", + "0 n35w095/", + "0 n35w096/", + "0 n35w097/", + "0 n35w098/", + "0 n35w099/", + "0 n35w100/", + "0 n35w101/", + "0 n35w102/", + "0 n35w103/", + "0 n35w104/", + "0 n35w105/", + "0 n35w106/", + "0 n35w107/", + "0 n35w108/", + "0 n35w109/", + "0 n35w110/", + "0 n35w111/", + "0 n35w112/", + "0 n35w113/", + "0 n35w114/", + "0 n35w115/", + "0 n35w116/", + "0 n35w117/", + "0 n35w118/", + "0 n35w119/", + "0 n35w120/", + "0 n35w121/", + "0 n36w076/", + "0 n36w077/", + "0 n36w078/", + "0 n36w079/", + "0 n36w080/", + "0 n36w081/", + "0 n36w082/", + "0 n36w083/", + "0 n36w084/", + "0 n36w085/", + "0 n36w086/", + "0 n36w087/", + "0 n36w088/", + "0 n36w089/", + "0 n36w090/", + "0 n36w091/", + "0 n36w092/", + "0 n36w093/", + "0 n36w094/", + "0 n36w095/", + "0 n36w096/", + "0 n36w097/", + "0 n36w098/", + "0 n36w099/", + "0 n36w100/", + "0 n36w101/", + "0 n36w102/", + "0 n36w103/", + "0 n36w104/", + "0 n36w105/", + "0 n36w106/", + "0 n36w107/", + "0 n36w108/", + "0 n36w109/", + "0 n36w110/", + "0 n36w111/", + "0 n36w112/", + "0 n36w113/", + "0 n36w114/", + "0 n36w115/", + "0 n36w116/", + "0 n36w117/", + "0 n36w118/", + "0 n36w119/", + "0 n36w120/", + "0 n36w121/", + "0 n36w122/", + "0 n37w076/", + "0 n37w077/", + "0 n37w078/", + "0 n37w079/", + "0 n37w080/", + "0 n37w081/", + "0 n37w082/", + "0 n37w083/", + "0 n37w084/", + "0 n37w085/", + "0 n37w086/", + "0 n37w087/", + "0 n37w088/", + "0 n37w089/", + "0 n37w090/", + "0 n37w091/", + "0 n37w092/", + "0 n37w093/", + "0 n37w094/", + "0 n37w095/", + "0 n37w096/", + "0 n37w097/", + "0 n37w098/", + "0 n37w099/", + "0 n37w100/", + "0 n37w101/", + "0 n37w102/", + "0 n37w103/", + "0 n37w104/", + "0 n37w105/", + "0 n37w106/", + "0 n37w107/", + "0 n37w108/", + "0 n37w109/", + "0 n37w110/", + "0 n37w111/", + "0 n37w112/", + "0 n37w113/", + "0 n37w114/", + "0 n37w115/", + "0 n37w116/", + "0 n37w117/", + "0 n37w118/", + "0 n37w119/", + "0 n37w120/", + "0 n37w121/", + "0 n37w122/", + "0 n37w123/", + "0 n38w076/", + "0 n38w077/", + "0 n38w078/", + "0 n38w079/", + "0 n38w080/", + "0 n38w081/", + "0 n38w082/", + "0 n38w083/", + "0 n38w084/", + "0 n38w085/", + "0 n38w086/", + "0 n38w087/", + "0 n38w088/", + "0 n38w089/", + "0 n38w090/", + "0 n38w091/", + "0 n38w092/", + "0 n38w093/", + "0 n38w094/", + "0 n38w095/", + "0 n38w096/", + "0 n38w097/", + "0 n38w098/", + "0 n38w099/", + "0 n38w100/", + "0 n38w101/", + "0 n38w102/", + "0 n38w103/", + "0 n38w104/", + "0 n38w105/", + "0 n38w106/", + "0 n38w107/", + "0 n38w108/", + "0 n38w109/", + "0 n38w110/", + "0 n38w111/", + "0 n38w112/", + "0 n38w113/", + "0 n38w114/", + "0 n38w115/", + "0 n38w116/", + "0 n38w117/", + "0 n38w118/", + "0 n38w119/", + "0 n38w120/", + "0 n38w121/", + "0 n38w122/", + "0 n38w123/", + "0 n38w124/", + "0 n39w075/", + "0 
n39w076/", + "0 n39w077/", + "0 n39w078/", + "0 n39w079/", + "0 n39w080/", + "0 n39w081/", + "0 n39w082/", + "0 n39w083/", + "0 n39w084/", + "0 n39w085/", + "0 n39w086/", + "0 n39w087/", + "0 n39w088/", + "0 n39w089/", + "0 n39w090/", + "0 n39w091/", + "0 n39w092/", + "0 n39w093/", + "0 n39w094/", + "0 n39w095/", + "0 n39w096/", + "0 n39w097/", + "0 n39w098/", + "0 n39w099/", + "0 n39w100/", + "0 n39w101/", + "0 n39w102/", + "0 n39w103/", + "0 n39w104/", + "0 n39w105/", + "0 n39w106/", + "0 n39w107/", + "0 n39w108/", + "0 n39w109/", + "0 n39w110/", + "0 n39w111/", + "0 n39w112/", + "0 n39w113/", + "0 n39w114/", + "0 n39w115/", + "0 n39w116/", + "0 n39w117/", + "0 n39w118/", + "0 n39w119/", + "0 n39w120/", + "0 n39w121/", + "0 n39w122/", + "0 n39w123/", + "0 n39w124/", + "0 n40w075/", + "0 n40w076/", + "0 n40w077/", + "0 n40w078/", + "0 n40w079/", + "0 n40w080/", + "0 n40w081/", + "0 n40w082/", + "0 n40w083/", + "0 n40w084/", + "0 n40w085/", + "0 n40w086/", + "0 n40w087/", + "0 n40w088/", + "0 n40w089/", + "0 n40w090/", + "0 n40w091/", + "0 n40w092/", + "0 n40w093/", + "0 n40w094/", + "0 n40w095/", + "0 n40w096/", + "0 n40w097/", + "0 n40w098/", + "0 n40w099/", + "0 n40w100/", + "0 n40w101/", + "0 n40w102/", + "0 n40w103/", + "0 n40w104/", + "0 n40w105/", + "0 n40w106/", + "0 n40w107/", + "0 n40w108/", + "0 n40w109/", + "0 n40w110/", + "0 n40w111/", + "0 n40w112/", + "0 n40w113/", + "0 n40w114/", + "0 n40w115/", + "0 n40w116/", + "0 n40w117/", + "0 n40w118/", + "0 n40w119/", + "0 n40w120/", + "0 n40w121/", + "0 n40w122/", + "0 n40w123/", + "0 n40w124/", + "0 n40w125/", + "0 n41w073/", + "0 n41w074/", + "0 n41w075/", + "0 n41w076/", + "0 n41w077/", + "0 n41w078/", + "0 n41w079/", + "0 n41w080/", + "0 n41w081/", + "0 n41w082/", + "0 n41w083/", + "0 n41w084/", + "0 n41w085/", + "0 n41w086/", + "0 n41w087/", + "0 n41w088/", + "0 n41w089/", + "0 n41w090/", + "0 n41w091/", + "0 n41w092/", + "0 n41w093/", + "0 n41w094/", + "0 n41w095/", + "0 n41w096/", + "0 n41w097/", + "0 n41w098/", + "0 n41w099/", + "0 n41w100/", + "0 n41w101/", + "0 n41w102/", + "0 n41w103/", + "0 n41w104/", + "0 n41w105/", + "0 n41w106/", + "0 n41w107/", + "0 n41w108/", + "0 n41w109/", + "0 n41w110/", + "0 n41w111/", + "0 n41w112/", + "0 n41w113/", + "0 n41w114/", + "0 n41w115/", + "0 n41w116/", + "0 n41w117/", + "0 n41w118/", + "0 n41w119/", + "0 n41w120/", + "0 n41w121/", + "0 n41w122/", + "0 n41w123/", + "0 n41w124/", + "0 n41w125/", + "0 n42w070/", + "0 n42w071/", + "0 n42w072/", + "0 n42w073/", + "0 n42w074/", + "0 n42w075/", + "0 n42w076/", + "0 n42w077/", + "0 n42w078/", + "0 n42w079/", + "0 n42w080/", + "0 n42w081/", + "0 n42w082/", + "0 n42w083/", + "0 n42w084/", + "0 n42w085/", + "0 n42w086/", + "0 n42w087/", + "0 n42w088/", + "0 n42w089/", + "0 n42w090/", + "0 n42w091/", + "0 n42w092/", + "0 n42w093/", + "0 n42w094/", + "0 n42w095/", + "0 n42w096/", + "0 n42w097/", + "0 n42w098/", + "0 n42w099/", + "0 n42w100/", + "0 n42w101/", + "0 n42w102/", + "0 n42w103/", + "0 n42w104/", + "0 n42w105/", + "0 n42w106/", + "0 n42w107/", + "0 n42w108/", + "0 n42w109/", + "0 n42w110/", + "0 n42w111/", + "0 n42w112/", + "0 n42w113/", + "0 n42w114/", + "0 n42w115/", + "0 n42w116/", + "0 n42w117/", + "0 n42w118/", + "0 n42w119/", + "0 n42w120/", + "0 n42w121/", + "0 n42w122/", + "0 n42w123/", + "0 n42w124/", + "0 n42w125/", + "0 n43w071/", + "0 n43w072/", + "0 n43w073/", + "0 n43w074/", + "0 n43w075/", + "0 n43w076/", + "0 n43w077/", + "0 n43w078/", + "0 n43w079/", + "0 n43w080/", + "0 n43w081/", + "0 n43w082/", + "0 n43w083/", + "0 
n43w084/", + "0 n43w085/", + "0 n43w086/", + "0 n43w087/", + "0 n43w088/", + "0 n43w089/", + "0 n43w090/", + "0 n43w091/", + "0 n43w092/", + "0 n43w093/", + "0 n43w094/", + "0 n43w095/", + "0 n43w096/", + "0 n43w097/", + "0 n43w098/", + "0 n43w099/", + "0 n43w100/", + "0 n43w101/", + "0 n43w102/", + "0 n43w103/", + "0 n43w104/", + "0 n43w105/", + "0 n43w106/", + "0 n43w107/", + "0 n43w108/", + "0 n43w109/", + "0 n43w110/", + "0 n43w111/", + "0 n43w112/", + "0 n43w113/", + "0 n43w114/", + "0 n43w115/", + "0 n43w116/", + "0 n43w117/", + "0 n43w118/", + "0 n43w119/", + "0 n43w120/", + "0 n43w121/", + "0 n43w122/", + "0 n43w123/", + "0 n43w124/", + "0 n43w125/", + "0 n44w069/", + "0 n44w070/", + "0 n44w071/", + "0 n44w072/", + "0 n44w073/", + "0 n44w074/", + "0 n44w075/", + "0 n44w076/", + "0 n44w077/", + "0 n44w078/", + "0 n44w079/", + "0 n44w080/", + "0 n44w081/", + "0 n44w083/", + "0 n44w084/", + "0 n44w085/", + "0 n44w086/", + "0 n44w087/", + "0 n44w088/", + "0 n44w089/", + "0 n44w090/", + "0 n44w091/", + "0 n44w092/", + "0 n44w093/", + "0 n44w094/", + "0 n44w095/", + "0 n44w096/", + "0 n44w097/", + "0 n44w098/", + "0 n44w099/", + "0 n44w100/", + "0 n44w101/", + "0 n44w102/", + "0 n44w103/", + "0 n44w104/", + "0 n44w105/", + "0 n44w106/", + "0 n44w107/", + "0 n44w108/", + "0 n44w109/", + "0 n44w110/", + "0 n44w111/", + "0 n44w112/", + "0 n44w113/", + "0 n44w114/", + "0 n44w115/", + "0 n44w116/", + "0 n44w117/", + "0 n44w118/", + "0 n44w119/", + "0 n44w120/", + "0 n44w121/", + "0 n44w122/", + "0 n44w123/", + "0 n44w124/", + "0 n44w125/", + "0 n45w067/", + "0 n45w068/", + "0 n45w069/", + "0 n45w070/", + "0 n45w071/", + "0 n45w072/", + "0 n45w073/", + "0 n45w074/", + "0 n45w075/", + "0 n45w076/", + "0 n45w077/", + "0 n45w083/", + "0 n45w084/", + "0 n45w085/", + "0 n45w086/", + "0 n45w087/", + "0 n45w088/", + "0 n45w089/", + "0 n45w090/", + "0 n45w091/", + "0 n45w092/", + "0 n45w093/", + "0 n45w094/", + "0 n45w095/", + "0 n45w096/", + "0 n45w097/", + "0 n45w098/", + "0 n45w099/", + "0 n45w100/", + "0 n45w101/", + "0 n45w102/", + "0 n45w103/", + "0 n45w104/", + "0 n45w105/", + "0 n45w106/", + "0 n45w107/", + "0 n45w108/", + "0 n45w109/", + "0 n45w110/", + "0 n45w111/", + "0 n45w112/", + "0 n45w113/", + "0 n45w114/", + "0 n45w115/", + "0 n45w116/", + "0 n45w117/", + "0 n45w118/", + "0 n45w119/", + "0 n45w120/", + "0 n45w121/", + "0 n45w122/", + "0 n45w123/", + "0 n45w124/", + "0 n45w125/", + "0 n46w068/", + "0 n46w069/", + "0 n46w070/", + "0 n46w071/", + "0 n46w072/", + "0 n46w073/", + "0 n46w074/", + "0 n46w075/", + "0 n46w084/", + "0 n46w085/", + "0 n46w086/", + "0 n46w087/", + "0 n46w088/", + "0 n46w089/", + "0 n46w090/", + "0 n46w091/", + "0 n46w092/", + "0 n46w093/", + "0 n46w094/", + "0 n46w095/", + "0 n46w096/", + "0 n46w097/", + "0 n46w098/", + "0 n46w099/", + "0 n46w100/", + "0 n46w101/", + "0 n46w102/", + "0 n46w103/", + "0 n46w104/", + "0 n46w105/", + "0 n46w106/", + "0 n46w107/", + "0 n46w108/", + "0 n46w109/", + "0 n46w110/", + "0 n46w111/", + "0 n46w112/", + "0 n46w113/", + "0 n46w114/", + "0 n46w115/", + "0 n46w116/", + "0 n46w117/", + "0 n46w118/", + "0 n46w119/", + "0 n46w120/", + "0 n46w121/", + "0 n46w122/", + "0 n46w123/", + "0 n46w124/", + "0 n46w125/", + "0 n47w068/", + "0 n47w069/", + "0 n47w070/", + "0 n47w071/", + "0 n47w084/", + "0 n47w085/", + "0 n47w086/", + "0 n47w087/", + "0 n47w088/", + "0 n47w089/", + "0 n47w090/", + "0 n47w091/", + "0 n47w092/", + "0 n47w093/", + "0 n47w094/", + "0 n47w095/", + "0 n47w096/", + "0 n47w097/", + "0 n47w098/", + "0 n47w099/", + "0 
n47w100/", + "0 n47w101/", + "0 n47w102/", + "0 n47w103/", + "0 n47w104/", + "0 n47w105/", + "0 n47w106/", + "0 n47w107/", + "0 n47w108/", + "0 n47w109/", + "0 n47w110/", + "0 n47w111/", + "0 n47w112/", + "0 n47w113/", + "0 n47w114/", + "0 n47w115/", + "0 n47w116/", + "0 n47w117/", + "0 n47w118/", + "0 n47w119/", + "0 n47w120/", + "0 n47w121/", + "0 n47w122/", + "0 n47w123/", + "0 n47w124/", + "0 n47w125/", + "0 n48w068/", + "0 n48w069/", + "0 n48w070/", + "0 n48w087/", + "0 n48w088/", + "0 n48w089/", + "0 n48w090/", + "0 n48w091/", + "0 n48w092/", + "0 n48w093/", + "0 n48w094/", + "0 n48w095/", + "0 n48w096/", + "0 n48w097/", + "0 n48w098/", + "0 n48w099/", + "0 n48w100/", + "0 n48w101/", + "0 n48w102/", + "0 n48w103/", + "0 n48w104/", + "0 n48w105/", + "0 n48w106/", + "0 n48w107/", + "0 n48w108/", + "0 n48w109/", + "0 n48w110/", + "0 n48w111/", + "0 n48w112/", + "0 n48w113/", + "0 n48w114/", + "0 n48w115/", + "0 n48w116/", + "0 n48w117/", + "0 n48w118/", + "0 n48w119/", + "0 n48w120/", + "0 n48w121/", + "0 n48w122/", + "0 n48w123/", + "0 n48w124/", + "0 n48w125/", + "0 n49w089/", + "0 n49w090/", + "0 n49w091/", + "0 n49w092/", + "0 n49w093/", + "0 n49w094/", + "0 n49w095/", + "0 n49w096/", + "0 n49w097/", + "0 n49w098/", + "0 n49w099/", + "0 n49w100/", + "0 n49w101/", + "0 n49w102/", + "0 n49w103/", + "0 n49w104/", + "0 n49w105/", + "0 n49w106/", + "0 n49w107/", + "0 n49w108/", + "0 n49w109/", + "0 n49w110/", + "0 n49w111/", + "0 n49w112/", + "0 n49w113/", + "0 n49w114/", + "0 n49w115/", + "0 n49w116/", + "0 n49w117/", + "0 n49w118/", + "0 n49w119/", + "0 n49w120/", + "0 n49w121/", + "0 n49w122/", + "0 n49w123/", + "0 n49w124/", + "0 n49w125/", + "0 n50w095/", + "0 n50w096/", + "0 n50w097/", + "0 n50w098/", + "0 n50w099/", + "0 n50w100/", + "0 n50w101/", + "0 n50w107/", + "0 n50w108/", + "0 n50w122/", + "0 n50w123/", + "0 n50w124/", + "0 n52e177/", + "0 n52e178/", + "0 n52e179/", + "0 n52w174/", + "0 n52w176/", + "0 n52w177/", + "0 n52w178/", + "0 n52w179/", + "0 n52w180/", + "0 n53e172/", + "0 n53e173/", + "0 n53e174/", + "0 n53e175/", + "0 n53e177/", + "0 n53e178/", + "0 n53e179/", + "0 n53w169/", + "0 n53w170/", + "0 n53w171/", + "0 n53w172/", + "0 n53w173/", + "0 n53w174/", + "0 n53w175/", + "0 n53w176/", + "0 n53w177/", + "0 n54e172/", + "0 n54e173/", + "0 n54w167/", + "0 n54w168/", + "0 n54w169/", + "0 n54w170/", + "0 n55w131/", + "0 n55w132/", + "0 n55w133/", + "0 n55w134/", + "0 n55w160/", + "0 n55w161/", + "0 n55w162/", + "0 n55w163/", + "0 n55w164/", + "0 n55w165/", + "0 n55w166/", + "0 n55w167/", + "0 n56w130/", + "0 n56w131/", + "0 n56w132/", + "0 n56w133/", + "0 n56w134/", + "0 n56w135/", + "0 n56w156/", + "0 n56w157/", + "0 n56w159/", + "0 n56w160/", + "0 n56w161/", + "0 n56w162/", + "0 n56w163/", + "0 n56w164/", + "0 n57w131/", + "0 n57w132/", + "0 n57w133/", + "0 n57w134/", + "0 n57w135/", + "0 n57w136/", + "0 n57w153/", + "0 n57w154/", + "0 n57w155/", + "0 n57w156/", + "0 n57w157/", + "0 n57w158/", + "0 n57w159/", + "0 n57w160/", + "0 n57w161/", + "0 n57w162/", + "0 n57w170/", + "0 n57w171/", + "0 n58w133/", + "0 n58w134/", + "0 n58w135/", + "0 n58w136/", + "0 n58w137/", + "0 n58w153/", + "0 n58w154/", + "0 n58w155/", + "0 n58w156/", + "0 n58w157/", + "0 n58w158/", + "0 n58w159/", + "0 n58w170/", + "0 n58w171/", + "0 n59w134/", + "0 n59w135/", + "0 n59w136/", + "0 n59w137/", + "0 n59w138/", + "0 n59w139/", + "0 n59w152/", + "0 n59w153/", + "0 n59w154/", + "0 n59w155/", + "0 n59w156/", + "0 n59w157/", + "0 n59w158/", + "0 n59w159/", + "0 n59w160/", + "0 n59w161/", + "0 
n59w162/", + "0 n59w163/", + "0 n60w135/", + "0 n60w136/", + "0 n60w137/", + "0 n60w138/", + "0 n60w139/", + "0 n60w140/", + "0 n60w141/", + "0 n60w142/", + "0 n60w143/", + "0 n60w144/", + "0 n60w145/", + "0 n60w146/", + "0 n60w147/", + "0 n60w148/", + "0 n60w149/", + "0 n60w150/", + "0 n60w151/", + "0 n60w152/", + "0 n60w153/", + "0 n60w154/", + "0 n60w155/", + "0 n60w156/", + "0 n60w157/", + "0 n60w158/", + "0 n60w159/", + "0 n60w160/", + "0 n60w161/", + "0 n60w162/", + "0 n60w163/", + "0 n60w164/", + "0 n60w165/", + "0 n60w166/", + "0 n60w167/", + "0 n60w168/", + "0 n61w140/", + "0 n61w141/", + "0 n61w142/", + "0 n61w143/", + "0 n61w144/", + "0 n61w145/", + "0 n61w146/", + "0 n61w147/", + "0 n61w148/", + "0 n61w149/", + "0 n61w150/", + "0 n61w151/", + "0 n61w152/", + "0 n61w153/", + "0 n61w154/", + "0 n61w155/", + "0 n61w156/", + "0 n61w157/", + "0 n61w158/", + "0 n61w159/", + "0 n61w160/", + "0 n61w161/", + "0 n61w162/", + "0 n61w163/", + "0 n61w164/", + "0 n61w165/", + "0 n61w166/", + "0 n61w167/", + "0 n61w168/", + "0 n61w173/", + "0 n61w174/", + "0 n62w142/", + "0 n62w143/", + "0 n62w144/", + "0 n62w145/", + "0 n62w146/", + "0 n62w147/", + "0 n62w148/", + "0 n62w149/", + "0 n62w150/", + "0 n62w151/", + "0 n62w152/", + "0 n62w153/", + "0 n62w154/", + "0 n62w155/", + "0 n62w156/", + "0 n62w157/", + "0 n62w158/", + "0 n62w159/", + "0 n62w160/", + "0 n62w161/", + "0 n62w162/", + "0 n62w163/", + "0 n62w164/", + "0 n62w165/", + "0 n62w166/", + "0 n62w167/", + "0 n63w142/", + "0 n63w143/", + "0 n63w144/", + "0 n63w145/", + "0 n63w146/", + "0 n63w147/", + "0 n63w148/", + "0 n63w149/", + "0 n63w150/", + "0 n63w151/", + "0 n63w152/", + "0 n63w153/", + "0 n63w154/", + "0 n63w155/", + "0 n63w156/", + "0 n63w157/", + "0 n63w158/", + "0 n63w159/", + "0 n63w160/", + "0 n63w161/", + "0 n63w162/", + "0 n63w163/", + "0 n63w164/", + "0 n63w165/", + "0 n63w166/", + "0 n63w167/", + "0 n63w170/", + "0 n64w141/", + "0 n64w142/", + "0 n64w143/", + "0 n64w144/", + "0 n64w145/", + "0 n64w146/", + "0 n64w147/", + "0 n64w148/", + "0 n64w149/", + "0 n64w150/", + "0 n64w151/", + "0 n64w152/", + "0 n64w153/", + "0 n64w154/", + "0 n64w155/", + "0 n64w156/", + "0 n64w157/", + "0 n64w158/", + "0 n64w159/", + "0 n64w160/", + "0 n64w161/", + "0 n64w162/", + "0 n64w163/", + "0 n64w164/", + "0 n64w165/", + "0 n64w169/", + "0 n64w170/", + "0 n64w171/", + "0 n64w172/", + "0 n65w141/", + "0 n65w142/", + "0 n65w143/", + "0 n65w144/", + "0 n65w145/", + "0 n65w146/", + "0 n65w147/", + "0 n65w148/", + "0 n65w149/", + "0 n65w150/", + "0 n65w151/", + "0 n65w152/", + "0 n65w153/", + "0 n65w154/", + "0 n65w155/", + "0 n65w156/", + "0 n65w157/", + "0 n65w158/", + "0 n65w159/", + "0 n65w160/", + "0 n65w161/", + "0 n65w162/", + "0 n65w163/", + "0 n65w164/", + "0 n65w165/", + "0 n65w166/", + "0 n65w167/", + "0 n66w141/", + "0 n66w142/", + "0 n66w143/", + "0 n66w144/", + "0 n66w145/", + "0 n66w146/", + "0 n66w147/", + "0 n66w148/", + "0 n66w149/", + "0 n66w150/", + "0 n66w151/", + "0 n66w152/", + "0 n66w153/", + "0 n66w154/", + "0 n66w155/", + "0 n66w156/", + "0 n66w157/", + "0 n66w158/", + "0 n66w159/", + "0 n66w160/", + "0 n66w161/", + "0 n66w162/", + "0 n66w163/", + "0 n66w164/", + "0 n66w165/", + "0 n66w166/", + "0 n66w167/", + "0 n66w168/", + "0 n66w169/", + "0 n67w141/", + "0 n67w142/", + "0 n67w143/", + "0 n67w144/", + "0 n67w145/", + "0 n67w146/", + "0 n67w147/", + "0 n67w148/", + "0 n67w149/", + "0 n67w150/", + "0 n67w151/", + "0 n67w152/", + "0 n67w153/", + "0 n67w154/", + "0 n67w155/", + "0 n67w156/", + "0 n67w157/", + "0 
n67w158/", + "0 n67w159/", + "0 n67w160/", + "0 n67w161/", + "0 n67w162/", + "0 n67w163/", + "0 n67w164/", + "0 n67w165/", + "0 n67w166/", + "0 n67w167/", + "0 n67w168/", + "0 n68w141/", + "0 n68w142/", + "0 n68w143/", + "0 n68w144/", + "0 n68w145/", + "0 n68w146/", + "0 n68w147/", + "0 n68w148/", + "0 n68w149/", + "0 n68w150/", + "0 n68w151/", + "0 n68w152/", + "0 n68w153/", + "0 n68w154/", + "0 n68w155/", + "0 n68w156/", + "0 n68w157/", + "0 n68w158/", + "0 n68w159/", + "0 n68w160/", + "0 n68w161/", + "0 n68w162/", + "0 n68w163/", + "0 n68w164/", + "0 n68w165/", + "0 n68w166/", + "0 n69w141/", + "0 n69w142/", + "0 n69w143/", + "0 n69w144/", + "0 n69w145/", + "0 n69w146/", + "0 n69w147/", + "0 n69w148/", + "0 n69w149/", + "0 n69w150/", + "0 n69w151/", + "0 n69w152/", + "0 n69w153/", + "0 n69w154/", + "0 n69w155/", + "0 n69w156/", + "0 n69w157/", + "0 n69w158/", + "0 n69w159/", + "0 n69w160/", + "0 n69w161/", + "0 n69w162/", + "0 n69w163/", + "0 n69w164/", + "0 n69w165/", + "0 n69w166/", + "0 n69w167/", + "0 n70w141/", + "0 n70w142/", + "0 n70w143/", + "0 n70w144/", + "0 n70w145/", + "0 n70w146/", + "0 n70w147/", + "0 n70w148/", + "0 n70w149/", + "0 n70w150/", + "0 n70w151/", + "0 n70w152/", + "0 n70w153/", + "0 n70w154/", + "0 n70w155/", + "0 n70w156/", + "0 n70w157/", + "0 n70w158/", + "0 n70w159/", + "0 n70w160/", + "0 n70w161/", + "0 n70w162/", + "0 n70w163/", + "0 n70w164/", + "0 n70w165/", + "0 n71w143/", + "0 n71w144/", + "0 n71w145/", + "0 n71w146/", + "0 n71w147/", + "0 n71w148/", + "0 n71w149/", + "0 n71w150/", + "0 n71w151/", + "0 n71w152/", + "0 n71w153/", + "0 n71w154/", + "0 n71w155/", + "0 n71w156/", + "0 n71w157/", + "0 n71w158/", + "0 n71w159/", + "0 n71w160/", + "0 n71w161/", + "0 n71w162/", + "0 n71w163/", + "0 n71w164/", + "0 n72w155/", + "0 n72w156/", + "0 n72w157/", + "0 n72w158/", + "0 s14w170/", + "0 s14w171/" + ) + + + # sub out HTML copy pattern with vsi URL + t2 <- gsub("0 ", + "/vsicurl/https://prd-tnm.s3.amazonaws.com/StagedProducts/Elevation/13/TIFF/current/", + t) + + # add file paths following NED scheme + t3 <- paste0(t2, + "USGS_13_", + basename(t2), + ".tif") + + # Write table to data-raw + write.table(t3, + paste0(base_dir, "/ned_list_USGS_13.txt"), + row.names = FALSE, + col.names = FALSE, + quote = FALSE) + + +} +# dem_base_dir <- "/Users/anguswatters/Desktop/transects_paper/data/dem" +# download_3dep_vrt(dem_base_dir) +# +# # Create meta data object of three NED resoruces +# ned <- data.frame(rbind( +# # c(id = "USGS_1", +# # URL = "/vsicurl/https://prd-tnm.s3.amazonaws.com/StagedProducts/Elevation/1", +# # varname = "30m elevation", +# # long_name = "30m (1 arcsec) National Elevation Dataset", +# # units = "m"), +# # +# # c(id = "USGS_2", +# # URL = "/vsicurl/https://prd-tnm.s3.amazonaws.com/StagedProducts/Elevation/2", +# # varname = "60m elevation", +# # long_name = "60m (2 arcsec) National Elevation Dataset Alaska", +# # units = "m"), +# +# c(id = "USGS_13", +# URL = "/vsicurl/https://prd-tnm.s3.amazonaws.com/StagedProducts/Elevation/13", +# varname = "10m elevation", +# long_name = "10m (1/3th arcsec) National Elevation Dataset", +# units = "m") +# )) +# +# +# +# +# # Loop over the three resolutions +# for (i in 1:length(ned)) { +# i = 1 +# +# # Define output text file path +# txt_file <- paste0(dem_base_dir, "/ned_list_", ned$id[i], "_2.txt") +# +# # Define output VRT path +# vrt_file <- paste0(paste0(dem_base_dir, "/ned_", ned$id[i], ".vrt")) +# +# # If VRT does NOT exist, build VRT +# if (!file.exists(vrt_file)) { +# +# # read the 
corresponding index.gpkg +# files <- sf::read_sf(ned$domain_url[i]) +# DEM_UR +# DEM_PATH <- "/vsicurl/https://prd-tnm.s3.amazonaws.com/StagedProducts/Elevation/1/TIFF/USGS_Seamless_DEM_1.vrt" +# # Build full HTTPS paths to "./current/" +# files <- c(file.path(ned$URL[i], "TIFF/current", gsub("[.]/", "", files$location))) +# +# files <- "/vsicurl/https://prd-tnm.s3.amazonaws.com/StagedProducts/Elevation/1/TIFF/USGS_Seamless_DEM_1.vrt" +# +# # write list of files to text file +# write.table(files, txt_file, row.names = FALSE, col.names = FALSE, quote = FALSE) +# +# # build VRT from text file input using GDAL system call ... +# system(paste("gdalbuildvrt -input_file_list", txt_file, vrt_file)) +# } +# +# logger::log_info("Finished ", ned$id[i], "...") +# } + +# +# match_transects_to_extended_cs_pts <- function(transect_lines, fixed_cs_pts, crosswalk_id, extension_pct = 0.5 ) { +# +# # transect_lines = transects +# # fixed_cs_pts = fixed_pts +# # crosswalk_id = CROSSWALK_ID +# +# fixed_cs_pts <- nhdplusTools::rename_geometry(fixed_cs_pts, "geometry") +# transect_lines <- nhdplusTools::rename_geometry(transect_lines, "geometry") +# +# # get the counts of each point type to add this data to the transect_lines dataset +# point_type_counts <- hydrofabric3D::get_point_type_counts(classified_pts = fixed_cs_pts, +# crosswalk_id = crosswalk_id) +# # Check the number of cross sections that were extended +# message("Subsetting cross section points generated after extending transect_lines...") +# +# # extract cross section points that have an "is_extended" value of TRUE +# extended_pts <- +# fixed_cs_pts %>% +# dplyr::filter(is_extended) %>% +# hydrofabric3D::add_tmp_id(x = crosswalk_id) +# +# # extended_pts %>% +# # get_unique_tmp_ids() %>% +# # length() +# +# # extract transect_lines that have a "crosswalk_id" in the "extended_pts" dataset +# update_transect_lines <- +# transect_lines %>% +# hydrofabric3D::add_tmp_id(x = crosswalk_id) %>% +# dplyr::filter(tmp_id %in% unique(extended_pts$tmp_id)) +# +# cs_pt_uids <- unique(hydrofabric3D::add_tmp_id(fixed_cs_pts, x = crosswalk_id)$tmp_id) +# +# # If any transect_lines were extended, update the transect_lines dataset, and overwrite local and S3 transect_lines geopackages +# if (nrow(update_transect_lines) > 0) { +# message("Updating ", nrow(update_transect_lines), " transect_lines") +# +# +# # update_transect_lines <- +# # update_transect_lines %>% +# # dplyr::rename(hy_id := !!sym(crosswalk_id)) +# # +# update_transect_lines <- +# update_transect_lines %>% +# # apply extend_by_percent function to each transect line: +# hydrofabric3D:::extend_by_percent( +# crosswalk_id = crosswalk_id, +# pct = extension_pct, +# length_col = "cs_lengthm" +# ) +# +# update_transect_lines <- hydroloom::rename_geometry(update_transect_lines, "geometry") +# +# # update_transect_lines <- +# # update_transect_lines %>% +# # dplyr::rename(!!sym(crosswalk_id) := hy_id) +# +# # cs_pt_uids <- unique(hydrofabric3D::add_tmp_id(fixed_cs_pts, x = get(crosswalk_id))$tmp_id) +# # transect_uids <- unique(hydrofabric3D::add_tmp_id(transect_lines, x = get(crosswalk_id))$tmp_id) +# +# # Filter down to ONLY points that were finalized and rectified from rectify_cs_pts() +# # Remove old transect_lines that have "tmp_id" in "extended_pts" (transect_lines that were unchanged and are "good_to_go") +# # and then replace with old transect_lines with the "update_transect_lines" +# out_transect_lines <- +# transect_lines %>% +# hydrofabric3D::add_tmp_id(x = crosswalk_id) %>% +# dplyr::filter(tmp_id 
%in% cs_pt_uids) %>% +# dplyr::filter(!tmp_id %in% unique(extended_pts$tmp_id)) %>% +# dplyr::bind_rows( +# dplyr::mutate(update_transect_lines, is_extended = TRUE) +# ) +# +# # transect_lines %>% +# # hydrofabric3D::add_tmp_id(x = "hy_id") %>% +# # # dplyr::filter(!tmp_id %in% unique(extended_pts$tmp_id)) %>% +# # dplyr::filter(tmp_id %in% unique(hydrofabric3D::add_tmp_id(fixed_pts, x = "hy_id")$tmp_id)) %>% # Subset down to the remaining tmp_ids in the fixed points +# # dplyr::filter(!tmp_id %in% unique(extended_pts$tmp_id)) %>% # remove the tmp_ids that we are going add back in with the extended versions of those tmp_ids +# # dplyr::bind_rows( # bring in the new updated extended transect_lines +# # dplyr::mutate( +# # update_transect_lines, +# # is_extended = TRUE +# # ) +# # ) +# } else { +# # If no transect_lines were extended +# out_transect_lines <- +# transect_lines %>% +# hydrofabric3D::add_tmp_id(x = crosswalk_id) %>% +# dplyr::filter(tmp_id %in% cs_pt_uids) %>% +# # dplyr::filter(tmp_id %in% unique(hydrofabric3D::add_tmp_id(fixed_cs_pts, x = get(crosswalk_id))$tmp_id)) %>% +# dplyr::filter(!tmp_id %in% unique(extended_pts$tmp_id)) +# } +# +# # Finalize new transect_lines +# out_transect_lines <- +# out_transect_lines %>% +# dplyr::left_join( +# point_type_counts, +# by = c(crosswalk_id, "cs_id") +# ) %>% +# dplyr::left_join( +# dplyr::ungroup( +# dplyr::slice( +# dplyr::group_by( +# dplyr::select(sf::st_drop_geometry(fixed_cs_pts), +# dplyr::any_of(crosswalk_id), +# cs_id, bottom, left_bank, right_bank, valid_banks, has_relief +# ), +# dplyr::across(dplyr::any_of(c(crosswalk_id, "cs_id"))) +# ), +# 1 +# ) +# ), +# by = c(crosswalk_id, "cs_id") +# ) %>% +# dplyr::select( +# dplyr::any_of(crosswalk_id), +# cs_source, cs_id, cs_measure, cs_lengthm, +# # sinuosity, +# is_extended, +# left_bank_count, right_bank_count, channel_count, bottom_count, +# bottom, left_bank, right_bank, valid_banks, has_relief, +# geometry +# ) %>% +# dplyr::mutate( +# is_extended = ifelse(is.na(is_extended), FALSE, is_extended) +# ) +# +# return(out_transect_lines) +# } + +# utility function for getting transects extended and +# matching cross section points that went through "get_improved_cs_pts()" and that were extended for improvement +# returns the extended version of the transects +# match_transects_to_extended_cs_pts <- function(transect_lines, fixed_cs_pts, crosswalk_id) { +# +# # transect_lines = transects +# # fixed_cs_pts = fixed_pts +# # crosswalk_id = CROSSWALK_ID +# +# fixed_cs_pts <- nhdplusTools::rename_geometry(fixed_cs_pts, "geometry") +# transect_lines <- nhdplusTools::rename_geometry(transect_lines, "geometry") +# +# # get the counts of each point type to add this data to the transect_lines dataset +# point_type_counts <- hydrofabric3D::get_point_type_counts(classified_pts = fixed_cs_pts, +# crosswalk_id = crosswalk_id) +# # Check the number of cross sections that were extended +# message("Subsetting cross section points generated after extending transect_lines...") +# +# # extract cross section points that have an "is_extended" value of TRUE +# extended_pts <- +# fixed_cs_pts %>% +# dplyr::filter(is_extended) %>% +# hydrofabric3D::add_tmp_id(x = crosswalk_id) +# +# # extended_pts %>% +# # get_unique_tmp_ids() %>% +# # length() +# +# # extract transect_lines that have a "crosswalk_id" in the "extended_pts" dataset +# update_transect_lines <- +# transect_lines %>% +# hydrofabric3D::add_tmp_id(x = crosswalk_id) %>% +# dplyr::filter(tmp_id %in% unique(extended_pts$tmp_id)) +# +# cs_pt_uids 
<- unique(hydrofabric3D::add_tmp_id(fixed_cs_pts, x = crosswalk_id)$tmp_id) +# +# # If any transect_lines were extended, update the transect_lines dataset, and overwrite local and S3 transect_lines geopackages +# if (nrow(update_transect_lines) > 0) { +# message("Updating ", nrow(update_transect_lines), " transect_lines") +# +# +# update_transect_lines <- +# update_transect_lines %>% +# dplyr::rename(hy_id := !!sym(crosswalk_id)) +# +# update_transect_lines <- +# update_transect_lines %>% +# # apply extend_by_percent function to each transect line: +# hydrofabric3D:::extend_by_percent( +# pct = EXTENSION_PCT, +# length_col = "cs_lengthm" +# ) +# +# update_transect_lines <- hydroloom::rename_geometry(update_transect_lines, "geometry") +# +# update_transect_lines <- +# update_transect_lines %>% +# dplyr::rename(!!sym(crosswalk_id) := hy_id) +# +# # cs_pt_uids <- unique(hydrofabric3D::add_tmp_id(fixed_cs_pts, x = get(crosswalk_id))$tmp_id) +# # transect_uids <- unique(hydrofabric3D::add_tmp_id(transect_lines, x = get(crosswalk_id))$tmp_id) +# +# # Filter down to ONLY points that were finalized and rectified from rectify_cs_pts() +# # Remove old transect_lines that have "tmp_id" in "extended_pts" (transect_lines that were unchanged and are "good_to_go") +# # and then replace with old transect_lines with the "update_transect_lines" +# out_transect_lines <- +# transect_lines %>% +# hydrofabric3D::add_tmp_id(x = crosswalk_id) %>% +# dplyr::filter(tmp_id %in% cs_pt_uids) %>% +# dplyr::filter(!tmp_id %in% unique(extended_pts$tmp_id)) %>% +# dplyr::bind_rows( +# dplyr::mutate(update_transect_lines, is_extended = TRUE) +# ) +# +# # transect_lines %>% +# # hydrofabric3D::add_tmp_id(x = "hy_id") %>% +# # # dplyr::filter(!tmp_id %in% unique(extended_pts$tmp_id)) %>% +# # dplyr::filter(tmp_id %in% unique(hydrofabric3D::add_tmp_id(fixed_pts, x = "hy_id")$tmp_id)) %>% # Subset down to the remaining tmp_ids in the fixed points +# # dplyr::filter(!tmp_id %in% unique(extended_pts$tmp_id)) %>% # remove the tmp_ids that we are going add back in with the extended versions of those tmp_ids +# # dplyr::bind_rows( # bring in the new updated extended transect_lines +# # dplyr::mutate( +# # update_transect_lines, +# # is_extended = TRUE +# # ) +# # ) +# } else { +# # If no transect_lines were extended +# out_transect_lines <- +# transect_lines %>% +# hydrofabric3D::add_tmp_id(x = crosswalk_id) %>% +# dplyr::filter(tmp_id %in% cs_pt_uids) %>% +# # dplyr::filter(tmp_id %in% unique(hydrofabric3D::add_tmp_id(fixed_cs_pts, x = get(crosswalk_id))$tmp_id)) %>% +# dplyr::filter(!tmp_id %in% unique(extended_pts$tmp_id)) +# } +# +# # Finalize new transect_lines +# out_transect_lines <- +# out_transect_lines %>% +# dplyr::left_join( +# point_type_counts, +# by = c(crosswalk_id, "cs_id") +# ) %>% +# dplyr::left_join( +# dplyr::ungroup( +# dplyr::slice( +# dplyr::group_by( +# dplyr::select(sf::st_drop_geometry(fixed_cs_pts), +# dplyr::any_of(crosswalk_id), +# cs_id, bottom, left_bank, right_bank, valid_banks, has_relief +# ), +# dplyr::across(dplyr::any_of(c(crosswalk_id, "cs_id"))) +# ), +# 1 +# ) +# ), +# by = c(crosswalk_id, "cs_id") +# ) %>% +# dplyr::select( +# dplyr::any_of(crosswalk_id), +# cs_source, cs_id, cs_measure, cs_lengthm, +# # sinuosity, +# is_extended, +# left_bank_count, right_bank_count, channel_count, bottom_count, +# bottom, left_bank, right_bank, valid_banks, has_relief, +# geometry +# ) %>% +# dplyr::mutate( +# is_extended = ifelse(is.na(is_extended), FALSE, is_extended) +# ) +# +# 
return(out_transect_lines) +# } + + +
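For context, the following is a minimal sketch of how the new-domain helpers and the polygon/line intersection helper added above might be chained together. The base directory, domain name, and input geopackage paths are hypothetical placeholders, and `create_if_not_exists()` is assumed to be the helper defined elsewhere in runners/cs_runner/utils.R; this is not part of the runner itself.

# Sketch only: set up a hypothetical new domain and subset FEMA polygons
# to those crossed by at least one transect line.
base_dir       <- "/tmp/transects_data"   # hypothetical base directory
domain_dirname <- "example_domain"        # hypothetical domain name

# create the directory tree and get the matching paths back as a named list
create_new_domain_dirs(base_dir, domain_dirname, with_output = TRUE)
paths <- get_new_domain_paths(base_dir, domain_dirname, with_output = TRUE)

# hypothetical inputs: FEMA polygons and transect lines stored as geopackages
fema_polygons <- sf::read_sf(file.path(paths$domain_dir, "fema_polygons.gpkg"))
transects     <- sf::read_sf(file.path(paths$transects_dir, "transects.gpkg"))

# keep only the FEMA polygons that intersect a transect, then save the subset
fema_hits <- polygons_with_line_intersects(polygons = fema_polygons, lines = transects)
sf::write_sf(fema_hits, file.path(paths$domain_subset_dir, "fema_with_transects.gpkg"))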
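The commented-out runner code above hints at the intended next step after download_3dep_vrt() writes its tile list: building a single VRT with GDAL. A minimal sketch of that step follows, assuming GDAL's gdalbuildvrt is on the system PATH and that dem_base_dir (a placeholder path) is a writable local directory.

# Sketch only: build one VRT over the 10m (1/3 arc-second) tiles listed by download_3dep_vrt()
dem_base_dir <- "/tmp/dem"                 # hypothetical DEM directory

download_3dep_vrt(dem_base_dir)            # writes ned_list_USGS_13.txt to dem_base_dir

txt_file <- paste0(dem_base_dir, "/ned_list_USGS_13.txt")
vrt_file <- paste0(dem_base_dir, "/ned_USGS_13.vrt")

# only build the VRT if it does not already exist (mirrors the commented runner code)
if (!file.exists(vrt_file)) {
  system(paste("gdalbuildvrt -input_file_list", txt_file, vrt_file))
}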