From 0092719594f53cd379c701f5865d9c0f52d18c22 Mon Sep 17 00:00:00 2001 From: anguswg-ucsb <anguswatters@gmail.com> Date: Wed, 6 Dec 2023 05:02:00 -0800 Subject: [PATCH 01/64] updated transects and cross section pts runners with new hydrofabric3D features, added waterbody reference feature download and waterbody intersection removal to 02_cs_pts.R runner --- runners/cs_runner/.Rapp.history | 0 runners/cs_runner/01_transects.R | 83 ++++++----- runners/cs_runner/02_cs_pts.R | 189 +++++++++++++++++++----- runners/cs_runner/config.R | 209 ++++++++++++++++++++++++++- runners/cs_runner/config_vars.R | 14 +- runners/cs_runner/download_nextgen.R | 32 ++++ 6 files changed, 442 insertions(+), 85 deletions(-) delete mode 100644 runners/cs_runner/.Rapp.history diff --git a/runners/cs_runner/.Rapp.history b/runners/cs_runner/.Rapp.history deleted file mode 100644 index e69de29..0000000 diff --git a/runners/cs_runner/01_transects.R b/runners/cs_runner/01_transects.R index 0880d33..456be21 100644 --- a/runners/cs_runner/01_transects.R +++ b/runners/cs_runner/01_transects.R @@ -1,23 +1,21 @@ # Generate the flowlines layer for the final cross_sections_<VPU>.gpkg for each VPU -# source("runners/cs_runner/config.R") +source("runners/cs_runner/config.R") -# # load libraries +# # # load libraries +library(hydrofabric3D) # library(terrainSliceR) -# library(dplyr) -# library(sf) - -# name of S3 bucket -s3_bucket <- "s3://lynker-spatial/" +library(dplyr) +library(sf) # transect bucket prefix -transects_prefix <- paste0(s3_bucket, "v20/3D/transects/") +transects_prefix <- paste0(s3_bucket, version_prefix, "/3D/transects/") # paths to nextgen datasets and model attribute parquet files nextgen_files <- list.files(nextgen_dir, full.names = FALSE) model_attr_files <- list.files(model_attr_dir, full.names = FALSE) # string to fill in "cs_source" column in output datasets -net_source <- "terrainSliceR" +net_source <- "hydrofabric3D" # ensure the files are in the same order and matched up by VPU path_df <- align_files_by_vpu( @@ -33,40 +31,44 @@ for(i in 1:nrow(path_df)) { nextgen_file <- path_df$x[i] nextgen_path <- paste0(nextgen_dir, nextgen_file) - # model attributes file and full path - model_attr_file <- path_df$y[i] - model_attr_path <- paste0(model_attr_dir, model_attr_file) + # # model attributes file and full path + # model_attr_file <- path_df$y[i] + # model_attr_path <- paste0(model_attr_dir, model_attr_file) - message("Creating VPU ", path_df$vpu[i], " transects:\n - flowpaths: '", nextgen_file, "'\n - model attributes: '", model_attr_file, "'") + message("Creating VPU ", path_df$vpu[i], " transects:\n - flowpaths: '", nextgen_file, "'") + # message("Creating VPU ", path_df$vpu[i], " transects:\n - flowpaths: '", nextgen_file, "'\n - model attributes: '", model_attr_file, "'") # read in nextgen data flines <- sf::read_sf(nextgen_path, layer = "flowpaths") - # model attributes - model_attrs <- arrow::read_parquet(model_attr_path) + # # model attributes + # model_attrs <- arrow::read_parquet(model_attr_path) + + # # join flowlines with model atttributes + # flines <- dplyr::left_join( + # flines, + # dplyr::select( + # model_attrs, + # id, eTW + # ), + # by = "id" + # ) - # join flowlines with model atttributes - flines <- dplyr::left_join( - flines, - dplyr::select( - model_attrs, - id, eTW - ), - by = "id" - ) - # calculate bankfull width flines <- flines %>% dplyr::mutate( - bf_width = 11 * eTW - ) %>% - dplyr::mutate( # if there are any NAs, use exp(0.700 + 0.365* log(tot_drainage_areasqkm)) equation to 
calculate bf_width - bf_width = dplyr::case_when( - is.na(bf_width) ~ exp(0.700 + 0.365* log(tot_drainage_areasqkm)), - TRUE ~ bf_width - ) + bf_width = exp(0.700 + 0.365* log(tot_drainage_areasqkm)) ) %>% + # dplyr::mutate( + # bf_width = 11 * eTW + # ) %>% + # dplyr::mutate( # if there are any NAs, use exp(0.700 + 0.365* log(tot_drainage_areasqkm)) equation to calculate bf_width + # bf_width = dplyr::case_when( + # is.na(bf_width) ~ exp(0.700 + 0.365* log(tot_drainage_areasqkm)), + # TRUE ~ bf_width + # ) + # ) %>% dplyr::select( hy_id = id, lengthkm, @@ -74,16 +76,17 @@ for(i in 1:nrow(path_df)) { bf_width, geometry = geom ) - + # flines$bf_width <- ifelse(is.na(flines$bf_width), exp(0.700 + 0.365* log(flines$tot_drainage_areasqkm)), flines$bf_width) time1 <- Sys.time() # system.time({ # create transect lines - transects <- terrainSliceR::cut_cross_sections( + transects <- hydrofabric3D::cut_cross_sections( net = flines, # flowlines network id = "hy_id", # Unique feature ID - cs_widths = pmax(50, flines$bf_width), # cross section width of each "id" linestring ("hy_id") + cs_widths = pmax(50, flines$bf_width * 11), # cross section width of each "id" linestring ("hy_id") + # cs_widths = pmax(50, flines$bf_width), # cross section width of each "id" linestring ("hy_id") num = 10, # number of cross sections per "id" linestring ("hy_id") smooth = TRUE, # smooth lines densify = 3, # densify linestring points @@ -132,13 +135,9 @@ for(i in 1:nrow(path_df)) { ) # command to copy transects geopackage to S3 - if (!is.null(aws_profile)) { - copy_to_s3 <- paste0("aws s3 cp ", out_path, " ", transects_prefix, out_file, - ifelse(is.null(aws_profile), "", paste0(" --profile ", aws_profile))) - } else { - copy_to_s3 <- paste0("aws s3 cp ", out_path, " ", transects_prefix, out_file) - } - + copy_to_s3 <- paste0("aws s3 cp ", out_path, " ", transects_prefix, out_file, + ifelse(is.null(aws_profile), "", paste0(" --profile ", aws_profile)) + ) message("Copy VPU ", path_df$vpu[i], " transects to S3:\n - S3 copy command:\n'", copy_to_s3, diff --git a/runners/cs_runner/02_cs_pts.R b/runners/cs_runner/02_cs_pts.R index da6e3e8..9c7c237 100644 --- a/runners/cs_runner/02_cs_pts.R +++ b/runners/cs_runner/02_cs_pts.R @@ -1,16 +1,18 @@ # Generate the flowlines layer for the final cross_sections_<VPU>.gpkg for each VPU -# source("runners/cs_runner/config.R") +source("runners/cs_runner/config.R") # # load libraries -# library(terrainSliceR) +# library(hydrofabric3D) +# # library(terrainSliceR) # library(dplyr) # library(sf) -# name of S3 bucket -s3_bucket <- "s3://lynker-spatial/" - # cross section bucket prefix -cs_pts_prefix <- paste0(s3_bucket, "v20/3D/dem-cross-sections/") +cs_pts_prefix <- paste0(s3_bucket, version_prefix, "/3D/dem-cross-sections/") +# cs_pts_prefix <- paste0(s3_bucket, "v20/3D/dem-cross-sections/") + +# transect bucket prefix +transects_prefix <- paste0(s3_bucket, version_prefix, "/3D/transects/") # paths to nextgen datasets nextgen_files <- list.files(nextgen_dir, full.names = FALSE) @@ -19,20 +21,31 @@ nextgen_files <- list.files(nextgen_dir, full.names = FALSE) transect_files <- list.files(transects_dir, full.names = FALSE) # string to fill in "cs_source" column in output datasets -cs_source <- "terrainSliceR" +cs_source <- "hydrofabric3D" + +# reference features dataframe +ref_df <- data.frame( + vpu = sapply(strsplit(ref_features, "_", fixed = TRUE), function(i) { i[1] }), + ref_file = ref_features +) # ensure the files are in the same order and matched up by VPU path_df <- 
align_files_by_vpu( - x = nextgen_files, - y = transect_files, - base = base_dir - ) + x = nextgen_files, + y = transect_files, + base = base_dir + ) %>% + dplyr::left_join( + ref_df, + by = "vpu" + ) # loop over the nextgen and transect datasets (by VPU) and extract point elevations across points on each transect line, # then classify the points, and create a parquet file with hy_id, cs_id, pt_id, X, Y, Z data. # Save parquet locally and upload to specified S3 bucket for (i in 1:nrow(path_df)) { - + # i = 15 + # nextgen file and full path nextgen_file <- path_df$x[i] nextgen_path <- paste0(nextgen_dir, nextgen_file) @@ -40,8 +53,17 @@ for (i in 1:nrow(path_df)) { # model attributes file and full path transect_file <- path_df$y[i] transect_path <- paste0(transects_dir, transect_file) - message("Creating VPU ", path_df$vpu[i], " cross section points:\n - flowpaths: '", nextgen_file, "'\n - transects: '", transect_file, "'") + # model attributes file and full path + ref_file <- path_df$ref_file[i] + ref_path <- paste0(ref_features_dir, "gpkg/", ref_file) + + message("Creating VPU ", path_df$vpu[i], + " cross section points:\n - flowpaths: '", nextgen_file, + "'\n - transects: '", transect_file, "'", + "\n - waterbodies: '", ref_file, "'" + ) + ################### # read in transects data @@ -50,21 +72,45 @@ for (i in 1:nrow(path_df)) { # read in nextgen data flines <- sf::read_sf(nextgen_path, layer = "flowpaths") - transects <- - transects %>% - dplyr::rename(lengthm = cs_lengthm) + # read in waterbodies reference features layer + waterbodies <- sf::read_sf(ref_path, layer = "waterbodies") + + ##### subset flowlines and transects to first 5 features for testing ##### + # flines = dplyr::slice(flines, 1:5) + # transects = dplyr::filter(transects, hy_id %in% unique(flines$id)) + ##### ##### + + # Update flowlines and transects to remove flowlines and transects that intersect with reference_features waterbodies + feature_subsets <- wb_intersects(flines, transects, waterbodies) + + # replace flowlines and transects objects with updated versions in "updated_features" + flines <- flines[feature_subsets$valid_flowlines, ] + transects <- transects[feature_subsets$valid_transects, ] # get start time for log messages time1 <- Sys.time() - + # get cross section point elevations - cs_pts <- terrainSliceR::cross_section_pts( - cs = transects[lengths(sf::st_intersects(transects, flines)) == 1, ], - points_per_cs = NULL, - min_pts_per_cs = 10, - dem = DEM_URL - ) - + cs_pts <- hydrofabric3D::cross_section_pts( + cs = transects, + points_per_cs = NULL, + min_pts_per_cs = 10, + dem = DEM_URL + ) + + # try to extend any cross sections that returned cross section points with + # identical Z values within a certain threshold ("flat" cross sections) + cs_pts <- hydrofabric3D::rectify_flat_cs( + net = flines, + cs = transects, + cs_pts = cs_pts, + points_per_cs = NULL, + min_pts_per_cs = 10, + dem = DEM_URL, + scale = EXTENSION_PCT, + threshold = 0 + ) + # get end time for log messages time2 <- Sys.time() time_diff <- round(as.numeric(time2 - time1 ), 2) @@ -78,12 +124,83 @@ for (i in 1:nrow(path_df)) { dplyr::group_by(hy_id, cs_id) %>% dplyr::filter(!any(is.na(Z))) %>% dplyr::ungroup() + + # # check the number of cross sections that were extended + # cs_pts$is_extended %>% table() + + # extract cross section points that have an "is_extended" value of TRUE + extended_pts = + cs_pts %>% + dplyr::filter(is_extended) %>% + dplyr::mutate(tmp_id = paste0(hy_id, "_", cs_id)) + + # extract transects that have a "hy_id" 
in the "extended_pts" dataset + update_transects = + transects %>% + dplyr::mutate(tmp_id = paste0(hy_id, "_", cs_id)) %>% + dplyr::filter(tmp_id %in% unique(extended_pts$tmp_id)) + + # if any transects were extended, update the transects dataset, and overwrite local and S3 transects geopackages + if (nrow(update_transects) > 0) { + message("Updating ", nrow(update_transects), " transects") + + update_transects = + update_transects %>% + # dplyr::filter(hy_id %in% unique(extended_pts$hy_id)) %>% + # apply extend_by_percent function to each transect line: + hydrofabric3D:::extend_by_percent( + pct = EXTENSION_PCT, + length_col = "cs_lengthm" + ) + + # remove old transects that have "tmp_id" in "extended_pts", and replace with "update_transects" + out_transects = + transects %>% + dplyr::mutate(tmp_id = paste0(hy_id, "_", cs_id)) %>% + dplyr::filter(!tmp_id %in% unique(extended_pts$tmp_id)) %>% + dplyr::bind_rows( + update_transects + ) %>% + # dplyr::mutate(is_extended = FALSE) %>% + # dplyr::bind_rows( + # dplyr::mutate(update_transects, is_extended = TRUE) + # ) %>% + dplyr::select(-tmp_id) + + # mapview::mapview(transects, color = "red") + + # mapview::mapview(dplyr::filter(out_transects, is_extended), color = "green") + + # mapview::mapview(flines, color = "dodgerblue") + + ###################################### + + ## Save local and REUPLOAD TRANSECTS to S3 to update for any extended cross sections + message("Saving updated transects to:\n - filepath: '", transect_path, "'") + + # save flowlines to out_path (lynker-spatial/01_transects/transects_<VPU num>.gpkg) + sf::write_sf( + out_transects, + transect_path + ) + + # command to copy transects geopackage to S3 + trans_to_s3 <- paste0("aws s3 cp ", transect_path, " ", transects_prefix, transect_file, + ifelse(is.null(aws_profile), "", paste0(" --profile ", aws_profile))) + + message("Copy VPU ", path_df$vpu[i], " transects to S3:\n - S3 copy command:\n'", + trans_to_s3, + "'\n==========================") + system(trans_to_s3, intern = TRUE) + + ###################################### + + } + # classify the cross section points cs_pts <- cs_pts %>% - dplyr::rename(cs_widths = lengthm) %>% - terrainSliceR::classify_points() %>% + dplyr::rename(cs_widths = cs_lengthm) %>% + hydrofabric3D::classify_points() %>% dplyr::mutate( X = sf::st_coordinates(.)[,1], Y = sf::st_coordinates(.)[,2] @@ -95,7 +212,7 @@ for (i in 1:nrow(path_df)) { X, Y, Z, class ) - + # Drop point geometries, leaving just X, Y, Z values cs_pts <- sf::st_drop_geometry(cs_pts) @@ -106,9 +223,10 @@ for (i in 1:nrow(path_df)) { Z_source = cs_source ) %>% dplyr::relocate(hy_id, cs_id, pt_id, cs_lengthm, relative_distance, X, Y, Z, Z_source, class) - - ################### - + + ###################################### + + ###################################### # name of file and path to save transects gpkg too out_file <- paste0("nextgen_", path_df$vpu[i], "_cross_sections.parquet") out_path <- paste0(cs_pts_dir, out_file) @@ -119,17 +237,12 @@ for (i in 1:nrow(path_df)) { arrow::write_parquet(cs_pts, out_path) # command to copy cross section points parquet to S3 - if (!is.null(aws_profile)) { - copy_cs_pts_to_s3 <- paste0("aws s3 cp ", out_path, " ", cs_pts_prefix, out_file, - ifelse(is.null(aws_profile), "", paste0(" --profile ", aws_profile))) - } else { - copy_cs_pts_to_s3 <- paste0("aws s3 cp ", out_path, " ", cs_pts_prefix, out_file) - - } + copy_cs_pts_to_s3 <- paste0("aws s3 cp ", out_path, " ", cs_pts_prefix, out_file, + ifelse(is.null(aws_profile), "", paste0(" 
--profile ", aws_profile))) message("Copy VPU ", path_df$vpu[i], " cross sections to S3:\n - S3 copy command:\n'", paste0("aws s3 cp ", out_path, " ", cs_pts_prefix, out_file, - ifelse(is.null(aws_profile), "", paste0(" --profile ", aws_profile))), + ifelse(is.null(aws_profile), "", paste0(" --profile ", aws_profile))), "'\n==========================") system(copy_cs_pts_to_s3, intern = TRUE) diff --git a/runners/cs_runner/config.R b/runners/cs_runner/config.R index 855cd9b..c72d563 100644 --- a/runners/cs_runner/config.R +++ b/runners/cs_runner/config.R @@ -2,7 +2,8 @@ pacman::p_load( archive, hydrofabric, - terrainSliceR + hydrofabric3D + # terrainSliceR ) # load root directory @@ -10,9 +11,24 @@ source("runners/cs_runner/config_vars.R") sf::sf_use_s2(FALSE) +### Cross section point + +### S3 names + +# name of S3 bucket +s3_bucket <- "s3://lynker-spatial/" + # name of bucket with nextgen data nextgen_bucket <- "lynker-spatial" +# reference features S3 bucket prefix +ref_features_prefix <- "s3://lynker-spatial/00_reference_features/gpkg/" + +# S3 prefix/folder of version run +version_prefix <- "v20.1" +# version_prefix <- "v20" + +### LOCAL DIRS # directory to copy nextgen bucket data too nextgen_dir <- paste0(base_dir, "/pre-release/") @@ -26,14 +42,45 @@ cs_pts_dir <- paste0(base_dir, "/02_cs_pts/") # final output directory with geopackages per VPU final_dir <- paste0(base_dir, "/cross_sections/") +# directory to copy nextgen bucket data too +ref_features_dir <- paste0(base_dir, "/00_reference_features/") + # create directories dir.create(transects_dir, showWarnings = FALSE) dir.create(cs_pts_dir, showWarnings = FALSE) +dir.create(ref_features_dir, showWarnings = FALSE) +dir.create(paste0(ref_features_dir, "gpkg/"), showWarnings = FALSE) dir.create(final_dir, showWarnings = FALSE) # dir.create(model_attr_dir, showWarnings = FALSE) +## Go get a list of the reference features geopackages from S3 and create a save path using the S3 file names to save reference features to local directory + +# list objects in S3 bucket, and regular expression match to nextgen_.gpkg pattern +list_ref_features <- paste0('#!/bin/bash + # AWS S3 Bucket and Directory information + S3_BUCKET="', ref_features_prefix , '" + + # Regular expression pattern to match object keys + PATTERN="reference_features.gpkg" + + S3_OBJECTS=$(aws s3 ls "$S3_BUCKET" | awk \'{print $4}\' | grep -E "$PATTERN") + + echo "$S3_OBJECTS"' +) + +# ---- Get a list of reference features geopackages ---- + +# Run the script to get a list of the nextgen geopackages that matched the regular expression above +ref_features <- system(list_ref_features, intern = TRUE) + +# ref features datasets +ref_features_keys <- paste0(ref_features_prefix, ref_features) +ref_features_files <- paste0(ref_features_dir, "gpkg/", ref_features) + +### +### UTILITY FUNCTION FOR MATCHING FILES BASED ON VPU STRING ### +### -##### UTILITY FUNCTION FOR MATCHING FILES BASED ON VPU STRING ###### # Given 2 character vectors of filenames both including VPU strings after a "nextgen_" string, match them together to # make sure they are aligned and in the same order # x is a character vector of file paths with a VPU ID preceeded by a "nextgen_" string @@ -76,3 +123,161 @@ align_files_by_vpu <- function( return(matched_paths) } + +# Update flowlines and transects to remove flowlines and transects that intersect with reference_features waterbodies +# flowlines: flowlines linestring sf object +# trans: transects linestring sf object +# waterbodies: waterbodies polygon sf object 
+# Returns a list of length 2 with logical vectors that subsets the "flowlines" and "transects" sf objects to remove flowlines and transects that intersect waterbodies +### Returns a list of length 2 with updated "flowlines" and "transects" sf objects +wb_intersects <- function(flowlines, trans, waterbodies) { + + ######## ######## ######## ######## ######## ######## + ######## ######## ######## ######## ######## ######## + # if(type == 1) { + + flowlines_geos <- geos::as_geos_geometry(flowlines) + wbs_geos <- geos::as_geos_geometry(waterbodies) + + # temporary ID for transects that is the "hy_id", underscore, "cs_id", used for subsetting in future steps + trans$tmp_id <- paste0(trans$hy_id, "_", trans$cs_id) + + message("Checking flowlines against waterbodies...") + + # create an index between flowlines and waterbodies + wb_index <- geos::geos_intersects_matrix(flowlines_geos, wbs_geos) + + # remove any flowlines that cross more than 1 waterbody + to_keep <- flowlines[lengths(wb_index) == 0, ] + to_check <- flowlines[lengths(wb_index) != 0, ] + + # subset transects to the hy_ids in "to_check" set of flowlines + trans_check <- trans[trans$hy_id %in% unique(to_check$id), ] + # trans_check <- trans_geos[trans$hy_id %in% unique(to_check$id)] + + # trans[trans$hy_id %in% unique(to_check$id), ] %>% nrow() + # + # trans_geos[trans$hy_id %in% unique(to_check$id)] + + # check where the transects linestrings intersect with the waterbodies + trans_geos_check <- geos::as_geos_geometry(trans_check) + + message("Checking transects against waterbodies...") + trans_wb_index <- geos::geos_intersects_any( + trans_geos_check, + wbs_geos[unlist(wb_index)] + ) + + # within the transects lines that are on a flowline that crosses a waterbody, + # check if any of these transects line DO NOT CROSS A WATERBODY AT ALL + trans_keep <- trans_check[!trans_wb_index, ] + # trans_keep <- trans_check[lengths(trans_wb_index2) == 0, ] + + # preserve any flowlines that CROSS A WATERBODY BUT ALSO HAVE A TRANSECT LINE that does NOT cross any waterbodies + to_check <- to_check[to_check$id %in% unique(trans_keep$hy_id), ] + + # update flowlines to keep with flowlines that intersect a waterbody BUT STILL, + # have transects that are NOT in the waterbody + to_keep <- dplyr::bind_rows(to_keep, to_check) + + # 'tmp_ids' of transects that are being checked and also the transects within trans_check + # that were determined to be valid (are being kept) + check_ids <- unique(trans_check$tmp_id) + keep_ids <- unique(trans_keep$tmp_id) + + # logical vectors of which flowlines/transects to keep (KEEP == TRUE) + # - Remove any transects that are on flowlines that cross a waterbody AND the transect crosses the waterbody too. 
+ # - Keep original transects that are not on flowlines that intersect waterbodies AND + # also the transects that do NOT intersect waterbodies but are on a flowline that DOES intersect a waterbody + valid_flowlines <- flowlines$id %in% to_keep$id + valid_transects <- trans$tmp_id %in% dplyr::filter(trans, + !tmp_id %in% check_ids[!check_ids %in% keep_ids])$tmp_id + + # return alist of updated flowlines and transects + return( + list( + "valid_flowlines" = valid_flowlines, + "valid_transects" = valid_transects + ) + ) + # } + ######## ######## ######## ######## ######## ######## + ######## ######## ######## ######## ######## ######## + # if(type == 2) { + # + # # temporary ID for transects that is the "hy_id", underscore, "cs_id", used for subsetting in future steps + # trans$tmp_id <- paste0(trans$hy_id, "_", trans$cs_id) + # + # # trans$order <- 1:nrow(trans) + # + # message("Checking flowlines against waterbodies...") + # + # # create an index between flowlines and waterbodies + # wb_index <- sf::st_intersects(flowlines, waterbodies) + # + # # remove any flowlines that cross more than 1 waterbody + # to_keep <- flowlines[lengths(wb_index) == 0, ] + # to_check <- flowlines[lengths(wb_index) != 0, ] + # + # # subset transects to the hy_ids in "to_check" set of flowlines + # trans_check <- trans[trans$hy_id %in% unique(to_check$id), ] + # + # message("Checking transects against waterbodies...") + # + # # check where the transects linestrings intersect with the waterbodies + # trans_wb_index <- sf::st_intersects(trans_check, + # waterbodies[unlist(wb_index), ] + # ) + # # trans_wb_index <- sf::st_intersects(trans_check, wbs) + # + # # within the transects lines that are on a flowline that crosses a waterbody, + # # check if any of these transects line DO NOT CROSS A WATERBODY AT ALL + # trans_keep <- trans_check[lengths(trans_wb_index) == 0, ] + # + # # preserve any flowlines that CROSS A WATERBODY BUT ALSO HAVE A TRANSECT LINE that does NOT cross any waterbodies + # to_check <- to_check[to_check$id %in% unique(trans_keep$hy_id), ] + # + # # update flowlines to keep with flowlines that intersect a waterbody BUT STILL, + # # have transects that are NOT in the waterbody + # to_keep <- dplyr::bind_rows(to_keep, to_check) + # + # # 'tmp_ids' of transects that are being checked and also the transects within trans_check + # # that were determined to be valid (are being kept) + # check_ids <- unique(trans_check$tmp_id) + # keep_ids <- unique(trans_keep$tmp_id) + # + # # logical vectors of which flowlines/transects to keep (KEEP == TRUE) + # # - Remove any transects that are on flowlines that cross a waterbody AND the transect crosses the waterbody too. + # # - Keep original transects that are not on flowlines that intersect waterbodies AND + # # also the transects that do NOT intersect waterbodies but are on a flowline that DOES intersect a waterbody + # valid_flowlines <- flowlines$id %in% to_keep$id + # valid_transects <- trans$tmp_id %in% dplyr::filter(trans, + # !tmp_id %in% check_ids[!check_ids %in% keep_ids])$tmp_id + # + # # return alist of updated flowlines and transects + # return( + # list( + # "valid_flowlines" = valid_flowlines, + # "valid_transects" = valid_transects + # ) + # ) + # + # # # - Remove any transects that are on flowlines that cross a waterbody AND the transect crosses the waterbody too. 
+ # # # - Keep original transects that are not on flowlines that intersect waterbodies AND + # # # also the transects that do NOT intersect waterbodies but are on a flowline that DOES intersect a waterbody + # # trans <- + # # trans %>% + # # dplyr::filter( + # # !tmp_id %in% check_ids[!check_ids %in% keep_ids] + # # ) %>% + # # dplyr::select(-tmp_id) + # # + # # # return alist of updated flowlines and transects + # # return( + # # list( + # # "flowlines" = to_keep, + # # "transects" = trans + # # ) + # # ) + # } +} diff --git a/runners/cs_runner/config_vars.R b/runners/cs_runner/config_vars.R index 9738869..72d87b9 100644 --- a/runners/cs_runner/config_vars.R +++ b/runners/cs_runner/config_vars.R @@ -1,11 +1,19 @@ ### EDIT base_dir, aws_profile, and DEM_URL ### -base_dir <- '/Users/anguswatters/Desktop/lynker-spatial' +base_dir <- '/Users/anguswatters/Desktop/lynker-spatial' # AWS profile to run CLI commands -aws_profile <- "angus-lynker" +aws_profile <- "angus-lynker" + +# name of S3 bucket +s3_bucket <- "s3://lynker-spatial/" # DEM URL -DEM_URL <- "/vsicurl/https://prd-tnm.s3.amazonaws.com/StagedProducts/Elevation/1/TIFF/USGS_Seamless_DEM_1.vrt" +DEM_URL <- "/vsicurl/https://prd-tnm.s3.amazonaws.com/StagedProducts/Elevation/1/TIFF/USGS_Seamless_DEM_1.vrt" + +# scale argument for cross_section_pts() function. +# The percentage of the length of the transect line to try and extend a transect to see if viable Z values can be found by extending transect line +# Default setting is 50% of the original transect lines length (0.5) +EXTENSION_PCT <- 0.5 # # create the directory if it does NOT exist # if(!dir.exists(base_dir)) { diff --git a/runners/cs_runner/download_nextgen.R b/runners/cs_runner/download_nextgen.R index a1aa30a..39ff31d 100644 --- a/runners/cs_runner/download_nextgen.R +++ b/runners/cs_runner/download_nextgen.R @@ -10,6 +10,9 @@ s3_bucket <- "s3://lynker-spatial/" # nextgen bucket name prerelease_prefix <- "s3://lynker-spatial/pre-release/" +# # reference features S3 bucket prefix +# ref_features_prefix <- "s3://lynker-spatial/00_reference_features/gpkg/" + # nextgen model attributes folder in S3 bucket with parquet files model_attr_prefix <- paste0(s3_bucket, "v20/3D/model_attributes/") @@ -95,5 +98,34 @@ for (key in model_attr_keys) { message("Download '", paste0(model_attr_prefix, key), "' complete!") message("------------------") } +## Go get a list of the reference features geopackages from S3 and create a save path using the S3 file names to save reference features to local directory + +# list objects in S3 bucket, and regular expression match to nextgen_.gpkg pattern +list_ref_features <- paste0('#!/bin/bash + # AWS S3 Bucket and Directory information + S3_BUCKET="', ref_features_prefix , '" + + # Regular expression pattern to match object keys + PATTERN="reference_features.gpkg" + + S3_OBJECTS=$(aws s3 ls "$S3_BUCKET" | awk \'{print $4}\' | grep -E "$PATTERN") + + echo "$S3_OBJECTS"' +) +# ---- Get a list of reference features geopackages geopackages ---- +# Run the script to get a list of the nextgen geopackages that matched the regular expression above +ref_features <- system(list_ref_features, intern = TRUE) +## Download reference features geopackages and save them to a local directory +# Parse the selected S3 objects keys and copy them to the destination directory +for (key in ref_features) { + # paste0(ref_features_dir, "gpkg/") + copy_cmd <- paste0('aws s3 cp ', ref_features_prefix, key, ' ', paste0(ref_features_dir, "gpkg/"), key) + + message("Copying S3 
object:\n", paste0(ref_features_prefix, key))
+  system(copy_cmd)
+  
+  message("Download '", paste0(ref_features_dir, "gpkg/", key), "' complete!")
+  message("------------------")
+}

From 16c6637728693928b8640f2d2252622dc71716d2 Mon Sep 17 00:00:00 2001
From: anguswg-ucsb <anguswatters@gmail.com>
Date: Wed, 6 Dec 2023 05:05:38 -0800
Subject: [PATCH 02/64] small cleanups

---
 runners/cs_runner/01_transects.R | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/runners/cs_runner/01_transects.R b/runners/cs_runner/01_transects.R
index 456be21..1d6f70d 100644
--- a/runners/cs_runner/01_transects.R
+++ b/runners/cs_runner/01_transects.R
@@ -50,9 +50,7 @@ for(i in 1:nrow(path_df)) {
   #   dplyr::select(
   #     model_attrs,
   #     id, eTW
-  #   ),
-  #   by = "id"
-  # )
+  #   ), by = "id")
 
   # calculate bankfull width
   flines <-
@@ -80,7 +78,6 @@ for(i in 1:nrow(path_df)) {
   # flines$bf_width <- ifelse(is.na(flines$bf_width), exp(0.700 + 0.365* log(flines$tot_drainage_areasqkm)), flines$bf_width)
 
   time1 <- Sys.time()
-  # system.time({
   # create transect lines
   transects <- hydrofabric3D::cut_cross_sections(
     net = flines, # flowlines network
@@ -100,7 +97,6 @@ for(i in 1:nrow(path_df)) {
     # precision = 1,
     add = TRUE # whether to add back the original data
   )
-  # })
 
   time2 <- Sys.time()
   time_diff <- round(as.numeric(time2 - time1 ), 2)

From ddecfffc8bfee04214a743e0ef6d3d83112c97b8 Mon Sep 17 00:00:00 2001
From: anguswg-ucsb <anguswatters@gmail.com>
Date: Sat, 9 Dec 2023 06:58:42 -0800
Subject: [PATCH 03/64] Updating config and download_nextgen runner scripts to
 be more flexible in which remote datasets they point to, updating transects
 runner to use power-law bankfull width, and updating cs_pts runner to remove
 waterbodies and try to extend transects with flat cross section Z values

---
 runners/cs_runner/01_transects.R     | 10 +++++-----
 runners/cs_runner/02_cs_pts.R        |  4 ++--
 runners/cs_runner/config.R           | 10 +++++++++-
 runners/cs_runner/download_nextgen.R | 15 ++++++++++-----
 4 files changed, 26 insertions(+), 13 deletions(-)

diff --git a/runners/cs_runner/01_transects.R b/runners/cs_runner/01_transects.R
index 1d6f70d..7cf33ae 100644
--- a/runners/cs_runner/01_transects.R
+++ b/runners/cs_runner/01_transects.R
@@ -1,11 +1,11 @@
 # Generate the flowlines layer for the final cross_sections_<VPU>.gpkg for each VPU
 source("runners/cs_runner/config.R")
 
-# # # load libraries
-library(hydrofabric3D)
-# library(terrainSliceR)
-library(dplyr)
-library(sf)
+# # # # load libraries
+# library(hydrofabric3D)
+# # library(terrainSliceR)
+# library(dplyr)
+# library(sf)
 
 # transect bucket prefix
 transects_prefix <- paste0(s3_bucket, version_prefix, "/3D/transects/")
diff --git a/runners/cs_runner/02_cs_pts.R b/runners/cs_runner/02_cs_pts.R
index 9c7c237..02dad68 100644
--- a/runners/cs_runner/02_cs_pts.R
+++ b/runners/cs_runner/02_cs_pts.R
@@ -44,8 +44,7 @@ path_df <- align_files_by_vpu(
 # then classify the points, and create a parquet file with hy_id, cs_id, pt_id, X, Y, Z data.
# Save parquet locally and upload to specified S3 bucket for (i in 1:nrow(path_df)) { - # i = 15 - + # nextgen file and full path nextgen_file <- path_df$x[i] nextgen_path <- paste0(nextgen_dir, nextgen_file) @@ -248,3 +247,4 @@ for (i in 1:nrow(path_df)) { system(copy_cs_pts_to_s3, intern = TRUE) } + diff --git a/runners/cs_runner/config.R b/runners/cs_runner/config.R index c72d563..ffc88e5 100644 --- a/runners/cs_runner/config.R +++ b/runners/cs_runner/config.R @@ -21,6 +21,12 @@ s3_bucket <- "s3://lynker-spatial/" # name of bucket with nextgen data nextgen_bucket <- "lynker-spatial" +# nextgen bucket folder name +nextgen_bucket_folder <- "v20.1/gpkg/" + +# nextgen bucket name +nextgen_prefix <- paste0(s3_bucket, nextgen_bucket_folder) + # reference features S3 bucket prefix ref_features_prefix <- "s3://lynker-spatial/00_reference_features/gpkg/" @@ -29,8 +35,10 @@ version_prefix <- "v20.1" # version_prefix <- "v20" ### LOCAL DIRS + # directory to copy nextgen bucket data too -nextgen_dir <- paste0(base_dir, "/pre-release/") +nextgen_dir <- paste0(base_dir, "/", nextgen_bucket_folder) +# nextgen_dir <- paste0(base_dir, "/pre-release/") # model attributes directory model_attr_dir <- paste0(base_dir, "/model_attributes/") diff --git a/runners/cs_runner/download_nextgen.R b/runners/cs_runner/download_nextgen.R index 39ff31d..c405fa9 100644 --- a/runners/cs_runner/download_nextgen.R +++ b/runners/cs_runner/download_nextgen.R @@ -7,8 +7,13 @@ source("runners/cs_runner/config_vars.R") # name of S3 bucket s3_bucket <- "s3://lynker-spatial/" +# nextgen bucket folder name +nextgen_bucket_folder <- "v20.1/gpkg/" + # nextgen bucket name -prerelease_prefix <- "s3://lynker-spatial/pre-release/" +nextgen_prefix <- paste0(s3_bucket, nextgen_bucket_folder) + +# prerelease_prefix <- "s3://lynker-spatial/pre-release/" # # reference features S3 bucket prefix # ref_features_prefix <- "s3://lynker-spatial/00_reference_features/gpkg/" @@ -17,7 +22,7 @@ prerelease_prefix <- "s3://lynker-spatial/pre-release/" model_attr_prefix <- paste0(s3_bucket, "v20/3D/model_attributes/") # directory to copy nextgen bucket data too -nextgen_dir <- paste0(base_dir, "/pre-release/") +nextgen_dir <- paste0(base_dir, "/", nextgen_bucket_folder) # create the directory if it does NOT exist if(!dir.exists(nextgen_dir)) { @@ -38,7 +43,7 @@ if(!dir.exists(model_attr_dir)) { # list objects in S3 bucket, and regular expression match to nextgen_.gpkg pattern command <- paste0('#!/bin/bash # AWS S3 Bucket and Directory information - S3_BUCKET="', prerelease_prefix, '" + S3_BUCKET="', nextgen_prefix, '" DESTINATION_DIR=', nextgen_dir, ' # Regular expression pattern to match object keys @@ -57,8 +62,8 @@ bucket_keys <- system(command, intern = TRUE) # Parse the selected S3 objects keys and copy them to the destination directory for (key in bucket_keys) { - copy_cmd <- paste0('aws s3 cp ', prerelease_prefix, key, " ", nextgen_dir, key) - message("Copying S3 object:\n", paste0(prerelease_prefix, key)) + copy_cmd <- paste0('aws s3 cp ', nextgen_prefix, key, " ", nextgen_dir, key) + message("Copying S3 object:\n", paste0(nextgen_prefix, key)) system(copy_cmd) From 2567b41cc6abf5f5ed111c8ec98293e63be43fe1 Mon Sep 17 00:00:00 2001 From: anguswg-ucsb <anguswatters@gmail.com> Date: Mon, 11 Dec 2023 14:14:24 -0800 Subject: [PATCH 04/64] small cleanups to cs_runner/config.R, removing old code --- runners/cs_runner/config.R | 88 +------------------------------------- 1 file changed, 1 insertion(+), 87 deletions(-) diff --git 
a/runners/cs_runner/config.R b/runners/cs_runner/config.R index ffc88e5..58cb4ed 100644 --- a/runners/cs_runner/config.R +++ b/runners/cs_runner/config.R @@ -141,8 +141,6 @@ align_files_by_vpu <- function( wb_intersects <- function(flowlines, trans, waterbodies) { ######## ######## ######## ######## ######## ######## - ######## ######## ######## ######## ######## ######## - # if(type == 1) { flowlines_geos <- geos::as_geos_geometry(flowlines) wbs_geos <- geos::as_geos_geometry(waterbodies) @@ -163,10 +161,6 @@ wb_intersects <- function(flowlines, trans, waterbodies) { trans_check <- trans[trans$hy_id %in% unique(to_check$id), ] # trans_check <- trans_geos[trans$hy_id %in% unique(to_check$id)] - # trans[trans$hy_id %in% unique(to_check$id), ] %>% nrow() - # - # trans_geos[trans$hy_id %in% unique(to_check$id)] - # check where the transects linestrings intersect with the waterbodies trans_geos_check <- geos::as_geos_geometry(trans_check) @@ -199,7 +193,7 @@ wb_intersects <- function(flowlines, trans, waterbodies) { # also the transects that do NOT intersect waterbodies but are on a flowline that DOES intersect a waterbody valid_flowlines <- flowlines$id %in% to_keep$id valid_transects <- trans$tmp_id %in% dplyr::filter(trans, - !tmp_id %in% check_ids[!check_ids %in% keep_ids])$tmp_id + !tmp_id %in% check_ids[!check_ids %in% keep_ids])$tmp_id # return alist of updated flowlines and transects return( @@ -208,84 +202,4 @@ wb_intersects <- function(flowlines, trans, waterbodies) { "valid_transects" = valid_transects ) ) - # } - ######## ######## ######## ######## ######## ######## - ######## ######## ######## ######## ######## ######## - # if(type == 2) { - # - # # temporary ID for transects that is the "hy_id", underscore, "cs_id", used for subsetting in future steps - # trans$tmp_id <- paste0(trans$hy_id, "_", trans$cs_id) - # - # # trans$order <- 1:nrow(trans) - # - # message("Checking flowlines against waterbodies...") - # - # # create an index between flowlines and waterbodies - # wb_index <- sf::st_intersects(flowlines, waterbodies) - # - # # remove any flowlines that cross more than 1 waterbody - # to_keep <- flowlines[lengths(wb_index) == 0, ] - # to_check <- flowlines[lengths(wb_index) != 0, ] - # - # # subset transects to the hy_ids in "to_check" set of flowlines - # trans_check <- trans[trans$hy_id %in% unique(to_check$id), ] - # - # message("Checking transects against waterbodies...") - # - # # check where the transects linestrings intersect with the waterbodies - # trans_wb_index <- sf::st_intersects(trans_check, - # waterbodies[unlist(wb_index), ] - # ) - # # trans_wb_index <- sf::st_intersects(trans_check, wbs) - # - # # within the transects lines that are on a flowline that crosses a waterbody, - # # check if any of these transects line DO NOT CROSS A WATERBODY AT ALL - # trans_keep <- trans_check[lengths(trans_wb_index) == 0, ] - # - # # preserve any flowlines that CROSS A WATERBODY BUT ALSO HAVE A TRANSECT LINE that does NOT cross any waterbodies - # to_check <- to_check[to_check$id %in% unique(trans_keep$hy_id), ] - # - # # update flowlines to keep with flowlines that intersect a waterbody BUT STILL, - # # have transects that are NOT in the waterbody - # to_keep <- dplyr::bind_rows(to_keep, to_check) - # - # # 'tmp_ids' of transects that are being checked and also the transects within trans_check - # # that were determined to be valid (are being kept) - # check_ids <- unique(trans_check$tmp_id) - # keep_ids <- unique(trans_keep$tmp_id) - # - # # logical vectors of which 
flowlines/transects to keep (KEEP == TRUE) - # # - Remove any transects that are on flowlines that cross a waterbody AND the transect crosses the waterbody too. - # # - Keep original transects that are not on flowlines that intersect waterbodies AND - # # also the transects that do NOT intersect waterbodies but are on a flowline that DOES intersect a waterbody - # valid_flowlines <- flowlines$id %in% to_keep$id - # valid_transects <- trans$tmp_id %in% dplyr::filter(trans, - # !tmp_id %in% check_ids[!check_ids %in% keep_ids])$tmp_id - # - # # return alist of updated flowlines and transects - # return( - # list( - # "valid_flowlines" = valid_flowlines, - # "valid_transects" = valid_transects - # ) - # ) - # - # # # - Remove any transects that are on flowlines that cross a waterbody AND the transect crosses the waterbody too. - # # # - Keep original transects that are not on flowlines that intersect waterbodies AND - # # # also the transects that do NOT intersect waterbodies but are on a flowline that DOES intersect a waterbody - # # trans <- - # # trans %>% - # # dplyr::filter( - # # !tmp_id %in% check_ids[!check_ids %in% keep_ids] - # # ) %>% - # # dplyr::select(-tmp_id) - # # - # # # return alist of updated flowlines and transects - # # return( - # # list( - # # "flowlines" = to_keep, - # # "transects" = trans - # # ) - # # ) - # } } From 67d33bb7bd6b6b026efaf8b42cfc524e04bc4f9c Mon Sep 17 00:00:00 2001 From: anguswg-ucsb <anguswatters@gmail.com> Date: Mon, 26 Feb 2024 07:59:53 -0800 Subject: [PATCH 05/64] updated transect and cross section runners to allow transects to be updated and save a new updated local version of transects after cross section rectification, added a COLLECT_META flag for if meta data CSVs for each cross section run should be collected and saved locally --- .gitignore | 3 +- runners/cs_runner/01_transects.R | 71 ++++- runners/cs_runner/02_cs_pts.R | 487 +++++++++++++++++++++++++------ runners/cs_runner/config.R | 228 +++++++++++---- runners/cs_runner/config_vars.R | 4 + 5 files changed, 625 insertions(+), 168 deletions(-) diff --git a/.gitignore b/.gitignore index d70321e..3d01a85 100644 --- a/.gitignore +++ b/.gitignore @@ -9,4 +9,5 @@ dead inst/doc vignettes/cihro-data data -check \ No newline at end of file +check +.Rapp.history \ No newline at end of file diff --git a/runners/cs_runner/01_transects.R b/runners/cs_runner/01_transects.R index 7cf33ae..6bc0025 100644 --- a/runners/cs_runner/01_transects.R +++ b/runners/cs_runner/01_transects.R @@ -26,7 +26,9 @@ path_df <- align_files_by_vpu( # loop over each VPU and generate cross sections, then save locally and upload to S3 bucket for(i in 1:nrow(path_df)) { - + + # i = 8 + # nextgen file and full path nextgen_file <- path_df$x[i] nextgen_path <- paste0(nextgen_dir, nextgen_file) @@ -78,6 +80,9 @@ for(i in 1:nrow(path_df)) { # flines$bf_width <- ifelse(is.na(flines$bf_width), exp(0.700 + 0.365* log(flines$tot_drainage_areasqkm)), flines$bf_width) time1 <- Sys.time() + + # system.time({ + # create transect lines transects <- hydrofabric3D::cut_cross_sections( net = flines, # flowlines network @@ -97,6 +102,7 @@ for(i in 1:nrow(path_df)) { # precision = 1, add = TRUE # whether to add back the original data ) + # }) time2 <- Sys.time() time_diff <- round(as.numeric(time2 - time1 ), 2) @@ -109,35 +115,70 @@ for(i in 1:nrow(path_df)) { message("Saving transects to:\n - filepath: '", out_path, "'") - # add cs_source column and keep just the desired columns to save and upload to S3 - transects <- + # add cs_source column 
and rename cs_widths to cs_lengthm
+  transects <-
+    transects %>%
+    dplyr::mutate(
+      cs_source = net_source
+    ) %>%
+    dplyr::rename("cs_lengthm" = cs_widths)
+  
+  # # add cs_source column and keep just the desired columns to save and upload to S3
+  # transects <-
+  #   transects %>%
+  #   dplyr::mutate(
+  #     cs_source = net_source
+  #   ) %>%
+  #   dplyr::select(
+  #     hy_id,
+  #     cs_source,
+  #     cs_id,
+  #     cs_measure,
+  #     cs_lengthm = cs_widths,
+  #     geometry
+  #   )
+  # tmp <- sf::read_sf(out_path)
 
-  # save flowlines to out_path (lynker-spatial/01_transects/transects_<VPU num>.gpkg)
+  # save transects with only columns to be uploaded to S3 (lynker-spatial/01_transects/transects_<VPU num>.gpkg)
   sf::write_sf(
-    transects,
+    # save dataset with only subset of columns to upload to S3
+    dplyr::select(transects,
+                  hy_id,
+                  cs_source,
+                  cs_id,
+                  cs_measure,
+                  cs_lengthm,
+                  # sinuosity,
+                  geometry
+    ),
     out_path
-  )
+  )
 
   # command to copy transects geopackage to S3
   copy_to_s3 <- paste0("aws s3 cp ", out_path, " ", transects_prefix, out_file,
                        ifelse(is.null(aws_profile), "", paste0(" --profile ", aws_profile))
                        )
-
+  
   message("Copy VPU ", path_df$vpu[i], " transects to S3:\n - S3 copy command:\n'",
           copy_to_s3,
           "'\n==========================")
 
   system(copy_to_s3, intern = TRUE)
+  
+  message("Overwriting local copy of transects to include 'is_extended' column...\n==========================")
+  # Overwrite transects with additional columns for development purposes (is_extended) to have a local copy of dataset with information about extensions
+  sf::write_sf(
+    dplyr::select(
+      dplyr::mutate(transects, is_extended = FALSE),
+      hy_id,
+      cs_source,
+      cs_id,
+      cs_measure,
+      cs_lengthm,
+      # sinuosity,
+      is_extended,
+      geometry
+    ),
+    out_path
+  )
 }
diff --git a/runners/cs_runner/02_cs_pts.R b/runners/cs_runner/02_cs_pts.R
index 02dad68..93c561b 100644
--- a/runners/cs_runner/02_cs_pts.R
+++ b/runners/cs_runner/02_cs_pts.R
@@ -40,11 +40,16 @@ path_df <- align_files_by_vpu(
     by = "vpu"
   )
 
+# Local path to save CSVs of cross section meta data during each iteration
+meta_path <- "/local/path/to/save/cross_section_meta_data/"
+
 # loop over the nextgen and transect datasets (by VPU) and extract point elevations across points on each transect line,
 # then classify the points, and create a parquet file with hy_id, cs_id, pt_id, X, Y, Z data.
# Save parquet locally and upload to specified S3 bucket for (i in 1:nrow(path_df)) { + start <- Sys.time() + # nextgen file and full path nextgen_file <- path_df$x[i] nextgen_path <- paste0(nextgen_dir, nextgen_file) @@ -57,10 +62,14 @@ for (i in 1:nrow(path_df)) { ref_file <- path_df$ref_file[i] ref_path <- paste0(ref_features_dir, "gpkg/", ref_file) - message("Creating VPU ", path_df$vpu[i], + # current VPU being processed + VPU = path_df$vpu[i] + + message("Creating VPU ", VPU, " cross section points:\n - flowpaths: '", nextgen_file, "'\n - transects: '", transect_file, "'", - "\n - waterbodies: '", ref_file, "'" + "\n - waterbodies: '", ref_file, "'", + "'\n - start time: '", start, "'" ) ################### @@ -78,17 +87,35 @@ for (i in 1:nrow(path_df)) { # flines = dplyr::slice(flines, 1:5) # transects = dplyr::filter(transects, hy_id %in% unique(flines$id)) ##### ##### - - # Update flowlines and transects to remove flowlines and transects that intersect with reference_features waterbodies - feature_subsets <- wb_intersects(flines, transects, waterbodies) - + + # system.time({ + # Update flowlines and transects to remove flowlines and transects that intersect with reference_features waterbodies + feature_subsets <- wb_intersects(flines, transects, waterbodies) + # }) + + # Collect meta data on features and changes + if(COLLECT_META) { + + fline_count <- nrow(flines) + transect_count <- nrow(transects) + wb_count <- nrow(waterbodies) + + fline_wb_count <- sum(feature_subsets$valid_flowlines) + transect_wb_count <- sum(feature_subsets$valid_transects) + } + # replace flowlines and transects objects with updated versions in "updated_features" flines <- flines[feature_subsets$valid_flowlines, ] transects <- transects[feature_subsets$valid_transects, ] - - # get start time for log messages - time1 <- Sys.time() - + + rm(waterbodies) + gc() + + start_cs_pts <- Sys.time() + message("Extracting cross section points (", start_cs_pts,")") + + system.time({ + # get cross section point elevations cs_pts <- hydrofabric3D::cross_section_pts( cs = transects, @@ -96,54 +123,166 @@ for (i in 1:nrow(path_df)) { min_pts_per_cs = 10, dem = DEM_URL ) - - # try to extend any cross sections that returned cross section points with - # identical Z values within a certain threshold ("flat" cross sections) - cs_pts <- hydrofabric3D::rectify_flat_cs( - net = flines, - cs = transects, - cs_pts = cs_pts, - points_per_cs = NULL, - min_pts_per_cs = 10, - dem = DEM_URL, - scale = EXTENSION_PCT, - threshold = 0 - ) - - # get end time for log messages - time2 <- Sys.time() - time_diff <- round(as.numeric(time2 - time1 ), 2) - message("\n\n ---> Cross section point elevations processed in ", time_diff) + }) + + end_cs_pts <- Sys.time() + message("\n ---> Completed extraction of cross section points (", end_cs_pts,")") + + if(COLLECT_META) { + start_cs_pts_count <- nrow(cs_pts) + } + + # cs_pts_time <- round(as.numeric(end_cs_pts - start_cs_pts ), 2) + # message("\n\n ---> Cross section point elevations processed in ", cs_pts_time) + + start_rectify <- Sys.time() + message("Rectifying cross section points (", start_rectify,")") + + # collect the hy_ids and number of stream orders in cs_pts + if(COLLECT_META) { + + cs_pts_ids <- unique(cs_pts$hy_id) + start_cs_pts_ids <- length(cs_pts_ids) + + start_order_count <- + flines %>% + sf::st_drop_geometry() %>% + dplyr::filter(id %in% cs_pts_ids) %>% + dplyr::group_by(order) %>% + dplyr::count() %>% + tidyr::pivot_wider(names_from = order, + names_glue = 
"start_order_{order}", + values_from = n + ) %>% + dplyr::ungroup() + } # Remove any cross section that has ANY missing (NA) Z values. - cs_pts <- - cs_pts %>% + cs_pts <- + cs_pts %>% # dplyr::filter(hy_id %in% c("wb-849054", "wb-845736")) %>% dplyr::group_by(hy_id, cs_id) %>% dplyr::filter(!any(is.na(Z))) %>% dplyr::ungroup() - + # try to extend any cross sections that returned cross section points with + # identical Z values within a certain threshold ("flat" cross sections) + + system.time({ + # cs_pts <- hydrofabric3D::rectify_flat_cs( + fixed_pts <- hydrofabric3D::rectify_flat_cs( + cs_pts = cs_pts, + net = flines, + cs = transects, + points_per_cs = NULL, + min_pts_per_cs = 10, + dem = DEM_URL, + scale = EXTENSION_PCT, + threshold = 1, + pct_threshold = 0.99, + fix_ids = FALSE + ) + }) + + + end_rectify <- Sys.time() + rectify_time <- round(as.numeric(end_rectify - start_rectify ), 2) + + message("\n ---> Completed rectifying cross section points (", end_rectify,")") + + if(COLLECT_META) { + rectify_cs_pts_count <- nrow(fixed_pts) + # collect the hy_ids and number of stream orders in the RECTIFIED cs_pts + rectify_cs_pts_ids <- unique(fixed_pts$hy_id) + rectify_cs_pts_id_count <- length(rectify_cs_pts_ids) + + rectify_order_count <- + flines %>% + sf::st_drop_geometry() %>% + dplyr::filter(id %in% rectify_cs_pts_ids) %>% + dplyr::group_by(order) %>% + dplyr::count() %>% + tidyr::pivot_wider(names_from = order, + names_glue = "rectify_order_{order}", + values_from = n + ) %>% + dplyr::ungroup() + } + + rm(cs_pts) + gc() + + message("\n\n ---> Cross section points rectified in ", rectify_time, " (seconds?) ") + + # Remove any cross section that has ANY missing (NA) Z values. + fixed_pts <- + fixed_pts %>% + # dplyr::filter(hy_id %in% c("wb-849054", "wb-845736")) %>% + # dplyr::group_by(hy_id, cs_id) %>% + # dplyr::filter(!any(is.na(Z))) %>% + # dplyr::ungroup() %>% + hydrofabric3D::add_tmp_id() + + # Number of cross section points after removing any cross sections that contain any NA Z values + if(COLLECT_META) { + cs_pts_na_removed_count <- nrow(fixed_pts) + } + + # Stash meta data about the points + pts_meta <- + fixed_pts %>% + sf::st_drop_geometry() %>% + dplyr::select(hy_id, cs_id, pt_id, cs_measure, is_extended) + + message("Classifying cross section points...") + + # Classify points + fixed_pts <- hydrofabric3D::classify_points(fixed_pts) + + # add meta data back to the points + fixed_pts <- + fixed_pts %>% + dplyr::left_join( + pts_meta, + by = c("hy_id", "cs_id", "pt_id") + # dplyr::select(pts_meta, hy_id, cs_id, pt_id, cs_measure, is_extended) + ) + + message("Gathering count of point types per cross section...") + + # get the counts of each point type to add this data to the transects dataset + point_type_counts <- hydrofabric3D::get_point_type_counts(fixed_pts, add = FALSE) + # # check the number of cross sections that were extended - # cs_pts$is_extended %>% table() - + # fixed_pts$is_extended %>% table() + message("Subsetting cross section points generated after extending transects...") + # extract cross section points that have an "is_extended" value of TRUE - extended_pts = - cs_pts %>% + extended_pts <- + fixed_pts %>% dplyr::filter(is_extended) %>% - dplyr::mutate(tmp_id = paste0(hy_id, "_", cs_id)) - + hydrofabric3D::add_tmp_id() + # dplyr::mutate(tmp_id = paste0(hy_id, "_", cs_id)) + # extract transects that have a "hy_id" in the "extended_pts" dataset - update_transects = + update_transects <- transects %>% - dplyr::mutate(tmp_id = paste0(hy_id, "_", cs_id)) %>% + 
hydrofabric3D::add_tmp_id() %>% dplyr::filter(tmp_id %in% unique(extended_pts$tmp_id)) - + + # Number of cross section points generated from extending transects and number of tmpIDs + if(COLLECT_META) { + extended_pts_count <- nrow(extended_pts) + extended_pts_ids <- length(unique(extended_pts$tmp_id)) + extended_transects_count <- nrow(update_transects) + extended_transects_ids <- length(unique(update_transects$tmp_id)) + } + # if any transects were extended, update the transects dataset, and overwrite local and S3 transects geopackages if (nrow(update_transects) > 0) { message("Updating ", nrow(update_transects), " transects") - - update_transects = + + update_transects <- update_transects %>% # dplyr::filter(hy_id %in% unique(extended_pts$hy_id)) %>% # apply extend_by_percent function to each transect line: @@ -151,81 +290,201 @@ for (i in 1:nrow(path_df)) { pct = EXTENSION_PCT, length_col = "cs_lengthm" ) - - # remove old transects that have "tmp_id" in "extended_pts", and replace with "update_transects" - out_transects = - transects %>% - dplyr::mutate(tmp_id = paste0(hy_id, "_", cs_id)) %>% - dplyr::filter(!tmp_id %in% unique(extended_pts$tmp_id)) %>% - dplyr::bind_rows( - update_transects - ) %>% - # dplyr::mutate(is_extended = FALSE) %>% - # dplyr::bind_rows( - # dplyr::mutate(update_transects, is_extended = TRUE) - # ) %>% - dplyr::select(-tmp_id) - + + # # Number of transects being updated + # if(COLLECT_META) { + # extended_transects_count <- nrow(update_transects) + # extended_transects_ids <- length(unique(update_transects$tmp_id)) + # } + + # Filter down to ONLY points that were finalized and rectified from rectify_cs_pts() + # remove old transects that have "tmp_id" in "extended_pts" (transects that were unchanged and are "good_to_go") + # and then replace with old transects with the "update_transects" + out_transects <- + transects %>% + hydrofabric3D::add_tmp_id() %>% + # dplyr::filter(!tmp_id %in% unique(extended_pts$tmp_id)) %>% + # dplyr::filter(!tmp_id %in% ) + dplyr::filter(tmp_id %in% unique(hydrofabric3D::add_tmp_id(fixed_pts)$tmp_id)) %>% + dplyr::filter(!tmp_id %in% unique(extended_pts$tmp_id)) %>% + dplyr::bind_rows( + dplyr::mutate( + update_transects, + is_extended = TRUE + ) + ) + + # dplyr::mutate(is_extended = FALSE) %>% + # dplyr::bind_rows( + # dplyr::mutate(update_transects, is_extended = TRUE) + # ) %>% + # dplyr::select(-tmp_id) + + } else { + + out_transects <- + transects %>% + hydrofabric3D::add_tmp_id() %>% + dplyr::filter(tmp_id %in% unique(hydrofabric3D::add_tmp_id(fixed_pts)$tmp_id)) %>% + dplyr::filter(!tmp_id %in% unique(extended_pts$tmp_id)) + } + + # Number of final output transects and the number of unique tmpIDs (hy_id/cs_id , i.e. 
cross sections) + if(COLLECT_META) { + output_transects_count <- nrow(out_transects) + output_transects_ids <- length(unique(out_transects$tmp_id)) + } + + # finalize new transects + out_transects <- + out_transects %>% + dplyr::left_join( + point_type_counts, + by = c("hy_id", "cs_id") + ) %>% + dplyr::select(hy_id, cs_source, cs_id, cs_measure, cs_lengthm, + # sinuosity, + is_extended, + left_bank_count, right_bank_count, channel_count, bottom_count, + geom) + + # ------------------------------------------------------------------- + # ---- Re enumerate the transects & cross section points "cs_id" ---- + # ------------------------------------------------------------------- + + # make a dataframe that has a new_cs_id column that has + # the cs_id renumbered to fill in any missing IDs, + # so each hy_id has cs_ids that go from 1 - number of cross sections on hy_id + # The dataframe below will be used to join the "new_cs_id" with + # the original "cs_ids" in the final cross section POINTS and UPDATED TRANSECTS output datasets + renumbered_ids <- + fixed_pts %>% + sf::st_drop_geometry() %>% + # dplyr::filter(hy_id %in% c("wb-2402800", "wb-2398282", "wb-2400351")) %>% + dplyr::select(hy_id, cs_id, pt_id, cs_measure) %>% + dplyr::group_by(hy_id, cs_id) %>% + dplyr::slice(1) %>% + dplyr::ungroup() %>% + hydrofabric3D::add_tmp_id() %>% + dplyr::group_by(hy_id) %>% + dplyr::mutate( + new_cs_id = 1:dplyr::n() + ) %>% + dplyr::ungroup() %>% + dplyr::select(new_cs_id, tmp_id) + + # Renumber the transects to have correct CS IDs + out_transects <- dplyr::left_join( + hydrofabric3D::add_tmp_id(out_transects), + renumbered_ids, + by = "tmp_id" + ) %>% + dplyr::select(-cs_id, -tmp_id) %>% + dplyr::select(hy_id, cs_source, cs_id = new_cs_id, + cs_measure, cs_lengthm, + # sinuosity, + is_extended, + left_bank_count, right_bank_count, channel_count, bottom_count, + geometry = geom + ) + + # # fline_lengths <- sf::st_drop_geometry(flines) %>% + # # dplyr::filter(id %in% out_transects$hy_id) %>% + # # dplyr::mutate(lengthm = lengthkm * 1000) %>% + # # dplyr::select(hy_id = id, lengthm, lengthkm) + # tmp <- dplyr::left_join( out_transects, fline_lengths, by = "hy_id") %>% + # dplyr::mutate(ds_distance = (cs_measure * lengthm) / 100) %>% + # dplyr::select(-sinuosity) %>% + # dplyr::relocate(hy_id, cs_id, cs_measure, lengthm, ds_distance, lengthkm) %>% + # dplyr::rename("geometry" = geom) + + # Renumber the cross sections points to have correct CS IDs + fixed_pts <- dplyr::left_join( + hydrofabric3D::add_tmp_id(fixed_pts), + renumbered_ids, + by = "tmp_id" + ) %>% + dplyr::select(-cs_id, -tmp_id) %>% + dplyr::rename(cs_id = new_cs_id) + # mapview::mapview(transects, color = "red") + # mapview::mapview(dplyr::filter(out_transects, is_extended), color = "green") + # mapview::mapview(flines, color = "dodgerblue") ###################################### - - ## Save local and REUPLOAD TRANSECTS to S3 to update for any extended cross sections - message("Saving updated transects to:\n - filepath: '", transect_path, "'") - - # save flowlines to out_path (lynker-spatial/01_transects/transects_<VPU num>.gpkg) - sf::write_sf( - out_transects, - transect_path - ) - - # command to copy transects geopackage to S3 - trans_to_s3 <- paste0("aws s3 cp ", transect_path, " ", transects_prefix, transect_file, - ifelse(is.null(aws_profile), "", paste0(" --profile ", aws_profile))) - message("Copy VPU ", path_df$vpu[i], " transects to S3:\n - S3 copy command:\n'", - trans_to_s3, - "'\n==========================") + # 
---------------------------------------------------------- + # ---- Cross section points parquet to S3 ---- + # ---------------------------------------------------------- - system(trans_to_s3, intern = TRUE) - - ###################################### - - } - # classify the cross section points - cs_pts <- - cs_pts %>% - dplyr::rename(cs_widths = cs_lengthm) %>% - hydrofabric3D::classify_points() %>% + fixed_pts <- + fixed_pts %>% dplyr::mutate( X = sf::st_coordinates(.)[,1], Y = sf::st_coordinates(.)[,2] ) %>% + sf::st_drop_geometry() %>% dplyr::select( hy_id, cs_id, pt_id, - cs_lengthm = cs_widths, + cs_lengthm, relative_distance, X, Y, Z, - class + class, point_type ) # Drop point geometries, leaving just X, Y, Z values - cs_pts <- sf::st_drop_geometry(cs_pts) + fixed_pts <- sf::st_drop_geometry(fixed_pts) # add Z_source column for source of elevation data - cs_pts <- - cs_pts %>% + fixed_pts <- + fixed_pts %>% dplyr::mutate( Z_source = cs_source ) %>% dplyr::relocate(hy_id, cs_id, pt_id, cs_lengthm, relative_distance, X, Y, Z, Z_source, class) + # Number of final output transects and the number of unique tmpIDs (hy_id/cs_id , i.e. cross sections) + if(COLLECT_META) { + output_cs_pts_count <- nrow(fixed_pts) + output_cs_pts_ids <- length(unique(hydrofabric3D::add_tmp_id(fixed_pts)$tmp_id)) + dropped_transects_count <- transect_count - output_transects_count + } + ###################################### - + + # ---------------------------------------------------------- + # ---- Re upload the updated transects geopackage to S3 ---- + # ---------------------------------------------------------- + updated_path <- gsub(transect_file, paste0("updated_", transect_file), transect_path) + + ## Save local and REUPLOAD TRANSECTS to S3 to update for any extended cross sections + message("Saving updated transects to:\n - filepath: '", updated_path, "'") + + # save flowlines to out_path (lynker-spatial/01_transects/transects_<VPU num>.gpkg) + sf::write_sf( + out_transects, + # transect_path + updated_path + ) + + # command to copy transects geopackage to S3 + trans_to_s3 <- paste0("aws s3 cp ", updated_path, " ", transects_prefix, transect_file, + ifelse(is.null(aws_profile), "", paste0(" --profile ", aws_profile))) + + message("Copy VPU ", path_df$vpu[i], " transects to S3:\n - S3 copy command:\n'", + trans_to_s3, + "'\n==========================") + + system(trans_to_s3, intern = TRUE) + ###################################### + ###################################### + + # ---------------------------------------------------------- + # ---- Upload the cross section points parquet to S3 ---- + # ---------------------------------------------------------- + # name of file and path to save transects gpkg too out_file <- paste0("nextgen_", path_df$vpu[i], "_cross_sections.parquet") out_path <- paste0(cs_pts_dir, out_file) @@ -233,7 +492,7 @@ for (i in 1:nrow(path_df)) { message("Saving cross section points to:\n - filepath: '", out_path, "'") # save cross section points as a parquet to out_path (lynker-spatial/02_cs_pts/cs_pts_<VPU num>.parquet) - arrow::write_parquet(cs_pts, out_path) + arrow::write_parquet(fixed_pts, out_path) # command to copy cross section points parquet to S3 copy_cs_pts_to_s3 <- paste0("aws s3 cp ", out_path, " ", cs_pts_prefix, out_file, @@ -245,6 +504,50 @@ for (i in 1:nrow(path_df)) { "'\n==========================") system(copy_cs_pts_to_s3, intern = TRUE) - -} + + end <- Sys.time() + + message("Finished cross section point generation for VPU ", VPU) + message("- Completed at: ", 
end) + message("==========================") + + if(COLLECT_META) { + + meta_df <- data.frame( + vpu = VPU, + start = as.character(start), + end = as.character(end), + start_cs_pts = as.character(start_cs_pts), + end_cs_pts = as.character(end_cs_pts), + start_rectify = as.character(start_rectify), + end_rectify = as.character(end_rectify), + fline_count = fline_count, + transect_count = transect_count, + wb_count = wb_count, + fline_wb_count = fline_wb_count, + transect_wb_count = transect_wb_count, + start_cs_pts_count = start_cs_pts_count, + start_cs_pts_ids = start_cs_pts_ids, + rectify_cs_pts_count = rectify_cs_pts_count, + rectify_cs_pts_ids = rectify_cs_pts_id_count, + extended_transects_count = extended_transects_count, + extended_transects_ids = extended_transects_ids, + dropped_transects = dropped_transects_count, + output_transects_count = output_transects_count, + output_cs_pts_count = output_cs_pts_count, + output_transects_ids = output_transects_ids, + output_cs_pts_ids = output_cs_pts_ids + ) + + order_df <- cbind(data.frame(vpu = VPU), start_order_count, rectify_order_count) + + readr::write_csv(meta_df, paste0(meta_path, "nextgen_", VPU, "_cross_sections_metadata.csv")) + readr::write_csv(order_df, paste0(meta_path, "nextgen_", VPU, "_cross_sections_streamorder.csv")) + } + + rm(fixed_pts) + gc() + gc() + + } diff --git a/runners/cs_runner/config.R b/runners/cs_runner/config.R index 58cb4ed..41f995d 100644 --- a/runners/cs_runner/config.R +++ b/runners/cs_runner/config.R @@ -3,9 +3,11 @@ pacman::p_load( archive, hydrofabric, hydrofabric3D - # terrainSliceR ) +# # install.packages("devtools") +# devtools::install_github("anguswg-ucsb/hydrofabric3D") + # load root directory source("runners/cs_runner/config_vars.R") @@ -139,67 +141,173 @@ align_files_by_vpu <- function( # Returns a list of length 2 with logical vectors that subsets the "flowlines" and "transects" sf objects to remove flowlines and transects that intersect waterbodies ### Returns a list of length 2 with updated "flowlines" and "transects" sf objects wb_intersects <- function(flowlines, trans, waterbodies) { - + ######## ######## ######## ######## ######## ######## - - flowlines_geos <- geos::as_geos_geometry(flowlines) - wbs_geos <- geos::as_geos_geometry(waterbodies) - - # temporary ID for transects that is the "hy_id", underscore, "cs_id", used for subsetting in future steps - trans$tmp_id <- paste0(trans$hy_id, "_", trans$cs_id) - - message("Checking flowlines against waterbodies...") - - # create an index between flowlines and waterbodies - wb_index <- geos::geos_intersects_matrix(flowlines_geos, wbs_geos) - - # remove any flowlines that cross more than 1 waterbody - to_keep <- flowlines[lengths(wb_index) == 0, ] - to_check <- flowlines[lengths(wb_index) != 0, ] - - # subset transects to the hy_ids in "to_check" set of flowlines - trans_check <- trans[trans$hy_id %in% unique(to_check$id), ] - # trans_check <- trans_geos[trans$hy_id %in% unique(to_check$id)] - - # check where the transects linestrings intersect with the waterbodies - trans_geos_check <- geos::as_geos_geometry(trans_check) - - message("Checking transects against waterbodies...") - trans_wb_index <- geos::geos_intersects_any( - trans_geos_check, - wbs_geos[unlist(wb_index)] + + flowlines_geos <- geos::as_geos_geometry(flowlines) + wbs_geos <- geos::as_geos_geometry(waterbodies) + + # temporary ID for transects that is the "hy_id", underscore, "cs_id", used for subsetting in future steps + trans$tmp_id <- paste0(trans$hy_id, "_", trans$cs_id) + 
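+  # (e.g. a transect on flowline "wb-1002" with cs_id 3 gets tmp_id "wb-1002_3",
+  #  the same hy_id_cs_id convention produced by hydrofabric3D::add_tmp_id())
+  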
+  message("Checking flowlines against waterbodies...")
+  
+  # create an index between flowlines and waterbodies
+  wb_index <- geos::geos_intersects_matrix(flowlines_geos, wbs_geos)
+  
+  # split flowlines into those that cross any waterbody (to_check) and those that cross none (to_keep)
+  to_keep  <- flowlines[lengths(wb_index) == 0, ]
+  to_check <- flowlines[lengths(wb_index) != 0, ]
+  
+  # subset transects to the hy_ids in "to_check" set of flowlines
+  trans_check <- trans[trans$hy_id %in% unique(to_check$id), ]
+  # trans_check <- trans_geos[trans$hy_id %in% unique(to_check$id)]
+  
+  # check where the transect linestrings intersect with the waterbodies
+  trans_geos_check <- geos::as_geos_geometry(trans_check)
+  
+  message("Checking transects against waterbodies (v2) ...")
+  wb_trans_index <- geos::geos_intersects_matrix(trans_geos_check, wbs_geos)                  # (NEW METHOD)
+  # wb_trans_index <- geos::geos_intersects_any(trans_geos_check, wbs_geos[unlist(wb_index)]) # (OLD METHOD)
+  
+  # sum(lengths(wb_trans_index) == 0)
+  # length(wb_trans_index)
+  
+  # within the transect lines that are on a flowline that crosses a waterbody,
+  # check if any of these transect lines DO NOT CROSS A WATERBODY AT ALL
+  trans_keep <- trans_check[lengths(wb_trans_index) == 0, ]  # (NEW METHOD)
+  # trans_keep <- trans_check[!wb_trans_index, ]             # (OLD METHOD)
+  
+  # preserve any flowlines that CROSS A WATERBODY BUT ALSO HAVE A TRANSECT LINE that does NOT cross any waterbodies
+  to_check <- to_check[to_check$id %in% unique(trans_keep$hy_id), ]
+  
+  # update flowlines to keep with flowlines that intersect a waterbody BUT STILL
+  # have transects that are NOT in the waterbody
+  to_keep <- dplyr::bind_rows(to_keep, to_check)
+  
+  # 'tmp_ids' of transects that are being checked and also the transects within trans_check
+  # that were determined to be valid (are being kept)
+  check_ids <- unique(trans_check$tmp_id)
+  keep_ids  <- unique(trans_keep$tmp_id)
+  
+  # logical vectors of which flowlines/transects to keep (KEEP == TRUE)
+  # - Remove any transects that are on flowlines that cross a waterbody AND the transect crosses the waterbody too.
+  # - Keep original transects that are not on flowlines that intersect waterbodies AND
+  #   also the transects that do NOT intersect waterbodies but are on a flowline that DOES intersect a waterbody
+  # - check_ids[!check_ids %in% keep_ids] is the set of checked transects that were NOT kept (i.e. the ones to drop)
+  valid_flowlines <- flowlines$id %in% to_keep$id
+  valid_transects <- trans$tmp_id %in% dplyr::filter(trans,
+                                                     !tmp_id %in% check_ids[!check_ids %in% keep_ids])$tmp_id
+  
+  # return a list of updated flowlines and transects
+  return(
+    list(
+      "valid_flowlines" = valid_flowlines,
+      "valid_transects" = valid_transects
+    )
+  )
+}
+
+# Update flowlines and transects to remove flowlines and transects that intersect with reference_features waterbodies
+# (previous version of wb_intersects(), kept for reference)
+# flowlines: flowlines linestring sf object
+# trans: transects linestring sf object
+# waterbodies: waterbodies polygon sf object
+# Returns a list of length 2 with logical vectors that subset the "flowlines" and "transects" sf objects
+# to remove flowlines and transects that intersect waterbodies
+wb_intersects_v1 <- function(flowlines, trans, waterbodies) {
+  
+  ######## ######## ######## ######## ######## ########
+  
+  flowlines_geos <- geos::as_geos_geometry(flowlines)
+  wbs_geos <- geos::as_geos_geometry(waterbodies)
+  
+  # temporary ID for transects that is the "hy_id", underscore, "cs_id", used for subsetting in future steps
+  trans$tmp_id <- paste0(trans$hy_id, "_", trans$cs_id)
+  
+  message("Checking flowlines against waterbodies...")
+  
+  # create an index between flowlines and waterbodies
+  wb_index <- geos::geos_intersects_matrix(flowlines_geos, wbs_geos)
+  
+  # split flowlines into those that cross any waterbody (to_check) and those that cross none (to_keep)
+  to_keep  <- flowlines[lengths(wb_index) == 0, ]
+  to_check <- flowlines[lengths(wb_index) != 0, ]
+  
+  # subset transects to the hy_ids in "to_check" set of flowlines
+  trans_check <- trans[trans$hy_id %in% unique(to_check$id), ]
+  # trans_check <- trans_geos[trans$hy_id %in% unique(to_check$id)]
+  
+  # check where the transect linestrings intersect with the waterbodies
+  trans_geos_check <- geos::as_geos_geometry(trans_check)
+  
+  message("Checking transects against waterbodies...")
+  trans_wb_index <- geos::geos_intersects_any(
+    trans_geos_check,
+    wbs_geos[unlist(wb_index)]
+  )
+  
+  # within the transect lines that are on a flowline that crosses a waterbody,
+  # check if any of these transect lines DO NOT CROSS A WATERBODY AT ALL
+  trans_keep <- trans_check[!trans_wb_index, ]
+  # trans_keep <- trans_check[lengths(trans_wb_index2) == 0, ]
+  
+  # preserve any flowlines that CROSS A WATERBODY BUT ALSO HAVE A TRANSECT LINE that does NOT cross any waterbodies
+  to_check <- to_check[to_check$id %in% unique(trans_keep$hy_id), ]
+  
+  # update flowlines to keep with flowlines that intersect a waterbody BUT STILL
+  # have transects that are NOT in the waterbody
+  to_keep <- dplyr::bind_rows(to_keep, to_check)
+  
+  # 'tmp_ids' of transects that are being checked and also the transects within trans_check
+  # that were determined to be valid (are being kept)
+  check_ids <- unique(trans_check$tmp_id)
+  keep_ids <- unique(trans_keep$tmp_id)
+  
+  # logical vectors of which flowlines/transects to keep (KEEP == TRUE)
+  # - Remove any transects that are on flowlines that cross a waterbody AND the transect crosses the waterbody too.
+  # - Keep original transects that are not on flowlines that intersect waterbodies AND
+  #   also the transects that do NOT intersect waterbodies but are on a flowline that DOES intersect a waterbody
+  valid_flowlines <- flowlines$id %in% to_keep$id
+  valid_transects <- trans$tmp_id %in% dplyr::filter(trans,
+                                                     !tmp_id %in% check_ids[!check_ids %in% keep_ids])$tmp_id
 
-  # within the transects lines that are on a flowline that crosses a waterbody,
-  # check if any of these transects line DO NOT CROSS A WATERBODY AT ALL
-  trans_keep <- trans_check[!trans_wb_index, ]
-  # trans_keep <- trans_check[lengths(trans_wb_index2) == 0, ]
-  
-  # preserve any flowlines that CROSS A WATERBODY BUT ALSO HAVE A TRANSECT LINE that does NOT cross any waterbodies
-  to_check <- to_check[to_check$id %in% unique(trans_keep$hy_id), ]
-  
-  # update flowlines to keep with flowlines that intersect a waterbody BUT STILL,
-  # have transects that are NOT in the waterbody
-  to_keep <- dplyr::bind_rows(to_keep, to_check)
-  
-  # 'tmp_ids' of transects that are being checked and also the transects within trans_check
-  # that were determined to be valid (are being kept)
-  check_ids <- unique(trans_check$tmp_id)
-  keep_ids <- unique(trans_keep$tmp_id)
-  
-  # logical vectors of which flowlines/transects to keep (KEEP == TRUE)
-  # - Remove any transects that are on flowlines that cross a waterbody AND the transect crosses the waterbody too.
- # - Keep original transects that are not on flowlines that intersect waterbodies AND - # also the transects that do NOT intersect waterbodies but are on a flowline that DOES intersect a waterbody - valid_flowlines <- flowlines$id %in% to_keep$id - valid_transects <- trans$tmp_id %in% dplyr::filter(trans, - !tmp_id %in% check_ids[!check_ids %in% keep_ids])$tmp_id - - # return alist of updated flowlines and transects - return( - list( - "valid_flowlines" = valid_flowlines, - "valid_transects" = valid_transects - ) + # return alist of updated flowlines and transects + return( + list( + "valid_flowlines" = valid_flowlines, + "valid_transects" = valid_transects ) + ) } diff --git a/runners/cs_runner/config_vars.R b/runners/cs_runner/config_vars.R index 72d87b9..c282421 100644 --- a/runners/cs_runner/config_vars.R +++ b/runners/cs_runner/config_vars.R @@ -15,6 +15,10 @@ DEM_URL <- "/vsicurl/https://prd-tnm.s3.amazonaws.com/StagedProducts/Elevat # Default setting is 50% of the original transect lines length (0.5) EXTENSION_PCT <- 0.5 +# Whether to collect meta data from runs to generate an output CSV (currently only being created in 02_cs_pts.R) +COLLECT_META <- TRUE + + # # create the directory if it does NOT exist # if(!dir.exists(base_dir)) { # message(glue::glue('Base directory does not exist...\nCreating directory: {base_dir}')) From a5d6e05ea0ec6e6ecff35389809f29d773536139 Mon Sep 17 00:00:00 2001 From: anguswg-ucsb <anguswatters@gmail.com> Date: Mon, 26 Feb 2024 08:01:03 -0800 Subject: [PATCH 06/64] added COLLECT_META flag before setting the meta_path variable in cs runner --- runners/cs_runner/02_cs_pts.R | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/runners/cs_runner/02_cs_pts.R b/runners/cs_runner/02_cs_pts.R index 93c561b..8b2afbf 100644 --- a/runners/cs_runner/02_cs_pts.R +++ b/runners/cs_runner/02_cs_pts.R @@ -40,8 +40,12 @@ path_df <- align_files_by_vpu( by = "vpu" ) -# Local path to save CSVs of cross section meta data during each iteration -meta_path <- "/local/path/to/save/cross_section_meta_data/" +# Where should meta data CSVs be saved to? +if(COLLECT_META) { + # Local path to save CSVs of cross section meta data during each iteration + meta_path <- "/local/path/to/save/cross_section_meta_data/" + +} # loop over the nextgen and transect datasets (by VPU) and extract point elevations across points on each transect line, # then classify the points, and create a parquet file with hy_id, cs_id, pt_id, X, Y, Z data. 
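
A note on the transect filter inside wb_intersects() above: the expression check_ids[!check_ids %in% keep_ids] is the set of "checked" transects that were NOT kept, i.e. exactly the transects to drop. A tiny, self-contained R illustration of that keep/drop set logic (toy IDs, not real hydrofabric values):

# toy illustration of the keep/drop set logic used in wb_intersects()
check_ids <- c("wb-1_1", "wb-1_2", "wb-2_1")   # transects on flowlines that hit a waterbody
keep_ids  <- c("wb-1_2")                       # of those, the transects that avoid all waterbodies

drop_ids <- check_ids[!check_ids %in% keep_ids]        # "wb-1_1", "wb-2_1"

all_ids <- c("wb-1_1", "wb-1_2", "wb-2_1", "wb-3_1")   # tmp_ids of every transect
valid_transects <- !all_ids %in% drop_ids
# TRUE for "wb-1_2" (checked and kept) and "wb-3_1" (never on a waterbody-crossing flowline)

A transect is kept when valid_transects is TRUE for its tmp_id, which mirrors the trans$tmp_id %in% dplyr::filter(trans, !tmp_id %in% check_ids[!check_ids %in% keep_ids])$tmp_id expression in the function above.
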
From ad88c273733f21936390ccfac3cfdd5e5eb2a72e Mon Sep 17 00:00:00 2001
From: anguswg-ucsb <anguswatters@gmail.com>
Date: Wed, 28 Feb 2024 08:53:06 -0800
Subject: [PATCH 07/64] updated cross section points generation to better
 rectify flat cross section points; also added COLLECT_META and META_PATH
 variables in config_vars.R that, if enabled, save out the changes made
 during cross section point rectification

---
 runners/cs_runner/01_transects.R |  3 ---
 runners/cs_runner/02_cs_pts.R    | 29 +++++++++++------------------
 runners/cs_runner/config_vars.R  |  5 +++++
 3 files changed, 16 insertions(+), 21 deletions(-)

diff --git a/runners/cs_runner/01_transects.R b/runners/cs_runner/01_transects.R
index 6bc0025..8216658 100644
--- a/runners/cs_runner/01_transects.R
+++ b/runners/cs_runner/01_transects.R
@@ -3,7 +3,6 @@ source("runners/cs_runner/config.R")
 
 # # # # load libraries
 # library(hydrofabric3D)
-# # library(terrainSliceR)
 # library(dplyr)
 # library(sf)
 
@@ -27,8 +26,6 @@ path_df <- align_files_by_vpu(
 # loop over each VPU and generate cross sections, then save locally and upload to S3 bucket
 for(i in 1:nrow(path_df)) {
 
-  # i = 8
-
   # nextgen file and full path
   nextgen_file <- path_df$x[i]
   nextgen_path <- paste0(nextgen_dir, nextgen_file)

diff --git a/runners/cs_runner/02_cs_pts.R b/runners/cs_runner/02_cs_pts.R
index 8b2afbf..2423cad 100644
--- a/runners/cs_runner/02_cs_pts.R
+++ b/runners/cs_runner/02_cs_pts.R
@@ -40,13 +40,6 @@ path_df <- align_files_by_vpu(
     by = "vpu"
   )
 
-# Where should meta data CSVs be saved to?
-if(COLLECT_META) {
-  # Local path to save CSVs of cross section meta data during each iteration
-  meta_path <- "/local/path/to/save/cross_section_meta_data/"
-  
-}
-
 # loop over the nextgen and transect datasets (by VPU) and extract point elevations across points on each transect line,
 # then classify the points, and create a parquet file with hy_id, cs_id, pt_id, X, Y, Z data.
# Save parquet locally and upload to specified S3 bucket
@@ -175,15 +168,15 @@ for (i in 1:nrow(path_df)) {
   system.time({
     # cs_pts <- hydrofabric3D::rectify_flat_cs(
     fixed_pts <- hydrofabric3D::rectify_flat_cs(
-      cs_pts = cs_pts,
-      net = flines,
-      cs = transects,
-      points_per_cs = NULL,
-      min_pts_per_cs = 10,
-      dem = DEM_URL,
-      scale = EXTENSION_PCT,
-      threshold = 1,
-      pct_threshold = 0.99,
+      cs_pts = cs_pts,          # cross section points generated from hydrofabric3D::cross_section_pts()
+      net = flines,             # original flowline network
+      cs = transects,           # original transect lines
+      points_per_cs = NULL,
+      min_pts_per_cs = 10,      # number of points per cross section
+      dem = DEM_URL,            # DEM to extract points from
+      scale = EXTENSION_PCT,    # how far to extend transects if the points need to be rechecked
+      threshold = 1,            # flag points within 1 meter of the bottom
+      pct_threshold = 0.99,     # rectify if 99% of points are within 1 meter of the bottom
       fix_ids = FALSE
     )
   })
@@ -545,8 +538,8 @@ for (i in 1:nrow(path_df)) {
 
     order_df <- cbind(data.frame(vpu = VPU), start_order_count, rectify_order_count)
     
-    readr::write_csv(meta_df, paste0(meta_path, "nextgen_", VPU, "_cross_sections_metadata.csv"))
-    readr::write_csv(order_df, paste0(meta_path, "nextgen_", VPU, "_cross_sections_streamorder.csv"))
+    readr::write_csv(meta_df, paste0(META_PATH, "nextgen_", VPU, "_cross_sections_metadata.csv"))
+    readr::write_csv(order_df, paste0(META_PATH, "nextgen_", VPU, "_cross_sections_streamorder.csv"))
   }
   
   rm(fixed_pts)

diff --git a/runners/cs_runner/config_vars.R b/runners/cs_runner/config_vars.R
index c282421..8745b7a 100644
--- a/runners/cs_runner/config_vars.R
+++ b/runners/cs_runner/config_vars.R
@@ -18,6 +18,11 @@ EXTENSION_PCT <- 0.5
 # Whether to collect meta data from runs to generate an output CSV (currently only being created in 02_cs_pts.R)
 COLLECT_META <- TRUE
 
+# Where should meta data CSVs be saved to?
+# Local path to save CSVs of cross section meta data during each iteration
+META_PATH <- '/Users/anguswatters/Desktop/cs_meta/'
+# META_PATH <- "/local/path/to/save/cross_section_meta_data/"
+
 
 # # create the directory if it does NOT exist

From 7bf195e7d68f436e56ce264e2f162c956d47dfc3 Mon Sep 17 00:00:00 2001
From: anguswg-ucsb <anguswatters@gmail.com>
Date: Tue, 19 Mar 2024 15:38:06 -0700
Subject: [PATCH 08/64] added download script for FEMA 100 year flood fgb
 files, and related config variables

---
 runners/cs_runner/01_transects.R     |    5 +-
 runners/cs_runner/02_cs_pts.R        | 1327 ++++++++++++++++++--------
 runners/cs_runner/config_vars.R      |   14 +
 runners/cs_runner/download_fema100.R |   50 +
 4 files changed, 988 insertions(+), 408 deletions(-)
 create mode 100644 runners/cs_runner/download_fema100.R

diff --git a/runners/cs_runner/01_transects.R b/runners/cs_runner/01_transects.R
index 8216658..912d95d 100644
--- a/runners/cs_runner/01_transects.R
+++ b/runners/cs_runner/01_transects.R
@@ -5,6 +5,9 @@ source("runners/cs_runner/config.R")
 # library(hydrofabric3D)
 # library(dplyr)
 # library(sf)
+# install.packages("devtools")
+
+# devtools::install_github("anguswg-ucsb/hydrofabric3D")
 
 # transect bucket prefix
 transects_prefix <- paste0(s3_bucket, version_prefix, "/3D/transects/")
@@ -39,7 +42,7 @@ for(i in 1:nrow(path_df)) {
 
   # read in nextgen data
   flines <- sf::read_sf(nextgen_path, layer = "flowpaths")
-  
+
   # # model attributes
   # model_attrs <- arrow::read_parquet(model_attr_path)

diff --git a/runners/cs_runner/02_cs_pts.R b/runners/cs_runner/02_cs_pts.R
index 2423cad..dff9218 100644
--- a/runners/cs_runner/02_cs_pts.R
+++ b/runners/cs_runner/02_cs_pts.R
@@ -1,11 +1,15 @@
+
+###########################################################################################################################################
+#################################################### REDO EVERYTHING #####################################################################
+###########################################################################################################################################
+
 # Generate the flowlines layer for the final cross_sections_<VPU>.gpkg for each VPU
 source("runners/cs_runner/config.R")
 
 # # load libraries
-# library(hydrofabric3D)
-# # library(terrainSliceR)
-# library(dplyr)
-# library(sf)
+library(hydrofabric3D)
+library(dplyr)
+library(sf)
 
 # cross section bucket prefix
 cs_pts_prefix <- paste0(s3_bucket, version_prefix, "/3D/dem-cross-sections/")
@@ -19,6 +23,7 @@ nextgen_files <- list.files(nextgen_dir, full.names = FALSE)
 
 # paths to nextgen datasets
 transect_files <- list.files(transects_dir, full.names = FALSE)
+transect_files <- transect_files[!grepl("updated_", transect_files)]
 
 # string to fill in "cs_source" column in output datasets
 cs_source <- "hydrofabric3D"
@@ -31,20 +36,20 @@ ref_df <- data.frame(
 # ensure the files are in the same order and matched up by VPU
 path_df <- align_files_by_vpu(
-    x    = nextgen_files,
-    y    = transect_files,
-    base = base_dir
-  ) %>%
-  dplyr::left_join(
-    ref_df,
-    by = "vpu"
-  )
+  x    = nextgen_files,
+  y    = transect_files,
+  base = base_dir
+) %>%
+  dplyr::left_join(
+    ref_df,
+    by = "vpu"
+  )
 
 # loop over the nextgen and transect datasets (by VPU) and extract point elevations across points on each transect line,
 # then classify the points, and create a parquet file with hy_id, cs_id, pt_id, X, Y, Z data.
# Save parquet locally and upload to specified S3 bucket -for (i in 1:nrow(path_df)) { - +for (i in 20:nrow(path_df)) { + start <- Sys.time() # nextgen file and full path @@ -67,386 +72,319 @@ for (i in 1:nrow(path_df)) { "'\n - transects: '", transect_file, "'", "\n - waterbodies: '", ref_file, "'", "'\n - start time: '", start, "'" - ) - + ) + ################### - # read in transects data - transects <- sf::read_sf(transect_path) + # read in transects data + transects <- sf::read_sf(transect_path) - # read in nextgen data - flines <- sf::read_sf(nextgen_path, layer = "flowpaths") + # read in nextgen data + flines <- sf::read_sf(nextgen_path, layer = "flowpaths") - # read in waterbodies reference features layer - waterbodies <- sf::read_sf(ref_path, layer = "waterbodies") - - ##### subset flowlines and transects to first 5 features for testing ##### - # flines = dplyr::slice(flines, 1:5) - # transects = dplyr::filter(transects, hy_id %in% unique(flines$id)) - ##### ##### - - # system.time({ - # Update flowlines and transects to remove flowlines and transects that intersect with reference_features waterbodies - feature_subsets <- wb_intersects(flines, transects, waterbodies) - # }) - - # Collect meta data on features and changes - if(COLLECT_META) { - - fline_count <- nrow(flines) - transect_count <- nrow(transects) - wb_count <- nrow(waterbodies) - - fline_wb_count <- sum(feature_subsets$valid_flowlines) - transect_wb_count <- sum(feature_subsets$valid_transects) - } - - # replace flowlines and transects objects with updated versions in "updated_features" - flines <- flines[feature_subsets$valid_flowlines, ] - transects <- transects[feature_subsets$valid_transects, ] - - rm(waterbodies) - gc() - - start_cs_pts <- Sys.time() - message("Extracting cross section points (", start_cs_pts,")") - - system.time({ + # read in waterbodies reference features layer + waterbodies <- sf::read_sf(ref_path, layer = "waterbodies") + + ##### subset flowlines and transects to first 5 features for testing ##### + # flines = dplyr::slice(flines, 1:5) + # transects = dplyr::filter(transects, hy_id %in% unique(flines$id)) + ##### ##### + # system.time({ + # Update flowlines and transects to remove flowlines and transects that intersect with reference_features waterbodies + feature_subsets <- wb_intersects(flines, transects, waterbodies) + # }) + + # tmp <- transects %>% + # dplyr::slice(1:15000) + # tmp_flines <- flines %>% + # dplyr::filter(id %in% tmp$hy_id) + # mapview::mapview(tmp_flines, color = "dodgerblue") + mapview::mapview(tmp, color = "red") + + # replace flowlines and transects objects with updated versions in "updated_features" + flines <- flines[feature_subsets$valid_flowlines, ] + transects <- transects[feature_subsets$valid_transects, ] + + rm(waterbodies) + gc() + + ################################################################# + ##### Temporary subsetting to speed local development up ######## + ################################################################# + + # flines <- + # flines %>% + # dplyr::group_by(order) %>% + # dplyr::slice(1:100) %>% + # dplyr::ungroup() + # # flines <- + # # flines %>% + # # dplyr::slice(1:2500) + # + # transects <- + # transects %>% + # # dplyr::filter(hy_id %in% unique(tmp$id)) + # dplyr::filter(hy_id %in% unique(flines$id)) + + ################################################################# + ################################################################# + + start_cs_pts <- Sys.time() + message("Extracting cross section points (", start_cs_pts, ")") 
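+  
+  # The extraction below proceeds in three steps:
+  #   STEP 1: sample DEM elevations along each transect (hydrofabric3D::cross_section_pts)
+  #   STEP 2: drop cross sections with any missing Z values, then classify the points
+  #   STEP 3: rectify cross sections with invalid banks or no relief (hydrofabric3D::rectify_cs)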
+ # message("Extracting cross section points (", Sys.time(),")") + + # STEP 1: Extract cs points from DEM + # system.time({ + # get cross section point elevations cs_pts <- hydrofabric3D::cross_section_pts( cs = transects, points_per_cs = NULL, min_pts_per_cs = 10, dem = DEM_URL - ) - - }) - - end_cs_pts <- Sys.time() - message("\n ---> Completed extraction of cross section points (", end_cs_pts,")") - - if(COLLECT_META) { - start_cs_pts_count <- nrow(cs_pts) - } - - # cs_pts_time <- round(as.numeric(end_cs_pts - start_cs_pts ), 2) - # message("\n\n ---> Cross section point elevations processed in ", cs_pts_time) - - start_rectify <- Sys.time() - message("Rectifying cross section points (", start_rectify,")") - - # collect the hy_ids and number of stream orders in cs_pts - if(COLLECT_META) { - - cs_pts_ids <- unique(cs_pts$hy_id) - start_cs_pts_ids <- length(cs_pts_ids) - - start_order_count <- - flines %>% - sf::st_drop_geometry() %>% - dplyr::filter(id %in% cs_pts_ids) %>% - dplyr::group_by(order) %>% - dplyr::count() %>% - tidyr::pivot_wider(names_from = order, - names_glue = "start_order_{order}", - values_from = n - ) %>% - dplyr::ungroup() - } - - # Remove any cross section that has ANY missing (NA) Z values. - cs_pts <- - cs_pts %>% - # dplyr::filter(hy_id %in% c("wb-849054", "wb-845736")) %>% - dplyr::group_by(hy_id, cs_id) %>% - dplyr::filter(!any(is.na(Z))) %>% - dplyr::ungroup() - # try to extend any cross sections that returned cross section points with - # identical Z values within a certain threshold ("flat" cross sections) + ) - system.time({ + # }) + + # system.time({ + + # STEP 2: + # Remove any cross section that has ANY missing (NA) Z values, and classify the points + cs_pts <- + cs_pts %>% + # dplyr::filter(hy_id %in% c("wb-849054", "wb-845736")) %>% + dplyr::group_by(hy_id, cs_id) %>% + dplyr::filter(!any(is.na(Z))) %>% + dplyr::ungroup() %>% + hydrofabric3D::classify_points(pct_of_length_for_relief = 0.01) + + # }) + + # STEP 3: Try to rectify any no relief and invalid banks cross sections + system.time({ # cs_pts <- hydrofabric3D::rectify_flat_cs( - fixed_pts <- hydrofabric3D::rectify_flat_cs( - cs_pts = cs_pts, # cross section points generated from hydrofabric3D::cross_section_pts() - net = flines, # original flowline network - cs = transects, # original transect lines - points_per_cs = NULL, - min_pts_per_cs = 10, # number of points per cross sections - dem = DEM_URL, # DEM to extract points from - scale = EXTENSION_PCT, # How far to extend transects if the points need to be rechecked - threshold = 1, # 1 meter from bottom - pct_threshold = 0.99, # rectify if 99% points are within 1 meter from the bottom - fix_ids = FALSE - ) - }) - - - end_rectify <- Sys.time() - rectify_time <- round(as.numeric(end_rectify - start_rectify ), 2) - - message("\n ---> Completed rectifying cross section points (", end_rectify,")") - - if(COLLECT_META) { - rectify_cs_pts_count <- nrow(fixed_pts) - # collect the hy_ids and number of stream orders in the RECTIFIED cs_pts - rectify_cs_pts_ids <- unique(fixed_pts$hy_id) - rectify_cs_pts_id_count <- length(rectify_cs_pts_ids) - - rectify_order_count <- - flines %>% - sf::st_drop_geometry() %>% - dplyr::filter(id %in% rectify_cs_pts_ids) %>% - dplyr::group_by(order) %>% - dplyr::count() %>% - tidyr::pivot_wider(names_from = order, - names_glue = "rectify_order_{order}", - values_from = n - ) %>% - dplyr::ungroup() - } - - rm(cs_pts) - gc() - - message("\n\n ---> Cross section points rectified in ", rectify_time, " (seconds?) 
") - - # Remove any cross section that has ANY missing (NA) Z values. - fixed_pts <- - fixed_pts %>% - # dplyr::filter(hy_id %in% c("wb-849054", "wb-845736")) %>% - # dplyr::group_by(hy_id, cs_id) %>% - # dplyr::filter(!any(is.na(Z))) %>% - # dplyr::ungroup() %>% - hydrofabric3D::add_tmp_id() - - # Number of cross section points after removing any cross sections that contain any NA Z values - if(COLLECT_META) { - cs_pts_na_removed_count <- nrow(fixed_pts) - } - - # Stash meta data about the points - pts_meta <- - fixed_pts %>% - sf::st_drop_geometry() %>% - dplyr::select(hy_id, cs_id, pt_id, cs_measure, is_extended) - - message("Classifying cross section points...") - - # Classify points - fixed_pts <- hydrofabric3D::classify_points(fixed_pts) + fixed_pts <- hydrofabric3D::rectify_cs( + cs_pts = cs_pts, # cross section points generated from hydrofabric3D::cross_section_pts() + net = flines, # original flowline network + transects = transects, # original transect lines + points_per_cs = NULL, + min_pts_per_cs = 10, # number of points per cross sections + dem = DEM_URL, # DEM to extract points from + scale = EXTENSION_PCT, # How far to extend transects if the points need to be rechecked + pct_of_length_for_relief = PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF, # percent of cross sections length to be needed in relief calculation to consider cross section to "have relief" + fix_ids = FALSE, + verbose = TRUE + ) + }) + # }) + + # get a summary dataframe and print out details message + rectify_summary <- hydrofabric3D::rectify_summary(cs_pts, fixed_pts, verbose = TRUE) + rectify_summary <- + rectify_summary %>% + dplyr::mutate( + vpu = VPU + ) %>% + dplyr::relocate(vpu, metric, value) + + readr::write_csv(rectify_summary, paste0(META_PATH, "nextgen_", VPU, "_cross_sections_metadata.csv")) + + # get the counts of each point type to add this data to the transects dataset + point_type_counts <- hydrofabric3D::get_point_type_counts(fixed_pts, add = FALSE) + + # # check the number of cross sections that were extended + # fixed_pts$is_extended %>% table() + message("Subsetting cross section points generated after extending transects...") + + # extract cross section points that have an "is_extended" value of TRUE + extended_pts <- + fixed_pts %>% + dplyr::filter(is_extended) %>% + hydrofabric3D::add_tmp_id() + # dplyr::mutate(tmp_id = paste0(hy_id, "_", cs_id)) + + # extract transects that have a "hy_id" in the "extended_pts" dataset + update_transects <- + transects %>% + hydrofabric3D::add_tmp_id() %>% + dplyr::filter(tmp_id %in% unique(extended_pts$tmp_id)) + + # if any transects were extended, update the transects dataset, and overwrite local and S3 transects geopackages + if (nrow(update_transects) > 0) { + message("Updating ", nrow(update_transects), " transects") - # add meta data back to the points - fixed_pts <- - fixed_pts %>% - dplyr::left_join( - pts_meta, - by = c("hy_id", "cs_id", "pt_id") - # dplyr::select(pts_meta, hy_id, cs_id, pt_id, cs_measure, is_extended) + update_transects <- + update_transects %>% + # dplyr::filter(hy_id %in% unique(extended_pts$hy_id)) %>% + # apply extend_by_percent function to each transect line: + hydrofabric3D:::extend_by_percent( + pct = EXTENSION_PCT, + length_col = "cs_lengthm" ) - message("Gathering count of point types per cross section...") - - # get the counts of each point type to add this data to the transects dataset - point_type_counts <- hydrofabric3D::get_point_type_counts(fixed_pts, add = FALSE) - - # # check the number of cross sections that were 
extended - # fixed_pts$is_extended %>% table() - message("Subsetting cross section points generated after extending transects...") + # # Number of transects being updated + # if(COLLECT_META) { + # extended_transects_count <- nrow(update_transects) + # extended_transects_ids <- length(unique(update_transects$tmp_id)) + # } - # extract cross section points that have an "is_extended" value of TRUE - extended_pts <- - fixed_pts %>% - dplyr::filter(is_extended) %>% - hydrofabric3D::add_tmp_id() - # dplyr::mutate(tmp_id = paste0(hy_id, "_", cs_id)) - - # extract transects that have a "hy_id" in the "extended_pts" dataset - update_transects <- + # start_uids <- hydrofabric3D:::get_unique_tmp_ids(cs_pts) + # end_uids <- hydrofabric3D:::get_unique_tmp_ids(fixed_pts) + # removed_tmp_ids <- start_uids[!start_uids %in% end_uids] + # transects %>% + # hydrofabric3D::add_tmp_id() %>% + # dplyr::filter(!tmp_id %in% removed_tmp_ids) + + # Filter down to ONLY points that were finalized and rectified from rectify_cs_pts() + # remove old transects that have "tmp_id" in "extended_pts" (transects that were unchanged and are "good_to_go") + # and then replace with old transects with the "update_transects" + out_transects <- transects %>% hydrofabric3D::add_tmp_id() %>% - dplyr::filter(tmp_id %in% unique(extended_pts$tmp_id)) - - # Number of cross section points generated from extending transects and number of tmpIDs - if(COLLECT_META) { - extended_pts_count <- nrow(extended_pts) - extended_pts_ids <- length(unique(extended_pts$tmp_id)) - extended_transects_count <- nrow(update_transects) - extended_transects_ids <- length(unique(update_transects$tmp_id)) - } - - # if any transects were extended, update the transects dataset, and overwrite local and S3 transects geopackages - if (nrow(update_transects) > 0) { - message("Updating ", nrow(update_transects), " transects") - - update_transects <- - update_transects %>% - # dplyr::filter(hy_id %in% unique(extended_pts$hy_id)) %>% - # apply extend_by_percent function to each transect line: - hydrofabric3D:::extend_by_percent( - pct = EXTENSION_PCT, - length_col = "cs_lengthm" + # dplyr::filter(!tmp_id %in% unique(extended_pts$tmp_id)) %>% + dplyr::filter(tmp_id %in% unique(hydrofabric3D::add_tmp_id(fixed_pts)$tmp_id)) %>% # Subset down to the remaining tmp_ids in the fixed points + dplyr::filter(!tmp_id %in% unique(extended_pts$tmp_id)) %>% # remove the tmp_ids that we are going add back in with the extended versions of those tmp_ids + dplyr::bind_rows( # bring in the new updated extended transects + dplyr::mutate( + update_transects, + is_extended = TRUE ) - - # # Number of transects being updated - # if(COLLECT_META) { - # extended_transects_count <- nrow(update_transects) - # extended_transects_ids <- length(unique(update_transects$tmp_id)) - # } - - # Filter down to ONLY points that were finalized and rectified from rectify_cs_pts() - # remove old transects that have "tmp_id" in "extended_pts" (transects that were unchanged and are "good_to_go") - # and then replace with old transects with the "update_transects" - out_transects <- - transects %>% - hydrofabric3D::add_tmp_id() %>% - # dplyr::filter(!tmp_id %in% unique(extended_pts$tmp_id)) %>% - # dplyr::filter(!tmp_id %in% ) - dplyr::filter(tmp_id %in% unique(hydrofabric3D::add_tmp_id(fixed_pts)$tmp_id)) %>% - dplyr::filter(!tmp_id %in% unique(extended_pts$tmp_id)) %>% - dplyr::bind_rows( - dplyr::mutate( - update_transects, - is_extended = TRUE - ) - ) - - # dplyr::mutate(is_extended = FALSE) %>% - # 
dplyr::bind_rows( - # dplyr::mutate(update_transects, is_extended = TRUE) - # ) %>% - # dplyr::select(-tmp_id) - - } else { - - out_transects <- - transects %>% - hydrofabric3D::add_tmp_id() %>% - dplyr::filter(tmp_id %in% unique(hydrofabric3D::add_tmp_id(fixed_pts)$tmp_id)) %>% - dplyr::filter(!tmp_id %in% unique(extended_pts$tmp_id)) - } + ) - # Number of final output transects and the number of unique tmpIDs (hy_id/cs_id , i.e. cross sections) - if(COLLECT_META) { - output_transects_count <- nrow(out_transects) - output_transects_ids <- length(unique(out_transects$tmp_id)) - } + } else { - # finalize new transects - out_transects <- - out_transects %>% - dplyr::left_join( - point_type_counts, - by = c("hy_id", "cs_id") - ) %>% - dplyr::select(hy_id, cs_source, cs_id, cs_measure, cs_lengthm, - # sinuosity, - is_extended, - left_bank_count, right_bank_count, channel_count, bottom_count, - geom) - - # ------------------------------------------------------------------- - # ---- Re enumerate the transects & cross section points "cs_id" ---- - # ------------------------------------------------------------------- - - # make a dataframe that has a new_cs_id column that has - # the cs_id renumbered to fill in any missing IDs, - # so each hy_id has cs_ids that go from 1 - number of cross sections on hy_id - # The dataframe below will be used to join the "new_cs_id" with - # the original "cs_ids" in the final cross section POINTS and UPDATED TRANSECTS output datasets - renumbered_ids <- - fixed_pts %>% - sf::st_drop_geometry() %>% - # dplyr::filter(hy_id %in% c("wb-2402800", "wb-2398282", "wb-2400351")) %>% - dplyr::select(hy_id, cs_id, pt_id, cs_measure) %>% - dplyr::group_by(hy_id, cs_id) %>% - dplyr::slice(1) %>% - dplyr::ungroup() %>% - hydrofabric3D::add_tmp_id() %>% - dplyr::group_by(hy_id) %>% - dplyr::mutate( - new_cs_id = 1:dplyr::n() - ) %>% - dplyr::ungroup() %>% - dplyr::select(new_cs_id, tmp_id) - - # Renumber the transects to have correct CS IDs - out_transects <- dplyr::left_join( - hydrofabric3D::add_tmp_id(out_transects), - renumbered_ids, - by = "tmp_id" - ) %>% - dplyr::select(-cs_id, -tmp_id) %>% - dplyr::select(hy_id, cs_source, cs_id = new_cs_id, - cs_measure, cs_lengthm, - # sinuosity, - is_extended, - left_bank_count, right_bank_count, channel_count, bottom_count, - geometry = geom - ) - - # # fline_lengths <- sf::st_drop_geometry(flines) %>% - # # dplyr::filter(id %in% out_transects$hy_id) %>% - # # dplyr::mutate(lengthm = lengthkm * 1000) %>% - # # dplyr::select(hy_id = id, lengthm, lengthkm) - # tmp <- dplyr::left_join( out_transects, fline_lengths, by = "hy_id") %>% - # dplyr::mutate(ds_distance = (cs_measure * lengthm) / 100) %>% - # dplyr::select(-sinuosity) %>% - # dplyr::relocate(hy_id, cs_id, cs_measure, lengthm, ds_distance, lengthkm) %>% - # dplyr::rename("geometry" = geom) - - # Renumber the cross sections points to have correct CS IDs - fixed_pts <- dplyr::left_join( - hydrofabric3D::add_tmp_id(fixed_pts), - renumbered_ids, - by = "tmp_id" - ) %>% - dplyr::select(-cs_id, -tmp_id) %>% - dplyr::rename(cs_id = new_cs_id) - - # mapview::mapview(transects, color = "red") + - # mapview::mapview(dplyr::filter(out_transects, is_extended), color = "green") + - # mapview::mapview(flines, color = "dodgerblue") - - ###################################### - - # ---------------------------------------------------------- - # ---- Cross section points parquet to S3 ---- - # ---------------------------------------------------------- - - # classify the cross section points - 
fixed_pts <- - fixed_pts %>% - dplyr::mutate( - X = sf::st_coordinates(.)[,1], - Y = sf::st_coordinates(.)[,2] - ) %>% - sf::st_drop_geometry() %>% - dplyr::select( - hy_id, cs_id, pt_id, - cs_lengthm, - relative_distance, - X, Y, Z, - class, point_type - ) - - # Drop point geometries, leaving just X, Y, Z values - fixed_pts <- sf::st_drop_geometry(fixed_pts) - + out_transects <- + transects %>% + hydrofabric3D::add_tmp_id() %>% + dplyr::filter(tmp_id %in% unique(hydrofabric3D::add_tmp_id(fixed_pts)$tmp_id)) %>% + dplyr::filter(!tmp_id %in% unique(extended_pts$tmp_id)) + } + + # finalize new transects + out_transects <- + out_transects %>% + dplyr::left_join( + point_type_counts, + by = c("hy_id", "cs_id") + ) %>% + dplyr::left_join( + dplyr::ungroup( + dplyr::slice( + dplyr::group_by( + dplyr::select(sf::st_drop_geometry(fixed_pts), + hy_id, cs_id, bottom, left_bank, right_bank, valid_banks, has_relief), + hy_id, cs_id), + 1) + ), + by = c("hy_id", "cs_id") + ) %>% + dplyr::select(hy_id, cs_source, cs_id, cs_measure, cs_lengthm, + # sinuosity, + is_extended, + left_bank_count, right_bank_count, channel_count, bottom_count, + bottom, left_bank, right_bank, valid_banks, has_relief, + geom + ) + + # ------------------------------------------------------------------- + # ---- Re enumerate the transects & cross section points "cs_id" ---- + # ------------------------------------------------------------------- + + # make a dataframe that has a new_cs_id column that has + # the cs_id renumbered to fill in any missing IDs, + # so each hy_id has cs_ids that go from 1 - number of cross sections on hy_id + # The dataframe below will be used to join the "new_cs_id" with + # the original "cs_ids" in the final cross section POINTS and UPDATED TRANSECTS output datasets + renumbered_ids <- + fixed_pts %>% + sf::st_drop_geometry() %>% + # dplyr::filter(hy_id %in% c("wb-2402800", "wb-2398282", "wb-2400351")) %>% + dplyr::select(hy_id, cs_id, pt_id, cs_measure) %>% + dplyr::group_by(hy_id, cs_id) %>% + dplyr::slice(1) %>% + dplyr::ungroup() %>% + hydrofabric3D::add_tmp_id() %>% + dplyr::group_by(hy_id) %>% + dplyr::mutate( + new_cs_id = 1:dplyr::n() + ) %>% + dplyr::ungroup() %>% + dplyr::select(new_cs_id, tmp_id) + + # Renumber the transects to have correct CS IDs + out_transects <- dplyr::left_join( + hydrofabric3D::add_tmp_id(out_transects), + renumbered_ids, + by = "tmp_id" + ) %>% + dplyr::select(-cs_id, -tmp_id) %>% + dplyr::select(hy_id, cs_source, cs_id = new_cs_id, + cs_measure, cs_lengthm, + # sinuosity, + is_extended, + left_bank_count, right_bank_count, channel_count, bottom_count, + bottom, left_bank, right_bank, valid_banks, has_relief, + geometry = geom + ) + + # # fline_lengths <- sf::st_drop_geometry(flines) %>% + # # dplyr::filter(id %in% out_transects$hy_id) %>% + # # dplyr::mutate(lengthm = lengthkm * 1000) %>% + # # dplyr::select(hy_id = id, lengthm, lengthkm) + # tmp <- dplyr::left_join( out_transects, fline_lengths, by = "hy_id") %>% + # dplyr::mutate(ds_distance = (cs_measure * lengthm) / 100) %>% + # dplyr::select(-sinuosity) %>% + # dplyr::relocate(hy_id, cs_id, cs_measure, lengthm, ds_distance, lengthkm) %>% + # dplyr::rename("geometry" = geom) + + # Renumber the cross sections points to have correct CS IDs + fixed_pts <- dplyr::left_join( + hydrofabric3D::add_tmp_id(fixed_pts), + renumbered_ids, + by = "tmp_id" + ) %>% + dplyr::select(-cs_id, -tmp_id) %>% + dplyr::rename(cs_id = new_cs_id) + + # mapview::mapview(transects, color = "red") + + # 
mapview::mapview(dplyr::filter(out_transects, is_extended), color = "green") +
+  # mapview::mapview(flines, color = "dodgerblue")
+  
+  ######################################
+  
+  # ----------------------------------------------------------
+  # ---- Cross section points parquet to S3 ----
+  # ----------------------------------------------------------
+  
+  # extract X/Y coordinates and drop the point geometries (points were already classified above)
+  fixed_pts <-
+    fixed_pts %>%
+    dplyr::mutate(
+      X = sf::st_coordinates(.)[,1],
+      Y = sf::st_coordinates(.)[,2]
+    ) %>%
+    sf::st_drop_geometry() %>%
+    dplyr::select(
+      hy_id, cs_id, pt_id,
+      cs_lengthm,
+      relative_distance,
+      X, Y, Z,
+      class, point_type,
+      bottom, left_bank, right_bank, valid_banks, has_relief # newly added columns (03/06/2024)
+    )
+  
+  # # Drop point geometries, leaving just X, Y, Z values
+  # fixed_pts <- sf::st_drop_geometry(fixed_pts)
+  
   # add Z_source column for source of elevation data
   fixed_pts <-
     fixed_pts %>%
     dplyr::mutate(
       Z_source = cs_source
-    ) %>%
-    dplyr::relocate(hy_id, cs_id, pt_id, cs_lengthm, relative_distance, X, Y, Z, Z_source, class)
+    ) %>%
+    dplyr::relocate(hy_id, cs_id, pt_id, cs_lengthm, relative_distance, X, Y, Z, Z_source,
+                    class, point_type,
+                    bottom, left_bank, right_bank, valid_banks, has_relief)
 
   ######################################
 
@@ -464,7 +402,7 @@ for (i in 1:nrow(path_df)) {
     # transect_path
     updated_path
   )
-  
+
   # command to copy transects geopackage to S3
   trans_to_s3 <- paste0("aws s3 cp ", updated_path, " ", transects_prefix, transect_file,
                         ifelse(is.null(aws_profile), "", paste0(" --profile ", aws_profile)))
 
@@ -493,11 +431,11 @@ for (i in 1:nrow(path_df)) {
 
   # command to copy cross section points parquet to S3
   copy_cs_pts_to_s3 <- paste0("aws s3 cp ", out_path, " ", cs_pts_prefix, out_file,
-                          ifelse(is.null(aws_profile), "", paste0(" --profile ", aws_profile)))
-  
+                              ifelse(is.null(aws_profile), "", paste0(" --profile ", aws_profile)))
+  
   message("Copy VPU ", path_df$vpu[i], " cross sections to S3:\n - S3 copy command:\n'",
           paste0("aws s3 cp ", out_path, " ", cs_pts_prefix, out_file,
-                 ifelse(is.null(aws_profile), "", paste0(" --profile ", aws_profile))),
+                 ifelse(is.null(aws_profile), "", paste0(" --profile ", aws_profile))),
           "'\n==========================")
 
   system(copy_cs_pts_to_s3, intern = TRUE)
 
@@ -508,43 +446,618 @@ for (i in 1:nrow(path_df)) {
   message("- Completed at: ", end)
   message("==========================")
 
-  if(COLLECT_META) {
-    
-    meta_df <- data.frame(
-      vpu = VPU,
-      start = as.character(start),
-      end = as.character(end),
-      start_cs_pts = as.character(start_cs_pts),
-      end_cs_pts = as.character(end_cs_pts),
-      start_rectify = as.character(start_rectify),
-      end_rectify = as.character(end_rectify),
-      fline_count = fline_count,
-      transect_count = transect_count,
-      wb_count = wb_count,
-      fline_wb_count = fline_wb_count,
-      transect_wb_count = transect_wb_count,
-      start_cs_pts_count = start_cs_pts_count,
-      start_cs_pts_ids = start_cs_pts_ids,
-      rectify_cs_pts_count = rectify_cs_pts_count,
-      rectify_cs_pts_ids = rectify_cs_pts_id_count,
-      extended_transects_count = extended_transects_count,
-      extended_transects_ids = extended_transects_ids,
-      dropped_transects = dropped_transects_count,
-      output_transects_count =
output_transects_count,
-      output_cs_pts_count = output_cs_pts_count,
-      output_transects_ids = output_transects_ids,
-      output_cs_pts_ids = output_cs_pts_ids
-    )
-    
-    order_df <- cbind(data.frame(vpu = VPU), start_order_count, rectify_order_count)
-    
-    readr::write_csv(meta_df, paste0(META_PATH, "nextgen_", VPU, "_cross_sections_metadata.csv"))
-    readr::write_csv(order_df, paste0(META_PATH, "nextgen_", VPU, "_cross_sections_streamorder.csv"))
-  }
-  
   rm(fixed_pts)
   gc()
   gc()
-  
-}
+}

[The remaining added lines of the rewritten 02_cs_pts.R are the previous version of the script, retained in the commit as one large commented-out block; they duplicate the code shown above.]
message("- Completed at: ", end) +# message("==========================") +# +# if(COLLECT_META) { +# +# meta_df <- data.frame( +# vpu = VPU, +# start = as.character(start), +# end = as.character(end), +# start_cs_pts = as.character(start_cs_pts), +# end_cs_pts = as.character(end_cs_pts), +# start_rectify = as.character(start_rectify), +# end_rectify = as.character(end_rectify), +# fline_count = fline_count, +# transect_count = transect_count, +# wb_count = wb_count, +# fline_wb_count = fline_wb_count, +# transect_wb_count = transect_wb_count, +# start_cs_pts_count = start_cs_pts_count, +# start_cs_pts_ids = start_cs_pts_ids, +# rectify_cs_pts_count = rectify_cs_pts_count, +# rectify_cs_pts_ids = rectify_cs_pts_id_count, +# extended_transects_count = extended_transects_count, +# extended_transects_ids = extended_transects_ids, +# dropped_transects = dropped_transects_count, +# output_transects_count = output_transects_count, +# output_cs_pts_count = output_cs_pts_count, +# output_transects_ids = output_transects_ids, +# output_cs_pts_ids = output_cs_pts_ids +# ) +# +# order_df <- cbind(data.frame(vpu = VPU), start_order_count, rectify_order_count) +# +# readr::write_csv(meta_df, paste0(META_PATH, "nextgen_", VPU, "_cross_sections_metadata.csv")) +# readr::write_csv(order_df, paste0(META_PATH, "nextgen_", VPU, "_cross_sections_streamorder.csv")) +# } +# + # rm(fixed_pts) + # gc() + # gc() +# +# } +# diff --git a/runners/cs_runner/config_vars.R b/runners/cs_runner/config_vars.R index 8745b7a..8e345d8 100644 --- a/runners/cs_runner/config_vars.R +++ b/runners/cs_runner/config_vars.R @@ -7,6 +7,14 @@ aws_profile <- "angus-lynker" # name of S3 bucket s3_bucket <- "s3://lynker-spatial/" +# location of FEMA 100 year flood plain FGB files +FEMA_S3_BUCKET <- "s3://lynker-hydrofabric/" +FEMA_S3_BUCKET_PREFIX <- "FEMA100/" +FEMA_S3_DIR <- paste0(FEMA_S3_BUCKET, FEMA_S3_BUCKET_PREFIX) + +# FEMA100 year flood map FGB save location (temporary, will be deleted after processing) +FEMA_FGB_PATH <- paste0(base_dir, "/FEMA100") + # DEM URL DEM_URL <- "/vsicurl/https://prd-tnm.s3.amazonaws.com/StagedProducts/Elevation/1/TIFF/USGS_Seamless_DEM_1.vrt" @@ -15,6 +23,11 @@ DEM_URL <- "/vsicurl/https://prd-tnm.s3.amazonaws.com/StagedProducts/Elevat # Default setting is 50% of the original transect lines length (0.5) EXTENSION_PCT <- 0.5 +# percentage of the length each cross section that should be used as a threshold for classifying a cross section as having relief or not +# 1% of the cross sections length is the default value we are using +# (i.e. 
a 100m long cross section needs a minimum of 1 meter (1%) of relief in its cross section points to be classified as "having relief") +PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF = 0.01 + # Whether to collect meta data from runs to generate an output CSV (currently only being created in 02_cs_pts.R) COLLECT_META <- TRUE @@ -24,6 +37,7 @@ META_PATH <- '/Users/anguswatters/Desktop/cs_meta/' # META_PATH <- "/local/path/to/save/cross_section_meta_data/" + # # create the directory if it does NOT exist # if(!dir.exists(base_dir)) { # message(glue::glue('Base directory does not exist...\nCreating directory: {base_dir}')) diff --git a/runners/cs_runner/download_fema100.R b/runners/cs_runner/download_fema100.R new file mode 100644 index 0000000..f55e8d7 --- /dev/null +++ b/runners/cs_runner/download_fema100.R @@ -0,0 +1,50 @@ +# Running this script goes and pulls the desired FEMA100 flood fgb datasets from the lynker-hydrofabric S3 bucket then saves them into a directory within "base_dir" +# base_dir is defined within runners/workflow/root_dir.R + +# NOTE: The lynker-hydrofabric S3 bucket is private at the moment + +# load config variables +source("runners/cs_runner/config_vars.R") + +# create FEMA100/ directory if it does NOT exist +if (!dir.exists(FEMA_FGB_PATH)) { + message(glue::glue('FEMA100/ directory does not exist...\nCreating directory: {FEMA_FGB_PATH}')) + dir.create(FEMA_FGB_PATH) +} + + +# list objects in S3 bucket, and regular expression match to nextgen_.gpkg pattern +fema_list_command <- paste0('#!/bin/bash + # AWS S3 Bucket and Directory information + S3_BUCKET="', FEMA_S3_DIR, '" + + # Regular expression pattern to match object keys + PATTERN=".fgb$" + + # AWS CLI command to list objects in the S3 bucket and use grep to filter them + S3_OBJECTS=$(aws s3 ls "$S3_BUCKET" --profile ', aws_profile, ' | awk \'{print $4}\' | grep -E "$PATTERN") + + echo "$S3_OBJECTS"' +) + +# ---- Get nextgen geopackages ---- +# Run the script to get a list of the nextgen geopackages that matched the regular expression above +FEMA_BUCKET_KEYS <- system(fema_list_command, intern = TRUE) + +# FEMA_BUCKET_OBJECTS <- paste0(FEMA_S3_BUCKET, FEMA_S3_BUCKET_PREFIX, FEMA_BUCKET_KEYS) + +# Parse the selected S3 objects keys from the FEMA100 bucket directory copy them to the local destination directory if the file does NOT exist yet +for (key in FEMA_BUCKET_KEYS) { + local_save_path <- paste0(FEMA_FGB_PATH, "/", key) + + if(!file.exists(local_save_path)) { + copy_cmd <- paste0('aws s3 cp ', FEMA_S3_BUCKET, FEMA_S3_BUCKET_PREFIX, key, " ", local_save_path) + + message("Copying S3 object:\n", local_save_path) + + # system(copy_cmd) + + message("Download '", key, "' complete!") + message("------------------") + } +} \ No newline at end of file From 28be701d17830d0090a069ba82afb78b39e80181 Mon Sep 17 00:00:00 2001 From: anguswg-ucsb <anguswatters@gmail.com> Date: Thu, 21 Mar 2024 08:04:17 -0700 Subject: [PATCH 09/64] cleaned up 01_transects and 02_cs_pts runners to reflect newest methods in hydrofabric3D transect and cross section point generation --- runners/cs_runner/01_transects.R | 73 ++- runners/cs_runner/02_cs_pts.R | 791 +++---------------------------- 2 files changed, 84 insertions(+), 780 deletions(-) diff --git a/runners/cs_runner/01_transects.R b/runners/cs_runner/01_transects.R index 912d95d..9c081e8 100644 --- a/runners/cs_runner/01_transects.R +++ b/runners/cs_runner/01_transects.R @@ -43,7 +43,7 @@ for(i in 1:nrow(path_df)) { # read in nextgen data flines <- sf::read_sf(nextgen_path, layer = "flowpaths") - # 
# model attributes + # # model attributes # model_attrs <- arrow::read_parquet(model_attr_path) # # join flowlines with model atttributes @@ -60,15 +60,12 @@ for(i in 1:nrow(path_df)) { dplyr::mutate( bf_width = exp(0.700 + 0.365* log(tot_drainage_areasqkm)) ) %>% - # dplyr::mutate( - # bf_width = 11 * eTW - # ) %>% + # dplyr::mutate( bf_width = 11 * eTW) %>% # dplyr::mutate( # if there are any NAs, use exp(0.700 + 0.365* log(tot_drainage_areasqkm)) equation to calculate bf_width # bf_width = dplyr::case_when( # is.na(bf_width) ~ exp(0.700 + 0.365* log(tot_drainage_areasqkm)), # TRUE ~ bf_width - # ) - # ) %>% + # )) %>% dplyr::select( hy_id = id, lengthkm, @@ -80,29 +77,26 @@ for(i in 1:nrow(path_df)) { # flines$bf_width <- ifelse(is.na(flines$bf_width), exp(0.700 + 0.365* log(flines$tot_drainage_areasqkm)), flines$bf_width) time1 <- Sys.time() - - # system.time({ - - # create transect lines - transects <- hydrofabric3D::cut_cross_sections( - net = flines, # flowlines network - id = "hy_id", # Unique feature ID - cs_widths = pmax(50, flines$bf_width * 11), # cross section width of each "id" linestring ("hy_id") - # cs_widths = pmax(50, flines$bf_width), # cross section width of each "id" linestring ("hy_id") - num = 10, # number of cross sections per "id" linestring ("hy_id") - smooth = TRUE, # smooth lines - densify = 3, # densify linestring points - rm_self_intersect = TRUE, # remove self intersecting transects - fix_braids = FALSE, # whether to fix braided flowlines or not - #### Arguments used for when fix_braids = TRUE - # terminal_id = NULL, - # braid_threshold = NULL, - # version = 2, - # braid_method = "comid", - # precision = 1, - add = TRUE # whether to add back the original data - ) - # }) + + # create transect lines + transects <- hydrofabric3D::cut_cross_sections( + net = flines, # flowlines network + id = "hy_id", # Unique feature ID + cs_widths = pmax(50, flines$bf_width * 11), # cross section width of each "id" linestring ("hy_id") + # cs_widths = pmax(50, flines$bf_width), # cross section width of each "id" linestring ("hy_id") + num = 10, # number of cross sections per "id" linestring ("hy_id") + smooth = TRUE, # smooth lines + densify = 3, # densify linestring points + rm_self_intersect = TRUE, # remove self intersecting transects + fix_braids = FALSE, # whether to fix braided flowlines or not + #### Arguments used for when fix_braids = TRUE # TODO: these methods need revision in hydrofabric3D to allow for more flexible processing for data that is NOT COMID based (i.e. 
hy_id) + # terminal_id = NULL, + # braid_threshold = NULL, + # version = 2, + # braid_method = "comid", + # precision = 1, + add = TRUE # whether to add back the original data + ) time2 <- Sys.time() time_diff <- round(as.numeric(time2 - time1 ), 2) @@ -120,24 +114,13 @@ for(i in 1:nrow(path_df)) { transects %>% dplyr::mutate( cs_source = net_source - ) %>% - dplyr::rename("cs_lengthm" = cs_widths) + ) + # dplyr::rename("cs_lengthm" = cs_widths) # # add cs_source column and keep just the desired columns to save and upload to S3 - # transects <- - # transects %>% - # dplyr::mutate( - # cs_source = net_source - # ) %>% - # dplyr::select( - # hy_id, - # cs_source, - # cs_id, - # cs_measure, - # cs_lengthm = cs_widths, - # geometry - # ) - # tmp <- sf::read_sf(out_path) + # transects <- transects %>% + # dplyr::mutate(cs_source = net_source) %>% + # dplyr::select(hy_id, cs_source, cs_id, cs_measure, cs_lengthm = cs_widths, geometry) # save transects with only columns to be uploaded to S3 (lynker-spatial/01_transects/transects_<VPU num>.gpkg) sf::write_sf( diff --git a/runners/cs_runner/02_cs_pts.R b/runners/cs_runner/02_cs_pts.R index dff9218..1f6faa8 100644 --- a/runners/cs_runner/02_cs_pts.R +++ b/runners/cs_runner/02_cs_pts.R @@ -1,7 +1,7 @@ -########################################################################################################################################### -################################################. REDO EVERYTHING ####################################################### -########################################################################################################################################### +############################################################################################################################## +################################################ REDO EVERYTHING ####################################################### +############################################################################################################################## # Generate the flowlines layer for the final cross_sections_<VPU>.gpkg for each VPU source("runners/cs_runner/config.R") @@ -48,7 +48,7 @@ path_df <- align_files_by_vpu( # loop over the nextgen and transect datasets (by VPU) and extract point elevations across points on each transect line, # then classify the points, and create a parquet file with hy_id, cs_id, pt_id, X, Y, Z data. 
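# (For reference, the points parquet written at the end of this loop carries:
#  hy_id, cs_id, pt_id, cs_lengthm, relative_distance, X, Y, Z, Z_source,
#  class, point_type, bottom, left_bank, right_bank, valid_banks, has_relief.
#  A minimal sketch for inspecting one finished output -- the VPU in the file name
#  is a hypothetical example:
#  pts <- arrow::read_parquet(paste0(cs_pts_dir, "nextgen_06_cross_sections.parquet"))
#  dplyr::glimpse(pts)
# )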
# Save parquet locally and upload to specified S3 bucket -for (i in 20:nrow(path_df)) { +for (i in 1:nrow(path_df)) { start <- Sys.time() @@ -84,99 +84,58 @@ for (i in 20:nrow(path_df)) { # read in waterbodies reference features layer waterbodies <- sf::read_sf(ref_path, layer = "waterbodies") - - ##### subset flowlines and transects to first 5 features for testing ##### - # flines = dplyr::slice(flines, 1:5) - # transects = dplyr::filter(transects, hy_id %in% unique(flines$id)) - ##### ##### - - # system.time({ + # Update flowlines and transects to remove flowlines and transects that intersect with reference_features waterbodies feature_subsets <- wb_intersects(flines, transects, waterbodies) - # }) - - # tmp <- transects %>% - # dplyr::slice(1:15000) - # tmp_flines <- flines %>% - # dplyr::filter(id %in% tmp$hy_id) - # mapview::mapview(tmp_flines, color = "dodgerblue") + mapview::mapview(tmp, color = "red") - + # replace flowlines and transects objects with updated versions in "updated_features" flines <- flines[feature_subsets$valid_flowlines, ] transects <- transects[feature_subsets$valid_transects, ] - rm(waterbodies) - gc() - - ################################################################# - ##### Temporary subsetting to speed local development up ######## - ################################################################# - - # flines <- - # flines %>% - # dplyr::group_by(order) %>% - # dplyr::slice(1:100) %>% - # dplyr::ungroup() - # # flines <- - # # flines %>% - # # dplyr::slice(1:2500) - # - # transects <- - # transects %>% - # # dplyr::filter(hy_id %in% unique(tmp$id)) - # dplyr::filter(hy_id %in% unique(flines$id)) - - ################################################################# - ################################################################# - start_cs_pts <- Sys.time() message("Extracting cross section points (", start_cs_pts, ")") - # message("Extracting cross section points (", Sys.time(),")") - - # STEP 1: Extract cs points from DEM - # system.time({ - - # get cross section point elevations - cs_pts <- hydrofabric3D::cross_section_pts( - cs = transects, - points_per_cs = NULL, - min_pts_per_cs = 10, - dem = DEM_URL - ) + # ---------------------------------------------------------------------------------------------------------------- + # ---- STEP 1: Extract cs points from DEM ---- + # ---------------------------------------------------------------------------------------------------------------- + + # get cross section point elevations + cs_pts <- hydrofabric3D::cross_section_pts( + cs = transects, + points_per_cs = NULL, + min_pts_per_cs = 10, + dem = DEM_URL + ) - # }) + # ---------------------------------------------------------------------------------------------------------------- + # ---- STEP 2: Remove any cross section that has ANY missing (NA) Z values, and classify the points ---- + # ---------------------------------------------------------------------------------------------------------------- - # system.time({ - - # STEP 2: - # Remove any cross section that has ANY missing (NA) Z values, and classify the points + # STEP 2: Remove any cross section that has ANY missing (NA) Z values, and classify the points cs_pts <- cs_pts %>% - # dplyr::filter(hy_id %in% c("wb-849054", "wb-845736")) %>% dplyr::group_by(hy_id, cs_id) %>% dplyr::filter(!any(is.na(Z))) %>% dplyr::ungroup() %>% - hydrofabric3D::classify_points(pct_of_length_for_relief = 0.01) - - # }) - - # STEP 3: Try to rectify any no relief and invalid banks cross sections - system.time({ - # 
cs_pts <- hydrofabric3D::rectify_flat_cs( - fixed_pts <- hydrofabric3D::rectify_cs( - cs_pts = cs_pts, # cross section points generated from hydrofabric3D::cross_section_pts() - net = flines, # original flowline network - transects = transects, # original transect lines - points_per_cs = NULL, - min_pts_per_cs = 10, # number of points per cross sections - dem = DEM_URL, # DEM to extract points from - scale = EXTENSION_PCT, # How far to extend transects if the points need to be rechecked - pct_of_length_for_relief = PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF, # percent of cross sections length to be needed in relief calculation to consider cross section to "have relief" - fix_ids = FALSE, - verbose = TRUE - ) - }) - # }) + hydrofabric3D::classify_points(pct_of_length_for_relief = PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF) + + # ---------------------------------------------------------------------------------------------------------------- + # ---- STEP 3: Try to rectify any no relief and invalid banks cross sections ---- + # ---------------------------------------------------------------------------------------------------------------- + + # TODO: This is taking A LOT time to process as inputs get larger, an improvement should be looked into more + fixed_pts <- hydrofabric3D::rectify_cs( + # cs_pts <- hydrofabric3D::rectify_flat_cs( + cs_pts = cs_pts, # cross section points generated from hydrofabric3D::cross_section_pts() + net = flines, # original flowline network + transects = transects, # original transect lines + points_per_cs = NULL, + min_pts_per_cs = 10, # number of points per cross sections + dem = DEM_URL, # DEM to extract points from + scale = EXTENSION_PCT, # How far to extend transects if the points need to be rechecked + pct_of_length_for_relief = PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF, # percent of cross sections length to be needed in relief calculation to consider cross section to "have relief" + fix_ids = FALSE, + verbose = TRUE + ) # get a summary dataframe and print out details message rectify_summary <- hydrofabric3D::rectify_summary(cs_pts, fixed_pts, verbose = TRUE) @@ -189,6 +148,10 @@ for (i in 20:nrow(path_df)) { readr::write_csv(rectify_summary, paste0(META_PATH, "nextgen_", VPU, "_cross_sections_metadata.csv")) + # ---------------------------------------------------------------------------------------------------------------- + # ---- STEP 4: Update transects with extended transects (if exists) ---- + # ---------------------------------------------------------------------------------------------------------------- + # get the counts of each point type to add this data to the transects dataset point_type_counts <- hydrofabric3D::get_point_type_counts(fixed_pts, add = FALSE) @@ -201,7 +164,6 @@ for (i in 20:nrow(path_df)) { fixed_pts %>% dplyr::filter(is_extended) %>% hydrofabric3D::add_tmp_id() - # dplyr::mutate(tmp_id = paste0(hy_id, "_", cs_id)) # extract transects that have a "hy_id" in the "extended_pts" dataset update_transects <- @@ -215,25 +177,11 @@ for (i in 20:nrow(path_df)) { update_transects <- update_transects %>% - # dplyr::filter(hy_id %in% unique(extended_pts$hy_id)) %>% # apply extend_by_percent function to each transect line: hydrofabric3D:::extend_by_percent( pct = EXTENSION_PCT, length_col = "cs_lengthm" ) - - # # Number of transects being updated - # if(COLLECT_META) { - # extended_transects_count <- nrow(update_transects) - # extended_transects_ids <- length(unique(update_transects$tmp_id)) - # } - - # start_uids <- 
hydrofabric3D:::get_unique_tmp_ids(cs_pts) - # end_uids <- hydrofabric3D:::get_unique_tmp_ids(fixed_pts) - # removed_tmp_ids <- start_uids[!start_uids %in% end_uids] - # transects %>% - # hydrofabric3D::add_tmp_id() %>% - # dplyr::filter(!tmp_id %in% removed_tmp_ids) # Filter down to ONLY points that were finalized and rectified from rectify_cs_pts() # remove old transects that have "tmp_id" in "extended_pts" (transects that were unchanged and are "good_to_go") @@ -286,9 +234,9 @@ for (i in 20:nrow(path_df)) { geom ) - # ------------------------------------------------------------------- + # ---------------------------------------------------------------------------------------------------------------- # ---- Re enumerate the transects & cross section points "cs_id" ---- - # ------------------------------------------------------------------- + # ---------------------------------------------------------------------------------------------------------------- # make a dataframe that has a new_cs_id column that has # the cs_id renumbered to fill in any missing IDs, @@ -327,16 +275,6 @@ for (i in 20:nrow(path_df)) { geometry = geom ) - # # fline_lengths <- sf::st_drop_geometry(flines) %>% - # # dplyr::filter(id %in% out_transects$hy_id) %>% - # # dplyr::mutate(lengthm = lengthkm * 1000) %>% - # # dplyr::select(hy_id = id, lengthm, lengthkm) - # tmp <- dplyr::left_join( out_transects, fline_lengths, by = "hy_id") %>% - # dplyr::mutate(ds_distance = (cs_measure * lengthm) / 100) %>% - # dplyr::select(-sinuosity) %>% - # dplyr::relocate(hy_id, cs_id, cs_measure, lengthm, ds_distance, lengthkm) %>% - # dplyr::rename("geometry" = geom) - # Renumber the cross sections points to have correct CS IDs fixed_pts <- dplyr::left_join( hydrofabric3D::add_tmp_id(fixed_pts), @@ -346,15 +284,11 @@ for (i in 20:nrow(path_df)) { dplyr::select(-cs_id, -tmp_id) %>% dplyr::rename(cs_id = new_cs_id) - # mapview::mapview(transects, color = "red") + - # mapview::mapview(dplyr::filter(out_transects, is_extended), color = "green") + - # mapview::mapview(flines, color = "dodgerblue") - ###################################### - # ---------------------------------------------------------- + # ---------------------------------------------------------------------------------------------------------------- # ---- Cross section points parquet to S3 ---- - # ---------------------------------------------------------- + # ---------------------------------------------------------------------------------------------------------------- # classify the cross section points fixed_pts <- @@ -386,11 +320,11 @@ for (i in 20:nrow(path_df)) { class, point_type, bottom, left_bank, right_bank, valid_banks, has_relief) - ###################################### + ############################################################################## - # ---------------------------------------------------------- + # ---------------------------------------------------------------------------------------------------------------- # ---- Re upload the updated transects geopackage to S3 ---- - # ---------------------------------------------------------- + # ---------------------------------------------------------------------------------------------------------------- updated_path <- gsub(transect_file, paste0("updated_", transect_file), transect_path) ## Save local and REUPLOAD TRANSECTS to S3 to update for any extended cross sections @@ -413,12 +347,11 @@ for (i in 20:nrow(path_df)) { system(trans_to_s3, intern = TRUE) - ###################################### 
- ###################################### + ############################################################################## - # ---------------------------------------------------------- + # ---------------------------------------------------------------------------------------------------------------- # ---- Upload the cross section points parquet to S3 ---- - # ---------------------------------------------------------- + # ---------------------------------------------------------------------------------------------------------------- # name of file and path to save transects gpkg too out_file <- paste0("nextgen_", path_df$vpu[i], "_cross_sections.parquet") @@ -446,618 +379,6 @@ for (i in 20:nrow(path_df)) { message("- Completed at: ", end) message("==========================") - rm(fixed_pts) - gc() - gc() - } -# -# transects -# flines -# ########################################################################################################################################### + # ########################################################################################################################################### -# -# # Generate the flowlines layer for the final cross_sections_<VPU>.gpkg for each VPU -# source("runners/cs_runner/config.R") -# -# # # load libraries -# library(hydrofabric3D) -# library(dplyr) -# library(sf) -# -# # cross section bucket prefix -# cs_pts_prefix <- paste0(s3_bucket, version_prefix, "/3D/dem-cross-sections/") -# # cs_pts_prefix <- paste0(s3_bucket, "v20/3D/dem-cross-sections/") -# -# # transect bucket prefix -# transects_prefix <- paste0(s3_bucket, version_prefix, "/3D/transects/") -# -# # paths to nextgen datasets -# nextgen_files <- list.files(nextgen_dir, full.names = FALSE) -# -# # paths to nextgen datasets -# transect_files <- list.files(transects_dir, full.names = FALSE) -# -# # string to fill in "cs_source" column in output datasets -# cs_source <- "hydrofabric3D" -# -# # reference features dataframe -# ref_df <- data.frame( -# vpu = sapply(strsplit(ref_features, "_", fixed = TRUE), function(i) { i[1] }), -# ref_file = ref_features -# ) -# -# # ensure the files are in the same order and matched up by VPU -# path_df <- align_files_by_vpu( -# x = nextgen_files, -# y = transect_files, -# base = base_dir -# ) %>% -# dplyr::left_join( -# ref_df, -# by = "vpu" -# ) -# -# # loop over the nextgen and transect datasets (by VPU) and extract point elevations across points on each transect line, -# # then classify the points, and create a parquet file with hy_id, cs_id, pt_id, X, Y, Z data. 
-# # Save parquet locally and upload to specified S3 bucket -# for (i in 1:nrow(path_df)) { -# -# # i = 8 -# -# start <- Sys.time() -# -# # nextgen file and full path -# nextgen_file <- path_df$x[i] -# nextgen_path <- paste0(nextgen_dir, nextgen_file) -# -# # model attributes file and full path -# transect_file <- path_df$y[i] -# transect_path <- paste0(transects_dir, transect_file) -# -# # model attributes file and full path -# ref_file <- path_df$ref_file[i] -# ref_path <- paste0(ref_features_dir, "gpkg/", ref_file) -# -# # current VPU being processed -# VPU = path_df$vpu[i] -# -# message("Creating VPU ", VPU, -# " cross section points:\n - flowpaths: '", nextgen_file, -# "'\n - transects: '", transect_file, "'", -# "\n - waterbodies: '", ref_file, "'", -# "'\n - start time: '", start, "'" -# ) -# -# ################### -# -# # read in transects data -# transects <- sf::read_sf(transect_path) -# -# # read in nextgen data -# flines <- sf::read_sf(nextgen_path, layer = "flowpaths") -# -# # read in waterbodies reference features layer -# waterbodies <- sf::read_sf(ref_path, layer = "waterbodies") -# -# ##### subset flowlines and transects to first 5 features for testing ##### -# # flines = dplyr::slice(flines, 1:5) -# # transects = dplyr::filter(transects, hy_id %in% unique(flines$id)) -# ##### ##### -# -# # system.time({ -# # Update flowlines and transects to remove flowlines and transects that intersect with reference_features waterbodies -# feature_subsets <- wb_intersects(flines, transects, waterbodies) -# # }) -# -# # Collect meta data on features and changes -# if(COLLECT_META) { -# -# fline_count <- nrow(flines) -# transect_count <- nrow(transects) -# wb_count <- nrow(waterbodies) -# -# fline_wb_count <- sum(feature_subsets$valid_flowlines) -# transect_wb_count <- sum(feature_subsets$valid_transects) -# } -# -# # replace flowlines and transects objects with updated versions in "updated_features" -# flines <- flines[feature_subsets$valid_flowlines, ] -# transects <- transects[feature_subsets$valid_transects, ] -# - # rm(waterbodies) - # gc() -# -# flines <- -# flines %>% -# dplyr::group_by(order) %>% -# dplyr::slice(1:100) %>% -# dplyr::ungroup() -# # flines <- -# # flines %>% -# # dplyr::slice(1:2500) -# -# transects <- -# transects %>% -# # dplyr::filter(hy_id %in% unique(tmp$id)) -# dplyr::filter(hy_id %in% unique(flines$id)) -# -# -# start_cs_pts <- Sys.time() -# -# message("Extracting cross section points (", start_cs_pts,")") -# -# system.time({ -# -# # get cross section point elevations -# cs_pts <- hydrofabric3D::cross_section_pts( -# cs = transects, -# points_per_cs = NULL, -# min_pts_per_cs = 10, -# dem = DEM_URL -# ) -# -# }) -# -# # Remove any cross section that has ANY missing (NA) Z values. 
-# cs_pts2 <- -# cs_pts %>% -# # dplyr::filter(hy_id %in% c("wb-849054", "wb-845736")) %>% -# dplyr::group_by(hy_id, cs_id) %>% -# dplyr::filter(!any(is.na(Z))) %>% -# dplyr::ungroup() -# -# # Stash meta data about the points -# pts_meta <- -# cs_pts2 %>% -# sf::st_drop_geometry() %>% -# dplyr::select(hy_id, cs_id, pt_id, cs_measure, is_extended) -# -# # classify the points -# cs_pts2 <- hydrofabric3D::classify_points(cs_pts2, pct_of_length_for_relief = 0.01) -# -# system.time({ -# # cs_pts <- hydrofabric3D::rectify_flat_cs( -# fixed_pts <- hydrofabric3D::rectify_cs( -# cs_pts = cs_pts2, # cross section points generated from hydrofabric3D::cross_section_pts() -# net = flines, # original flowline network -# transects = transects, # original transect lines -# points_per_cs = NULL, -# min_pts_per_cs = 10, # number of points per cross sections -# dem = DEM_URL, # DEM to extract points from -# scale = EXTENSION_PCT, # How far to extend transects if the points need to be rechecked -# pct_of_length_for_relief = 0.01, # percent of cross sections length to be needed in relief calculation to consider cross section to "have relief" -# fix_ids = FALSE, -# verbose = TRUE -# ) -# }) -# -# rectify_summary <- hydrofabric3D::rectify_summary(cs_pts2, fixed_pts) -# -# end_cs_pts <- Sys.time() -# message("\n ---> Completed extraction of cross section points (", end_cs_pts,")") -# -# if(COLLECT_META) { -# start_cs_pts_count <- nrow(cs_pts) -# } -# -# # cs_pts_time <- round(as.numeric(end_cs_pts - start_cs_pts ), 2) -# # message("\n\n ---> Cross section point elevations processed in ", cs_pts_time) -# -# start_rectify <- Sys.time() -# message("Rectifying cross section points (", start_rectify,")") -# -# # collect the hy_ids and number of stream orders in cs_pts -# if(COLLECT_META) { -# -# cs_pts_ids <- unique(cs_pts$hy_id) -# start_cs_pts_ids <- length(cs_pts_ids) -# -# start_order_count <- -# flines %>% -# sf::st_drop_geometry() %>% -# dplyr::filter(id %in% cs_pts_ids) %>% -# dplyr::group_by(order) %>% -# dplyr::count() %>% -# tidyr::pivot_wider(names_from = order, -# names_glue = "start_order_{order}", -# values_from = n -# ) %>% -# dplyr::ungroup() -# } -# -# # Remove any cross section that has ANY missing (NA) Z values. 
-# cs_pts <-
-#   cs_pts %>%
-#   # dplyr::filter(hy_id %in% c("wb-849054", "wb-845736")) %>%
-#   dplyr::group_by(hy_id, cs_id) %>%
-#   dplyr::filter(!any(is.na(Z))) %>%
-#   dplyr::ungroup()
-# # try to extend any cross sections that returned cross section points with
-# # identical Z values within a certain threshold ("flat" cross sections)
-#
-# system.time({
-#   # cs_pts <- hydrofabric3D::rectify_flat_cs(
-#   fixed_pts <- hydrofabric3D::rectify_flat_cs(
-#     cs_pts = cs_pts, # cross section points generated from hydrofabric3D::cross_section_pts()
-#     net = flines, # original flowline network
-#     cs = transects, # original transect lines
-#     points_per_cs = NULL,
-#     min_pts_per_cs = 10, # number of points per cross sections
-#     dem = DEM_URL, # DEM to extract points from
-#     scale = EXTENSION_PCT, # How far to extend transects if the points need to be rechecked
-#     threshold = 1, # 1 meter from bottom
-#     pct_threshold = 0.99, # rectify if 99% of points are within 1 meter from the bottom
-#     fix_ids = FALSE
-#   )
-# })
-#
-#
-# end_rectify <- Sys.time()
-# rectify_time <- round(as.numeric(difftime(end_rectify, start_rectify, units = "secs")), 2)
-#
-# message("\n ---> Completed rectifying cross section points (", end_rectify,")")
-#
-# if(COLLECT_META) {
-#   rectify_cs_pts_count <- nrow(fixed_pts)
-#   # collect the hy_ids and number of stream orders in the RECTIFIED cs_pts
-#   rectify_cs_pts_ids <- unique(fixed_pts$hy_id)
-#   rectify_cs_pts_id_count <- length(rectify_cs_pts_ids)
-#
-#   rectify_order_count <-
-#     flines %>%
-#     sf::st_drop_geometry() %>%
-#     dplyr::filter(id %in% rectify_cs_pts_ids) %>%
-#     dplyr::group_by(order) %>%
-#     dplyr::count() %>%
-#     tidyr::pivot_wider(names_from = order,
-#                        names_glue = "rectify_order_{order}",
-#                        values_from = n
-#     ) %>%
-#     dplyr::ungroup()
-# }
-#
-# rm(cs_pts)
-# gc()
-#
-# message("\n\n ---> Cross section points rectified in ", rectify_time, " seconds")
-#
-# # Remove any cross section that has ANY missing (NA) Z values.
-# fixed_pts <- -# fixed_pts %>% -# # dplyr::filter(hy_id %in% c("wb-849054", "wb-845736")) %>% -# # dplyr::group_by(hy_id, cs_id) %>% -# # dplyr::filter(!any(is.na(Z))) %>% -# # dplyr::ungroup() %>% -# hydrofabric3D::add_tmp_id() -# -# # Number of cross section points after removing any cross sections that contain any NA Z values -# if(COLLECT_META) { -# cs_pts_na_removed_count <- nrow(fixed_pts) -# } -# -# # Stash meta data about the points -# pts_meta <- -# fixed_pts %>% -# sf::st_drop_geometry() %>% -# dplyr::select(hy_id, cs_id, pt_id, cs_measure, is_extended) -# -# message("Classifying cross section points...") -# -# # # Classify points -# # fixed_pts <- hydrofabric3D::classify_points(fixed_pts) -# -# # add meta data back to the points -# fixed_pts <- -# fixed_pts %>% -# dplyr::select(-is_extended) %>% -# dplyr::left_join( -# pts_meta, -# # dplyr::select(pts_meta, -is_extended), -# by = c("hy_id", "cs_id", "pt_id") -# # dplyr::select(pts_meta, hy_id, cs_id, pt_id, cs_measure, is_extended) -# ) -# -# message("Gathering count of point types per cross section...") -# -# # get the counts of each point type to add this data to the transects dataset -# point_type_counts <- hydrofabric3D::get_point_type_counts(fixed_pts, add = FALSE) -# -# # # check the number of cross sections that were extended -# # fixed_pts$is_extended %>% table() -# message("Subsetting cross section points generated after extending transects...") -# -# # extract cross section points that have an "is_extended" value of TRUE -# extended_pts <- -# fixed_pts %>% -# dplyr::filter(is_extended) %>% -# hydrofabric3D::add_tmp_id() -# # dplyr::mutate(tmp_id = paste0(hy_id, "_", cs_id)) -# -# # extract transects that have a "hy_id" in the "extended_pts" dataset -# update_transects <- -# transects %>% -# hydrofabric3D::add_tmp_id() %>% -# dplyr::filter(tmp_id %in% unique(extended_pts$tmp_id)) -# -# # Number of cross section points generated from extending transects and number of tmpIDs -# if(COLLECT_META) { -# extended_pts_count <- nrow(extended_pts) -# extended_pts_ids <- length(unique(extended_pts$tmp_id)) -# extended_transects_count <- nrow(update_transects) -# extended_transects_ids <- length(unique(update_transects$tmp_id)) -# } -# -# # if any transects were extended, update the transects dataset, and overwrite local and S3 transects geopackages -# if (nrow(update_transects) > 0) { -# message("Updating ", nrow(update_transects), " transects") -# -# update_transects <- -# update_transects %>% -# # dplyr::filter(hy_id %in% unique(extended_pts$hy_id)) %>% -# # apply extend_by_percent function to each transect line: -# hydrofabric3D:::extend_by_percent( -# pct = EXTENSION_PCT, -# length_col = "cs_lengthm" -# ) -# -# # # Number of transects being updated -# # if(COLLECT_META) { -# # extended_transects_count <- nrow(update_transects) -# # extended_transects_ids <- length(unique(update_transects$tmp_id)) -# # } -# -# # Filter down to ONLY points that were finalized and rectified from rectify_cs_pts() -# # remove old transects that have "tmp_id" in "extended_pts" (transects that were unchanged and are "good_to_go") -# # and then replace with old transects with the "update_transects" -# out_transects <- -# transects %>% -# hydrofabric3D::add_tmp_id() %>% -# # dplyr::filter(!tmp_id %in% unique(extended_pts$tmp_id)) %>% -# # dplyr::filter(!tmp_id %in% ) -# dplyr::filter(tmp_id %in% unique(hydrofabric3D::add_tmp_id(fixed_pts)$tmp_id)) %>% -# dplyr::filter(!tmp_id %in% unique(extended_pts$tmp_id)) %>% -# dplyr::bind_rows( -# 
dplyr::mutate( -# update_transects, -# is_extended = TRUE -# ) -# ) -# -# # dplyr::mutate(is_extended = FALSE) %>% -# # dplyr::bind_rows( -# # dplyr::mutate(update_transects, is_extended = TRUE) -# # ) %>% -# # dplyr::select(-tmp_id) -# -# } else { -# -# out_transects <- -# transects %>% -# hydrofabric3D::add_tmp_id() %>% -# dplyr::filter(tmp_id %in% unique(hydrofabric3D::add_tmp_id(fixed_pts)$tmp_id)) %>% -# dplyr::filter(!tmp_id %in% unique(extended_pts$tmp_id)) -# } -# -# # Number of final output transects and the number of unique tmpIDs (hy_id/cs_id , i.e. cross sections) -# if(COLLECT_META) { -# output_transects_count <- nrow(out_transects) -# output_transects_ids <- length(unique(out_transects$tmp_id)) -# } -# -# # finalize new transects -# out_transects <- -# out_transects %>% -# dplyr::left_join( -# point_type_counts, -# by = c("hy_id", "cs_id") -# ) %>% -# dplyr::select(hy_id, cs_source, cs_id, cs_measure, cs_lengthm, -# # sinuosity, -# is_extended, -# left_bank_count, right_bank_count, channel_count, bottom_count, -# geom) -# -# # ------------------------------------------------------------------- -# # ---- Re enumerate the transects & cross section points "cs_id" ---- -# # ------------------------------------------------------------------- -# -# # make a dataframe that has a new_cs_id column that has -# # the cs_id renumbered to fill in any missing IDs, -# # so each hy_id has cs_ids that go from 1 - number of cross sections on hy_id -# # The dataframe below will be used to join the "new_cs_id" with -# # the original "cs_ids" in the final cross section POINTS and UPDATED TRANSECTS output datasets -# renumbered_ids <- -# fixed_pts %>% -# sf::st_drop_geometry() %>% -# # dplyr::filter(hy_id %in% c("wb-2402800", "wb-2398282", "wb-2400351")) %>% -# dplyr::select(hy_id, cs_id, pt_id, cs_measure) %>% -# dplyr::group_by(hy_id, cs_id) %>% -# dplyr::slice(1) %>% -# dplyr::ungroup() %>% -# hydrofabric3D::add_tmp_id() %>% -# dplyr::group_by(hy_id) %>% -# dplyr::mutate( -# new_cs_id = 1:dplyr::n() -# ) %>% -# dplyr::ungroup() %>% -# dplyr::select(new_cs_id, tmp_id) -# -# # Renumber the transects to have correct CS IDs -# out_transects <- dplyr::left_join( -# hydrofabric3D::add_tmp_id(out_transects), -# renumbered_ids, -# by = "tmp_id" -# ) %>% -# dplyr::select(-cs_id, -tmp_id) %>% -# dplyr::select(hy_id, cs_source, cs_id = new_cs_id, -# cs_measure, cs_lengthm, -# # sinuosity, -# is_extended, -# left_bank_count, right_bank_count, channel_count, bottom_count, -# geometry = geom -# ) -# -# # # fline_lengths <- sf::st_drop_geometry(flines) %>% -# # # dplyr::filter(id %in% out_transects$hy_id) %>% -# # # dplyr::mutate(lengthm = lengthkm * 1000) %>% -# # # dplyr::select(hy_id = id, lengthm, lengthkm) -# # tmp <- dplyr::left_join( out_transects, fline_lengths, by = "hy_id") %>% -# # dplyr::mutate(ds_distance = (cs_measure * lengthm) / 100) %>% -# # dplyr::select(-sinuosity) %>% -# # dplyr::relocate(hy_id, cs_id, cs_measure, lengthm, ds_distance, lengthkm) %>% -# # dplyr::rename("geometry" = geom) -# -# # Renumber the cross sections points to have correct CS IDs -# fixed_pts <- dplyr::left_join( -# hydrofabric3D::add_tmp_id(fixed_pts), -# renumbered_ids, -# by = "tmp_id" -# ) %>% -# dplyr::select(-cs_id, -tmp_id) %>% -# dplyr::rename(cs_id = new_cs_id) -# -# # mapview::mapview(transects, color = "red") + -# # mapview::mapview(dplyr::filter(out_transects, is_extended), color = "green") + -# # mapview::mapview(flines, color = "dodgerblue") -# -# ###################################### -# -# # 
---------------------------------------------------------- -# # ---- Cross section points parquet to S3 ---- -# # ---------------------------------------------------------- -# -# # classify the cross section points -# fixed_pts <- -# fixed_pts %>% -# dplyr::mutate( -# X = sf::st_coordinates(.)[,1], -# Y = sf::st_coordinates(.)[,2] -# ) %>% -# sf::st_drop_geometry() %>% -# dplyr::select( -# hy_id, cs_id, pt_id, -# cs_lengthm, -# relative_distance, -# X, Y, Z, -# class, point_type -# ) -# -# # Drop point geometries, leaving just X, Y, Z values -# fixed_pts <- sf::st_drop_geometry(fixed_pts) -# -# # add Z_source column for source of elevation data -# fixed_pts <- -# fixed_pts %>% -# dplyr::mutate( -# Z_source = cs_source -# ) %>% -# dplyr::relocate(hy_id, cs_id, pt_id, cs_lengthm, relative_distance, X, Y, Z, Z_source, class) -# -# # Number of final output transects and the number of unique tmpIDs (hy_id/cs_id , i.e. cross sections) -# if(COLLECT_META) { -# output_cs_pts_count <- nrow(fixed_pts) -# output_cs_pts_ids <- length(unique(hydrofabric3D::add_tmp_id(fixed_pts)$tmp_id)) -# dropped_transects_count <- transect_count - output_transects_count -# } -# -# ###################################### -# -# # ---------------------------------------------------------- -# # ---- Re upload the updated transects geopackage to S3 ---- -# # ---------------------------------------------------------- -# updated_path <- gsub(transect_file, paste0("updated_", transect_file), transect_path) -# -# ## Save local and REUPLOAD TRANSECTS to S3 to update for any extended cross sections -# message("Saving updated transects to:\n - filepath: '", updated_path, "'") -# -# # save flowlines to out_path (lynker-spatial/01_transects/transects_<VPU num>.gpkg) -# sf::write_sf( -# out_transects, -# # transect_path -# updated_path -# ) -# -# # command to copy transects geopackage to S3 -# trans_to_s3 <- paste0("aws s3 cp ", updated_path, " ", transects_prefix, transect_file, -# ifelse(is.null(aws_profile), "", paste0(" --profile ", aws_profile))) -# -# message("Copy VPU ", path_df$vpu[i], " transects to S3:\n - S3 copy command:\n'", -# trans_to_s3, -# "'\n==========================") -# -# system(trans_to_s3, intern = TRUE) -# -# ###################################### -# ###################################### -# -# # ---------------------------------------------------------- -# # ---- Upload the cross section points parquet to S3 ---- -# # ---------------------------------------------------------- -# -# # name of file and path to save transects gpkg too -# out_file <- paste0("nextgen_", path_df$vpu[i], "_cross_sections.parquet") -# out_path <- paste0(cs_pts_dir, out_file) -# -# message("Saving cross section points to:\n - filepath: '", out_path, "'") -# -# # save cross section points as a parquet to out_path (lynker-spatial/02_cs_pts/cs_pts_<VPU num>.parquet) -# arrow::write_parquet(fixed_pts, out_path) -# -# # command to copy cross section points parquet to S3 -# copy_cs_pts_to_s3 <- paste0("aws s3 cp ", out_path, " ", cs_pts_prefix, out_file, -# ifelse(is.null(aws_profile), "", paste0(" --profile ", aws_profile))) -# -# message("Copy VPU ", path_df$vpu[i], " cross sections to S3:\n - S3 copy command:\n'", -# paste0("aws s3 cp ", out_path, " ", cs_pts_prefix, out_file, -# ifelse(is.null(aws_profile), "", paste0(" --profile ", aws_profile))), -# "'\n==========================") -# -# system(copy_cs_pts_to_s3, intern = TRUE) -# -# end <- Sys.time() -# -# message("Finished cross section point generation for VPU ", VPU) -# 
message("- Completed at: ", end) -# message("==========================") -# -# if(COLLECT_META) { -# -# meta_df <- data.frame( -# vpu = VPU, -# start = as.character(start), -# end = as.character(end), -# start_cs_pts = as.character(start_cs_pts), -# end_cs_pts = as.character(end_cs_pts), -# start_rectify = as.character(start_rectify), -# end_rectify = as.character(end_rectify), -# fline_count = fline_count, -# transect_count = transect_count, -# wb_count = wb_count, -# fline_wb_count = fline_wb_count, -# transect_wb_count = transect_wb_count, -# start_cs_pts_count = start_cs_pts_count, -# start_cs_pts_ids = start_cs_pts_ids, -# rectify_cs_pts_count = rectify_cs_pts_count, -# rectify_cs_pts_ids = rectify_cs_pts_id_count, -# extended_transects_count = extended_transects_count, -# extended_transects_ids = extended_transects_ids, -# dropped_transects = dropped_transects_count, -# output_transects_count = output_transects_count, -# output_cs_pts_count = output_cs_pts_count, -# output_transects_ids = output_transects_ids, -# output_cs_pts_ids = output_cs_pts_ids -# ) -# -# order_df <- cbind(data.frame(vpu = VPU), start_order_count, rectify_order_count) -# -# readr::write_csv(meta_df, paste0(META_PATH, "nextgen_", VPU, "_cross_sections_metadata.csv")) -# readr::write_csv(order_df, paste0(META_PATH, "nextgen_", VPU, "_cross_sections_streamorder.csv")) -# } -# - # rm(fixed_pts) - # gc() - # gc() -# -# } -# From adeee656277a49965109f3427314d079f8385926 Mon Sep 17 00:00:00 2001 From: anguswg-ucsb <anguswatters@gmail.com> Date: Thu, 21 Mar 2024 08:09:11 -0700 Subject: [PATCH 10/64] cleaned up download_fema100.R script and replaced glue::glue() with paste0() in that script --- runners/cs_runner/download_fema100.R | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/runners/cs_runner/download_fema100.R b/runners/cs_runner/download_fema100.R index f55e8d7..acb90f9 100644 --- a/runners/cs_runner/download_fema100.R +++ b/runners/cs_runner/download_fema100.R @@ -6,13 +6,15 @@ # load config variables source("runners/cs_runner/config_vars.R") -# create FEMA100/ directory if it does NOT exist +# ------------------------------------------------------------------------------------- +# ---- Create FEMA100/ directory (if it does NOT exist) ---- +# ------------------------------------------------------------------------------------- + if (!dir.exists(FEMA_FGB_PATH)) { - message(glue::glue('FEMA100/ directory does not exist...\nCreating directory: {FEMA_FGB_PATH}')) + message(paste0("FEMA100/ directory does not exist...\nCreating directory:\n > '", FEMA_FGB_PATH, "'")) dir.create(FEMA_FGB_PATH) } - # list objects in S3 bucket, and regular expression match to nextgen_.gpkg pattern fema_list_command <- paste0('#!/bin/bash # AWS S3 Bucket and Directory information @@ -27,13 +29,22 @@ fema_list_command <- paste0('#!/bin/bash echo "$S3_OBJECTS"' ) -# ---- Get nextgen geopackages ---- +# ------------------------------------------------------------------------------------- +# ---- Get the S3 buckets object keys for FEMA 100 FGB files ---- +# ------------------------------------------------------------------------------------- + # Run the script to get a list of the nextgen geopackages that matched the regular expression above FEMA_BUCKET_KEYS <- system(fema_list_command, intern = TRUE) +# create bucket object URIs # FEMA_BUCKET_OBJECTS <- paste0(FEMA_S3_BUCKET, FEMA_S3_BUCKET_PREFIX, FEMA_BUCKET_KEYS) +# ------------------------------------------------------------------------------------- 
+# ---- Download FEMA 100 year FGB files from S3 ---- +# ------------------------------------------------------------------------------------- + # Parse the selected S3 objects keys from the FEMA100 bucket directory copy them to the local destination directory if the file does NOT exist yet +FEMA_BUCKET_KEYS[1:2] for (key in FEMA_BUCKET_KEYS) { local_save_path <- paste0(FEMA_FGB_PATH, "/", key) From bfa3c9b530eef8b4d2ca550fc0947a7f7f931190 Mon Sep 17 00:00:00 2001 From: anguswg-ucsb <anguswatters@gmail.com> Date: Thu, 21 Mar 2024 08:34:25 -0700 Subject: [PATCH 11/64] fixed aws s3 copy command to specify the aws_profile to use --- runners/cs_runner/download_fema100.R | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/runners/cs_runner/download_fema100.R b/runners/cs_runner/download_fema100.R index acb90f9..fee84dd 100644 --- a/runners/cs_runner/download_fema100.R +++ b/runners/cs_runner/download_fema100.R @@ -44,18 +44,19 @@ FEMA_BUCKET_KEYS <- system(fema_list_command, intern = TRUE) # ------------------------------------------------------------------------------------- # Parse the selected S3 objects keys from the FEMA100 bucket directory copy them to the local destination directory if the file does NOT exist yet -FEMA_BUCKET_KEYS[1:2] -for (key in FEMA_BUCKET_KEYS) { +for (key in FEMA_BUCKET_KEYS[3:length(FEMA_BUCKET_KEYS)]) { local_save_path <- paste0(FEMA_FGB_PATH, "/", key) if(!file.exists(local_save_path)) { - copy_cmd <- paste0('aws s3 cp ', FEMA_S3_BUCKET, FEMA_S3_BUCKET_PREFIX, key, " ", local_save_path) + copy_cmd <- paste0('aws s3 cp ', FEMA_S3_BUCKET, FEMA_S3_BUCKET_PREFIX, key, " ", local_save_path, " --profile ", aws_profile) - message("Copying S3 object:\n", local_save_path) + message("S3 object:\n > '", FEMA_S3_BUCKET, FEMA_S3_BUCKET_PREFIX, key, "'") + message("Downloading S3 object to:\n > '", local_save_path, "'") + # message("Copy command:\n > '", copy_cmd, "'") - # system(copy_cmd) + system(copy_cmd) - message("Download '", key, "' complete!") - message("------------------") + message(" > '", key, "' download complete!") + message("----------------------------------") } -} \ No newline at end of file +} From 6b8bfe7a09d4608fa88bef61320dd10cb1882788 Mon Sep 17 00:00:00 2001 From: anguswg-ucsb <anguswatters@gmail.com> Date: Sat, 6 Apr 2024 08:28:28 -0700 Subject: [PATCH 12/64] working on new runner for injecting ML outputs to dem cross sections --- runners/cs_runner/02_cs_pts.R | 91 +++++++-- runners/cs_runner/03_inject_ml.R | 263 +++++++++++++++++++++++++++ runners/cs_runner/config.R | 4 + runners/cs_runner/config_vars.R | 33 +++- runners/cs_runner/download_nextgen.R | 27 ++- 5 files changed, 400 insertions(+), 18 deletions(-) create mode 100644 runners/cs_runner/03_inject_ml.R diff --git a/runners/cs_runner/02_cs_pts.R b/runners/cs_runner/02_cs_pts.R index 1f6faa8..7d5cc8e 100644 --- a/runners/cs_runner/02_cs_pts.R +++ b/runners/cs_runner/02_cs_pts.R @@ -48,7 +48,9 @@ path_df <- align_files_by_vpu( # loop over the nextgen and transect datasets (by VPU) and extract point elevations across points on each transect line, # then classify the points, and create a parquet file with hy_id, cs_id, pt_id, X, Y, Z data. 
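# (Note on the ID-validation checks inside this loop: they compare sets of "tmp_id"
#  values, where hydrofabric3D::add_tmp_id() builds tmp_id as roughly
#  paste0(hy_id, "_", cs_id) -- e.g. a hypothetical "wb-1000_1" -- so any cross
#  section dropped by a processing step shows up as a missing tmp_id.
#  A minimal sketch of one such check:
#  ids_before <- hydrofabric3D::add_tmp_id(cs_pts)$tmp_id
#  ids_after  <- hydrofabric3D::add_tmp_id(fixed_pts)$tmp_id
#  all(ids_before %in% ids_after)
# )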
# Save parquet locally and upload to specified S3 bucket -for (i in 1:nrow(path_df)) { +for (i in 13:nrow(path_df)) { + + # i = 1 start <- Sys.time() @@ -92,12 +94,16 @@ for (i in 1:nrow(path_df)) { flines <- flines[feature_subsets$valid_flowlines, ] transects <- transects[feature_subsets$valid_transects, ] + rm(waterbodies) + gc() + start_cs_pts <- Sys.time() message("Extracting cross section points (", start_cs_pts, ")") # ---------------------------------------------------------------------------------------------------------------- # ---- STEP 1: Extract cs points from DEM ---- # ---------------------------------------------------------------------------------------------------------------- - + # system.time({ + # get cross section point elevations cs_pts <- hydrofabric3D::cross_section_pts( cs = transects, @@ -105,11 +111,13 @@ for (i in 1:nrow(path_df)) { min_pts_per_cs = 10, dem = DEM_URL ) - + # }) # ---------------------------------------------------------------------------------------------------------------- # ---- STEP 2: Remove any cross section that has ANY missing (NA) Z values, and classify the points ---- # ---------------------------------------------------------------------------------------------------------------- - + + # system.time({ + # STEP 2: Remove any cross section that has ANY missing (NA) Z values, and classify the points cs_pts <- cs_pts %>% @@ -117,14 +125,17 @@ for (i in 1:nrow(path_df)) { dplyr::filter(!any(is.na(Z))) %>% dplyr::ungroup() %>% hydrofabric3D::classify_points(pct_of_length_for_relief = PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF) - + + # }) + + ids_original_cs_pts <- hydrofabric3D::add_tmp_id(cs_pts)$tmp_id + # ---------------------------------------------------------------------------------------------------------------- # ---- STEP 3: Try to rectify any no relief and invalid banks cross sections ---- # ---------------------------------------------------------------------------------------------------------------- - - # TODO: This is taking A LOT time to process as inputs get larger, an improvement should be looked into more - fixed_pts <- hydrofabric3D::rectify_cs( - # cs_pts <- hydrofabric3D::rectify_flat_cs( + # system.time({ + # NOTE: new inplace method for improving (rectifying) any invalid cross sections where we dont have banks and relief + fixed_pts <- hydrofabric3D::improve_invalid_cs( cs_pts = cs_pts, # cross section points generated from hydrofabric3D::cross_section_pts() net = flines, # original flowline network transects = transects, # original transect lines @@ -136,6 +147,24 @@ for (i in 1:nrow(path_df)) { fix_ids = FALSE, verbose = TRUE ) + # }) + + ids_after_fixed_pts <- hydrofabric3D::add_tmp_id(fixed_pts)$tmp_id + + # # TODO: This is taking A LOT time to process as inputs get larger, an improvement should be looked into more + # fixed_pts <- hydrofabric3D::rectify_cs( + # # cs_pts <- hydrofabric3D::rectify_flat_cs( + # cs_pts = cs_pts, # cross section points generated from hydrofabric3D::cross_section_pts() + # net = flines, # original flowline network + # transects = transects, # original transect lines + # points_per_cs = NULL, + # min_pts_per_cs = 10, # number of points per cross sections + # dem = DEM_URL, # DEM to extract points from + # scale = EXTENSION_PCT, # How far to extend transects if the points need to be rechecked + # pct_of_length_for_relief = PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF, # percent of cross sections length to be needed in relief calculation to consider cross section to "have relief" + # fix_ids = 
FALSE,
  #   verbose = TRUE
  # )

   # get a summary dataframe and print out details message
   rectify_summary <- hydrofabric3D::rectify_summary(cs_pts, fixed_pts, verbose = TRUE)
@@ -153,7 +182,7 @@ for (i in 1:nrow(path_df)) {
   # ----------------------------------------------------------------------------------------------------------------

   # get the counts of each point type to add this data to the transects dataset
-  point_type_counts <- hydrofabric3D::get_point_type_counts(fixed_pts, add = FALSE)
+  point_type_counts <- hydrofabric3D::get_point_type_counts(fixed_pts)

   # # check the number of cross sections that were extended
   # fixed_pts$is_extended %>% table()
@@ -266,7 +295,8 @@ for (i in 1:nrow(path_df)) {
       by = "tmp_id"
     ) %>%
     dplyr::select(-cs_id, -tmp_id) %>%
-    dplyr::select(hy_id, cs_source, cs_id = new_cs_id,
+    dplyr::select(hy_id, cs_source,
+                  cs_id = new_cs_id,
                   cs_measure,
                   cs_lengthm,
                   # sinuosity,
                   is_extended,
@@ -320,6 +350,42 @@ for (i in 1:nrow(path_df)) {
                   class, point_type,
                   bottom, left_bank, right_bank, valid_banks, has_relief)

+  ids_before_align <- hydrofabric3D::add_tmp_id(fixed_pts)$tmp_id
+
+  message("Aligning banks and smoothing bottoms...")
+  fixed_pts <- hydrofabric3D::align_banks_and_bottoms(fixed_pts)
+
+  ids_after_align <- hydrofabric3D::add_tmp_id(fixed_pts)$tmp_id
+
+  message("Reclassifying cross section points...")
+  fixed_pts <- hydrofabric3D::classify_points(
+    cs_pts = fixed_pts,
+    pct_of_length_for_relief = PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF
+  )
+
+  ids_after_reclassify <- hydrofabric3D::add_tmp_id(fixed_pts)$tmp_id
+
+  if(all(ids_original_cs_pts %in% ids_after_fixed_pts)) {
+    message("All hy_id/cs_ids in ORIGINAL DEM point extraction were found in the FIXED points")
+  } else {
+    message(" >>> Missing hy_id/cs_ids in ORIGINAL DEM point extraction compared to the FIXED points")
+  }
+
+  if(all(ids_before_align %in% ids_after_align)) {
+    message("All hy_id/cs_ids are kept intact after bank alignment and bottom smoothing")
+  } else {
+    message(" >>> Missing hy_id/cs_ids after bank alignment and bottom smoothing")
+  }
+
+  if(all(ids_after_align %in% ids_after_reclassify)) {
+    message("All hy_id/cs_ids are kept intact after RECLASSIFICATION")
+  } else {
+    message(" >>> Missing hy_id/cs_ids after RECLASSIFICATION")
+  }
+
+  # all(hydrofabric3D::add_tmp_id(fixed_pts2)$tmp_id %in% hydrofabric3D::add_tmp_id(fixed_pts)$tmp_id)
+  # all(hydrofabric3D::add_tmp_id(fixed_pts4)$tmp_id %in% hydrofabric3D::add_tmp_id(fixed_pts)$tmp_id)
  ##############################################################################

   # ----------------------------------------------------------------------------------------------------------------
@@ -379,6 +445,9 @@ for (i in 1:nrow(path_df)) {
   message("- Completed at: ", end)
   message("==========================")

+  rm(fixed_pts)
+  gc()
+  gc()
 }

 # ###########################################################################################################################################
diff --git a/runners/cs_runner/03_inject_ml.R b/runners/cs_runner/03_inject_ml.R
new file mode 100644
index 0000000..113adb7
--- /dev/null
+++ b/runners/cs_runner/03_inject_ml.R
@@ -0,0 +1,263 @@
+# ----------------------------------------------------------------------------------------------------------------
+# ---- data paths -----
+# ----------------------------------------------------------------------------------------------------------------
+library(dplyr)
+library(hydrofabric3D)
+library(sf)
+library(patchwork)
+
+# Generate the flowlines layer for the final
cross_sections_<VPU>.gpkg for each VPU +source("runners/cs_runner/config.R") + +# cross section bucket prefix +CS_ML_PTS_S3_PREFIX <- paste0(s3_bucket, version_prefix, "/3D/cross-sections/") +# cs_pts_prefix <- paste0(s3_bucket, "v20/3D/dem-cross-sections/") + +ML_OUTPUTS_PATH <- list.files(ML_OUTPUTS_DIR, full.names = TRUE) + +# paths to nextgen datasets +nextgen_files <- list.files(nextgen_dir, full.names = FALSE) + +# paths to nextgen datasets +cs_files <- list.files(cs_pts_dir, full.names = FALSE) + +# string to fill in "cs_source" column in output datasets +cs_source <- "hydrofabric3D" + +# ensure the files are in the same order and matched up by VPU +path_df <- align_files_by_vpu( + x = nextgen_files, + y = cs_files, + base = base_dir +) +# dplyr::left_join( +# ref_df, +# by = "vpu" +# ) + +# ML Outputs +ml_output <- arrow::read_parquet(ML_OUTPUTS_PATH) + +# cs_ml_data_path <- "/Users/anguswatters/Desktop/cs_pts_for_ml_tests/nextgen_06_cross_sections_for_ml.parquet" +# ml_output_path <- "/Users/anguswatters/Desktop/lynker-spatial/ml-outputs/channel_ml_outputs.parquet" +# conus_network_path <- 's3://lynker-spatial/v20.1/conus_net.parquet' + +# loop over the nextgen and transect datasets (by VPU) and extract point elevations across points on each transect line, +# then classify the points, and create a parquet file with hy_id, cs_id, pt_id, X, Y, Z data. +# Save parquet locally and upload to specified S3 bucket +for (i in 1:nrow(path_df)) { + + # i = 15 + + start <- Sys.time() + + + # nextgen file and full path + nextgen_file <- path_df$x[i] + nextgen_path <- paste0(nextgen_dir, nextgen_file) + + # model attributes file and full path + cs_file <- path_df$y[i] + cs_pts_path <- paste0(cs_pts_dir, cs_file) + + # # model attributes file and full path + # ref_file <- path_df$ref_file[i] + # ref_path <- paste0(ref_features_dir, "gpkg/", ref_file) + + # current VPU being processed + VPU = path_df$vpu[i] + + message("Creating VPU ", VPU, + " cross section points:\n - flowpaths: '", nextgen_file, + "'\n - cross section points: '", cs_file, "'", + # "\n - waterbodies: '", ref_file, "'", + "'\n - start time: '", start, "'" + ) + + # ---------------------------------------------------------------------------------------------------------------- + # ---- Read in data ----- + # ---------------------------------------------------------------------------------------------------------------- + + # CONUS network parquet + net <- + CONUS_NETWORK_URI %>% + arrow::open_dataset() %>% + dplyr::filter(vpu == VPU) %>% + dplyr::collect() + + # Cross section points parquet + cs_pts <- arrow::read_parquet(cs_pts_path) + + fline_net <- sf::read_sf(nextgen_path, layer = "flowpaths") + + # ---------------------------------------------------------------------------------------------------------------- + # ---- Extract the max hydroseq "hy_id" for each flowline in the CONUS network parquet ----- + # Use this to join "hy_id" to "hf_id" in ML outputs data + # ---------------------------------------------------------------------------------------------------------------- + + net_subset <- + net %>% + dplyr::select(id, hf_id, hf_hydroseq) %>% + # dplyr::filter(id %in% unique(cs$hy_id)) %>% + dplyr::filter(stringr::str_detect(id, "^wb-") & !is.na(id)) %>% + dplyr::group_by(id) %>% + dplyr::slice_max(hf_hydroseq, with_ties = FALSE) %>% + dplyr::ungroup() %>% + dplyr::select(hy_id = id, + hf_id, + hf_hydroseq + ) + + number_hyids_in_cs_pts <- length(unique(cs_pts$hy_id )) + number_hyids_in_conus_net <- 
length(unique(net_subset$hy_id)) + + message("VPU ", VPU, ": ", + "\n - Number of cross section points hy_ids: '", number_hyids_in_cs_pts, "'", + "'\n - Number of CONUS network hy_ids: '", number_hyids_in_conus_net, "'" + ) + # ---------------------------------------------------------------------------------------------------------------- + # ---- Find the stream orders for the flowlines in the network ----- + # ---------------------------------------------------------------------------------------------------------------- + + # stream_order <- + # fline_net %>% + # sf::st_drop_geometry() %>% + # dplyr::select(hy_id = id, + # order, + # # hydroseq, + # tot_drainage_areasqkm + # ) %>% + # dplyr::filter(hy_id %in% net_subset$hy_id) + + # ---------------------------------------------------------------------------------------------------------------- + # ---- Subset ML data to specific VPU and add "hy_id" column to ML data ----- + # ---------------------------------------------------------------------------------------------------------------- + + + # # ML Outputs + # ml_output <- arrow::read_parquet(ML_OUTPUTS_PATH) + + # Join hy_id onto the ML outputs and then remove the rows WITHOUT matches in hy_id + # this should give us a (nearly) one-to-one cross walk between "hy_id" in the cross section points + # and "hf_id" in the ML outputs dataset + ml_subset <- + ml_output %>% + dplyr::left_join( + net_subset, + by = "hf_id" + ) %>% + dplyr::filter(!is.na(hy_id)) %>% + dplyr::relocate(hy_id, hf_id) + + # join the ML outputs data to the cross section points + cs_pts <- + cs_pts %>% + dplyr::left_join( + dplyr::select(ml_subset, + hy_id, + hf_id, + owp_tw_inchan, + owp_y_inchan, + owp_tw_bf, + owp_y_bf, + owp_dingman_r), + by = "hy_id" + ) + + cs_bottom_lengths <- get_cs_bottom_length(cs_pts) + + # TODO: for now we replace any negative TW values with the length of the bottom of the cross section + # TODO: This method + the negative model output values both need to be looked into (04/05/2024) + cs_pts <- + cs_pts %>% + dplyr::left_join( + cs_bottom_lengths, + by = c("hy_id", "cs_id") + ) %>% + dplyr::mutate( + owp_tw_inchan = dplyr::case_when( + owp_tw_inchan <= 0 ~ bottom_length, + TRUE ~ owp_tw_inchan + ), + owp_tw_bf = dplyr::case_when( + owp_tw_bf <= 0 ~ bottom_length, + TRUE ~ owp_tw_bf + ) + ) %>% + dplyr::select(-bottom_length) + # dplyr::filter(owp_tw_inchan <= 0 | owp_tw_bf <= 0) + # dplyr::left_join( stream_order, by = "hy_id") + + + # Split the cross sections into 2 groups: + # - "Inchannel cs" group are points with BOTH valid banks AND relief --> These get the INCHANNEL TW and Y values from the ML model + # - "Bankful cs" group are points WITHOUT valid banks OR any relief --> These get the BANKFUL TW and Y values from the ML model + inchannel_cs <- dplyr::filter(cs_pts, + valid_banks & has_relief) + + bankful_cs <- dplyr::filter(cs_pts, + !valid_banks | !has_relief) + + split_kept_all_rows <- nrow(cs_pts) == nrow(bankful_cs) + nrow(inchannel_cs) + + if (!split_kept_all_rows) { + warning("When splitting cross section points into 'bankful' and 'inchannel' groups, some points were not put in either group") + } + # Add bathymetry using "inchannel" estimates + cs_bathy_inchannel <- add_cs_bathymetry( + cross_section_pts = inchannel_cs, + # cross_section_pts = dplyr::slice(inchannel_cs, 1:100000), + top_width = "owp_tw_inchan", + depth = "owp_y_inchan", + dingman_r = "owp_dingman_r" + ) + + # Add bathymetry using "bankful" estimates + cs_bathy_bankful <- add_cs_bathymetry( + cross_section_pts = 
bankful_cs, + top_width = "owp_tw_bf", + depth = "owp_y_bf", + dingman_r = "owp_dingman_r" + ) + + + final_cs <- dplyr::bind_rows(cs_bathy_inchannel, cs_bathy_bankful) + + # ---------------------------------------------------------------------------------------------------------------- + # ---- Upload the cross section points parquet to S3 ---- + # ---------------------------------------------------------------------------------------------------------------- + + # name of file and path to save transects gpkg too + out_file <- paste0("nextgen_", path_df$vpu[i], "_cross_sections.parquet") + out_path <- paste0(final_dir, out_file) + + message("Saving ML augmented cross section points to:\n - filepath: '", out_path, "'") + + # save cross section points as a parquet to out_path (lynker-spatial/02_cs_pts/cs_pts_<VPU num>.parquet) + arrow::write_parquet(final_cs, out_path) + + # command to copy cross section points parquet to S3 + copy_cs_pts_to_s3 <- paste0("aws s3 cp ", out_path, " ", cs_pts_prefix, out_file, + ifelse(is.null(aws_profile), "", paste0(" --profile ", aws_profile))) + + message("Copy VPU ", path_df$vpu[i], " cross sections to S3:\n - S3 copy command:\n'", + paste0("aws s3 cp ", out_path, " ", CS_ML_PTS_S3_PREFIX, out_file, + ifelse(is.null(aws_profile), "", paste0(" --profile ", aws_profile))), + "'\n==========================") + + system(copy_cs_pts_to_s3, intern = TRUE) + + end <- Sys.time() + + message("Finished augmenting cross section points with ML for VPU ", VPU) + message("- Completed at: ", end) + message("==========================") + + rm(fixed_pts) + gc() + gc() + +} + + + \ No newline at end of file diff --git a/runners/cs_runner/config.R b/runners/cs_runner/config.R index 41f995d..8d22554 100644 --- a/runners/cs_runner/config.R +++ b/runners/cs_runner/config.R @@ -55,12 +55,16 @@ final_dir <- paste0(base_dir, "/cross_sections/") # directory to copy nextgen bucket data too ref_features_dir <- paste0(base_dir, "/00_reference_features/") +# make a directory for the ML outputs data +ML_OUTPUTS_DIR <- paste0(base_dir, "/ml-outputs/") + # create directories dir.create(transects_dir, showWarnings = FALSE) dir.create(cs_pts_dir, showWarnings = FALSE) dir.create(ref_features_dir, showWarnings = FALSE) dir.create(paste0(ref_features_dir, "gpkg/"), showWarnings = FALSE) dir.create(final_dir, showWarnings = FALSE) +dir.create(ML_OUTPUTS_DIR, showWarnings = FALSE) # dir.create(model_attr_dir, showWarnings = FALSE) ## Go get a list of the reference features geopackages from S3 and create a save path using the S3 file names to save reference features to local directory diff --git a/runners/cs_runner/config_vars.R b/runners/cs_runner/config_vars.R index 8e345d8..3d0d910 100644 --- a/runners/cs_runner/config_vars.R +++ b/runners/cs_runner/config_vars.R @@ -1,4 +1,9 @@ ### EDIT base_dir, aws_profile, and DEM_URL ### + +# ---------------------------------------------------------------------------- +# ---- General paths and constants variables ---- +# ---------------------------------------------------------------------------- + base_dir <- '/Users/anguswatters/Desktop/lynker-spatial' # AWS profile to run CLI commands @@ -7,6 +12,9 @@ aws_profile <- "angus-lynker" # name of S3 bucket s3_bucket <- "s3://lynker-spatial/" +# S3 prefix/folder of version run +version_prefix <- "v20.1" + # location of FEMA 100 year flood plain FGB files FEMA_S3_BUCKET <- "s3://lynker-hydrofabric/" FEMA_S3_BUCKET_PREFIX <- "FEMA100/" @@ -15,6 +23,10 @@ FEMA_S3_DIR <- paste0(FEMA_S3_BUCKET, 
FEMA_S3_BUCKET_PREFIX) # FEMA100 year flood map FGB save location (temporary, will be deleted after processing) FEMA_FGB_PATH <- paste0(base_dir, "/FEMA100") +# ---------------------------------------------------------------------------- +# ---- Cross section point extraction constant variables ---- +# ---------------------------------------------------------------------------- + # DEM URL DEM_URL <- "/vsicurl/https://prd-tnm.s3.amazonaws.com/StagedProducts/Elevation/1/TIFF/USGS_Seamless_DEM_1.vrt" @@ -26,22 +38,31 @@ EXTENSION_PCT <- 0.5 # percentage of the length each cross section that should be used as a threshold for classifying a cross section as having relief or not # 1% of the cross sections length is the default value we are using # (i.e. a 100m long cross section needs a minimum of 1 meter (1%) of relief in its cross section points to be classified as "having relief") -PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF = 0.01 +PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF <- 0.01 # Whether to collect meta data from runs to generate an output CSV (currently only being created in 02_cs_pts.R) +# TODO: Probably delete this COLLECT_META <- TRUE # Where should meta data CSVs be saved to? # Local path to save CSVs of cross section meta data during each iteration +# TODO: Probably delete this META_PATH <- '/Users/anguswatters/Desktop/cs_meta/' # META_PATH <- "/local/path/to/save/cross_section_meta_data/" +# ---------------------------------------------------------------------------- +# ---- Machine learning data path variables ---- +# ---------------------------------------------------------------------------- + +ML_OUTPUTS_FILE = "channel_ml_outputs.parquet" +ML_OUTPUTS_PREFIX = "v20.1/3D/ml-outputs/" +ML_OUTPUTS_URI = paste0(s3_bucket, ML_OUTPUTS_PREFIX, ML_OUTPUTS_FILE) +# ML_OUTPUTS_URI = "s3://lynker-spatial/v20.1/3D/ml-outputs/channel_ml_outputs.parquet" +ML_OUTPUTS_PATH <- paste0(base_dir, "/ml-outputs/", ML_OUTPUTS_FILE) -# # create the directory if it does NOT exist -# if(!dir.exists(base_dir)) { -# message(glue::glue('Base directory does not exist...\nCreating directory: {base_dir}')) -# dir.create(base_dir) -# } +# path to the remote CONUS net parquet file +CONUS_NETWORK_FILENAME <- "conus_net.parquet" +CONUS_NETWORK_URI <- paste0(s3_bucket, version_prefix, "/", CONUS_NETWORK_FILENAME) ### EDIT ### diff --git a/runners/cs_runner/download_nextgen.R b/runners/cs_runner/download_nextgen.R index c405fa9..dd0b807 100644 --- a/runners/cs_runner/download_nextgen.R +++ b/runners/cs_runner/download_nextgen.R @@ -40,6 +40,10 @@ if(!dir.exists(model_attr_dir)) { dir.create(model_attr_dir) } +# --------------------------------------------------------------------------- +# ---- List/Get the nextgen gpkgs from S3 bucket ---- +# --------------------------------------------------------------------------- + # list objects in S3 bucket, and regular expression match to nextgen_.gpkg pattern command <- paste0('#!/bin/bash # AWS S3 Bucket and Directory information @@ -71,7 +75,10 @@ for (key in bucket_keys) { message("------------------") } -# ---- Get nextgen model attributes parquets ---- +# --------------------------------------------------------------------------- +# ---- List/Get nextgen model attributes parquets from S3 bucket ---- +# --------------------------------------------------------------------------- + # aws s3 ls s3://lynker-spatial/v20/3D/model_attributes/ # list objects in S3 bucket, and regular expression match to nextgen_.gpkg pattern @@ -103,6 +110,11 @@ for (key in model_attr_keys) { 
message("Download '", paste0(model_attr_prefix, key), "' complete!") message("------------------") } + +# --------------------------------------------------------------------------- +# ---- List/Get reference features from S3 bucket ---- +# --------------------------------------------------------------------------- + ## Go get a list of the reference features geopackages from S3 and create a save path using the S3 file names to save reference features to local directory # list objects in S3 bucket, and regular expression match to nextgen_.gpkg pattern @@ -134,3 +146,16 @@ for (key in ref_features) { message("Download '", paste0(ref_features_dir, "gpkg/", key), "' complete!") message("------------------") } + +# --------------------------------------------------------------------------- +# ---- Get ML outputs data from S3 bucket ---- +# --------------------------------------------------------------------------- + +ml_copy_cmd <- paste0('aws s3 cp ', ML_OUTPUTS_URI, ' ', paste0(ML_OUTPUTS_DIR, basename(ML_OUTPUTS_URI))) + +message("Copying S3 object:\n", ML_OUTPUTS_URI) +system(ml_copy_cmd) + +message("Download '", paste0(ML_OUTPUTS_DIR, basename(ML_OUTPUTS_URI)), "' complete!") +message("------------------") + From ae7ae6d4812b830fcf10d46f329d42bdcb2a90e6 Mon Sep 17 00:00:00 2001 From: anguswg-ucsb <anguswatters@gmail.com> Date: Mon, 8 Apr 2024 15:16:52 -0700 Subject: [PATCH 13/64] updating inject ml runner to use updated hydrofabric3D functionality --- runners/cs_runner/03_inject_ml.R | 84 ++++++++++++++++++++++++++++++-- 1 file changed, 79 insertions(+), 5 deletions(-) diff --git a/runners/cs_runner/03_inject_ml.R b/runners/cs_runner/03_inject_ml.R index 113adb7..bbaa064 100644 --- a/runners/cs_runner/03_inject_ml.R +++ b/runners/cs_runner/03_inject_ml.R @@ -188,21 +188,45 @@ for (i in 1:nrow(path_df)) { # dplyr::filter(owp_tw_inchan <= 0 | owp_tw_bf <= 0) # dplyr::left_join( stream_order, by = "hy_id") + missing_cs <- + cs_pts %>% + dplyr::filter(is.na(hf_id) | is.na(owp_tw_inchan) | is.na(owp_y_inchan) | is.na(owp_tw_bf) | is.na(owp_y_bf) | is.na(owp_dingman_r)) %>% + hydrofabric3D::add_tmp_id() # Split the cross sections into 2 groups: # - "Inchannel cs" group are points with BOTH valid banks AND relief --> These get the INCHANNEL TW and Y values from the ML model # - "Bankful cs" group are points WITHOUT valid banks OR any relief --> These get the BANKFUL TW and Y values from the ML model - inchannel_cs <- dplyr::filter(cs_pts, - valid_banks & has_relief) + inchannel_cs <- + cs_pts %>% + hydrofabric3D::add_tmp_id() %>% + dplyr::filter(!tmp_id %in% unique(missing_cs$tmp_id)) %>% + dplyr::select(-tmp_id) %>% + dplyr::filter(valid_banks & has_relief) %>% + dplyr::rename( + TW = owp_tw_inchan, + DEPTH = owp_y_inchan, + DINGMAN_R = owp_dingman_r + ) - bankful_cs <- dplyr::filter(cs_pts, - !valid_banks | !has_relief) + bankful_cs <- + cs_pts %>% + hydrofabric3D::add_tmp_id() %>% + dplyr::filter(!tmp_id %in% unique(missing_cs$tmp_id)) %>% + dplyr::select(-tmp_id) %>% + dplyr::filter(!valid_banks | !has_relief) %>% + dplyr::rename( + TW = owp_tw_bf, + DEPTH = owp_y_bf, + DINGMAN_R = owp_dingman_r + ) - split_kept_all_rows <- nrow(cs_pts) == nrow(bankful_cs) + nrow(inchannel_cs) + split_kept_all_rows <- nrow(cs_pts) == nrow(bankful_cs) + nrow(inchannel_cs) + nrow(missing_cs) + # split_kept_all_rows <- nrow(cs_pts) == nrow(bankful_cs) + nrow(inchannel_cs) if (!split_kept_all_rows) { warning("When splitting cross section points into 'bankful' and 'inchannel' groups, some points were not put in 
either group") } + # Add bathymetry using "inchannel" estimates cs_bathy_inchannel <- add_cs_bathymetry( cross_section_pts = inchannel_cs, @@ -223,6 +247,56 @@ for (i in 1:nrow(path_df)) { final_cs <- dplyr::bind_rows(cs_bathy_inchannel, cs_bathy_bankful) + final_cs <- dplyr::bind_rows( + dplyr::select(cs_bathy_inchannel, + -owp_tw_bf, -owp_y_bf, -hf_id), + dplyr::select(cs_bathy_bankful, + -owp_tw_inchan, -owp_y_inchan, -hf_id) + ) %>% + dplyr::group_by(hy_id, cs_id) %>% + tidyr::fill( + c(cs_lengthm, Z_source, TW, DEPTH, DINGMAN_R) + ) %>% + dplyr::ungroup() %>% + dplyr::select( + -point_type, + -class, + -bottom, -left_bank, -right_bank, + -has_relief, -valid_banks + ) + + final_cs <- hydrofabric3D::classify_points(final_cs) + # final_classified %>% + # dplyr::filter(!valid_banks | !has_relief) + # system.time({ + # final_classified <- + # final_cs %>% + # hydrofabric3D::classify_points() + # }) + + final_cs <- dplyr::bind_rows( + dplyr::relocate( + final_cs, + hy_id, cs_id, pt_id, + Z, relative_distance, cs_lengthm, class, point_type, + X, Y, Z_source, bottom, left_bank, right_bank, valid_banks, has_relief, + TW, DEPTH, DINGMAN_R, is_dem_point + ), + dplyr::relocate( + dplyr::mutate( + dplyr::select(missing_cs, + -tmp_id, -hf_id, -owp_tw_inchan, -owp_y_inchan, -owp_tw_bf, -owp_y_bf, -owp_dingman_r), + TW = NA, + DEPTH = NA, + DINGMAN_R = NA, + is_dem_point = TRUE + ), + hy_id, cs_id, pt_id, + Z, relative_distance, cs_lengthm, class, point_type, + X, Y, Z_source, bottom, left_bank, right_bank, valid_banks, has_relief, + TW, DEPTH, DINGMAN_R, is_dem_point + ) + ) # ---------------------------------------------------------------------------------------------------------------- # ---- Upload the cross section points parquet to S3 ---- # ---------------------------------------------------------------------------------------------------------------- From 70eccc3c1dee8db84c1e70cc221c67b579229495 Mon Sep 17 00:00:00 2001 From: anguswg-ucsb <anguswatters@gmail.com> Date: Thu, 11 Apr 2024 13:20:41 -0700 Subject: [PATCH 14/64] updated runners/cs_runner to include machine learning estimated widths/depths calculations --- runners/cs_runner/01_transects.R | 6 - runners/cs_runner/03_inject_ml.R | 235 +++++++++++------- .../cs_runner/{03_driver.R => 04_driver.R} | 3 + 3 files changed, 148 insertions(+), 96 deletions(-) rename runners/cs_runner/{03_driver.R => 04_driver.R} (75%) diff --git a/runners/cs_runner/01_transects.R b/runners/cs_runner/01_transects.R index 9c081e8..5fe88c9 100644 --- a/runners/cs_runner/01_transects.R +++ b/runners/cs_runner/01_transects.R @@ -115,13 +115,7 @@ for(i in 1:nrow(path_df)) { dplyr::mutate( cs_source = net_source ) - # dplyr::rename("cs_lengthm" = cs_widths) - # # add cs_source column and keep just the desired columns to save and upload to S3 - # transects <- transects %>% - # dplyr::mutate(cs_source = net_source) %>% - # dplyr::select(hy_id, cs_source, cs_id, cs_measure, cs_lengthm = cs_widths, geometry) - # save transects with only columns to be uploaded to S3 (lynker-spatial/01_transects/transects_<VPU num>.gpkg) sf::write_sf( # save dataset with only subset of columns to upload to S3 diff --git a/runners/cs_runner/03_inject_ml.R b/runners/cs_runner/03_inject_ml.R index bbaa064..cc0ee52 100644 --- a/runners/cs_runner/03_inject_ml.R +++ b/runners/cs_runner/03_inject_ml.R @@ -21,9 +21,6 @@ nextgen_files <- list.files(nextgen_dir, full.names = FALSE) # paths to nextgen datasets cs_files <- list.files(cs_pts_dir, full.names = FALSE) -# string to fill in "cs_source" 
column in output datasets -cs_source <- "hydrofabric3D" - # ensure the files are in the same order and matched up by VPU path_df <- align_files_by_vpu( x = nextgen_files, @@ -42,14 +39,19 @@ ml_output <- arrow::read_parquet(ML_OUTPUTS_PATH) # ml_output_path <- "/Users/anguswatters/Desktop/lynker-spatial/ml-outputs/channel_ml_outputs.parquet" # conus_network_path <- 's3://lynker-spatial/v20.1/conus_net.parquet' +# for (i in 1:5000) { +# start <- Sys.time() +# message(i, " - time: '", start, "'") +# } + # loop over the nextgen and transect datasets (by VPU) and extract point elevations across points on each transect line, # then classify the points, and create a parquet file with hy_id, cs_id, pt_id, X, Y, Z data. # Save parquet locally and upload to specified S3 bucket for (i in 1:nrow(path_df)) { - # i = 15 + # i = 8 - start <- Sys.time() + start <- round(Sys.time()) # nextgen file and full path @@ -67,16 +69,21 @@ for (i in 1:nrow(path_df)) { # current VPU being processed VPU = path_df$vpu[i] - message("Creating VPU ", VPU, - " cross section points:\n - flowpaths: '", nextgen_file, + message("Augmenting DEM cross sections with ML estimated widths/depths: ", VPU, + " cross section points:", "'\n - cross section points: '", cs_file, "'", + "'\n - ML estimated widths/depths: '", ML_OUTPUTS_FILE, "'", + # "'\n - ML estimated widths/depths: '", ML_OUTPUTS_URI, "'", + "\n - CONUS network file: '", CONUS_NETWORK_URI, "'", + "\n - flowpaths: '", nextgen_file, # "\n - waterbodies: '", ref_file, "'", "'\n - start time: '", start, "'" - ) + ) # ---------------------------------------------------------------------------------------------------------------- # ---- Read in data ----- # ---------------------------------------------------------------------------------------------------------------- + message("Loading data...") # CONUS network parquet net <- @@ -88,7 +95,8 @@ for (i in 1:nrow(path_df)) { # Cross section points parquet cs_pts <- arrow::read_parquet(cs_pts_path) - fline_net <- sf::read_sf(nextgen_path, layer = "flowpaths") + # TODO: Not needed + # fline_net <- sf::read_sf(nextgen_path, layer = "flowpaths") # ---------------------------------------------------------------------------------------------------------------- # ---- Extract the max hydroseq "hy_id" for each flowline in the CONUS network parquet ----- @@ -112,8 +120,8 @@ for (i in 1:nrow(path_df)) { number_hyids_in_conus_net <- length(unique(net_subset$hy_id)) message("VPU ", VPU, ": ", - "\n - Number of cross section points hy_ids: '", number_hyids_in_cs_pts, "'", - "'\n - Number of CONUS network hy_ids: '", number_hyids_in_conus_net, "'" + "\n - Number of CONUS network hy_ids: '", number_hyids_in_conus_net, "'", + "\n - Number of cross section points hy_ids: '", number_hyids_in_cs_pts, "'" ) # ---------------------------------------------------------------------------------------------------------------- # ---- Find the stream orders for the flowlines in the network ----- @@ -149,6 +157,8 @@ for (i in 1:nrow(path_df)) { dplyr::filter(!is.na(hy_id)) %>% dplyr::relocate(hy_id, hf_id) + message(round(Sys.time()), " - Joining ML width/depths estimates to cross section points...") + # join the ML outputs data to the cross section points cs_pts <- cs_pts %>% @@ -164,14 +174,16 @@ for (i in 1:nrow(path_df)) { by = "hy_id" ) - cs_bottom_lengths <- get_cs_bottom_length(cs_pts) + message(round(Sys.time()), " - Replacing any negative width/depth estimates with cross section bottom lengths...") + + cs_bottom_lengths <- 
hydrofabric3D::get_cs_bottom_length(cs_pts) # TODO: for now we replace any negative TW values with the length of the bottom of the cross section # TODO: This method + the negative model output values both need to be looked into (04/05/2024) cs_pts <- cs_pts %>% dplyr::left_join( - cs_bottom_lengths, + cs_bottom_lengths, by = c("hy_id", "cs_id") ) %>% dplyr::mutate( @@ -185,25 +197,40 @@ for (i in 1:nrow(path_df)) { ) ) %>% dplyr::select(-bottom_length) - # dplyr::filter(owp_tw_inchan <= 0 | owp_tw_bf <= 0) + + # cs_pts %>% dplyr::filter(owp_tw_inchan <= 0 | owp_tw_bf <= 0) # dplyr::left_join( stream_order, by = "hy_id") + # extract any cross sections that didn't get matched with a "hf_id" and (or?) no ML data + # TODO: look at this stuff with Arash (04/09/2024) missing_cs <- cs_pts %>% - dplyr::filter(is.na(hf_id) | is.na(owp_tw_inchan) | is.na(owp_y_inchan) | is.na(owp_tw_bf) | is.na(owp_y_bf) | is.na(owp_dingman_r)) %>% + dplyr::filter(is.na(hf_id) | + is.na(owp_tw_inchan) | is.na(owp_y_inchan) | + is.na(owp_tw_bf) | is.na(owp_y_bf) | + is.na(owp_dingman_r)) %>% hydrofabric3D::add_tmp_id() + # TODO: Delete this, but time being keeping this to inspect mismatch in between "hy_id" and "hf_id" + readr::write_csv( + dplyr::select(missing_cs, -tmp_id), + paste0(META_PATH, "nextgen_", path_df$vpu[i], "_cross_sections_missing_hf_ids.csv") + ) + # Split the cross sections into 2 groups: # - "Inchannel cs" group are points with BOTH valid banks AND relief --> These get the INCHANNEL TW and Y values from the ML model # - "Bankful cs" group are points WITHOUT valid banks OR any relief --> These get the BANKFUL TW and Y values from the ML model inchannel_cs <- cs_pts %>% hydrofabric3D::add_tmp_id() %>% - dplyr::filter(!tmp_id %in% unique(missing_cs$tmp_id)) %>% + dplyr::filter(!tmp_id %in% unique(missing_cs$tmp_id)) %>% # NOTE: makes sure to remove any of the "missing" cross sections without data dplyr::select(-tmp_id) %>% dplyr::filter(valid_banks & has_relief) %>% - dplyr::rename( - TW = owp_tw_inchan, + # NOTE: temporarily rename the top widths, depths, and dingman's R columns so they + # work nicely with the "hydrofabric3D::add_cs_bathymetry()" function which takes a dataframe of cross section points + # with "TW", "DEPTH", and "DINGMAN_R" columns for each cross section + dplyr::rename( + TW = owp_tw_inchan, DEPTH = owp_y_inchan, DINGMAN_R = owp_dingman_r ) @@ -220,114 +247,142 @@ for (i in 1:nrow(path_df)) { DINGMAN_R = owp_dingman_r ) + # sanity check that all rows are accounted for after splitting up data split_kept_all_rows <- nrow(cs_pts) == nrow(bankful_cs) + nrow(inchannel_cs) + nrow(missing_cs) # split_kept_all_rows <- nrow(cs_pts) == nrow(bankful_cs) + nrow(inchannel_cs) if (!split_kept_all_rows) { - warning("When splitting cross section points into 'bankful' and 'inchannel' groups, some points were not put in either group") - } - - # Add bathymetry using "inchannel" estimates - cs_bathy_inchannel <- add_cs_bathymetry( - cross_section_pts = inchannel_cs, - # cross_section_pts = dplyr::slice(inchannel_cs, 1:100000), - top_width = "owp_tw_inchan", - depth = "owp_y_inchan", - dingman_r = "owp_dingman_r" + warning(paste0("When splitting cross section points into 'bankful' and 'inchannel' groups,", + "\nsome points were not put in either group.", + "\nLikely due to 'valid_banks' and/or 'has_relief' columns have either missing ", + "values or contain values other than TRUE/FALSE") ) - - # Add bathymetry using "bankful" estimates - cs_bathy_bankful <- add_cs_bathymetry( - cross_section_pts = 
bankful_cs, - top_width = "owp_tw_bf", - depth = "owp_y_bf", - dingman_r = "owp_dingman_r" + } + message(round(Sys.time()), " - Adding cross section bathymetry using inchannel widths/depths estimates...") + + # Add bathymetry using "inchannel" estimates + inchannel_cs <- hydrofabric3D::add_cs_bathymetry( + cross_section_pts = inchannel_cs ) - - - final_cs <- dplyr::bind_rows(cs_bathy_inchannel, cs_bathy_bankful) + message(round(Sys.time()), " - Adding cross section bathymetry using bankful widths/depths estimates...") + + # Add bathymetry using "bankful" estimates + bankful_cs <- hydrofabric3D::add_cs_bathymetry( + cross_section_pts = bankful_cs + ) + + # combine the inchannel and bankful cross section points back together, fill out missing values and reclassify the points final_cs <- dplyr::bind_rows( - dplyr::select(cs_bathy_inchannel, - -owp_tw_bf, -owp_y_bf, -hf_id), - dplyr::select(cs_bathy_bankful, - -owp_tw_inchan, -owp_y_inchan, -hf_id) - ) %>% - dplyr::group_by(hy_id, cs_id) %>% + dplyr::select( + inchannel_cs, + -hf_id, -TW, -DEPTH, -DINGMAN_R, -dplyr::starts_with("owp"), -is_dem_point + ), + dplyr::select( + bankful_cs, + -hf_id, -TW, -DEPTH, -DINGMAN_R, -dplyr::starts_with("owp"), -is_dem_point + ), + dplyr::select( + missing_cs, + -hf_id, -dplyr::starts_with("owp"), -tmp_id + ) + ) %>% + dplyr::group_by(hy_id, cs_id) %>% tidyr::fill( - c(cs_lengthm, Z_source, TW, DEPTH, DINGMAN_R) - ) %>% - dplyr::ungroup() %>% + c(cs_lengthm, Z_source) + ) %>% + dplyr::ungroup() %>% dplyr::select( - -point_type, + -point_type, -class, -bottom, -left_bank, -right_bank, -has_relief, -valid_banks ) + message(round(Sys.time()), " - Reclassifying cross section point types...") + + # reclassify final_cs <- hydrofabric3D::classify_points(final_cs) - # final_classified %>% - # dplyr::filter(!valid_banks | !has_relief) - # system.time({ - # final_classified <- - # final_cs %>% - # hydrofabric3D::classify_points() - # }) - final_cs <- dplyr::bind_rows( - dplyr::relocate( - final_cs, - hy_id, cs_id, pt_id, - Z, relative_distance, cs_lengthm, class, point_type, - X, Y, Z_source, bottom, left_bank, right_bank, valid_banks, has_relief, - TW, DEPTH, DINGMAN_R, is_dem_point - ), - dplyr::relocate( - dplyr::mutate( - dplyr::select(missing_cs, - -tmp_id, -hf_id, -owp_tw_inchan, -owp_y_inchan, -owp_tw_bf, -owp_y_bf, -owp_dingman_r), - TW = NA, - DEPTH = NA, - DINGMAN_R = NA, - is_dem_point = TRUE - ), - hy_id, cs_id, pt_id, - Z, relative_distance, cs_lengthm, class, point_type, - X, Y, Z_source, bottom, left_bank, right_bank, valid_banks, has_relief, - TW, DEPTH, DINGMAN_R, is_dem_point - ) - ) + # final_uids <- final_cs %>% hydrofabric3D::get_unique_tmp_ids() + # random_uids <- sample(x=final_uids, size=12) + # cs_subset <- dplyr::filter(hydrofabric3D::add_tmp_id(final_cs), + # tmp_id %in% random_uids) + # hydrofabric3D::classify_points(cs_subset) %>% hydrofabric3D::plot_cs_pts(color = "point_type") + + starting_uids <- hydrofabric3D::get_unique_tmp_ids(cs_pts) + ending_uids <- hydrofabric3D::get_unique_tmp_ids(final_cs) + + has_same_number_of_uids <- length(starting_uids) == length(ending_uids) + all_starting_uids_in_ending_uids <- all(starting_uids %in% ending_uids) + all_ending_uids_in_starting_uids <- all(ending_uids %in% starting_uids) + + # throw some warnings if: + # - the number of uids in the input is different from the output + # - there are missing hy_id/cs_id + if (!has_same_number_of_uids) { + warning(paste0("The number of unique hy_id/cs_id is different in the ", + "starting cross section points 
from the final cross section points:",
+                   "\n > Starting number of unique hy_id/cs_id: ", length(starting_uids),
+                   "\n > Ending number of unique hy_id/cs_id: ", length(ending_uids)
+    ))
+  }
+
+  if (!all_starting_uids_in_ending_uids) {
+    number_uids_not_in_ending_uids <- length(starting_uids[!starting_uids %in% ending_uids])
+
+    # starting_uids %in% ending_uids
+    warning(
+      paste0("Missing hy_id/cs_id in output that are in the starting input cross section points: ",
+             "\n > Number of hy_id/cs_id missing: ", number_uids_not_in_ending_uids
+      )
+    )
+
+    # warning(paste0(number_uids_not_in_ending_uids, " hy_id/cs_id from the input cross section points ",
+    #                "is missing from the output cross section points"))
+
+  }
+
   # ----------------------------------------------------------------------------------------------------------------
   # ---- Upload the cross section points parquet to S3 ----
   # ----------------------------------------------------------------------------------------------------------------
-
+
   # name of file and path to save transects gpkg too
   out_file <- paste0("nextgen_", path_df$vpu[i], "_cross_sections.parquet")
   out_path <- paste0(final_dir, out_file)

-  message("Saving ML augmented cross section points to:\n - filepath: '", out_path, "'")
+  message(round(Sys.time()), " - Saving ML augmented cross section points to:\n - filepath: '", out_path, "'")

   # save cross section points as a parquet to out_path (lynker-spatial/02_cs_pts/cs_pts_<VPU num>.parquet)
   arrow::write_parquet(final_cs, out_path)

-  # command to copy cross section points parquet to S3
-  copy_cs_pts_to_s3 <- paste0("aws s3 cp ", out_path, " ", cs_pts_prefix, out_file,
-                              ifelse(is.null(aws_profile), "", paste0(" --profile ", aws_profile)))
-
-  message("Copy VPU ", path_df$vpu[i], " cross sections to S3:\n - S3 copy command:\n'",
-          paste0("aws s3 cp ", out_path, " ", CS_ML_PTS_S3_PREFIX, out_file,
-                 ifelse(is.null(aws_profile), "", paste0(" --profile ", aws_profile))),
-          "'\n==========================")
+  s3_save_uri <- paste0(CS_ML_PTS_S3_PREFIX, out_file)

+  # command to copy cross section points parquet to S3
+  copy_cs_pts_to_s3 <- paste0("aws s3 cp ",
+                              out_path,
+                              " ",
+                              s3_save_uri,
+                              ifelse(is.null(aws_profile), "", paste0(" --profile ", aws_profile))
+                              )
+
+  message(
+    "Copy VPU ", path_df$vpu[i], " ML augmented cross sections to S3:\n - S3 copy command:\n'",
+    copy_cs_pts_to_s3,
+    "'\n=========================="
+  )

   system(copy_cs_pts_to_s3, intern = TRUE)

-  end <- Sys.time()
+  end <- round(Sys.time())

   message("Finished augmenting cross section points with ML for VPU ", VPU)
   message("- Completed at: ", end)
   message("==========================")

-  rm(fixed_pts)
+  rm(net, net_subset,
+     final_cs, cs_pts,
+     inchannel_cs, bankful_cs)
   gc()
   gc()

diff --git a/runners/cs_runner/03_driver.R b/runners/cs_runner/04_driver.R
similarity index 75%
rename from runners/cs_runner/03_driver.R
rename to runners/cs_runner/04_driver.R
index cce576d..af8e1ca 100644
--- a/runners/cs_runner/03_driver.R
+++ b/runners/cs_runner/04_driver.R
@@ -11,3 +11,6 @@ source("runners/cs_runner/01_transects.R")

 # generate and upload cross sections points datasets
 source("runners/cs_runner/02_cs_pts.R")
+
+# Apply machine learning top width and depth estimates to DEM cross section points
+source("runners/cs_runner/03_inject_ml.R")

From 4e40b009f87937371d680b63ebe8cdd4835c452b Mon Sep 17 00:00:00 2001
From: anguswg-ucsb <anguswatters@gmail.com>
Date: Wed, 17 Apr 2024 17:57:39 -0700
Subject: [PATCH 15/64] super minor cleanups

---
runners/cs_runner/01_transects.R | 2 -- runners/cs_runner/config_vars.R | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/runners/cs_runner/01_transects.R b/runners/cs_runner/01_transects.R index 5fe88c9..b3727ba 100644 --- a/runners/cs_runner/01_transects.R +++ b/runners/cs_runner/01_transects.R @@ -7,8 +7,6 @@ source("runners/cs_runner/config.R") # library(sf) # install.packages("devtools") -# devtools::install_github("anguswg-ucsb/hydrofabric3D") - # transect bucket prefix transects_prefix <- paste0(s3_bucket, version_prefix, "/3D/transects/") diff --git a/runners/cs_runner/config_vars.R b/runners/cs_runner/config_vars.R index 3d0d910..6d9e087 100644 --- a/runners/cs_runner/config_vars.R +++ b/runners/cs_runner/config_vars.R @@ -63,6 +63,6 @@ ML_OUTPUTS_PATH <- paste0(base_dir, "/ml-outputs/", ML_OUTPUTS_FILE) # path to the remote CONUS net parquet file CONUS_NETWORK_FILENAME <- "conus_net.parquet" -CONUS_NETWORK_URI <- paste0(s3_bucket, version_prefix, "/", CONUS_NETWORK_FILENAME) +CONUS_NETWORK_URI <- paste0(s3_bucket, version_prefix, "/", CONUS_NETWORK_FILENAME) ### EDIT ### From e4c2558fa3436ddd96b490a3991435d1a15846f1 Mon Sep 17 00:00:00 2001 From: anguswg-ucsb <anguswatters@gmail.com> Date: Tue, 23 Apr 2024 08:23:54 -0700 Subject: [PATCH 16/64] random cleanup --- runners/cs_runner/download_fema100.R | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/runners/cs_runner/download_fema100.R b/runners/cs_runner/download_fema100.R index fee84dd..8732b0e 100644 --- a/runners/cs_runner/download_fema100.R +++ b/runners/cs_runner/download_fema100.R @@ -44,7 +44,7 @@ FEMA_BUCKET_KEYS <- system(fema_list_command, intern = TRUE) # ------------------------------------------------------------------------------------- # Parse the selected S3 objects keys from the FEMA100 bucket directory copy them to the local destination directory if the file does NOT exist yet -for (key in FEMA_BUCKET_KEYS[3:length(FEMA_BUCKET_KEYS)]) { +for (key in FEMA_BUCKET_KEYS[1:length(FEMA_BUCKET_KEYS)]) { local_save_path <- paste0(FEMA_FGB_PATH, "/", key) if(!file.exists(local_save_path)) { @@ -54,9 +54,11 @@ for (key in FEMA_BUCKET_KEYS[3:length(FEMA_BUCKET_KEYS)]) { message("Downloading S3 object to:\n > '", local_save_path, "'") # message("Copy command:\n > '", copy_cmd, "'") - system(copy_cmd) + # system(copy_cmd) message(" > '", key, "' download complete!") message("----------------------------------") + } else { + message("File already exists at:\n > '", local_save_path, "'") } } From 0b290ce2060d25c68a70b6d34cdf005a0e7b9cfc Mon Sep 17 00:00:00 2001 From: anguswg-ucsb <anguswatters@gmail.com> Date: Tue, 23 Apr 2024 13:13:11 -0700 Subject: [PATCH 17/64] basic layout for fema processing --- runners/cs_runner/00_fema.R | 100 ++++++++++++++++++++++++++++++++++++ 1 file changed, 100 insertions(+) create mode 100644 runners/cs_runner/00_fema.R diff --git a/runners/cs_runner/00_fema.R b/runners/cs_runner/00_fema.R new file mode 100644 index 0000000..6348dc9 --- /dev/null +++ b/runners/cs_runner/00_fema.R @@ -0,0 +1,100 @@ +library(dplyr) + +# Generate the flowlines layer for the final cross_sections_<VPU>.gpkg for each VPU +source("runners/cs_runner/config.R") + +# # # # load libraries +# library(hydrofabric3D) +# library(dplyr) +# library(sf) +# install.packages("devtools") + +# transect bucket prefix +transects_prefix <- paste0(s3_bucket, version_prefix, "/3D/transects/") + +# paths to nextgen datasets and model attribute parquet files +nextgen_files <- list.files(nextgen_dir, 
full.names = FALSE) +FEMA_files <- list.files(FEMA_FGB_PATH, full.names = FALSE) +transects_files <- list.files(transects_dir, full.names = FALSE) + +transects_files <- transects_files[!grepl("updated", transects_files)] + +# string to fill in "cs_source" column in output datasets +net_source <- "hydrofabric3D" + +# ensure the files are in the same order and matched up by VPU +path_df <- align_files_by_vpu( + x = nextgen_files, + y = transects_files, + base = base_dir +) + +path_df + +us_states <- + USAboundaries::us_states() %>% + sf::st_transform(5070) + +# loop over each VPU and generate cross sections, then save locally and upload to S3 bucket +for(i in 1:nrow(path_df)) { + + i = 8 + + # nextgen file and full path + nextgen_file <- path_df$x[i] + nextgen_path <- paste0(nextgen_dir, nextgen_file) + + transect_file <- path_df$y[i] + transect_path <- paste0(transects_dir, transect_file) + + transect_path + + # # model attributes file and full path + # model_attr_file <- path_df$y[i] + # model_attr_path <- paste0(model_attr_dir, model_attr_file) + + message("Creating VPU ", path_df$vpu[i], "\n - transects: ", transect_file, "\n - flowpaths: '", nextgen_file, "'") + # message("Creating VPU ", path_df$vpu[i], " transects:\n - flowpaths: '", nextgen_file, "'\n - model attributes: '", model_attr_file, "'") + + # read in nextgen data + flines <- sf::read_sf(nextgen_path, layer = "flowpaths") + flines_bb <- + flines %>% + sf::st_bbox() %>% + sf::st_as_sfc() %>% + sf::st_as_sf() + + transects <- sf::read_sf(transect_path) + + + # find the states intersecting with the given VPU flowlines + intersecting_states <- + sf::st_intersection(us_states, flines_bb) %>% + sf::st_drop_geometry() %>% + .$name %>% + gsub(" ", "-", .) + + # get the matching FEMA floodplain FGB file names + matching_fema_files <- unlist(lapply(intersecting_states, function(state_name) { + FEMA_files[grepl(state_name, FEMA_files)] + })) + + # full paths + files_of_interest <- paste0(FEMA_FGB_PATH, "/", matching_fema_files) + + # Iterate over each FEMA file and determine optimal widths for cross sections..... 
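+  # (sketch of an alternative for the state matching above: lengths(st_intersects())
+  #  skips the geometry clipping that st_intersection() performs, which is all that is
+  #  needed when only the state names are wanted; 'us_states' and 'flines_bb' as above)
+  #   touches             <- lengths(sf::st_intersects(us_states, flines_bb)) > 0
+  #   intersecting_states <- gsub(" ", "-", us_states$name[touches])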
+  # for (file in rev(files_of_interest)) {
+
+    fema_fgb <-
+      file %>%
+      sf::read_sf() %>%
+      sf::st_transform(5070)
+
+    fema_bb <-
+      fema_fgb %>%
+      sf::st_bbox() %>%
+      sf::st_as_sfc() %>%
+      sf::st_as_sf()
+
+
+  }
\ No newline at end of file

From f7815262fd86fa8fc323c2c09d9b89a190e776dd Mon Sep 17 00:00:00 2001
From: anguswg-ucsb <anguswatters@gmail.com>
Date: Thu, 25 Apr 2024 13:41:15 -0700
Subject: [PATCH 18/64] added code to simplify, dissolve, and explode fema
 geometries via mapshaper CLI, currently in the download_fema.R script but
 going to move it to its own script

---
 runners/cs_runner/00_fema.R          | 138 +++++++++++++++++-
 runners/cs_runner/config_vars.R      |  10 ++
 runners/cs_runner/download_fema100.R | 202 ++++++++++++++++++++++++++-
 3 files changed, 341 insertions(+), 9 deletions(-)

diff --git a/runners/cs_runner/00_fema.R b/runners/cs_runner/00_fema.R
index 6348dc9..c2dfee9 100644
--- a/runners/cs_runner/00_fema.R
+++ b/runners/cs_runner/00_fema.R
@@ -1,5 +1,58 @@
 library(dplyr)

+source("runners/cs_runner/config.R")
+
+# transect bucket prefix
+transects_prefix <- paste0(s3_bucket, version_prefix, "/3D/transects/")
+
+# paths to nextgen datasets and model attribute parquet files
+nextgen_files <- list.files(nextgen_dir, full.names = FALSE)
+FEMA_FGB_files <- list.files(FEMA_FGB_PATH, full.names = TRUE)
+FEMA_files <- list.files(FEMA_DISSOLVED_PATH, full.names = TRUE)
+# FEMA_BB_files <- list.files(FEMA_FGB_BB_PATH, full.names = TRUE)
+
+transects_files <- list.files(transects_dir, full.names = FALSE)
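+# (illustration with hypothetical file names: the grepl() below drops any previously
+#  written "updated" transect outputs so only the original per-VPU geopackages are kept)
+#   transects_files <- c("nextgen_06_transects.gpkg", "nextgen_06_transects_updated.gpkg")
+#   transects_files[!grepl("updated", transects_files)]
+#   # [1] "nextgen_06_transects.gpkg"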
transects_files <- transects_files[!grepl("updated", transects_files)] # string to fill in "cs_source" column in output datasets @@ -29,14 +83,35 @@ path_df <- align_files_by_vpu( base = base_dir ) -path_df - -us_states <- - USAboundaries::us_states() %>% - sf::st_transform(5070) # loop over each VPU and generate cross sections, then save locally and upload to S3 bucket for(i in 1:nrow(path_df)) { + + i = 8 + + # nextgen file and full path + nextgen_file <- path_df$x[i] + nextgen_path <- paste0(nextgen_dir, nextgen_file) + + # transect_file <- path_df$y[i] + # transect_path <- paste0(transects_dir, transect_file) + # transect_path + + # # model attributes file and full path + # model_attr_file <- path_df$y[i] + # model_attr_path <- paste0(model_attr_dir, model_attr_file) + FEMA_BB_files + + message("Creating VPU ", path_df$vpu[i], "\n - transects: ", transect_file, "\n - flowpaths: '", nextgen_file, "'") + # message("Creating VPU ", path_df$vpu[i], " transects:\n - flowpaths: '", nextgen_file, "'\n - model attributes: '", model_attr_file, "'") + + # read in nextgen data + flines <- sf::read_sf(nextgen_path, layer = "flowpaths") + +} + +# loop over each VPU and generate cross sections, then save locally and upload to S3 bucket +# for(i in 1:nrow(path_df)) { i = 8 @@ -52,6 +127,7 @@ for(i in 1:nrow(path_df)) { # # model attributes file and full path # model_attr_file <- path_df$y[i] # model_attr_path <- paste0(model_attr_dir, model_attr_file) + FEMA_BB_files message("Creating VPU ", path_df$vpu[i], "\n - transects: ", transect_file, "\n - flowpaths: '", nextgen_file, "'") # message("Creating VPU ", path_df$vpu[i], " transects:\n - flowpaths: '", nextgen_file, "'\n - model attributes: '", model_attr_file, "'") @@ -85,6 +161,7 @@ for(i in 1:nrow(path_df)) { # Iterate over each FEMA file and determine optimal widths for cross sections..... 
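  # (sketch of the per-file step the commented loop below works toward, using the same
  #  calls as the exploratory code that follows; 'flines' comes from the read above)
  #   fema_fgb <- sf::st_transform(sf::read_sf(file), 5070)
  #   hits     <- sf::st_intersects(fema_fgb, flines)          # FEMA polygons touching flowlines
  #   fema_sub <- fema_fgb[lengths(hits) > 0, ] %>%
  #     rmapshaper::ms_simplify() %>%
  #     rmapshaper::ms_dissolve(field = "FLD_AR_ID")           # one polygon per flood area ID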
# for (file in rev(files_of_interest)) { + file = "/Users/anguswatters/Desktop/lynker-spatial/FEMA100/Tennessee-100yr-flood_valid.fgb" fema_fgb <- file %>% sf::read_sf() %>% @@ -95,6 +172,53 @@ for(i in 1:nrow(path_df)) { sf::st_bbox() %>% sf::st_as_sfc() %>% sf::st_as_sf() + + # fline_subset <- + # flines %>% + # sf::st_intersection(fema_fgb) + + fline_fema_intersects <- sf::st_intersects(flines, fema_fgb) + fema_fline_intersects <- sf::st_intersects(fema_fgb, flines) + fema_subset$FLD_AR_ID %>% unique() %>% length() + + fema_subset <- + fema_fgb[unlist(fema_fline_intersects), ] %>% + rmapshaper::ms_simplify() + + fema_subset$FLD_AR_ID %>% unique() %>% length() + + fema_subset <- fema_subset %>% + rmapshaper::ms_dissolve(field = "FLD_AR_ID") + + + fema_subset %>% mapview::npts() + unlist(flines_with_fema)[1] + fema_fgb + + flines_with_fema <- flines[lengths(fline_fema_intersects) > 0,] + + + # flines %>% + # sf::st_filter( + # fema_fgb, + # .predicate = st_touches + # ) + fema_subset %>% + dplyr::filter() + fema_subset %>% mapview::npts() + + transects_subset <- + transects %>% + dplyr::filter(hy_id %in% flines_with_fema$id) + + flines %>% sf::st_crs() + mapview::mapview(flines) + fema_bb + + # mapview::mapview(fema_fgb, col.regions = "dodgerblue") + + mapview::mapview(fema_subset, col.regions = "dodgerblue") + + mapview::mapview(transects_subset, color = "red") + + mapview::mapview(flines_with_fema, color = "green") + message("file: ", file) - } \ No newline at end of file + # } \ No newline at end of file diff --git a/runners/cs_runner/config_vars.R b/runners/cs_runner/config_vars.R index 6d9e087..3019047 100644 --- a/runners/cs_runner/config_vars.R +++ b/runners/cs_runner/config_vars.R @@ -22,6 +22,16 @@ FEMA_S3_DIR <- paste0(FEMA_S3_BUCKET, FEMA_S3_BUCKET_PREFIX) # FEMA100 year flood map FGB save location (temporary, will be deleted after processing) FEMA_FGB_PATH <- paste0(base_dir, "/FEMA100") +FEMA_GEOJSON_PATH <- paste0(base_dir, "/FEMA100_geojson") +FEMA_CLEAN_PATH <- paste0(base_dir, "/FEMA100_clean") +FEMA_GPKG_PATH <- paste0(base_dir, "/FEMA100_gpkg") +FEMA_GPKG_BB_PATH <- paste0(base_dir, "/FEMA100_bounding_box") + +# TODO: these can be deleted +FEMA_SIMPLIFIED_PATH <- paste0(base_dir, "/FEMA100_simplified") +FEMA_DISSOLVED_PATH <- paste0(base_dir, "/FEMA100_dissolved") +FEMA_EXPLODED_PATH <- paste0(base_dir, "/FEMA100_exploded") + # ---------------------------------------------------------------------------- # ---- Cross section point extraction constant variables ---- diff --git a/runners/cs_runner/download_fema100.R b/runners/cs_runner/download_fema100.R index 8732b0e..b7d745a 100644 --- a/runners/cs_runner/download_fema100.R +++ b/runners/cs_runner/download_fema100.R @@ -7,14 +7,61 @@ source("runners/cs_runner/config_vars.R") # ------------------------------------------------------------------------------------- -# ---- Create FEMA100/ directory (if it does NOT exist) ---- +# ---- Create FEMA100/ directory and bounding box dir (if it does NOT exist) ---- # ------------------------------------------------------------------------------------- - +# create FEMA FGB directory (if not exists) if (!dir.exists(FEMA_FGB_PATH)) { message(paste0("FEMA100/ directory does not exist...\nCreating directory:\n > '", FEMA_FGB_PATH, "'")) dir.create(FEMA_FGB_PATH) } +# create geojsons directory (if not exists) +if (!dir.exists(FEMA_GEOJSON_PATH)) { + message(paste0(FEMA_GEOJSON_PATH, " directory does not exist...\nCreating directory:\n > '", FEMA_GEOJSON_PATH, "'")) + 
dir.create(FEMA_GEOJSON_PATH)
+}
+
+# create directory for cleaned FEMA geometries (if not exists)
+if (!dir.exists(FEMA_CLEAN_PATH)) {
+  message(paste0(FEMA_CLEAN_PATH, " directory does not exist...\nCreating directory:\n > '", FEMA_CLEAN_PATH, "'"))
+  dir.create(FEMA_CLEAN_PATH)
+}
+
+# create directory for cleaned FEMA geometries as geopackages (if not exists)
+if (!dir.exists(FEMA_GPKG_PATH)) {
+  message(paste0(FEMA_GPKG_PATH, " directory does not exist...\nCreating directory:\n > '", FEMA_GPKG_PATH, "'"))
+  dir.create(FEMA_GPKG_PATH)
+}
+
+# create simplified geojsons directory (if not exists)
+if (!dir.exists(FEMA_SIMPLIFIED_PATH)) {
+  message(paste0(FEMA_SIMPLIFIED_PATH, " directory does not exist...\nCreating directory:\n > '", FEMA_SIMPLIFIED_PATH, "'"))
+  dir.create(FEMA_SIMPLIFIED_PATH)
+}
+
+# create dissolved geojsons directory (if not exists)
+if (!dir.exists(FEMA_DISSOLVED_PATH)) {
+  message(paste0(FEMA_DISSOLVED_PATH, " directory does not exist...\nCreating directory:\n > '", FEMA_DISSOLVED_PATH, "'"))
+  dir.create(FEMA_DISSOLVED_PATH)
+}
+
+# create exploded geojsons directory (if not exists)
+if (!dir.exists(FEMA_EXPLODED_PATH)) {
+  message(paste0(FEMA_EXPLODED_PATH, " directory does not exist...\nCreating directory:\n > '", FEMA_EXPLODED_PATH, "'"))
+  dir.create(FEMA_EXPLODED_PATH)
+}
+
+# create FEMA GPKG Bounding Boxes directory (if not exists)
+if (!dir.exists(FEMA_GPKG_BB_PATH)) {
+  message(paste0(FEMA_GPKG_BB_PATH, " directory does not exist...\nCreating directory:\n > '", FEMA_GPKG_BB_PATH, "'"))
+  dir.create(FEMA_GPKG_BB_PATH)
+}
+
+
+# -------------------------------------------------------------------------------------
+# ---- Get list of FEMA FGB files in S3 bucket ----
+# -------------------------------------------------------------------------------------
+
 # list objects in S3 bucket, and regular expression match to nextgen_.gpkg pattern
 fema_list_command <- paste0('#!/bin/bash
             # AWS S3 Bucket and Directory information
@@ -62,3 +109,154 @@ for (key in FEMA_BUCKET_KEYS[1:length(FEMA_BUCKET_KEYS)]) {
     message("File already exists at:\n > '", local_save_path, "'")
   }
 }
+# -------------------------------------------------------------------------------------
+# ---- Run ogr2ogr to get FGB files into geojson ----
+# -------------------------------------------------------------------------------------
+
+for (key in FEMA_BUCKET_KEYS) {
+
+  local_fema_path <- paste0(FEMA_FGB_PATH, "/", key)
+
+  geojson_filename <- gsub(".fgb", ".geojson", key)
+  geojson_save_path <- paste0(FEMA_GEOJSON_PATH, "/", geojson_filename)
+
+  message("S3 Key: '", key, "'")
+  message("Converting \n > '", key, "' to geojson '", geojson_filename, "'")
+
+  ogr2ogr_command = paste0("ogr2ogr ", geojson_save_path, " ", local_fema_path)
+
+  system(ogr2ogr_command)
+
+  message("Saved '", geojson_filename, "' saved to: \n > '", geojson_save_path, "'")
+  message()
+}
+
+# -------------------------------------------------------------------------------------
+# ---- Clean FEMA geometries (Simplify, Dissolve, Explode) ----
+# -------------------------------------------------------------------------------------
+
+# paths to FEMA 100 year flood plain files
+FEMA_geojson_paths <- list.files(FEMA_GEOJSON_PATH, full.names = TRUE)
+# FEMA_BB_paths <- list.files(FEMA_GPKG_BB_PATH, full.names = TRUE)
+
+for (fema_file in FEMA_geojson_paths) {
+  message("Fema 100 year flood plain:\n > '", basename(fema_file), "'")
+  # message("Fema 100 year flood plain:\n > '", fema_file, "'")
+  output_clean_filename <-
gsub(".geojson", "_clean.geojson", basename(fema_file)) + output_path <- paste0(FEMA_CLEAN_PATH, "/", output_clean_filename) + + message("Running mapshaper 'simplify', 'dissolve', and 'explode' via CLI...") + + # mapshaper_command = paste0('node --max-old-space-size=16000 /opt/homebrew/bin/mapshaper ', fema_file, ' -simplify 0.15 visvalingam -o ', output_path) + # test_file_path <- "/Users/anguswatters/Desktop/lynker-spatial/FEMA100_simplified/Wyoming-100yr-flood_valid_clean.geojson" + mapshaper_command = paste0('node --max-old-space-size=16000 /opt/homebrew/bin/mapshaper ', fema_file, + ' -simplify 0.15 visvalingam \\', + ' -dissolve \\', + ' -explode \\', + ' -o ', output_path + ) + system(mapshaper_command) + message("Mapshaper command: ", mapshaper_command) + message() +} + +# ------------------------------------------------------------------------------------- +# ---- Convert cleaned FEMA geometries to geopackages ---- +# ------------------------------------------------------------------------------------- + +# paths to FEMA 100 year flood plain files +FEMA_clean_paths <- list.files(FEMA_CLEAN_PATH, full.names = TRUE) + +for (fema_file in FEMA_clean_paths) { + message("Fema 100 year flood plain:\n > '", basename(fema_file), "'") + # message("Fema 100 year flood plain:\n > '", fema_file, "'") + output_gpkg_filename <- gsub("_clean.geojson", "_clean.gpkg", basename(fema_file)) + output_path <- paste0(FEMA_GPKG_PATH, "/", output_gpkg_filename) + + message("Converting geojson files to gpkg...") + + message("Converting \n > '", fema_file, "' to geojson '", output_gpkg_filename, "'") + + # mapshaper_command = paste0('node --max-old-space-size=16000 /opt/homebrew/bin/mapshaper ', fema_file, ' -simplify 0.15 visvalingam -o ', output_path) + # test_file_path <- "/Users/anguswatters/Desktop/lynker-spatial/FEMA100_simplified/Wyoming-100yr-flood_valid_clean.geojson" + ogr2ogr_command = paste0("ogr2ogr -nlt MULTIPOLYGON ", output_path, " ", fema_file) + + # system(ogr2ogr_command) + + message("Saved '", output_gpkg_filename, "' saved to: \n > '", output_path, "'") + message() +} + +# # ------------------------------------------------------------------------------------- +# # ---- Apply hydrofab::clean_geometries() to cleaned FEMA geometries ---- +# # ------------------------------------------------------------------------------------- +# +# # paths to FEMA 100 year flood plain files +# FEMA_gpkg_paths <- list.files(FEMA_GPKG_PATH, full.names = TRUE) +# +# for (fema_file in FEMA_gpkg_paths) { +# message("Applying final cleaning process to:\n > '", basename(fema_file), "'") +# +# fema <- sf::read_sf(fema_file) +# +# fema +# +# # message("Fema 100 year flood plain:\n > '", fema_file, "'") +# output_gpkg_filename <- gsub("_clean.geojson", "_clean.gpkg", basename(fema_file)) +# output_path <- paste0(FEMA_GPKG_PATH, "/", output_gpkg_filename) +# +# message("Converting geojson files to gpkg...") +# +# message("Converting \n > '", fema_file, "' to geojson '", output_gpkg_filename, "'") +# +# # mapshaper_command = paste0('node --max-old-space-size=16000 /opt/homebrew/bin/mapshaper ', fema_file, ' -simplify 0.15 visvalingam -o ', output_path) +# # test_file_path <- "/Users/anguswatters/Desktop/lynker-spatial/FEMA100_simplified/Wyoming-100yr-flood_valid_clean.geojson" +# ogr2ogr_command = paste0("ogr2ogr -nlt MULTIPOLYGON ", output_path, " ", fema_file) +# +# system(ogr2ogr_command) +# +# message("Saved '", output_gpkg_filename, "' saved to: \n > '", output_path, "'") +# message() +# } + +# 
------------------------------------------------------------------------------------- +# ---- Generate bounding box gpkg for each FEMA FGB ---- +# ------------------------------------------------------------------------------------- + +for (key in FEMA_BUCKET_KEYS) { + + local_fema_path <- paste0(FEMA_FGB_PATH, "/", key) + + gpkg_filename <- gsub(".fgb", "_bb.gpkg", key) + bb_save_path <- paste0(FEMA_GPKG_BB_PATH, "/", gpkg_filename) + + message("S3 Key: '", key, "'") + message("Local FEMA file:\n > '", local_fema_path, "'") + message("Local output FEMA bounding box file:\n > '", bb_save_path, "'") + + # fema <- sf::read_sf(local_fema_path) + + fema_bb <- + local_fema_path %>% + sf::read_sf() %>% + sf::st_bbox() %>% + sf::st_as_sfc() %>% + sf::st_as_sf() %>% + dplyr::mutate( + fema_fgb = key, + fema_fgb_path = local_fema_path, + state = gsub("-100yr-flood_valid.fgb", "", key) + ) %>% + dplyr::select(fema_fgb, fema_fgb_path, state, geometry = x) %>% + sf::st_transform(5070) + + message("Saving FEMA bounding box file:\n > '", bb_save_path, "'") + + sf::write_sf(fema_bb, bb_save_path) + message() +} + + + + + From d49133e09c88028b5fa7a738fd6c31a44914e15c Mon Sep 17 00:00:00 2001 From: anguswg-ucsb <anguswatters@gmail.com> Date: Fri, 26 Apr 2024 14:38:57 -0700 Subject: [PATCH 19/64] added code to apply clean_geometries from hydrofab --- runners/cs_runner/download_fema100.R | 63 +++++++++++++++------------- 1 file changed, 34 insertions(+), 29 deletions(-) diff --git a/runners/cs_runner/download_fema100.R b/runners/cs_runner/download_fema100.R index b7d745a..c8a8dc2 100644 --- a/runners/cs_runner/download_fema100.R +++ b/runners/cs_runner/download_fema100.R @@ -166,6 +166,7 @@ for (fema_file in FEMA_geojson_paths) { # paths to FEMA 100 year flood plain files FEMA_clean_paths <- list.files(FEMA_CLEAN_PATH, full.names = TRUE) +# rmapshaper::ms_explode() for (fema_file in FEMA_clean_paths) { message("Fema 100 year flood plain:\n > '", basename(fema_file), "'") @@ -181,8 +182,8 @@ for (fema_file in FEMA_clean_paths) { # test_file_path <- "/Users/anguswatters/Desktop/lynker-spatial/FEMA100_simplified/Wyoming-100yr-flood_valid_clean.geojson" ogr2ogr_command = paste0("ogr2ogr -nlt MULTIPOLYGON ", output_path, " ", fema_file) - # system(ogr2ogr_command) - + system(ogr2ogr_command) + # message("ogr2ogr: ", ogr2ogr_command) message("Saved '", output_gpkg_filename, "' saved to: \n > '", output_path, "'") message() } @@ -191,33 +192,37 @@ for (fema_file in FEMA_clean_paths) { # # ---- Apply hydrofab::clean_geometries() to cleaned FEMA geometries ---- # # ------------------------------------------------------------------------------------- # -# # paths to FEMA 100 year flood plain files -# FEMA_gpkg_paths <- list.files(FEMA_GPKG_PATH, full.names = TRUE) -# -# for (fema_file in FEMA_gpkg_paths) { -# message("Applying final cleaning process to:\n > '", basename(fema_file), "'") -# -# fema <- sf::read_sf(fema_file) -# -# fema -# -# # message("Fema 100 year flood plain:\n > '", fema_file, "'") -# output_gpkg_filename <- gsub("_clean.geojson", "_clean.gpkg", basename(fema_file)) -# output_path <- paste0(FEMA_GPKG_PATH, "/", output_gpkg_filename) -# -# message("Converting geojson files to gpkg...") -# -# message("Converting \n > '", fema_file, "' to geojson '", output_gpkg_filename, "'") -# -# # mapshaper_command = paste0('node --max-old-space-size=16000 /opt/homebrew/bin/mapshaper ', fema_file, ' -simplify 0.15 visvalingam -o ', output_path) -# # test_file_path <- 
"/Users/anguswatters/Desktop/lynker-spatial/FEMA100_simplified/Wyoming-100yr-flood_valid_clean.geojson" -# ogr2ogr_command = paste0("ogr2ogr -nlt MULTIPOLYGON ", output_path, " ", fema_file) -# -# system(ogr2ogr_command) -# -# message("Saved '", output_gpkg_filename, "' saved to: \n > '", output_path, "'") -# message() -# } +# paths to FEMA 100 year flood plain files +FEMA_gpkg_paths <- list.files(FEMA_GPKG_PATH, full.names = TRUE) + +for (fema_file in FEMA_gpkg_paths) { + message("Applying final cleaning process to:\n > '", basename(fema_file), "'") + + fema <- + fema_file %>% + sf::read_sf() %>% + sf::st_transform(5070)%>% + dplyr::mutate( + fema_id = 1:dplyr::n() + ) %>% + dplyr::relocate(fema_id) + + fema_clean <- hydrofab::clean_geometry(catchments = sf::st_cast(fema, "POLYGON"), ID = "fema_id") + # mapview::mapview(fema, col.region = "red") + mapview::mapview(fema_clean, col.region = "green") + + # message("Fema 100 year flood plain:\n > '", fema_file, "'") + # output_gpkg_filename <- gsub("_clean.geojson", "_clean.gpkg", basename(fema_file)) + # output_path <- paste0(FEMA_GPKG_PATH, "/", output_gpkg_filename) + + message("Applying hydrofab::clean_geometry() \n > '", fema_file) + sf::write_sf( + fema_clean, + fema_file + ) + + message("Rewritting '", fema_file, "'") + message() +} # ------------------------------------------------------------------------------------- # ---- Generate bounding box gpkg for each FEMA FGB ---- From 633109ba9b78d5f16a25eb25fd8b0f7790d75a0a Mon Sep 17 00:00:00 2001 From: anguswg-ucsb <anguswatters@gmail.com> Date: Tue, 30 Apr 2024 09:14:49 -0700 Subject: [PATCH 20/64] created a preprocess_fema.R script in cs_runner/ that takes FEMA FGBs and simplifies, and cleans FGBs and outputs cleaned GPKGs for use with nextgen flowlines and transect generation --- runners/cs_runner/00_fema.R | 86 +-------- runners/cs_runner/config_vars.R | 8 +- runners/cs_runner/download_fema100.R | 156 +---------------- runners/cs_runner/preprocess_fema.R | 249 +++++++++++++++++++++++++++ 4 files changed, 264 insertions(+), 235 deletions(-) create mode 100644 runners/cs_runner/preprocess_fema.R diff --git a/runners/cs_runner/00_fema.R b/runners/cs_runner/00_fema.R index c2dfee9..5835ff3 100644 --- a/runners/cs_runner/00_fema.R +++ b/runners/cs_runner/00_fema.R @@ -1,58 +1,5 @@ library(dplyr) -source("runners/cs_runner/config.R") - -# transect bucket prefix -transects_prefix <- paste0(s3_bucket, version_prefix, "/3D/transects/") - -# paths to nextgen datasets and model attribute parquet files -nextgen_files <- list.files(nextgen_dir, full.names = FALSE) -FEMA_FGB_files <- list.files(FEMA_FGB_PATH, full.names = TRUE) -FEMA_files <- list.files(FEMA_DISSOLVED_PATH, full.names = TRUE) -# FEMA_BB_files <- list.files(FEMA_FGB_BB_PATH, full.names = TRUE) - -transects_files <- list.files(transects_dir, full.names = FALSE) -transects_files <- transects_files[!grepl("updated", transects_files)] - -# string to fill in "cs_source" column in output datasets -net_source <- "hydrofabric3D" - -# ensure the files are in the same order and matched up by VPU -path_df <- align_files_by_vpu( - x = nextgen_files, - y = transects_files, - base = base_dir -) - -############################################################# -######################################################################## -# loop over each VPU and generate cross sections, then save locally and upload to S3 bucket -for(i in 1:nrow(path_df)) { - # nextgen file and full path - nextgen_file <- path_df$x[i] - nextgen_path <- 
paste0(nextgen_dir, nextgen_file) - - transect_file <- path_df$y[i] - transect_path <- paste0(transects_dir, transect_file) - - transect_path - - # # model attributes file and full path - # model_attr_file <- path_df$y[i] - # model_attr_path <- paste0(model_attr_dir, model_attr_file) - # FEMA_BB_files - - message("Creating VPU ", path_df$vpu[i], "\n - transects: ", transect_file, "\n - flowpaths: '", nextgen_file, "'") - # message("Creating VPU ", path_df$vpu[i], " transects:\n - flowpaths: '", nextgen_file, "'\n - model attributes: '", model_attr_file, "'") - FEMA_files - - fema <- sf::read_sf("/Users/anguswatters/Desktop/lynker-spatial/FEMA100_dissolved/Tennessee-100yr-flood_valid_dissolved.geojson") - plot(fema$geometry) - - } - -library(dplyr) - # Generate the flowlines layer for the final cross_sections_<VPU>.gpkg for each VPU source("runners/cs_runner/config.R") @@ -68,8 +15,7 @@ transects_prefix <- paste0(s3_bucket, version_prefix, "/3D/transects/") # paths to nextgen datasets and model attribute parquet files nextgen_files <- list.files(nextgen_dir, full.names = FALSE) FEMA_files <- list.files(FEMA_FGB_PATH, full.names = FALSE) -FEMA_BB_files <- list.files(FEMA_FGB_BB_PATH, full.names = TRUE) - +FEMA_BB_files <- list.files(FEMA_FGB_BB_PATH, full.names = FALSE) transects_files <- list.files(transects_dir, full.names = FALSE) transects_files <- transects_files[!grepl("updated", transects_files)] @@ -83,32 +29,11 @@ path_df <- align_files_by_vpu( base = base_dir ) +path_df -# loop over each VPU and generate cross sections, then save locally and upload to S3 bucket -for(i in 1:nrow(path_df)) { - - i = 8 - - # nextgen file and full path - nextgen_file <- path_df$x[i] - nextgen_path <- paste0(nextgen_dir, nextgen_file) - - # transect_file <- path_df$y[i] - # transect_path <- paste0(transects_dir, transect_file) - # transect_path - - # # model attributes file and full path - # model_attr_file <- path_df$y[i] - # model_attr_path <- paste0(model_attr_dir, model_attr_file) - FEMA_BB_files - - message("Creating VPU ", path_df$vpu[i], "\n - transects: ", transect_file, "\n - flowpaths: '", nextgen_file, "'") - # message("Creating VPU ", path_df$vpu[i], " transects:\n - flowpaths: '", nextgen_file, "'\n - model attributes: '", model_attr_file, "'") - - # read in nextgen data - flines <- sf::read_sf(nextgen_path, layer = "flowpaths") - -} +us_states <- + USAboundaries::us_states() %>% + sf::st_transform(5070) # loop over each VPU and generate cross sections, then save locally and upload to S3 bucket # for(i in 1:nrow(path_df)) { @@ -127,7 +52,6 @@ for(i in 1:nrow(path_df)) { # # model attributes file and full path # model_attr_file <- path_df$y[i] # model_attr_path <- paste0(model_attr_dir, model_attr_file) - FEMA_BB_files message("Creating VPU ", path_df$vpu[i], "\n - transects: ", transect_file, "\n - flowpaths: '", nextgen_file, "'") # message("Creating VPU ", path_df$vpu[i], " transects:\n - flowpaths: '", nextgen_file, "'\n - model attributes: '", model_attr_file, "'") diff --git a/runners/cs_runner/config_vars.R b/runners/cs_runner/config_vars.R index 3019047..71233c9 100644 --- a/runners/cs_runner/config_vars.R +++ b/runners/cs_runner/config_vars.R @@ -25,12 +25,12 @@ FEMA_FGB_PATH <- paste0(base_dir, "/FEMA100") FEMA_GEOJSON_PATH <- paste0(base_dir, "/FEMA100_geojson") FEMA_CLEAN_PATH <- paste0(base_dir, "/FEMA100_clean") FEMA_GPKG_PATH <- paste0(base_dir, "/FEMA100_gpkg") -FEMA_GPKG_BB_PATH <- paste0(base_dir, "/FEMA100_bounding_box") +FEMA_GPKG_BB_PATH <- paste0(base_dir, 
"/FEMA100_bounding_box") # TODO: Probably can be deleted too, not sure yet # TODO: these can be deleted -FEMA_SIMPLIFIED_PATH <- paste0(base_dir, "/FEMA100_simplified") -FEMA_DISSOLVED_PATH <- paste0(base_dir, "/FEMA100_dissolved") -FEMA_EXPLODED_PATH <- paste0(base_dir, "/FEMA100_exploded") +# FEMA_SIMPLIFIED_PATH <- paste0(base_dir, "/FEMA100_simplified") +# FEMA_DISSOLVED_PATH <- paste0(base_dir, "/FEMA100_dissolved") +# FEMA_EXPLODED_PATH <- paste0(base_dir, "/FEMA100_exploded") # ---------------------------------------------------------------------------- diff --git a/runners/cs_runner/download_fema100.R b/runners/cs_runner/download_fema100.R index c8a8dc2..0606142 100644 --- a/runners/cs_runner/download_fema100.R +++ b/runners/cs_runner/download_fema100.R @@ -9,7 +9,7 @@ source("runners/cs_runner/config_vars.R") # ------------------------------------------------------------------------------------- # ---- Create FEMA100/ directory and bounding box dir (if it does NOT exist) ---- # ------------------------------------------------------------------------------------- -# create FEMA FGB directory (if not exists) + if (!dir.exists(FEMA_FGB_PATH)) { message(paste0("FEMA100/ directory does not exist...\nCreating directory:\n > '", FEMA_FGB_PATH, "'")) dir.create(FEMA_FGB_PATH) @@ -57,6 +57,10 @@ if (!dir.exists(FEMA_GPKG_BB_PATH)) { dir.create(FEMA_GPKG_BB_PATH) } +if (!dir.exists(FEMA_FGB_BB_PATH)) { + message(paste0(FEMA_FGB_BB_PATH, " directory does not exist...\nCreating directory:\n > '", FEMA_FGB_BB_PATH, "'")) + dir.create(FEMA_FGB_BB_PATH) +} # ------------------------------------------------------------------------------------- # ---- Get list of FEMA FGB files in S3 bucket ---- @@ -91,7 +95,7 @@ FEMA_BUCKET_KEYS <- system(fema_list_command, intern = TRUE) # ------------------------------------------------------------------------------------- # Parse the selected S3 objects keys from the FEMA100 bucket directory copy them to the local destination directory if the file does NOT exist yet -for (key in FEMA_BUCKET_KEYS[1:length(FEMA_BUCKET_KEYS)]) { +for (key in FEMA_BUCKET_KEYS) { local_save_path <- paste0(FEMA_FGB_PATH, "/", key) if(!file.exists(local_save_path)) { @@ -109,157 +113,9 @@ for (key in FEMA_BUCKET_KEYS[1:length(FEMA_BUCKET_KEYS)]) { message("File already exists at:\n > '", local_save_path, "'") } } -# ------------------------------------------------------------------------------------- -# ---- Run ogr2ogr to get FGB files into geojson ---- -# ------------------------------------------------------------------------------------- - -for (key in FEMA_BUCKET_KEYS) { - - local_fema_path <- paste0(FEMA_FGB_PATH, "/", key) - - geojson_filename <- gsub(".fgb", ".geojson", key) - geojson_save_path <- paste0(FEMA_GEOJSON_PATH, "/", geojson_filename) - - message("S3 Key: '", key, "'") - message("Converting \n > '", key, "' to geojson '", geojson_filename, "'") - - ogr2ogr_command = paste0("ogr2ogr ", geojson_save_path, " ", local_fema_path) - - system(ogr2ogr_command) - - message("Saved '", geojson_filename, "' saved to: \n > '", geojson_save_path, "'") - message() -} - -# ------------------------------------------------------------------------------------- -# ---- Clean FEMA geometries (Simplify, Dissolve, Explode) ---- -# ------------------------------------------------------------------------------------- - -# paths to FEMA 100 year flood plain files -FEMA_geojson_paths <- list.files(FEMA_GEOJSON_PATH, full.names = TRUE) -# FEMA_BB_paths <- list.files(FEMA_GPKG_BB_PATH, 
full.names = TRUE) - -for (fema_file in FEMA_geojson_paths) { - message("Fema 100 year flood plain:\n > '", basename(fema_file), "'") - # message("Fema 100 year flood plain:\n > '", fema_file, "'") - output_clean_filename <- gsub(".geojson", "_clean.geojson", basename(fema_file)) - output_path <- paste0(FEMA_CLEAN_PATH, "/", output_clean_filename) - - message("Running mapshaper 'simplify', 'dissolve', and 'explode' via CLI...") - - # mapshaper_command = paste0('node --max-old-space-size=16000 /opt/homebrew/bin/mapshaper ', fema_file, ' -simplify 0.15 visvalingam -o ', output_path) - # test_file_path <- "/Users/anguswatters/Desktop/lynker-spatial/FEMA100_simplified/Wyoming-100yr-flood_valid_clean.geojson" - mapshaper_command = paste0('node --max-old-space-size=16000 /opt/homebrew/bin/mapshaper ', fema_file, - ' -simplify 0.15 visvalingam \\', - ' -dissolve \\', - ' -explode \\', - ' -o ', output_path - ) - system(mapshaper_command) - message("Mapshaper command: ", mapshaper_command) - message() -} -# ------------------------------------------------------------------------------------- -# ---- Convert cleaned FEMA geometries to geopackages ---- -# ------------------------------------------------------------------------------------- -# paths to FEMA 100 year flood plain files -FEMA_clean_paths <- list.files(FEMA_CLEAN_PATH, full.names = TRUE) -# rmapshaper::ms_explode() -for (fema_file in FEMA_clean_paths) { - message("Fema 100 year flood plain:\n > '", basename(fema_file), "'") - # message("Fema 100 year flood plain:\n > '", fema_file, "'") - output_gpkg_filename <- gsub("_clean.geojson", "_clean.gpkg", basename(fema_file)) - output_path <- paste0(FEMA_GPKG_PATH, "/", output_gpkg_filename) - - message("Converting geojson files to gpkg...") - - message("Converting \n > '", fema_file, "' to geojson '", output_gpkg_filename, "'") - - # mapshaper_command = paste0('node --max-old-space-size=16000 /opt/homebrew/bin/mapshaper ', fema_file, ' -simplify 0.15 visvalingam -o ', output_path) - # test_file_path <- "/Users/anguswatters/Desktop/lynker-spatial/FEMA100_simplified/Wyoming-100yr-flood_valid_clean.geojson" - ogr2ogr_command = paste0("ogr2ogr -nlt MULTIPOLYGON ", output_path, " ", fema_file) - - system(ogr2ogr_command) - # message("ogr2ogr: ", ogr2ogr_command) - message("Saved '", output_gpkg_filename, "' saved to: \n > '", output_path, "'") - message() -} - -# # ------------------------------------------------------------------------------------- -# # ---- Apply hydrofab::clean_geometries() to cleaned FEMA geometries ---- -# # ------------------------------------------------------------------------------------- -# -# paths to FEMA 100 year flood plain files -FEMA_gpkg_paths <- list.files(FEMA_GPKG_PATH, full.names = TRUE) - -for (fema_file in FEMA_gpkg_paths) { - message("Applying final cleaning process to:\n > '", basename(fema_file), "'") - - fema <- - fema_file %>% - sf::read_sf() %>% - sf::st_transform(5070)%>% - dplyr::mutate( - fema_id = 1:dplyr::n() - ) %>% - dplyr::relocate(fema_id) - - fema_clean <- hydrofab::clean_geometry(catchments = sf::st_cast(fema, "POLYGON"), ID = "fema_id") - # mapview::mapview(fema, col.region = "red") + mapview::mapview(fema_clean, col.region = "green") - - # message("Fema 100 year flood plain:\n > '", fema_file, "'") - # output_gpkg_filename <- gsub("_clean.geojson", "_clean.gpkg", basename(fema_file)) - # output_path <- paste0(FEMA_GPKG_PATH, "/", output_gpkg_filename) - - message("Applying hydrofab::clean_geometry() \n > '", fema_file) - sf::write_sf( - 
fema_clean, - fema_file - ) - - message("Rewritting '", fema_file, "'") - message() -} - -# ------------------------------------------------------------------------------------- -# ---- Generate bounding box gpkg for each FEMA FGB ---- -# ------------------------------------------------------------------------------------- - -for (key in FEMA_BUCKET_KEYS) { - - local_fema_path <- paste0(FEMA_FGB_PATH, "/", key) - - gpkg_filename <- gsub(".fgb", "_bb.gpkg", key) - bb_save_path <- paste0(FEMA_GPKG_BB_PATH, "/", gpkg_filename) - - message("S3 Key: '", key, "'") - message("Local FEMA file:\n > '", local_fema_path, "'") - message("Local output FEMA bounding box file:\n > '", bb_save_path, "'") - - # fema <- sf::read_sf(local_fema_path) - - fema_bb <- - local_fema_path %>% - sf::read_sf() %>% - sf::st_bbox() %>% - sf::st_as_sfc() %>% - sf::st_as_sf() %>% - dplyr::mutate( - fema_fgb = key, - fema_fgb_path = local_fema_path, - state = gsub("-100yr-flood_valid.fgb", "", key) - ) %>% - dplyr::select(fema_fgb, fema_fgb_path, state, geometry = x) %>% - sf::st_transform(5070) - - message("Saving FEMA bounding box file:\n > '", bb_save_path, "'") - - sf::write_sf(fema_bb, bb_save_path) - message() -} diff --git a/runners/cs_runner/preprocess_fema.R b/runners/cs_runner/preprocess_fema.R new file mode 100644 index 0000000..aa5c9aa --- /dev/null +++ b/runners/cs_runner/preprocess_fema.R @@ -0,0 +1,249 @@ +# Script should be run AFTER download_fema100.R as the FEMA 100 year flood plain data needs to first be downloaded from S3 +# This file will take a directory of FEMA 100 year FGB files (FEMA_FGB_PATH) the below processes to generate a cleaned, simple set of geopackages + +# Processing steps: +# - Convert FGBs to GEOJSON (via ogr2ogr) +# - Simplifies +# - Dissolves +# - Explodes +# - Convert cleaned GEOJSON to cleaned GPKGs (via ogr2ogr) +# - Apply hydrofab::clean_geometry() +# - Partition FEMA 100 geometries by VPU # TODO still +# - Get FEMA bounding box geometries (maybe) + +# load config variables +source("runners/cs_runner/config_vars.R") + +library(dplyr) +library(sf) + +# ------------------------------------------------------------------------------------- +# ---- OVERWRITE_FEMA_FILES constant logical ---- +# ---- > if TRUE, processing steps will be run again +# and overwrite existing previously processed files +# ------------------------------------------------------------------------------------- + +# Default is TRUE (i.e. 
a fresh processing run is done from start to finish) +OVERWRITE_FEMA_FILES <- TRUE + +# ------------------------------------------------------------------------------------- +# ---- Create directories (if they do NOT exist) ---- +# ------------------------------------------------------------------------------------- + +# create geojsons directory (if not exists) +if (!dir.exists(FEMA_GEOJSON_PATH)) { + message(paste0(FEMA_GEOJSON_PATH, " directory does not exist...\nCreating directory:\n > '", FEMA_GEOJSON_PATH, "'")) + dir.create(FEMA_GEOJSON_PATH) +} + +# create directory for cleaned FEMA geometries (if not exists) +if (!dir.exists(FEMA_CLEAN_PATH)) { + message(paste0(FEMA_CLEAN_PATH, " directory does not exist...\nCreating directory:\n > '", FEMA_CLEAN_PATH, "'")) + dir.create(FEMA_CLEAN_PATH) +} + +# create directory for cleaned FEMA geometries as geopackages (if not exists) +if (!dir.exists(FEMA_GPKG_PATH)) { + message(paste0(FEMA_GPKG_PATH, " directory does not exist...\nCreating directory:\n > '", FEMA_GPKG_PATH, "'")) + dir.create(FEMA_GPKG_PATH) +} + +# create FEMA GPKG Bounding Boxes directory (if not exists) +if (!dir.exists(FEMA_GPKG_BB_PATH)) { + message(paste0(FEMA_GPKG_BB_PATH, " directory does not exist...\nCreating directory:\n > '", FEMA_GPKG_BB_PATH, "'")) + dir.create(FEMA_GPKG_BB_PATH) +} + +# ------------------------------------------------------------------------------------- +# ---- Get paths to downloaded FEMA 100 FGBs ---- +# ------------------------------------------------------------------------------------- + +FEMA_FILENAMES <- list.files(FEMA_FGB_PATH, full.names = FALSE) +FEMA_FILE_PATHS <- paste0(FEMA_FGB_PATH, "/", FEMA_FILENAMES) + +# ------------------------------------------------------------------------------------- +# ---- Run ogr2ogr to get FGB files into geojson ---- +# ------------------------------------------------------------------------------------- + +for (file in FEMA_FILENAMES) { + + local_fema_path <- paste0(FEMA_FGB_PATH, "/", file) + + geojson_filename <- gsub(".fgb", ".geojson", file) + geojson_save_path <- paste0(FEMA_GEOJSON_PATH, "/", geojson_filename) + + message("FEMA filename: '", file, "'") + message("Converting \n > '", file, "' to geojson '", geojson_filename, "'") + + geojson_exists <- file.exists(geojson_save_path) + + message(" >>> '", geojson_filename, "' already exists? ", geojson_exists) + message(" >>> Overwrite? ", OVERWRITE_FEMA_FILES) + + # ogr2ogr command converting FGBs to GEOJSON for mapshaper processing + ogr2ogr_command = paste0("ogr2ogr ", geojson_save_path, " ", local_fema_path) + + if (OVERWRITE_FEMA_FILES) { + system(ogr2ogr_command) + message("Writting '", geojson_filename, "' to: \n > '", geojson_save_path, "'") + } + + message() +} + +# ------------------------------------------------------------------------------------- +# ---- Clean FEMA geometries (Simplify, Dissolve, Explode) ---- +# ------------------------------------------------------------------------------------- + +# paths to FEMA 100 year flood plain files +FEMA_geojson_paths <- list.files(FEMA_GEOJSON_PATH, full.names = TRUE) + +for (file in FEMA_geojson_paths) { + + message("Simplify, dissolve, explode > '", basename(file), "'") + # message("Fema 100 year flood plain:\n > '", file, "'") + output_clean_filename <- gsub(".geojson", "_clean.geojson", basename(file)) + output_path <- paste0(FEMA_CLEAN_PATH, "/", output_clean_filename) + + clean_geojson_exists <- file.exists(output_path) + message(" >>> '", output_clean_filename, "' already exists? 
", clean_geojson_exists) + message(" >>> Overwrite? ", OVERWRITE_FEMA_FILES) + + mapshaper_command = paste0('node --max-old-space-size=16000 /opt/homebrew/bin/mapshaper ', file, + ' -simplify 0.15 visvalingam \\', + ' -dissolve \\', + ' -explode \\', + ' -o ', output_path + ) + + if (OVERWRITE_FEMA_FILES) { + message("Running mapshaper 'simplify', 'dissolve', and 'explode' via CLI...") + system(mapshaper_command) + message("Writting '", output_clean_filename, "' to: \n > '", output_path, "'") + } + + message() +} + +# ------------------------------------------------------------------------------------- +# ---- Convert cleaned FEMA geometries to geopackages ---- +# ------------------------------------------------------------------------------------- + +# paths to FEMA 100 year flood plain files +FEMA_clean_paths <- list.files(FEMA_CLEAN_PATH, full.names = TRUE) + +for (file in FEMA_clean_paths) { + message("Fema 100 year flood plain:\n > '", basename(file), "'") + + output_gpkg_filename <- gsub("_clean.geojson", "_clean.gpkg", basename(file)) + output_path <- paste0(FEMA_GPKG_PATH, "/", output_gpkg_filename) + + message("Converting GEOJSON file to GPKG:\n > '", basename(file), "' > '", output_gpkg_filename, "'") + + # system(ogr2ogr_command) + + clean_gpkg_exists <- file.exists(output_path) + + message(" >>> '", output_gpkg_filename, "' already exists? ", clean_gpkg_exists) + message(" >>> Overwrite? ", OVERWRITE_FEMA_FILES) + + ogr2ogr_command <- paste0("ogr2ogr -nlt MULTIPOLYGON ", output_path, " ", file) + # ogr2ogr_command = paste0("ogr2ogr -nlt MULTIPOLYGON ", output_path, " ", file) + + if (OVERWRITE_FEMA_FILES) { + system(ogr2ogr_command) + message("Writting '", output_gpkg_filename, "' to: \n > '", output_path, "'") + } + message() +} + +# # ------------------------------------------------------------------------------------- +# # ---- Apply hydrofab::clean_geometries() to cleaned FEMA geometries ---- +# # ------------------------------------------------------------------------------------- +# +# paths to FEMA 100 year flood plain files +FEMA_gpkg_paths <- list.files(FEMA_GPKG_PATH, full.names = TRUE) + +for (file_path in FEMA_gpkg_paths) { + message("Applying hydrofab::clean_geometry() to:\n > '", basename(file_path), "'") + + fema <- + file_path %>% + sf::read_sf() %>% + sf::st_transform(5070) %>% + sf::st_cast("POLYGON") %>% + dplyr::mutate( + fema_id = 1:dplyr::n() + ) %>% + dplyr::select(fema_id, geometry = geom) + + message(" > ", nrow(fema), " POLYGONs") + message("Start time: ", Sys.time()) + + fema_clean <- hydrofab::clean_geometry( + catchments = fema, + ID = "fema_id" + ) + + fema_clean <- + fema_clean %>% + dplyr::mutate( + source = basename(file_path), + state = gsub("-100yr-flood_valid_clean.gpkg", "", source) + ) %>% + dplyr::select(fema_id, source, state, areasqkm, geometry) + + message("End time: ", Sys.time()) + + # geom_diff <- sf::st_difference(fema[1, ], fema_clean[1, ]) + # mapview::mapview(fema[1, ], col.regions = "red") + + # mapview::mapview(fema_clean[1, ], col.regions = "green") + + # mapview::mapview(geom_diff, col.regions = "white") + + if (OVERWRITE_FEMA_FILES) { + message("Writting '", basename(file_path), "' to: \n > '", file_path, "'") + sf::write_sf( + fema_clean, + file_path + ) + } + message() + +} + +# ------------------------------------------------------------------------------------- +# ---- Generate bounding box gpkg for each FEMA FGB ---- +# ------------------------------------------------------------------------------------- + +for (key in 
FEMA_FILENAMES) { + + local_fema_path <- paste0(FEMA_FGB_PATH, "/", key) + + gpkg_filename <- gsub(".fgb", "_bb.gpkg", key) + bb_save_path <- paste0(FEMA_FGB_BB_PATH, "/", gpkg_filename) + + message("S3 Key: '", key, "'") + message("Local FEMA file:\n > '", local_fema_path, "'") + message("Local output FEMA bounding box file:\n > '", bb_save_path, "'") + + # fema <- sf::read_sf(local_fema_path) + + fema_bb <- + local_fema_path %>% + sf::read_sf() %>% + sf::st_bbox() %>% + sf::st_as_sfc() %>% + sf::st_as_sf() %>% + dplyr::mutate( + fema_fgb = key, + fema_fgb_path = local_fema_path, + state = gsub("-100yr-flood_valid.fgb", "", key) + ) %>% + dplyr::select(fema_fgb, fema_fgb_path, state, geometry = x) %>% + sf::st_transform(5070) + + message("Saving FEMA bounding box file:\n > '", bb_save_path, "'") + + sf::write_sf(fema_bb, bb_save_path) + message() +} From 703fadfd334dab339a8a32c60e552836a2d18900 Mon Sep 17 00:00:00 2001 From: anguswg-ucsb <anguswatters@gmail.com> Date: Tue, 30 Apr 2024 18:29:53 -0700 Subject: [PATCH 21/64] partioning fema floodplains by vpu code --- runners/cs_runner/config_vars.R | 12 +++ runners/cs_runner/preprocess_fema.R | 151 ++++++++++++++++++++++++++++ runners/cs_runner/utils.R | 28 ++++++ 3 files changed, 191 insertions(+) create mode 100644 runners/cs_runner/utils.R diff --git a/runners/cs_runner/config_vars.R b/runners/cs_runner/config_vars.R index 71233c9..7774d5b 100644 --- a/runners/cs_runner/config_vars.R +++ b/runners/cs_runner/config_vars.R @@ -27,6 +27,18 @@ FEMA_CLEAN_PATH <- paste0(base_dir, "/FEMA100_clean") FEMA_GPKG_PATH <- paste0(base_dir, "/FEMA100_gpkg") FEMA_GPKG_BB_PATH <- paste0(base_dir, "/FEMA100_bounding_box") # TODO: Probably can be deleted too, not sure yet +FEMA_BY_VPU_PATH <- paste0(base_dir, "/FEMA_BY_VPU") +VPU_IDS <- sf::st_drop_geometry(nhdplusTools::get_boundaries())$VPUID + +FEMA_VPU_SUBFOLDERS <- paste0(FEMA_BY_VPU_PATH, "/VPU_", VPU_IDS) + +# FEMA_VPU_SUBFOLDERS <- paste0( +# FEMA_BY_VPU_PATH, "/VPU_", +# unlist( +# lapply(list.files(nextgen_dir, full.names = FALSE), function(vpu_file_names) { +# unlist(regmatches(vpu_file_names, gregexpr("\\d+[A-Za-z]*", vpu_file_names)))}) +# ) +# ) # TODO: these can be deleted # FEMA_SIMPLIFIED_PATH <- paste0(base_dir, "/FEMA100_simplified") # FEMA_DISSOLVED_PATH <- paste0(base_dir, "/FEMA100_dissolved") diff --git a/runners/cs_runner/preprocess_fema.R b/runners/cs_runner/preprocess_fema.R index aa5c9aa..00d31e6 100644 --- a/runners/cs_runner/preprocess_fema.R +++ b/runners/cs_runner/preprocess_fema.R @@ -13,9 +13,12 @@ # load config variables source("runners/cs_runner/config_vars.R") +source("runners/cs_runner/config.R") +source("runners/cs_runner/utils.R") library(dplyr) library(sf) +library(geos) # ------------------------------------------------------------------------------------- # ---- OVERWRITE_FEMA_FILES constant logical ---- @@ -48,6 +51,24 @@ if (!dir.exists(FEMA_GPKG_PATH)) { dir.create(FEMA_GPKG_PATH) } +# create directory for FEMA geomteries partioned by VPU +if (!dir.exists(FEMA_BY_VPU_PATH)) { + message(paste0(FEMA_BY_VPU_PATH, " directory does not exist...\nCreating directory:\n > '", FEMA_BY_VPU_PATH, "'")) + dir.create(FEMA_BY_VPU_PATH) +} + +for (VPU_SUBFOLDER in FEMA_VPU_SUBFOLDERS) { + # create directory for FEMA geomteries by VPU + if (!dir.exists(VPU_SUBFOLDER)) { + message("Creating FEMA VPU subfolder...") + message(paste0("'/", basename(VPU_SUBFOLDER), "' directory does not exist...\nCreating directory:\n > '", VPU_SUBFOLDER, "'")) + dir.create(VPU_SUBFOLDER) + } +} + 
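The repeated "create directory if it does not exist" blocks above could be collapsed into a single helper. A minimal sketch, base R only; the ensure_dir() name is illustrative and not part of this repo:

ensure_dir <- function(path) {
  # create 'path' (and any missing parents) only if it does not already exist
  if (!dir.exists(path)) {
    message("Creating directory:\n > '", path, "'")
    dir.create(path, recursive = TRUE)
  }
  invisible(path)
}

# usage sketch over the FEMA output directories defined in config_vars.R:
# invisible(lapply(c(FEMA_GEOJSON_PATH, FEMA_CLEAN_PATH, FEMA_GPKG_PATH,
#                    FEMA_GPKG_BB_PATH, FEMA_BY_VPU_PATH, FEMA_VPU_SUBFOLDERS),
#                  ensure_dir))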
+
+# FEMA_VPU_SUBFOLDERS <- paste0(FEMA_BY_VPU_PATH, "/VPU_", VPU_IDS)
+
+
 # create FEMA GPKG Bounding Boxes directory (if not exists) 
 if (!dir.exists(FEMA_GPKG_BB_PATH)) {
   message(paste0(FEMA_GPKG_BB_PATH, " directory does not exist...\nCreating directory:\n > '", FEMA_GPKG_BB_PATH, "'"))
@@ -211,6 +232,136 @@ for (file_path in FEMA_gpkg_paths) {
   
 }
 
+# # -------------------------------------------------------------------------------------
+# # ---- Partition parts of each FEMA GPKG into a NextGen VPU ----
+# # -------------------------------------------------------------------------------------
+
+# Clean FEMA GPKG files
+FEMA_CLEAN_GPKG_PATHS  <- list.files(FEMA_GPKG_PATH, full.names = TRUE)
+
+# paths to nextgen datasets and model attribute parquet files
+NEXTGEN_FILENAMES  <- list.files(nextgen_dir, full.names = FALSE)
+NEXTGEN_FILE_PATHS <- paste0(nextgen_dir, NEXTGEN_FILENAMES)
+
+for (file_path in FEMA_CLEAN_GPKG_PATHS) {
+  fema_file <- basename(file_path)
+  message("Partitioning FEMA polygons by VPU: \n > FEMA gpkg: '", fema_file, "'")
+  
+  # read in fema polygons
+  fema <- sf::read_sf(file_path)
+  
+  for (nextgen_path in NEXTGEN_FILE_PATHS) {
+    nextgen_basename <- basename(nextgen_path)
+    vpu              <- unlist(regmatches(nextgen_basename, gregexpr("\\d+[A-Za-z]*", nextgen_basename)))
+    
+    message("VPU: ", vpu)
+    message("- nextgen gpkg:\n > '", nextgen_path, "'")
+    message(" > Checking if '", fema_file, "' intersects with '", nextgen_basename, "'")
+    
+    # nextgen_path <- NEXTGEN_FILE_PATHS[13]
+    
+    # read in nextgen flowlines 
+    flines <- sf::read_sf(nextgen_path, layer = "flowpaths")
+    
+    # get the FEMA polygons that intersect with the nextgen flowlines
+    fema_intersect <- polygons_with_line_intersects(fema, flines)
+    
+    fema_in_nextgen <- nrow(fema_intersect) != 0
+    
+    message("FEMA intersects with nextgen flowlines? 
", fema_in_nextgen) + + if(fema_in_nextgen) { + + # create filepaths + vpu_subfolder <- paste0("VPU_", vpu) + vpu_subfolder_path <- paste0(FEMA_BY_VPU_PATH, "/", vpu_subfolder) + # vpu_subfolder_path <- FEMA_VPU_SUBFOLDERS[grepl(vpu_subfolder, FEMA_VPU_SUBFOLDERS)] + + fema_intersect <- + fema_intersect %>% + dplyr::mutate( + vpu = vpu + ) %>% + dplyr::select(vpu, fema_id, source, state, + areasqkm, geom) + + # state <- gsub("-100yr-flood_valid_clean.gpkg", "", fema_file) + + fema_vpu_filename <- gsub(".gpkg", paste0("_", vpu, ".gpkg"), fema_file) + fema_vpu_path <- paste0(vpu_subfolder_path, "/", fema_vpu_filename) + + + if (OVERWRITE_FEMA_FILES) { + message("Writting '", basename(fema_vpu_filename), "' to: \n > '", fema_vpu_path, "'") + + sf::write_sf( + fema_intersect, + fema_vpu_path + ) + } + + + } + message() + } + + + message( + "--------------------------------------------------------------\n", + "Completed all VPU intersections for: \n > '", fema_file, "'", + "\n--------------------------------------------------------------\n" + ) + +} +# +# text = "nextgen_03W.gpkg" +# VPU <- unlist(regmatches(text, +# gregexpr("\\d+[A-Za-z]*", text) +# )) +# +# fema <- +# file_path %>% +# sf::read_sf() %>% +# sf::st_transform(5070) %>% +# sf::st_cast("POLYGON") %>% +# dplyr::mutate( +# fema_id = 1:dplyr::n() +# ) %>% +# dplyr::select(fema_id, geometry = geom) +# +# message(" > ", nrow(fema), " POLYGONs") +# message("Start time: ", Sys.time()) +# +# fema_clean <- hydrofab::clean_geometry( +# catchments = fema, +# ID = "fema_id" +# ) +# +# fema_clean <- +# fema_clean %>% +# dplyr::mutate( +# source = basename(file_path), +# state = gsub("-100yr-flood_valid_clean.gpkg", "", source) +# ) %>% +# dplyr::select(fema_id, source, state, areasqkm, geometry) +# +# message("End time: ", Sys.time()) +# +# # geom_diff <- sf::st_difference(fema[1, ], fema_clean[1, ]) +# # mapview::mapview(fema[1, ], col.regions = "red") + +# # mapview::mapview(fema_clean[1, ], col.regions = "green") + +# # mapview::mapview(geom_diff, col.regions = "white") +# +# if (OVERWRITE_FEMA_FILES) { +# message("Writting '", basename(file_path), "' to: \n > '", file_path, "'") +# sf::write_sf( +# fema_clean, +# file_path +# ) +# } +# message() +# +# } # ------------------------------------------------------------------------------------- # ---- Generate bounding box gpkg for each FEMA FGB ---- # ------------------------------------------------------------------------------------- diff --git a/runners/cs_runner/utils.R b/runners/cs_runner/utils.R new file mode 100644 index 0000000..ae40b70 --- /dev/null +++ b/runners/cs_runner/utils.R @@ -0,0 +1,28 @@ +#' Get the polygons that interesect with any of the linestring geometries +#' This is just a wrapper around geos::geos_intersects_matrix. Takes in sf dataframes, uses geos, then outputs sf dataframes +#' @param polygons polygon sf object. Default is NULL +#' @param lines linestring sf object. Default is NULL. 
+#' 
+#' @return sf dataframe of polygons that intersect with the linestrings
+polygons_with_line_intersects <- function(polygons = NULL, lines = NULL) {
+  
+  if (is.null(polygons)) {
+    stop("NULL 'polygons' argument, provide an sf dataframe of POLYGON or MULTIPOLYGON geometries")
+  }
+  
+  if (is.null(lines)) {
+    stop("NULL 'lines' argument, provide an sf dataframe of LINESTRING or MULTILINESTRING geometries")
+  }
+  
+  # Convert the SF geometries to geos geometries
+  polygons_geos <- geos::as_geos_geometry(polygons)
+  lines_geos    <- geos::as_geos_geometry(lines)
+  
+  # create an index between the polygons and linestrings
+  lines_index <- geos::geos_intersects_matrix(polygons_geos, lines_geos)
+  
+  # get the polygons that have at least one intersection with the 'lines'
+  polygons_with_lines <- polygons[lengths(lines_index) != 0, ]
+  
+  return(polygons_with_lines)
+}

From 5d36e1ecd35967b5619d5fbdadda0f22f56af51c Mon Sep 17 00:00:00 2001
From: anguswg-ucsb <anguswatters@gmail.com>
Date: Wed, 1 May 2024 13:59:07 -0700
Subject: [PATCH 22/64] small minor cleanups

---
 runners/cs_runner/02_cs_pts.R       | 15 +++------
 runners/cs_runner/03_inject_ml.R    | 23 -------------
 runners/cs_runner/preprocess_fema.R | 51 -----------------------------
 3 files changed, 5 insertions(+), 84 deletions(-)

diff --git a/runners/cs_runner/02_cs_pts.R b/runners/cs_runner/02_cs_pts.R
index 7d5cc8e..4b18b93 100644
--- a/runners/cs_runner/02_cs_pts.R
+++ b/runners/cs_runner/02_cs_pts.R
@@ -13,7 +13,6 @@ library(sf)
 
 # cross section bucket prefix
 cs_pts_prefix <- paste0(s3_bucket, version_prefix, "/3D/dem-cross-sections/")
-# cs_pts_prefix <- paste0(s3_bucket, "v20/3D/dem-cross-sections/")
 
 # transect bucket prefix
 transects_prefix <- paste0(s3_bucket, version_prefix, "/3D/transects/")
@@ -48,9 +47,8 @@ path_df <- align_files_by_vpu(
 # loop over the nextgen and transect datasets (by VPU) and extract point elevations across points on each transect line, 
 # then classify the points, and create a parquet file with hy_id, cs_id, pt_id, X, Y, Z data. 
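A usage sketch for the polygons_with_line_intersects() helper added in utils.R above, mirroring how preprocess_fema.R calls it (both file paths are illustrative, not taken from the repo):

library(sf)

# keep only the FEMA polygons that intersect at least one NextGen flowline
fema   <- sf::read_sf("FEMA100_gpkg/Tennessee-100yr-flood_valid_clean.gpkg")
flines <- sf::read_sf("nextgen_06.gpkg", layer = "flowpaths")

fema_hits <- polygons_with_line_intersects(polygons = fema, lines = flines)
nrow(fema_hits)  # number of FEMA polygons touching the flowline network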
# Save parquet locally and upload to specified S3 bucket -for (i in 13:nrow(path_df)) { - - # i = 1 +for (i in 1:nrow(path_df)) { + start <- Sys.time() @@ -102,8 +100,7 @@ for (i in 13:nrow(path_df)) { # ---------------------------------------------------------------------------------------------------------------- # ---- STEP 1: Extract cs points from DEM ---- # ---------------------------------------------------------------------------------------------------------------- - # system.time({ - + # get cross section point elevations cs_pts <- hydrofabric3D::cross_section_pts( cs = transects, @@ -111,7 +108,7 @@ for (i in 13:nrow(path_df)) { min_pts_per_cs = 10, dem = DEM_URL ) - # }) + # ---------------------------------------------------------------------------------------------------------------- # ---- STEP 2: Remove any cross section that has ANY missing (NA) Z values, and classify the points ---- # ---------------------------------------------------------------------------------------------------------------- @@ -313,9 +310,7 @@ for (i in 13:nrow(path_df)) { ) %>% dplyr::select(-cs_id, -tmp_id) %>% dplyr::rename(cs_id = new_cs_id) - - ###################################### - + # ---------------------------------------------------------------------------------------------------------------- # ---- Cross section points parquet to S3 ---- # ---------------------------------------------------------------------------------------------------------------- diff --git a/runners/cs_runner/03_inject_ml.R b/runners/cs_runner/03_inject_ml.R index cc0ee52..c607e9f 100644 --- a/runners/cs_runner/03_inject_ml.R +++ b/runners/cs_runner/03_inject_ml.R @@ -35,25 +35,13 @@ path_df <- align_files_by_vpu( # ML Outputs ml_output <- arrow::read_parquet(ML_OUTPUTS_PATH) -# cs_ml_data_path <- "/Users/anguswatters/Desktop/cs_pts_for_ml_tests/nextgen_06_cross_sections_for_ml.parquet" -# ml_output_path <- "/Users/anguswatters/Desktop/lynker-spatial/ml-outputs/channel_ml_outputs.parquet" -# conus_network_path <- 's3://lynker-spatial/v20.1/conus_net.parquet' - -# for (i in 1:5000) { -# start <- Sys.time() -# message(i, " - time: '", start, "'") -# } - # loop over the nextgen and transect datasets (by VPU) and extract point elevations across points on each transect line, # then classify the points, and create a parquet file with hy_id, cs_id, pt_id, X, Y, Z data. 
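The 03_inject_ml.R loop below attaches ML outputs to cross section points through a "hy_id"/"hf_id" crosswalk. A minimal sketch of that join pattern, assuming the conus_net parquet carries an "id" (hy_id) to "hf_id" mapping as the comments in the loop describe (the exact column layout is an assumption):

library(dplyr)
library(arrow)

# crosswalk between NextGen "id" (hy_id) and reference "hf_id"
net <- arrow::read_parquet("conus_net.parquet") %>%   # path illustrative
  dplyr::select(id, hf_id) %>%
  dplyr::distinct()

# join hy_id onto the ML outputs, dropping rows without a crosswalk match
ml_with_hy_id <-
  ml_output %>%
  dplyr::inner_join(net, by = "hf_id") %>%
  dplyr::rename(hy_id = id)

# ML columns can then be joined onto the cross section points by "hy_id"
cs_pts_ml <- dplyr::left_join(cs_pts, ml_with_hy_id, by = "hy_id")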
# Save parquet locally and upload to specified S3 bucket for (i in 1:nrow(path_df)) { - # i = 8 - start <- round(Sys.time()) - # nextgen file and full path nextgen_file <- path_df$x[i] nextgen_path <- paste0(nextgen_dir, nextgen_file) @@ -62,10 +50,6 @@ for (i in 1:nrow(path_df)) { cs_file <- path_df$y[i] cs_pts_path <- paste0(cs_pts_dir, cs_file) - # # model attributes file and full path - # ref_file <- path_df$ref_file[i] - # ref_path <- paste0(ref_features_dir, "gpkg/", ref_file) - # current VPU being processed VPU = path_df$vpu[i] @@ -141,10 +125,6 @@ for (i in 1:nrow(path_df)) { # ---- Subset ML data to specific VPU and add "hy_id" column to ML data ----- # ---------------------------------------------------------------------------------------------------------------- - - # # ML Outputs - # ml_output <- arrow::read_parquet(ML_OUTPUTS_PATH) - # Join hy_id onto the ML outputs and then remove the rows WITHOUT matches in hy_id # this should give us a (nearly) one-to-one cross walk between "hy_id" in the cross section points # and "hf_id" in the ML outputs dataset @@ -198,9 +178,6 @@ for (i in 1:nrow(path_df)) { ) %>% dplyr::select(-bottom_length) - # cs_pts %>% dplyr::filter(owp_tw_inchan <= 0 | owp_tw_bf <= 0) - # dplyr::left_join( stream_order, by = "hy_id") - # extract any cross sections that didn't get matched with a "hf_id" and (or?) no ML data # TODO: look at this stuff with Arash (04/09/2024) missing_cs <- diff --git a/runners/cs_runner/preprocess_fema.R b/runners/cs_runner/preprocess_fema.R index 00d31e6..9e38680 100644 --- a/runners/cs_runner/preprocess_fema.R +++ b/runners/cs_runner/preprocess_fema.R @@ -258,8 +258,6 @@ for (file_path in FEMA_CLEAN_GPKG_PATHS) { message("- nextgen gpkg:\n > '", nextgen_path, "'") message(" > Checking if '", fema_file, "' intersects with '", nextgen_basename, "'") - # nextgen_path <- NEXTGEN_FILE_PATHS[13] - # read in nextgen flowlines flines <- sf::read_sf(nextgen_path, layer = "flowpaths") @@ -313,55 +311,6 @@ for (file_path in FEMA_CLEAN_GPKG_PATHS) { ) } -# -# text = "nextgen_03W.gpkg" -# VPU <- unlist(regmatches(text, -# gregexpr("\\d+[A-Za-z]*", text) -# )) -# -# fema <- -# file_path %>% -# sf::read_sf() %>% -# sf::st_transform(5070) %>% -# sf::st_cast("POLYGON") %>% -# dplyr::mutate( -# fema_id = 1:dplyr::n() -# ) %>% -# dplyr::select(fema_id, geometry = geom) -# -# message(" > ", nrow(fema), " POLYGONs") -# message("Start time: ", Sys.time()) -# -# fema_clean <- hydrofab::clean_geometry( -# catchments = fema, -# ID = "fema_id" -# ) -# -# fema_clean <- -# fema_clean %>% -# dplyr::mutate( -# source = basename(file_path), -# state = gsub("-100yr-flood_valid_clean.gpkg", "", source) -# ) %>% -# dplyr::select(fema_id, source, state, areasqkm, geometry) -# -# message("End time: ", Sys.time()) -# -# # geom_diff <- sf::st_difference(fema[1, ], fema_clean[1, ]) -# # mapview::mapview(fema[1, ], col.regions = "red") + -# # mapview::mapview(fema_clean[1, ], col.regions = "green") + -# # mapview::mapview(geom_diff, col.regions = "white") -# -# if (OVERWRITE_FEMA_FILES) { -# message("Writting '", basename(file_path), "' to: \n > '", file_path, "'") -# sf::write_sf( -# fema_clean, -# file_path -# ) -# } -# message() -# -# } # ------------------------------------------------------------------------------------- # ---- Generate bounding box gpkg for each FEMA FGB ---- # ------------------------------------------------------------------------------------- From 59a8d0af49628ccf9ecf6966fc197535e3506f9e Mon Sep 17 00:00:00 2001 From: anguswg-ucsb 
<anguswatters@gmail.com> Date: Thu, 2 May 2024 14:42:01 -0700 Subject: [PATCH 23/64] random cleanups --- runners/cs_runner/00_fema.R | 6 ------ runners/cs_runner/config_vars.R | 7 ------- 2 files changed, 13 deletions(-) diff --git a/runners/cs_runner/00_fema.R b/runners/cs_runner/00_fema.R index 5835ff3..304268f 100644 --- a/runners/cs_runner/00_fema.R +++ b/runners/cs_runner/00_fema.R @@ -3,12 +3,6 @@ library(dplyr) # Generate the flowlines layer for the final cross_sections_<VPU>.gpkg for each VPU source("runners/cs_runner/config.R") -# # # # load libraries -# library(hydrofabric3D) -# library(dplyr) -# library(sf) -# install.packages("devtools") - # transect bucket prefix transects_prefix <- paste0(s3_bucket, version_prefix, "/3D/transects/") diff --git a/runners/cs_runner/config_vars.R b/runners/cs_runner/config_vars.R index 7774d5b..47483c9 100644 --- a/runners/cs_runner/config_vars.R +++ b/runners/cs_runner/config_vars.R @@ -3,7 +3,6 @@ # ---------------------------------------------------------------------------- # ---- General paths and constants variables ---- # ---------------------------------------------------------------------------- - base_dir <- '/Users/anguswatters/Desktop/lynker-spatial' # AWS profile to run CLI commands @@ -31,7 +30,6 @@ FEMA_BY_VPU_PATH <- paste0(base_dir, "/FEMA_BY_VPU") VPU_IDS <- sf::st_drop_geometry(nhdplusTools::get_boundaries())$VPUID FEMA_VPU_SUBFOLDERS <- paste0(FEMA_BY_VPU_PATH, "/VPU_", VPU_IDS) - # FEMA_VPU_SUBFOLDERS <- paste0( # FEMA_BY_VPU_PATH, "/VPU_", # unlist( @@ -39,11 +37,6 @@ FEMA_VPU_SUBFOLDERS <- paste0(FEMA_BY_VPU_PATH, "/VPU_", VPU_IDS) # unlist(regmatches(vpu_file_names, gregexpr("\\d+[A-Za-z]*", vpu_file_names)))}) # ) # ) -# TODO: these can be deleted -# FEMA_SIMPLIFIED_PATH <- paste0(base_dir, "/FEMA100_simplified") -# FEMA_DISSOLVED_PATH <- paste0(base_dir, "/FEMA100_dissolved") -# FEMA_EXPLODED_PATH <- paste0(base_dir, "/FEMA100_exploded") - # ---------------------------------------------------------------------------- # ---- Cross section point extraction constant variables ---- From d8742c152cf601b69177555a555e563b8d38cabf Mon Sep 17 00:00:00 2001 From: anguswg-ucsb <anguswatters@gmail.com> Date: Mon, 6 May 2024 11:38:19 -0700 Subject: [PATCH 24/64] added ogr2ogr commands to merge together all femas within a VPU into single gpkgs --- runners/cs_runner/00_fema.R | 1 + runners/cs_runner/add_fema_to_transects.R | 70 +++++++++++++++++++++++ runners/cs_runner/preprocess_fema.R | 44 ++++++++++++++ 3 files changed, 115 insertions(+) create mode 100644 runners/cs_runner/add_fema_to_transects.R diff --git a/runners/cs_runner/00_fema.R b/runners/cs_runner/00_fema.R index 304268f..11f55be 100644 --- a/runners/cs_runner/00_fema.R +++ b/runners/cs_runner/00_fema.R @@ -52,6 +52,7 @@ us_states <- # read in nextgen data flines <- sf::read_sf(nextgen_path, layer = "flowpaths") + flines_bb <- flines %>% sf::st_bbox() %>% diff --git a/runners/cs_runner/add_fema_to_transects.R b/runners/cs_runner/add_fema_to_transects.R new file mode 100644 index 0000000..5964c1f --- /dev/null +++ b/runners/cs_runner/add_fema_to_transects.R @@ -0,0 +1,70 @@ +library(dplyr) + +# Generate the flowlines layer for the final cross_sections_<VPU>.gpkg for each VPU +source("runners/cs_runner/config.R") + +# transect bucket prefix +transects_prefix <- paste0(s3_bucket, version_prefix, "/3D/transects/") + +# paths to nextgen datasets and model attribute parquet files +nextgen_files <- list.files(nextgen_dir, full.names = FALSE) +# FEMA_files <- 
list.files(FEMA_FGB_PATH, full.names = FALSE) +# FEMA_BB_files <- list.files(FEMA_FGB_BB_PATH, full.names = FALSE) +transects_files <- list.files(transects_dir, full.names = FALSE) +transects_files <- transects_files[!grepl("updated", transects_files)] + +FEMA_VPU_SUBFOLDERS +# string to fill in "cs_source" column in output datasets +FEMA_files +net_source <- "hydrofabric3D" + +# ensure the files are in the same order and matched up by VPU +path_df <- align_files_by_vpu( + x = nextgen_files, + y = transects_files, + base = base_dir +) + +path_df + +us_states <- + USAboundaries::us_states() %>% + sf::st_transform(5070) + +# loop over each VPU and generate cross sections, then save locally and upload to S3 bucket +# for(i in 1:nrow(path_df)) { + + i = 8 + + # nextgen file and full path + nextgen_file <- path_df$x[i] + nextgen_path <- paste0(nextgen_dir, nextgen_file) + + transect_file <- path_df$y[i] + transect_path <- paste0(transects_dir, transect_file) + + VPU <- path_df$vpu[i] + transect_path + + # # model attributes file and full path + # model_attr_file <- path_df$y[i] + # model_attr_path <- paste0(model_attr_dir, model_attr_file) + + message("Creating VPU ", path_df$vpu[i], "\n - transects: ", transect_file, "\n - flowpaths: '", nextgen_file, "'") + # message("Creating VPU ", path_df$vpu[i], " transects:\n - flowpaths: '", nextgen_file, "'\n - model attributes: '", model_attr_file, "'") + + # read in nextgen data + flines <- sf::read_sf(nextgen_path, layer = "flowpaths") + + + FEMA_vpu_dir <- FEMA_VPU_SUBFOLDERS[grepl(paste0("VPU_", VPU), basename(FEMA_VPU_SUBFOLDERS))] + + list.files(FEMA_vpu_dir) + + VPU + + fema <- sf::read_sf() + + + + \ No newline at end of file diff --git a/runners/cs_runner/preprocess_fema.R b/runners/cs_runner/preprocess_fema.R index 9e38680..c1d9772 100644 --- a/runners/cs_runner/preprocess_fema.R +++ b/runners/cs_runner/preprocess_fema.R @@ -311,6 +311,50 @@ for (file_path in FEMA_CLEAN_GPKG_PATHS) { ) } + +# ------------------------------------------------------------------------------------- +# ---- Loop through each VPU subfolder and merge all of the Geopackages into one---- +# ------------------------------------------------------------------------------------- + +DELETE_STAGING_GPKGS <- TRUE + +for (vpu_dir in FEMA_VPU_SUBFOLDERS) { + + message("Merging files in VPU dir '", vpu_dir, "'") + fema_vpu_gpkgs <- list.files(vpu_dir, full.names = TRUE) + + master_name <- paste0("fema_", tolower(basename(vpu_dir))) + master_gpkg_name <- paste0(master_name, ".gpkg") + + for(gpkg_file in fema_vpu_gpkgs) { + message("- Appending '", basename(gpkg_file), "' to master FEMA VPU gpkg:\n > '", + paste0(vpu_dir, "/", master_gpkg_name), "'") + + ogr2ogr_merge_command <- paste0("ogr2ogr -f 'gpkg' -append -nln ", master_name, " ", + paste0(vpu_dir, "/", master_gpkg_name), + " ", gpkg_file + ) + + if (OVERWRITE_FEMA_FILES) { + system(ogr2ogr_merge_command) + } + + if(DELETE_STAGING_GPKGS) { + message("Deleting individual gpkgs from\n > '", vpu_dir, "'") + + files_to_delete <- fema_vpu_gpkgs[!grepl(master_gpkg_name, fema_vpu_gpkgs)] + remove_gpkg_cmds <- paste0("rm ", files_to_delete) + + for (remove_cmd in remove_gpkg_cmds) { + message(" > '", remove_cmd, "'") + system(remove_cmd) + } + } + message() + } + +} + # ------------------------------------------------------------------------------------- # ---- Generate bounding box gpkg for each FEMA FGB ---- # ------------------------------------------------------------------------------------- From 
4d4d0570217c44e8cb52c9def50aaca4688d3ba9 Mon Sep 17 00:00:00 2001 From: anguswg-ucsb <anguswatters@gmail.com> Date: Mon, 6 May 2024 16:10:19 -0700 Subject: [PATCH 25/64] work in progress on extending transects according to fema polygons --- runners/cs_runner/add_fema_to_transects.R | 79 ++++++++++++++++++++++- 1 file changed, 77 insertions(+), 2 deletions(-) diff --git a/runners/cs_runner/add_fema_to_transects.R b/runners/cs_runner/add_fema_to_transects.R index 5964c1f..5cceadb 100644 --- a/runners/cs_runner/add_fema_to_transects.R +++ b/runners/cs_runner/add_fema_to_transects.R @@ -59,12 +59,87 @@ us_states <- FEMA_vpu_dir <- FEMA_VPU_SUBFOLDERS[grepl(paste0("VPU_", VPU), basename(FEMA_VPU_SUBFOLDERS))] - list.files(FEMA_vpu_dir) + vpu_fema_file <- list.files(FEMA_vpu_dir, full.names = TRUE) + FEMA_vpu_dir VPU - fema <- sf::read_sf() + fema <- sf::read_sf(vpu_fema_file) + + + transects <- sf::read_sf(transect_path) + transects_geos <- geos::as_geos_geometry(transects) + fema_geos <- geos::as_geos_geometry(fema) + + + fema_geos + fema_transects_matrix <- geos::geos_intersects_matrix(transects_geos, fema_geos) + transects_fema_matrix <- geos::geos_intersects_matrix(fema_geos, transects_geos) + + fema_transects_matrix + # get the polygons that have atleast 1 intersection with the 'lines' + transects_with_fema <- transects[lengths(fema_transects_matrix) != 0, ] + fema_with_transects <- fema[lengths(transects_fema_matrix) != 0, ] + + lengths(transects_fema_matrix) + mapview::mapview(transects_with_fema, color = "green") + fema_with_transects + unique(hydrofabric3D::add_tmp_id(transects)$tmp_id)[1:30] + transects %>% + hydrofabric3D::add_tmp_id(transects) %>% + .$tmp_id %>% + unique() %>% .[1:30] + trans_subset <- + transects %>% + hydrofabric3D::add_tmp_id() %>% + dplyr::filter(tmp_id %in% unique(hydrofabric3D::add_tmp_id(transects)$tmp_id)[1:30]) + + fema_subset <- + fema %>% + dplyr::filter(fema_id == "1268") + + extended <- hydrofabric3D:::extend_by_length(trans_subset, rep(500, nrow(trans_subset))) + extended + clipped_trans <- rmapshaper::ms_clip(extended, fema) + + rmapshaper::ms_clip(extended, fema_subset) + mapview::mapview(trans_subset, color = "red") + + mapview::mapview(extended, color = "yellow") + + # mapview::mapview( sf::st_difference(extended, fema_subset), color = "green") + + mapview::mapview(clipped_trans, color = "green") + + fema + + rep(50, nrow(trans_subset)) + extended <- hydrofabric3D:::extend_by_length(trans_subset, rep(50, nrow(trans_subset))) + + + hydrofabric3D::geos_extend_line(trans_subset, 50) %>% + sf::st_as_sf() %>% mapview::mapview() + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file From 89242b264cc2e1457818e346be42d103fe10ac8b Mon Sep 17 00:00:00 2001 From: anguswg-ucsb <anguswatters@gmail.com> Date: Tue, 7 May 2024 16:01:22 -0700 Subject: [PATCH 26/64] small changes to dir structure for final merged fema polygons --- runners/cs_runner/add_fema_to_transects.R | 17 +++++ runners/cs_runner/preprocess_fema.R | 85 +++++++++++++++++------ 2 files changed, 80 insertions(+), 22 deletions(-) diff --git a/runners/cs_runner/add_fema_to_transects.R b/runners/cs_runner/add_fema_to_transects.R index 5cceadb..1a33558 100644 --- a/runners/cs_runner/add_fema_to_transects.R +++ b/runners/cs_runner/add_fema_to_transects.R @@ -80,6 +80,23 @@ us_states <- fema_transects_matrix # get the polygons that have atleast 1 intersection with the 'lines' + fema_transects_matrix[[557]] + fema_tmp <- fema[fema_transects_matrix[[557]], ] + trans_tmp <- 
transects[557, ]
+  
+  fema_dissolve <- rmapshaper::ms_dissolve(fema_tmp)
+  fema_simple <- rmapshaper::ms_simplify(fema_tmp, keep = 1) %>% 
+    # rmapshaper::ms_dissolve(field = "state")
+    sf::st_union()
+  mapview::mapview(trans_tmp, color = "green") + 
+    mapview::mapview(fema_tmp[1, ], col.region = "red") + 
+    mapview::mapview(fema_tmp[2, ], col.region = "dodgerblue") + 
+    mapview::mapview(fema_simple, col.region = "yellow")
+  transects
+  
+  
+  fema_transects_matrix
   transects_with_fema <- transects[lengths(fema_transects_matrix) != 0, ]
   fema_with_transects <- fema[lengths(transects_fema_matrix) != 0, ]
 
diff --git a/runners/cs_runner/preprocess_fema.R b/runners/cs_runner/preprocess_fema.R
index c1d9772..6b6c2f8 100644
--- a/runners/cs_runner/preprocess_fema.R
+++ b/runners/cs_runner/preprocess_fema.R
@@ -59,11 +59,32 @@ if (!dir.exists(FEMA_BY_VPU_PATH)) {
 for (VPU_SUBFOLDER in FEMA_VPU_SUBFOLDERS) {
   
   # create directory for FEMA geometries by VPU
+  # message(VPU_SUBFOLDER)
+  
+  state_dir  = paste0(VPU_SUBFOLDER, "/states/")
+  merged_dir = paste0(VPU_SUBFOLDER, "/merged/")
+  
   if (!dir.exists(VPU_SUBFOLDER)) {
     message("Creating FEMA VPU subfolder...")
-    message(paste0("'/", basename(VPU_SUBFOLDER), "' directory does not exist...\nCreating directory:\n > '", VPU_SUBFOLDER, "'"))
+    message(paste0("'/", basename(VPU_SUBFOLDER), "' directory does not exist...\n Creating directory:\n > '", VPU_SUBFOLDER, "'"))
     dir.create(VPU_SUBFOLDER)
   }
+  
+  if (!dir.exists(state_dir)) {
+    message("Creating FEMA VPU states subfolder...")
+    message(paste0("'/", basename(state_dir), "' directory does not exist...\n Creating directory:\n > '", state_dir, "'"))
+    
+    dir.create(state_dir)
+    
+  }
+  
+  if (!dir.exists(merged_dir)) {
+    message("Creating FEMA VPU merged subfolder...")
+    message(paste0("'/", basename(merged_dir), "' directory does not exist...\n Creating directory:\n > '", merged_dir, "'"))
+    
+    dir.create(merged_dir)
+    
+  }
 }
 
 # FEMA_VPU_SUBFOLDERS <- paste0(FEMA_BY_VPU_PATH, "/VPU_", VPU_IDS)
@@ -242,6 +263,7 @@ FEMA_CLEAN_GPKG_PATHS <- list.files(FEMA_GPKG_PATH, full.names = TRUE)
 # paths to nextgen datasets and model attribute parquet files
 NEXTGEN_FILENAMES  <- list.files(nextgen_dir, full.names = FALSE)
 NEXTGEN_FILE_PATHS <- paste0(nextgen_dir, NEXTGEN_FILENAMES)
+# OVERWRITE_FEMA_FILES
 
 for (file_path in FEMA_CLEAN_GPKG_PATHS) {
   fema_file <- basename(file_path)
@@ -272,7 +294,7 @@ for (file_path in FEMA_CLEAN_GPKG_PATHS) {
   
   # create filepaths
   vpu_subfolder      <- paste0("VPU_", vpu)
-  vpu_subfolder_path <- paste0(FEMA_BY_VPU_PATH, "/", vpu_subfolder)
+  vpu_subfolder_path <- paste0(FEMA_BY_VPU_PATH, "/", vpu_subfolder, "/states")
   # vpu_subfolder_path <- FEMA_VPU_SUBFOLDERS[grepl(vpu_subfolder, FEMA_VPU_SUBFOLDERS)]
   
   fema_intersect <- 
@@ -316,43 +338,62 @@ for (file_path in FEMA_CLEAN_GPKG_PATHS) {
 # ---- Loop through each VPU subfolder and merge all of the Geopackages into one ----
 # -------------------------------------------------------------------------------------
 
-DELETE_STAGING_GPKGS <- TRUE
+DELETE_STAGING_GPKGS <- FALSE
+
+# FEMA_VPU_SUBFOLDERS
 
 for (vpu_dir in FEMA_VPU_SUBFOLDERS) {
+  message("Merging files in '", basename(vpu_dir), "' directory...")
+  
+  # vpu_dir <- '/Users/anguswatters/Desktop/lynker-spatial/FEMA_BY_VPU/VPU_06'
+  vpu_subdirs <- list.files(vpu_dir, full.names = TRUE)
+  
+  states_dir <- vpu_subdirs[grepl(paste0(vpu_dir, "/states"), vpu_subdirs)]
+  merged_dir <- vpu_subdirs[grepl(paste0(vpu_dir, "/merged"), vpu_subdirs)]
 
-  message("Merging files in VPU dir '", vpu_dir, "'")
-  fema_vpu_gpkgs <- list.files(vpu_dir, full.names = TRUE)
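# For reference, the ogr2ogr append pattern this merge loop is built around reduces
# to one shell call per state file (both paths below are hypothetical, for
# illustration only; -nln pins the destination layer name so every state file
# lands in a single layer):
src_gpkg <- "ca-100yr-flood_valid_clean.gpkg"  # hypothetical staged state geopackage
dst_gpkg <- "fema_vpu_18.gpkg"                 # hypothetical per-VPU master geopackage
system(paste0("ogr2ogr -f 'gpkg' -append -nln fema_vpu_18 ", dst_gpkg, " ", src_gpkg))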
+  # fema state geopackages partitioned for the specific VPU
+  fema_state_gpkgs <- list.files(states_dir, full.names = TRUE)
   
-  master_name      <- paste0("fema_", tolower(basename(vpu_dir)))
+  master_name      <- paste0("fema_", gsub("VPU", "vpu", basename(vpu_dir)))
   master_gpkg_name <- paste0(master_name, ".gpkg")
   
-  for(gpkg_file in fema_vpu_gpkgs) {
-    message("- Appending '", basename(gpkg_file), "' to master FEMA VPU gpkg:\n > '", 
-            paste0(vpu_dir, "/", master_gpkg_name), "'")
+  # path to the merged directory where the final merged geopackage will end up
+  master_filepath <- paste0(merged_dir, "/", master_gpkg_name)
+  
+  for(gpkg_file in fema_state_gpkgs) {
+    # message(" - Appending '", basename(gpkg_file), "' to master FEMA VPU gpkg:\n > '", 
+    #         basename(gpkg_file), " > ", basename(master_filepath), 
+    #         "'")
+    message(" > '", 
+            basename(gpkg_file), " > ", basename(master_filepath), 
+            "'")
     
     ogr2ogr_merge_command <- paste0("ogr2ogr -f 'gpkg' -append -nln ", master_name, " ", 
-                                    paste0(vpu_dir, "/", master_gpkg_name), 
+                                    master_filepath, 
                                     " ", gpkg_file
     )
     
    if (OVERWRITE_FEMA_FILES) {
      system(ogr2ogr_merge_command)
    }
+  }
+  
+  if(DELETE_STAGING_GPKGS) {
+    message(" - Deleting individual gpkgs from '/states' directory...")
+    # message("- Deleting individual gpkgs from 'states' directory:\n > '", states_dir, "'")
     
-    if(DELETE_STAGING_GPKGS) {
-      message("Deleting individual gpkgs from\n > '", vpu_dir, "'")
-      
-      files_to_delete  <- fema_vpu_gpkgs[!grepl(master_gpkg_name, fema_vpu_gpkgs)]
-      remove_gpkg_cmds <- paste0("rm ", files_to_delete)
-      
-      for (remove_cmd in remove_gpkg_cmds) {
-        message(" > '", remove_cmd, "'")
-        system(remove_cmd)
-      }
+    remove_gpkg_cmds <- paste0("rm ", fema_state_gpkgs)
+    
+    for (remove_cmd in remove_gpkg_cmds) {
+      message(" > '", remove_cmd, "'")
+      system(remove_cmd)
    }
-    message()
  }
-  
+  
+  # message()
+  message("Merge complete!")
+  message("Merged '", basename(vpu_dir), "' FEMA output geopackage:\n --> '", master_filepath, "'")
+  message()
}

# -------------------------------------------------------------------------------------
# ---- Generate bounding box gpkg for each FEMA FGB ----
# -------------------------------------------------------------------------------------

From c8a1ff174e28c692103dc9a5c86fa13df19e0e0f Mon Sep 17 00:00:00 2001
From: anguswg-ucsb <anguswatters@gmail.com>
Date: Wed, 8 May 2024 16:30:51 -0700
Subject: [PATCH 27/64] one more round of dissolving and exploding

---
 runners/cs_runner/preprocess_fema.R | 76 ++++++++++++++++++++++-
 1 file changed, 75 insertions(+), 1 deletion(-)

diff --git a/runners/cs_runner/preprocess_fema.R b/runners/cs_runner/preprocess_fema.R
index 6b6c2f8..ba74369 100644
--- a/runners/cs_runner/preprocess_fema.R
+++ b/runners/cs_runner/preprocess_fema.R
@@ -342,9 +342,13 @@ DELETE_STAGING_GPKGS <- FALSE
 
 # FEMA_VPU_SUBFOLDERS
 
-for (vpu_dir in FEMA_VPU_SUBFOLDERS) {
+# for (vpu_dir in FEMA_VPU_SUBFOLDERS) {
+for (i in 1:4) {
+  vpu_dir = FEMA_VPU_SUBFOLDERS[i]
   message("Merging files in '", basename(vpu_dir), "' directory...")
+# }
+  
   # vpu_dir <- '/Users/anguswatters/Desktop/lynker-spatial/FEMA_BY_VPU/VPU_06'
   vpu_subdirs <- list.files(vpu_dir, full.names = TRUE)
 
@@ -396,6 +400,76 @@ for (vpu_dir in FEMA_VPU_SUBFOLDERS) {
   message()
 }
 
+# -------------------------------------------------------------------------------------
+# ---- Union each VPU geopackage (either on state or just touching predicate) ----
+# -------------------------------------------------------------------------------------
+
+MERGED_DIRS <- paste0(FEMA_VPU_SUBFOLDERS, "/merged")
+
+for (i in 1:length(FEMA_VPU_SUBFOLDERS)) {
+  vpu_dir = FEMA_VPU_SUBFOLDERS[i]
+  
+  VPU <- basename(vpu_dir)
+  
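# The body of this loop (below) dissolves each merged VPU layer and re-explodes it
# so that each row becomes a single contiguous flood-plain polygon. A minimal sketch
# of that round trip, assuming a hypothetical input path (`sys = TRUE` shells out to
# a system mapshaper install, which handles larger inputs than the bundled engine):
polys     <- sf::read_sf("fema_vpu_06.gpkg")                   # hypothetical merged VPU layer
dissolved <- rmapshaper::ms_dissolve(polys, field = "source")  # union polygons by source state
exploded  <- rmapshaper::ms_explode(dissolved)                 # one row per polygon part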
+ message("Attempting to union FEMA polygons for '", VPU, "'...") + merged_dir <- paste0(vpu_dir, "/merged") + fema_vpu_file <- list.files(merged_dir, full.names = TRUE) + + has_fema_vpu_file <- ifelse(length(fema_vpu_file) > 0, TRUE, FALSE) + # message() + # fema_vpu_file +# } + if(!has_fema_vpu_file) { + message("No FEMA geometries in '", VPU, "'") + message() + next + } + + message("> Re-unioning and re-exploding geometries in '", basename(fema_vpu_file), "'") + + fema_vpu_file <- fema_vpu_file[!grepl("_union.gpkg", fema_vpu_file)] + + fema_vpu <- sf::read_sf(fema_vpu_file) + + # fema_ids <- c(695) + # fema_vpu <- + # fema_vpu %>% + # dplyr::group_by(source) %>% + # dplyr::summarise() %>% + # dplyr::ungroup() + + # 2633 = old number of polygons + fema_vpu <- rmapshaper::ms_dissolve(fema_vpu, + field = "source", + sys = TRUE, + sys_mem = 16 + ) + fema_vpu <- rmapshaper::ms_explode(fema_vpu, + sys = TRUE, + sys_mem = 16) + + fema_vpu <- + fema_vpu %>% + dplyr::group_by(source) %>% + dplyr::mutate( + state = tolower(gsub("-100yr-flood_valid_clean.gpkg", "", source)), + vpu = gsub("VPU_", "", VPU), + fema_id = paste0(state, "_", 1:dplyr::n()) + ) %>% + dplyr::ungroup() %>% + dplyr::relocate(vpu, fema_id, source, state, geom) + + if (OVERWRITE_FEMA_FILES) { + union_file_path <- gsub(".gpkg", "_union.gpkg", fema_vpu_file) + message("> Writting '", basename(union_file_path), "' (unioned and exploded version)") + sf::write_sf( + fema_vpu, + union_file_path + ) + } + message() +} + # ------------------------------------------------------------------------------------- # ---- Generate bounding box gpkg for each FEMA FGB ---- # ------------------------------------------------------------------------------------- From b34d3b3a28f17a659a635c85866c7edaa13afbc0 Mon Sep 17 00:00:00 2001 From: anguswg-ucsb <anguswatters@gmail.com> Date: Fri, 10 May 2024 11:02:10 -0700 Subject: [PATCH 28/64] added code for splitting up transects to check within FEMA relations and began checking for min extension distances, WIP, needs cleanups --- runners/cs_runner/add_fema_to_transects.R | 557 ++++++++++++++++++++-- 1 file changed, 510 insertions(+), 47 deletions(-) diff --git a/runners/cs_runner/add_fema_to_transects.R b/runners/cs_runner/add_fema_to_transects.R index 1a33558..3ca2eb7 100644 --- a/runners/cs_runner/add_fema_to_transects.R +++ b/runners/cs_runner/add_fema_to_transects.R @@ -1,24 +1,23 @@ library(dplyr) +library(lwgeom) -# Generate the flowlines layer for the final cross_sections_<VPU>.gpkg for each VPU -source("runners/cs_runner/config.R") +# generate the flowlines layer for the final cross_sections_<vpu>.gpkg for each vpu +source("runners/cs_runner/config.r") # transect bucket prefix -transects_prefix <- paste0(s3_bucket, version_prefix, "/3D/transects/") +transects_prefix <- paste0(s3_bucket, version_prefix, "/3d/transects/") # paths to nextgen datasets and model attribute parquet files nextgen_files <- list.files(nextgen_dir, full.names = FALSE) -# FEMA_files <- list.files(FEMA_FGB_PATH, full.names = FALSE) -# FEMA_BB_files <- list.files(FEMA_FGB_BB_PATH, full.names = FALSE) +# fema_files <- list.files(fema_fgb_path, full.names = FALSE) +# fema_bb_files <- list.files(fema_fgb_bb_path, full.names = FALSE) transects_files <- list.files(transects_dir, full.names = FALSE) transects_files <- transects_files[!grepl("updated", transects_files)] -FEMA_VPU_SUBFOLDERS -# string to fill in "cs_source" column in output datasets -FEMA_files -net_source <- "hydrofabric3D" -# ensure the files are in the same order 
and matched up by VPU +net_source <- "hydrofabric3d" + +# ensure the files are in the same order and matched up by vpu path_df <- align_files_by_vpu( x = nextgen_files, y = transects_files, @@ -27,11 +26,7 @@ path_df <- align_files_by_vpu( path_df -us_states <- - USAboundaries::us_states() %>% - sf::st_transform(5070) - -# loop over each VPU and generate cross sections, then save locally and upload to S3 bucket +# loop over each vpu and generate cross sections, then save locally and upload to s3 bucket # for(i in 1:nrow(path_df)) { i = 8 @@ -43,62 +38,530 @@ us_states <- transect_file <- path_df$y[i] transect_path <- paste0(transects_dir, transect_file) - VPU <- path_df$vpu[i] + vpu <- path_df$vpu[i] transect_path # # model attributes file and full path # model_attr_file <- path_df$y[i] # model_attr_path <- paste0(model_attr_dir, model_attr_file) - message("Creating VPU ", path_df$vpu[i], "\n - transects: ", transect_file, "\n - flowpaths: '", nextgen_file, "'") - # message("Creating VPU ", path_df$vpu[i], " transects:\n - flowpaths: '", nextgen_file, "'\n - model attributes: '", model_attr_file, "'") + message("creating vpu ", path_df$vpu[i], "\n - transects: ", transect_file, "\n - flowpaths: '", nextgen_file, "'") + # message("creating vpu ", path_df$vpu[i], " transects:\n - flowpaths: '", nextgen_file, "'\n - model attributes: '", model_attr_file, "'") # read in nextgen data flines <- sf::read_sf(nextgen_path, layer = "flowpaths") - - FEMA_vpu_dir <- FEMA_VPU_SUBFOLDERS[grepl(paste0("VPU_", VPU), basename(FEMA_VPU_SUBFOLDERS))] - - vpu_fema_file <- list.files(FEMA_vpu_dir, full.names = TRUE) - - FEMA_vpu_dir - VPU - + fema_vpu_dir <- paste0(FEMA_VPU_SUBFOLDERS[grepl(paste0("VPU_", vpu), basename(FEMA_VPU_SUBFOLDERS))], "/merged") + fema_vpu_dir + vpu_fema_files <- list.files(fema_vpu_dir, full.names = TRUE) + # vpu_fema_file1 <- vpu_fema_files[grepl("_union.gpkg", vpu_fema_files)] + vpu_fema_file <- vpu_fema_files[grepl(paste0(vpu, ".gpkg"), vpu_fema_files)] + vpu_fema_file + # fema1 <- sf::read_sf(vpu_fema_file1) + # fema2 <- sf::read_sf(vpu_fema_filev2) fema <- sf::read_sf(vpu_fema_file) - + # fema %>% mapview::npts() + # fema2 %>% mapview::npts() + + fema <- + fema %>% + rmapshaper::ms_simplify(keep = 0.10) + fema %>% mapview::npts() + transects <- sf::read_sf(transect_path) transects_geos <- geos::as_geos_geometry(transects) - - fema_geos <- geos::as_geos_geometry(fema) + fema_geos <- geos::as_geos_geometry(fema) + length(fema_geos) - fema_geos - fema_transects_matrix <- geos::geos_intersects_matrix(transects_geos, fema_geos) - transects_fema_matrix <- geos::geos_intersects_matrix(fema_geos, transects_geos) + # fema_geos + transects_fema_matrix <- geos::geos_intersects_matrix(transects_geos, fema_geos) + fema_transects_matrix <- geos::geos_intersects_matrix(fema_geos, transects_geos) - fema_transects_matrix + # transects_fema_matrix # get the polygons that have atleast 1 intersection with the 'lines' - fema_transects_matrix[[557]] - fema_tmp <- fema[fema_transects_matrix[[557]], ] - trans_tmp <- transects[557, ] + # transects_fema_matrix[[557]] + # fema_tmp <- fema[transects_fema_matrix[[557]], ] + # trans_tmp <- transects[557, ] - fema_dissolve <- rmapshaper::ms_dissolve(fema_tmp) - fema_simple <- rmapshaper::ms_simplify(fema_tmp, keep = 1) %>% + # fema_dissolve <- rmapshaper::ms_dissolve(fema_tmp) + # fema_simple <- rmapshaper::ms_simplify(fema_tmp, keep = 1) %>% # rmapshaper::ms_dissolve(field = "state") - sf::st_union() - mapview::mapview(trans_tmp, color = "green") + - 
mapview::mapview(fema_tmp[1, ], col.region = "red") + - mapview::mapview(fema_tmp[2, ], col.region = "dodgerblue") + - mapview::mapview(fema_simple, col.region = "yellow") - transects + + # mapview::mapview(trans_tmp, color = "green") + + # mapview::mapview(fema_tmp[1, ], col.region = "red") + + # mapview::mapview(fema_tmp[2, ], col.region = "dodgerblue") + + # mapview::mapview(fema_simple, col.region = "yellow") + + fema_transects_matrix + trans_fema <- transects[lengths(transects_fema_matrix) != 0, ] + # fema_polygons <- fema[lengths(fema_transects_matrix) != 0, ] + fema_polygons <- fema_geos[lengths(fema_transects_matrix) != 0] - fema_transects_matrix - transects_with_fema <- transects[lengths(fema_transects_matrix) != 0, ] - fema_with_transects <- fema[lengths(transects_fema_matrix) != 0, ] + fema_lines <- + fema_polygons %>% + sf::st_as_sf() %>% + sf::st_cast("MULTILINESTRING") %>% + geos::as_geos_geometry() %>% + geos::geos_simplify_preserve_topology(25) + + # length(fema_geos) + # nrow(fema) + # fema_geos <- geos::as_geos_geometry(fema_polygons) + + tmp_trans <- trans_fema[1:5000, ] + + # tmp_centroid <- sf::st_centroid(tmp_trans) + # sf::st_segmentize() + # split_trans <- lwgeom::st_split(tmp_trans, tmp_centroid) + # # tmp_trans + max_extension_distance <- 2500 + + # unlist(tmp_trans$geom) + # unlist(sf::st_segmentize(tmp_trans, 4)$geom) + min_segmentation <- min(tmp_trans$cs_lengthm %/% 2) + + # which.min(tmp_trans$cs_lengthm %/% 2) + + segmented_trans <- sf::st_segmentize(tmp_trans, min_segmentation) + + # unlist(segmented_trans$geom) + unique(lengths(segmented_trans$geom)) + length(lengths(segmented_trans$geom)) + lengths(segmented_trans$geom) + + # left_trans <- lwgeom::st_linesubstring(segmented_trans, 0, 0.50) + # right_trans <- lwgeom::st_linesubstring(segmented_trans, 0.50, 1) + + # mapview::mapview(left_trans, col.regions = "dodgerblue") + + # mapview::mapview(tmp_trans, color = "red") + + # mapview::mapview(tmp_trans[42, ], color = "yellow") + + # mapview::mapview(right_trans, color = "dodgerblue") + + # mapview::mapview(left_trans, color = "green") + + + left_trans <- + segmented_trans %>% + lwgeom::st_linesubstring(0, 0.50) %>% + dplyr::mutate( + partition = "left", + partition_lengthm = as.numeric(sf::st_length(geom)) + ) %>% + hydrofabric3D::add_tmp_id() %>% + dplyr::select(tmp_id, hy_id, cs_source, cs_id, cs_measure, + cs_lengthm, is_extended, partition, partition_lengthm, geom) + + # Find the distances from the right side of transect lines + right_trans <- + segmented_trans %>% + lwgeom::st_linesubstring(0.50, 1) %>% + dplyr::mutate( + partition = "right", + partition_lengthm = as.numeric(sf::st_length(geom)) + ) %>% + hydrofabric3D::add_tmp_id() %>% + dplyr::select(tmp_id, hy_id, cs_source, cs_id, cs_measure, + cs_lengthm, is_extended, partition, partition_lengthm, geom) + + # convert the transect geometries to geos types + # get the fema polygon indices for the transect halves that are completely within a fema polygon + # add the fema polygons index as a column to the transect dataframes + left_trans_geos <- geos::as_geos_geometry(left_trans) + right_trans_geos <- geos::as_geos_geometry(right_trans) + + left_within_matrix <- geos::geos_within_matrix(left_trans_geos, fema_polygons) + right_within_matrix <- geos::geos_within_matrix(right_trans_geos, fema_polygons) + + left_within_vect <- lapply(left_within_matrix, function(i) { if(length(i) > 0) { c(i) } else { c(NA) } }) + right_within_vect <- lapply(right_within_matrix, function(i) { if(length(i) > 0) { c(i) } 
else { c(NA) } }) + + # add the fema polygon indexes as columns + left_trans$left_fema_index <- left_within_vect + right_trans$right_fema_index <- right_within_vect + + # add boolean columns whether the transect is fully within the FEMA polygons + left_trans <- + left_trans %>% + dplyr::mutate( + left_is_within_fema = dplyr::case_when( + !is.na(left_fema_index) ~ TRUE, + TRUE ~ FALSE + ) + ) %>% + dplyr::select(tmp_id, hy_id, cs_source, cs_id, cs_measure, + cs_lengthm, is_extended, partition, partition_lengthm, + left_fema_index, left_is_within_fema, + geom + ) + + right_trans <- + right_trans %>% + dplyr::mutate( + right_is_within_fema = dplyr::case_when( + !is.na(right_fema_index) ~ TRUE, + TRUE ~ FALSE + ) + ) %>% + dplyr::select(tmp_id, hy_id, cs_source, cs_id, cs_measure, + cs_lengthm, is_extended, partition, partition_lengthm, + right_fema_index, right_is_within_fema, + geom + ) + # left_trans + # + # sf::st_within(left_trans, fema_polygons) + # geos::as_geos_geometry(fema_polygons) + # geos::as_geos_geometry(left_trans) + # geos::geos_within(geos::as_geos_geometry(left_trans), geos::as_geos_geometry(fema_polygons)) + + left_trans_geos <- geos::as_geos_geometry(left_trans) + left_within_matrix <- geos::geos_within_matrix(left_trans_geos, fema_polygons) + + left_within_vect <- lapply(left_within_matrix, function(i) { + if(length(i) > 0) { c(i) } else { c(NA) } } + ) + left_trans + + + left_trans <- + left_trans %>% + dplyr::mutate( + left_is_within_fema = dplyr::case_when( + !is.na(left_fema_index) ~ TRUE, + TRUE ~ FALSE + ) + ) %>% + dplyr::select(tmp_id, hy_id, cs_source, cs_id, cs_measure, + cs_lengthm, is_extended, partition, partition_lengthm, + left_fema_index, left_is_within_fema, + geom + ) + left_trans + # left_trans$geom2 <- left_trans_geos + + # left_trans <- + # left_trans %>% + # sf::st_drop_geometry() %>% + # dplyr::mutate( + # geom = geos::as_geos_geometry(left_trans_geos) + # # geom2 = left_trans_geos + # ) + + left_trans_geos + + # ---------------------------------------------------------------------------------------------------------------- + # Loop over every left and right halfs of transects and + # if they are fully within FEMA polygons, get the minimum extension distance required for the transect to meet the FEMA polygon boundary + # ---------------------------------------------------------------------------------------------------------------- + + left_ids <- left_trans$tmp_id + + left_fema_indexes <- left_trans$left_fema_index + left_fema_bool <- left_trans$left_is_within_fema + + # preallocate vector that stores the extension. 
distances + left_extension_dists <- vctrs::vec_c(rep(0, length(left_ids))) + + # all_equal_length_vects <- all(length(left_ids) == length(left_fema_indexes) && length(left_ids) == length(left_fema_bool)) + # 1:length(left_ids) + extension_count = 0 + + for(i in 1:length(left_ids)) { + # i = 1 + tmp_id <- left_ids[i] + is_within_fema_polygon <- left_fema_bool[i] + fema_index <- left_fema_indexes[i] + + message("Transect: '", tmp_id, "' - (", i, ")") + # if(is_within_fema_polygon) { + # break + # } + # fema_index <- left_trans$left_fema_index[i] + # is_within_fema_polygon = ifelse(!is.na(left_fema_index), TRUE, FALSE) + + if(is_within_fema_polygon) { + + message("- Left side of transect intersects with FEMA") + message("\t > FEMA index: ", fema_index) + extension_count = extension_count + 1 + message("\t > extension_count: ", extension_count) + + trans_geom <- left_trans_geos[i] + index_vect <- sort(unlist(fema_index)) + + # mapview::mapview(sf::st_as_sf(left_trans_geos[i]), color = "red") + sf::st_as_sf(fema_lines[index_vect]) + + # fema_lines[index_vect] + + dist_to_fema <- hydrofabric3D:::geos_bs_distance( + distances = 1:5000, + line = trans_geom, + geoms_to_cut = fema_lines[index_vect], + direction = "both" + ) + + left_extension_dists[i] <- dist_to_fema + + } + message() + } + + left_trans_geos + vctrs::vec_c( + vctrs::vec_c( + left_trans$hy_id + ), + vctrs::vec_cast(left_trans$cs_id) + ) + + # + # # mapview::mapview(left_trans[477, ]) + fema_polygons[1888, ] + # left_trans <- + # left_trans %>% + # dplyr::mutate( + # fema_index = unlist(sf::st_within(., fema_polygons)) + # ) %>% + # dplyr::relocate(fema_index) + # + # left_trans_geos <- geos::as_geos_geometry(left_trans) + + sort(na.omit(unlist(unique(left_trans$fema_index)))) + fema_polygons[na.omit(unlist(unique(left_trans$fema_index)))] + + fema_polygons %>% na.omit() + + # NOTE: sorting the fema polygon indices (not sure if necessary) + left_fema <- fema_polygons[sort(na.omit(unlist(unique(left_trans$fema_index))))] + + left_fema + + left_fema %>% plot() + + geos::geos_make_linestring(geom = left_fema) + left_trans$fema_index + sort(na.omit(unlist(unique(left_trans$fema_index)))) + + # left_fema <- fema_polygons[na.omit(unlist(unique(left_trans$fema_index)))] + left_fema_lines <- + left_fema %>% + sf::st_as_sf() %>% + sf::st_cast("MULTILINESTRING") %>% + geos::as_geos_geometry() %>% + geos::geos_simplify_preserve_topology(25) + + left_fema %>% + sf::st_as_sf() %>% + mapview::npts() + + geos::geos_simplify_preserve_topology(left_fema_lines, 50) %>% + geos::geos_num_coordinates() %>% + sum() + + geos::geos_num_coordinates(left_fema_lines) %>% sum() + + geos::geos_simplify_preserve_topology(left_fema_lines, 1) %>% + .[3] %>% + plot() + left_fema_lines[3] %>% plot() + + + left_extension_dists <- lapply(seq_along(left_trans_geos), function(i) { + + hydrofabric3D:::geos_bs_distance( + distances = 1:2000, + line = left_trans_geos[i], + geoms_to_cut = left_fema_lines, + direction = "head" + ) + }) %>% + unlist() + + left_trans$left_head_extension_dist <- left_extension_dists + + # Find the distances from the right side of transect lines + right_trans <- + segmented_trans %>% + lwgeom::st_linesubstring(0.50, 1) %>% + dplyr::mutate( + partition = "right", + partition_lengthm = as.numeric(sf::st_length(geom)) + ) %>% + dplyr::select(hy_id, cs_source, cs_id, cs_measure, + cs_lengthm, is_extended, partition, partition_lengthm, geom) + right_trans_geos <- geos::as_geos_geometry(right_trans) + right_within_matrix <- 
geos::geos_within_matrix(right_trans_geos, geos::as_geos_geometry(fema_polygons)) + + right_within_vect <- lapply(right_within_matrix, function(i) { + if(length(i) > 0) { c(i) } else { c(NA_real_) } } + ) + right_trans$fema_index <- right_within_vect +# +# right_trans <- +# right_trans %>% +# dplyr::mutate( +# fema_index = unlist(sf::st_within(., fema_polygons)) +# ) %>% +# dplyr::relocate(fema_index) +# + + right_fema <- fema_polygons[unique(right_trans$fema_index),] + right_fema_lines <- + right_fema %>% + sf::st_cast("LINESTRING") + + right_extension_dists <- lapply(seq_along(right_trans_geos), function(i) { + + hydrofabric3D:::geos_bs_distance( + distances = 1:2000, + line = right_trans_geos[i], + geoms_to_cut = right_fema_lines, + direction = "tail" + ) + }) %>% + unlist() + + right_trans$right_tail_extension_dists <- right_extension_dists + right_trans$right_tail_extension_dists + unlist(sf::st_within(left_trans, fema_polygons)) + + left_trans <- + left_trans %>% + dplyr::mutate( + fema_index = unlist(sf::st_within(., fema_polygons)) + ) %>% + dplyr::relocate(fema_index) + + left_fema <- fema_polygons[unique(left_trans$fema_index),] + left_fema_lines <- + left_fema %>% + sf::st_cast("LINESTRING") + + geos::as_geos_geometry(left_fema) + mapview::mapview(left_fema, col.regions = "dodgerblue") + + mapview::mapview(left_ls, color = "yellow") + + mapview::mapview(left_trans, color = "red") + + mapview::mapview(right_trans, color = "green") + + left_trans_geos <- geos::as_geos_geometry(left_trans) + left_trans_geos %>% plot() + left_trans_geos[2] + + length(left_trans_geos) + + left_extension_dists <- lapply(seq_along(left_trans_geos), function(i) { + + hydrofabric3D:::geos_bs_distance( + distances = 1:2000, + line = left_trans_geos[i], + geoms_to_cut = left_fema_lines, + direction = "head" + ) + }) %>% + unlist() + + left_extensions <- geos::geos_empty() + + for (i in 1:length(left_extension_dists)) { + dist = left_extension_dists[i] + geos_line <- left_trans_geos[i] + message(glue::glue("i: {i}\ndist: {dist}")) + + extended <- hydrofabric3D::geos_extend_line( + geos_line, + dist, + "head" + ) + left_extensions <- vctrs::vec_c(left_extensions, extended) + + } + # index for only valid transects + # is_valid <- !geos::geos_is_empty(left_extensions) + left_extensions <- left_extensions[!geos::geos_is_empty(left_extensions)] + # !geos::geos_is_empty(left_extensions) + + new_left_trans <- + left_trans %>% + sf::st_drop_geometry() %>% + dplyr::mutate( + geom = sf::st_as_sfc(left_extensions) + ) %>% + sf::st_as_sf() + # geos::sf + mapview::mapview(left_fema, col.regions = "dodgerblue") + + mapview::mapview(left_ls, color = "yellow") + + mapview::mapview(left_trans, color = "red") + + mapview::mapview(new_left_trans, color = "green") + mapply(function(geom, dist) { + hydrofabric3D::geos_extend_line(geom, dist, "head") + }, + left_trans_geos, + left_extension_dists + ) + + left_extensions <- lapply(seq_along(left_trans_geos), function(i) { + + extend_dist <- hydrofabric3D:::geos_bs_distance( + distances = 1:2000, + line = left_trans_geos[i], + geoms_to_cut = left_fema_lines, + direction = "head" + ) + + hydrofabric3D::geos_extend_line(left_trans_geos[i], extend_dist, "head") + + }) + # unlist(left_extensions) + + left_extension_dists <- lapply(seq_along(left_trans_geos), function(i) { + hydrofabric3D:::geos_bs_distance( + distances = 1:2000, + line = left_trans_geos[i], + geoms_to_cut = left_fema_lines, + direction = "head" + ) + }) + + distance_to_extend <- + hydrofabric3D:::geos_bs_distance( + 
distances = 1:1500, + line = left_trans_geos[1], + geoms_to_cut = left_fema_lines, + direction = "head" + ) + + extended <- hydrofabric3D::geos_extend_line(left_trans_geos[1], distance_to_extend, "head") %>% + sf::st_as_sf() + mapview::mapview(left_fema, col.regions = "dodgerblue") + + mapview::mapview(left_ls, color = "yellow") + + mapview::mapview(left_trans, color = "red") + + mapview::mapview(extended, color = "green") + # mapview::mapview(right_trans, color = "green") + left_trans$geom %>% sf::st_length() + plot(segmented_trans$geom, col = "red", lwd =5) + plot(left_trans$geom, col = "green", lwd=3, add = TRUE) + plot(right_trans$geom, col = "blue", lwd=3, add = TRUE) + unlist(left_trans$geom) + unlist(right_trans$geom) + + unlist(tmp_trans$geom ) + unlist(split_trans$geom ) + split_trans %>% + sf::st_collection_extract("LINESTRING") + split_trans$geom + + tmp_trans$geom + nngeo::st_segments(tmp_trans) %>% + .$result %>% + plot() + + mapview::mapview(tmp_trans) + tmp_centroid + geos::geos_clip_by_rect() + + transects_with_ lengths(transects_fema_matrix) mapview::mapview(transects_with_fema, color = "green") + fema_with_transects From 75e93bd03259449f3147a15f6350a029273b2740 Mon Sep 17 00:00:00 2001 From: anguswg-ucsb <anguswatters@gmail.com> Date: Mon, 13 May 2024 15:50:00 -0700 Subject: [PATCH 29/64] continued work on generating extended transect lines from fema files --- runners/cs_runner/add_fema_to_transects.R | 702 ++++++++++++++-------- 1 file changed, 445 insertions(+), 257 deletions(-) diff --git a/runners/cs_runner/add_fema_to_transects.R b/runners/cs_runner/add_fema_to_transects.R index 3ca2eb7..d328e89 100644 --- a/runners/cs_runner/add_fema_to_transects.R +++ b/runners/cs_runner/add_fema_to_transects.R @@ -60,255 +60,443 @@ path_df # fema1 <- sf::read_sf(vpu_fema_file1) # fema2 <- sf::read_sf(vpu_fema_filev2) fema <- sf::read_sf(vpu_fema_file) - # fema %>% mapview::npts() - # fema2 %>% mapview::npts() - - fema <- - fema %>% - rmapshaper::ms_simplify(keep = 0.10) - - fema %>% mapview::npts() - transects <- sf::read_sf(transect_path) - transects_geos <- geos::as_geos_geometry(transects) - fema_geos <- geos::as_geos_geometry(fema) - - length(fema_geos) - - # fema_geos - transects_fema_matrix <- geos::geos_intersects_matrix(transects_geos, fema_geos) - fema_transects_matrix <- geos::geos_intersects_matrix(fema_geos, transects_geos) - - # transects_fema_matrix - # get the polygons that have atleast 1 intersection with the 'lines' - # transects_fema_matrix[[557]] - # fema_tmp <- fema[transects_fema_matrix[[557]], ] - # trans_tmp <- transects[557, ] - - # fema_dissolve <- rmapshaper::ms_dissolve(fema_tmp) - # fema_simple <- rmapshaper::ms_simplify(fema_tmp, keep = 1) %>% - # rmapshaper::ms_dissolve(field = "state") - - # mapview::mapview(trans_tmp, color = "green") + - # mapview::mapview(fema_tmp[1, ], col.region = "red") + - # mapview::mapview(fema_tmp[2, ], col.region = "dodgerblue") + - # mapview::mapview(fema_simple, col.region = "yellow") - - fema_transects_matrix - - trans_fema <- transects[lengths(transects_fema_matrix) != 0, ] - # fema_polygons <- fema[lengths(fema_transects_matrix) != 0, ] - fema_polygons <- fema_geos[lengths(fema_transects_matrix) != 0] - - - fema_lines <- - fema_polygons %>% - sf::st_as_sf() %>% - sf::st_cast("MULTILINESTRING") %>% - geos::as_geos_geometry() %>% - geos::geos_simplify_preserve_topology(25) - - # length(fema_geos) - # nrow(fema) - # fema_geos <- geos::as_geos_geometry(fema_polygons) - - tmp_trans <- trans_fema[1:5000, ] - - # 
tmp_centroid <- sf::st_centroid(tmp_trans) - # sf::st_segmentize() - # split_trans <- lwgeom::st_split(tmp_trans, tmp_centroid) - # # tmp_trans - max_extension_distance <- 2500 - - # unlist(tmp_trans$geom) - # unlist(sf::st_segmentize(tmp_trans, 4)$geom) - min_segmentation <- min(tmp_trans$cs_lengthm %/% 2) - - # which.min(tmp_trans$cs_lengthm %/% 2) - - segmented_trans <- sf::st_segmentize(tmp_trans, min_segmentation) - - # unlist(segmented_trans$geom) - unique(lengths(segmented_trans$geom)) - length(lengths(segmented_trans$geom)) - lengths(segmented_trans$geom) - - # left_trans <- lwgeom::st_linesubstring(segmented_trans, 0, 0.50) - # right_trans <- lwgeom::st_linesubstring(segmented_trans, 0.50, 1) - - # mapview::mapview(left_trans, col.regions = "dodgerblue") + - # mapview::mapview(tmp_trans, color = "red") + - # mapview::mapview(tmp_trans[42, ], color = "yellow") + - # mapview::mapview(right_trans, color = "dodgerblue") + - # mapview::mapview(left_trans, color = "green") - - - left_trans <- - segmented_trans %>% - lwgeom::st_linesubstring(0, 0.50) %>% - dplyr::mutate( - partition = "left", - partition_lengthm = as.numeric(sf::st_length(geom)) - ) %>% - hydrofabric3D::add_tmp_id() %>% - dplyr::select(tmp_id, hy_id, cs_source, cs_id, cs_measure, - cs_lengthm, is_extended, partition, partition_lengthm, geom) - - # Find the distances from the right side of transect lines - right_trans <- - segmented_trans %>% - lwgeom::st_linesubstring(0.50, 1) %>% - dplyr::mutate( - partition = "right", - partition_lengthm = as.numeric(sf::st_length(geom)) - ) %>% - hydrofabric3D::add_tmp_id() %>% - dplyr::select(tmp_id, hy_id, cs_source, cs_id, cs_measure, - cs_lengthm, is_extended, partition, partition_lengthm, geom) - - # convert the transect geometries to geos types - # get the fema polygon indices for the transect halves that are completely within a fema polygon - # add the fema polygons index as a column to the transect dataframes - left_trans_geos <- geos::as_geos_geometry(left_trans) - right_trans_geos <- geos::as_geos_geometry(right_trans) + # fema %>% mapview::npts() + # fema2 %>% mapview::npts() - left_within_matrix <- geos::geos_within_matrix(left_trans_geos, fema_polygons) - right_within_matrix <- geos::geos_within_matrix(right_trans_geos, fema_polygons) + # Give a set of transecct linestrings and poylgons and get the minimum distance to extend each transect line (from both directions, to try and reach the edge of a "polygons") + # internal function for extending transect lines out to FEMA 100 year flood plain polygons + # transect_lines, set of Sf linestrigns to extend (only if the transect lines are ENTIRELLY within a polygons) + # polygons, set of sf polygons that transect lines should be exteneded + # max_extension_distance numeric, maximum distance (meters) to extend a transect line in either direction to try and intersect one of the "polygons" + get_transect_extension_distances_to_polygons <- function(transect_lines, polygons, max_extension_distance) { + + # transect_lines <- transects + # polygons <- fema + # max_extension_distance <- 2500 + + # keep 10% of the original points for speed + polygons <- rmapshaper::ms_simplify(polygons, keep = 0.10) + + # fema %>% mapview::npts() + + # transects <- sf::read_sf(transect_path) + + transects_geos <- geos::as_geos_geometry(transects) + polygons_geos <- geos::as_geos_geometry(polygons) - left_within_vect <- lapply(left_within_matrix, function(i) { if(length(i) > 0) { c(i) } else { c(NA) } }) - right_within_vect <- lapply(right_within_matrix, 
function(i) { if(length(i) > 0) { c(i) } else { c(NA) } }) + # length(polygons_geos) + + # polygons_geos + transects_polygons_matrix <- geos::geos_intersects_matrix(transects_geos, polygons_geos) + polygons_transects_matrix <- geos::geos_intersects_matrix(polygons_geos, transects_geos) + + # transects_polygons_matrix + # get the polygons that have atleast 1 intersection with the 'lines' + # transects_polygons_matrix[[557]] + # fema_tmp <- fema[transects_polygons_matrix[[557]], ] + # trans_tmp <- transects[557, ] + + # fema_dissolve <- rmapshaper::ms_dissolve(fema_tmp) + # fema_simple <- rmapshaper::ms_simplify(fema_tmp, keep = 1) %>% + # rmapshaper::ms_dissolve(field = "state") - # add the fema polygon indexes as columns - left_trans$left_fema_index <- left_within_vect - right_trans$right_fema_index <- right_within_vect + # mapview::mapview(trans_tmp, color = "green") + + # mapview::mapview(fema_tmp[1, ], col.region = "red") + + # mapview::mapview(fema_tmp[2, ], col.region = "dodgerblue") + + # mapview::mapview(fema_simple, col.region = "yellow") - # add boolean columns whether the transect is fully within the FEMA polygons - left_trans <- - left_trans %>% - dplyr::mutate( - left_is_within_fema = dplyr::case_when( - !is.na(left_fema_index) ~ TRUE, - TRUE ~ FALSE - ) - ) %>% - dplyr::select(tmp_id, hy_id, cs_source, cs_id, cs_measure, - cs_lengthm, is_extended, partition, partition_lengthm, - left_fema_index, left_is_within_fema, - geom - ) + # polygons_transects_matrix + + # subset the transects and polygons to only those with intersections + intersect_transects <- transects[lengths(transects_polygons_matrix) != 0, ] + intersect_polygons <- polygons_geos[lengths(polygons_transects_matrix) != 0] + + # Convert our intersecting polygons to LINESTRINGS b/c we DON'T NEED polygons to calculate extension distances from our transect lines + # This can be done with just linestrings (not sure if this is actually more performent but I'm pretty sure it is....) + intersect_lines <- + intersect_polygons %>% + sf::st_as_sf() %>% + sf::st_cast("MULTILINESTRING") %>% + geos::as_geos_geometry() %>% + geos::geos_simplify_preserve_topology(25) + + # length(polygons_geos) + # nrow(fema) + # polygons_geos <- geos::as_geos_geometry(intersect_polygons) + + # tmp_trans <- intersect_transects[1:5000, ] + + # tmp_centroid <- sf::st_centroid(tmp_trans) + # sf::st_segmentize() + # split_trans <- lwgeom::st_split(tmp_trans, tmp_centroid) + # # tmp_trans + # max_extension_distance <- 2500 + + # unlist(tmp_trans$geom) + # unlist(sf::st_segmentize(tmp_trans, 4)$geom) + + # use half of the shortest transect line as the segmentation length for all transects (ensures all transects will have a midpoint...?) + # TODO: Double check this logic. 
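# A quick way to sanity-check the TODO above: on a toy transect in a projected CRS
# (the coordinates below are illustrative only, not repo data), sf::st_segmentize()
# densifies the vertices and lwgeom::st_linesubstring() takes the 0-0.5 and 0.5-1
# fractions as the left and right halves:
toy <- sf::st_sf(geom = sf::st_sfc(sf::st_linestring(rbind(c(0, 0), c(100, 0))), crs = 5070))
toy <- sf::st_segmentize(toy, 10)                     # add a vertex roughly every 10m
left_half  <- lwgeom::st_linesubstring(toy, 0, 0.5)   # first half of the transect
right_half <- lwgeom::st_linesubstring(toy, 0.5, 1)   # second half of the transect
sf::st_length(left_half)                              # 50 [m], i.e. exactly half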
+ min_segmentation <- min(intersect_transects$cs_lengthm %/% 2) + + # which.min(intersect_transects$cs_lengthm %/% 2) + + # make each transect line have way more segments so we can take a left and right half of each transect line + segmented_trans <- sf::st_segmentize(intersect_transects, min_segmentation) + + # unlist(segmented_trans$geom) + unique(lengths(segmented_trans$geom)) + length(lengths(segmented_trans$geom)) + lengths(segmented_trans$geom) + + # left_trans <- lwgeom::st_linesubstring(segmented_trans, 0, 0.50) + # right_trans <- lwgeom::st_linesubstring(segmented_trans, 0.50, 1) + + # mapview::mapview(left_trans, col.regions = "dodgerblue") + + # mapview::mapview(intersect_transects, color = "red") + + # mapview::mapview(intersect_transects[42, ], color = "yellow") + + # mapview::mapview(right_trans, color = "dodgerblue") + + # mapview::mapview(left_trans, color = "green") + + # Seperate the transect lines into LEFT and RIGHT halves + # We do this so we can check if a side of a transect is ENTIRELY WITHIN a polygon. + # If the half is entirely within a polygon, + left_trans <- + segmented_trans %>% + lwgeom::st_linesubstring(0, 0.50) %>% + dplyr::mutate( + partition = "left", + partition_lengthm = as.numeric(sf::st_length(geom)) + ) %>% + hydrofabric3D::add_tmp_id() %>% + dplyr::select(tmp_id, hy_id, cs_source, cs_id, cs_measure, + cs_lengthm, is_extended, partition, partition_lengthm, geom) + + # Find the distances from the right side of transect lines + right_trans <- + segmented_trans %>% + lwgeom::st_linesubstring(0.50, 1) %>% + dplyr::mutate( + partition = "right", + partition_lengthm = as.numeric(sf::st_length(geom)) + ) %>% + hydrofabric3D::add_tmp_id() %>% + dplyr::select(tmp_id, hy_id, cs_source, cs_id, cs_measure, + cs_lengthm, is_extended, partition, partition_lengthm, geom) - right_trans <- - right_trans %>% - dplyr::mutate( - right_is_within_fema = dplyr::case_when( - !is.na(right_fema_index) ~ TRUE, - TRUE ~ FALSE + # convert the transect geometries to geos types + # get the fema polygon indices for the transect halves that are completely within a fema polygon + # add the fema polygons index as a column to the transect dataframes + left_trans_geos <- geos::as_geos_geometry(left_trans) + right_trans_geos <- geos::as_geos_geometry(right_trans) + + left_within_matrix <- geos::geos_within_matrix(left_trans_geos, intersect_polygons) + right_within_matrix <- geos::geos_within_matrix(right_trans_geos, intersect_polygons) + + left_within_vect <- lapply(left_within_matrix, function(i) { if(length(i) > 0) { c(i) } else { c(NA) } }) + right_within_vect <- lapply(right_within_matrix, function(i) { if(length(i) > 0) { c(i) } else { c(NA) } }) + + # add the fema polygon indexes as columns + left_trans$left_fema_index <- left_within_vect + right_trans$right_fema_index <- right_within_vect + + # add boolean columns whether the transect is fully within the FEMA polygons + left_trans <- + left_trans %>% + dplyr::mutate( + left_is_within_fema = dplyr::case_when( + !is.na(left_fema_index) ~ TRUE, + TRUE ~ FALSE + ) + ) %>% + dplyr::select(tmp_id, hy_id, cs_source, cs_id, cs_measure, + cs_lengthm, is_extended, partition, partition_lengthm, + left_fema_index, left_is_within_fema, + geom ) - ) %>% - dplyr::select(tmp_id, hy_id, cs_source, cs_id, cs_measure, - cs_lengthm, is_extended, partition, partition_lengthm, - right_fema_index, right_is_within_fema, - geom - ) - # left_trans - # - # sf::st_within(left_trans, fema_polygons) - # geos::as_geos_geometry(fema_polygons) - # 
geos::as_geos_geometry(left_trans) - # geos::geos_within(geos::as_geos_geometry(left_trans), geos::as_geos_geometry(fema_polygons)) - - left_trans_geos <- geos::as_geos_geometry(left_trans) - left_within_matrix <- geos::geos_within_matrix(left_trans_geos, fema_polygons) - - left_within_vect <- lapply(left_within_matrix, function(i) { - if(length(i) > 0) { c(i) } else { c(NA) } } + + right_trans <- + right_trans %>% + dplyr::mutate( + right_is_within_fema = dplyr::case_when( + !is.na(right_fema_index) ~ TRUE, + TRUE ~ FALSE + ) + ) %>% + dplyr::select(tmp_id, hy_id, cs_source, cs_id, cs_measure, + cs_lengthm, is_extended, partition, partition_lengthm, + right_fema_index, right_is_within_fema, + geom + ) + + left_distances <- calc_extension_distances( + geos_geoms = left_trans_geos, + ids = left_trans$tmp_id, + lines_to_cut = intersect_lines, + lines_to_cut_indices = left_trans$left_fema_index, + direction = "head", + max_extension_distance = max_extension_distance + ) + + right_distances <- calc_extension_distances( + geos_geoms = right_trans_geos, + ids = right_trans$tmp_id, + lines_to_cut = intersect_lines, + lines_to_cut_indices = right_trans$right_fema_index, + direction = "tail", + max_extension_distance = max_extension_distance + ) + + left_trans$left_distance <- left_distances + right_trans$right_distance <- right_distances + + extensions_by_id <- dplyr::left_join( + sf::st_drop_geometry( + dplyr::select(left_trans, + hy_id, cs_id, left_distance) + ), + sf::st_drop_geometry( + dplyr::select(right_trans, + hy_id, cs_id, + right_distance) + ), + by = c("hy_id", "cs_id") ) - left_trans - - - left_trans <- - left_trans %>% - dplyr::mutate( - left_is_within_fema = dplyr::case_when( - !is.na(left_fema_index) ~ TRUE, - TRUE ~ FALSE + transects_with_distances <- + transects %>% + dplyr::left_join( + extensions_by_id, + by = c("hy_id", "cs_id") + ) %>% + hydrofabric3D::add_tmp_id() + + fline_id_array <- flines$id + + # Convert the net object into a geos_geometry + flines_geos <- geos::as_geos_geometry(flines) + + transect_hy_id_array <- transects_with_distances$hy_id + transect_cs_id_array <- transects_with_distances$cs_id + + transect_geoms <- geos::as_geos_geometry(transects_with_distances$geom) + + left_distances <- transects_with_distances$left_distance + right_distances <- transects_with_distances$right_distance + + # preallocate vector that stores the extension. 
distances + new_transects <- vctrs::vec_c(rep(geos::geos_empty(), length(transect_ids))) + # new_transects <- geos::geos_empty() + # measures <- vctrs::vec_c() + + for (i in seq_along(transect_ids)) { + message("i: ", i) + i = 1 + + current_trans <- transect_geoms[i] + + current_hy_id <- transect_hy_id_array[i] + current_cs_id <- transect_cs_id_array[i] + + current_fline <- flines_geos[fline_id_array == current_hy_id] + + # current_fline <- + + + left_distance_to_extend <- left_distances[i] + left_distance_to_extend + right_distance_to_extend <- right_distances[i] + right_distance_to_extend + + no_extension_required <- (left_distance_to_extend == 0 && right_distance_to_extend == 0) + message("Transect tmp_id: ", curr_tmp_id, " - (", i, ")") + + if(no_extension_required) { + message("Skipping -left/right extension are both 0") + next + } + + trans + left_trans + curr_tmp_id + + curr_fema_index <- + left_trans %>% + dplyr::filter(hy_id == current_hy_id, cs_id == current_cs_id) %>% + .$left_fema_index %>% .[[1]] + + left_extended_trans <- hydrofabric3D::geos_extend_line(current_trans, left_distance_to_extend, "head") + right_extended_trans <- hydrofabric3D::geos_extend_line(current_trans, right_distance_to_extend, "tail") + + # neighbor_transects <- + left_intersects_fline <- geos::geos_intersection( + left_extended_trans, + current_fline ) - ) %>% - dplyr::select(tmp_id, hy_id, cs_source, cs_id, cs_measure, - cs_lengthm, is_extended, partition, partition_lengthm, - left_fema_index, left_is_within_fema, - geom - ) - left_trans - # left_trans$geom2 <- left_trans_geos - - # left_trans <- - # left_trans %>% - # sf::st_drop_geometry() %>% - # dplyr::mutate( - # geom = geos::as_geos_geometry(left_trans_geos) - # # geom2 = left_trans_geos - # ) - - left_trans_geos - - # ---------------------------------------------------------------------------------------------------------------- - # Loop over every left and right halfs of transects and - # if they are fully within FEMA polygons, get the minimum extension distance required for the transect to meet the FEMA polygon boundary - # ---------------------------------------------------------------------------------------------------------------- - - left_ids <- left_trans$tmp_id - - left_fema_indexes <- left_trans$left_fema_index - left_fema_bool <- left_trans$left_is_within_fema - - # preallocate vector that stores the extension. 
distances - left_extension_dists <- vctrs::vec_c(rep(0, length(left_ids))) - - # all_equal_length_vects <- all(length(left_ids) == length(left_fema_indexes) && length(left_ids) == length(left_fema_bool)) - # 1:length(left_ids) - extension_count = 0 - - for(i in 1:length(left_ids)) { - # i = 1 - tmp_id <- left_ids[i] - is_within_fema_polygon <- left_fema_bool[i] - fema_index <- left_fema_indexes[i] - - message("Transect: '", tmp_id, "' - (", i, ")") - # if(is_within_fema_polygon) { - # break - # } - # fema_index <- left_trans$left_fema_index[i] - # is_within_fema_polygon = ifelse(!is.na(left_fema_index), TRUE, FALSE) - - if(is_within_fema_polygon) { - message("- Left side of transect intersects with FEMA") - message("\t > FEMA index: ", fema_index) - extension_count = extension_count + 1 - message("\t > extension_count: ", extension_count) + left_intersects_fline_once = geos::geos_type(fline_intersects) == "point" + left_intersects_fline_once + + + fline_intersects + geos::geos_type(fline_intersects) == "point" + + geos::geos_intersects(current_trans, ) + + # if( + # geos::geos_type(fline_intersects) + # + # + # ) + mapview::mapview(sf::st_as_sf(trans), color = "green") + + mapview::mapview(sf::st_as_sf(left_extended_trans), color = "red") + + mapview::mapview(sf::st_as_sf(right_extended_trans), color = "red") + + mapview::mapview( sf::st_as_sf(intersect_polygons[[curr_fema_index]]), col.regions = "dodgerblue") + + + + + + } + + + left_trans$left_extension_distance + + length(right_distances) + + + # Calculate the minimum distance a line would need to extend to reach the boundary of the polygon/line that the input geometries are entirely within + calc_extension_distances <- function(geos_geoms, ids, lines_to_cut, lines_to_cut_indices, direction = "head", max_extension_distance = 2500) { + ##### ##### ##### ##### ##### + # geos_geoms <- left_trans_geos + # ids <- left_trans$tmp_id + # lines_to_cut <- intersect_lines + # lines_to_cut_indices <- left_trans$left_fema_index + # direction = "head" + # max_extension_distance = 2500 + # geos_geoms = left_trans_geos + # ids = left_trans$tmp_id + # lines_to_cut = intersect_lines + # lines_to_cut_indices = left_trans$left_fema_index + # direction = "head" + # max_extension_distance = max_extension_distance - trans_geom <- left_trans_geos[i] - index_vect <- sort(unlist(fema_index)) - # mapview::mapview(sf::st_as_sf(left_trans_geos[i]), color = "red") + sf::st_as_sf(fema_lines[index_vect]) - # fema_lines[index_vect] + ##### ##### ##### ##### ##### - dist_to_fema <- hydrofabric3D:::geos_bs_distance( - distances = 1:5000, - line = trans_geom, - geoms_to_cut = fema_lines[index_vect], - direction = "both" - ) + if (!direction %in% c("head", "tail")) { + stop("Invalid 'direction' value, must be one of 'head' or 'tail'") + } - left_extension_dists[i] <- dist_to_fema + # preallocate vector that stores the extension. 
distances + extension_dists <- vctrs::vec_c(rep(0, length(ids))) + + # extension_dists <- vector(mode = "numeric", length = nrow(trans_data)) + for (i in seq_along(ids)) { + # i = 118 + curr_id <- ids[i] + is_within_polygon <- any(!is.na(lines_to_cut_indices[[i]])) + polygon_index <- lines_to_cut_indices[[i]] + # any(is_within_polygon) + message("Transect: '", curr_id, "' - (", i, ")") + + if (is_within_polygon) { + message("- Side of transect intersects with FEMA") + message("\t > FEMA index: ", polygon_index) + + curr_geom <- geos_geoms[[i]] + index_vect <- sort(unlist(polygon_index)) + + distance_to_extend <- hydrofabric3D:::geos_bs_distance( + distances = 1:max_extension_distance, + line = curr_geom, + geoms_to_cut = lines_to_cut[index_vect], + direction = direction + ) + + extension_dists[i] <- distance_to_extend + } + + } + + return(extension_dists) + } + + + # ---------------------------------------------------------------------------------------------------------------- + # Loop over every left and right halfs of transects and + # if they are fully within FEMA polygons, get the minimum extension distance required for the transect to meet the FEMA polygon boundary + # ---------------------------------------------------------------------------------------------------------------- + + left_ids <- left_trans$tmp_id + + left_fema_indexes <- left_trans$left_fema_index + left_fema_bool <- left_trans$left_is_within_fema + + # preallocate vector that stores the extension. distances + left_extension_dists <- vctrs::vec_c(rep(0, length(left_ids))) + + # all_equal_length_vects <- all(length(left_ids) == length(left_fema_indexes) && length(left_ids) == length(left_fema_bool)) + # 1:length(left_ids) + extension_count = 0 + + for(i in 1:length(left_ids)) { + # i = 1 + tmp_id <- left_ids[i] + is_within_fema_polygon <- left_fema_bool[i] + fema_index <- left_fema_indexes[i] + + message("Transect: '", tmp_id, "' - (", i, ")") + # if(is_within_fema_polygon) { + # break + # } + # fema_index <- left_trans$left_fema_index[i] + # is_within_fema_polygon = ifelse(!is.na(left_fema_index), TRUE, FALSE) + if(is_within_fema_polygon) { + + message("- Left side of transect intersects with FEMA") + message("\t > FEMA index: ", fema_index) + extension_count = extension_count + 1 + message("\t > extension_count: ", extension_count) + + trans_geom <- left_trans_geos[i] + index_vect <- sort(unlist(fema_index)) + + # mapview::mapview(sf::st_as_sf(left_trans_geos[i]), color = "red") + sf::st_as_sf(intersect_lines[index_vect]) + + # intersect_lines[index_vect] + + dist_to_fema <- hydrofabric3D:::geos_bs_distance( + distances = 1:max_extension_distance, + line = trans_geom, + geoms_to_cut = intersect_lines[index_vect], + direction = "head" + ) + + left_extension_dists[i] <- dist_to_fema + + } + message() } - message() + + left_trans$left_extension_dist <- left_extension_dists } + tmp <- left_trans[1:5, ] + tmp + tmp_extended <- hydrofabric3D:::extend_by_length(tmp, tmp$left_extension_dist) + + tmp_extended + + fema_idx <- unique(unlist(dplyr::select(tmp, left_fema_index)$left_fema_index)) + + mapview::mapview(dplyr::select(tmp, -left_fema_index), color = "red") + + mapview::mapview(dplyr::select(tmp_extended, -left_fema_index), color = "green") + + sf::st_as_sf(intersect_polygons[fema_idx]) + + length(left_extension_dists) left_trans_geos vctrs::vec_c( @@ -319,11 +507,11 @@ path_df ) # - # # mapview::mapview(left_trans[477, ]) + fema_polygons[1888, ] + # # mapview::mapview(left_trans[477, ]) + intersect_polygons[1888, ] # 
left_trans <- # left_trans %>% # dplyr::mutate( - # fema_index = unlist(sf::st_within(., fema_polygons)) + # fema_index = unlist(sf::st_within(., intersect_polygons)) # ) %>% # dplyr::relocate(fema_index) # @@ -334,7 +522,7 @@ path_df fema_polygons %>% na.omit() - # NOTE: sorting the fema polygon indices (not sure if necessary) + # note: sorting the fema polygon indices (not sure if necessary) left_fema <- fema_polygons[sort(na.omit(unlist(unique(left_trans$fema_index))))] left_fema @@ -349,7 +537,7 @@ path_df left_fema_lines <- left_fema %>% sf::st_as_sf() %>% - sf::st_cast("MULTILINESTRING") %>% + sf::st_cast("multilinestring") %>% geos::as_geos_geometry() %>% geos::geos_simplify_preserve_topology(25) @@ -371,7 +559,7 @@ path_df left_extension_dists <- lapply(seq_along(left_trans_geos), function(i) { - hydrofabric3D:::geos_bs_distance( + hydrofabric3d:::geos_bs_distance( distances = 1:2000, line = left_trans_geos[i], geoms_to_cut = left_fema_lines, @@ -382,7 +570,7 @@ path_df left_trans$left_head_extension_dist <- left_extension_dists - # Find the distances from the right side of transect lines + # find the distances from the right side of transect lines right_trans <- segmented_trans %>% lwgeom::st_linesubstring(0.50, 1) %>% @@ -396,7 +584,7 @@ path_df right_within_matrix <- geos::geos_within_matrix(right_trans_geos, geos::as_geos_geometry(fema_polygons)) right_within_vect <- lapply(right_within_matrix, function(i) { - if(length(i) > 0) { c(i) } else { c(NA_real_) } } + if(length(i) > 0) { c(i) } else { c(na_real_) } } ) right_trans$fema_index <- right_within_vect # @@ -411,11 +599,11 @@ path_df right_fema <- fema_polygons[unique(right_trans$fema_index),] right_fema_lines <- right_fema %>% - sf::st_cast("LINESTRING") + sf::st_cast("linestring") right_extension_dists <- lapply(seq_along(right_trans_geos), function(i) { - hydrofabric3D:::geos_bs_distance( + hydrofabric3d:::geos_bs_distance( distances = 1:2000, line = right_trans_geos[i], geoms_to_cut = right_fema_lines, @@ -438,7 +626,7 @@ path_df left_fema <- fema_polygons[unique(left_trans$fema_index),] left_fema_lines <- left_fema %>% - sf::st_cast("LINESTRING") + sf::st_cast("linestring") geos::as_geos_geometry(left_fema) mapview::mapview(left_fema, col.regions = "dodgerblue") + @@ -454,7 +642,7 @@ path_df left_extension_dists <- lapply(seq_along(left_trans_geos), function(i) { - hydrofabric3D:::geos_bs_distance( + hydrofabric3d:::geos_bs_distance( distances = 1:2000, line = left_trans_geos[i], geoms_to_cut = left_fema_lines, @@ -470,7 +658,7 @@ path_df geos_line <- left_trans_geos[i] message(glue::glue("i: {i}\ndist: {dist}")) - extended <- hydrofabric3D::geos_extend_line( + extended <- hydrofabric3d::geos_extend_line( geos_line, dist, "head" @@ -496,7 +684,7 @@ path_df mapview::mapview(left_trans, color = "red") + mapview::mapview(new_left_trans, color = "green") mapply(function(geom, dist) { - hydrofabric3D::geos_extend_line(geom, dist, "head") + hydrofabric3d::geos_extend_line(geom, dist, "head") }, left_trans_geos, left_extension_dists @@ -504,20 +692,20 @@ path_df left_extensions <- lapply(seq_along(left_trans_geos), function(i) { - extend_dist <- hydrofabric3D:::geos_bs_distance( + extend_dist <- hydrofabric3d:::geos_bs_distance( distances = 1:2000, line = left_trans_geos[i], geoms_to_cut = left_fema_lines, direction = "head" ) - hydrofabric3D::geos_extend_line(left_trans_geos[i], extend_dist, "head") + hydrofabric3d::geos_extend_line(left_trans_geos[i], extend_dist, "head") }) # unlist(left_extensions) left_extension_dists <- 
lapply(seq_along(left_trans_geos), function(i) { - hydrofabric3D:::geos_bs_distance( + hydrofabric3d:::geos_bs_distance( distances = 1:2000, line = left_trans_geos[i], geoms_to_cut = left_fema_lines, @@ -526,14 +714,14 @@ path_df }) distance_to_extend <- - hydrofabric3D:::geos_bs_distance( + hydrofabric3d:::geos_bs_distance( distances = 1:1500, line = left_trans_geos[1], geoms_to_cut = left_fema_lines, direction = "head" ) - extended <- hydrofabric3D::geos_extend_line(left_trans_geos[1], distance_to_extend, "head") %>% + extended <- hydrofabric3d::geos_extend_line(left_trans_geos[1], distance_to_extend, "head") %>% sf::st_as_sf() mapview::mapview(left_fema, col.regions = "dodgerblue") + mapview::mapview(left_ls, color = "yellow") + @@ -542,15 +730,15 @@ path_df # mapview::mapview(right_trans, color = "green") left_trans$geom %>% sf::st_length() plot(segmented_trans$geom, col = "red", lwd =5) - plot(left_trans$geom, col = "green", lwd=3, add = TRUE) - plot(right_trans$geom, col = "blue", lwd=3, add = TRUE) + plot(left_trans$geom, col = "green", lwd=3, add = true) + plot(right_trans$geom, col = "blue", lwd=3, add = true) unlist(left_trans$geom) unlist(right_trans$geom) - unlist(tmp_trans$geom ) + unlist(trans_fema$geom ) unlist(split_trans$geom ) split_trans %>% - sf::st_collection_extract("LINESTRING") + sf::st_collection_extract("linestring") split_trans$geom tmp_trans$geom @@ -563,17 +751,17 @@ path_df transects_with_ - lengths(transects_fema_matrix) + lengths(transects_polygons_matrix) mapview::mapview(transects_with_fema, color = "green") + fema_with_transects - unique(hydrofabric3D::add_tmp_id(transects)$tmp_id)[1:30] + unique(hydrofabric3d::add_tmp_id(transects)$tmp_id)[1:30] transects %>% - hydrofabric3D::add_tmp_id(transects) %>% + hydrofabric3d::add_tmp_id(transects) %>% .$tmp_id %>% unique() %>% .[1:30] trans_subset <- transects %>% - hydrofabric3D::add_tmp_id() %>% - dplyr::filter(tmp_id %in% unique(hydrofabric3D::add_tmp_id(transects)$tmp_id)[1:30]) + hydrofabric3d::add_tmp_id() %>% + dplyr::filter(tmp_id %in% unique(hydrofabric3d::add_tmp_id(transects)$tmp_id)[1:30]) fema_subset <- fema %>% From 6056c857d3276804af715b594543460e77caccc3 Mon Sep 17 00:00:00 2001 From: anguswg-ucsb <anguswatters@gmail.com> Date: Tue, 14 May 2024 10:32:35 -0700 Subject: [PATCH 30/64] first cut of extensions across an entire vpu, added code to determine whether left, right, or both ends need to be extended from --- runners/cs_runner/add_fema_to_transects.R | 398 +++++++++++++++++----- 1 file changed, 315 insertions(+), 83 deletions(-) diff --git a/runners/cs_runner/add_fema_to_transects.R b/runners/cs_runner/add_fema_to_transects.R index d328e89..32763a6 100644 --- a/runners/cs_runner/add_fema_to_transects.R +++ b/runners/cs_runner/add_fema_to_transects.R @@ -48,22 +48,43 @@ path_df message("creating vpu ", path_df$vpu[i], "\n - transects: ", transect_file, "\n - flowpaths: '", nextgen_file, "'") # message("creating vpu ", path_df$vpu[i], " transects:\n - flowpaths: '", nextgen_file, "'\n - model attributes: '", model_attr_file, "'") - # read in nextgen data - flines <- sf::read_sf(nextgen_path, layer = "flowpaths") - + fema_vpu_dir <- paste0(FEMA_VPU_SUBFOLDERS[grepl(paste0("VPU_", vpu), basename(FEMA_VPU_SUBFOLDERS))], "/merged") - fema_vpu_dir vpu_fema_files <- list.files(fema_vpu_dir, full.names = TRUE) # vpu_fema_file1 <- vpu_fema_files[grepl("_union.gpkg", vpu_fema_files)] - vpu_fema_file <- vpu_fema_files[grepl(paste0(vpu, ".gpkg"), vpu_fema_files)] + + # vpu_fema_file <- 
  fema_subset <- fema %>%

From 6056c857d3276804af715b594543460e77caccc3 Mon Sep 17 00:00:00 2001
From: anguswg-ucsb <anguswatters@gmail.com>
Date: Tue, 14 May 2024 10:32:35 -0700
Subject: [PATCH 30/64] first cut of extensions across an entire vpu, added code to determine whether left, right, or both ends need to be extended from
---
 runners/cs_runner/add_fema_to_transects.R | 398 +++++++++++++++++-----
 1 file changed, 315 insertions(+), 83 deletions(-)

diff --git a/runners/cs_runner/add_fema_to_transects.R b/runners/cs_runner/add_fema_to_transects.R
index d328e89..32763a6 100644
--- a/runners/cs_runner/add_fema_to_transects.R
+++ b/runners/cs_runner/add_fema_to_transects.R
@@ -48,22 +43,43 @@ path_df
   message("creating vpu ", path_df$vpu[i], "\n - transects: ", transect_file, "\n - flowpaths: '", nextgen_file, "'")
   # message("creating vpu ", path_df$vpu[i], " transects:\n - flowpaths: '", nextgen_file, "'\n - model attributes: '", model_attr_file, "'")

-  # read in nextgen data
-  flines <- sf::read_sf(nextgen_path, layer = "flowpaths")
-
+  fema_vpu_dir <- paste0(FEMA_VPU_SUBFOLDERS[grepl(paste0("VPU_", vpu), basename(FEMA_VPU_SUBFOLDERS))], "/merged")
-  fema_vpu_dir
   vpu_fema_files <- list.files(fema_vpu_dir, full.names = TRUE)
   # vpu_fema_file1 <- vpu_fema_files[grepl("_union.gpkg", vpu_fema_files)]
-  vpu_fema_file <- vpu_fema_files[grepl(paste0(vpu, ".gpkg"), vpu_fema_files)]
+
+  # vpu_fema_file <- vpu_fema_files[grepl(paste0(vpu, ".gpkg"), vpu_fema_files)]
+  vpu_fema_file <- vpu_fema_files[grepl(paste0(vpu, "_union.gpkg"), vpu_fema_files)]
   vpu_fema_file
-  # fema1 <- sf::read_sf(vpu_fema_file1)
-  # fema2 <- sf::read_sf(vpu_fema_filev2)
+
+  # fema polygons and transect lines
   fema <- sf::read_sf(vpu_fema_file)
+  # mapview::npts(fema)
+
   transects <- sf::read_sf(transect_path)
-  # fema %>% mapview::npts()
-  # fema2 %>% mapview::npts()
+  # read in nextgen flowlines data
+  flines <- sf::read_sf(nextgen_path, layer = "flowpaths")
+
+  # Given 2 geos_geometry point geometries, create a line between the 2 points
+  # start: geos_geometry, point
+  # end: geos_geometry, point
+  # Returns geos_geometry linestring
+  make_line_from_start_and_end_pts <- function(start, end, line_crs) {
+    Y_start <- geos::geos_y(start)
+    X_start <- geos::geos_x(start)
+    Y_end <- geos::geos_y(end)
+    X_end <- geos::geos_x(end)
+
+    # make the new transect line from the start and end points
+    geos_ls <- geos::geos_make_linestring(x = c(X_start, X_end),
+                                          y = c(Y_start, Y_end),
+                                          crs = line_crs)
+
+    return(geos_ls)
+
+
+  }

 # Given a set of transect linestrings and polygons, get the minimum distance to extend each transect line (from both directions, to try and reach the edge of a "polygons")
 # internal function for extending transect lines out to FEMA 100 year flood plain polygons
@@ -72,43 +93,26 @@ path_df
 # max_extension_distance numeric, maximum distance (meters) to extend a transect line in either direction to try and intersect one of the "polygons"
 get_transect_extension_distances_to_polygons <- function(transect_lines, polygons, max_extension_distance) {
+  ### ### ### ### ### ### ###
+  ### ### ### ### ### ### ###
   # transect_lines <- transects
   # polygons <- fema
   # max_extension_distance <- 2500
+  ### ### ### ### ### ### ###
+  ### ### ### ### ### ### ###

   # keep 10% of the original points for speed
   polygons <- rmapshaper::ms_simplify(polygons, keep = 0.10)
-
-  # fema %>% mapview::npts()
   # transects <- sf::read_sf(transect_path)

   transects_geos <- geos::as_geos_geometry(transects)
   polygons_geos <- geos::as_geos_geometry(polygons)
-
-  # length(polygons_geos)
-
+  # polygons_geos
   transects_polygons_matrix <- geos::geos_intersects_matrix(transects_geos, polygons_geos)
   polygons_transects_matrix <- geos::geos_intersects_matrix(polygons_geos, transects_geos)
-
-  # transects_polygons_matrix
-  # get the polygons that have at least 1 intersection with the 'lines'
-  # transects_polygons_matrix[[557]]
-  # fema_tmp <- fema[transects_polygons_matrix[[557]], ]
-  # trans_tmp <- transects[557, ]
-
-  # fema_dissolve <- rmapshaper::ms_dissolve(fema_tmp)
-  # fema_simple <- rmapshaper::ms_simplify(fema_tmp, keep = 1) %>%
-  #   rmapshaper::ms_dissolve(field = "state")
-
-  # mapview::mapview(trans_tmp, color = "green") +
-  # mapview::mapview(fema_tmp[1, ], col.region = "red") +
-  # mapview::mapview(fema_tmp[2, ], col.region = "dodgerblue") +
-  # mapview::mapview(fema_simple, col.region = "yellow")
-
-  # polygons_transects_matrix
-
+  # subset the transects and polygons to only those with intersections
  intersect_transects <- transects[lengths(transects_polygons_matrix) != 0, ]
  intersect_polygons <- polygons_geos[lengths(polygons_transects_matrix) != 0]
@@ -121,22 +125,7 @@ path_df
     sf::st_cast("MULTILINESTRING") %>%
     geos::as_geos_geometry() %>%
     geos::geos_simplify_preserve_topology(25)
-
-  # length(polygons_geos)
-  # nrow(fema)
-  # polygons_geos <- geos::as_geos_geometry(intersect_polygons)
-
-  # tmp_trans <- intersect_transects[1:5000, ]
-
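
  # A quick sanity check of the make_line_from_start_and_end_pts() helper
  # defined above, on two toy points (synthetic coordinates and CRS, purely
  # illustrative): it should return a two-vertex linestring joining the inputs.
  start_pt <- geos::geos_make_point(0, 0, crs = "EPSG:5070")
  end_pt   <- geos::geos_make_point(100, 50, crs = "EPSG:5070")

  new_line <- make_line_from_start_and_end_pts(start_pt, end_pt, line_crs = "EPSG:5070")
  geos::geos_length(new_line)  # sqrt(100^2 + 50^2), about 111.8 CRS units

-  # 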
tmp_centroid <- sf::st_centroid(tmp_trans) - # sf::st_segmentize() - # split_trans <- lwgeom::st_split(tmp_trans, tmp_centroid) - # # tmp_trans - # max_extension_distance <- 2500 - - # unlist(tmp_trans$geom) - # unlist(sf::st_segmentize(tmp_trans, 4)$geom) - + # use half of the shortest transect line as the segmentation length for all transects (ensures all transects will have a midpoint...?) # TODO: Double check this logic. min_segmentation <- min(intersect_transects$cs_lengthm %/% 2) @@ -147,12 +136,9 @@ path_df segmented_trans <- sf::st_segmentize(intersect_transects, min_segmentation) # unlist(segmented_trans$geom) - unique(lengths(segmented_trans$geom)) - length(lengths(segmented_trans$geom)) - lengths(segmented_trans$geom) - - # left_trans <- lwgeom::st_linesubstring(segmented_trans, 0, 0.50) - # right_trans <- lwgeom::st_linesubstring(segmented_trans, 0.50, 1) + # unique(lengths(segmented_trans$geom)) + # length(lengths(segmented_trans$geom)) + # lengths(segmented_trans$geom) # mapview::mapview(left_trans, col.regions = "dodgerblue") + # mapview::mapview(intersect_transects, color = "red") + @@ -249,9 +235,10 @@ path_df max_extension_distance = max_extension_distance ) - left_trans$left_distance <- left_distances - right_trans$right_distance <- right_distances + left_trans$left_distance <- left_distances + right_trans$right_distance <- right_distances + # distance to extend LEFT and/or RIGHT for each hy_id/cs_id extensions_by_id <- dplyr::left_join( sf::st_drop_geometry( dplyr::select(left_trans, @@ -264,80 +251,323 @@ path_df ), by = c("hy_id", "cs_id") ) + + ######### ######## ######## ######## ####### + # TODO: This is temporary !!!!!! + fema_index_df <- dplyr::left_join( + sf::st_drop_geometry( + dplyr::select(left_trans, + hy_id, cs_id, left_distance, left_fema_index) + ), + sf::st_drop_geometry( + dplyr::select(right_trans, + hy_id, cs_id, + right_distance, right_fema_index) + ), + by = c("hy_id", "cs_id") + ) + ######## ######## ######## ######## ######## + + foi <- sf::st_as_sf(intersect_polygons[fema_uids]) %>% dplyr::mutate( + fema_id = fema_uids + ) + + polygons_to_merge <- sf::st_as_sf(intersect_polygons[fema_uids]) %>% dplyr::mutate( + fema_id = fema_uids + ) %>% + dplyr::filter(fema_id %in% c(1563, 1566, 1567, 590)) + merged_polygon <- + polygons_to_merge %>% + sf::st_union() + merged_polygon %>% + rmapshaper::ms_explode() + mapview::mapview(foi, col.regions = "dodgerblue") + + mapview::mapview(polygons_to_merge, col.regions = "yellow") + + mapview::mapview(merged_polygon, col.regions = "green") + + mapview::mapview(toi, color = "red") + + mapview::mapview(og_trans, color = "green") + # polygons %>% + # dplyr::filter(fema_id ) + ######## ######## ######## ######## ######## + + # TODO: Add left/right extension distancces to transect data + # TODO: this can ultimately just be the "transects" variable, dont need to make new "transects_with_distances" variable transects_with_distances <- transects %>% dplyr::left_join( extensions_by_id, by = c("hy_id", "cs_id") ) %>% + dplyr::mutate( + left_distance = dplyr::case_when( + is.na(left_distance) ~ 0, + TRUE ~ left_distance + ), + right_distance = dplyr::case_when( + is.na(right_distance) ~ 0, + TRUE ~ right_distance + ) + # left_distance = dplyr::case_when( + # left_distance == 0 ~ NA, + # TRUE ~ left_distance + # ), + # right_distance = dplyr::case_when( + # right_distance == 0 ~ NA, + # TRUE ~ right_distance + # ) + ) %>% hydrofabric3D::add_tmp_id() - fline_id_array <- flines$id + # transects + # flines %>% + # 
dplyr::arrange(-tot_drainage_areasqkm) + # flines %>% + # dplyr::arrange(-hydroseq) %>% + # dplyr::filter(hydroseq == min(hydroseq) | hydroseq == max(hydroseq)) %>% + # # dplyr::filter(hydroseq == max(hydroseq)) %>% + # mapview::mapview() + + # extensions_by_id + transects_with_distances %>% + dplyr::filter(!tmp_id %in% hydrofabric3D::add_tmp_id(extensions_by_id)$tmp_id) + + ######## ######## ######## ######## ######## ######## ######## ######## + left_extended_flag[1:20] + right_extended_flag[1:20] + both_extended_flag[1:20] + + range_vect <- 1:500 + + fema_uids <- unique(c(unlist(fema_index_df[range_vect, ]$left_fema_index), unlist(fema_index_df[range_vect, ]$right_fema_index))) + fema_uids <- fema_uids[!is.na(fema_uids)] +fema_uids + foi <- sf::st_as_sf(intersect_polygons[fema_uids]) %>% dplyr::mutate( + fema_id = fema_uids + ) + # ooi <- sf::st_as_sf() + # toi <- sf::st_as_sf(new_transects[1:20]) + toi <- sf::st_as_sf(transect_geoms[range_vect]) + toi + og_trans <- transects_with_distances[range_vect, ] + mapview::mapview(foi, col.regions = "dodgerblue") + + mapview::mapview(toi, color = "red") + + mapview::mapview(og_trans, color = "green") + ######## ######## ### ##### ######## ######## ######## ######## ######## + + left_extended_flag <- rep(FALSE, length(transect_ids)) + right_extended_flag <- rep(FALSE, length(transect_ids)) + both_extended_flag <- rep(FALSE, length(transect_ids)) + + fline_id_array <- flines$id # Convert the net object into a geos_geometry - flines_geos <- geos::as_geos_geometry(flines) + flines_geos <- geos::as_geos_geometry(flines) transect_hy_id_array <- transects_with_distances$hy_id transect_cs_id_array <- transects_with_distances$cs_id - transect_geoms <- geos::as_geos_geometry(transects_with_distances$geom) + transect_geoms <- geos::as_geos_geometry(transects_with_distances$geom) - left_distances <- transects_with_distances$left_distance - right_distances <- transects_with_distances$right_distance + left_distances <- transects_with_distances$left_distance + right_distances <- transects_with_distances$right_distance # preallocate vector that stores the extension. 
distances new_transects <- vctrs::vec_c(rep(geos::geos_empty(), length(transect_ids))) # new_transects <- geos::geos_empty() - # measures <- vctrs::vec_c() + # # measures <- vctrs::vec_c() + # transects_with_distances[1:20, ] + # transects[1:20, ] + + # number of geometries that will be iterated over, keeping this variable to reference in message block + total <- length(transect_ids) + + # output a message every ~10% intervals + message_interval <- total %/% 10 + number_of_skips = 0 for (i in seq_along(transect_ids)) { - message("i: ", i) - i = 1 - + # message("i: ", i) + # if(i > 2000) { + # break + # } + # Check if the iteration is a multiple of 100 + if (i %% message_interval == 0) { + + # get the percent complete + percent_done <- round(i/total, 2) * 100 + + # Print the message every "message_interval" + # if(verbose) { + message(i, " > ", percent_done, "% ") + message("Number of skips: ", number_of_skips) + # } + # message("Iteration ", i, " / ", length(extended_trans), + # " - (", percent_done, "%) ") + + } + # i = 29 + # get the current transect, hy_id, cs_id, flowline, and extension distances current_trans <- transect_geoms[i] current_hy_id <- transect_hy_id_array[i] current_cs_id <- transect_cs_id_array[i] current_fline <- flines_geos[fline_id_array == current_hy_id] - - # current_fline <- - left_distance_to_extend <- left_distances[i] - left_distance_to_extend right_distance_to_extend <- right_distances[i] - right_distance_to_extend no_extension_required <- (left_distance_to_extend == 0 && right_distance_to_extend == 0) - message("Transect tmp_id: ", curr_tmp_id, " - (", i, ")") + # no_extension_required <- is.na(left_distance_to_extend) && is.na(right_distance_to_extend) + # message("Transect tmp_id: ", curr_tmp_id, " - (", i, ")") if(no_extension_required) { - message("Skipping -left/right extension are both 0") + # message("Skipping -left/right extension are both 0") + number_of_skips = number_of_skips + 1 + next } - trans - left_trans - curr_tmp_id - - curr_fema_index <- - left_trans %>% - dplyr::filter(hy_id == current_hy_id, cs_id == current_cs_id) %>% - .$left_fema_index %>% .[[1]] + +# curr_fema_index <- +# left_trans %>% +# dplyr::filter(hy_id == current_hy_id, cs_id == current_cs_id) %>% +# .$left_fema_index %>% .[[1]] + # message("Extending transect line left and right") + # extend the lines left_extended_trans <- hydrofabric3D::geos_extend_line(current_trans, left_distance_to_extend, "head") right_extended_trans <- hydrofabric3D::geos_extend_line(current_trans, right_distance_to_extend, "tail") # neighbor_transects <- + # message("Checking left and right intersections with flowline...") + + # Check that the extended transect lines only intersect the current flowline once left_intersects_fline <- geos::geos_intersection( left_extended_trans, current_fline ) - left_intersects_fline_once = geos::geos_type(fline_intersects) == "point" - left_intersects_fline_once + right_intersects_fline <- geos::geos_intersection( + right_extended_trans, + current_fline + ) + + # Define conditions to decide which version of the transect to use + # 1. Use transect with extension in BOTH directions + # 2. Use transect with LEFT extension only + # 3. Use transect with RIGHT extension only + left_intersects_fline_once <- geos::geos_type(left_intersects_fline) == "point" + right_intersects_fline_once <- geos::geos_type(right_intersects_fline) == "point" + + # # TODO: Consider doing the opppsite of these conditions (i.e. 
"left_intersects_other_transects" = TRUE) + # left_does_not_intersect_other_transects <- !any(geos::geos_intersects(left_extended_trans, transect_geoms[-i])) + # right_does_not_intersect_other_transects <- !any(geos::geos_intersects(right_extended_trans, transect_geoms[-i])) + # + # use_left_extension <- left_intersects_fline_once && left_does_not_intersect_other_transects + # use_right_extension <- right_intersects_fline_once && right_does_not_intersect_other_transects + # use_both_extensions <- use_left_extension && use_right_extension + + + # TODO: This is the opposite phrasing of these conditions, i think this is clearer to read + left_intersects_other_transects <- any(geos::geos_intersects(left_extended_trans, transect_geoms[-i])) + right_intersects_other_transects <- any(geos::geos_intersects(right_extended_trans, transect_geoms[-i])) + + # # make sure the extended transects don't hit any of the newly extended transects + # # NOTE: I think this could be just done with a single transect list that starts with the original transects and if an update happens then we replace that transect + # left_intersects_new_transects <- any(geos::geos_intersects(left_extended_trans, new_transects)) + # right_intersects_new_transects <- any(geos::geos_intersects(right_extended_trans, new_transects)) + + # make TRUE/FALSE flags stating which transect should we use + # - BOTH extensions + # - LEFT ONLY extensions + # - RIGHT only extensions + use_left_extension <- left_intersects_fline_once && !left_intersects_other_transects + use_right_extension <- right_intersects_fline_once && !right_intersects_other_transects + use_both_extensions <- use_left_extension && use_right_extension + + # merged_trans <- geos::geos_union(left_extended_trans, right_extended_trans) + # sf::st_union(sf::st_cast(sf::st_as_sf(merged_trans), "LINESTRING")) + # mapview::mapview(sf::st_as_sf(merged_trans), color = "green") + + # mapview::mapview(sf::st_as_sf(left_start), col.region = "red") + + # mapview::mapview(sf::st_as_sf(left_end), col.region = "red") + + # mapview::mapview(sf::st_as_sf(right_start), col.region = "dodgerblue") + + # mapview::mapview(sf::st_as_sf(right_end), col.region = "dodgerblue") + + # if(use_both_extensions) { + + # Get the start and end of both extended tranects + left_start <- geos::geos_point_start(left_extended_trans) + left_end <- geos::geos_point_end(left_extended_trans) + right_start <- geos::geos_point_start(right_extended_trans) + right_end <- geos::geos_point_end(right_extended_trans) + # } + # Extend in BOTH directions + if(use_both_extensions) { + # message("Extend direction: BOTH") + start <- left_start + end <- right_end + + # extend ONLY the left side + } else if(use_left_extension && !use_right_extension) { + # message("Extend direction: LEFT") + start <- left_start + end <- left_end + + # Extend ONLY the right side + } else if(!use_left_extension && use_right_extension) { + # message("Extend direction: RIGHT") + start <- right_start + end <- right_end + + # DO NOT extend either direction + } else { + # message("No extension") + # TODO: Really dont need to do anything + # TODO: in this scenario because we just use the original transect line + start <- left_end + end <- right_start + } + + # trans_needs_extension <- use_left_extension || use_right_extension + + # if(trans_needs_extension) { + + line_crs <- wk::wk_crs(current_trans) + updated_trans <- make_line_from_start_and_end_pts(start, end, line_crs) + + # } + + if(use_left_extension) { + left_extended_flag[i] <- TRUE + } + + 
+      if(use_right_extension) {
+        right_extended_flag[i] <- TRUE
+      }
+
+      if(use_both_extensions) {
+        both_extended_flag[i] <- TRUE
+      }
+
+      # new_transects[i] <- updated_trans
+      transect_geoms[i] <- updated_trans
+
+      # start %>% class()
+
+  }
+  # make the new transect line from the start and end points
  final_line <- geos::geos_make_linestring(x = c(X_start, X_end),
                                           y = c(Y_start, Y_end),
                                           crs = wk::wk_crs(current_trans)
  )
+  geos::geos_make_collection(start, type_id = "LINESTRING")
+
+
+  left_start <- geos::geos_point_start(left_extended_trans)
+  right_end <- geos::geos_point_end(right_extended_trans)

  fline_intersects
  geos::geos_type(fline_intersects) == "point"
@@ -352,7 +582,9 @@ path_df
   mapview::mapview(sf::st_as_sf(trans), color = "green") +
     mapview::mapview(sf::st_as_sf(left_extended_trans), color = "red") +
     mapview::mapview(sf::st_as_sf(right_extended_trans), color = "red") +
-    mapview::mapview( sf::st_as_sf(intersect_polygons[[curr_fema_index]]), col.regions = "dodgerblue")
+    mapview::mapview( sf::st_as_sf(intersect_polygons[[curr_fema_index]]), col.regions = "dodgerblue") +
+    mapview::mapview( sf::st_as_sf(final_line), color = "yellow")
+

From 1e511232d32339cc1178d9addb4e2e4b14d65331 Mon Sep 17 00:00:00 2001
From: anguswg-ucsb <anguswatters@gmail.com>
Date: Wed, 15 May 2024 16:26:09 -0700
Subject: [PATCH 31/64] changed extensions to check that they only intersect a flowline a single time, makes sure flowlines outside the current flowline are NOT intersected as well
---
 runners/cs_runner/add_fema_to_transects.R | 210 +++++++++++++++++-----
 1 file changed, 165 insertions(+), 45 deletions(-)

diff --git a/runners/cs_runner/add_fema_to_transects.R b/runners/cs_runner/add_fema_to_transects.R
index 32763a6..7f81f51 100644
--- a/runners/cs_runner/add_fema_to_transects.R
+++ b/runners/cs_runner/add_fema_to_transects.R
@@ -53,8 +53,8 @@ path_df
   vpu_fema_files <- list.files(fema_vpu_dir, full.names = TRUE)
   # vpu_fema_file1 <- vpu_fema_files[grepl("_union.gpkg", vpu_fema_files)]
-  # vpu_fema_file <- vpu_fema_files[grepl(paste0(vpu, ".gpkg"), vpu_fema_files)]
-  vpu_fema_file <- vpu_fema_files[grepl(paste0(vpu, "_union.gpkg"), vpu_fema_files)]
+  vpu_fema_file <- vpu_fema_files[grepl(paste0(vpu, ".gpkg"), vpu_fema_files)]
+  # vpu_fema_file <- vpu_fema_files[grepl(paste0(vpu, "_union.gpkg"), vpu_fema_files)]
   vpu_fema_file

   # fema polygons and transect lines
@@ -66,6 +66,28 @@ path_df
   # read in nextgen flowlines data
   flines <- sf::read_sf(nextgen_path, layer = "flowpaths")

+  # fema$fema_id %>% unique() %>% length()
+
+  # # union then explode FEMA polygons
+  # fema <-
+  #   fema %>%
+  #   sf::st_union()
+  #
+  # fema <- rmapshaper::ms_explode(fema)
+  # # fema %>% mapview::npts()
+  #
+  # # reassign IDs and change geometry column name
+  # fema <-
+  #   fema %>%
+  #   sf::st_as_sf() %>%
+  #   dplyr::mutate(fema_id = 1:dplyr::n()) %>%
+  #   dplyr::select(fema_id, geom = x)
+  #   sf::st_union() %>%
+  #   rmapshaper::ms_explode() %>%
+  #   sf::st_as_sf() %>%
+  #   dplyr::mutate(fema_id = 1:dplyr::n()) %>%
+  #   dplyr::select(fema_id, geom = x)
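
 # The commented-out block above dissolves all FEMA polygons into one geometry,
 # explodes the result back into single parts, and renumbers them. A tiny
 # self-contained sf version of that union -> explode -> re-ID pattern, using
 # sf::st_cast() in place of rmapshaper::ms_explode():
 library(sf)
 library(dplyr)

 p1 <- sf::st_polygon(list(rbind(c(0, 0), c(2, 0), c(2, 2), c(0, 2), c(0, 0))))
 p2 <- sf::st_polygon(list(rbind(c(1, 1), c(3, 1), c(3, 3), c(1, 3), c(1, 1))))   # overlaps p1
 p3 <- sf::st_polygon(list(rbind(c(9, 9), c(11, 9), c(11, 11), c(9, 11), c(9, 9)))) # isolated

 demo_fema <- sf::st_sf(geom = sf::st_sfc(p1, p2, p3))

 demo_merged <- demo_fema %>%
   sf::st_union() %>%          # p1 and p2 fuse into one part, p3 stays separate
   sf::st_cast("POLYGON") %>%  # explode the union back into single polygons
   sf::st_as_sf() %>%
   dplyr::mutate(fema_id = 1:dplyr::n())  # fresh sequential IDs (2 rows here)

 # Given 2 geos_geometry point geometries, create a line between the 2 points
 # start: geos_geometry, point
 # end: geos_geometry, point
 # Returns geos_geometry linestring
 # Given a set of transect linestrings and polygons, get the minimum distance to extend each transect line (from both directions, to try and reach the edge of a "polygons")
 # internal function for extending transect lines out to FEMA 100 year flood plain polygons
 # transect_lines, set of sf linestrings to extend (only if the transect lines are ENTIRELY within a polygons)
 # polygons, set of sf polygons that transect lines should be extended
 # max_extension_distance numeric, maximum distance (meters) to extend a transect line in either direction to try and intersect one of the 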
"polygons" - get_transect_extension_distances_to_polygons <- function(transect_lines, polygons, max_extension_distance) { + get_transect_extension_distances_to_polygons <- function(transect_lines, + polygons, + flines, + max_extension_distance) { ### ### ### ### ### ### ### ### ### ### ### ### ### ### - # transect_lines <- transects - # polygons <- fema - # max_extension_distance <- 2500 - ### ### ### ### ### ### ### + transect_lines <- transects + polygons <- fema + # # flines <- flines + max_extension_distance <- 3000 + # ### ### ### ### ### ### ### ### ### ### ### ### ### ### # keep 10% of the original points for speed - polygons <- rmapshaper::ms_simplify(polygons, keep = 0.10) + polygons <- rmapshaper::ms_simplify(polygons, keep_shapes = F, keep = 0.10) + mapview::npts(fema) + mapview::npts(polygons) # transects <- sf::read_sf(transect_path) + # polygons transects_geos <- geos::as_geos_geometry(transects) polygons_geos <- geos::as_geos_geometry(polygons) - + + # geos::geos_union( polygons_geos, polygons_geos) %>% length() + # polygons_geos %>% length() + # polygons # polygons_geos transects_polygons_matrix <- geos::geos_intersects_matrix(transects_geos, polygons_geos) polygons_transects_matrix <- geos::geos_intersects_matrix(polygons_geos, transects_geos) @@ -121,11 +153,21 @@ path_df # This can be done with just linestrings (not sure if this is actually more performent but I'm pretty sure it is....) intersect_lines <- intersect_polygons %>% + # geos::geos_make_valid() %>% sf::st_as_sf() %>% + # sf::st_union() %>% + # rmapshaper::ms_explode() %>% + # sf::st_as_sf() %>% + # dplyr::mutate(fema_id = 1:dplyr::n()) %>% + # dplyr::select(fema_id, geom = x) %>% sf::st_cast("MULTILINESTRING") %>% geos::as_geos_geometry() %>% geos::geos_simplify_preserve_topology(25) - + + # intersect_polygons %>% + # geos::geos_make_valid() %>% + # geos::geos_is_valid() %>% all() + # is.null(intersect_lines$geometry ) # use half of the shortest transect line as the segmentation length for all transects (ensures all transects will have a midpoint...?) # TODO: Double check this logic. min_segmentation <- min(intersect_transects$cs_lengthm %/% 2) @@ -217,6 +259,13 @@ path_df geom ) + max_extension_distance = 3000 + which(transects_with_distances$hy_id == "wb-1003839") + + left_trans[which(left_trans$hy_id == "wb-1003839"), ]$cs_lengthm + right_trans[which(left_trans$hy_id == "wb-1003839"), ]$cs_lengthm + which(right_trans$hy_id == "wb-1003839") + left_distances <- calc_extension_distances( geos_geoms = left_trans_geos, ids = left_trans$tmp_id, @@ -238,6 +287,8 @@ path_df left_trans$left_distance <- left_distances right_trans$right_distance <- right_distances + extensions_by_id %>% + dplyr::filter(hy_id == "wb-1003839") # distance to extend LEFT and/or RIGHT for each hy_id/cs_id extensions_by_id <- dplyr::left_join( sf::st_drop_geometry( @@ -252,7 +303,7 @@ path_df by = c("hy_id", "cs_id") ) - ######### ######## ######## ######## ####### + ######### ######## ######## ######## ####### # TODO: This is temporary !!!!!! 
fema_index_df <- dplyr::left_join( sf::st_drop_geometry( @@ -268,24 +319,24 @@ path_df ) ######## ######## ######## ######## ######## - foi <- sf::st_as_sf(intersect_polygons[fema_uids]) %>% dplyr::mutate( - fema_id = fema_uids - ) - - polygons_to_merge <- sf::st_as_sf(intersect_polygons[fema_uids]) %>% dplyr::mutate( - fema_id = fema_uids - ) %>% - dplyr::filter(fema_id %in% c(1563, 1566, 1567, 590)) - merged_polygon <- - polygons_to_merge %>% - sf::st_union() - merged_polygon %>% - rmapshaper::ms_explode() - mapview::mapview(foi, col.regions = "dodgerblue") + - mapview::mapview(polygons_to_merge, col.regions = "yellow") + - mapview::mapview(merged_polygon, col.regions = "green") + - mapview::mapview(toi, color = "red") + - mapview::mapview(og_trans, color = "green") + # foi <- sf::st_as_sf(intersect_polygons[fema_uids]) %>% dplyr::mutate( + # fema_id = fema_uids + # ) + # + # polygons_to_merge <- sf::st_as_sf(intersect_polygons[fema_uids]) %>% dplyr::mutate( + # fema_id = fema_uids + # ) %>% + # dplyr::filter(fema_id %in% c(1563, 1566, 1567, 590)) + # merged_polygon <- + # polygons_to_merge %>% + # sf::st_union() + # merged_polygon %>% + # rmapshaper::ms_explode() + # mapview::mapview(foi, col.regions = "dodgerblue") + + # mapview::mapview(polygons_to_merge, col.regions = "yellow") + + # mapview::mapview(merged_polygon, col.regions = "green") + + # mapview::mapview(toi, color = "red") + + # mapview::mapview(og_trans, color = "green") # polygons %>% # dplyr::filter(fema_id ) ######## ######## ######## ######## ######## @@ -318,6 +369,8 @@ path_df ) %>% hydrofabric3D::add_tmp_id() + transects_with_distances %>% + dplyr::filter(hy_id == "wb-1003839") # transects # flines %>% # dplyr::arrange(-tot_drainage_areasqkm) @@ -354,10 +407,6 @@ fema_uids mapview::mapview(og_trans, color = "green") ######## ######## ### ##### ######## ######## ######## ######## ######## - left_extended_flag <- rep(FALSE, length(transect_ids)) - right_extended_flag <- rep(FALSE, length(transect_ids)) - both_extended_flag <- rep(FALSE, length(transect_ids)) - fline_id_array <- flines$id # Convert the net object into a geos_geometry @@ -372,24 +421,30 @@ fema_uids right_distances <- transects_with_distances$right_distance # preallocate vector that stores the extension. 
distances - new_transects <- vctrs::vec_c(rep(geos::geos_empty(), length(transect_ids))) + new_transects <- vctrs::vec_c(rep(geos::geos_empty(), length(transect_hy_id_array))) + + left_extended_flag <- rep(FALSE, length(transect_hy_id_array)) + right_extended_flag <- rep(FALSE, length(transect_hy_id_array)) + both_extended_flag <- rep(FALSE, length(transect_hy_id_array)) + # new_transects <- geos::geos_empty() # # measures <- vctrs::vec_c() # transects_with_distances[1:20, ] # transects[1:20, ] # number of geometries that will be iterated over, keeping this variable to reference in message block - total <- length(transect_ids) + total <- length(transect_hy_id_array) # output a message every ~10% intervals - message_interval <- total %/% 10 + message_interval <- total %/% 5 number_of_skips = 0 - for (i in seq_along(transect_ids)) { + for (i in seq_along(transect_hy_id_array)) { # message("i: ", i) # if(i > 2000) { # break # } + # i = 1 # Check if the iteration is a multiple of 100 if (i %% message_interval == 0) { @@ -405,7 +460,8 @@ fema_uids # " - (", percent_done, "%) ") } - # i = 29 + # which(transects_with_distances$hy_id == "wb-1003839") + # i = 9587 # get the current transect, hy_id, cs_id, flowline, and extension distances current_trans <- transect_geoms[i] @@ -428,7 +484,9 @@ fema_uids next } - + # transects_with_distances %>% + # dplyr::filter(hy_id == "wb-1003839") + # transects # curr_fema_index <- # left_trans %>% # dplyr::filter(hy_id == current_hy_id, cs_id == current_cs_id) %>% @@ -445,20 +503,54 @@ fema_uids # Check that the extended transect lines only intersect the current flowline once left_intersects_fline <- geos::geos_intersection( left_extended_trans, - current_fline + # current_fline + flines_geos ) right_intersects_fline <- geos::geos_intersection( right_extended_trans, - current_fline + # current_fline + flines_geos ) + # sum(geos::geos_type(left_intersects_fline) == "point") + # sum(geos::geos_type(right_intersects_fline) == "point") + + + # which(geos::geos_type(left_intersects_fline) == "point") + + # transects %>% + # dplyr::filter(hy_id == current_hy_id) %>% + # sf::st_length() + # current_hy_id + # + # sf::st_as_sf(current_trans) %>% + # sf::st_length() + # tmp <- sf::read_sf(transect_path) + # + # tmp %>% + # dplyr::filter(hy_id == current_hy_id) %>% + # sf::st_length() + + # mapview::mapview(sf::st_as_sf(flines_geos[which(geos::geos_type(left_intersects_fline) == "point")])) + + # mapview::mapview(sf::st_as_sf(current_trans), color = "red") + + # mapview::mapview(sf::st_as_sf(left_extended_trans), color = "green") + + # mapview::mapview(sf::st_as_sf(right_extended_trans), color = "green") # Define conditions to decide which version of the transect to use + # 1. Use transect with extension in BOTH directions # 2. Use transect with LEFT extension only # 3. 
Use transect with RIGHT extension only - left_intersects_fline_once <- geos::geos_type(left_intersects_fline) == "point" - right_intersects_fline_once <- geos::geos_type(right_intersects_fline) == "point" + + # left_intersects_fline_once <- geos::geos_type(left_intersects_fline) == "point" + # right_intersects_fline_once <- geos::geos_type(right_intersects_fline) == "point" + left_intersects_fline_once <- sum(geos::geos_type(left_intersects_fline) == "point") == 1 && sum(geos::geos_type(left_intersects_fline) == "multipoint") == 0 + right_intersects_fline_once <- sum(geos::geos_type(right_intersects_fline) == "point") == 1 && sum(geos::geos_type(right_intersects_fline) == "multipoint") == 0 + + # sum(geos::geos_type(left_intersects_fline) == "point") == 1 + # sum(geos::geos_type(right_intersects_fline) == "point") == 1 + # sum(geos::geos_type(left_intersects_fline) == "multipoint") == 0 + # # TODO: Consider doing the opppsite of these conditions (i.e. "left_intersects_other_transects" = TRUE) # left_does_not_intersect_other_transects <- !any(geos::geos_intersects(left_extended_trans, transect_geoms[-i])) @@ -558,6 +650,29 @@ fema_uids } + + # Update the "transects_to_extend" with new geos geometries ("geos_list") + sf::st_geometry(transects) <- sf::st_geometry(sf::st_as_sf(transect_geoms)) + + transects$left_is_extended <- left_extended_flag + transects$right_is_extended <- right_extended_flag + transects %>% + dplyr::filter(left_is_extended | right_is_extended) + left_only_extended <- + transects %>% + dplyr::filter(!left_is_extended, right_is_extended) + left_only_flines <- + flines %>% + dplyr::filter(id %in% left_only_extended$hy_id) + # %>% + mapview::mapview(left_only_flines, color = "dodgerblue") + + mapview::mapview(left_only_extended, color = "red") + # transects$cs_lengthm <- length_list + + transects + length(transect_geoms) + + # make the new transect line from the start and points final_line <- geos::geos_make_linestring(x = c(X_start, X_end), y = c(Y_start, Y_end), @@ -613,8 +728,13 @@ fema_uids # lines_to_cut_indices = left_trans$left_fema_index # direction = "head" # max_extension_distance = max_extension_distance - - + # geos_geoms = left_trans_geos + # ids = left_trans$tmp_id + # lines_to_cut = intersect_lines + # lines_to_cut_indices = left_trans$left_fema_index + # direction = "head" + # max_extension_distance = max_extension_distance + # ##### ##### ##### ##### ##### From f9bf310a398a9a9201860103fc14fae1b68e9776 Mon Sep 17 00:00:00 2001 From: anguswg-ucsb <anguswatters@gmail.com> Date: Thu, 16 May 2024 15:31:13 -0700 Subject: [PATCH 32/64] continuing improvement of transect extensions using FEMA --- runners/cs_runner/add_fema_to_transects.R | 110 ++++++++++++++++++++-- 1 file changed, 101 insertions(+), 9 deletions(-) diff --git a/runners/cs_runner/add_fema_to_transects.R b/runners/cs_runner/add_fema_to_transects.R index 7f81f51..27f5163 100644 --- a/runners/cs_runner/add_fema_to_transects.R +++ b/runners/cs_runner/add_fema_to_transects.R @@ -651,22 +651,114 @@ fema_uids } + transects2 <- + transects %>% + dplyr::mutate( + new_cs_lengthm = as.numeric(sf::st_length(geom)) + ) %>% + dplyr::relocate(hy_id, cs_id, cs_lengthm, new_cs_lengthm) + + # Update the "transects_to_extend" with new geos geometries ("geos_list") - sf::st_geometry(transects) <- sf::st_geometry(sf::st_as_sf(transect_geoms)) + sf::st_geometry(transects2) <- sf::st_geometry(sf::st_as_sf(transect_geoms)) + + transects2 <- + transects2 %>% + dplyr::mutate( + new_cs_lengthm = as.numeric(sf::st_length(geom)) 
+    ) %>%
+    dplyr::relocate(hy_id, cs_id, cs_lengthm, new_cs_lengthm)
+
+  # transects2 %>%
+  #   dplyr::filter(
+  #     new_cs_lengthm > cs_lengthm
+  #   )
+  #

+  transects2$left_is_extended <- left_extended_flag
+  transects2$right_is_extended <- right_extended_flag
+
+  transects2 %>%
+    dplyr::filter(left_is_extended, right_is_extended)
+
+  both_extended <-
+    transects2 %>%
+    dplyr::filter(left_is_extended, right_is_extended)
+
+  both_flines <-
+    flines %>%
+    dplyr::filter(id %in% both_extended$hy_id)

  left_only_extended <-
-    transects %>%
-    dplyr::filter(!left_is_extended, right_is_extended)
+    transects2 %>%
+    dplyr::filter(left_is_extended, !right_is_extended)
+
  left_only_flines <-
    flines %>%
    dplyr::filter(id %in% left_only_extended$hy_id)
+
+  right_only_extended <-
+    transects2 %>%
+    dplyr::filter(!left_is_extended, right_is_extended)
+
+  right_only_flines <-
+    flines %>%
+    dplyr::filter(id %in% right_only_extended$hy_id)
+
+  # left_fema_polygons <-
+  #   left_trans %>%
+
+  fema_indexes_in_aoi <-
+    dplyr::bind_rows(
+      sf::st_drop_geometry(
+        dplyr::rename(left_trans, fema_index = left_fema_index)
+      ),
+      sf::st_drop_geometry(
+        dplyr::rename(right_trans,
+                      fema_index = right_fema_index)
+      )
+    ) %>%
+    hydrofabric3D::add_tmp_id() %>%
+    dplyr::filter(tmp_id %in% hydrofabric3D::add_tmp_id(extended_for_map)$tmp_id)
+    # dplyr::filter(
+    #   # tmp_id %in% hydrofabric3D::add_tmp_id(left_only_extended)$tmp_id |
+    #   # tmp_id %in% hydrofabric3D::add_tmp_id(right_only_extended)$tmp_id
+    #
+    #   tmp_id %in% unique(hydrofabric3D::add_tmp_id(dplyr::filter(transects2, left_is_extended, right_is_extended))$tmp_id)
+    #
+    # ) %>%
+    # dplyr::filter(left_is_within_fema | right_is_within_fema) %>%
+    dplyr::slice(1:200) %>%
+    .$fema_index %>%
+    unlist() %>%
+    na.omit() %>%
+    unique() %>%
+    sort()
+    # length()
+
+  polygons[fema_indexes_in_aoi, ]
+
+  extended_for_map <-
+    both_extended %>%
+    dplyr::slice(1:5000)
+
+  og_transects_for_map <-
+    transects %>%
+    hydrofabric3D::add_tmp_id() %>%
+    dplyr::filter(tmp_id %in% hydrofabric3D::add_tmp_id(extended_for_map)$tmp_id)
+
+  # hydrofabric3D::add_tmp_id(left_only_extended)$tmp_id
+  # transects_with_distances
  # %>%
-  mapview::mapview(left_only_flines, color = "dodgerblue") +
-    mapview::mapview(left_only_extended, color = "red")
+  mapview::mapview(polygons[fema_indexes_in_aoi, ], col.regions = "yellow") +
+    mapview::mapview(both_flines, color = "dodgerblue") +
+    mapview::mapview(og_transects_for_map, color = "red") +
+    mapview::mapview(extended_for_map, color = "green")
+
+  # mapview::mapview(left_only_flines, color = "dodgerblue") +
+  # mapview::mapview(right_only_flines, color = "dodgerblue") +
+  # mapview::mapview(left_only_extended, color = "red") +
+  # mapview::mapview(right_only_extended, color = "green")
  # transects$cs_lengthm <- length_list

  transects

From 3e5876798aa77b20103f61871c3cce06f2c8d677 Mon Sep 17 00:00:00 2001
From: anguswg-ucsb <anguswatters@gmail.com>
Date: Fri, 17 May 2024 13:59:05 -0700
Subject: [PATCH 33/64] starting to clean up scripts and make it so transects will try to be extended halfway in directions that wouldn't normally be extended due to intersections with flowlines or other transects
---
 runners/cs_runner/add_fema_to_transects.R | 114 ++++++++++++----------
 1 file changed, 60 insertions(+), 54 deletions(-)

diff --git a/runners/cs_runner/add_fema_to_transects.R
b/runners/cs_runner/add_fema_to_transects.R index 27f5163..b649aef 100644 --- a/runners/cs_runner/add_fema_to_transects.R +++ b/runners/cs_runner/add_fema_to_transects.R @@ -123,7 +123,8 @@ path_df transect_lines <- transects polygons <- fema # # flines <- flines - max_extension_distance <- 3000 + # max_extension_distance <- 3000 + max_extension_distance = 3500 # ### ### ### ### ### ### ### ### ### ### ### ### ### ### @@ -131,7 +132,7 @@ path_df polygons <- rmapshaper::ms_simplify(polygons, keep_shapes = F, keep = 0.10) mapview::npts(fema) - mapview::npts(polygons) + mapview::npts(polygons) # transects <- sf::read_sf(transect_path) # polygons @@ -162,7 +163,10 @@ path_df # dplyr::select(fema_id, geom = x) %>% sf::st_cast("MULTILINESTRING") %>% geos::as_geos_geometry() %>% - geos::geos_simplify_preserve_topology(25) + geos::geos_simplify_preserve_topology(20) + + # mapview::npts(sf::st_as_sf(intersect_lines)) + # intersect_polygons %>% # geos::geos_make_valid() %>% @@ -259,12 +263,13 @@ path_df geom ) - max_extension_distance = 3000 - which(transects_with_distances$hy_id == "wb-1003839") + # max_extension_distance = 3000 + # which(transects_with_distances$hy_id == "wb-1003839") left_trans[which(left_trans$hy_id == "wb-1003839"), ]$cs_lengthm right_trans[which(left_trans$hy_id == "wb-1003839"), ]$cs_lengthm - which(right_trans$hy_id == "wb-1003839") + + # which(right_trans$hy_id == "wb-1003839") left_distances <- calc_extension_distances( geos_geoms = left_trans_geos, @@ -289,6 +294,7 @@ path_df extensions_by_id %>% dplyr::filter(hy_id == "wb-1003839") + # distance to extend LEFT and/or RIGHT for each hy_id/cs_id extensions_by_id <- dplyr::left_join( sf::st_drop_geometry( @@ -651,12 +657,11 @@ fema_uids } - transects2 <- - transects %>% - dplyr::mutate( - new_cs_lengthm = as.numeric(sf::st_length(geom)) - ) %>% - dplyr::relocate(hy_id, cs_id, cs_lengthm, new_cs_lengthm) + transects2 <- transects + # dplyr::mutate( + # new_cs_lengthm = as.numeric(sf::st_length(geom)) + # ) %>% + # dplyr::relocate(hy_id, cs_id, cs_lengthm, new_cs_lengthm) # Update the "transects_to_extend" with new geos geometries ("geos_list") @@ -681,33 +686,44 @@ fema_uids transects2 %>% dplyr::filter(left_is_extended, right_is_extended) - both_extended <- + any_extended <- transects2 %>% - dplyr::filter(left_is_extended, right_is_extended) + dplyr::filter(left_is_extended | right_is_extended) - both_flines <- + any_flines <- flines %>% - dplyr::filter(id %in% both_extended$hy_id) + dplyr::filter(id %in% any_extended$hy_id) - left_only_extended <- - transects2 %>% - dplyr::filter(left_is_extended, !right_is_extended) - - left_only_flines <- - flines %>% - dplyr::filter(id %in% left_only_extended$hy_id) - - right_only_extended <- - transects2 %>% - dplyr::filter(!left_is_extended, right_is_extended) - - right_only_flines <- - flines %>% - dplyr::filter(id %in% right_only_extended$hy_id) + # left_only_extended <- + # transects2 %>% + # dplyr::filter(left_is_extended, !right_is_extended) + # + # left_only_flines <- + # flines %>% + # dplyr::filter(id %in% left_only_extended$hy_id) + # + # right_only_extended <- + # transects2 %>% + # dplyr::filter(!left_is_extended, right_is_extended) + # + # right_only_flines <- + # flines %>% + # dplyr::filter(id %in% right_only_extended$hy_id) # left_fema_polygons <- # left_trans %>% - + # ---------------------------------------------------------------- + # ------- Subset data for mapping ----------- + # ---------------------------------------------------------------- + extended_for_map <- 
+    any_extended %>%
+    dplyr::slice(1:5000)
+
+  og_transects_for_map <-
+    transects %>%
+    hydrofabric3D::add_tmp_id() %>%
+    dplyr::filter(tmp_id %in% hydrofabric3D::add_tmp_id(extended_for_map)$tmp_id)
+
  fema_indexes_in_aoi <-
    dplyr::bind_rows(
      sf::st_drop_geometry(
        dplyr::rename(left_trans, fema_index = left_fema_index)
      ),
      sf::st_drop_geometry(
        dplyr::rename(right_trans,
                      fema_index = right_fema_index)
      )
    ) %>%
    hydrofabric3D::add_tmp_id() %>%
-    dplyr::filter(tmp_id %in% hydrofabric3D::add_tmp_id(extended_for_map)$tmp_id)
+    dplyr::filter(tmp_id %in% hydrofabric3D::add_tmp_id(extended_for_map)$tmp_id) %>%
    # dplyr::filter(
    #   # tmp_id %in% hydrofabric3D::add_tmp_id(left_only_extended)$tmp_id |
    #   # tmp_id %in% hydrofabric3D::add_tmp_id(right_only_extended)$tmp_id
    #
    #   tmp_id %in% unique(hydrofabric3D::add_tmp_id(dplyr::filter(transects2, left_is_extended, right_is_extended))$tmp_id)
    #
    # ) %>%
    # dplyr::filter(left_is_within_fema | right_is_within_fema) %>%
-    dplyr::slice(1:200) %>%
+    # dplyr::slice(1:200) %>%
    .$fema_index %>%
    unlist() %>%
    na.omit() %>%
    unique() %>%
-    sort()
+    sort()
    # length()

-  polygons[fema_indexes_in_aoi, ]
-
-  extended_for_map <-
-    both_extended %>%
-    dplyr::slice(1:5000)
-
-  og_transects_for_map <-
-    transects %>%
-    hydrofabric3D::add_tmp_id() %>%
-    dplyr::filter(tmp_id %in% hydrofabric3D::add_tmp_id(extended_for_map)$tmp_id)
+  sf::st_as_sf(intersect_polygons[fema_indexes_in_aoi])

  # hydrofabric3D::add_tmp_id(left_only_extended)$tmp_id
  # transects_with_distances
  # %>%
-  mapview::mapview(polygons[fema_indexes_in_aoi, ], col.regions = "yellow") +
-    mapview::mapview(both_flines, color = "dodgerblue") +
-    mapview::mapview(og_transects_for_map, color = "red") +
-    mapview::mapview(extended_for_map, color = "green")
-
+  mapview::mapview( sf::st_as_sf(intersect_polygons[fema_indexes_in_aoi]), col.regions = "lightblue") +
+    mapview::mapview(any_flines, color = "dodgerblue") +
+    mapview::mapview(og_transects_for_map, color = "green") +
+    mapview::mapview(extended_for_map, color = "red")
+
  # mapview::mapview(left_only_flines, color = "dodgerblue") +
  # mapview::mapview(right_only_flines, color = "dodgerblue") +
  # mapview::mapview(left_only_extended, color = "red") +
  # mapview::mapview(right_only_extended, color = "green")
  # transects$cs_lengthm <- length_list
+  # ----------------------------------------------------------------
+  # ----------------------------------------------------------------
+  # ----------------------------------------------------------------

 # make the new transect line from the start and end points
 final_line <- geos::geos_make_linestring(x = c(X_start, X_end),
                                          y = c(Y_start, Y_end),

From 9e4405b62da4de1bfeb5abafa4125bc6ad521de2 Mon Sep 17 00:00:00 2001
From: anguswg-ucsb <anguswatters@gmail.com>
Date: Tue, 21 May 2024 12:44:51 -0700
Subject: [PATCH 34/64] added a generic transect line checker that makes sure that a transect line doesn't intersect more than one flowline or another transect, working on adding something to try a shorter extension if one of the directions turns out to be unextendable
---
 runners/cs_runner/add_fema_to_transects.R | 352 +++++++++++++++-------
 runners/cs_runner/preprocess_fema.R       |  74 ++++-
 2 files changed, 311 insertions(+), 115 deletions(-)
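
# Patch 34's retry idea in miniature: extend, validate, then fall back to half
# the distance before giving up. A hedged sketch with stand-in arguments
# (extend() and is_valid() are placeholders for hydrofabric3D::geos_extend_line()
# and the is_valid_transect_line() checker it introduces), not the exact implementation:
try_extension <- function(line, dist, direction, extend, is_valid) {
  extended <- extend(line, dist, direction)
  if (is_valid(extended)) {
    return(list(geom = extended, dist = dist))
  }
  # retry at half the original distance
  half_dist <- if (dist > 0) dist %/% 2 else 0
  extended  <- extend(line, half_dist, direction)
  if (is_valid(extended)) {
    return(list(geom = extended, dist = half_dist))
  }
  # keep the unextended line if both attempts fail
  list(geom = line, dist = 0)
}

diff --git a/runners/cs_runner/add_fema_to_transects.R b/runners/cs_runner/add_fema_to_transects.R
index b649aef..61a599e 100644
--- a/runners/cs_runner/add_fema_to_transects.R
+++ b/runners/cs_runner/add_fema_to_transects.R
@@ -50,11 +50,13 @@ path_df
   fema_vpu_dir <- paste0(FEMA_VPU_SUBFOLDERS[grepl(paste0("VPU_", vpu), basename(FEMA_VPU_SUBFOLDERS))], "/merged")
+  # fema_vpu_dir <- paste0(FEMA_VPU_SUBFOLDERS[grepl(paste0("VPU_", vpu), basename(FEMA_VPU_SUBFOLDERS))], "/merged")
+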
vpu_fema_files <- list.files(fema_vpu_dir, full.names = TRUE) # vpu_fema_file1 <- vpu_fema_files[grepl("_union.gpkg", vpu_fema_files)] - vpu_fema_file <- vpu_fema_files[grepl(paste0(vpu, ".gpkg"), vpu_fema_files)] - # vpu_fema_file <- vpu_fema_files[grepl(paste0(vpu, "_union.gpkg"), vpu_fema_files)] + # vpu_fema_file <- vpu_fema_files[grepl(paste0(vpu, ".gpkg"), vpu_fema_files)] + vpu_fema_file <- vpu_fema_files[grepl(paste0(vpu, "_union.gpkg"), vpu_fema_files)] vpu_fema_file # fema polygons and transect lines @@ -65,7 +67,36 @@ path_df # read in nextgen flowlines data flines <- sf::read_sf(nextgen_path, layer = "flowpaths") - + # library(nngeo) + # + # fema + # + # fema %>% + # dplyr::group_by(fema_id) %>% + # dplyr::mutate( + # n = dplyr::n() + # ) %>% + # dplyr::arrange(-n) %>% + # dplyr::relocate(n) + # fema_sub <- + # fema %>% + # dplyr::filter(fema_id %in% c(1726)) + # + # # fema %>% + # # dplyr::filter(fema_id %in% c(1726)) %>% + # mapview::mapview(fema_sub[1, ], col.regions = "red") + + # mapview::mapview(sf::st_buffer(fema_sub[2, ], 500), col.regions = "green") + # fema_no_holes <- nngeo::st_remove_holes(fema) + # fema_no_holes_union <- sf::st_union(fema_no_holes) + # + # touching_list = sf::st_touches(fema_no_holes) + # mapview::npts(fema) + # mapview::npts(fema_no_holes) + # mapview::npts(fema_no_holes_union) + # fema_no_holes %>% + # dplyr::mutate(new_fema_id = 1:dplyr::n()) %>% + # dplyr::group_by(new_fema_id) + # fema_no_holes_union # fema$fema_id %>% unique() %>% length() # # union then explode FEMA polygons @@ -106,7 +137,49 @@ path_df return(geos_ls) - } + } + + #' Check if an updated transect line is valid relative to the other transects and flowlines in the network + #' The 'transect_to_check' should be 'used' (i.e. function returns TRUE) if + #' the 'transect_to_check' does NOT interesect any other transects ('transect_lines') AND it only intersects a single flowline ONCE. + #' If the 'transect_to_check' intersects ANY other transects OR intersects a flowline more + #' than once (OR more than one flowline in the network) then the function returns FALSE. + #' @param transect_to_check geos_geometry, linestring + #' @param transect_lines geos_geometry, linestring + #' @param flowlines geos_geometry, linestring + #' + #' @return TRUE if the extension should be used, FALSE if it shouldn't be used + #' @importFrom geos geos_intersection geos_type + is_valid_transect_line <- function(transect_to_check, transect_lines, flowlines) { + + # ### ## ## ## ## ## ## ## ## ## + # extension_line <- left_extended_trans + # transect_lines <- transect_geoms + # flowlines <- flines_geos + # ### ## ## ## ## ## ## ## ## ## + + # Define conditions to decide which version of the transect to use + + # 1. Use transect with extension in BOTH directions + # 2. Use transect with LEFT extension only + # 3. 
Use transect with RIGHT extension only + + # Check that the extended transect lines only intersect a single flowline in the network only ONCE + intersects_with_flowlines <- geos::geos_intersection( + transect_to_check, + flowlines + ) + intersects_flowline_only_once <- sum(geos::geos_type(intersects_with_flowlines) == "point") == 1 && + sum(geos::geos_type(intersects_with_flowlines) == "multipoint") == 0 + + # check that the extended transect line does NOT intersect other transect lines (other than SELF) + intersects_other_transects <- sum(geos::geos_intersects(transect_to_check, transect_lines)) > 1 + + # TRUE == Only one flowline is intersected a single time AND no other transect lines are intersected + use_transect <- intersects_flowline_only_once && !intersects_other_transects + + return(use_transect) + } # Give a set of transecct linestrings and poylgons and get the minimum distance to extend each transect line (from both directions, to try and reach the edge of a "polygons") # internal function for extending transect lines out to FEMA 100 year flood plain polygons @@ -163,7 +236,7 @@ path_df # dplyr::select(fema_id, geom = x) %>% sf::st_cast("MULTILINESTRING") %>% geos::as_geos_geometry() %>% - geos::geos_simplify_preserve_topology(20) + geos::geos_simplify_preserve_topology(25) # mapview::npts(sf::st_as_sf(intersect_lines)) @@ -433,6 +506,10 @@ fema_uids right_extended_flag <- rep(FALSE, length(transect_hy_id_array)) both_extended_flag <- rep(FALSE, length(transect_hy_id_array)) + + updated_left_distances <- rep(0, length(transect_hy_id_array)) + updated_right_distances <- rep(0, length(transect_hy_id_array)) + # new_transects <- geos::geos_empty() # # measures <- vctrs::vec_c() # transects_with_distances[1:20, ] @@ -442,12 +519,14 @@ fema_uids total <- length(transect_hy_id_array) # output a message every ~10% intervals - message_interval <- total %/% 5 + message_interval <- total %/% 20 number_of_skips = 0 for (i in seq_along(transect_hy_id_array)) { # message("i: ", i) + # i = 13 # if(i > 2000) { + # message("-----> STOP BECAUSE at i", i) # break # } # i = 1 @@ -490,108 +569,155 @@ fema_uids next } - # transects_with_distances %>% - # dplyr::filter(hy_id == "wb-1003839") - # transects -# curr_fema_index <- -# left_trans %>% -# dplyr::filter(hy_id == current_hy_id, cs_id == current_cs_id) %>% -# .$left_fema_index %>% .[[1]] - # message("Extending transect line left and right") # extend the lines - left_extended_trans <- hydrofabric3D::geos_extend_line(current_trans, left_distance_to_extend, "head") - right_extended_trans <- hydrofabric3D::geos_extend_line(current_trans, right_distance_to_extend, "tail") - - # neighbor_transects <- - # message("Checking left and right intersections with flowline...") + left_extended_trans <- hydrofabric3D::geos_extend_line(current_trans, + left_distance_to_extend, "head") + right_extended_trans <- hydrofabric3D::geos_extend_line(current_trans, + right_distance_to_extend, "tail") - # Check that the extended transect lines only intersect the current flowline once - left_intersects_fline <- geos::geos_intersection( - left_extended_trans, - # current_fline - flines_geos - ) - - right_intersects_fline <- geos::geos_intersection( - right_extended_trans, - # current_fline - flines_geos - ) + # initial check to make sure the extended versions of the transects are valid + use_left_extension <- is_valid_transect_line(left_extended_trans, transect_geoms, flines_geos) + use_right_extension <- is_valid_transect_line(right_extended_trans, transect_geoms, 
flines_geos) + # use_both_extensions <- use_left_extension && use_right_extension - # sum(geos::geos_type(left_intersects_fline) == "point") - # sum(geos::geos_type(right_intersects_fline) == "point") + used_half_of_left <- FALSE + used_half_of_right <- FALSE + # TODO: Probably should precompute this division BEFORE the loop... + half_left_distance <- ifelse(left_distance_to_extend > 0, left_distance_to_extend %/% 2, 0) + half_right_distance <- ifelse(right_distance_to_extend > 0, right_distance_to_extend %/% 2, 0) - # which(geos::geos_type(left_intersects_fline) == "point") + # if we CAN'T use the original LEFT extension distance, + # we try HALF the distance (or some distane less than we extended by in the first place) + if (!use_left_extension) { + + # half_left_distance <- ifelse(left_distance_to_extend > 0, left_distance_to_extend %/% 2, 0) + left_extended_trans <- hydrofabric3D::geos_extend_line(current_trans, + half_left_distance, "head") + use_left_extension <- is_valid_transect_line(left_extended_trans, transect_geoms, flines_geos) + + used_half_of_left <- ifelse(use_left_extension, TRUE, FALSE) + } - # transects %>% - # dplyr::filter(hy_id == current_hy_id) %>% - # sf::st_length() - # current_hy_id + # if we CAN'T use the original RIGHT extension distance, + # we try HALF the distance (or some distance less than we extended by in the first place) + if (!use_right_extension) { + + # half_right_distance <- ifelse(right_distance_to_extend > 0, right_distance_to_extend %/% 2, 0) + right_extended_trans <- hydrofabric3D::geos_extend_line(current_trans, + half_right_distance, "tail") + use_right_extension <- is_valid_transect_line(right_extended_trans, transect_geoms, flines_geos) + + used_half_of_right <- ifelse(use_right_extension, TRUE, FALSE) + + # mapview::mapview(sf::st_as_sf(current_trans), color = "red") + + # mapview::mapview(sf::st_as_sf(left_extended_trans), color = "green") + + # mapview::mapview(sf::st_as_sf(right_extended_trans), color = "green") + + # mapview::mapview(sf::st_as_sf(left_extended_trans2), color = "dodgerblue") + + # mapview::mapview(sf::st_as_sf(right_extended_trans2), color = "dodgerblue") + + } + + use_both_extensions <- use_left_extension && use_right_extension + + # # message("Checking left and right intersections with flowline...") + # # --------------------------------------------------------------------------------- + # # TODO: UNCOMMENT BELOW ---> this was my original method + # # --------------------------------------------------------------------------------- + # # Check that the extended transect lines only intersect the current flowline once + # left_intersects_fline <- geos::geos_intersection( + # left_extended_trans, + # # current_fline + # flines_geos + # ) # - # sf::st_as_sf(current_trans) %>% - # sf::st_length() - # tmp <- sf::read_sf(transect_path) + # right_intersects_fline <- geos::geos_intersection( + # right_extended_trans, + # # current_fline + # flines_geos + # ) # - # tmp %>% - # dplyr::filter(hy_id == current_hy_id) %>% - # sf::st_length() - - # mapview::mapview(sf::st_as_sf(flines_geos[which(geos::geos_type(left_intersects_fline) == "point")])) + - # mapview::mapview(sf::st_as_sf(current_trans), color = "red") + - # mapview::mapview(sf::st_as_sf(left_extended_trans), color = "green") + - # mapview::mapview(sf::st_as_sf(right_extended_trans), color = "green") - # Define conditions to decide which version of the transect to use - - # 1. Use transect with extension in BOTH directions - # 2. 
Use transect with LEFT extension only - # 3. Use transect with RIGHT extension only - - # left_intersects_fline_once <- geos::geos_type(left_intersects_fline) == "point" - # right_intersects_fline_once <- geos::geos_type(right_intersects_fline) == "point" - left_intersects_fline_once <- sum(geos::geos_type(left_intersects_fline) == "point") == 1 && sum(geos::geos_type(left_intersects_fline) == "multipoint") == 0 - right_intersects_fline_once <- sum(geos::geos_type(right_intersects_fline) == "point") == 1 && sum(geos::geos_type(right_intersects_fline) == "multipoint") == 0 - - # sum(geos::geos_type(left_intersects_fline) == "point") == 1 - # sum(geos::geos_type(right_intersects_fline) == "point") == 1 - # sum(geos::geos_type(left_intersects_fline) == "multipoint") == 0 - - - # # TODO: Consider doing the opppsite of these conditions (i.e. "left_intersects_other_transects" = TRUE) - # left_does_not_intersect_other_transects <- !any(geos::geos_intersects(left_extended_trans, transect_geoms[-i])) - # right_does_not_intersect_other_transects <- !any(geos::geos_intersects(right_extended_trans, transect_geoms[-i])) # - # use_left_extension <- left_intersects_fline_once && left_does_not_intersect_other_transects - # use_right_extension <- right_intersects_fline_once && right_does_not_intersect_other_transects + # # mapview::mapview(sf::st_as_sf(flines_geos[which(geos::geos_type(left_intersects_fline) == "point")])) + + # # mapview::mapview(sf::st_as_sf(current_trans), color = "red") + + # # mapview::mapview(sf::st_as_sf(left_extended_trans), color = "green") + + # # mapview::mapview(sf::st_as_sf(right_extended_trans), color = "green") + # + # # Define conditions to decide which version of the transect to use + # + # # 1. Use transect with extension in BOTH directions + # # 2. Use transect with LEFT extension only + # # 3. Use transect with RIGHT extension only + # + # # left_intersects_fline_once <- geos::geos_type(left_intersects_fline) == "point" + # # right_intersects_fline_once <- geos::geos_type(right_intersects_fline) == "point" + # left_intersects_fline_once <- sum(geos::geos_type(left_intersects_fline) == "point") == 1 && + # sum(geos::geos_type(left_intersects_fline) == "multipoint") == 0 + # + # right_intersects_fline_once <- sum(geos::geos_type(right_intersects_fline) == "point") == 1 && + # sum(geos::geos_type(right_intersects_fline) == "multipoint") == 0 + # + # # sum(geos::geos_type(left_intersects_fline) == "point") == 1 + # # sum(geos::geos_type(right_intersects_fline) == "point") == 1 + # # sum(geos::geos_type(left_intersects_fline) == "multipoint") == 0 + # + # + # + # # # TODO: Consider doing the opppsite of these conditions (i.e. 
"left_intersects_other_transects" = TRUE) + # # left_does_not_intersect_other_transects <- !any(geos::geos_intersects(left_extended_trans, transect_geoms[-i])) + # # right_does_not_intersect_other_transects <- !any(geos::geos_intersects(right_extended_trans, transect_geoms[-i])) + # # + # # use_left_extension <- left_intersects_fline_once && left_does_not_intersect_other_transects + # # use_right_extension <- right_intersects_fline_once && right_does_not_intersect_other_transects + # # use_both_extensions <- use_left_extension && use_right_extension + # + # + # # TODO: This is the opposite phrasing of these conditions, i think this is clearer to read + # left_intersects_other_transects <- any(geos::geos_intersects(left_extended_trans, transect_geoms[-i])) + # right_intersects_other_transects <- any(geos::geos_intersects(right_extended_trans, transect_geoms[-i])) + # + # # # make sure the extended transects don't hit any of the newly extended transects + # # # NOTE: I think this could be just done with a single transect list that starts with the original transects and if an update happens then we replace that transect + # # left_intersects_new_transects <- any(geos::geos_intersects(left_extended_trans, new_transects)) + # # right_intersects_new_transects <- any(geos::geos_intersects(right_extended_trans, new_transects)) + # + # # make TRUE/FALSE flags stating which transect should we use + # # - BOTH extensions + # # - LEFT ONLY extensions + # # - RIGHT only extensions + # use_left_extension <- left_intersects_fline_once && !left_intersects_other_transects + # use_right_extension <- right_intersects_fline_once && !right_intersects_other_transects # use_both_extensions <- use_left_extension && use_right_extension - - - # TODO: This is the opposite phrasing of these conditions, i think this is clearer to read - left_intersects_other_transects <- any(geos::geos_intersects(left_extended_trans, transect_geoms[-i])) - right_intersects_other_transects <- any(geos::geos_intersects(right_extended_trans, transect_geoms[-i])) - - # # make sure the extended transects don't hit any of the newly extended transects - # # NOTE: I think this could be just done with a single transect list that starts with the original transects and if an update happens then we replace that transect - # left_intersects_new_transects <- any(geos::geos_intersects(left_extended_trans, new_transects)) - # right_intersects_new_transects <- any(geos::geos_intersects(right_extended_trans, new_transects)) - - # make TRUE/FALSE flags stating which transect should we use - # - BOTH extensions - # - LEFT ONLY extensions - # - RIGHT only extensions - use_left_extension <- left_intersects_fline_once && !left_intersects_other_transects - use_right_extension <- right_intersects_fline_once && !right_intersects_other_transects - use_both_extensions <- use_left_extension && use_right_extension - - # merged_trans <- geos::geos_union(left_extended_trans, right_extended_trans) - # sf::st_union(sf::st_cast(sf::st_as_sf(merged_trans), "LINESTRING")) - # mapview::mapview(sf::st_as_sf(merged_trans), color = "green") + - # mapview::mapview(sf::st_as_sf(left_start), col.region = "red") + - # mapview::mapview(sf::st_as_sf(left_end), col.region = "red") + - # mapview::mapview(sf::st_as_sf(right_start), col.region = "dodgerblue") + - # mapview::mapview(sf::st_as_sf(right_end), col.region = "dodgerblue") - + # + # new_use_left_extension <- is_valid_transect_line(left_extended_trans, transect_geoms, flines_geos) + # new_use_right_extension <- 
is_valid_transect_line(right_extended_trans, transect_geoms, flines_geos) + # new_use_both_extensions <- new_use_left_extension && new_use_right_extension + # + # message("--------------------------------------------") + # message("Left intersects FLINE ONCE: ", left_intersects_fline_once) + # message("Right intersects FLINE ONCE: ", right_intersects_fline_once) + # message() + # message("Left intersects OTHER TRANSECTS: ", left_intersects_other_transects) + # message("Right intersects OTHER TRANSECTS: ", right_intersects_other_transects) + # message() + # message("Use LEFT extension intersects: ", use_left_extension) + # message("Use RIGHT extension intersects: ", use_right_extension) + # message("Use BOTH extension intersects: ", use_both_extensions) + # message() + # message("--------------------------------------------") + # message() + # # merged_trans <- geos::geos_union(left_extended_trans, right_extended_trans) + # # sf::st_union(sf::st_cast(sf::st_as_sf(merged_trans), "LINESTRING")) + # # mapview::mapview(sf::st_as_sf(merged_trans), color = "green") + + # # mapview::mapview(sf::st_as_sf(left_start), col.regions = "red") + + # # mapview::mapview(sf::st_as_sf(left_end), col.regions = "red") + + # # mapview::mapview(sf::st_as_sf(right_start), col.regions = "dodgerblue") + + # # mapview::mapview(sf::st_as_sf(right_end), col.regions = "dodgerblue") + # # --------------------------------------------------------------------------------- + # # TODO: UNCOMMENT ABOVE ---> this was my original method + # # --------------------------------------------------------------------------------- + # # if(use_both_extensions) { # Get the start and end of both extended transects @@ -635,22 +761,46 @@ fema_uids line_crs <- wk::wk_crs(current_trans) updated_trans <- make_line_from_start_and_end_pts(start, end, line_crs) - # } + # touched_flines <- flines[geos::geos_type(right_intersects_fline) != "linestring", ] + # mapview::mapview(touched_flines, color = "dodgerblue") + + # mapview::mapview(sf::st_as_sf(current_trans), color = "red") + + # mapview::mapview(sf::st_as_sf(left_extended_trans), color = "green") + + # mapview::mapview(sf::st_as_sf(right_extended_trans), color = "green") + + # mapview::mapview(sf::st_as_sf(updated_trans), color = "yellow") + # nrow(flines) + # touched_flines <- flines[geos::geos_type(right_intersects_fline) != "linestring", ] + # flines[lengths(right_intersects_fline) == 0, ] + # length(right_intersects_fline) + # } + # --------------------------------------------------- + # TODO: UNCOMMENT BELOW + # --------------------------------------------------- if(use_left_extension) { left_extended_flag[i] <- TRUE } - + if(use_right_extension) { right_extended_flag[i] <- TRUE } - + if(use_both_extensions) { both_extended_flag[i] <- TRUE } + if(used_half_of_left) { + updated_left_distances[i] <- half_left_distance + } + if(used_half_of_right) { + updated_right_distances[i] <- half_right_distance + } + # new_transects[i] <- updated_trans transect_geoms[i] <- updated_trans + + # --------------------------------------------------- + # TODO: UNCOMMENT ABOVE + # --------------------------------------------------- # start %>% class() @@ -717,7 +867,7 @@ fema_uids # ---------------------------------------------------------------- extended_for_map <- any_extended %>% - dplyr::slice(1:5000) + dplyr::slice(1:1000) og_transects_for_map <- transects %>% diff --git a/runners/cs_runner/preprocess_fema.R b/runners/cs_runner/preprocess_fema.R index ba74369..52653d3 100644 --- 
a/runners/cs_runner/preprocess_fema.R +++ b/runners/cs_runner/preprocess_fema.R @@ -342,8 +342,8 @@ DELETE_STAGING_GPKGS <- FALSE # FEMA_VPU_SUBFOLDERS -# for (vpu_dir in FEMA_VPU_SUBFOLDERS) { -for (i in 1:4) { +for (vpu_dir in FEMA_VPU_SUBFOLDERS) { +# for (i in 1:4) { vpu_dir = FEMA_VPU_SUBFOLDERS[i] message("Merging files in '", basename(vpu_dir), "' directory...") @@ -407,15 +407,19 @@ for (i in 1:4) { MERGED_DIRS <- paste0(FEMA_VPU_SUBFOLDERS, "/merged") for (i in 1:length(FEMA_VPU_SUBFOLDERS)) { + # i = 8 + # i vpu_dir = FEMA_VPU_SUBFOLDERS[i] VPU <- basename(vpu_dir) message("Attempting to union FEMA polygons for '", VPU, "'...") + merged_dir <- paste0(vpu_dir, "/merged") fema_vpu_file <- list.files(merged_dir, full.names = TRUE) has_fema_vpu_file <- ifelse(length(fema_vpu_file) > 0, TRUE, FALSE) + # has_fema_vpu_file # message() # fema_vpu_file # } @@ -430,34 +434,76 @@ for (i in 1:length(FEMA_VPU_SUBFOLDERS)) { fema_vpu_file <- fema_vpu_file[!grepl("_union.gpkg", fema_vpu_file)] fema_vpu <- sf::read_sf(fema_vpu_file) - + + # fema_vpu + # fema_ids <- c(695) # fema_vpu <- # fema_vpu %>% # dplyr::group_by(source) %>% # dplyr::summarise() %>% # dplyr::ungroup() + # fema_vpu + + # fema_snapped <- sf::st_snap(fema_vpu, fema_vpu, tolerance = 10) + # sf::st_ + # message("Removing holes before dissolve...") + fema_vpu <- nngeo::st_remove_holes(fema_vpu) + # + # message("Making valid geometries...") + # fema_vpu <- sf::st_make_valid(fema_vpu) + + # fema_vpu <- + # fema_vpu %>% + # sf::st_cast("MULTIPOLYGON") + + message("Dissolving...") # 2633 = old number of polygons - fema_vpu <- rmapshaper::ms_dissolve(fema_vpu, - field = "source", - sys = TRUE, - sys_mem = 16 - ) - fema_vpu <- rmapshaper::ms_explode(fema_vpu, - sys = TRUE, - sys_mem = 16) + fema_vpu <- rmapshaper::ms_dissolve( + input = fema_vpu, + field = "source", + sys = TRUE, + sys_mem = 16 + ) + + message("Exploding...") + # mapview::npts(fema_vpu) + # mapview::npts(fema_vpu_dissolve) + fema_vpu <- rmapshaper::ms_explode( + input = fema_vpu, + sys = TRUE, + sys_mem = 16 + ) + # mapview::npts(fema_exp) + message("Removing holes after explosion...") + fema_vpu <- nngeo::st_remove_holes(fema_vpu) + # mapview::npts(fema_exp_noholes) + + # slice_subset = 1:50 + # fema_exp_noholes[slice_subset, ] + # mapview::mapview( fema_vpu[1:100, ], col.regions = "dodgerblue")+ + # mapview::mapview( fema_exp[slice_subset, ], col.regions = "red") + + # mapview::mapview( fema_exp_noholes[slice_subset, ], col.regions = "green") + # fema_vpu <- rmapshaper::ms_dissolve(fema_vpu, + # field = "source", + # sys = TRUE, + # sys_mem = 16 + # # ) + # fema_vpu <- rmapshaper::ms_explode(fema_vpu, + # sys = TRUE, + # sys_mem = 16) fema_vpu <- fema_vpu %>% - dplyr::group_by(source) %>% + # dplyr::group_by(source) %>% dplyr::mutate( state = tolower(gsub("-100yr-flood_valid_clean.gpkg", "", source)), vpu = gsub("VPU_", "", VPU), - fema_id = paste0(state, "_", 1:dplyr::n()) + fema_id = 1:dplyr::n() ) %>% dplyr::ungroup() %>% - dplyr::relocate(vpu, fema_id, source, state, geom) + dplyr::select(vpu, fema_id, state, geom = geometry) if (OVERWRITE_FEMA_FILES) { union_file_path <- gsub(".gpkg", "_union.gpkg", fema_vpu_file) From 568925b85558ff349ff821283b4ac02da01d56d2 Mon Sep 17 00:00:00 2001 From: anguswg-ucsb <anguswatters@gmail.com> Date: Tue, 21 May 2024 21:18:20 -0700 Subject: [PATCH 35/64] slowly putting all the code into individual functions for extending transects via fema floodplain polygons --- runners/cs_runner/add_fema_to_transects.R | 1164 
+++++++++---- 1 file changed, 585 insertions(+), 579 deletions(-) diff --git a/runners/cs_runner/add_fema_to_transects.R b/runners/cs_runner/add_fema_to_transects.R index 61a599e..d23b67e 100644 --- a/runners/cs_runner/add_fema_to_transects.R +++ b/runners/cs_runner/add_fema_to_transects.R @@ -67,10 +67,14 @@ path_df # read in nextgen flowlines data flines <- sf::read_sf(nextgen_path, layer = "flowpaths") - # library(nngeo) - # - # fema + system.time({ + extended_transects <- get_transect_extension_distances_to_polygons(transects, + fema, + flines, + 3000) # + }) + # library(nngeo) # fema %>% # dplyr::group_by(fema_id) %>% # dplyr::mutate( @@ -81,7 +85,6 @@ path_df # fema_sub <- # fema %>% # dplyr::filter(fema_id %in% c(1726)) - # # # fema %>% # # dplyr::filter(fema_id %in% c(1726)) %>% # mapview::mapview(fema_sub[1, ], col.regions = "red") + @@ -180,7 +183,7 @@ path_df return(use_transect) } - + # Given a set of transect linestrings and polygons, get the minimum distance to extend each transect line (from both directions, to try and reach the edge of the "polygons") # internal function for extending transect lines out to FEMA 100 year flood plain polygons # transect_lines, set of sf linestrings to extend (only if the transect lines are ENTIRELY within a polygon) @@ -193,19 +196,19 @@ path_df ### ### ### ### ### ### ### ### ### ### ### ### ### ### - transect_lines <- transects - polygons <- fema - # # flines <- flines - # max_extension_distance <- 3000 - max_extension_distance = 3500 + # transect_lines <- transects + # polygons <- fema + # # # flines <- flines + # # max_extension_distance <- 3000 + # max_extension_distance = 3500 # ### ### ### ### ### ### ### ### ### ### ### ### ### ### # keep 10% of the original points for speed polygons <- rmapshaper::ms_simplify(polygons, keep_shapes = F, keep = 0.10) - mapview::npts(fema) - mapview::npts(polygons) + # mapview::npts(fema) + # mapview::npts(polygons) # transects <- sf::read_sf(transect_path) # polygons @@ -338,10 +341,10 @@ path_df # max_extension_distance = 3000 # which(transects_with_distances$hy_id == "wb-1003839") - - left_trans[which(left_trans$hy_id == "wb-1003839"), ]$cs_lengthm - right_trans[which(left_trans$hy_id == "wb-1003839"), ]$cs_lengthm - + # + # left_trans[which(left_trans$hy_id == "wb-1003839"), ]$cs_lengthm + # right_trans[which(left_trans$hy_id == "wb-1003839"), ]$cs_lengthm + # # which(right_trans$hy_id == "wb-1003839") left_distances <- calc_extension_distances( @@ -365,9 +368,9 @@ path_df left_trans$left_distance <- left_distances right_trans$right_distance <- right_distances - extensions_by_id %>% - dplyr::filter(hy_id == "wb-1003839") - + # extensions_by_id %>% + # dplyr::filter(hy_id == "wb-1003839") + # # distance to extend LEFT and/or RIGHT for each hy_id/cs_id extensions_by_id <- dplyr::left_join( sf::st_drop_geometry( @@ -382,20 +385,20 @@ path_df by = c("hy_id", "cs_id") ) - ######### ######## ######## ######## ####### - # TODO: This is temporary !!!!!! - fema_index_df <- dplyr::left_join( - sf::st_drop_geometry( - dplyr::select(left_trans, - hy_id, cs_id, left_distance, left_fema_index) - ), - sf::st_drop_geometry( - dplyr::select(right_trans, - hy_id, cs_id, - right_distance, right_fema_index) - ), - by = c("hy_id", "cs_id") - ) + # ######### ######## ######## ######## ####### + # # TODO: This is temporary !!!!!! 
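An aside on the distance search: the calc_extension_distances() calls above (the function itself is defined further down) delegate to hydrofabric3D:::geos_bs_distance(), which binary-searches candidate extension distances rather than testing every one. A minimal, self-contained sketch of that idea in plain sf; extend_head() and min_extension() are hypothetical helper names, not the package API:

library(sf)

# extend the first ("head") endpoint of a LINESTRING by `dist`,
# along the direction of its first segment
extend_head <- function(line, dist) {
  xy <- sf::st_coordinates(line)[, c("X", "Y")]
  v  <- xy[1, ] - xy[2, ]            # direction pointing out of the head
  v  <- v / sqrt(sum(v^2))           # unit vector
  sf::st_linestring(rbind(xy[1, ] + dist * v, xy))
}

# smallest whole-unit extension at which the line leaves `poly`
# (returns max_dist if the boundary is never reached)
min_extension <- function(line, poly, max_dist = 2500) {
  lo <- 0
  hi <- max_dist
  while (hi - lo > 1) {
    mid <- (lo + hi) %/% 2
    if (sf::st_within(extend_head(line, mid), poly, sparse = FALSE)[1, 1]) {
      lo <- mid                      # still inside the polygon, extend further
    } else {
      hi <- mid                      # crossed the boundary, try shorter
    }
  }
  hi
}

poly <- sf::st_polygon(list(rbind(c(0, 0), c(100, 0), c(100, 100), c(0, 100), c(0, 0))))
line <- sf::st_linestring(rbind(c(40, 50), c(60, 50)))
min_extension(line, poly)            # 41: first whole unit pushing the head past the x = 0 edge

The bisection needs only about log2(max_extension_distance) within-tests per transect side instead of one test per candidate distance, which is what keeps the per-transect loop tolerable at VPU scale.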
+ # fema_index_df <- dplyr::left_join( + # sf::st_drop_geometry( + # dplyr::select(left_trans, + # hy_id, cs_id, left_distance, left_fema_index) + # ), + # sf::st_drop_geometry( + # dplyr::select(right_trans, + # hy_id, cs_id, + # right_distance, right_fema_index) + # ), + # by = c("hy_id", "cs_id") + # ) ######## ######## ######## ######## ######## # foi <- sf::st_as_sf(intersect_polygons[fema_uids]) %>% dplyr::mutate( @@ -422,7 +425,7 @@ path_df # TODO: Add left/right extension distancces to transect data # TODO: this can ultimately just be the "transects" variable, dont need to make new "transects_with_distances" variable - transects_with_distances <- + transects <- transects %>% dplyr::left_join( extensions_by_id, @@ -448,9 +451,9 @@ path_df ) %>% hydrofabric3D::add_tmp_id() - transects_with_distances %>% - dplyr::filter(hy_id == "wb-1003839") - # transects + # transects_with_distances %>% + # dplyr::filter(hy_id == "wb-1003839") + # # transects # flines %>% # dplyr::arrange(-tot_drainage_areasqkm) # flines %>% @@ -459,31 +462,31 @@ path_df # # dplyr::filter(hydroseq == max(hydroseq)) %>% # mapview::mapview() - # extensions_by_id - transects_with_distances %>% - dplyr::filter(!tmp_id %in% hydrofabric3D::add_tmp_id(extensions_by_id)$tmp_id) - - ######## ######## ######## ######## ######## ######## ######## ######## - left_extended_flag[1:20] - right_extended_flag[1:20] - both_extended_flag[1:20] - - range_vect <- 1:500 - - fema_uids <- unique(c(unlist(fema_index_df[range_vect, ]$left_fema_index), unlist(fema_index_df[range_vect, ]$right_fema_index))) - fema_uids <- fema_uids[!is.na(fema_uids)] -fema_uids - foi <- sf::st_as_sf(intersect_polygons[fema_uids]) %>% dplyr::mutate( - fema_id = fema_uids - ) - # ooi <- sf::st_as_sf() - # toi <- sf::st_as_sf(new_transects[1:20]) - toi <- sf::st_as_sf(transect_geoms[range_vect]) - toi - og_trans <- transects_with_distances[range_vect, ] - mapview::mapview(foi, col.regions = "dodgerblue") + - mapview::mapview(toi, color = "red") + - mapview::mapview(og_trans, color = "green") + # # extensions_by_id + # transects_with_distances %>% + # dplyr::filter(!tmp_id %in% hydrofabric3D::add_tmp_id(extensions_by_id)$tmp_id) + +# ######## ######## ######## ######## ######## ######## ######## ######## +# left_extended_flag[1:20] +# right_extended_flag[1:20] +# both_extended_flag[1:20] +# +# range_vect <- 1:500 +# +# fema_uids <- unique(c(unlist(fema_index_df[range_vect, ]$left_fema_index), unlist(fema_index_df[range_vect, ]$right_fema_index))) +# fema_uids <- fema_uids[!is.na(fema_uids)] +# fema_uids +# foi <- sf::st_as_sf(intersect_polygons[fema_uids]) %>% dplyr::mutate( +# fema_id = fema_uids +# ) +# # ooi <- sf::st_as_sf() +# # toi <- sf::st_as_sf(new_transects[1:20]) +# toi <- sf::st_as_sf(transect_geoms[range_vect]) +# toi +# og_trans <- transects_with_distances[range_vect, ] +# mapview::mapview(foi, col.regions = "dodgerblue") + +# mapview::mapview(toi, color = "red") + +# mapview::mapview(og_trans, color = "green") ######## ######## ### ##### ######## ######## ######## ######## ######## fline_id_array <- flines$id @@ -491,13 +494,13 @@ fema_uids # Convert the net object into a geos_geometry flines_geos <- geos::as_geos_geometry(flines) - transect_hy_id_array <- transects_with_distances$hy_id - transect_cs_id_array <- transects_with_distances$cs_id + transect_hy_id_array <- transects$hy_id + transect_cs_id_array <- transects$cs_id - transect_geoms <- geos::as_geos_geometry(transects_with_distances$geom) + transect_geoms <- 
geos::as_geos_geometry(transects$geom) - left_distances <- transects_with_distances$left_distance - right_distances <- transects_with_distances$right_distance + left_distances <- transects$left_distance + right_distances <- transects$right_distance # preallocate vector that stores the extension. distances new_transects <- vctrs::vec_c(rep(geos::geos_empty(), length(transect_hy_id_array))) @@ -807,7 +810,7 @@ fema_uids } - transects2 <- transects + # transects2 <- transects # dplyr::mutate( # new_cs_lengthm = as.numeric(sf::st_length(geom)) # ) %>% @@ -815,10 +818,10 @@ fema_uids # Update the "transects_to_extend" with new geos geometries ("geos_list") - sf::st_geometry(transects2) <- sf::st_geometry(sf::st_as_sf(transect_geoms)) + sf::st_geometry(transects) <- sf::st_geometry(sf::st_as_sf(transect_geoms)) - transects2 <- - transects2 %>% + transects <- + transects %>% dplyr::mutate( new_cs_lengthm = as.numeric(sf::st_length(geom)) ) %>% @@ -830,138 +833,141 @@ fema_uids # ) # - transects2$left_is_extended <- left_extended_flag - transects2$right_is_extended <- right_extended_flag - - transects2 %>% - dplyr::filter(left_is_extended, right_is_extended) - - any_extended <- - transects2 %>% - dplyr::filter(left_is_extended | right_is_extended) - - any_flines <- - flines %>% - dplyr::filter(id %in% any_extended$hy_id) - - # left_only_extended <- - # transects2 %>% - # dplyr::filter(left_is_extended, !right_is_extended) - # - # left_only_flines <- - # flines %>% - # dplyr::filter(id %in% left_only_extended$hy_id) - # - # right_only_extended <- - # transects2 %>% - # dplyr::filter(!left_is_extended, right_is_extended) - # - # right_only_flines <- - # flines %>% - # dplyr::filter(id %in% right_only_extended$hy_id) - - # left_fema_polygons <- - # left_trans %>% - # ---------------------------------------------------------------- - # ------- Subset data for mapping ----------- - # ---------------------------------------------------------------- - extended_for_map <- - any_extended %>% - dplyr::slice(1:1000) - - og_transects_for_map <- - transects %>% - hydrofabric3D::add_tmp_id() %>% - dplyr::filter(tmp_id %in% hydrofabric3D::add_tmp_id(extended_for_map)$tmp_id) - - fema_indexes_in_aoi <- - dplyr::bind_rows( - sf::st_drop_geometry( - dplyr::rename(left_trans, fema_index = left_fema_index) - ), - sf::st_drop_geometry( - dplyr::rename(right_trans, - fema_index = right_fema_index) - ) - ) %>% - hydrofabric3D::add_tmp_id() %>% - dplyr::filter(tmp_id %in% hydrofabric3D::add_tmp_id(extended_for_map)$tmp_id) %>% - # dplyr::filter( - # # tmp_id %in% hydrofabric3D::add_tmp_id(left_only_extended)$tmp_id | - # # tmp_id %in% hydrofabric3D::add_tmp_id(right_only_extended)$tmp_id - # - # tmp_id %in% unique(hydrofabric3D::add_tmp_id(dplyr::filter(transects2, left_is_extended, right_is_extended))$tmp_id) - # - # ) %>% - # dplyr::filter(left_is_within_fema | right_is_within_fema) %>% - # dplyr::slice(1:200) %>% - .$fema_index %>% - unlist() %>% - na.omit() %>% - unique() %>% - sort() - # length() - - sf::st_as_sf(intersect_polygons[fema_indexes_in_aoi]) - - # hydrofabric3D::add_tmp_id(left_only_extended)$tmp_id - # transects_with_distances - # %>% - mapview::mapview( sf::st_as_sf(intersect_polygons[fema_indexes_in_aoi]), col.regions = "lightblue") + - mapview::mapview(any_flines, color = "dodgerblue") + - mapview::mapview(og_transects_for_map, color = "green") + - mapview::mapview(extended_for_map, color = "red") - - # mapview::mapview(left_only_flines, color = "dodgerblue") + - # mapview::mapview(right_only_flines, 
color = "dodgerblue") + - # mapview::mapview(left_only_extended, color = "red") + - # mapview::mapview(right_only_extended, color = "green") - # transects$cs_lengthm <- length_list - # ---------------------------------------------------------------- - # ---------------------------------------------------------------- - # ---------------------------------------------------------------- - - # make the new transect line from the start and points - final_line <- geos::geos_make_linestring(x = c(X_start, X_end), - y = c(Y_start, Y_end), - crs = wk::wk_crs(current_trans) - ) - geos::geos_make_collection(start, type_id = "LINESTRING") - - - left_start <- geos::geos_point_start(left_extended_trans) - right_end <- geos::geos_point_end(right_extended_trans) - - fline_intersects - geos::geos_type(fline_intersects) == "point" - - geos::geos_intersects(current_trans, ) - - # if( - # geos::geos_type(fline_intersects) - # - # - # ) - mapview::mapview(sf::st_as_sf(trans), color = "green") + - mapview::mapview(sf::st_as_sf(left_extended_trans), color = "red") + - mapview::mapview(sf::st_as_sf(right_extended_trans), color = "red") + - mapview::mapview( sf::st_as_sf(intersect_polygons[[curr_fema_index]]), col.regions = "dodgerblue") + - mapview::mapview( sf::st_as_sf(final_line), color = "yellow") - - - - - - - } - - - left_trans$left_extension_distance + transects$left_is_extended <- left_extended_flag + transects$right_is_extended <- right_extended_flag - length(right_distances) + return(transects) - - # Calculate the minimum distance a line would need to extend to reach the boundary of the polygon/line that the input geometries are entirely within + } + # transects2 %>% + # dplyr::filter(left_is_extended, right_is_extended) +# +# any_extended <- +# transects2 %>% +# dplyr::filter(left_is_extended | right_is_extended) +# +# any_flines <- +# flines %>% +# dplyr::filter(id %in% any_extended$hy_id) +# +# # left_only_extended <- +# # transects2 %>% +# # dplyr::filter(left_is_extended, !right_is_extended) +# # +# # left_only_flines <- +# # flines %>% +# # dplyr::filter(id %in% left_only_extended$hy_id) +# # +# # right_only_extended <- +# # transects2 %>% +# # dplyr::filter(!left_is_extended, right_is_extended) +# # +# # right_only_flines <- +# # flines %>% +# # dplyr::filter(id %in% right_only_extended$hy_id) +# +# # left_fema_polygons <- +# # left_trans %>% +# # ---------------------------------------------------------------- +# # ------- Subset data for mapping ----------- +# # ---------------------------------------------------------------- +# extended_for_map <- +# any_extended %>% +# dplyr::slice(1:1000) +# +# og_transects_for_map <- +# transects %>% +# hydrofabric3D::add_tmp_id() %>% +# dplyr::filter(tmp_id %in% hydrofabric3D::add_tmp_id(extended_for_map)$tmp_id) +# +# fema_indexes_in_aoi <- +# dplyr::bind_rows( +# sf::st_drop_geometry( +# dplyr::rename(left_trans, fema_index = left_fema_index) +# ), +# sf::st_drop_geometry( +# dplyr::rename(right_trans, +# fema_index = right_fema_index) +# ) +# ) %>% +# hydrofabric3D::add_tmp_id() %>% +# dplyr::filter(tmp_id %in% hydrofabric3D::add_tmp_id(extended_for_map)$tmp_id) %>% +# # dplyr::filter( +# # # tmp_id %in% hydrofabric3D::add_tmp_id(left_only_extended)$tmp_id | +# # # tmp_id %in% hydrofabric3D::add_tmp_id(right_only_extended)$tmp_id +# # +# # tmp_id %in% unique(hydrofabric3D::add_tmp_id(dplyr::filter(transects2, left_is_extended, right_is_extended))$tmp_id) +# # +# # ) %>% +# # dplyr::filter(left_is_within_fema | right_is_within_fema) %>% +# # 
dplyr::slice(1:200) %>% +# .$fema_index %>% +# unlist() %>% +# na.omit() %>% +# unique() %>% +# sort() +# # length() +# +# sf::st_as_sf(intersect_polygons[fema_indexes_in_aoi]) +# +# # hydrofabric3D::add_tmp_id(left_only_extended)$tmp_id +# # transects_with_distances +# # %>% +# mapview::mapview( sf::st_as_sf(intersect_polygons[fema_indexes_in_aoi]), col.regions = "lightblue") + +# mapview::mapview(any_flines, color = "dodgerblue") + +# mapview::mapview(og_transects_for_map, color = "green") + +# mapview::mapview(extended_for_map, color = "red") +# +# # mapview::mapview(left_only_flines, color = "dodgerblue") + +# # mapview::mapview(right_only_flines, color = "dodgerblue") + +# # mapview::mapview(left_only_extended, color = "red") + +# # mapview::mapview(right_only_extended, color = "green") +# # transects$cs_lengthm <- length_list +# # ---------------------------------------------------------------- +# # ---------------------------------------------------------------- +# # ---------------------------------------------------------------- +# +# # make the new transect line from the start and points +# final_line <- geos::geos_make_linestring(x = c(X_start, X_end), +# y = c(Y_start, Y_end), +# crs = wk::wk_crs(current_trans) +# ) +# geos::geos_make_collection(start, type_id = "LINESTRING") +# +# +# left_start <- geos::geos_point_start(left_extended_trans) +# right_end <- geos::geos_point_end(right_extended_trans) +# +# fline_intersects +# geos::geos_type(fline_intersects) == "point" +# +# geos::geos_intersects(current_trans, ) +# +# # if( +# # geos::geos_type(fline_intersects) +# # +# # +# # ) +# mapview::mapview(sf::st_as_sf(trans), color = "green") + +# mapview::mapview(sf::st_as_sf(left_extended_trans), color = "red") + +# mapview::mapview(sf::st_as_sf(right_extended_trans), color = "red") + +# mapview::mapview( sf::st_as_sf(intersect_polygons[[curr_fema_index]]), col.regions = "dodgerblue") + +# mapview::mapview( sf::st_as_sf(final_line), color = "yellow") +# +# +# +# +# +# +# } +# +# +# left_trans$left_extension_distance +# +# length(right_distances) +# +# +# # Calculate the minimum distance a line would need to extend to reach the boundary of the polygon/line that the input geometries are entirely within calc_extension_distances <- function(geos_geoms, ids, lines_to_cut, lines_to_cut_indices, direction = "head", max_extension_distance = 2500) { ##### ##### ##### ##### ##### # geos_geoms <- left_trans_geos @@ -982,14 +988,14 @@ fema_uids # lines_to_cut_indices = left_trans$left_fema_index # direction = "head" # max_extension_distance = max_extension_distance - # - + # + ##### ##### ##### ##### ##### - + if (!direction %in% c("head", "tail")) { stop("Invalid 'direction' value, must be one of 'head' or 'tail'") } - + # preallocate vector that stores the extension. 
distances extension_dists <- vctrs::vec_c(rep(0, length(ids))) @@ -1001,400 +1007,400 @@ fema_uids polygon_index <- lines_to_cut_indices[[i]] # any(is_within_polygon) message("Transect: '", curr_id, "' - (", i, ")") - + if (is_within_polygon) { message("- Side of transect intersects with FEMA") message("\t > FEMA index: ", polygon_index) - + curr_geom <- geos_geoms[[i]] index_vect <- sort(unlist(polygon_index)) - + distance_to_extend <- hydrofabric3D:::geos_bs_distance( distances = 1:max_extension_distance, line = curr_geom, geoms_to_cut = lines_to_cut[index_vect], direction = direction ) - + extension_dists[i] <- distance_to_extend } - + } - + return(extension_dists) } - - - # ---------------------------------------------------------------------------------------------------------------- - # Loop over every left and right halfs of transects and - # if they are fully within FEMA polygons, get the minimum extension distance required for the transect to meet the FEMA polygon boundary - # ---------------------------------------------------------------------------------------------------------------- - - left_ids <- left_trans$tmp_id - - left_fema_indexes <- left_trans$left_fema_index - left_fema_bool <- left_trans$left_is_within_fema - - # preallocate vector that stores the extension. distances - left_extension_dists <- vctrs::vec_c(rep(0, length(left_ids))) - - # all_equal_length_vects <- all(length(left_ids) == length(left_fema_indexes) && length(left_ids) == length(left_fema_bool)) - # 1:length(left_ids) - extension_count = 0 - - for(i in 1:length(left_ids)) { - # i = 1 - tmp_id <- left_ids[i] - is_within_fema_polygon <- left_fema_bool[i] - fema_index <- left_fema_indexes[i] - - message("Transect: '", tmp_id, "' - (", i, ")") - # if(is_within_fema_polygon) { - # break - # } - # fema_index <- left_trans$left_fema_index[i] - # is_within_fema_polygon = ifelse(!is.na(left_fema_index), TRUE, FALSE) - - if(is_within_fema_polygon) { - - message("- Left side of transect intersects with FEMA") - message("\t > FEMA index: ", fema_index) - extension_count = extension_count + 1 - message("\t > extension_count: ", extension_count) - - trans_geom <- left_trans_geos[i] - index_vect <- sort(unlist(fema_index)) - - # mapview::mapview(sf::st_as_sf(left_trans_geos[i]), color = "red") + sf::st_as_sf(intersect_lines[index_vect]) - - # intersect_lines[index_vect] - - dist_to_fema <- hydrofabric3D:::geos_bs_distance( - distances = 1:max_extension_distance, - line = trans_geom, - geoms_to_cut = intersect_lines[index_vect], - direction = "head" - ) - - left_extension_dists[i] <- dist_to_fema - - } - message() - } - - left_trans$left_extension_dist <- left_extension_dists - } - tmp <- left_trans[1:5, ] - tmp - tmp_extended <- hydrofabric3D:::extend_by_length(tmp, tmp$left_extension_dist) - - tmp_extended - - fema_idx <- unique(unlist(dplyr::select(tmp, left_fema_index)$left_fema_index)) - - mapview::mapview(dplyr::select(tmp, -left_fema_index), color = "red") + - mapview::mapview(dplyr::select(tmp_extended, -left_fema_index), color = "green") + - sf::st_as_sf(intersect_polygons[fema_idx]) - - length(left_extension_dists) - - left_trans_geos - vctrs::vec_c( - vctrs::vec_c( - left_trans$hy_id - ), - vctrs::vec_cast(left_trans$cs_id) - ) - - # - # # mapview::mapview(left_trans[477, ]) + intersect_polygons[1888, ] - # left_trans <- - # left_trans %>% - # dplyr::mutate( - # fema_index = unlist(sf::st_within(., intersect_polygons)) - # ) %>% - # dplyr::relocate(fema_index) - # - # left_trans_geos <- 
geos::as_geos_geometry(left_trans) - - sort(na.omit(unlist(unique(left_trans$fema_index)))) - fema_polygons[na.omit(unlist(unique(left_trans$fema_index)))] - - fema_polygons %>% na.omit() - - # note: sorting the fema polygon indices (not sure if necessary) - left_fema <- fema_polygons[sort(na.omit(unlist(unique(left_trans$fema_index))))] - - left_fema - - left_fema %>% plot() - - geos::geos_make_linestring(geom = left_fema) - left_trans$fema_index - sort(na.omit(unlist(unique(left_trans$fema_index)))) - - # left_fema <- fema_polygons[na.omit(unlist(unique(left_trans$fema_index)))] - left_fema_lines <- - left_fema %>% - sf::st_as_sf() %>% - sf::st_cast("multilinestring") %>% - geos::as_geos_geometry() %>% - geos::geos_simplify_preserve_topology(25) - - left_fema %>% - sf::st_as_sf() %>% - mapview::npts() - - geos::geos_simplify_preserve_topology(left_fema_lines, 50) %>% - geos::geos_num_coordinates() %>% - sum() - - geos::geos_num_coordinates(left_fema_lines) %>% sum() - - geos::geos_simplify_preserve_topology(left_fema_lines, 1) %>% - .[3] %>% - plot() - left_fema_lines[3] %>% plot() - - - left_extension_dists <- lapply(seq_along(left_trans_geos), function(i) { - - hydrofabric3d:::geos_bs_distance( - distances = 1:2000, - line = left_trans_geos[i], - geoms_to_cut = left_fema_lines, - direction = "head" - ) - }) %>% - unlist() - - left_trans$left_head_extension_dist <- left_extension_dists - - # find the distances from the right side of transect lines - right_trans <- - segmented_trans %>% - lwgeom::st_linesubstring(0.50, 1) %>% - dplyr::mutate( - partition = "right", - partition_lengthm = as.numeric(sf::st_length(geom)) - ) %>% - dplyr::select(hy_id, cs_source, cs_id, cs_measure, - cs_lengthm, is_extended, partition, partition_lengthm, geom) - right_trans_geos <- geos::as_geos_geometry(right_trans) - right_within_matrix <- geos::geos_within_matrix(right_trans_geos, geos::as_geos_geometry(fema_polygons)) - - right_within_vect <- lapply(right_within_matrix, function(i) { - if(length(i) > 0) { c(i) } else { c(na_real_) } } - ) - right_trans$fema_index <- right_within_vect -# -# right_trans <- -# right_trans %>% + +# +# # ---------------------------------------------------------------------------------------------------------------- +# # Loop over every left and right halfs of transects and +# # if they are fully within FEMA polygons, get the minimum extension distance required for the transect to meet the FEMA polygon boundary +# # ---------------------------------------------------------------------------------------------------------------- +# +# left_ids <- left_trans$tmp_id +# +# left_fema_indexes <- left_trans$left_fema_index +# left_fema_bool <- left_trans$left_is_within_fema +# +# # preallocate vector that stores the extension. 
distances +# left_extension_dists <- vctrs::vec_c(rep(0, length(left_ids))) +# +# # all_equal_length_vects <- all(length(left_ids) == length(left_fema_indexes) && length(left_ids) == length(left_fema_bool)) +# # 1:length(left_ids) +# extension_count = 0 +# +# for(i in 1:length(left_ids)) { +# # i = 1 +# tmp_id <- left_ids[i] +# is_within_fema_polygon <- left_fema_bool[i] +# fema_index <- left_fema_indexes[i] +# +# message("Transect: '", tmp_id, "' - (", i, ")") +# # if(is_within_fema_polygon) { +# # break +# # } +# # fema_index <- left_trans$left_fema_index[i] +# # is_within_fema_polygon = ifelse(!is.na(left_fema_index), TRUE, FALSE) +# +# if(is_within_fema_polygon) { +# +# message("- Left side of transect intersects with FEMA") +# message("\t > FEMA index: ", fema_index) +# extension_count = extension_count + 1 +# message("\t > extension_count: ", extension_count) +# +# trans_geom <- left_trans_geos[i] +# index_vect <- sort(unlist(fema_index)) +# +# # mapview::mapview(sf::st_as_sf(left_trans_geos[i]), color = "red") + sf::st_as_sf(intersect_lines[index_vect]) +# +# # intersect_lines[index_vect] +# +# dist_to_fema <- hydrofabric3D:::geos_bs_distance( +# distances = 1:max_extension_distance, +# line = trans_geom, +# geoms_to_cut = intersect_lines[index_vect], +# direction = "head" +# ) +# +# left_extension_dists[i] <- dist_to_fema +# +# } +# message() +# } +# +# left_trans$left_extension_dist <- left_extension_dists +# } +# tmp <- left_trans[1:5, ] +# tmp +# tmp_extended <- hydrofabric3D:::extend_by_length(tmp, tmp$left_extension_dist) +# +# tmp_extended +# +# fema_idx <- unique(unlist(dplyr::select(tmp, left_fema_index)$left_fema_index)) +# +# mapview::mapview(dplyr::select(tmp, -left_fema_index), color = "red") + +# mapview::mapview(dplyr::select(tmp_extended, -left_fema_index), color = "green") + +# sf::st_as_sf(intersect_polygons[fema_idx]) +# +# length(left_extension_dists) +# +# left_trans_geos +# vctrs::vec_c( +# vctrs::vec_c( +# left_trans$hy_id +# ), +# vctrs::vec_cast(left_trans$cs_id) +# ) +# +# # +# # # mapview::mapview(left_trans[477, ]) + intersect_polygons[1888, ] +# # left_trans <- +# # left_trans %>% +# # dplyr::mutate( +# # fema_index = unlist(sf::st_within(., intersect_polygons)) +# # ) %>% +# # dplyr::relocate(fema_index) +# # +# # left_trans_geos <- geos::as_geos_geometry(left_trans) +# +# sort(na.omit(unlist(unique(left_trans$fema_index)))) +# fema_polygons[na.omit(unlist(unique(left_trans$fema_index)))] +# +# fema_polygons %>% na.omit() +# +# # note: sorting the fema polygon indices (not sure if necessary) +# left_fema <- fema_polygons[sort(na.omit(unlist(unique(left_trans$fema_index))))] +# +# left_fema +# +# left_fema %>% plot() +# +# geos::geos_make_linestring(geom = left_fema) +# left_trans$fema_index +# sort(na.omit(unlist(unique(left_trans$fema_index)))) +# +# # left_fema <- fema_polygons[na.omit(unlist(unique(left_trans$fema_index)))] +# left_fema_lines <- +# left_fema %>% +# sf::st_as_sf() %>% +# sf::st_cast("multilinestring") %>% +# geos::as_geos_geometry() %>% +# geos::geos_simplify_preserve_topology(25) +# +# left_fema %>% +# sf::st_as_sf() %>% +# mapview::npts() +# +# geos::geos_simplify_preserve_topology(left_fema_lines, 50) %>% +# geos::geos_num_coordinates() %>% +# sum() +# +# geos::geos_num_coordinates(left_fema_lines) %>% sum() +# +# geos::geos_simplify_preserve_topology(left_fema_lines, 1) %>% +# .[3] %>% +# plot() +# left_fema_lines[3] %>% plot() +# +# +# left_extension_dists <- lapply(seq_along(left_trans_geos), function(i) { +# +# 
hydrofabric3d:::geos_bs_distance( +# distances = 1:2000, +# line = left_trans_geos[i], +# geoms_to_cut = left_fema_lines, +# direction = "head" +# ) +# }) %>% +# unlist() +# +# left_trans$left_head_extension_dist <- left_extension_dists +# +# # find the distances from the right side of transect lines +# right_trans <- +# segmented_trans %>% +# lwgeom::st_linesubstring(0.50, 1) %>% +# dplyr::mutate( +# partition = "right", +# partition_lengthm = as.numeric(sf::st_length(geom)) +# ) %>% +# dplyr::select(hy_id, cs_source, cs_id, cs_measure, +# cs_lengthm, is_extended, partition, partition_lengthm, geom) +# right_trans_geos <- geos::as_geos_geometry(right_trans) +# right_within_matrix <- geos::geos_within_matrix(right_trans_geos, geos::as_geos_geometry(fema_polygons)) +# +# right_within_vect <- lapply(right_within_matrix, function(i) { +# if(length(i) > 0) { c(i) } else { c(na_real_) } } +# ) +# right_trans$fema_index <- right_within_vect +# # +# # right_trans <- +# # right_trans %>% +# # dplyr::mutate( +# # fema_index = unlist(sf::st_within(., fema_polygons)) +# # ) %>% +# # dplyr::relocate(fema_index) +# # +# +# right_fema <- fema_polygons[unique(right_trans$fema_index),] +# right_fema_lines <- +# right_fema %>% +# sf::st_cast("linestring") +# +# right_extension_dists <- lapply(seq_along(right_trans_geos), function(i) { +# +# hydrofabric3d:::geos_bs_distance( +# distances = 1:2000, +# line = right_trans_geos[i], +# geoms_to_cut = right_fema_lines, +# direction = "tail" +# ) +# }) %>% +# unlist() +# +# right_trans$right_tail_extension_dists <- right_extension_dists +# right_trans$right_tail_extension_dists +# unlist(sf::st_within(left_trans, fema_polygons)) +# +# left_trans <- +# left_trans %>% # dplyr::mutate( # fema_index = unlist(sf::st_within(., fema_polygons)) # ) %>% -# dplyr::relocate(fema_index) +# dplyr::relocate(fema_index) +# +# left_fema <- fema_polygons[unique(left_trans$fema_index),] +# left_fema_lines <- +# left_fema %>% +# sf::st_cast("linestring") +# +# geos::as_geos_geometry(left_fema) +# mapview::mapview(left_fema, col.regions = "dodgerblue") + +# mapview::mapview(left_ls, color = "yellow") + +# mapview::mapview(left_trans, color = "red") + +# mapview::mapview(right_trans, color = "green") +# +# left_trans_geos <- geos::as_geos_geometry(left_trans) +# left_trans_geos %>% plot() +# left_trans_geos[2] +# +# length(left_trans_geos) +# +# left_extension_dists <- lapply(seq_along(left_trans_geos), function(i) { +# +# hydrofabric3d:::geos_bs_distance( +# distances = 1:2000, +# line = left_trans_geos[i], +# geoms_to_cut = left_fema_lines, +# direction = "head" +# ) +# }) %>% +# unlist() +# +# left_extensions <- geos::geos_empty() +# +# for (i in 1:length(left_extension_dists)) { +# dist = left_extension_dists[i] +# geos_line <- left_trans_geos[i] +# message(glue::glue("i: {i}\ndist: {dist}")) +# +# extended <- hydrofabric3d::geos_extend_line( +# geos_line, +# dist, +# "head" +# ) +# left_extensions <- vctrs::vec_c(left_extensions, extended) +# +# } +# # index for only valid transects +# # is_valid <- !geos::geos_is_empty(left_extensions) +# left_extensions <- left_extensions[!geos::geos_is_empty(left_extensions)] +# # !geos::geos_is_empty(left_extensions) +# +# new_left_trans <- +# left_trans %>% +# sf::st_drop_geometry() %>% +# dplyr::mutate( +# geom = sf::st_as_sfc(left_extensions) +# ) %>% +# sf::st_as_sf() +# # geos::sf +# mapview::mapview(left_fema, col.regions = "dodgerblue") + +# mapview::mapview(left_ls, color = "yellow") + +# mapview::mapview(left_trans, color = "red") + 
+# mapview::mapview(new_left_trans, color = "green") +# mapply(function(geom, dist) { +# hydrofabric3d::geos_extend_line(geom, dist, "head") +# }, +# left_trans_geos, +# left_extension_dists +# ) +# +# left_extensions <- lapply(seq_along(left_trans_geos), function(i) { +# +# extend_dist <- hydrofabric3d:::geos_bs_distance( +# distances = 1:2000, +# line = left_trans_geos[i], +# geoms_to_cut = left_fema_lines, +# direction = "head" +# ) +# +# hydrofabric3d::geos_extend_line(left_trans_geos[i], extend_dist, "head") +# +# }) +# # unlist(left_extensions) +# +# left_extension_dists <- lapply(seq_along(left_trans_geos), function(i) { +# hydrofabric3d:::geos_bs_distance( +# distances = 1:2000, +# line = left_trans_geos[i], +# geoms_to_cut = left_fema_lines, +# direction = "head" +# ) +# }) +# +# distance_to_extend <- +# hydrofabric3d:::geos_bs_distance( +# distances = 1:1500, +# line = left_trans_geos[1], +# geoms_to_cut = left_fema_lines, +# direction = "head" +# ) +# +# extended <- hydrofabric3d::geos_extend_line(left_trans_geos[1], distance_to_extend, "head") %>% +# sf::st_as_sf() +# mapview::mapview(left_fema, col.regions = "dodgerblue") + +# mapview::mapview(left_ls, color = "yellow") + +# mapview::mapview(left_trans, color = "red") + +# mapview::mapview(extended, color = "green") +# # mapview::mapview(right_trans, color = "green") +# left_trans$geom %>% sf::st_length() +# plot(segmented_trans$geom, col = "red", lwd =5) +# plot(left_trans$geom, col = "green", lwd=3, add = true) +# plot(right_trans$geom, col = "blue", lwd=3, add = true) +# unlist(left_trans$geom) +# unlist(right_trans$geom) +# +# unlist(trans_fema$geom ) +# unlist(split_trans$geom ) +# split_trans %>% +# sf::st_collection_extract("linestring") +# split_trans$geom +# +# tmp_trans$geom +# nngeo::st_segments(tmp_trans) %>% +# .$result %>% +# plot() +# +# mapview::mapview(tmp_trans) + tmp_centroid +# geos::geos_clip_by_rect() +# +# transects_with_ +# +# lengths(transects_polygons_matrix) +# mapview::mapview(transects_with_fema, color = "green") + fema_with_transects +# unique(hydrofabric3d::add_tmp_id(transects)$tmp_id)[1:30] +# transects %>% +# hydrofabric3d::add_tmp_id(transects) %>% +# .$tmp_id %>% +# unique() %>% .[1:30] +# trans_subset <- +# transects %>% +# hydrofabric3d::add_tmp_id() %>% +# dplyr::filter(tmp_id %in% unique(hydrofabric3d::add_tmp_id(transects)$tmp_id)[1:30]) +# +# fema_subset <- +# fema %>% +# dplyr::filter(fema_id == "1268") +# +# extended <- hydrofabric3D:::extend_by_length(trans_subset, rep(500, nrow(trans_subset))) +# extended +# clipped_trans <- rmapshaper::ms_clip(extended, fema) +# +# rmapshaper::ms_clip(extended, fema_subset) +# mapview::mapview(trans_subset, color = "red") + +# mapview::mapview(extended, color = "yellow") + +# # mapview::mapview( sf::st_difference(extended, fema_subset), color = "green") + +# mapview::mapview(clipped_trans, color = "green") + +# fema +# +# rep(50, nrow(trans_subset)) +# extended <- hydrofabric3D:::extend_by_length(trans_subset, rep(50, nrow(trans_subset))) +# +# +# hydrofabric3D::geos_extend_line(trans_subset, 50) %>% +# sf::st_as_sf() %>% mapview::mapview() +# +# +# +# +# +# +# +# +# +# # - - right_fema <- fema_polygons[unique(right_trans$fema_index),] - right_fema_lines <- - right_fema %>% - sf::st_cast("linestring") - - right_extension_dists <- lapply(seq_along(right_trans_geos), function(i) { - - hydrofabric3d:::geos_bs_distance( - distances = 1:2000, - line = right_trans_geos[i], - geoms_to_cut = right_fema_lines, - direction = "tail" - ) - }) %>% - unlist() - 
- right_trans$right_tail_extension_dists <- right_extension_dists - right_trans$right_tail_extension_dists - unlist(sf::st_within(left_trans, fema_polygons)) - - left_trans <- - left_trans %>% - dplyr::mutate( - fema_index = unlist(sf::st_within(., fema_polygons)) - ) %>% - dplyr::relocate(fema_index) - - left_fema <- fema_polygons[unique(left_trans$fema_index),] - left_fema_lines <- - left_fema %>% - sf::st_cast("linestring") - - geos::as_geos_geometry(left_fema) - mapview::mapview(left_fema, col.regions = "dodgerblue") + - mapview::mapview(left_ls, color = "yellow") + - mapview::mapview(left_trans, color = "red") + - mapview::mapview(right_trans, color = "green") - - left_trans_geos <- geos::as_geos_geometry(left_trans) - left_trans_geos %>% plot() - left_trans_geos[2] - - length(left_trans_geos) - - left_extension_dists <- lapply(seq_along(left_trans_geos), function(i) { - - hydrofabric3d:::geos_bs_distance( - distances = 1:2000, - line = left_trans_geos[i], - geoms_to_cut = left_fema_lines, - direction = "head" - ) - }) %>% - unlist() - - left_extensions <- geos::geos_empty() - - for (i in 1:length(left_extension_dists)) { - dist = left_extension_dists[i] - geos_line <- left_trans_geos[i] - message(glue::glue("i: {i}\ndist: {dist}")) - - extended <- hydrofabric3d::geos_extend_line( - geos_line, - dist, - "head" - ) - left_extensions <- vctrs::vec_c(left_extensions, extended) - - } - # index for only valid transects - # is_valid <- !geos::geos_is_empty(left_extensions) - left_extensions <- left_extensions[!geos::geos_is_empty(left_extensions)] - # !geos::geos_is_empty(left_extensions) - - new_left_trans <- - left_trans %>% - sf::st_drop_geometry() %>% - dplyr::mutate( - geom = sf::st_as_sfc(left_extensions) - ) %>% - sf::st_as_sf() - # geos::sf - mapview::mapview(left_fema, col.regions = "dodgerblue") + - mapview::mapview(left_ls, color = "yellow") + - mapview::mapview(left_trans, color = "red") + - mapview::mapview(new_left_trans, color = "green") - mapply(function(geom, dist) { - hydrofabric3d::geos_extend_line(geom, dist, "head") - }, - left_trans_geos, - left_extension_dists - ) - - left_extensions <- lapply(seq_along(left_trans_geos), function(i) { - - extend_dist <- hydrofabric3d:::geos_bs_distance( - distances = 1:2000, - line = left_trans_geos[i], - geoms_to_cut = left_fema_lines, - direction = "head" - ) - - hydrofabric3d::geos_extend_line(left_trans_geos[i], extend_dist, "head") - - }) - # unlist(left_extensions) - - left_extension_dists <- lapply(seq_along(left_trans_geos), function(i) { - hydrofabric3d:::geos_bs_distance( - distances = 1:2000, - line = left_trans_geos[i], - geoms_to_cut = left_fema_lines, - direction = "head" - ) - }) - - distance_to_extend <- - hydrofabric3d:::geos_bs_distance( - distances = 1:1500, - line = left_trans_geos[1], - geoms_to_cut = left_fema_lines, - direction = "head" - ) - - extended <- hydrofabric3d::geos_extend_line(left_trans_geos[1], distance_to_extend, "head") %>% - sf::st_as_sf() - mapview::mapview(left_fema, col.regions = "dodgerblue") + - mapview::mapview(left_ls, color = "yellow") + - mapview::mapview(left_trans, color = "red") + - mapview::mapview(extended, color = "green") - # mapview::mapview(right_trans, color = "green") - left_trans$geom %>% sf::st_length() - plot(segmented_trans$geom, col = "red", lwd =5) - plot(left_trans$geom, col = "green", lwd=3, add = true) - plot(right_trans$geom, col = "blue", lwd=3, add = true) - unlist(left_trans$geom) - unlist(right_trans$geom) - - unlist(trans_fema$geom ) - unlist(split_trans$geom ) 
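Note that the for-loop above grows left_extensions one element at a time with vctrs::vec_c(), which copies the whole vector on every iteration; with a preallocated result (the same pattern already used for the distance vectors) the loop stays linear. A rough sketch, assuming hydrofabric3D::geos_extend_line() behaves as in the calls above; extend_all() is a hypothetical helper:

library(geos)

# extend each geos linestring by its own distance, writing into a
# preallocated geos vector instead of growing one inside the loop
extend_all <- function(lines, dists, direction = "head") {
  out <- rep(geos::geos_empty(), length(lines))   # preallocate
  for (i in seq_along(lines)) {
    out[i] <- hydrofabric3D::geos_extend_line(lines[i], dists[i], direction)
  }
  out
}

# left_extensions <- extend_all(left_trans_geos, left_extension_dists)
# left_extensions <- left_extensions[!geos::geos_is_empty(left_extensions)]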
- split_trans %>% - sf::st_collection_extract("linestring") - split_trans$geom - - tmp_trans$geom - nngeo::st_segments(tmp_trans) %>% - .$result %>% - plot() - - mapview::mapview(tmp_trans) + tmp_centroid - geos::geos_clip_by_rect() - - transects_with_ - - lengths(transects_polygons_matrix) - mapview::mapview(transects_with_fema, color = "green") + fema_with_transects - unique(hydrofabric3d::add_tmp_id(transects)$tmp_id)[1:30] - transects %>% - hydrofabric3d::add_tmp_id(transects) %>% - .$tmp_id %>% - unique() %>% .[1:30] - trans_subset <- - transects %>% - hydrofabric3d::add_tmp_id() %>% - dplyr::filter(tmp_id %in% unique(hydrofabric3d::add_tmp_id(transects)$tmp_id)[1:30]) - - fema_subset <- - fema %>% - dplyr::filter(fema_id == "1268") - - extended <- hydrofabric3D:::extend_by_length(trans_subset, rep(500, nrow(trans_subset))) - extended - clipped_trans <- rmapshaper::ms_clip(extended, fema) - - rmapshaper::ms_clip(extended, fema_subset) - mapview::mapview(trans_subset, color = "red") + - mapview::mapview(extended, color = "yellow") + - # mapview::mapview( sf::st_difference(extended, fema_subset), color = "green") + - mapview::mapview(clipped_trans, color = "green") + - fema - - rep(50, nrow(trans_subset)) - extended <- hydrofabric3D:::extend_by_length(trans_subset, rep(50, nrow(trans_subset))) - - - hydrofabric3D::geos_extend_line(trans_subset, 50) %>% - sf::st_as_sf() %>% mapview::mapview() - - - - - - - - - - - From 248d5401b5b38fb83fabb33eaaa622cf033dfa0c Mon Sep 17 00:00:00 2001 From: anguswg-ucsb <anguswatters@gmail.com> Date: Thu, 23 May 2024 13:35:52 -0700 Subject: [PATCH 36/64] small stuff --- runners/cs_runner/add_fema_to_transects.R | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/runners/cs_runner/add_fema_to_transects.R b/runners/cs_runner/add_fema_to_transects.R index d23b67e..dbbe236 100644 --- a/runners/cs_runner/add_fema_to_transects.R +++ b/runners/cs_runner/add_fema_to_transects.R @@ -200,7 +200,7 @@ path_df # polygons <- fema # # # flines <- flines # # max_extension_distance <- 3000 - # max_extension_distance = 3500 + # max_extension_distance = 3500 # ### ### ### ### ### ### ### ### ### ### ### ### ### ### @@ -243,7 +243,6 @@ path_df # mapview::npts(sf::st_as_sf(intersect_lines)) - # intersect_polygons %>% # geos::geos_make_valid() %>% # geos::geos_is_valid() %>% all() From f4f7832e50d6a8977de5b4278142211fb32e4c74 Mon Sep 17 00:00:00 2001 From: anguswg-ucsb <anguswatters@gmail.com> Date: Thu, 23 May 2024 13:36:57 -0700 Subject: [PATCH 37/64] random cleanups --- runners/cs_runner/add_fema_to_transects.R | 18 +----------------- 1 file changed, 1 insertion(+), 17 deletions(-) diff --git a/runners/cs_runner/add_fema_to_transects.R b/runners/cs_runner/add_fema_to_transects.R index dbbe236..a36b369 100644 --- a/runners/cs_runner/add_fema_to_transects.R +++ b/runners/cs_runner/add_fema_to_transects.R @@ -9,8 +9,6 @@ transects_prefix <- paste0(s3_bucket, version_prefix, "/3d/transects/") # paths to nextgen datasets and model attribute parquet files nextgen_files <- list.files(nextgen_dir, full.names = FALSE) -# fema_files <- list.files(fema_fgb_path, full.names = FALSE) -# fema_bb_files <- list.files(fema_fgb_bb_path, full.names = FALSE) transects_files <- list.files(transects_dir, full.names = FALSE) transects_files <- transects_files[!grepl("updated", transects_files)] @@ -29,7 +27,7 @@ path_df # loop over each vpu and generate cross sections, then save locally and upload to s3 bucket # for(i in 1:nrow(path_df)) { - i = 8 + # i = 8 # nextgen file and full 
path nextgen_file <- path_df$x[i] @@ -72,7 +70,6 @@ path_df fema, flines, 3000) - # }) # library(nngeo) # fema %>% @@ -91,7 +88,6 @@ path_df # mapview::mapview(sf::st_buffer(fema_sub[2, ], 500), col.regions = "green") # fema_no_holes <- nngeo::st_remove_holes(fema) # fema_no_holes_union <- sf::st_union(fema_no_holes) - # # touching_list = sf::st_touches(fema_no_holes) # mapview::npts(fema) # mapview::npts(fema_no_holes) @@ -101,15 +97,10 @@ path_df # dplyr::group_by(new_fema_id) # fema_no_holes_union # fema$fema_id %>% unique() %>% length() - # # union then explode FEMA polygons # fema <- # fema %>% # sf::st_union() - # - # fema <- rmapshaper::ms_explode(fema) - # # fema %>% mapview::npts() - # # # reassign IDs and change geometry column name # fema <- # fema %>% @@ -194,7 +185,6 @@ path_df flines, max_extension_distance) { - ### ### ### ### ### ### ### ### ### ### ### ### ### ### # transect_lines <- transects # polygons <- fema @@ -202,7 +192,6 @@ path_df # # max_extension_distance <- 3000 # max_extension_distance = 3500 # ### ### ### ### ### ### ### - ### ### ### ### ### ### ### # keep 10% of the original points for speed polygons <- rmapshaper::ms_simplify(polygons, keep_shapes = F, keep = 0.10) @@ -256,11 +245,6 @@ path_df # make each transect line have way more segments so we can take a left and right half of each transect line segmented_trans <- sf::st_segmentize(intersect_transects, min_segmentation) - # unlist(segmented_trans$geom) - # unique(lengths(segmented_trans$geom)) - # length(lengths(segmented_trans$geom)) - # lengths(segmented_trans$geom) - # mapview::mapview(left_trans, col.regions = "dodgerblue") + # mapview::mapview(intersect_transects, color = "red") + # mapview::mapview(intersect_transects[42, ], color = "yellow") + From b81e4778a9bc34fc979e8c1ec5a6ecb67dd17897 Mon Sep 17 00:00:00 2001 From: anguswg-ucsb <anguswatters@gmail.com> Date: Wed, 29 May 2024 15:35:29 -0700 Subject: [PATCH 38/64] small cleanups as im migrating fema functions to hydrofabric3D --- runners/cs_runner/add_fema_to_transects.R | 413 +--------------------- 1 file changed, 4 insertions(+), 409 deletions(-) diff --git a/runners/cs_runner/add_fema_to_transects.R b/runners/cs_runner/add_fema_to_transects.R index a36b369..c96efb3 100644 --- a/runners/cs_runner/add_fema_to_transects.R +++ b/runners/cs_runner/add_fema_to_transects.R @@ -936,35 +936,18 @@ path_df # mapview::mapview(sf::st_as_sf(right_extended_trans), color = "red") + # mapview::mapview( sf::st_as_sf(intersect_polygons[[curr_fema_index]]), col.regions = "dodgerblue") + # mapview::mapview( sf::st_as_sf(final_line), color = "yellow") -# -# -# -# -# + # # } -# -# + # left_trans$left_extension_distance -# + # length(right_distances) -# -# # # Calculate the minimum distance a line would need to extend to reach the boundary of the polygon/line that the input geometries are entirely within calc_extension_distances <- function(geos_geoms, ids, lines_to_cut, lines_to_cut_indices, direction = "head", max_extension_distance = 2500) { ##### ##### ##### ##### ##### # geos_geoms <- left_trans_geos - # ids <- left_trans$tmp_id - # lines_to_cut <- intersect_lines - # lines_to_cut_indices <- left_trans$left_fema_index - # direction = "head" - # max_extension_distance = 2500 - # geos_geoms = left_trans_geos - # ids = left_trans$tmp_id - # lines_to_cut = intersect_lines - # lines_to_cut_indices = left_trans$left_fema_index - # direction = "head" - # max_extension_distance = max_extension_distance + # geos_geoms = left_trans_geos # ids = left_trans$tmp_id # 
lines_to_cut = intersect_lines @@ -1012,391 +995,3 @@ path_df return(extension_dists) } - -# -# # ---------------------------------------------------------------------------------------------------------------- -# # Loop over every left and right halfs of transects and -# # if they are fully within FEMA polygons, get the minimum extension distance required for the transect to meet the FEMA polygon boundary -# # ---------------------------------------------------------------------------------------------------------------- -# -# left_ids <- left_trans$tmp_id -# -# left_fema_indexes <- left_trans$left_fema_index -# left_fema_bool <- left_trans$left_is_within_fema -# -# # preallocate vector that stores the extension. distances -# left_extension_dists <- vctrs::vec_c(rep(0, length(left_ids))) -# -# # all_equal_length_vects <- all(length(left_ids) == length(left_fema_indexes) && length(left_ids) == length(left_fema_bool)) -# # 1:length(left_ids) -# extension_count = 0 -# -# for(i in 1:length(left_ids)) { -# # i = 1 -# tmp_id <- left_ids[i] -# is_within_fema_polygon <- left_fema_bool[i] -# fema_index <- left_fema_indexes[i] -# -# message("Transect: '", tmp_id, "' - (", i, ")") -# # if(is_within_fema_polygon) { -# # break -# # } -# # fema_index <- left_trans$left_fema_index[i] -# # is_within_fema_polygon = ifelse(!is.na(left_fema_index), TRUE, FALSE) -# -# if(is_within_fema_polygon) { -# -# message("- Left side of transect intersects with FEMA") -# message("\t > FEMA index: ", fema_index) -# extension_count = extension_count + 1 -# message("\t > extension_count: ", extension_count) -# -# trans_geom <- left_trans_geos[i] -# index_vect <- sort(unlist(fema_index)) -# -# # mapview::mapview(sf::st_as_sf(left_trans_geos[i]), color = "red") + sf::st_as_sf(intersect_lines[index_vect]) -# -# # intersect_lines[index_vect] -# -# dist_to_fema <- hydrofabric3D:::geos_bs_distance( -# distances = 1:max_extension_distance, -# line = trans_geom, -# geoms_to_cut = intersect_lines[index_vect], -# direction = "head" -# ) -# -# left_extension_dists[i] <- dist_to_fema -# -# } -# message() -# } -# -# left_trans$left_extension_dist <- left_extension_dists -# } -# tmp <- left_trans[1:5, ] -# tmp -# tmp_extended <- hydrofabric3D:::extend_by_length(tmp, tmp$left_extension_dist) -# -# tmp_extended -# -# fema_idx <- unique(unlist(dplyr::select(tmp, left_fema_index)$left_fema_index)) -# -# mapview::mapview(dplyr::select(tmp, -left_fema_index), color = "red") + -# mapview::mapview(dplyr::select(tmp_extended, -left_fema_index), color = "green") + -# sf::st_as_sf(intersect_polygons[fema_idx]) -# -# length(left_extension_dists) -# -# left_trans_geos -# vctrs::vec_c( -# vctrs::vec_c( -# left_trans$hy_id -# ), -# vctrs::vec_cast(left_trans$cs_id) -# ) -# -# # -# # # mapview::mapview(left_trans[477, ]) + intersect_polygons[1888, ] -# # left_trans <- -# # left_trans %>% -# # dplyr::mutate( -# # fema_index = unlist(sf::st_within(., intersect_polygons)) -# # ) %>% -# # dplyr::relocate(fema_index) -# # -# # left_trans_geos <- geos::as_geos_geometry(left_trans) -# -# sort(na.omit(unlist(unique(left_trans$fema_index)))) -# fema_polygons[na.omit(unlist(unique(left_trans$fema_index)))] -# -# fema_polygons %>% na.omit() -# -# # note: sorting the fema polygon indices (not sure if necessary) -# left_fema <- fema_polygons[sort(na.omit(unlist(unique(left_trans$fema_index))))] -# -# left_fema -# -# left_fema %>% plot() -# -# geos::geos_make_linestring(geom = left_fema) -# left_trans$fema_index -# 
sort(na.omit(unlist(unique(left_trans$fema_index)))) -# -# # left_fema <- fema_polygons[na.omit(unlist(unique(left_trans$fema_index)))] -# left_fema_lines <- -# left_fema %>% -# sf::st_as_sf() %>% -# sf::st_cast("multilinestring") %>% -# geos::as_geos_geometry() %>% -# geos::geos_simplify_preserve_topology(25) -# -# left_fema %>% -# sf::st_as_sf() %>% -# mapview::npts() -# -# geos::geos_simplify_preserve_topology(left_fema_lines, 50) %>% -# geos::geos_num_coordinates() %>% -# sum() -# -# geos::geos_num_coordinates(left_fema_lines) %>% sum() -# -# geos::geos_simplify_preserve_topology(left_fema_lines, 1) %>% -# .[3] %>% -# plot() -# left_fema_lines[3] %>% plot() -# -# -# left_extension_dists <- lapply(seq_along(left_trans_geos), function(i) { -# -# hydrofabric3d:::geos_bs_distance( -# distances = 1:2000, -# line = left_trans_geos[i], -# geoms_to_cut = left_fema_lines, -# direction = "head" -# ) -# }) %>% -# unlist() -# -# left_trans$left_head_extension_dist <- left_extension_dists -# -# # find the distances from the right side of transect lines -# right_trans <- -# segmented_trans %>% -# lwgeom::st_linesubstring(0.50, 1) %>% -# dplyr::mutate( -# partition = "right", -# partition_lengthm = as.numeric(sf::st_length(geom)) -# ) %>% -# dplyr::select(hy_id, cs_source, cs_id, cs_measure, -# cs_lengthm, is_extended, partition, partition_lengthm, geom) -# right_trans_geos <- geos::as_geos_geometry(right_trans) -# right_within_matrix <- geos::geos_within_matrix(right_trans_geos, geos::as_geos_geometry(fema_polygons)) -# -# right_within_vect <- lapply(right_within_matrix, function(i) { -# if(length(i) > 0) { c(i) } else { c(na_real_) } } -# ) -# right_trans$fema_index <- right_within_vect -# # -# # right_trans <- -# # right_trans %>% -# # dplyr::mutate( -# # fema_index = unlist(sf::st_within(., fema_polygons)) -# # ) %>% -# # dplyr::relocate(fema_index) -# # -# -# right_fema <- fema_polygons[unique(right_trans$fema_index),] -# right_fema_lines <- -# right_fema %>% -# sf::st_cast("linestring") -# -# right_extension_dists <- lapply(seq_along(right_trans_geos), function(i) { -# -# hydrofabric3d:::geos_bs_distance( -# distances = 1:2000, -# line = right_trans_geos[i], -# geoms_to_cut = right_fema_lines, -# direction = "tail" -# ) -# }) %>% -# unlist() -# -# right_trans$right_tail_extension_dists <- right_extension_dists -# right_trans$right_tail_extension_dists -# unlist(sf::st_within(left_trans, fema_polygons)) -# -# left_trans <- -# left_trans %>% -# dplyr::mutate( -# fema_index = unlist(sf::st_within(., fema_polygons)) -# ) %>% -# dplyr::relocate(fema_index) -# -# left_fema <- fema_polygons[unique(left_trans$fema_index),] -# left_fema_lines <- -# left_fema %>% -# sf::st_cast("linestring") -# -# geos::as_geos_geometry(left_fema) -# mapview::mapview(left_fema, col.regions = "dodgerblue") + -# mapview::mapview(left_ls, color = "yellow") + -# mapview::mapview(left_trans, color = "red") + -# mapview::mapview(right_trans, color = "green") -# -# left_trans_geos <- geos::as_geos_geometry(left_trans) -# left_trans_geos %>% plot() -# left_trans_geos[2] -# -# length(left_trans_geos) -# -# left_extension_dists <- lapply(seq_along(left_trans_geos), function(i) { -# -# hydrofabric3d:::geos_bs_distance( -# distances = 1:2000, -# line = left_trans_geos[i], -# geoms_to_cut = left_fema_lines, -# direction = "head" -# ) -# }) %>% -# unlist() -# -# left_extensions <- geos::geos_empty() -# -# for (i in 1:length(left_extension_dists)) { -# dist = left_extension_dists[i] -# geos_line <- left_trans_geos[i] -# 
message(glue::glue("i: {i}\ndist: {dist}")) -# -# extended <- hydrofabric3d::geos_extend_line( -# geos_line, -# dist, -# "head" -# ) -# left_extensions <- vctrs::vec_c(left_extensions, extended) -# -# } -# # index for only valid transects -# # is_valid <- !geos::geos_is_empty(left_extensions) -# left_extensions <- left_extensions[!geos::geos_is_empty(left_extensions)] -# # !geos::geos_is_empty(left_extensions) -# -# new_left_trans <- -# left_trans %>% -# sf::st_drop_geometry() %>% -# dplyr::mutate( -# geom = sf::st_as_sfc(left_extensions) -# ) %>% -# sf::st_as_sf() -# # geos::sf -# mapview::mapview(left_fema, col.regions = "dodgerblue") + -# mapview::mapview(left_ls, color = "yellow") + -# mapview::mapview(left_trans, color = "red") + -# mapview::mapview(new_left_trans, color = "green") -# mapply(function(geom, dist) { -# hydrofabric3d::geos_extend_line(geom, dist, "head") -# }, -# left_trans_geos, -# left_extension_dists -# ) -# -# left_extensions <- lapply(seq_along(left_trans_geos), function(i) { -# -# extend_dist <- hydrofabric3d:::geos_bs_distance( -# distances = 1:2000, -# line = left_trans_geos[i], -# geoms_to_cut = left_fema_lines, -# direction = "head" -# ) -# -# hydrofabric3d::geos_extend_line(left_trans_geos[i], extend_dist, "head") -# -# }) -# # unlist(left_extensions) -# -# left_extension_dists <- lapply(seq_along(left_trans_geos), function(i) { -# hydrofabric3d:::geos_bs_distance( -# distances = 1:2000, -# line = left_trans_geos[i], -# geoms_to_cut = left_fema_lines, -# direction = "head" -# ) -# }) -# -# distance_to_extend <- -# hydrofabric3d:::geos_bs_distance( -# distances = 1:1500, -# line = left_trans_geos[1], -# geoms_to_cut = left_fema_lines, -# direction = "head" -# ) -# -# extended <- hydrofabric3d::geos_extend_line(left_trans_geos[1], distance_to_extend, "head") %>% -# sf::st_as_sf() -# mapview::mapview(left_fema, col.regions = "dodgerblue") + -# mapview::mapview(left_ls, color = "yellow") + -# mapview::mapview(left_trans, color = "red") + -# mapview::mapview(extended, color = "green") -# # mapview::mapview(right_trans, color = "green") -# left_trans$geom %>% sf::st_length() -# plot(segmented_trans$geom, col = "red", lwd =5) -# plot(left_trans$geom, col = "green", lwd=3, add = true) -# plot(right_trans$geom, col = "blue", lwd=3, add = true) -# unlist(left_trans$geom) -# unlist(right_trans$geom) -# -# unlist(trans_fema$geom ) -# unlist(split_trans$geom ) -# split_trans %>% -# sf::st_collection_extract("linestring") -# split_trans$geom -# -# tmp_trans$geom -# nngeo::st_segments(tmp_trans) %>% -# .$result %>% -# plot() -# -# mapview::mapview(tmp_trans) + tmp_centroid -# geos::geos_clip_by_rect() -# -# transects_with_ -# -# lengths(transects_polygons_matrix) -# mapview::mapview(transects_with_fema, color = "green") + fema_with_transects -# unique(hydrofabric3d::add_tmp_id(transects)$tmp_id)[1:30] -# transects %>% -# hydrofabric3d::add_tmp_id(transects) %>% -# .$tmp_id %>% -# unique() %>% .[1:30] -# trans_subset <- -# transects %>% -# hydrofabric3d::add_tmp_id() %>% -# dplyr::filter(tmp_id %in% unique(hydrofabric3d::add_tmp_id(transects)$tmp_id)[1:30]) -# -# fema_subset <- -# fema %>% -# dplyr::filter(fema_id == "1268") -# -# extended <- hydrofabric3D:::extend_by_length(trans_subset, rep(500, nrow(trans_subset))) -# extended -# clipped_trans <- rmapshaper::ms_clip(extended, fema) -# -# rmapshaper::ms_clip(extended, fema_subset) -# mapview::mapview(trans_subset, color = "red") + -# mapview::mapview(extended, color = "yellow") + -# # mapview::mapview( 
sf::st_difference(extended, fema_subset), color = "green") + -# mapview::mapview(clipped_trans, color = "green") + -# fema -# -# rep(50, nrow(trans_subset)) -# extended <- hydrofabric3D:::extend_by_length(trans_subset, rep(50, nrow(trans_subset))) -# -# -# hydrofabric3D::geos_extend_line(trans_subset, 50) %>% -# sf::st_as_sf() %>% mapview::mapview() -# -# -# -# -# -# -# -# -# -# -# - - - - - - - - - - - - - - - - \ No newline at end of file From 7914a4526db5ece3d238487934fcfaee50674df6 Mon Sep 17 00:00:00 2001 From: anguswg-ucsb <anguswatters@gmail.com> Date: Thu, 13 Jun 2024 10:16:29 +0300 Subject: [PATCH 39/64] updating cs_runner/01_transects.R to use FEMA VPU polygons to extend transects out to the FEMA floodplain when possible --- runners/cs_runner/01_transects.R | 34 ++- runners/cs_runner/add_fema_to_transects.R | 278 ++++++++++++++-------- 2 files changed, 210 insertions(+), 102 deletions(-) diff --git a/runners/cs_runner/01_transects.R b/runners/cs_runner/01_transects.R index b3727ba..4d6c746 100644 --- a/runners/cs_runner/01_transects.R +++ b/runners/cs_runner/01_transects.R @@ -11,7 +11,7 @@ source("runners/cs_runner/config.R") transects_prefix <- paste0(s3_bucket, version_prefix, "/3D/transects/") # paths to nextgen datasets and model attribute parquet files -nextgen_files <- list.files(nextgen_dir, full.names = FALSE) +nextgen_files <- list.files(nextgen_dir, full.names = FALSE) model_attr_files <- list.files(model_attr_dir, full.names = FALSE) # string to fill in "cs_source" column in output datasets @@ -26,16 +26,32 @@ path_df <- align_files_by_vpu( # loop over each VPU and generate cross sections, then save locally and upload to S3 bucket for(i in 1:nrow(path_df)) { - + # i = 8 # nextgen file and full path nextgen_file <- path_df$x[i] nextgen_path <- paste0(nextgen_dir, nextgen_file) + vpu <- path_df$vpu[i] + + # Get FEMA by VPU directory and files for current VPU + fema_vpu_dir <- paste0(FEMA_VPU_SUBFOLDERS[grepl(paste0("VPU_", vpu), basename(FEMA_VPU_SUBFOLDERS))], "/merged") + # fema_vpu_dir <- paste0(FEMA_VPU_SUBFOLDERS[grepl(paste0("VPU_", vpu), basename(FEMA_VPU_SUBFOLDERS))], "/merged") + + vpu_fema_files <- list.files(fema_vpu_dir, full.names = TRUE) + vpu_fema_file <- vpu_fema_files[grepl(paste0(vpu, "_union.gpkg"), vpu_fema_files)] + + # fema polygons and transect lines + fema <- sf::read_sf(vpu_fema_file) + # # model attributes file and full path # model_attr_file <- path_df$y[i] # model_attr_path <- paste0(model_attr_dir, model_attr_file) - message("Creating VPU ", path_df$vpu[i], " transects:\n - flowpaths: '", nextgen_file, "'") + message("Creating VPU ", vpu, " transects:", + "\n - flowpaths: '", + nextgen_file, "'", + "\n - FEMA polygons: ", basename(vpu_fema_file) + ) # message("Creating VPU ", path_df$vpu[i], " transects:\n - flowpaths: '", nextgen_file, "'\n - model attributes: '", model_attr_file, "'") # read in nextgen data @@ -114,6 +130,15 @@ for(i in 1:nrow(path_df)) { cs_source = net_source ) + # TODO: make sure this 3000m extension distance is appropriate across VPUs + # TODO: also got to make sure that this will be feasible on memory on the larger VPUs... 
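+  # NOTE: illustrative sketch of the idea behind the call below, which is implemented in
+  # hydrofabric3D. For a transect that lies inside a FEMA polygon, the minimum extension
+  # distance is found by binary-searching candidate distances against the polygon boundary,
+  # then the line is extended by that amount. 'transect' and 'fema_lines' are placeholder
+  # objects, not variables defined in this runner:
+  # 
+  #   fema_lines   <- geos::as_geos_geometry(sf::st_cast(fema, "MULTILINESTRING"))
+  #   dist_to_fema <- hydrofabric3D:::geos_bs_distance(
+  #     distances    = 1:3000,      # candidate extension distances (meters)
+  #     line         = transect,    # a single transect as a geos geometry
+  #     geoms_to_cut = fema_lines,  # floodplain boundaries the extension must reach
+  #     direction    = "head"       # which end of the line to extend from
+  #   )
+  #   extended     <- hydrofabric3D::geos_extend_line(transect, dist_to_fema, "head")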
+ transects <- hydrofabric3D::get_transect_extension_distances_to_polygons( + transect_lines = transects, + polygons = fema, + flines = flines, + max_extension_distance = 3000 + ) + # save transects with only columns to be uploaded to S3 (lynker-spatial/01_transects/transects_<VPU num>.gpkg) sf::write_sf( # save dataset with only subset of columns to upload to S3 @@ -129,6 +154,9 @@ for(i in 1:nrow(path_df)) { out_path ) + transects <- sf::read_sf(out_path) + + # command to copy transects geopackage to S3 copy_to_s3 <- paste0("aws s3 cp ", out_path, " ", transects_prefix, out_file, ifelse(is.null(aws_profile), "", paste0(" --profile ", aws_profile)) diff --git a/runners/cs_runner/add_fema_to_transects.R b/runners/cs_runner/add_fema_to_transects.R index c96efb3..1cae9ca 100644 --- a/runners/cs_runner/add_fema_to_transects.R +++ b/runners/cs_runner/add_fema_to_transects.R @@ -27,7 +27,7 @@ path_df # loop over each vpu and generate cross sections, then save locally and upload to s3 bucket # for(i in 1:nrow(path_df)) { - # i = 8 + i = 8 # nextgen file and full path nextgen_file <- path_df$x[i] @@ -65,13 +65,181 @@ path_df # read in nextgen flowlines data flines <- sf::read_sf(nextgen_path, layer = "flowpaths") - system.time({ - extended_transects <- get_transect_extension_distances_to_polygons(transects, - fema, - flines, - 3000) - }) - # library(nngeo) + # + # system.time({ + # extended_transects <- get_transect_extension_distances_to_polygons(transects, fema, flines, 3000) + # }) + # + # sf::write_sf(extended_transects, '/Users/anguswatters/Desktop/test_fema_extended_trans.gpkg') + # ------- ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- --- + # ----- Generate plots of extensions ---- + # ------- ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- --- + extended_transects <- sf::read_sf('/Users/anguswatters/Desktop/test_fema_extended_trans.gpkg') %>% + hydrofabric3D::add_tmp_id() + # ids_of_interest = c("wb-1002167", "wb-1002166", "wb-1002165", "wb-1002164") + # ids_of_interest = c( "wb-1002166", "wb-1002165") + # ids_of_interest = c("wb-1014540", "wb-1014541", "wb-1014542", "wb-1014570", "wb-1014574", + # "wb-1014575", "wb-1014572" ,"wb-1014573" ,"wb-1014571", + # "wb-1014568", "wb-1014569", "wb-1014567", "wb-1014543") + + # ids_of_interest = c( "wb-1014542") + + any_extended <- + extended_transects %>% + dplyr::filter( + (left_is_extended & right_is_extended) | (left_is_extended & !right_is_extended) | (!left_is_extended & right_is_extended) + ) %>% + dplyr::mutate( + extend_status = dplyr::case_when( + (left_is_extended & right_is_extended) ~ "both", + (left_is_extended & !right_is_extended) ~ "left_only", + (!left_is_extended & right_is_extended) ~ "right_only", + TRUE ~ "no_extension" + ) + ) %>% + dplyr::mutate( + extend_distance = left_distance + right_distance + ) %>% + dplyr::relocate(extend_status, extend_distance) %>% + dplyr::filter(hy_id == "wb-1002550") + # dplyr::slice(1:1500) + + fema_subset_intersects <- sf::st_intersects(fema, any_extended) + fema_polygons <- fema[lengths(fema_subset_intersects) > 0, ] + fema_polygons + + og_trans <- + transects %>% + hydrofabric3D::add_tmp_id() %>% + dplyr::filter(tmp_id %in% any_extended$tmp_id | hy_id %in% any_extended$hy_id) %>% + dplyr::mutate( + extend_status = "original" + ) + + Flowlines <- + flines %>% + dplyr::filter(id %in% unique(any_extended$hy_id)) + + FEMA = fema_polygons + Extended = sf::st_buffer(any_extended, 10) + Original = sf::st_buffer(og_trans, 10) + + mapview::mapview(FEMA, 
col.regions = "dodgerblue") + + mapview::mapview(Flowlines, color = "darkblue") + + mapview::mapview(Extended, col.regions = "green") + + mapview::mapview(Original, col.regions = "red") + # mapview::mapview(Extended, color = "green") + + # mapview::mapview(Original, color = "red") + extend_subset <- + any_extended %>% + dplyr::filter(hy_id %in% ids_of_interest) + # dplyr::group_by(extend_status) %>% + # dplyr::arrange(-extend_distance, .by_group = TRUE) %>% + # dplyr::slice( + # which.min(extend_distance), + # which.max(extend_distance) + # ) + # dplyr::slice(1) + extend_subset <- + any_extended %>% + dplyr::group_by(extend_status) %>% + dplyr::arrange(-extend_distance, .by_group = TRUE) %>% + # dplyr::slice( + # which.min(extend_distance), + # which.max(extend_distance) + # ) + dplyr::slice(2000:2020) + extend_subset + + fline_subset <- + flines %>% + dplyr::filter(id %in% unique(extend_subset$hy_id)) + + plot_data <- + dplyr::bind_rows( + transects %>% + hydrofabric3D::add_tmp_id() %>% + dplyr::filter(tmp_id %in% extend_subset$tmp_id) %>% + dplyr::mutate( + extend_status = "original" + ), + dplyr::filter(extend_subset, + extend_status != "no_extension") + ) %>% + dplyr::relocate(extend_status, extend_distance) %>% + dplyr::mutate( + extend_status = dplyr::case_when( + extend_status != "original" ~ "extended", + TRUE ~ extend_status + ) + ) + + fema_subset_intersects <- sf::st_intersects(fema, fline_subset) + fema_polygons <- fema[lengths(fema_subset_intersects) > 0, ] + fema_polygons + + flines[lengths(sf::st_intersects(flines, fema_polygons)) > 0, ] %>% + dplyr::pull(id) %>% + unique() + fema_polygons + sf::st_crop(fema_polygons, fline_subset) + ggplot2::ggplot() + + ggplot2::geom_sf(data = sf::st_crop(fema_polygons, fline_subset), fill = "grey") + + ggplot2::geom_sf(data = fline_subset, color = "black", lwd = 1) + + # ggplot2::geom_sf(data = plot_data, ggplot2::aes(color = extend_status)) + ggplot2::geom_sf(data = dplyr::filter(plot_data, extend_status == "extended"), + color = "green") + + ggplot2::geom_sf(data = dplyr::filter(plot_data, extend_status == "original"), + color = "red") + + + mapview::mapview(fema_polygons, col.regions = "dodgerblue") + + mapview::mapview(fline_subset, color = "dodgerblue") + + mapview::mapview(dplyr::filter(plot_data, extend_status == "extended"), color = "green") + + mapview::mapview(dplyr::filter(plot_data, extend_status == "original"), color = "red") + transects %>% + hydrofabric3D::add_tmp_id() %>% + dplyr::filter(tmp_id %in% extend_subset$tmp_id) %>% + dplyr::mutate( + extend_status = "original" + ) + + extend_subset + + ggplot2::ggplot() + + ggplot2::geom_sf(data = fline_subset, color = "black", lwd = 5) + + ggplot2::geom_sf(data = extend_subset, ggplot2::aes(color = extend_status)) + + mapview::mapview(fline_subset, color = "dodgerblue") + + mapview::mapview(extend_subset, color = "green") + + + both_extended <- + extended_transects %>% + dplyr::filter(left_is_extended, right_is_extended) + + left_only_extended <- + extended_transects %>% + dplyr::filter(left_is_extended, !right_is_extended) + + right_only_extended <- + extended_transects %>% + dplyr::filter(!left_is_extended, right_is_extended) + + unique(both_extended$hy_id) + + extended_transects %>% + dplyr::filter( + # tmp_id %in% unique(left_only_extended$tmp_id), + tmp_id %in% unique(right_only_extended$tmp_id) || + tmp_id %in% unique(both_extended$tmp_id) + ) + + unique(extended_transects$hy_id) + # ------- ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- --- + # ------- 
----- ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- --- + # ------- ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- --- + # library(nngeo) # fema %>% # dplyr::group_by(fema_id) %>% # dplyr::mutate( @@ -180,10 +348,7 @@ path_df # transect_lines, set of Sf linestrigns to extend (only if the transect lines are ENTIRELLY within a polygons) # polygons, set of sf polygons that transect lines should be exteneded # max_extension_distance numeric, maximum distance (meters) to extend a transect line in either direction to try and intersect one of the "polygons" - get_transect_extension_distances_to_polygons <- function(transect_lines, - polygons, - flines, - max_extension_distance) { + get_transect_extension_distances_to_polygons <- function(transect_lines, polygons, flines, max_extension_distance) { ### ### ### ### ### ### ### # transect_lines <- transects @@ -789,9 +954,7 @@ path_df # --------------------------------------------------- # start %>% class() - - - } + } # transects2 <- transects # dplyr::mutate( @@ -822,90 +985,7 @@ path_df return(transects) } - # transects2 %>% - # dplyr::filter(left_is_extended, right_is_extended) -# -# any_extended <- -# transects2 %>% -# dplyr::filter(left_is_extended | right_is_extended) -# -# any_flines <- -# flines %>% -# dplyr::filter(id %in% any_extended$hy_id) -# -# # left_only_extended <- -# # transects2 %>% -# # dplyr::filter(left_is_extended, !right_is_extended) -# # -# # left_only_flines <- -# # flines %>% -# # dplyr::filter(id %in% left_only_extended$hy_id) -# # -# # right_only_extended <- -# # transects2 %>% -# # dplyr::filter(!left_is_extended, right_is_extended) -# # -# # right_only_flines <- -# # flines %>% -# # dplyr::filter(id %in% right_only_extended$hy_id) -# -# # left_fema_polygons <- -# # left_trans %>% -# # ---------------------------------------------------------------- -# # ------- Subset data for mapping ----------- -# # ---------------------------------------------------------------- -# extended_for_map <- -# any_extended %>% -# dplyr::slice(1:1000) -# -# og_transects_for_map <- -# transects %>% -# hydrofabric3D::add_tmp_id() %>% -# dplyr::filter(tmp_id %in% hydrofabric3D::add_tmp_id(extended_for_map)$tmp_id) -# -# fema_indexes_in_aoi <- -# dplyr::bind_rows( -# sf::st_drop_geometry( -# dplyr::rename(left_trans, fema_index = left_fema_index) -# ), -# sf::st_drop_geometry( -# dplyr::rename(right_trans, -# fema_index = right_fema_index) -# ) -# ) %>% -# hydrofabric3D::add_tmp_id() %>% -# dplyr::filter(tmp_id %in% hydrofabric3D::add_tmp_id(extended_for_map)$tmp_id) %>% -# # dplyr::filter( -# # # tmp_id %in% hydrofabric3D::add_tmp_id(left_only_extended)$tmp_id | -# # # tmp_id %in% hydrofabric3D::add_tmp_id(right_only_extended)$tmp_id -# # -# # tmp_id %in% unique(hydrofabric3D::add_tmp_id(dplyr::filter(transects2, left_is_extended, right_is_extended))$tmp_id) -# # -# # ) %>% -# # dplyr::filter(left_is_within_fema | right_is_within_fema) %>% -# # dplyr::slice(1:200) %>% -# .$fema_index %>% -# unlist() %>% -# na.omit() %>% -# unique() %>% -# sort() -# # length() -# -# sf::st_as_sf(intersect_polygons[fema_indexes_in_aoi]) -# -# # hydrofabric3D::add_tmp_id(left_only_extended)$tmp_id -# # transects_with_distances -# # %>% -# mapview::mapview( sf::st_as_sf(intersect_polygons[fema_indexes_in_aoi]), col.regions = "lightblue") + -# mapview::mapview(any_flines, color = "dodgerblue") + -# mapview::mapview(og_transects_for_map, color = "green") + -# mapview::mapview(extended_for_map, color = "red") -# -# # 
mapview::mapview(left_only_flines, color = "dodgerblue") + -# # mapview::mapview(right_only_flines, color = "dodgerblue") + -# # mapview::mapview(left_only_extended, color = "red") + -# # mapview::mapview(right_only_extended, color = "green") -# # transects$cs_lengthm <- length_list + # # ---------------------------------------------------------------- # # ---------------------------------------------------------------- # # ---------------------------------------------------------------- From 1ba56d4a5f52bac07f33f00aa73c1935df3933ee Mon Sep 17 00:00:00 2001 From: anguswg-ucsb <anguswatters@gmail.com> Date: Thu, 11 Jul 2024 15:54:28 -0700 Subject: [PATCH 40/64] reworked final step in producing fema geometries to better resolve internal boundaries --- .gitignore | 3 +- runners/cs_runner/01_transects.R | 198 ++++++++++++++++++++-- runners/cs_runner/add_fema_to_transects.R | 3 + runners/cs_runner/preprocess_fema.R | 135 +++++++++++---- runners/cs_runner/utils.R | 117 +++++++++++++ 5 files changed, 405 insertions(+), 51 deletions(-) diff --git a/.gitignore b/.gitignore index 3d01a85..182b0e0 100644 --- a/.gitignore +++ b/.gitignore @@ -10,4 +10,5 @@ inst/doc vignettes/cihro-data data check -.Rapp.history \ No newline at end of file +.Rapp.history +runners/cs_runner/fema_extension_plotting.R \ No newline at end of file diff --git a/runners/cs_runner/01_transects.R b/runners/cs_runner/01_transects.R index 4d6c746..e8d0476 100644 --- a/runners/cs_runner/01_transects.R +++ b/runners/cs_runner/01_transects.R @@ -26,7 +26,7 @@ path_df <- align_files_by_vpu( # loop over each VPU and generate cross sections, then save locally and upload to S3 bucket for(i in 1:nrow(path_df)) { - # i = 8 + # i = 8 # nextgen file and full path nextgen_file <- path_df$x[i] nextgen_path <- paste0(nextgen_dir, nextgen_file) @@ -40,9 +40,7 @@ for(i in 1:nrow(path_df)) { vpu_fema_files <- list.files(fema_vpu_dir, full.names = TRUE) vpu_fema_file <- vpu_fema_files[grepl(paste0(vpu, "_union.gpkg"), vpu_fema_files)] - # fema polygons and transect lines - fema <- sf::read_sf(vpu_fema_file) - + # # model attributes file and full path # model_attr_file <- path_df$y[i] # model_attr_path <- paste0(model_attr_dir, model_attr_file) @@ -50,8 +48,10 @@ for(i in 1:nrow(path_df)) { message("Creating VPU ", vpu, " transects:", "\n - flowpaths: '", nextgen_file, "'", - "\n - FEMA polygons: ", basename(vpu_fema_file) + "\n - FEMA polygons: '", + basename(vpu_fema_file), "'" ) + # message("Creating VPU ", path_df$vpu[i], " transects:\n - flowpaths: '", nextgen_file, "'\n - model attributes: '", model_attr_file, "'") # read in nextgen data @@ -85,6 +85,7 @@ for(i in 1:nrow(path_df)) { lengthkm, tot_drainage_areasqkm, bf_width, + mainstem, geometry = geom ) @@ -111,7 +112,9 @@ for(i in 1:nrow(path_df)) { # precision = 1, add = TRUE # whether to add back the original data ) - + + gc() + time2 <- Sys.time() time_diff <- round(as.numeric(time2 - time1 ), 2) @@ -121,7 +124,6 @@ for(i in 1:nrow(path_df)) { out_file <- paste0("nextgen_", path_df$vpu[i], "_transects.gpkg") out_path <- paste0(transects_dir, out_file) - message("Saving transects to:\n - filepath: '", out_path, "'") # add cs_source column and rename cs_widths to cs_lengthm transects <- @@ -129,16 +131,177 @@ for(i in 1:nrow(path_df)) { dplyr::mutate( cs_source = net_source ) + # --------------------------------------------------------------------- + # --- Extend transects out to FEMA 100yr floodplains + # --------------------------------------------------------------------- + 
message("Reading in FEMA polygons...") + # fema polygons and transect lines + fema <- sf::read_sf(vpu_fema_file) + + # mapview::npts(fema) + message("Simplifying FEMA polygons...") + # TODO: this should be a function argument OR removed, shouldn't probably forcibly and silently simplify the input polygons without user knowing.. + # keep 10% of the original points for speed + # fema <- rmapshaper::ms_simplify(fema, keep_shapes = F, keep = 0.01) + fema <- rmapshaper::ms_simplify(fema, keep_shapes = F, keep = 0.01, sys = TRUE, sys_mem = 16) + + # mapview::npts(fema) + + # # TODO: the flines argument needs the "hy_id" column to be named "id" + # # TODO: probably should fix this in hydrofabric3D::get_transect_extension_distances_to_polygons() + # flines <- + # flines %>% + # dplyr::rename(id = hy_id) + + message("Extending transects out to FEMA 100yr floodplain polygon boundaries - (", Sys.time(), ")") + + # # TODO: hacky, need to fix the extend-trancterts to polygons function to not need these columns + # transects <- + # transects %>% + # dplyr::rename(geom = geometry) %>% + # dplyr::mutate( + # is_extended = FALSE + # ) + + transects <- + transects %>% + dplyr::left_join( + dplyr::select(sf::st_drop_geometry(flines), + hy_id, + mainstem + ), + by = "hy_id" + ) + # system.time({ + # TODO: make sure this 3000m extension distance is appropriate across VPUs # TODO: also got to make sure that this will be feasible on memory on the larger VPUs... - transects <- hydrofabric3D::get_transect_extension_distances_to_polygons( - transect_lines = transects, - polygons = fema, - flines = flines, - max_extension_distance = 3000 - ) - + transects <- hydrofabric3D::extend_transects_to_polygons( + transect_lines = transects, + polygons = fema, + flowlines = flines, + crosswalk_id = "hy_id", + intersect_group_id = "mainstem", + max_extension_distance = 3000 + ) + + # }) + + message("FEMA extensions complete! 
- ( ", Sys.time(), " )") + + transects <- dplyr::select(transects, -tmp_id) + transects <- hydrofabric3D::add_tmp_id(transects) + + # transects <- + transects3 %>% + # dplyr::select(-cs_lengthm) %>% + # dplyr::mutate(is_fema_extended = left_is_extended | right_is_extended) %>% + dplyr::select( + hy_id, + cs_id, + cs_lengthm, + # cs_lengthm = new_cs_lengthm, + cs_source, + cs_measure, + geometry + # is_extended, + # is_fema_extended, + # geometry = geom + ) + + gc() + + # # --------------------------------------------------------------------- + # # --------------------------------------------------------------------- + # + # transects <- sf::read_sf(out_path) + # + # # flines <- + # # flines %>% + # # dplyr::filter(hy_id %in% transects$hy_id) + # # dplyr::slice(1:1000) + # + # flines <- + # flines %>% + # dplyr::slice(seq(1, nrow(flines), 10)) + # # + # transects <- + # transects %>% + # dplyr::filter(hy_id %in% flines$hy_id) + # # + # + # # TODO: the flines argument needs the "hy_id" column to be named "id" + # # TODO: probably should fix this in hydrofabric3D::get_transect_extension_distances_to_polygons() + # flines <- + # flines %>% + # dplyr::rename(id = hy_id) + # + # + # # fema_keep <- rmapshaper::ms_simplify(fema, keep_shapes = T, keep = 0.01) + # + # mapview::mapview(dplyr::filter(fema, fema_id %in% 1:10), col.regions = "red") + + # mapview::mapview(dplyr::filter(fema_keep, fema_id %in% 1:10), col.regions = "green") + # # mapview::mapview(dplyr::filter(fema_nokeep, fema_id %in% 1:10), col.regions = "dodgerblue") + # transects + # + # # system.time({ + # # profvis::profvis({ + # + # # TODO: make sure this 3000m extension distance is appropriate across VPUs + # # TODO: also got to make sure that this will be feasible on memory on the larger VPUs... 
+ # # transects2 <- hydrofabric3D::get_transect_extension_distances_to_polygons( + # # transect_lines = transects, + # # polygons = fema, + # # flines = flines, + # # max_extension_distance = 3000 + # # ) + # transects2 <- hydrofabric3D::extend_transects_to_polygons( + # transect_lines = transects, + # polygons = fema, + # flowlines = flines, + # max_extension_distance = 3000 + # ) + # # }) + # # }) + # + # # mapview::mapview(transects2, color = "green") + + # # mapview::mapview(transects, color = "red") + + # # mapview::mapview(fema_keep, col.regions = "dodgerblue") + # # --------------------------------------------------------------------- + # # ---------------------------------------------------------------------------------------------------------------- + # + # transects2 <- dplyr::select(transects2, -tmp_id) + # transects2 <- hydrofabric3D::add_tmp_id(transects2) + # + # extended_ids <- + # transects2 %>% + # dplyr::filter(left_is_extended | right_is_extended) %>% + # dplyr::pull(hy_id) %>% + # unique() + # + # start_trans <- dplyr::filter(transects, hy_id %in% extended_ids[1:150]) + # end_trans <- dplyr::filter(transects2, hy_id %in% extended_ids[1:150]) + # mapview::mapview(start_trans, color = "red") + + # mapview::mapview(end_trans, color = "green") + # transects <- + # transects %>% + # dplyr::select(-cs_lengthm) %>% + # # dplyr::mutate(is_fema_extended = left_is_extended | right_is_extended) %>% + # dplyr::select( + # hy_id, + # cs_id, + # # cs_lengthm, + # cs_lengthm = new_cs_lengthm, + # cs_source, + # cs_measure, + # # is_extended, + # # is_fema_extended, + # geometry = geom + # ) + + message("Saving transects to:\n - filepath: '", out_path, "'") + # save transects with only columns to be uploaded to S3 (lynker-spatial/01_transects/transects_<VPU num>.gpkg) sf::write_sf( # save dataset with only subset of columns to upload to S3 @@ -154,9 +317,6 @@ for(i in 1:nrow(path_df)) { out_path ) - transects <- sf::read_sf(out_path) - - # command to copy transects geopackage to S3 copy_to_s3 <- paste0("aws s3 cp ", out_path, " ", transects_prefix, out_file, ifelse(is.null(aws_profile), "", paste0(" --profile ", aws_profile)) @@ -169,6 +329,7 @@ for(i in 1:nrow(path_df)) { system(copy_to_s3, intern = TRUE) message("Overwritting local copy of transects to include 'is_extended' column...\n==========================") + # Overwrite transects with additional columns for development purposes (is_extended) to have a local copy of dataset with information about extensions sf::write_sf( dplyr::select( @@ -184,4 +345,7 @@ for(i in 1:nrow(path_df)) { ), out_path ) + + rm(fema, transects, flines) + gc() } diff --git a/runners/cs_runner/add_fema_to_transects.R b/runners/cs_runner/add_fema_to_transects.R index 1cae9ca..a5ac6f3 100644 --- a/runners/cs_runner/add_fema_to_transects.R +++ b/runners/cs_runner/add_fema_to_transects.R @@ -24,6 +24,9 @@ path_df <- align_files_by_vpu( path_df +# # install.packages("devtools") +devtools::install_github("anguswg-ucsb/hydrofabric3D") + # loop over each vpu and generate cross sections, then save locally and upload to s3 bucket # for(i in 1:nrow(path_df)) { diff --git a/runners/cs_runner/preprocess_fema.R b/runners/cs_runner/preprocess_fema.R index 52653d3..160593e 100644 --- a/runners/cs_runner/preprocess_fema.R +++ b/runners/cs_runner/preprocess_fema.R @@ -19,6 +19,7 @@ source("runners/cs_runner/utils.R") library(dplyr) library(sf) library(geos) +library(fastmap) # ------------------------------------------------------------------------------------- # ---- 
OVERWRITE_FEMA_FILES constant logical ---- @@ -108,7 +109,8 @@ FEMA_FILE_PATHS <- paste0(FEMA_FGB_PATH, "/", FEMA_FILENAMES) # ------------------------------------------------------------------------------------- for (file in FEMA_FILENAMES) { - + # message(file) + local_fema_path <- paste0(FEMA_FGB_PATH, "/", file) geojson_filename <- gsub(".fgb", ".geojson", file) @@ -146,15 +148,25 @@ for (file in FEMA_geojson_paths) { # message("Fema 100 year flood plain:\n > '", file, "'") output_clean_filename <- gsub(".geojson", "_clean.geojson", basename(file)) output_path <- paste0(FEMA_CLEAN_PATH, "/", output_clean_filename) - + clean_geojson_exists <- file.exists(output_path) message(" >>> '", output_clean_filename, "' already exists? ", clean_geojson_exists) message(" >>> Overwrite? ", OVERWRITE_FEMA_FILES) + # start_fema <- sf::read_sf(file) + + # mapshaper_command = paste0('node --max-old-space-size=16000 /opt/homebrew/bin/mapshaper ', file, + # ' -simplify 0.15 visvalingam \\', + # ' -dissolve \\', + # ' -explode \\', + # ' -o ', output_path + # ) + mapshaper_command = paste0('node --max-old-space-size=16000 /opt/homebrew/bin/mapshaper ', file, - ' -simplify 0.15 visvalingam \\', - ' -dissolve \\', - ' -explode \\', + ' -dissolve2 FLD_AR_ID \\', + ' -simplify 0.1 visvalingam \\', + # ' -explode \\', + ' -snap \\', ' -o ', output_path ) @@ -163,12 +175,14 @@ for (file in FEMA_geojson_paths) { system(mapshaper_command) message("Writting '", output_clean_filename, "' to: \n > '", output_path, "'") } + # end_fema <- sf::read_sf(output_path) message() -} + + } # ------------------------------------------------------------------------------------- -# ---- Convert cleaned FEMA geometries to geopackages ---- +# ---- Convert cleaned FEMA geojson geometries to geopackages ---- # ------------------------------------------------------------------------------------- # paths to FEMA 100 year flood plain files @@ -182,8 +196,6 @@ for (file in FEMA_clean_paths) { message("Converting GEOJSON file to GPKG:\n > '", basename(file), "' > '", output_gpkg_filename, "'") - # system(ogr2ogr_command) - clean_gpkg_exists <- file.exists(output_path) message(" >>> '", output_gpkg_filename, "' already exists? 
", clean_gpkg_exists) @@ -199,41 +211,97 @@ for (file in FEMA_clean_paths) { message() } -# # ------------------------------------------------------------------------------------- -# # ---- Apply hydrofab::clean_geometries() to cleaned FEMA geometries ---- -# # ------------------------------------------------------------------------------------- -# +# # ------------------------------------------------------------------------------------------------------------------- +# # ---- Apply final dissolve/snap and removal of internal boudnaries in FEMA geometries ---- +# # ------------------------------------------------------------------------------------------------------------------- + # paths to FEMA 100 year flood plain files FEMA_gpkg_paths <- list.files(FEMA_GPKG_PATH, full.names = TRUE) for (file_path in FEMA_gpkg_paths) { message("Applying hydrofab::clean_geometry() to:\n > '", basename(file_path), "'") + + fema <- sf::read_sf(file_path) + + fema <- + fema[!sf::st_is_empty(fema), ] %>% + sf::st_transform(5070) + + # TODO: Snap using geos::geos_snap() + # fema <- + # geos::geos_snap( + # geos::as_geos_geometry(fema), + # geos::as_geos_geometry(fema), + # tolerance = 1 + # ) %>% + # geos::geos_make_valid() %>% + # sf::st_as_sf() + + # TODO: we get this error when trying to use the geometry column after geos snapping + # TODO: Error = "Error: Not compatible with STRSXP: [type=NULL]." + # fema %>% + # sf::st_cast("POLYGON") + + # TODO: Snap using sf::st_snap() + # fema <- sf::st_snap( + # fema, + # fema, + # tolerance = 2 + # ) fema <- - file_path %>% - sf::read_sf() %>% - sf::st_transform(5070) %>% - sf::st_cast("POLYGON") %>% + fema %>% + # fema[!sf::st_is_empty(fema), ] %>% + dplyr::select(geometry = geom) %>% + add_predicate_group_id(sf::st_intersects) %>% + sf::st_make_valid() %>% + dplyr::group_by(group_id) %>% + dplyr::summarise( + geometry = sf::st_combine(sf::st_union(geometry)) + ) %>% + dplyr::ungroup() %>% + dplyr::select(-group_id) %>% + add_predicate_group_id(sf::st_intersects) %>% + rmapshaper::ms_dissolve(sys = TRUE, sys_mem = 16) %>% + rmapshaper::ms_explode(sys = TRUE, sys_mem = 16) %>% dplyr::mutate( - fema_id = 1:dplyr::n() - ) %>% - dplyr::select(fema_id, geometry = geom) - - message(" > ", nrow(fema), " POLYGONs") - message("Start time: ", Sys.time()) - - fema_clean <- hydrofab::clean_geometry( - catchments = fema, - ID = "fema_id" - ) - - fema_clean <- - fema_clean %>% + fema_id = as.character(1:dplyr::n()) + ) %>% + dplyr::select(fema_id, geometry) + + # mapview::mapview(fema, color = 'cyan', col.regions = "cyan") + + # mapview::mapview(end_fema, color = 'red', col.regions = "white") + # mapview::mapview(start_fema$geom, color = "red", col.regions = "red") + + # mapview::mapview(end_fema$geom, color = 'limegreen', col.regions = "limegreen") + + # mapview::mapview(snap_union_sf, color = 'gold', col.regions = "gold") + + # mapview::mapview(final_fema, color = 'white', col.regions = "white") + + # mapview::mapview(fin, color = 'white', col.regions = "white") + + # message(" > ", nrow(fema), " POLYGONs") + # message("Start time: ", Sys.time()) + # + # fema_clean <- hydrofab::clean_geometry( + # catchments = fema, + # ID = "fema_id" + # ) + # + # fema_clean <- + # fema_clean %>% + # dplyr::mutate( + # source = basename(file_path), + # state = gsub("-100yr-flood_valid_clean.gpkg", "", source) + # ) %>% + # dplyr::select(fema_id, source, state, areasqkm, geometry) + + fema <- + fema %>% dplyr::mutate( source = basename(file_path), state = gsub("-100yr-flood_valid_clean.gpkg", 
"", source) ) %>% - dplyr::select(fema_id, source, state, areasqkm, geometry) + dplyr::select(fema_id, source, state, + # areasqkm, + geometry) message("End time: ", Sys.time()) @@ -245,7 +313,8 @@ for (file_path in FEMA_gpkg_paths) { if (OVERWRITE_FEMA_FILES) { message("Writting '", basename(file_path), "' to: \n > '", file_path, "'") sf::write_sf( - fema_clean, + # fema_clean, + fema, file_path ) } diff --git a/runners/cs_runner/utils.R b/runners/cs_runner/utils.R index ae40b70..8bfd145 100644 --- a/runners/cs_runner/utils.R +++ b/runners/cs_runner/utils.R @@ -26,3 +26,120 @@ polygons_with_line_intersects <- function(polygons = NULL, lines = NULL) { return(polygons_with_lines) } + +add_predicate_group_id <- function(polys, predicate) { + # GROUP BY SPATIAL PREDICATES + # ----------------------------------------- + # predicate = sf::st_touches + # polys <- sf_df + # ----------------------------------------- + + + relations <- predicate(polys) + + relations <- lapply(seq_along(relations), function(i) { as.character(sort(unique(c(relations[i][[1]], i)))) }) + + group_ids_map <- fastmap::fastmap() + ids_to_groups <- fastmap::fastmap() + + group_id <- 0 + + for (i in seq_along(relations)) { + + predicate_ids <- relations[i][[1]] + + # message("(", i, ") - ", predicate_ids) + # message("Start Group ID: ", group_id) + + id_group_check <- ids_to_groups$has(predicate_ids) + + if(any(id_group_check)) { + + known_groups <- ids_to_groups$mget(predicate_ids) + known_group <- known_groups[unname(sapply(known_groups , function(kg) { + !is.null(kg) + }))][[1]] + + # message("IDs part of past group ID > '", known_group, "'") + + past_group_ids <- group_ids_map$get(known_group)[[1]] + updated_group_ids <- as.character( + sort(as.numeric(unique(c(past_group_ids, predicate_ids)))) + ) + + group_ids_map$set(known_group, list(updated_group_ids)) + + new_ids <- predicate_ids[!predicate_ids %in% past_group_ids] + + # message("Adding ", new_ids, " to seen set...") + + # add any newly added IDs to the seen map + for (seen_id in new_ids) { + # message(seen_id) + ids_to_groups$set(as.character(seen_id), as.character(group_id)) + } + + } else { + # get a new group ID number + group_id <- group_id + 1 + # message("IDs form NEW group > '", group_id, "'") + + # create a new key in the map with the predicate IDs list as the value + group_ids_map$set(as.character(group_id), list(predicate_ids)) + + # message("Adding ", predicate_ids, " to seen set...") + + # add each predicate ID to the map storing the seen indexes and their respecitve group IDs + for (seen_id in predicate_ids) { + # message(seen_id) + ids_to_groups$set(as.character(seen_id), as.character(group_id)) + } + } + # message("End group ID: ", group_id, "\n") + } + + group_ids <- group_ids_map$as_list() + + grouping_df <- lapply(seq_along(group_ids), function(i) { + # i = 2 + grouping <- group_ids[i] + group_id <- names(grouping) + indices <- grouping[[1]][[1]] + + data.frame( + index = as.numeric(indices), + group_id = rep(group_id, length(indices)) + ) + + }) %>% + dplyr::bind_rows() %>% + dplyr::arrange(i) + + # count up the number of IDs for each group, well use this to determine which group + # to put any indices that had MULTIPLE groups they were apart of (use the group with the most other members) + group_id_counts <- + grouping_df %>% + dplyr::group_by(group_id) %>% + dplyr::count() %>% + # dplyr::arrange(-n) %>% + dplyr::ungroup() + + # select the IDs with the most other members + grouping_df <- + grouping_df %>% + dplyr::left_join( + group_id_counts, 
+          by = 'group_id'
+        ) %>% 
+        dplyr::group_by(index) %>% 
+        dplyr::slice_max(n, with_ties = FALSE) %>% 
+        dplyr::ungroup() %>% 
+        dplyr::select(-n) %>% 
+        dplyr::arrange(-index)
+    
+    polys$group_id <- grouping_df$group_id
+    
+    return(polys)
+    
+}
+

From fcedf0759b75ef61b8253935d405383b64c9fd5f Mon Sep 17 00:00:00 2001
From: anguswg-ucsb <anguswatters@gmail.com>
Date: Fri, 12 Jul 2024 08:42:05 -0700
Subject: [PATCH 41/64] reworking processing of fema FGBs to improve transect
 extensions

---
 runners/cs_runner/preprocess_fema.R | 21 ++++++++++-----------
 1 file changed, 10 insertions(+), 11 deletions(-)

diff --git a/runners/cs_runner/preprocess_fema.R b/runners/cs_runner/preprocess_fema.R
index 160593e..1498151 100644
--- a/runners/cs_runner/preprocess_fema.R
+++ b/runners/cs_runner/preprocess_fema.R
@@ -20,6 +20,10 @@ library(dplyr)
 library(sf)
 library(geos)
 library(fastmap)
+library(nngeo)
+
+# TODO: Steps that convert FGB to geojson and then geojson to gpkg can be put into a single loop
+# TODO: Delete old files as needed
 
 # -------------------------------------------------------------------------------------
 # ---- OVERWRITE_FEMA_FILES constant logical ----
@@ -330,9 +334,8 @@ for (file_path in FEMA_gpkg_paths) {
 FEMA_CLEAN_GPKG_PATHS <- list.files(FEMA_GPKG_PATH, full.names = TRUE)
 
 # paths to nextgen datasets and model attribute parquet files
-NEXTGEN_FILENAMES <- list.files(nextgen_dir, full.names = FALSE)
-NEXTGEN_FILE_PATHS <- paste0(nextgen_dir, NEXTGEN_FILENAMES)
-# OVERWRITE_FEMA_FILES
+NEXTGEN_FILENAMES  <- list.files(nextgen_dir, full.names = FALSE)
+NEXTGEN_FILE_PATHS <- paste0(nextgen_dir, NEXTGEN_FILENAMES)
 
 for (file_path in FEMA_CLEAN_GPKG_PATHS) {
   fema_file <- basename(file_path)
@@ -371,8 +374,7 @@ for (file_path in FEMA_CLEAN_GPKG_PATHS) {
     dplyr::mutate(
       vpu = vpu
     ) %>% 
-    dplyr::select(vpu, fema_id, source, state, 
-                  areasqkm, geom)
+    dplyr::select(vpu, fema_id, source, state, geom)
 
   # state <- gsub("-100yr-flood_valid_clean.gpkg", "", fema_file)
 
@@ -409,14 +411,11 @@ for (file_path in FEMA_CLEAN_GPKG_PATHS) {
 
 DELETE_STAGING_GPKGS <- FALSE 
 
-# FEMA_VPU_SUBFOLDERS
-
 for (vpu_dir in FEMA_VPU_SUBFOLDERS) {
-# for (i in 1:4) {
-  vpu_dir = FEMA_VPU_SUBFOLDERS[i]
+  # for (i in 1:4) {
+  # vpu_dir = FEMA_VPU_SUBFOLDERS[i]
   message("Merging files in '", basename(vpu_dir), "' directory...")
-
-# }
+  # }
 
   # vpu_dir <- '/Users/anguswatters/Desktop/lynker-spatial/FEMA_BY_VPU/VPU_06'
   vpu_subdirs <- list.files(vpu_dir, full.names = TRUE)

From e1c66212cf01f54efe3c750f5d4350fa546126a2 Mon Sep 17 00:00:00 2001
From: anguswg-ucsb <anguswatters@gmail.com>
Date: Fri, 12 Jul 2024 14:17:48 -0700
Subject: [PATCH 42/64] wip on removing holes but not losing many polygons

---
 runners/cs_runner/preprocess_fema.R | 83 ++++++++++++++++++++++-------
 1 file changed, 65 insertions(+), 18 deletions(-)

diff --git a/runners/cs_runner/preprocess_fema.R b/runners/cs_runner/preprocess_fema.R
index 1498151..b45948b 100644
--- a/runners/cs_runner/preprocess_fema.R
+++ b/runners/cs_runner/preprocess_fema.R
@@ -473,10 +473,9 @@ for (vpu_dir in FEMA_VPU_SUBFOLDERS) {
 # -------------------------------------------------------------------------------------
 
 MERGED_DIRS <- paste0(FEMA_VPU_SUBFOLDERS, "/merged")
-
+MERGED_DIRS
 for (i in 1:length(FEMA_VPU_SUBFOLDERS)) {
   # i = 8
-  # i
   vpu_dir = FEMA_VPU_SUBFOLDERS[i]
   
   VPU <- basename(vpu_dir)
@@ -502,22 +501,50 @@ for (i in 1:length(FEMA_VPU_SUBFOLDERS)) {
   fema_vpu_file <- fema_vpu_file[!grepl("_union.gpkg", fema_vpu_file)]
   
   fema_vpu <- sf::read_sf(fema_vpu_file)
-  
-  # 
fema_ids <- c(695) # fema_vpu <- # fema_vpu %>% # dplyr::group_by(source) %>% # dplyr::summarise() %>% # dplyr::ungroup() - # fema_vpu + + mapview::npts(fema_vpu) + fema_vpu2 <- + fema_vpu %>% + nngeo::st_remove_holes(max_area = 20) %>% + # dplyr::select(geometry = geom) %>% + add_predicate_group_id(sf::st_intersects) %>% + sf::st_make_valid() %>% + dplyr::group_by(group_id) %>% + dplyr::summarise( + geometry = sf::st_combine(sf::st_union(geometry)) + ) %>% + dplyr::ungroup() %>% + dplyr::select(-group_id) %>% + add_predicate_group_id(sf::st_intersects) %>% + rmapshaper::ms_dissolve(sys = TRUE, sys_mem = 16) %>% + rmapshaper::ms_explode(sys = TRUE, sys_mem = 16) %>% + dplyr::mutate( + fema_id = as.character(1:dplyr::n()) + ) %>% + dplyr::select(fema_id, geometry) + + fema_vpu2 %>% mapview::npts() + + fema_vpu2_subset <- fema_vpu2[lengths(sf::st_intersects(fema_vpu2, fema_vpu[1:100, ])) > 1, ] - # fema_snapped <- sf::st_snap(fema_vpu, fema_vpu, tolerance = 10) - # sf::st_ + mapview::mapview(fema_vpu[1:100, ], color = 'red', col.regions = 'white') + + mapview::mapview(fema_vpu2_subset, color = 'green', col.regions = 'white') + # mapview::mapview(fema_vpu2[1:100, ], color = 'green', col.regions = 'white') + # message("Removing holes before dissolve...") fema_vpu <- nngeo::st_remove_holes(fema_vpu) - # + mapview::mapview(fema_vpu[1:100, ], color = 'red', col.regions = 'white') + + mapview::mapview(fema_vpu2[1:100, ], color = 'green', col.regions = 'white') + # mapview::npts(fema_vpu) + all(sf::st_is_valid(fema_vpu)) + rmapshaper::ms_innerlines(fema_vpu) + + # message("Making valid geometries...") # fema_vpu <- sf::st_make_valid(fema_vpu) @@ -527,8 +554,8 @@ for (i in 1:length(FEMA_VPU_SUBFOLDERS)) { message("Dissolving...") - # 2633 = old number of polygons - fema_vpu <- rmapshaper::ms_dissolve( + # 1421 = old number of polygons + fema_vpu2 <- rmapshaper::ms_dissolve( input = fema_vpu, field = "source", sys = TRUE, @@ -536,18 +563,38 @@ for (i in 1:length(FEMA_VPU_SUBFOLDERS)) { ) message("Exploding...") + # mapview::npts(fema_vpu) - # mapview::npts(fema_vpu_dissolve) - fema_vpu <- rmapshaper::ms_explode( - input = fema_vpu, + # mapview::npts(fema_vpu2) + + fema_vpu2 <- rmapshaper::ms_explode( + input = fema_vpu2, sys = TRUE, sys_mem = 16 ) - # mapview::npts(fema_exp) + # mapview::npts(fema_vpu2) message("Removing holes after explosion...") - fema_vpu <- nngeo::st_remove_holes(fema_vpu) - # mapview::npts(fema_exp_noholes) + fema_vpu2 <- nngeo::st_remove_holes(fema_vpu2) + fema_vpu2 <- + fema_vpu2 %>% + add_predicate_group_id(sf::st_intersects) %>% + dplyr::group_by(group_id) %>% + dplyr::summarise( + geometry = sf::st_combine(sf::st_union(geometry)) + ) + + mapview::mapview(fema_vpu[1:100, ], color = 'red', col.regions = 'white') + + mapview::mapview(fema_vpu2[1:00, ], color = 'green', col.regions = 'white') + + # mapview::npts(fema_vpu2) + sf::st_is_valid(fema_vpu2) %>% all() + + fema_vpu2 %>% + sf::st_make_valid() %>% + sf::st_geometry_type() %>% + unique() + sf::st_geometry_type(fema_vpu2) %>% unique() # slice_subset = 1:50 # fema_exp_noholes[slice_subset, ] # mapview::mapview( fema_vpu[1:100, ], col.regions = "dodgerblue")+ From 42ad438ca6a2a24dfebea732ae286b1f7c96ae2d Mon Sep 17 00:00:00 2001 From: anguswg-ucsb <anguswatters@gmail.com> Date: Tue, 16 Jul 2024 13:45:52 -0700 Subject: [PATCH 43/64] replaced preprocess fema with new version which better resolves internal boundaries and leaves fewer islands/holes --- runners/cs_runner/preprocess_fema.R | 489 +++++++++----------------- 
runners/cs_runner/preprocess_fema2.R | 504 +++++++++++++++++++++++++++ 2 files changed, 666 insertions(+), 327 deletions(-) create mode 100644 runners/cs_runner/preprocess_fema2.R diff --git a/runners/cs_runner/preprocess_fema.R b/runners/cs_runner/preprocess_fema.R index b45948b..1e1cba5 100644 --- a/runners/cs_runner/preprocess_fema.R +++ b/runners/cs_runner/preprocess_fema.R @@ -32,24 +32,13 @@ library(nngeo) # ------------------------------------------------------------------------------------- # Default is TRUE (i.e. a fresh processing run is done from start to finish) -OVERWRITE_FEMA_FILES <- TRUE +OVERWRITE_FEMA_FILES <- TRUE +DELETE_STAGING_GPKGS <- TRUE # ------------------------------------------------------------------------------------- # ---- Create directories (if they do NOT exist) ---- # ------------------------------------------------------------------------------------- -# create geojsons directory (if not exists) -if (!dir.exists(FEMA_GEOJSON_PATH)) { - message(paste0(FEMA_GEOJSON_PATH, " directory does not exist...\nCreating directory:\n > '", FEMA_GEOJSON_PATH, "'")) - dir.create(FEMA_GEOJSON_PATH) -} - -# create directory for cleaned FEMA geometries (if not exists) -if (!dir.exists(FEMA_CLEAN_PATH)) { - message(paste0(FEMA_CLEAN_PATH, " directory does not exist...\nCreating directory:\n > '", FEMA_CLEAN_PATH, "'")) - dir.create(FEMA_CLEAN_PATH) -} - # create directory for cleaned FEMA geometries as geopackages (if not exists) if (!dir.exists(FEMA_GPKG_PATH)) { message(paste0(FEMA_GPKG_PATH, " directory does not exist...\nCreating directory:\n > '", FEMA_GPKG_PATH, "'")) @@ -66,171 +55,132 @@ for (VPU_SUBFOLDER in FEMA_VPU_SUBFOLDERS) { # create directory for FEMA geomteries by VPU # message(VPU_SUBFOLDER) - state_dir = paste0(VPU_SUBFOLDER, "/states/") - merged_dir = paste0(VPU_SUBFOLDER, "/merged/") + # state_dir = paste0(VPU_SUBFOLDER, "/states/") + # merged_dir = paste0(VPU_SUBFOLDER, "/merged/") if (!dir.exists(VPU_SUBFOLDER)) { message("Creating FEMA VPU subfolder...") message(paste0("'/", basename(VPU_SUBFOLDER), "' directory does not exist...\n Creating directory:\n > '", VPU_SUBFOLDER, "'")) dir.create(VPU_SUBFOLDER) } - - if (!dir.exists(state_dir)) { - message("Creating FEMA VPU states subfolder...") - message(paste0("'/", basename(state_dir), "' directory does not exist...\n Creating directory:\n > '", state_dir, "'")) - - dir.create(state_dir) - - } - - if (!dir.exists(merged_dir)) { - message("Creating FEMA VPU merged subfolder...") - message(paste0("'/", basename(merged_dir), "' directory does not exist...\n Creating directory:\n > '", merged_dir, "'")) - - dir.create(merged_dir) - - } -} - -# FEMA_VPU_SUBFOLDERS <- paste0(FEMA_BY_VPU_PATH, "/VPU_", VPU_IDS) - - -# create FEMA GPKG Bounding Boxes directory (if not exists) -if (!dir.exists(FEMA_GPKG_BB_PATH)) { - message(paste0(FEMA_GPKG_BB_PATH, " directory does not exist...\nCreating directory:\n > '", FEMA_GPKG_BB_PATH, "'")) - dir.create(FEMA_GPKG_BB_PATH) + # if (!dir.exists(state_dir)) { + # message("Creating FEMA VPU states subfolder...") + # message(paste0("'/", basename(state_dir), "' directory does not exist...\n Creating directory:\n > '", state_dir, "'")) + # dir.create(state_dir) + # } + # if (!dir.exists(merged_dir)) { + # message("Creating FEMA VPU merged subfolder...") + # message(paste0("'/", basename(merged_dir), "' directory does not exist...\n Creating directory:\n > '", merged_dir, "'")) + # dir.create(merged_dir) + # } } # 
------------------------------------------------------------------------------------- # ---- Get paths to downloaded FEMA 100 FGBs ---- # ------------------------------------------------------------------------------------- -FEMA_FILENAMES <- list.files(FEMA_FGB_PATH, full.names = FALSE) -FEMA_FILE_PATHS <- paste0(FEMA_FGB_PATH, "/", FEMA_FILENAMES) - -# ------------------------------------------------------------------------------------- -# ---- Run ogr2ogr to get FGB files into geojson ---- -# ------------------------------------------------------------------------------------- +FEMA_FILENAMES <- list.files(FEMA_FGB_PATH, full.names = FALSE) +FEMA_FILE_PATHS <- paste0(FEMA_FGB_PATH, "/", FEMA_FILENAMES) for (file in FEMA_FILENAMES) { - # message(file) - - local_fema_path <- paste0(FEMA_FGB_PATH, "/", file) - geojson_filename <- gsub(".fgb", ".geojson", file) - geojson_save_path <- paste0(FEMA_GEOJSON_PATH, "/", geojson_filename) + STAGING_FILES_TO_DELETE <- c() + + # Convert FGB to GeoJSON + local_fema_path <- paste0(FEMA_FGB_PATH, "/", file) + geojson_filename <- gsub(".fgb", ".geojson", file) + geojson_save_path <- paste0(FEMA_GPKG_PATH, "/", geojson_filename) message("FEMA filename: '", file, "'") message("Converting \n > '", file, "' to geojson '", geojson_filename, "'") - geojson_exists <- file.exists(geojson_save_path) + geojson_exists <- file.exists(geojson_save_path) message(" >>> '", geojson_filename, "' already exists? ", geojson_exists) message(" >>> Overwrite? ", OVERWRITE_FEMA_FILES) - # ogr2ogr command converting FGBs to GEOJSON for mapshaper processing - ogr2ogr_command = paste0("ogr2ogr ", geojson_save_path, " ", local_fema_path) + ogr2ogr_command <- paste0("ogr2ogr ", geojson_save_path, " ", local_fema_path) - if (OVERWRITE_FEMA_FILES) { + if (OVERWRITE_FEMA_FILES || !geojson_exists) { system(ogr2ogr_command) - message("Writting '", geojson_filename, "' to: \n > '", geojson_save_path, "'") + message("Writing '", geojson_filename, "' to: \n > '", geojson_save_path, "'") + + STAGING_FILES_TO_DELETE <- c(STAGING_FILES_TO_DELETE, geojson_save_path) } - message() -} - -# ------------------------------------------------------------------------------------- -# ---- Clean FEMA geometries (Simplify, Dissolve, Explode) ---- -# ------------------------------------------------------------------------------------- - -# paths to FEMA 100 year flood plain files -FEMA_geojson_paths <- list.files(FEMA_GEOJSON_PATH, full.names = TRUE) - -for (file in FEMA_geojson_paths) { + # Clean GeoJSON + message("Simplify, dissolve, explode > '", geojson_filename, "'") + output_clean_filename <- gsub(".geojson", "_clean.geojson", geojson_filename) + output_clean_geojson_path <- paste0(FEMA_GPKG_PATH, "/", output_clean_filename) - message("Simplify, dissolve, explode > '", basename(file), "'") - # message("Fema 100 year flood plain:\n > '", file, "'") - output_clean_filename <- gsub(".geojson", "_clean.geojson", basename(file)) - output_path <- paste0(FEMA_CLEAN_PATH, "/", output_clean_filename) - - clean_geojson_exists <- file.exists(output_path) + clean_geojson_exists <- file.exists(output_clean_geojson_path) message(" >>> '", output_clean_filename, "' already exists? ", clean_geojson_exists) message(" >>> Overwrite? 
", OVERWRITE_FEMA_FILES) - # start_fema <- sf::read_sf(file) - - # mapshaper_command = paste0('node --max-old-space-size=16000 /opt/homebrew/bin/mapshaper ', file, - # ' -simplify 0.15 visvalingam \\', - # ' -dissolve \\', - # ' -explode \\', - # ' -o ', output_path - # ) - - mapshaper_command = paste0('node --max-old-space-size=16000 /opt/homebrew/bin/mapshaper ', file, + mapshaper_command = paste0('node --max-old-space-size=16000 /opt/homebrew/bin/mapshaper ', geojson_save_path, ' -dissolve2 FLD_AR_ID \\', ' -simplify 0.1 visvalingam \\', - # ' -explode \\', ' -snap \\', - ' -o ', output_path + ' -o ', output_clean_geojson_path ) - if (OVERWRITE_FEMA_FILES) { + + if (OVERWRITE_FEMA_FILES || !clean_geojson_exists) { message("Running mapshaper 'simplify', 'dissolve', and 'explode' via CLI...") system(mapshaper_command) - message("Writting '", output_clean_filename, "' to: \n > '", output_path, "'") + message("Writing '", output_clean_filename, "' to: \n > '", output_clean_geojson_path, "'") + + STAGING_FILES_TO_DELETE <- c(STAGING_FILES_TO_DELETE, output_clean_geojson_path) } - # end_fema <- sf::read_sf(output_path) - message() - - } - -# ------------------------------------------------------------------------------------- -# ---- Convert cleaned FEMA geojson geometries to geopackages ---- -# ------------------------------------------------------------------------------------- - -# paths to FEMA 100 year flood plain files -FEMA_clean_paths <- list.files(FEMA_CLEAN_PATH, full.names = TRUE) - -for (file in FEMA_clean_paths) { - message("Fema 100 year flood plain:\n > '", basename(file), "'") - - output_gpkg_filename <- gsub("_clean.geojson", "_clean.gpkg", basename(file)) - output_path <- paste0(FEMA_GPKG_PATH, "/", output_gpkg_filename) + # Convert cleaned GeoJSON to GeoPackage + message("Fema 100 year flood plain:\n > '", output_clean_filename, "'") - message("Converting GEOJSON file to GPKG:\n > '", basename(file), "' > '", output_gpkg_filename, "'") + output_gpkg_filename <- gsub("_clean.geojson", "_clean.gpkg", output_clean_filename) + output_gpkg_path <- paste0(FEMA_GPKG_PATH, "/", output_gpkg_filename) - clean_gpkg_exists <- file.exists(output_path) + message("Converting GEOJSON file to GPKG:\n > '", output_clean_filename, "' > '", output_gpkg_filename, "'") + clean_gpkg_exists <- file.exists(output_gpkg_path) message(" >>> '", output_gpkg_filename, "' already exists? ", clean_gpkg_exists) message(" >>> Overwrite? 
", OVERWRITE_FEMA_FILES) - ogr2ogr_command <- paste0("ogr2ogr -nlt MULTIPOLYGON ", output_path, " ", file) - # ogr2ogr_command = paste0("ogr2ogr -nlt MULTIPOLYGON ", output_path, " ", file) + ogr2ogr_command <- paste0("ogr2ogr -nlt MULTIPOLYGON ", output_gpkg_path, " ", output_clean_geojson_path) - if (OVERWRITE_FEMA_FILES) { + if (OVERWRITE_FEMA_FILES || !clean_gpkg_exists) { system(ogr2ogr_command) - message("Writting '", output_gpkg_filename, "' to: \n > '", output_path, "'") + message("Writing '", output_gpkg_filename, "' to: \n > '", output_gpkg_path, "'") + } + + message("Deleting intermediary files\n") + for (delete_file in STAGING_FILES_TO_DELETE) { + if (file.exists(delete_file)) { + message("Deleting >>> '", delete_file, "'") + file.remove(delete_file) + } + } + message() + } -# # ------------------------------------------------------------------------------------------------------------------- -# # ---- Apply final dissolve/snap and removal of internal boudnaries in FEMA geometries ---- -# # ------------------------------------------------------------------------------------------------------------------- +# ------------------------------------------------------------------------------------------------------------------- +# ---- Apply final dissolve/snap and removal of internal boundaries in FEMA geometries ---- +# ------------------------------------------------------------------------------------------------------------------- # paths to FEMA 100 year flood plain files FEMA_gpkg_paths <- list.files(FEMA_GPKG_PATH, full.names = TRUE) for (file_path in FEMA_gpkg_paths) { - message("Applying hydrofab::clean_geometry() to:\n > '", basename(file_path), "'") - + message("Resolving internal boundaries, islands, and topology issues:\n > '", basename(file_path), "'") + fema <- sf::read_sf(file_path) - + fema <- fema[!sf::st_is_empty(fema), ] %>% sf::st_transform(5070) - + # TODO: Snap using geos::geos_snap() # fema <- # geos::geos_snap( @@ -244,8 +194,8 @@ for (file_path in FEMA_gpkg_paths) { # TODO: we get this error when trying to use the geometry column after geos snapping # TODO: Error = "Error: Not compatible with STRSXP: [type=NULL]." 
# fema %>% - # sf::st_cast("POLYGON") + # sf::st_cast("POLYGON") + # TODO: Snap using sf::st_snap() # fema <- sf::st_snap( # fema, @@ -275,27 +225,6 @@ for (file_path in FEMA_gpkg_paths) { # mapview::mapview(fema, color = 'cyan', col.regions = "cyan") + # mapview::mapview(end_fema, color = 'red', col.regions = "white") - # mapview::mapview(start_fema$geom, color = "red", col.regions = "red") + - # mapview::mapview(end_fema$geom, color = 'limegreen', col.regions = "limegreen") + - # mapview::mapview(snap_union_sf, color = 'gold', col.regions = "gold") + - # mapview::mapview(final_fema, color = 'white', col.regions = "white") + - # mapview::mapview(fin, color = 'white', col.regions = "white") - - # message(" > ", nrow(fema), " POLYGONs") - # message("Start time: ", Sys.time()) - # - # fema_clean <- hydrofab::clean_geometry( - # catchments = fema, - # ID = "fema_id" - # ) - # - # fema_clean <- - # fema_clean %>% - # dplyr::mutate( - # source = basename(file_path), - # state = gsub("-100yr-flood_valid_clean.gpkg", "", source) - # ) %>% - # dplyr::select(fema_id, source, state, areasqkm, geometry) fema <- fema %>% @@ -306,13 +235,8 @@ for (file_path in FEMA_gpkg_paths) { dplyr::select(fema_id, source, state, # areasqkm, geometry) - - message("End time: ", Sys.time()) - # geom_diff <- sf::st_difference(fema[1, ], fema_clean[1, ]) - # mapview::mapview(fema[1, ], col.regions = "red") + - # mapview::mapview(fema_clean[1, ], col.regions = "green") + - # mapview::mapview(geom_diff, col.regions = "white") + message("End time: ", Sys.time()) if (OVERWRITE_FEMA_FILES) { message("Writing '", basename(file_path), "' to: \n > '", file_path, "'") @@ -326,9 +250,9 @@ for (file_path in FEMA_gpkg_paths) { } -# # ------------------------------------------------------------------------------------- -# # ---- Partion parts of each FEMA GPKGs to the a Nextgen VPU ---- -# # ------------------------------------------------------------------------------------- +# ------------------------------------------------------------------------------------- +# ---- Partition parts of each FEMA GPKG to a Nextgen VPU ---- +# ------------------------------------------------------------------------------------- # Clean FEMA GPKG files FEMA_CLEAN_GPKG_PATHS <- list.files(FEMA_GPKG_PATH, full.names = TRUE) @@ -338,7 +262,9 @@ NEXTGEN_FILENAMES <- list.files(nextgen_dir, full.names = FALSE) NEXTGEN_FILE_PATHS <- paste0(nextgen_dir, NEXTGEN_FILENAMES) for (file_path in FEMA_CLEAN_GPKG_PATHS) { + fema_file <- basename(file_path) + message("Partitioning FEMA polygons by VPU: \n > FEMA gpkg: '", fema_file, "'") # read in fema polygons @@ -354,19 +280,21 @@ for (file_path in FEMA_CLEAN_GPKG_PATHS) { # read in nextgen flowlines flines <- sf::read_sf(nextgen_path, layer = "flowpaths") - + # get the FEMA polygons that intersect with the nextgen flowlines fema_intersect <- polygons_with_line_intersects(fema, flines) - + fema_in_nextgen <- nrow(fema_intersect) != 0 message("FEMA intersects with nextgen flowlines? 
", fema_in_nextgen) - + if(fema_in_nextgen) { # create filepaths vpu_subfolder <- paste0("VPU_", vpu) - vpu_subfolder_path <- paste0(FEMA_BY_VPU_PATH, "/", vpu_subfolder, "/states") + # vpu_subfolder_path <- paste0(FEMA_BY_VPU_PATH, "/", vpu_subfolder, "/states") + vpu_subfolder_path <- paste0(FEMA_BY_VPU_PATH, "/", vpu_subfolder) + # vpu_subfolder_path <- FEMA_VPU_SUBFOLDERS[grepl(vpu_subfolder, FEMA_VPU_SUBFOLDERS)] fema_intersect <- @@ -377,11 +305,11 @@ for (file_path in FEMA_CLEAN_GPKG_PATHS) { dplyr::select(vpu, fema_id, source, state, geom) # state <- gsub("-100yr-flood_valid_clean.gpkg", "", fema_file) - + fema_vpu_filename <- gsub(".gpkg", paste0("_", vpu, ".gpkg"), fema_file) fema_vpu_path <- paste0(vpu_subfolder_path, "/", fema_vpu_filename) - + if (OVERWRITE_FEMA_FILES) { message("Writing '", basename(fema_vpu_filename), "' to: \n > '", fema_vpu_path, "'") @@ -390,8 +318,8 @@ for (file_path in FEMA_CLEAN_GPKG_PATHS) { fema_vpu_path ) } - - + + } message() } @@ -401,7 +329,7 @@ for (file_path in FEMA_CLEAN_GPKG_PATHS) { "--------------------------------------------------------------\n", "Completed all VPU intersections for: \n > '", fema_file, "'", "\n--------------------------------------------------------------\n" - ) + ) } @@ -409,28 +337,26 @@ for (file_path in FEMA_CLEAN_GPKG_PATHS) { # ---- Loop through each VPU subfolder and merge all of the Geopackages into one ---- # ------------------------------------------------------------------------------------- -DELETE_STAGING_GPKGS <- FALSE +DELETE_STAGING_GPKGS <- F for (vpu_dir in FEMA_VPU_SUBFOLDERS) { - # for (i in 1:4) { - # vpu_dir = FEMA_VPU_SUBFOLDERS[i] + # for (i in 1:4) { + # vpu_dir = FEMA_VPU_SUBFOLDERS2[12] message("Merging files in '", basename(vpu_dir), "' directory...") - # } + # } - # vpu_dir <- '/Users/anguswatters/Desktop/lynker-spatial/FEMA_BY_VPU/VPU_06' vpu_subdirs <- list.files(vpu_dir, full.names = TRUE) - states_dir <- vpu_subdirs[grepl(paste0(vpu_dir, "/states"), vpu_subdirs)] - merged_dir <- vpu_subdirs[grepl(paste0(vpu_dir, "/merged"), vpu_subdirs)] + # path to the merged directory where the final merged geopackage will end up + master_name <- paste0("fema_", gsub("VPU", "vpu", basename(vpu_dir))) + master_gpkg_name <- paste0(master_name, ".gpkg") + master_filepath <- paste0(vpu_dir, "/", master_gpkg_name) # fema state geopackages partitioned for the specific VPU - fema_state_gpkgs <- list.files(states_dir, full.names = TRUE) + fema_state_gpkgs <- list.files(vpu_dir, full.names = TRUE) - master_name <- paste0("fema_", gsub("VPU", "vpu", basename(vpu_dir))) - master_gpkg_name <- paste0(master_name, ".gpkg") - - # path to the merged directory where the final merged geopackge will end up - master_filepath <- paste0(merged_dir, "/", master_gpkg_name) + # make sure to ignore the master file if it already exists + fema_state_gpkgs <- fema_state_gpkgs[fema_state_gpkgs != master_filepath] for(gpkg_file in fema_state_gpkgs) { # message(" - Appending '", basename(gpkg_file), "' to master FEMA VPU gpkg:\n > '", @@ -443,15 +369,17 @@ for (vpu_dir in FEMA_VPU_SUBFOLDERS) { ogr2ogr_merge_command <- paste0("ogr2ogr -f 'gpkg' -append -nln ", master_name, " ", master_filepath, " ", gpkg_file - ) + ) if (OVERWRITE_FEMA_FILES) { system(ogr2ogr_merge_command) } } - if(DELETE_STAGING_GPKGS) { - message(" - Deleting individual gpkgs from '/states' directory...") + has_fema_state_gpkgs <- length(fema_state_gpkgs) > 0 + + if(DELETE_STAGING_GPKGS && has_fema_state_gpkgs) { + message(" - Deleting individual gpkgs from '", 
vpu_dir, "' directory...") # message("- Deleting individual gpkgs from 'states' directory:\n > '", states_dir, "'") remove_gpkg_cmds <- paste0("rm ", fema_state_gpkgs) @@ -472,198 +400,105 @@ for (vpu_dir in FEMA_VPU_SUBFOLDERS) { # ---- Union each VPU geopackage (either on state or just touching predicate) ---- # ------------------------------------------------------------------------------------- -MERGED_DIRS <- paste0(FEMA_VPU_SUBFOLDERS, "/merged") -MERGED_DIRS for (i in 1:length(FEMA_VPU_SUBFOLDERS)) { - # i = 8 - vpu_dir = FEMA_VPU_SUBFOLDERS[i] - + vpu_dir <- FEMA_VPU_SUBFOLDERS[i] VPU <- basename(vpu_dir) - message("Attempting to union FEMA polygons for '", VPU, "'...") + message(i, " - Attempting to union FEMA polygons for '", VPU, "'...") - merged_dir <- paste0(vpu_dir, "/merged") - fema_vpu_file <- list.files(merged_dir, full.names = TRUE) + # path to the merged directory where the final merged geopackage will end up + master_name <- paste0("fema_", gsub("VPU", "vpu", basename(vpu_dir))) + master_gpkg_name <- paste0(master_name, ".gpkg") + master_filepath <- paste0(vpu_dir, "/", master_gpkg_name) - has_fema_vpu_file <- ifelse(length(fema_vpu_file) > 0, TRUE, FALSE) - # has_fema_vpu_file - # message() - # fema_vpu_file -# } - if(!has_fema_vpu_file) { + message("> Re-unioning and re-exploding geometries in '", basename(master_filepath), "'") + + if(!file.exists(master_filepath)) { message("No FEMA geometries in '", VPU, "'") message() next } - - message("> Re-unioning and re-exploding geometries in '", basename(fema_vpu_file), "'") - fema_vpu_file <- fema_vpu_file[!grepl("_union.gpkg", fema_vpu_file)] - - fema_vpu <- sf::read_sf(fema_vpu_file) - # fema_vpu <- - # fema_vpu %>% - # dplyr::group_by(source) %>% - # dplyr::summarise() %>% - # dplyr::ungroup() - - mapview::npts(fema_vpu) - fema_vpu2 <- + + fema_vpu <- sf::read_sf(master_filepath) + + # fema_vpu %>% sf::st_geometry_type() %>% unique() + + fema_vpu <- fema_vpu %>% - nngeo::st_remove_holes(max_area = 20) %>% + nngeo::st_remove_holes(max_area = 200) %>% # dplyr::select(geometry = geom) %>% add_predicate_group_id(sf::st_intersects) %>% - sf::st_make_valid() %>% + sf::st_make_valid() %>% dplyr::group_by(group_id) %>% dplyr::summarise( geometry = sf::st_combine(sf::st_union(geometry)) ) %>% dplyr::ungroup() %>% dplyr::select(-group_id) %>% - add_predicate_group_id(sf::st_intersects) %>% + add_predicate_group_id(sf::st_intersects) + + geom_type_counts <- table(sf::st_geometry_type(fema_vpu)) + + message("Geometry counts before casting all geometries to MULTIPOLYGON:") + for (g in seq_along(geom_type_counts)) { + message(" > ", names(geom_type_counts[g]), ": ", geom_type_counts[g]) + } + + message("Keeping only POLYGON and MULTIPOLYGON geometries...") + fema_vpu <- + fema_vpu %>% + dplyr::filter(sf::st_geometry_type(geometry) %in% c("POLYGON", "MULTIPOLYGON")) %>% + sf::st_cast("MULTIPOLYGON") %>% + sf::st_make_valid() %>% + # dplyr::group_by(group_id) %>% rmapshaper::ms_dissolve(sys = TRUE, sys_mem = 16) %>% rmapshaper::ms_explode(sys = TRUE, sys_mem = 16) %>% + nngeo::st_remove_holes(max_area = 200) %>% dplyr::mutate( fema_id = as.character(1:dplyr::n()) ) %>% dplyr::select(fema_id, geometry) - fema_vpu2 %>% mapview::npts() - - fema_vpu2_subset <- fema_vpu2[lengths(sf::st_intersects(fema_vpu2, fema_vpu[1:100, ])) > 1, ] - - mapview::mapview(fema_vpu[1:100, ], color = 'red', col.regions = 'white') + - mapview::mapview(fema_vpu2_subset, color = 'green', col.regions = 'white') - # mapview::mapview(fema_vpu2[1:100, ], color = 
'green', col.regions = 'white') - - # message("Removing holes before dissolve...") - fema_vpu <- nngeo::st_remove_holes(fema_vpu) - mapview::mapview(fema_vpu[1:100, ], color = 'red', col.regions = 'white') + - mapview::mapview(fema_vpu2[1:100, ], color = 'green', col.regions = 'white') - # mapview::npts(fema_vpu) - all(sf::st_is_valid(fema_vpu)) - rmapshaper::ms_innerlines(fema_vpu) - - - # message("Making valid geometries...") - # fema_vpu <- sf::st_make_valid(fema_vpu) - - # fema_vpu <- - # fema_vpu %>% - # sf::st_cast("MULTIPOLYGON") - - message("Dissolving...") - - # 1421 = old number of polygons - fema_vpu2 <- rmapshaper::ms_dissolve( - input = fema_vpu, - field = "source", - sys = TRUE, - sys_mem = 16 - ) - - message("Exploding...") - - # mapview::npts(fema_vpu) - # mapview::npts(fema_vpu2) - - fema_vpu2 <- rmapshaper::ms_explode( - input = fema_vpu2, - sys = TRUE, - sys_mem = 16 - ) - # mapview::npts(fema_vpu2) - message("Removing holes after explosion...") - fema_vpu2 <- nngeo::st_remove_holes(fema_vpu2) - - fema_vpu2 <- - fema_vpu2 %>% - add_predicate_group_id(sf::st_intersects) %>% - dplyr::group_by(group_id) %>% - dplyr::summarise( - geometry = sf::st_combine(sf::st_union(geometry)) - ) + # end_geom_type_counts <- table(sf::st_geometry_type(fema_vpu)) + # message("Geometry counts after all processing steps: ") + # for (g in seq_along(end_geom_type_counts)) { + # message(" > ", names(end_geom_type_counts[g]), ": ", end_geom_type_counts[g]) + # } - mapview::mapview(fema_vpu[1:100, ], color = 'red', col.regions = 'white') + - mapview::mapview(fema_vpu2[1:00, ], color = 'green', col.regions = 'white') + # fema_vpu2 %>% mapview::npts() + # fema_vpu2_subset <- fema_vpu2[lengths(sf::st_intersects(fema_vpu2, fema_vpu[1:100, ])) > 1, ] - # mapview::npts(fema_vpu2) - sf::st_is_valid(fema_vpu2) %>% all() + # mapview::mapview(fema_vpu, color = 'red', col.regions = 'white') + + # mapview::mapview(fema_vpu2, color = 'green', col.regions = 'white') + # # mapview::mapview(fema_vpu2_subset, color = 'green', col.regions = 'white') + # # mapview::mapview(fema_vpu2[1:100, ], color = 'green', col.regions = 'white') - fema_vpu2 %>% - sf::st_make_valid() %>% - sf::st_geometry_type() %>% - unique() - sf::st_geometry_type(fema_vpu2) %>% unique() - # slice_subset = 1:50 - # fema_exp_noholes[slice_subset, ] - # mapview::mapview( fema_vpu[1:100, ], col.regions = "dodgerblue")+ - # mapview::mapview( fema_exp[slice_subset, ], col.regions = "red") + - # mapview::mapview( fema_exp_noholes[slice_subset, ], col.regions = "green") - # fema_vpu <- rmapshaper::ms_dissolve(fema_vpu, - # field = "source", - # sys = TRUE, - # sys_mem = 16 - # # ) - # fema_vpu <- rmapshaper::ms_explode(fema_vpu, - # sys = TRUE, - # sys_mem = 16) - fema_vpu <- fema_vpu %>% # dplyr::group_by(source) %>% dplyr::mutate( - state = tolower(gsub("-100yr-flood_valid_clean.gpkg", "", source)), + # state = tolower(gsub("-100yr-flood_valid_clean.gpkg", "", source)), vpu = gsub("VPU_", "", VPU), fema_id = 1:dplyr::n() - ) %>% + ) %>% dplyr::ungroup() %>% - dplyr::select(vpu, fema_id, state, geom = geometry) + dplyr::select(vpu, fema_id, + # state, + geom = geometry) if (OVERWRITE_FEMA_FILES) { - union_file_path <- gsub(".gpkg", "_union.gpkg", fema_vpu_file) - message("> Writting '", basename(union_file_path), "' (unioned and exploded version)") + + message("> Overwriting '", basename(master_filepath), "' with final clean version...") + + # union_file_path <- gsub(".gpkg", "_union.gpkg", fema_vpu_file) + # message("> writing '", 
basename(union_file_path), "' (unioned and exploded version)") + sf::write_sf( fema_vpu, - union_file_path - ) + master_filepath + # union_file_path + ) } message() -} - -# ------------------------------------------------------------------------------------- -# ---- Generate bounding box gpkg for each FEMA FGB ---- -# ------------------------------------------------------------------------------------- - -for (key in FEMA_FILENAMES) { - - local_fema_path <- paste0(FEMA_FGB_PATH, "/", key) - - gpkg_filename <- gsub(".fgb", "_bb.gpkg", key) - bb_save_path <- paste0(FEMA_FGB_BB_PATH, "/", gpkg_filename) - - message("S3 Key: '", key, "'") - message("Local FEMA file:\n > '", local_fema_path, "'") - message("Local output FEMA bounding box file:\n > '", bb_save_path, "'") - - # fema <- sf::read_sf(local_fema_path) - - fema_bb <- - local_fema_path %>% - sf::read_sf() %>% - sf::st_bbox() %>% - sf::st_as_sfc() %>% - sf::st_as_sf() %>% - dplyr::mutate( - fema_fgb = key, - fema_fgb_path = local_fema_path, - state = gsub("-100yr-flood_valid.fgb", "", key) - ) %>% - dplyr::select(fema_fgb, fema_fgb_path, state, geometry = x) %>% - sf::st_transform(5070) - - message("Saving FEMA bounding box file:\n > '", bb_save_path, "'") - - sf::write_sf(fema_bb, bb_save_path) - message() -} +} \ No newline at end of file diff --git a/runners/cs_runner/preprocess_fema2.R b/runners/cs_runner/preprocess_fema2.R new file mode 100644 index 0000000..2141946 --- /dev/null +++ b/runners/cs_runner/preprocess_fema2.R @@ -0,0 +1,504 @@ +# Script should be run AFTER download_fema100.R as the FEMA 100 year flood plain data needs to first be downloaded from S3 +# This file will take a directory of FEMA 100 year FGB files (FEMA_FGB_PATH) and run the processes below to generate a cleaned, simple set of geopackages

# Processing steps:
# - Convert FGBs to GEOJSON (via ogr2ogr)
# - Simplifies
# - Dissolves
# - Explodes
# - Convert cleaned GEOJSON to cleaned GPKGs (via ogr2ogr)
# - Apply hydrofab::clean_geometry()
# - Partition FEMA 100 geometries by VPU # TODO still
# - Get FEMA bounding box geometries (maybe)

# load config variables
source("runners/cs_runner/config_vars.R")
source("runners/cs_runner/config.R")
source("runners/cs_runner/utils.R")

library(dplyr)
library(sf)
library(geos)
library(fastmap)
library(nngeo)

# TODO: Steps that convert FGB to geojson and then geojson to gpkg can be put into a single loop
# TODO: Delete old files as needed

# -------------------------------------------------------------------------------------
# ---- OVERWRITE_FEMA_FILES constant logical ----
# ---- > if TRUE, processing steps will be run again
#          and overwrite existing previously processed files
# -------------------------------------------------------------------------------------

# Default is TRUE (i.e. 
a fresh processing run is done from start to finish) +OVERWRITE_FEMA_FILES <- TRUE +DELETE_STAGING_GPKGS <- TRUE + +# ------------------------------------------------------------------------------------- +# ---- Create directories (if they do NOT exist) ---- +# ------------------------------------------------------------------------------------- + +# create directory for cleaned FEMA geometries as geopackages (if not exists) +if (!dir.exists(FEMA_GPKG_PATH)) { + message(paste0(FEMA_GPKG_PATH, " directory does not exist...\nCreating directory:\n > '", FEMA_GPKG_PATH, "'")) + dir.create(FEMA_GPKG_PATH) +} + +# create directory for FEMA geometries partitioned by VPU +if (!dir.exists(FEMA_BY_VPU_PATH)) { + message(paste0(FEMA_BY_VPU_PATH, " directory does not exist...\nCreating directory:\n > '", FEMA_BY_VPU_PATH, "'")) + dir.create(FEMA_BY_VPU_PATH) +} + +for (VPU_SUBFOLDER in FEMA_VPU_SUBFOLDERS) { + # create directory for FEMA geometries by VPU + # message(VPU_SUBFOLDER) + + # state_dir = paste0(VPU_SUBFOLDER, "/states/") + # merged_dir = paste0(VPU_SUBFOLDER, "/merged/") + + if (!dir.exists(VPU_SUBFOLDER)) { + message("Creating FEMA VPU subfolder...") + message(paste0("'/", basename(VPU_SUBFOLDER), "' directory does not exist...\n Creating directory:\n > '", VPU_SUBFOLDER, "'")) + dir.create(VPU_SUBFOLDER) + } + # if (!dir.exists(state_dir)) { + # message("Creating FEMA VPU states subfolder...") + # message(paste0("'/", basename(state_dir), "' directory does not exist...\n Creating directory:\n > '", state_dir, "'")) + # dir.create(state_dir) + # } + # if (!dir.exists(merged_dir)) { + # message("Creating FEMA VPU merged subfolder...") + # message(paste0("'/", basename(merged_dir), "' directory does not exist...\n Creating directory:\n > '", merged_dir, "'")) + # dir.create(merged_dir) + # } +} + +# ------------------------------------------------------------------------------------- +# ---- Get paths to downloaded FEMA 100 FGBs ---- +# ------------------------------------------------------------------------------------- + +FEMA_FILENAMES <- list.files(FEMA_FGB_PATH, full.names = FALSE) +FEMA_FILE_PATHS <- paste0(FEMA_FGB_PATH, "/", FEMA_FILENAMES) + +for (file in FEMA_FILENAMES) { + + STAGING_FILES_TO_DELETE <- c() + + # Convert FGB to GeoJSON + local_fema_path <- paste0(FEMA_FGB_PATH, "/", file) + geojson_filename <- gsub(".fgb", ".geojson", file) + geojson_save_path <- paste0(FEMA_GPKG_PATH, "/", geojson_filename) + + message("FEMA filename: '", file, "'") + message("Converting \n > '", file, "' to geojson '", geojson_filename, "'") + + geojson_exists <- file.exists(geojson_save_path) + + message(" >>> '", geojson_filename, "' already exists? ", geojson_exists) + message(" >>> Overwrite? ", OVERWRITE_FEMA_FILES) + + ogr2ogr_command <- paste0("ogr2ogr ", geojson_save_path, " ", local_fema_path) + + if (OVERWRITE_FEMA_FILES || !geojson_exists) { + system(ogr2ogr_command) + message("Writing '", geojson_filename, "' to: \n > '", geojson_save_path, "'") + + STAGING_FILES_TO_DELETE <- c(STAGING_FILES_TO_DELETE, geojson_save_path) + } + + # Clean GeoJSON + message("Simplify, dissolve, explode > '", geojson_filename, "'") + output_clean_filename <- gsub(".geojson", "_clean.geojson", geojson_filename) + output_clean_geojson_path <- paste0(FEMA_GPKG_PATH, "/", output_clean_filename) + + clean_geojson_exists <- file.exists(output_clean_geojson_path) + message(" >>> '", output_clean_filename, "' already exists? ", clean_geojson_exists) + message(" >>> Overwrite? 
", OVERWRITE_FEMA_FILES) + + mapshaper_command <- paste0('node --max-old-space-size=16000 /opt/homebrew/bin/mapshaper ', geojson_save_path, + ' -dissolve2 FLD_AR_ID \\', + ' -simplify 0.1 visvalingam \\', + ' -snap \\', + ' -o ', output_clean_geojson_path + ) + + + if (OVERWRITE_FEMA_FILES || !clean_geojson_exists) { + message("Running mapshaper 'simplify', 'dissolve', and 'explode' via CLI...") + system(mapshaper_command) + message("Writing '", output_clean_filename, "' to: \n > '", output_clean_geojson_path, "'") + + STAGING_FILES_TO_DELETE <- c(STAGING_FILES_TO_DELETE, output_clean_geojson_path) + } + + # Convert cleaned GeoJSON to GeoPackage + message("FEMA 100 year flood plain:\n > '", output_clean_filename, "'") + + output_gpkg_filename <- gsub("_clean.geojson", "_clean.gpkg", output_clean_filename) + output_gpkg_path <- paste0(FEMA_GPKG_PATH, "/", output_gpkg_filename) + + message("Converting GEOJSON file to GPKG:\n > '", output_clean_filename, "' > '", output_gpkg_filename, "'") + + clean_gpkg_exists <- file.exists(output_gpkg_path) + message(" >>> '", output_gpkg_filename, "' already exists? ", clean_gpkg_exists) + message(" >>> Overwrite? ", OVERWRITE_FEMA_FILES) + + ogr2ogr_command <- paste0("ogr2ogr -nlt MULTIPOLYGON ", output_gpkg_path, " ", output_clean_geojson_path) + + if (OVERWRITE_FEMA_FILES || !clean_gpkg_exists) { + system(ogr2ogr_command) + message("Writing '", output_gpkg_filename, "' to: \n > '", output_gpkg_path, "'") + } + + message("Deleting intermediary files\n") + for (delete_file in STAGING_FILES_TO_DELETE) { + if (file.exists(delete_file)) { + message("Deleting >>> '", delete_file, "'") + file.remove(delete_file) + } + + } + + message() + +} + +# ------------------------------------------------------------------------------------------------------------------- +# ---- Apply final dissolve/snap and removal of internal boundaries in FEMA geometries ---- +# ------------------------------------------------------------------------------------------------------------------- + +# paths to FEMA 100 year flood plain files +FEMA_gpkg_paths <- list.files(FEMA_GPKG_PATH, full.names = TRUE) + +for (file_path in FEMA_gpkg_paths) { + message("Resolving internal boundaries, islands, and topology issues:\n > '", basename(file_path), "'") + + fema <- sf::read_sf(file_path) + + fema <- + fema[!sf::st_is_empty(fema), ] %>% + sf::st_transform(5070) + + # TODO: Snap using geos::geos_snap() + # fema <- + # geos::geos_snap( + # geos::as_geos_geometry(fema), + # geos::as_geos_geometry(fema), + # tolerance = 1 + # ) %>% + # geos::geos_make_valid() %>% + # sf::st_as_sf() + + # TODO: we get this error when trying to use the geometry column after geos snapping + # TODO: Error = "Error: Not compatible with STRSXP: [type=NULL]." 
+ # fema %>% + # sf::st_cast("POLYGON") + + # TODO: Snap using sf::st_snap() + # fema <- sf::st_snap( + # fema, + # fema, + # tolerance = 2 + # ) + + fema <- + fema %>% + # fema[!sf::st_is_empty(fema), ] %>% + dplyr::select(geometry = geom) %>% + add_predicate_group_id(sf::st_intersects) %>% + sf::st_make_valid() %>% + dplyr::group_by(group_id) %>% + dplyr::summarise( + geometry = sf::st_combine(sf::st_union(geometry)) + ) %>% + dplyr::ungroup() %>% + dplyr::select(-group_id) %>% + add_predicate_group_id(sf::st_intersects) %>% + rmapshaper::ms_dissolve(sys = TRUE, sys_mem = 16) %>% + rmapshaper::ms_explode(sys = TRUE, sys_mem = 16) %>% + dplyr::mutate( + fema_id = as.character(1:dplyr::n()) + ) %>% + dplyr::select(fema_id, geometry) + + # mapview::mapview(fema, color = 'cyan', col.regions = "cyan") + + # mapview::mapview(end_fema, color = 'red', col.regions = "white") + + fema <- + fema %>% + dplyr::mutate( + source = basename(file_path), + state = gsub("-100yr-flood_valid_clean.gpkg", "", source) + ) %>% + dplyr::select(fema_id, source, state, + # areasqkm, + geometry) + + message("End time: ", Sys.time()) + + if (OVERWRITE_FEMA_FILES) { + message("Writing '", basename(file_path), "' to: \n > '", file_path, "'") + sf::write_sf( + # fema_clean, + fema, + file_path + ) + } + message() + +} + +# ------------------------------------------------------------------------------------- +# ---- Partition parts of each FEMA GPKG to a Nextgen VPU ---- +# ------------------------------------------------------------------------------------- + +# Clean FEMA GPKG files +FEMA_CLEAN_GPKG_PATHS <- list.files(FEMA_GPKG_PATH, full.names = TRUE) + +# paths to nextgen datasets and model attribute parquet files +NEXTGEN_FILENAMES <- list.files(nextgen_dir, full.names = FALSE) +NEXTGEN_FILE_PATHS <- paste0(nextgen_dir, NEXTGEN_FILENAMES) + +for (file_path in FEMA_CLEAN_GPKG_PATHS) { + + fema_file <- basename(file_path) + + message("Partitioning FEMA polygons by VPU: \n > FEMA gpkg: '", fema_file, "'") + + # read in fema polygons + fema <- sf::read_sf(file_path) + + for (nextgen_path in NEXTGEN_FILE_PATHS) { + nextgen_basename <- basename(nextgen_path) + vpu <- unlist(regmatches(nextgen_basename, gregexpr("\\d+[A-Za-z]*", nextgen_basename))) + + message("VPU: ", vpu) + message("- nextgen gpkg:\n > '", nextgen_path, "'") + message(" > Checking if '", fema_file, "' intersects with '", nextgen_basename, "'") + + # read in nextgen flowlines + flines <- sf::read_sf(nextgen_path, layer = "flowpaths") + + # get the FEMA polygons that intersect with the nextgen flowlines + fema_intersect <- polygons_with_line_intersects(fema, flines) + + fema_in_nextgen <- nrow(fema_intersect) != 0 + + message("FEMA intersects with nextgen flowlines? 
", fema_in_nextgen) + + if(fema_in_nextgen) { + + # create filepaths + vpu_subfolder <- paste0("VPU_", vpu) + # vpu_subfolder_path <- paste0(FEMA_BY_VPU_PATH, "/", vpu_subfolder, "/states") + vpu_subfolder_path <- paste0(FEMA_BY_VPU_PATH, "/", vpu_subfolder) + + # vpu_subfolder_path <- FEMA_VPU_SUBFOLDERS[grepl(vpu_subfolder, FEMA_VPU_SUBFOLDERS)] + + fema_intersect <- + fema_intersect %>% + dplyr::mutate( + vpu = vpu + ) %>% + dplyr::select(vpu, fema_id, source, state, geom) + + # state <- gsub("-100yr-flood_valid_clean.gpkg", "", fema_file) + + fema_vpu_filename <- gsub(".gpkg", paste0("_", vpu, ".gpkg"), fema_file) + fema_vpu_path <- paste0(vpu_subfolder_path, "/", fema_vpu_filename) + + + if (OVERWRITE_FEMA_FILES) { + message("Writing '", basename(fema_vpu_filename), "' to: \n > '", fema_vpu_path, "'") + + sf::write_sf( + fema_intersect, + fema_vpu_path + ) + } + + + } + message() + } + + + message( + "--------------------------------------------------------------\n", + "Completed all VPU intersections for: \n > '", fema_file, "'", + "\n--------------------------------------------------------------\n" + ) + +} + +# ------------------------------------------------------------------------------------- +# ---- Loop through each VPU subfolder and merge all of the Geopackages into one ---- +# ------------------------------------------------------------------------------------- + +DELETE_STAGING_GPKGS <- F + +for (vpu_dir in FEMA_VPU_SUBFOLDERS) { + # for (i in 1:4) { + # vpu_dir = FEMA_VPU_SUBFOLDERS2[12] + message("Merging files in '", basename(vpu_dir), "' directory...") + # } + + vpu_subdirs <- list.files(vpu_dir, full.names = TRUE) + + # path to the merged directory where the final merged geopackage will end up + master_name <- paste0("fema_", gsub("VPU", "vpu", basename(vpu_dir))) + master_gpkg_name <- paste0(master_name, ".gpkg") + master_filepath <- paste0(vpu_dir, "/", master_gpkg_name) + + # fema state geopackages partitioned for the specific VPU + fema_state_gpkgs <- list.files(vpu_dir, full.names = TRUE) + + # make sure to ignore the master file if it already exists + fema_state_gpkgs <- fema_state_gpkgs[fema_state_gpkgs != master_filepath] + + for(gpkg_file in fema_state_gpkgs) { + # message(" - Appending '", basename(gpkg_file), "' to master FEMA VPU gpkg:\n > '", + # basename(gpkg_file), " > ", basename(master_filepath), + # "'") + message(" > '", + basename(gpkg_file), " > ", basename(master_filepath), + "'") + + ogr2ogr_merge_command <- paste0("ogr2ogr -f 'gpkg' -append -nln ", master_name, " ", + master_filepath, + " ", gpkg_file + ) + + if (OVERWRITE_FEMA_FILES) { + system(ogr2ogr_merge_command) + } + } + + has_fema_state_gpkgs <- length(fema_state_gpkgs) > 0 + + if(DELETE_STAGING_GPKGS && has_fema_state_gpkgs) { + message(" - Deleting individual gpkgs from '", vpu_dir, "' directory...") + # message("- Deleting individual gpkgs from 'states' directory:\n > '", states_dir, "'") + + remove_gpkg_cmds <- paste0("rm ", fema_state_gpkgs) + + for (remove_cmd in remove_gpkg_cmds) { + message(" > '", remove_cmd, "'") + system(remove_cmd) + } + } + + # message() + message("Merge complete!") + message("Merged '", basename(vpu_dir), "' FEMA output geopackage:\n --> '", master_filepath, "'") + message() +} + +# ------------------------------------------------------------------------------------- +# ---- Union each VPU geopackage (either on state or just touching predicate) ---- +# ------------------------------------------------------------------------------------- + +for (i in 
1:length(FEMA_VPU_SUBFOLDERS)) { + vpu_dir <- FEMA_VPU_SUBFOLDERS[i] + VPU <- basename(vpu_dir) + + message(i, " - Attempting to union FEMA polygons for '", VPU, "'...") + + # path to the merged directory where the final merged geopackage will end up + master_name <- paste0("fema_", gsub("VPU", "vpu", basename(vpu_dir))) + master_gpkg_name <- paste0(master_name, ".gpkg") + master_filepath <- paste0(vpu_dir, "/", master_gpkg_name) + + message("> Re-unioning and re-exploding geometries in '", basename(master_filepath), "'") + + if(!file.exists(master_filepath)) { + message("No FEMA geometries in '", VPU, "'") + message() + next + } + + + fema_vpu <- sf::read_sf(master_filepath) + + # fema_vpu %>% sf::st_geometry_type() %>% unique() + + fema_vpu <- + fema_vpu %>% + nngeo::st_remove_holes(max_area = 200) %>% + # dplyr::select(geometry = geom) %>% + add_predicate_group_id(sf::st_intersects) %>% + sf::st_make_valid() %>% + dplyr::group_by(group_id) %>% + dplyr::summarise( + geometry = sf::st_combine(sf::st_union(geometry)) + ) %>% + dplyr::ungroup() %>% + dplyr::select(-group_id) %>% + add_predicate_group_id(sf::st_intersects) + + geom_type_counts <- table(sf::st_geometry_type(fema_vpu)) + + message("Geometry counts before casting all geometries to MULTIPOLYGON:") + for (g in seq_along(geom_type_counts)) { + message(" > ", names(geom_type_counts[g]), ": ", geom_type_counts[g]) + } + + message("Keeping only POLYGON and MULTIPOLYGON geometries...") + fema_vpu <- + fema_vpu %>% + dplyr::filter(sf::st_geometry_type(geometry) %in% c("POLYGON", "MULTIPOLYGON")) %>% + sf::st_cast("MULTIPOLYGON") %>% + sf::st_make_valid() %>% + # dplyr::group_by(group_id) %>% + rmapshaper::ms_dissolve(sys = TRUE, sys_mem = 16) %>% + rmapshaper::ms_explode(sys = TRUE, sys_mem = 16) %>% + nngeo::st_remove_holes(max_area = 200) %>% + dplyr::mutate( + fema_id = as.character(1:dplyr::n()) + ) %>% + dplyr::select(fema_id, geometry) + + # end_geom_type_counts <- table(sf::st_geometry_type(fema_vpu)) + # message("Geometry counts after all processing steps: ") + # for (g in seq_along(end_geom_type_counts)) { + # message(" > ", names(end_geom_type_counts[g]), ": ", end_geom_type_counts[g]) + # } + + # fema_vpu2 %>% mapview::npts() + # fema_vpu2_subset <- fema_vpu2[lengths(sf::st_intersects(fema_vpu2, fema_vpu[1:100, ])) > 1, ] + + # mapview::mapview(fema_vpu, color = 'red', col.regions = 'white') + + # mapview::mapview(fema_vpu2, color = 'green', col.regions = 'white') + # # mapview::mapview(fema_vpu2_subset, color = 'green', col.regions = 'white') + # # mapview::mapview(fema_vpu2[1:100, ], color = 'green', col.regions = 'white') + + fema_vpu <- + fema_vpu %>% + # dplyr::group_by(source) %>% + dplyr::mutate( + # state = tolower(gsub("-100yr-flood_valid_clean.gpkg", "", source)), + vpu = gsub("VPU_", "", VPU), + fema_id = 1:dplyr::n() + ) %>% + dplyr::ungroup() %>% + dplyr::select(vpu, fema_id, + # state, + geom = geometry) + + if (OVERWRITE_FEMA_FILES) { + + message("> Overwriting '", basename(master_filepath), "' with final clean version...") + + # union_file_path <- gsub(".gpkg", "_union.gpkg", fema_vpu_file) + # message("> writing '", basename(union_file_path), "' (unioned and exploded version)") + + sf::write_sf( + fema_vpu, + master_filepath + # union_file_path + ) + } + message() +} From e7e8023b7dca56665bb17a17285bd978e9fdf31b Mon Sep 17 00:00:00 2001 From: anguswg-ucsb <anguswatters@gmail.com> Date: Wed, 17 Jul 2024 13:55:06 -0700 Subject: [PATCH 44/64] cleaned up and deleted old scripts, finalized FEMA polygon 
simplification, dissolve, explode process --- runners/cs_runner/add_fema_to_transects.R | 1080 --------------------- runners/cs_runner/preprocess_fema.R | 201 +++- runners/cs_runner/preprocess_fema2.R | 504 ---------- 3 files changed, 153 insertions(+), 1632 deletions(-) delete mode 100644 runners/cs_runner/add_fema_to_transects.R delete mode 100644 runners/cs_runner/preprocess_fema2.R diff --git a/runners/cs_runner/add_fema_to_transects.R b/runners/cs_runner/add_fema_to_transects.R deleted file mode 100644 index a5ac6f3..0000000 --- a/runners/cs_runner/add_fema_to_transects.R +++ /dev/null @@ -1,1080 +0,0 @@ -library(dplyr) -library(lwgeom) - -# generate the flowlines layer for the final cross_sections_<vpu>.gpkg for each vpu -source("runners/cs_runner/config.r") - -# transect bucket prefix -transects_prefix <- paste0(s3_bucket, version_prefix, "/3d/transects/") - -# paths to nextgen datasets and model attribute parquet files -nextgen_files <- list.files(nextgen_dir, full.names = FALSE) -transects_files <- list.files(transects_dir, full.names = FALSE) -transects_files <- transects_files[!grepl("updated", transects_files)] - - -net_source <- "hydrofabric3d" - -# ensure the files are in the same order and matched up by vpu -path_df <- align_files_by_vpu( - x = nextgen_files, - y = transects_files, - base = base_dir -) - -path_df - -# # install.packages("devtools") -devtools::install_github("anguswg-ucsb/hydrofabric3D") - -# loop over each vpu and generate cross sections, then save locally and upload to s3 bucket -# for(i in 1:nrow(path_df)) { - - i = 8 - - # nextgen file and full path - nextgen_file <- path_df$x[i] - nextgen_path <- paste0(nextgen_dir, nextgen_file) - - transect_file <- path_df$y[i] - transect_path <- paste0(transects_dir, transect_file) - - vpu <- path_df$vpu[i] - transect_path - - # # model attributes file and full path - # model_attr_file <- path_df$y[i] - # model_attr_path <- paste0(model_attr_dir, model_attr_file) - - message("creating vpu ", path_df$vpu[i], "\n - transects: ", transect_file, "\n - flowpaths: '", nextgen_file, "'") - # message("creating vpu ", path_df$vpu[i], " transects:\n - flowpaths: '", nextgen_file, "'\n - model attributes: '", model_attr_file, "'") - - - fema_vpu_dir <- paste0(FEMA_VPU_SUBFOLDERS[grepl(paste0("VPU_", vpu), basename(FEMA_VPU_SUBFOLDERS))], "/merged") - # fema_vpu_dir <- paste0(FEMA_VPU_SUBFOLDERS[grepl(paste0("VPU_", vpu), basename(FEMA_VPU_SUBFOLDERS))], "/merged") - - vpu_fema_files <- list.files(fema_vpu_dir, full.names = TRUE) - # vpu_fema_file1 <- vpu_fema_files[grepl("_union.gpkg", vpu_fema_files)] - - # vpu_fema_file <- vpu_fema_files[grepl(paste0(vpu, ".gpkg"), vpu_fema_files)] - vpu_fema_file <- vpu_fema_files[grepl(paste0(vpu, "_union.gpkg"), vpu_fema_files)] - vpu_fema_file - - # fema polygons and transect lines - fema <- sf::read_sf(vpu_fema_file) - # mapview::npts(fema) - - transects <- sf::read_sf(transect_path) - - # read in nextgen flowlines data - flines <- sf::read_sf(nextgen_path, layer = "flowpaths") - # - # system.time({ - # extended_transects <- get_transect_extension_distances_to_polygons(transects, fema, flines, 3000) - # }) - # - # sf::write_sf(extended_transects, '/Users/anguswatters/Desktop/test_fema_extended_trans.gpkg') - # ------- ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- --- - # ----- Generate plots of extensions ---- - # ------- ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- --- - extended_transects <- 
sf::read_sf('/Users/anguswatters/Desktop/test_fema_extended_trans.gpkg') %>% - hydrofabric3D::add_tmp_id() - # ids_of_interest = c("wb-1002167", "wb-1002166", "wb-1002165", "wb-1002164") - # ids_of_interest = c( "wb-1002166", "wb-1002165") - # ids_of_interest = c("wb-1014540", "wb-1014541", "wb-1014542", "wb-1014570", "wb-1014574", - # "wb-1014575", "wb-1014572" ,"wb-1014573" ,"wb-1014571", - # "wb-1014568", "wb-1014569", "wb-1014567", "wb-1014543") - - # ids_of_interest = c( "wb-1014542") - - any_extended <- - extended_transects %>% - dplyr::filter( - (left_is_extended & right_is_extended) | (left_is_extended & !right_is_extended) | (!left_is_extended & right_is_extended) - ) %>% - dplyr::mutate( - extend_status = dplyr::case_when( - (left_is_extended & right_is_extended) ~ "both", - (left_is_extended & !right_is_extended) ~ "left_only", - (!left_is_extended & right_is_extended) ~ "right_only", - TRUE ~ "no_extension" - ) - ) %>% - dplyr::mutate( - extend_distance = left_distance + right_distance - ) %>% - dplyr::relocate(extend_status, extend_distance) %>% - dplyr::filter(hy_id == "wb-1002550") - # dplyr::slice(1:1500) - - fema_subset_intersects <- sf::st_intersects(fema, any_extended) - fema_polygons <- fema[lengths(fema_subset_intersects) > 0, ] - fema_polygons - - og_trans <- - transects %>% - hydrofabric3D::add_tmp_id() %>% - dplyr::filter(tmp_id %in% any_extended$tmp_id | hy_id %in% any_extended$hy_id) %>% - dplyr::mutate( - extend_status = "original" - ) - - Flowlines <- - flines %>% - dplyr::filter(id %in% unique(any_extended$hy_id)) - - FEMA = fema_polygons - Extended = sf::st_buffer(any_extended, 10) - Original = sf::st_buffer(og_trans, 10) - - mapview::mapview(FEMA, col.regions = "dodgerblue") + - mapview::mapview(Flowlines, color = "darkblue") + - mapview::mapview(Extended, col.regions = "green") + - mapview::mapview(Original, col.regions = "red") - # mapview::mapview(Extended, color = "green") + - # mapview::mapview(Original, color = "red") - extend_subset <- - any_extended %>% - dplyr::filter(hy_id %in% ids_of_interest) - # dplyr::group_by(extend_status) %>% - # dplyr::arrange(-extend_distance, .by_group = TRUE) %>% - # dplyr::slice( - # which.min(extend_distance), - # which.max(extend_distance) - # ) - # dplyr::slice(1) - extend_subset <- - any_extended %>% - dplyr::group_by(extend_status) %>% - dplyr::arrange(-extend_distance, .by_group = TRUE) %>% - # dplyr::slice( - # which.min(extend_distance), - # which.max(extend_distance) - # ) - dplyr::slice(2000:2020) - extend_subset - - fline_subset <- - flines %>% - dplyr::filter(id %in% unique(extend_subset$hy_id)) - - plot_data <- - dplyr::bind_rows( - transects %>% - hydrofabric3D::add_tmp_id() %>% - dplyr::filter(tmp_id %in% extend_subset$tmp_id) %>% - dplyr::mutate( - extend_status = "original" - ), - dplyr::filter(extend_subset, - extend_status != "no_extension") - ) %>% - dplyr::relocate(extend_status, extend_distance) %>% - dplyr::mutate( - extend_status = dplyr::case_when( - extend_status != "original" ~ "extended", - TRUE ~ extend_status - ) - ) - - fema_subset_intersects <- sf::st_intersects(fema, fline_subset) - fema_polygons <- fema[lengths(fema_subset_intersects) > 0, ] - fema_polygons - - flines[lengths(sf::st_intersects(flines, fema_polygons)) > 0, ] %>% - dplyr::pull(id) %>% - unique() - fema_polygons - sf::st_crop(fema_polygons, fline_subset) - ggplot2::ggplot() + - ggplot2::geom_sf(data = sf::st_crop(fema_polygons, fline_subset), fill = "grey") + - ggplot2::geom_sf(data = fline_subset, color = "black", lwd = 1) 
+ - # ggplot2::geom_sf(data = plot_data, ggplot2::aes(color = extend_status)) - ggplot2::geom_sf(data = dplyr::filter(plot_data, extend_status == "extended"), - color = "green") + - ggplot2::geom_sf(data = dplyr::filter(plot_data, extend_status == "original"), - color = "red") - - - mapview::mapview(fema_polygons, col.regions = "dodgerblue") + - mapview::mapview(fline_subset, color = "dodgerblue") + - mapview::mapview(dplyr::filter(plot_data, extend_status == "extended"), color = "green") + - mapview::mapview(dplyr::filter(plot_data, extend_status == "original"), color = "red") - transects %>% - hydrofabric3D::add_tmp_id() %>% - dplyr::filter(tmp_id %in% extend_subset$tmp_id) %>% - dplyr::mutate( - extend_status = "original" - ) - - extend_subset - - ggplot2::ggplot() + - ggplot2::geom_sf(data = fline_subset, color = "black", lwd = 5) + - ggplot2::geom_sf(data = extend_subset, ggplot2::aes(color = extend_status)) - - mapview::mapview(fline_subset, color = "dodgerblue") + - mapview::mapview(extend_subset, color = "green") - - - both_extended <- - extended_transects %>% - dplyr::filter(left_is_extended, right_is_extended) - - left_only_extended <- - extended_transects %>% - dplyr::filter(left_is_extended, !right_is_extended) - - right_only_extended <- - extended_transects %>% - dplyr::filter(!left_is_extended, right_is_extended) - - unique(both_extended$hy_id) - - extended_transects %>% - dplyr::filter( - # tmp_id %in% unique(left_only_extended$tmp_id), - tmp_id %in% unique(right_only_extended$tmp_id) || - tmp_id %in% unique(both_extended$tmp_id) - ) - - unique(extended_transects$hy_id) - # ------- ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- --- - # ------- ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- --- - # ------- ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- --- - # library(nngeo) - # fema %>% - # dplyr::group_by(fema_id) %>% - # dplyr::mutate( - # n = dplyr::n() - # ) %>% - # dplyr::arrange(-n) %>% - # dplyr::relocate(n) - # fema_sub <- - # fema %>% - # dplyr::filter(fema_id %in% c(1726)) - # # fema %>% - # # dplyr::filter(fema_id %in% c(1726)) %>% - # mapview::mapview(fema_sub[1, ], col.regions = "red") + - # mapview::mapview(sf::st_buffer(fema_sub[2, ], 500), col.regions = "green") - # fema_no_holes <- nngeo::st_remove_holes(fema) - # fema_no_holes_union <- sf::st_union(fema_no_holes) - # touching_list = sf::st_touches(fema_no_holes) - # mapview::npts(fema) - # mapview::npts(fema_no_holes) - # mapview::npts(fema_no_holes_union) - # fema_no_holes %>% - # dplyr::mutate(new_fema_id = 1:dplyr::n()) %>% - # dplyr::group_by(new_fema_id) - # fema_no_holes_union - # fema$fema_id %>% unique() %>% length() - # # union then explode FEMA polygons - # fema <- - # fema %>% - # sf::st_union() - # # reassign IDs and change geometry column name - # fema <- - # fema %>% - # sf::st_as_sf() %>% - # dplyr::mutate(fema_id = 1:dplyr::n()) %>% - # dplyr::select(fema_id, geom = x) - - # sf::st_union() %>% - # rmapshaper::ms_explode() %>% - # sf::st_as_sf() %>% - # dplyr::mutate(fema_id = 1:dplyr::n()) %>% - # dplyr::select(fema_id, geom = x) - # Given 2 geos_geometry point geometries, create a line between the 2 points - # start: geos_geoemtry, point - # end: geos_geoemtry, point - # Returns geos_geometry linestring - make_line_from_start_and_end_pts <- function(start, end, line_crs) { - Y_start <- geos::geos_y(start) - X_start <- geos::geos_x(start) - Y_end <- geos::geos_y(end) - X_end <- geos::geos_x(end) - - # make the new transect line from 
the start and points - geos_ls <- geos::geos_make_linestring(x = c(X_start, X_end), - y = c(Y_start, Y_end), - crs = line_crs) - - return(geos_ls) - - - } - - #' Check if an updated transect line is valid relative to the other transects and flowlines in the network - #' The 'transect_to_check' should be 'used' (i.e. function returns TRUE) if - #' the 'transect_to_check' does NOT interesect any other transects ('transect_lines') AND it only intersects a single flowline ONCE. - #' If the 'transect_to_check' intersects ANY other transects OR intersects a flowline more - #' than once (OR more than one flowline in the network) then the function returns FALSE. - #' @param transect_to_check geos_geometry, linestring - #' @param transect_lines geos_geometry, linestring - #' @param flowlines geos_geometry, linestring - #' - #' @return TRUE if the extension should be used, FALSE if it shouldn't be used - #' @importFrom geos geos_intersection geos_type - is_valid_transect_line <- function(transect_to_check, transect_lines, flowlines) { - - # ### ## ## ## ## ## ## ## ## ## - # extension_line <- left_extended_trans - # transect_lines <- transect_geoms - # flowlines <- flines_geos - # ### ## ## ## ## ## ## ## ## ## - - # Define conditions to decide which version of the transect to use - - # 1. Use transect with extension in BOTH directions - # 2. Use transect with LEFT extension only - # 3. Use transect with RIGHT extension only - - # Check that the extended transect lines only intersect a single flowline in the network only ONCE - intersects_with_flowlines <- geos::geos_intersection( - transect_to_check, - flowlines - ) - intersects_flowline_only_once <- sum(geos::geos_type(intersects_with_flowlines) == "point") == 1 && - sum(geos::geos_type(intersects_with_flowlines) == "multipoint") == 0 - - # check that the extended transect line does NOT intersect other transect lines (other than SELF) - intersects_other_transects <- sum(geos::geos_intersects(transect_to_check, transect_lines)) > 1 - - # TRUE == Only one flowline is intersected a single time AND no other transect lines are intersected - use_transect <- intersects_flowline_only_once && !intersects_other_transects - - return(use_transect) - } - - # Give a set of transecct linestrings and poylgons and get the minimum distance to extend each transect line (from both directions, to try and reach the edge of a "polygons") - # internal function for extending transect lines out to FEMA 100 year flood plain polygons - # transect_lines, set of Sf linestrigns to extend (only if the transect lines are ENTIRELLY within a polygons) - # polygons, set of sf polygons that transect lines should be exteneded - # max_extension_distance numeric, maximum distance (meters) to extend a transect line in either direction to try and intersect one of the "polygons" - get_transect_extension_distances_to_polygons <- function(transect_lines, polygons, flines, max_extension_distance) { - - ### ### ### ### ### ### ### - # transect_lines <- transects - # polygons <- fema - # # # flines <- flines - # # max_extension_distance <- 3000 - # max_extension_distance = 3500 - # ### ### ### ### ### ### ### - - # keep 10% of the original points for speed - polygons <- rmapshaper::ms_simplify(polygons, keep_shapes = F, keep = 0.10) - - # mapview::npts(fema) - # mapview::npts(polygons) - # transects <- sf::read_sf(transect_path) - - # polygons - transects_geos <- geos::as_geos_geometry(transects) - polygons_geos <- geos::as_geos_geometry(polygons) - - # geos::geos_union( polygons_geos, 
polygons_geos) %>% length() - # polygons_geos %>% length() - # polygons - # polygons_geos - transects_polygons_matrix <- geos::geos_intersects_matrix(transects_geos, polygons_geos) - polygons_transects_matrix <- geos::geos_intersects_matrix(polygons_geos, transects_geos) - - # subset the transects and polygons to only those with intersections - intersect_transects <- transects[lengths(transects_polygons_matrix) != 0, ] - intersect_polygons <- polygons_geos[lengths(polygons_transects_matrix) != 0] - - # Convert our intersecting polygons to LINESTRINGS b/c we DON'T NEED polygons to calculate extension distances from our transect lines - # This can be done with just linestrings (not sure if this is actually more performent but I'm pretty sure it is....) - intersect_lines <- - intersect_polygons %>% - # geos::geos_make_valid() %>% - sf::st_as_sf() %>% - # sf::st_union() %>% - # rmapshaper::ms_explode() %>% - # sf::st_as_sf() %>% - # dplyr::mutate(fema_id = 1:dplyr::n()) %>% - # dplyr::select(fema_id, geom = x) %>% - sf::st_cast("MULTILINESTRING") %>% - geos::as_geos_geometry() %>% - geos::geos_simplify_preserve_topology(25) - - # mapview::npts(sf::st_as_sf(intersect_lines)) - - # intersect_polygons %>% - # geos::geos_make_valid() %>% - # geos::geos_is_valid() %>% all() - # is.null(intersect_lines$geometry ) - # use half of the shortest transect line as the segmentation length for all transects (ensures all transects will have a midpoint...?) - # TODO: Double check this logic. - min_segmentation <- min(intersect_transects$cs_lengthm %/% 2) - - # which.min(intersect_transects$cs_lengthm %/% 2) - - # make each transect line have way more segments so we can take a left and right half of each transect line - segmented_trans <- sf::st_segmentize(intersect_transects, min_segmentation) - - # mapview::mapview(left_trans, col.regions = "dodgerblue") + - # mapview::mapview(intersect_transects, color = "red") + - # mapview::mapview(intersect_transects[42, ], color = "yellow") + - # mapview::mapview(right_trans, color = "dodgerblue") + - # mapview::mapview(left_trans, color = "green") - - # Seperate the transect lines into LEFT and RIGHT halves - # We do this so we can check if a side of a transect is ENTIRELY WITHIN a polygon. 
- # If the half is entirely within a polygon, - left_trans <- - segmented_trans %>% - lwgeom::st_linesubstring(0, 0.50) %>% - dplyr::mutate( - partition = "left", - partition_lengthm = as.numeric(sf::st_length(geom)) - ) %>% - hydrofabric3D::add_tmp_id() %>% - dplyr::select(tmp_id, hy_id, cs_source, cs_id, cs_measure, - cs_lengthm, is_extended, partition, partition_lengthm, geom) - - # Find the distances from the right side of transect lines - right_trans <- - segmented_trans %>% - lwgeom::st_linesubstring(0.50, 1) %>% - dplyr::mutate( - partition = "right", - partition_lengthm = as.numeric(sf::st_length(geom)) - ) %>% - hydrofabric3D::add_tmp_id() %>% - dplyr::select(tmp_id, hy_id, cs_source, cs_id, cs_measure, - cs_lengthm, is_extended, partition, partition_lengthm, geom) - - # convert the transect geometries to geos types - # get the fema polygon indices for the transect halves that are completely within a fema polygon - # add the fema polygons index as a column to the transect dataframes - left_trans_geos <- geos::as_geos_geometry(left_trans) - right_trans_geos <- geos::as_geos_geometry(right_trans) - - left_within_matrix <- geos::geos_within_matrix(left_trans_geos, intersect_polygons) - right_within_matrix <- geos::geos_within_matrix(right_trans_geos, intersect_polygons) - - left_within_vect <- lapply(left_within_matrix, function(i) { if(length(i) > 0) { c(i) } else { c(NA) } }) - right_within_vect <- lapply(right_within_matrix, function(i) { if(length(i) > 0) { c(i) } else { c(NA) } }) - - # add the fema polygon indexes as columns - left_trans$left_fema_index <- left_within_vect - right_trans$right_fema_index <- right_within_vect - - # add boolean columns whether the transect is fully within the FEMA polygons - left_trans <- - left_trans %>% - dplyr::mutate( - left_is_within_fema = dplyr::case_when( - !is.na(left_fema_index) ~ TRUE, - TRUE ~ FALSE - ) - ) %>% - dplyr::select(tmp_id, hy_id, cs_source, cs_id, cs_measure, - cs_lengthm, is_extended, partition, partition_lengthm, - left_fema_index, left_is_within_fema, - geom - ) - - right_trans <- - right_trans %>% - dplyr::mutate( - right_is_within_fema = dplyr::case_when( - !is.na(right_fema_index) ~ TRUE, - TRUE ~ FALSE - ) - ) %>% - dplyr::select(tmp_id, hy_id, cs_source, cs_id, cs_measure, - cs_lengthm, is_extended, partition, partition_lengthm, - right_fema_index, right_is_within_fema, - geom - ) - - # max_extension_distance = 3000 - # which(transects_with_distances$hy_id == "wb-1003839") - # - # left_trans[which(left_trans$hy_id == "wb-1003839"), ]$cs_lengthm - # right_trans[which(left_trans$hy_id == "wb-1003839"), ]$cs_lengthm - # - # which(right_trans$hy_id == "wb-1003839") - - left_distances <- calc_extension_distances( - geos_geoms = left_trans_geos, - ids = left_trans$tmp_id, - lines_to_cut = intersect_lines, - lines_to_cut_indices = left_trans$left_fema_index, - direction = "head", - max_extension_distance = max_extension_distance - ) - - right_distances <- calc_extension_distances( - geos_geoms = right_trans_geos, - ids = right_trans$tmp_id, - lines_to_cut = intersect_lines, - lines_to_cut_indices = right_trans$right_fema_index, - direction = "tail", - max_extension_distance = max_extension_distance - ) - - left_trans$left_distance <- left_distances - right_trans$right_distance <- right_distances - - # extensions_by_id %>% - # dplyr::filter(hy_id == "wb-1003839") - # - # distance to extend LEFT and/or RIGHT for each hy_id/cs_id - extensions_by_id <- dplyr::left_join( - sf::st_drop_geometry( - dplyr::select(left_trans, - 
hy_id, cs_id, left_distance) - ), - sf::st_drop_geometry( - dplyr::select(right_trans, - hy_id, cs_id, - right_distance) - ), - by = c("hy_id", "cs_id") - ) - - # ######### ######## ######## ######## ####### - # # TODO: This is temporary !!!!!! - # fema_index_df <- dplyr::left_join( - # sf::st_drop_geometry( - # dplyr::select(left_trans, - # hy_id, cs_id, left_distance, left_fema_index) - # ), - # sf::st_drop_geometry( - # dplyr::select(right_trans, - # hy_id, cs_id, - # right_distance, right_fema_index) - # ), - # by = c("hy_id", "cs_id") - # ) - ######## ######## ######## ######## ######## - - # foi <- sf::st_as_sf(intersect_polygons[fema_uids]) %>% dplyr::mutate( - # fema_id = fema_uids - # ) - # - # polygons_to_merge <- sf::st_as_sf(intersect_polygons[fema_uids]) %>% dplyr::mutate( - # fema_id = fema_uids - # ) %>% - # dplyr::filter(fema_id %in% c(1563, 1566, 1567, 590)) - # merged_polygon <- - # polygons_to_merge %>% - # sf::st_union() - # merged_polygon %>% - # rmapshaper::ms_explode() - # mapview::mapview(foi, col.regions = "dodgerblue") + - # mapview::mapview(polygons_to_merge, col.regions = "yellow") + - # mapview::mapview(merged_polygon, col.regions = "green") + - # mapview::mapview(toi, color = "red") + - # mapview::mapview(og_trans, color = "green") - # polygons %>% - # dplyr::filter(fema_id ) - ######## ######## ######## ######## ######## - - # TODO: Add left/right extension distancces to transect data - # TODO: this can ultimately just be the "transects" variable, dont need to make new "transects_with_distances" variable - transects <- - transects %>% - dplyr::left_join( - extensions_by_id, - by = c("hy_id", "cs_id") - ) %>% - dplyr::mutate( - left_distance = dplyr::case_when( - is.na(left_distance) ~ 0, - TRUE ~ left_distance - ), - right_distance = dplyr::case_when( - is.na(right_distance) ~ 0, - TRUE ~ right_distance - ) - # left_distance = dplyr::case_when( - # left_distance == 0 ~ NA, - # TRUE ~ left_distance - # ), - # right_distance = dplyr::case_when( - # right_distance == 0 ~ NA, - # TRUE ~ right_distance - # ) - ) %>% - hydrofabric3D::add_tmp_id() - - # transects_with_distances %>% - # dplyr::filter(hy_id == "wb-1003839") - # # transects - # flines %>% - # dplyr::arrange(-tot_drainage_areasqkm) - # flines %>% - # dplyr::arrange(-hydroseq) %>% - # dplyr::filter(hydroseq == min(hydroseq) | hydroseq == max(hydroseq)) %>% - # # dplyr::filter(hydroseq == max(hydroseq)) %>% - # mapview::mapview() - - # # extensions_by_id - # transects_with_distances %>% - # dplyr::filter(!tmp_id %in% hydrofabric3D::add_tmp_id(extensions_by_id)$tmp_id) - -# ######## ######## ######## ######## ######## ######## ######## ######## -# left_extended_flag[1:20] -# right_extended_flag[1:20] -# both_extended_flag[1:20] -# -# range_vect <- 1:500 -# -# fema_uids <- unique(c(unlist(fema_index_df[range_vect, ]$left_fema_index), unlist(fema_index_df[range_vect, ]$right_fema_index))) -# fema_uids <- fema_uids[!is.na(fema_uids)] -# fema_uids -# foi <- sf::st_as_sf(intersect_polygons[fema_uids]) %>% dplyr::mutate( -# fema_id = fema_uids -# ) -# # ooi <- sf::st_as_sf() -# # toi <- sf::st_as_sf(new_transects[1:20]) -# toi <- sf::st_as_sf(transect_geoms[range_vect]) -# toi -# og_trans <- transects_with_distances[range_vect, ] -# mapview::mapview(foi, col.regions = "dodgerblue") + -# mapview::mapview(toi, color = "red") + -# mapview::mapview(og_trans, color = "green") - ######## ######## ### ##### ######## ######## ######## ######## ######## - - fline_id_array <- flines$id - - # Convert the net object into a 
geos_geometry - flines_geos <- geos::as_geos_geometry(flines) - - transect_hy_id_array <- transects$hy_id - transect_cs_id_array <- transects$cs_id - - transect_geoms <- geos::as_geos_geometry(transects$geom) - - left_distances <- transects$left_distance - right_distances <- transects$right_distance - - # preallocate vector that stores the extension. distances - new_transects <- vctrs::vec_c(rep(geos::geos_empty(), length(transect_hy_id_array))) - - left_extended_flag <- rep(FALSE, length(transect_hy_id_array)) - right_extended_flag <- rep(FALSE, length(transect_hy_id_array)) - both_extended_flag <- rep(FALSE, length(transect_hy_id_array)) - - - updated_left_distances <- rep(0, length(transect_hy_id_array)) - updated_right_distances <- rep(0, length(transect_hy_id_array)) - - # new_transects <- geos::geos_empty() - # # measures <- vctrs::vec_c() - # transects_with_distances[1:20, ] - # transects[1:20, ] - - # number of geometries that will be iterated over, keeping this variable to reference in message block - total <- length(transect_hy_id_array) - - # output a message every ~10% intervals - message_interval <- total %/% 20 - number_of_skips = 0 - - for (i in seq_along(transect_hy_id_array)) { - # message("i: ", i) - # i = 13 - # if(i > 2000) { - # message("-----> STOP BECAUSE at i", i) - # break - # } - # i = 1 - # Check if the iteration is a multiple of 100 - if (i %% message_interval == 0) { - - # get the percent complete - percent_done <- round(i/total, 2) * 100 - - # Print the message every "message_interval" - # if(verbose) { - message(i, " > ", percent_done, "% ") - message("Number of skips: ", number_of_skips) - # } - # message("Iteration ", i, " / ", length(extended_trans), - # " - (", percent_done, "%) ") - - } - # which(transects_with_distances$hy_id == "wb-1003839") - # i = 9587 - # get the current transect, hy_id, cs_id, flowline, and extension distances - current_trans <- transect_geoms[i] - - current_hy_id <- transect_hy_id_array[i] - current_cs_id <- transect_cs_id_array[i] - - current_fline <- flines_geos[fline_id_array == current_hy_id] - - left_distance_to_extend <- left_distances[i] - right_distance_to_extend <- right_distances[i] - - no_extension_required <- (left_distance_to_extend == 0 && right_distance_to_extend == 0) - # no_extension_required <- is.na(left_distance_to_extend) && is.na(right_distance_to_extend) - # message("Transect tmp_id: ", curr_tmp_id, " - (", i, ")") - - if(no_extension_required) { - # message("Skipping -left/right extension are both 0") - number_of_skips = number_of_skips + 1 - - next - } - - # message("Extending transect line left and right") - # extend the lines - left_extended_trans <- hydrofabric3D::geos_extend_line(current_trans, - left_distance_to_extend, "head") - right_extended_trans <- hydrofabric3D::geos_extend_line(current_trans, - right_distance_to_extend, "tail") - - # initial check to make sure the extended versions of the transects are valid - use_left_extension <- is_valid_transect_line(left_extended_trans, transect_geoms, flines_geos) - use_right_extension <- is_valid_transect_line(right_extended_trans, transect_geoms, flines_geos) - # use_both_extensions <- use_left_extension && use_right_extension - - used_half_of_left <- FALSE - used_half_of_right <- FALSE - - # TODO: Probably should precompute this division BEFORE the loop... 
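# NOTE (editor's sketch): the TODO above, in this since-deleted script, could be
# addressed by hoisting the halving out of the loop as two vectorized computations
# and indexing with [i] inside it; left_distances and right_distances are the vectors
# already pulled from the transects earlier in this script.
half_left_distances  <- ifelse(left_distances  > 0, left_distances  %/% 2, 0)
half_right_distances <- ifelse(right_distances > 0, right_distances %/% 2, 0)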
- half_left_distance <- ifelse(left_distance_to_extend > 0, left_distance_to_extend %/% 2, 0) - half_right_distance <- ifelse(right_distance_to_extend > 0, right_distance_to_extend %/% 2, 0) - - # if we CAN'T use the original LEFT extension distance, - # we try HALF the distance (or some distane less than we extended by in the first place) - if (!use_left_extension) { - - # half_left_distance <- ifelse(left_distance_to_extend > 0, left_distance_to_extend %/% 2, 0) - left_extended_trans <- hydrofabric3D::geos_extend_line(current_trans, - half_left_distance, "head") - use_left_extension <- is_valid_transect_line(left_extended_trans, transect_geoms, flines_geos) - - used_half_of_left <- ifelse(use_left_extension, TRUE, FALSE) - } - - # if we CAN'T use the original RIGHT extension distance, - # we try HALF the distance (or some distance less than we extended by in the first place) - if (!use_right_extension) { - - # half_right_distance <- ifelse(right_distance_to_extend > 0, right_distance_to_extend %/% 2, 0) - right_extended_trans <- hydrofabric3D::geos_extend_line(current_trans, - half_right_distance, "tail") - use_right_extension <- is_valid_transect_line(right_extended_trans, transect_geoms, flines_geos) - - used_half_of_right <- ifelse(use_right_extension, TRUE, FALSE) - - # mapview::mapview(sf::st_as_sf(current_trans), color = "red") + - # mapview::mapview(sf::st_as_sf(left_extended_trans), color = "green") + - # mapview::mapview(sf::st_as_sf(right_extended_trans), color = "green") + - # mapview::mapview(sf::st_as_sf(left_extended_trans2), color = "dodgerblue") + - # mapview::mapview(sf::st_as_sf(right_extended_trans2), color = "dodgerblue") - - } - - use_both_extensions <- use_left_extension && use_right_extension - - # # message("Checking left and right intersections with flowline...") - # # --------------------------------------------------------------------------------- - # # TODO: UNCOMMENT BELOW ---> this was my original method - # # --------------------------------------------------------------------------------- - # # Check that the extended transect lines only intersect the current flowline once - # left_intersects_fline <- geos::geos_intersection( - # left_extended_trans, - # # current_fline - # flines_geos - # ) - # - # right_intersects_fline <- geos::geos_intersection( - # right_extended_trans, - # # current_fline - # flines_geos - # ) - # - # - # # mapview::mapview(sf::st_as_sf(flines_geos[which(geos::geos_type(left_intersects_fline) == "point")])) + - # # mapview::mapview(sf::st_as_sf(current_trans), color = "red") + - # # mapview::mapview(sf::st_as_sf(left_extended_trans), color = "green") + - # # mapview::mapview(sf::st_as_sf(right_extended_trans), color = "green") - # - # # Define conditions to decide which version of the transect to use - # - # # 1. Use transect with extension in BOTH directions - # # 2. Use transect with LEFT extension only - # # 3. 
-    # 
-    # # left_intersects_fline_once  <- geos::geos_type(left_intersects_fline) == "point"
-    # # right_intersects_fline_once <- geos::geos_type(right_intersects_fline) == "point"
-    # left_intersects_fline_once <- sum(geos::geos_type(left_intersects_fline) == "point") == 1 && 
-    #                                 sum(geos::geos_type(left_intersects_fline) == "multipoint") == 0
-    # 
-    # right_intersects_fline_once <- sum(geos::geos_type(right_intersects_fline) == "point") == 1 && 
-    #                                 sum(geos::geos_type(right_intersects_fline) == "multipoint") == 0
-    # 
-    # # sum(geos::geos_type(left_intersects_fline) == "point") == 1
-    # # sum(geos::geos_type(right_intersects_fline) == "point") == 1
-    # # sum(geos::geos_type(left_intersects_fline) == "multipoint") == 0
-    # 
-    # 
-    # 
-    # # # TODO: Consider doing the opposite of these conditions (i.e. "left_intersects_other_transects" = TRUE)
-    # # left_does_not_intersect_other_transects  <- !any(geos::geos_intersects(left_extended_trans, transect_geoms[-i]))
-    # # right_does_not_intersect_other_transects <- !any(geos::geos_intersects(right_extended_trans, transect_geoms[-i]))
-    # # 
-    # # use_left_extension  <- left_intersects_fline_once && left_does_not_intersect_other_transects
-    # # use_right_extension <- right_intersects_fline_once && right_does_not_intersect_other_transects
-    # # use_both_extensions <- use_left_extension && use_right_extension
-    # 
-    # 
-    # # TODO: This is the opposite phrasing of these conditions, I think this is clearer to read
-    # left_intersects_other_transects  <- any(geos::geos_intersects(left_extended_trans, transect_geoms[-i]))
-    # right_intersects_other_transects <- any(geos::geos_intersects(right_extended_trans, transect_geoms[-i]))
-    # 
-    # # # make sure the extended transects don't hit any of the newly extended transects
-    # # # NOTE: I think this could be just done with a single transect list that starts with the original transects and if an update happens then we replace that transect
-    # # left_intersects_new_transects  <- any(geos::geos_intersects(left_extended_trans, new_transects))
-    # # right_intersects_new_transects <- any(geos::geos_intersects(right_extended_trans, new_transects))
-    # 
-    # # make TRUE/FALSE flags stating which transect we should use
-    # # - BOTH extensions
-    # # - LEFT ONLY extensions
-    # # - RIGHT only extensions
-    # use_left_extension  <- left_intersects_fline_once && !left_intersects_other_transects
-    # use_right_extension <- right_intersects_fline_once && !right_intersects_other_transects
-    # use_both_extensions <- use_left_extension && use_right_extension
-    # 
-    # new_use_left_extension  <- is_valid_transect_line(left_extended_trans, transect_geoms, flines_geos)
-    # new_use_right_extension <- is_valid_transect_line(right_extended_trans, transect_geoms, flines_geos)
-    # new_use_both_extensions <- new_use_left_extension && new_use_right_extension
-    # 
-    # message("--------------------------------------------")
-    # message("Left intersects FLINE ONCE: ", left_intersects_fline_once)
-    # message("Right intersects FLINE ONCE: ", right_intersects_fline_once)
-    # message()
-    # message("Left intersects OTHER TRANSECTS: ", left_intersects_other_transects)
-    # message("Right intersects OTHER TRANSECTS: ", right_intersects_other_transects)
-    # message()
-    # message("Use LEFT extension intersects: ", use_left_extension)
-    # message("Use RIGHT extension intersects: ", use_right_extension)
-    # message("Use BOTH extension intersects: ", use_both_extensions)
-    # message()
-    # message("--------------------------------------------")
-    # message()
-    # # merged_trans <- geos::geos_union(left_extended_trans, right_extended_trans)
-    # # sf::st_union(sf::st_cast(sf::st_as_sf(merged_trans), "LINESTRING"))
-    # # mapview::mapview(sf::st_as_sf(merged_trans), color = "green") +
-    # #   mapview::mapview(sf::st_as_sf(left_start), col.region = "red") +
-    # #   mapview::mapview(sf::st_as_sf(left_end), col.region = "red") +
-    # #   mapview::mapview(sf::st_as_sf(right_start), col.region = "dodgerblue") +
-    # #   mapview::mapview(sf::st_as_sf(right_end), col.region = "dodgerblue")
-    # # ---------------------------------------------------------------------------------
-    # # TODO: UNCOMMENT ABOVE ---> this was my original method
-    # # ---------------------------------------------------------------------------------
-    # 
-    # if(use_both_extensions) {
-    
-    # Get the start and end of both extended transects
-    left_start  <- geos::geos_point_start(left_extended_trans)
-    left_end    <- geos::geos_point_end(left_extended_trans)
-    right_start <- geos::geos_point_start(right_extended_trans)
-    right_end   <- geos::geos_point_end(right_extended_trans)
-    
-    # }
-    # Extend in BOTH directions
-    if(use_both_extensions) {
-      # message("Extend direction: BOTH")
-      start <- left_start
-      end   <- right_end
-      
-      # extend ONLY the left side
-    } else if(use_left_extension && !use_right_extension) {
-      # message("Extend direction: LEFT")
-      start <- left_start
-      end   <- left_end
-      
-      # Extend ONLY the right side
-    } else if(!use_left_extension && use_right_extension) {
-      # message("Extend direction: RIGHT")
-      start <- right_start
-      end   <- right_end
-      
-      # DO NOT extend either direction
-    } else {
-      # message("No extension")
-      # TODO: Really don't need to do anything
-      # TODO: in this scenario because we just use the original transect line
-      start <- left_end
-      end   <- right_start
-    }
-    
-    # trans_needs_extension <- use_left_extension || use_right_extension
-    
-    # if(trans_needs_extension) {
-    
-    line_crs      <- wk::wk_crs(current_trans)
-    updated_trans <- make_line_from_start_and_end_pts(start, end, line_crs)
-    
-    # touched_flines <- flines[geos::geos_type(right_intersects_fline) != "linestring", ]
-    # mapview::mapview(touched_flines, color = "dodgerblue") +
-    #   mapview::mapview(sf::st_as_sf(current_trans), color = "red") +
-    #   mapview::mapview(sf::st_as_sf(left_extended_trans), color = "green") +
-    #   mapview::mapview(sf::st_as_sf(right_extended_trans), color = "green") +
-    #   mapview::mapview(sf::st_as_sf(updated_trans), color = "yellow")
-    # nrow(flines)
-    # touched_flines <- flines[geos::geos_type(right_intersects_fline) != "linestring", ]
-    # flines[lengths(right_intersects_fline) == 0, ]
-    # length(right_intersects_fline)
-    # }
-    
-    # ---------------------------------------------------
-    # TODO: UNCOMMENT BELOW
-    # ---------------------------------------------------
-    if(use_left_extension) {
-      left_extended_flag[i] <- TRUE
-    }
-    
-    if(use_right_extension) {
-      right_extended_flag[i] <- TRUE
-    }
-    
-    if(use_both_extensions) {
-      both_extended_flag[i] <- TRUE
-    }
-    
-    if(used_half_of_left) {
-      updated_left_distances[i] <- half_left_distance
-    }
-    if(used_half_of_right) {
-      updated_right_distances[i] <- half_right_distance
-    }
-    
-    # new_transects[i] <- updated_trans
-    transect_geoms[i] <- updated_trans
-    
-    # ---------------------------------------------------
-    # TODO: UNCOMMENT ABOVE
-    # ---------------------------------------------------
-    
-    # start %>% class()
-  }
-  
-  # transects2 <- transects
-  #   dplyr::mutate(
-  #     new_cs_lengthm = as.numeric(sf::st_length(geom))
-  #   ) %>%
-  #   dplyr::relocate(hy_id, cs_id, cs_lengthm, new_cs_lengthm)
-  
-  
-  # Update the "transects_to_extend" with new geos geometries ("geos_list")
-  sf::st_geometry(transects) <- sf::st_geometry(sf::st_as_sf(transect_geoms))
-  
-  transects <- 
-    transects %>% 
-    dplyr::mutate(
-      new_cs_lengthm = as.numeric(sf::st_length(geom))
-    ) %>% 
-    dplyr::relocate(hy_id, cs_id, cs_lengthm, new_cs_lengthm)
-  
-  # transects2 %>% 
-  #   dplyr::filter(
-  #     new_cs_lengthm > cs_lengthm
-  #   )
-  # 
-  
-  transects$left_is_extended  <- left_extended_flag
-  transects$right_is_extended <- right_extended_flag
-  
-  return(transects)
-  
-  }
-
-# # ----------------------------------------------------------------
-# # ----------------------------------------------------------------
-# # ----------------------------------------------------------------
-# 
-# # make the new transect line from the start and end points
-# final_line <- geos::geos_make_linestring(x = c(X_start, X_end),
-#                                          y = c(Y_start, Y_end),
-#                                          crs = wk::wk_crs(current_trans)
-#                                          )
-# geos::geos_make_collection(start, type_id = "LINESTRING")
-# 
-# 
-# left_start <- geos::geos_point_start(left_extended_trans)
-# right_end  <- geos::geos_point_end(right_extended_trans)
-# 
-# fline_intersects
-# geos::geos_type(fline_intersects) == "point"
-# 
-# geos::geos_intersects(current_trans, )
-# 
-# # if(
-# #   geos::geos_type(fline_intersects)
-# #   
-# #   
-# #   )
-# mapview::mapview(sf::st_as_sf(trans), color = "green") +
-#   mapview::mapview(sf::st_as_sf(left_extended_trans), color = "red") +
-#   mapview::mapview(sf::st_as_sf(right_extended_trans), color = "red") +
-#   mapview::mapview( sf::st_as_sf(intersect_polygons[[curr_fema_index]]), col.regions = "dodgerblue") +
-#   mapview::mapview( sf::st_as_sf(final_line), color = "yellow")
-
-# 
-# }
-
-# left_trans$left_extension_distance
-
-# length(right_distances)
-# # Calculate the minimum distance a line would need to extend to reach the boundary of the polygon/line that the input geometries are entirely within
- calc_extension_distances <- function(geos_geoms, ids, lines_to_cut, lines_to_cut_indices, direction = "head", max_extension_distance = 2500) {
-    ##### ##### ##### ##### #####
-    # geos_geoms <- left_trans_geos
-    
-    # geos_geoms = left_trans_geos
-    # ids = left_trans$tmp_id
-    # lines_to_cut = intersect_lines
-    # lines_to_cut_indices = left_trans$left_fema_index
-    # direction = "head"
-    # max_extension_distance = max_extension_distance
-    # 
-    
-    ##### ##### ##### ##### #####
-    
-    if (!direction %in% c("head", "tail")) {
-      stop("Invalid 'direction' value, must be one of 'head' or 'tail'")
-    }
-    
-    # preallocate vector that stores the extension distances
-    extension_dists <- vctrs::vec_c(rep(0, length(ids)))
-    
-    # extension_dists <- vector(mode = "numeric", length = nrow(trans_data))
-    for (i in seq_along(ids)) {
-      # i = 118
-      curr_id           <- ids[i]
-      is_within_polygon <- any(!is.na(lines_to_cut_indices[[i]]))
-      polygon_index     <- lines_to_cut_indices[[i]]
-      # any(is_within_polygon)
-      message("Transect: '", curr_id, "' - (", i, ")")
-      
-      if (is_within_polygon) {
-        message("- Side of transect intersects with FEMA")
-        message("\t > FEMA index: ", polygon_index)
-        
-        curr_geom  <- geos_geoms[[i]]
-        index_vect <- sort(unlist(polygon_index))
-        
-        distance_to_extend <- hydrofabric3D:::geos_bs_distance(
-          distances    = 1:max_extension_distance,
-          line         = curr_geom,
-          geoms_to_cut = lines_to_cut[index_vect],
-          direction    = direction
-        )
-        
-        extension_dists[i] <- distance_to_extend
-      }
-      
-    }
-    
-    return(extension_dists)
-  }
diff --git a/runners/cs_runner/preprocess_fema.R b/runners/cs_runner/preprocess_fema.R
index 1e1cba5..ee08e25 100644
--- a/runners/cs_runner/preprocess_fema.R
+++ b/runners/cs_runner/preprocess_fema.R
@@ -22,6 +22,8 @@ library(geos)
 library(fastmap)
 library(nngeo)
 
+# devtools::install_github("anguswg-ucsb/hydrofabric3D")
+
 # TODO: Steps that convert FGB to geojson and then geojson to gpkg can be put into a single loop
 # TODO: Delete old files as needed
 
@@ -33,7 +35,8 @@ library(nngeo)
 
 # Default is TRUE (i.e. a fresh processing run is done from start to finish)
 OVERWRITE_FEMA_FILES  <- TRUE
-DELETE_STAGING_GPKGS  <- TRUE
+DELETE_STAGING_GPKGS  <- FALSE
+# DELETE_STAGING_GPKGS  <- TRUE
 
 # -------------------------------------------------------------------------------------
 # ---- Create directories (if they do NOT exist) ----
@@ -337,8 +340,6 @@ for (file_path in FEMA_CLEAN_GPKG_PATHS) {
 # ---- Loop through each VPU subfolder and merge all of the Geopackages into one ----
 # -------------------------------------------------------------------------------------
 
-DELETE_STAGING_GPKGS <- F
-
 for (vpu_dir in FEMA_VPU_SUBFOLDERS) {
   # for (i in 1:4) {
   # vpu_dir = FEMA_VPU_SUBFOLDERS2[12]
@@ -399,8 +400,16 @@ for (vpu_dir in FEMA_VPU_SUBFOLDERS) {
 # -------------------------------------------------------------------------------------
 # ---- Union each VPU geopackage (either on state or just touching predicate) ----
 # -------------------------------------------------------------------------------------
+# for (i in 5:length(FEMA_VPU_SUBFOLDERS)) {
+# 
+#   vpu_dir <- FEMA_VPU_SUBFOLDERS[i]
+#   VPU     <- basename(vpu_dir)
+# 
+#   message(i, " - Attempting to union FEMA polygons for '", VPU, "'...")
+# }
 
-for (i in 1:length(FEMA_VPU_SUBFOLDERS)) {
+for (i in 5:length(FEMA_VPU_SUBFOLDERS)) {
+  
   vpu_dir <- FEMA_VPU_SUBFOLDERS[i]
   VPU     <- basename(vpu_dir)
 
@@ -411,6 +420,9 @@ for (i in 1:length(FEMA_VPU_SUBFOLDERS)) {
   master_gpkg_name <- paste0(master_name, ".gpkg")
   master_filepath  <- paste0(vpu_dir, "/", master_gpkg_name)
 
+  updated_gpkg_name <- gsub(".gpkg", "_output.gpkg", master_gpkg_name)
+  updated_filepath  <- paste0(vpu_dir, "/", updated_gpkg_name)
+
   message("> Re-unioning and re-exploding geometries in '", basename(master_filepath), "'")
 
   if(!file.exists(master_filepath)) {
@@ -423,21 +435,12 @@ for (i in 1:length(FEMA_VPU_SUBFOLDERS)) {
 
   fema_vpu <- sf::read_sf(master_filepath)
 
   # fema_vpu %>% sf::st_geometry_type() %>% unique()
-
-  fema_vpu <-
-    fema_vpu %>%
-    nngeo::st_remove_holes(max_area = 200) %>%
-    # dplyr::select(geometry = geom) %>%
-    add_predicate_group_id(sf::st_intersects) %>%
-    sf::st_make_valid() %>%
-    dplyr::group_by(group_id) %>%
-    dplyr::summarise(
-      geometry = sf::st_combine(sf::st_union(geometry))
-    ) %>%
-    dplyr::ungroup() %>%
-    dplyr::select(-group_id) %>%
-    add_predicate_group_id(sf::st_intersects)
-
+  # fema_vpu %>% mapview::npts()
+  # fema_vpu %>% sf::st_is_valid() %>% all()
+  # fema_vpu %>%
+  #   sf::st_make_valid() %>%
+  #   sf::st_geometry_type() %>%
+  #   unique()
 
   geom_type_counts <- table(sf::st_geometry_type(fema_vpu))
 
   message("Geometry counts before casting all geometries to MULTIPOLYGON:")
@@ -445,20 +448,122 @@ for (i in 1:length(FEMA_VPU_SUBFOLDERS)) {
     message(" > ", names(geom_type_counts[g]), ": ", geom_type_counts[g])
   }
 
-  message("Keeping only POLYGON and MULTIPOLYGON geometries...")
-  fema_vpu <-
-    fema_vpu %>%
-    dplyr::filter(sf::st_geometry_type(geometry) %in% c("POLYGON", "MULTIPOLYGON")) %>%
-    sf::st_cast("MULTIPOLYGON") %>%
-    sf::st_make_valid() %>%
-    # dplyr::group_by(group_id) %>%
-    rmapshaper::ms_dissolve(sys = TRUE, sys_mem = 16) %>%
-    rmapshaper::ms_explode(sys = TRUE, sys_mem = 16) %>%
-    nngeo::st_remove_holes(max_area = 200) %>%
-    dplyr::mutate(
-      fema_id = as.character(1:dplyr::n())
-    ) %>%
-    dplyr::select(fema_id, geometry)
+  # mapview::mapview(fema_vpu, color = 'red', col.regions = 'white') +
+  #   mapview::mapview(fema_union, color = 'green', col.regions = 'white')
+  # fema_vpu %>%
+  #   sf::st_make_valid() %>%
+  #   dplyr::filter(sf::st_geometry_type(geom) %in% c("POLYGON", "MULTIPOLYGON")) %>%
+  #   sf::st_is_valid() %>%
+  #   all()
+
+  tryCatch({
+
+    fema_vpu <-
+      fema_vpu %>%
+      nngeo::st_remove_holes(max_area = 200) %>%
+      sf::st_make_valid() %>%
+      # dplyr::filter(sf::st_geometry_type(geom) %in% c("POLYGON", "MULTIPOLYGON")) %>%
+      add_predicate_group_id(sf::st_intersects) %>%
+      dplyr::group_by(group_id) %>%
+      rmapshaper::ms_dissolve(sys = TRUE, sys_mem = 16) %>%
+      rmapshaper::ms_explode(sys = TRUE, sys_mem = 16) %>%
+      dplyr::ungroup() %>%
+      nngeo::st_remove_holes(max_area = 200) %>%
+      dplyr::mutate(
+        vpu     = gsub("VPU_", "", VPU),
+        fema_id = as.character(1:dplyr::n())
+      ) %>%
+      dplyr::select(
+        vpu, fema_id,
+        # state,
+        geom = geometry
+      )
+
+  }, error = function(e) {
+    message(VPU, " threw the following error \n ", e)
+    message(" > Cleaning ", VPU, " using a backup cleaning strategy...")
+
+    fema_vpu <-
+      fema_vpu %>%
+      sf::st_make_valid() %>%
+      dplyr::mutate(
+        vpu     = gsub("VPU_", "", VPU),
+        fema_id = as.character(1:dplyr::n())
+      ) %>%
+      dplyr::select(
+        vpu, fema_id,
+        # state,
+        geom
+      )
+
+  })
+
+  # fema_vpu2 <-
+  #   fema_vpu %>%
+  #   nngeo::st_remove_holes(max_area = 200) %>%
+  #   # sf::st_make_valid() %>%
+  #   # dplyr::filter(sf::st_geometry_type(geom) %in% c("POLYGON", "MULTIPOLYGON")) %>%
+  #   add_predicate_group_id(sf::st_intersects) %>%
+  #   dplyr::group_by(group_id) %>%
+  #   dplyr::summarise(
+  #     geometry = sf::st_combine(sf::st_union(geometry))
+  #   ) %>%
+  #   rmapshaper::ms_explode(sys = TRUE, sys_mem = 16) %>%
+  #   dplyr::ungroup() %>%
+  #   nngeo::st_remove_holes(max_area = 200)
+
+  # fema_vpu <-
+  #   fema_vpu %>%
+  #   dplyr::mutate(
+  #     vpu     = gsub("VPU_", "", VPU),
+  #     fema_id = as.character(1:dplyr::n())
+  #   ) %>%
+  #   dplyr::select(
+  #     vpu, fema_id,
+  #     # state,
+  #     geom = geometry
+  #   )
+
+
+  # fema_vpu <-
+  #   fema_vpu %>%
+  #   nngeo::st_remove_holes(max_area = 200) %>%
+  #   # dplyr::select(geometry = geom) %>%
+  #   add_predicate_group_id(sf::st_intersects) %>%
+  #   sf::st_make_valid() %>%
+  #   dplyr::group_by(group_id) %>%
+  #   dplyr::summarise(
+  #     geometry = sf::st_combine(sf::st_union(geometry))
+  #   ) %>%
+  #   dplyr::ungroup() %>%
+  #   dplyr::select(-group_id) %>%
+  #   add_predicate_group_id(sf::st_intersects)
+  # 
+  # fema_vpu %>% sf::st_geometry_type() %>% unique()
+  # fema_vpu %>% mapview::npts()
+  # 
+  # geom_type_counts <- table(sf::st_geometry_type(fema_vpu))
+  # 
+  # message("Geometry counts before casting all geometries to MULTIPOLYGON:")
+  # for (g in seq_along(geom_type_counts)) {
+  #   message(" > ", names(geom_type_counts[g]), ": ", geom_type_counts[g])
+  # }
+  # 
+  # message("Keeping only POLYGON and MULTIPOLYGON geometries...")
+  # fema_vpu <-
+  #   fema_vpu %>%
+  #   dplyr::filter(sf::st_geometry_type(geometry) %in% c("POLYGON", "MULTIPOLYGON")) %>%
+  #   sf::st_cast("MULTIPOLYGON") %>%
+  #   sf::st_make_valid() %>%
+  #   # dplyr::group_by(group_id) %>%
+  #   rmapshaper::ms_dissolve(sys = TRUE, sys_mem = 16) %>%
+  #   rmapshaper::ms_explode(sys = TRUE, sys_mem = 16) %>%
+  #   nngeo::st_remove_holes(max_area = 200) %>%
+  #   dplyr::mutate(
+  #     fema_id = as.character(1:dplyr::n())
+  #   ) %>%
+  #   dplyr::select(fema_id, geometry)
 
   # end_geom_type_counts <- table(sf::st_geometry_type(fema_vpu))
   # message("Geometry counts after all processing steps: ")
@@ -468,24 +573,23 @@ for (i in 1:length(FEMA_VPU_SUBFOLDERS)) {
 
   # fema_vpu2 %>% mapview::npts()
   # fema_vpu2_subset <- fema_vpu2[lengths(sf::st_intersects(fema_vpu2, fema_vpu[1:100, ])) > 1, ]
-
   # mapview::mapview(fema_vpu, color = 'red', col.regions = 'white')  +
   # mapview::mapview(fema_vpu2, color = 'green', col.regions = 'white')
   # # mapview::mapview(fema_vpu2_subset, color = 'green', col.regions = 'white')
   # # mapview::mapview(fema_vpu2[1:100, ], color = 'green', col.regions = 'white')
 
-  fema_vpu <-
-    fema_vpu %>%
-    # dplyr::group_by(source) %>%
-    dplyr::mutate(
-      # state = tolower(gsub("-100yr-flood_valid_clean.gpkg", "", source)),
-      vpu     = gsub("VPU_", "", VPU),
-      fema_id = 1:dplyr::n()
-    ) %>%
-    dplyr::ungroup() %>%
-    dplyr::select(vpu, fema_id,
-                  # state,
-                  geom = geometry)
+  # fema_vpu <-
+  #   fema_vpu %>%
+  #   # dplyr::group_by(source) %>%
+  #   dplyr::mutate(
+  #     # state = tolower(gsub("-100yr-flood_valid_clean.gpkg", "", source)),
+  #     vpu     = gsub("VPU_", "", VPU),
+  #     fema_id = 1:dplyr::n()
+  #   ) %>%
+  #   dplyr::ungroup() %>%
+  #   dplyr::select(vpu, fema_id,
+  #                 # state,
+  #                 geom = geometry)
 
   if (OVERWRITE_FEMA_FILES) {
 
@@ -496,9 +600,10 @@ for (i in 1:length(FEMA_VPU_SUBFOLDERS)) {
 
     sf::write_sf(
       fema_vpu,
-      master_filepath
+      updated_filepath
+      # master_filepath
       # union_file_path
     )
   }
   message()
-}
\ No newline at end of file
+}
diff --git a/runners/cs_runner/preprocess_fema2.R b/runners/cs_runner/preprocess_fema2.R
deleted file mode 100644
index 2141946..0000000
--- a/runners/cs_runner/preprocess_fema2.R
+++ /dev/null
@@ -1,504 +0,0 @@
-# Script should be run AFTER download_fema100.R as the FEMA 100 year flood plain data needs to first be downloaded from S3
-# This file will take a directory of FEMA 100 year FGB files (FEMA_FGB_PATH) and run the processes below to generate a cleaned, simple set of geopackages
-
-# Processing steps:
-# - Convert FGBs to GEOJSON (via ogr2ogr)
-# - Simplify
-# - Dissolve
-# - Explode
-# - Convert cleaned GEOJSON to cleaned GPKGs (via ogr2ogr)
-# - Apply hydrofab::clean_geometry()
-# - Partition FEMA 100 geometries by VPU  # TODO still
-# - Get FEMA bounding box geometries (maybe)
-
-# load config variables
-source("runners/cs_runner/config_vars.R")
-source("runners/cs_runner/config.R")
-source("runners/cs_runner/utils.R")
-
-library(dplyr)
-library(sf)
-library(geos)
-library(fastmap)
-library(nngeo)
-
-# TODO: Steps that convert FGB to geojson and then geojson to gpkg can be put into a single loop
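
A rough sketch of that single-loop idea, reusing only the ogr2ogr commands that appear in this script (and cleaning up the intermediate file, as the next TODO suggests); FEMA_FGB_PATH / FEMA_GPKG_PATH are the config variables defined in config_vars.R:

for (file in list.files(FEMA_FGB_PATH, full.names = FALSE)) {
  fgb_path     <- paste0(FEMA_FGB_PATH, "/", file)
  geojson_path <- paste0(FEMA_GPKG_PATH, "/", gsub(".fgb", ".geojson", file))
  gpkg_path    <- gsub(".geojson", ".gpkg", geojson_path)

  # FGB -> GeoJSON, then GeoJSON -> GPKG, in a single pass
  system(paste0("ogr2ogr ", geojson_path, " ", fgb_path))
  system(paste0("ogr2ogr -nlt MULTIPOLYGON ", gpkg_path, " ", geojson_path))

  # drop the intermediate GeoJSON as we go
  file.remove(geojson_path)
}
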
-# TODO: Delete old files as needed
-
-# -------------------------------------------------------------------------------------
-# ---- OVERWRITE_FEMA_FILES constant logical ----
-# ---- > if TRUE, processing steps will be run again
-#          and overwrite existing previously processed files
-# -------------------------------------------------------------------------------------
-
-# Default is TRUE (i.e. a fresh processing run is done from start to finish)
-OVERWRITE_FEMA_FILES  <- TRUE
-DELETE_STAGING_GPKGS  <- TRUE
-
-# -------------------------------------------------------------------------------------
-# ---- Create directories (if they do NOT exist) ----
-# -------------------------------------------------------------------------------------
-
-# create directory for cleaned FEMA geometries as geopackages (if not exists)
-if (!dir.exists(FEMA_GPKG_PATH)) {
-  message(paste0(FEMA_GPKG_PATH, " directory does not exist...\nCreating directory:\n > '", FEMA_GPKG_PATH, "'"))
-  dir.create(FEMA_GPKG_PATH)
-}
-
-# create directory for FEMA geometries partitioned by VPU
-if (!dir.exists(FEMA_BY_VPU_PATH)) {
-  message(paste0(FEMA_BY_VPU_PATH, " directory does not exist...\nCreating directory:\n > '", FEMA_BY_VPU_PATH, "'"))
-  dir.create(FEMA_BY_VPU_PATH)
-}
-
-for (VPU_SUBFOLDER in FEMA_VPU_SUBFOLDERS) {
-  # create directory for FEMA geometries by VPU
-  # message(VPU_SUBFOLDER)
-
-  # state_dir  = paste0(VPU_SUBFOLDER, "/states/")
-  # merged_dir = paste0(VPU_SUBFOLDER, "/merged/")
-
-  if (!dir.exists(VPU_SUBFOLDER)) {
-    message("Creating FEMA VPU subfolder...")
-    message(paste0("'/", basename(VPU_SUBFOLDER), "' directory does not exist...\n Creating directory:\n > '", VPU_SUBFOLDER, "'"))
-    dir.create(VPU_SUBFOLDER)
-  }
-  # if (!dir.exists(state_dir)) {
-  #   message("Creating FEMA VPU states subfolder...")
-  #   message(paste0("'/", basename(state_dir), "' directory does not exist...\n Creating directory:\n > '", state_dir, "'"))
-  #   dir.create(state_dir)
-  # }
-  # if (!dir.exists(merged_dir)) {
-  #   message("Creating FEMA VPU merged subfolder...")
-  #   message(paste0("'/", basename(merged_dir), "' directory does not exist...\n Creating directory:\n > '", merged_dir, "'"))
-  #   dir.create(merged_dir)
-  # }
-}
-
-# -------------------------------------------------------------------------------------
-# ---- Get paths to downloaded FEMA 100 FGBs ----
-# -------------------------------------------------------------------------------------
-
-FEMA_FILENAMES  <- list.files(FEMA_FGB_PATH, full.names = FALSE)
-FEMA_FILE_PATHS <- paste0(FEMA_FGB_PATH, "/", FEMA_FILENAMES)
-
-for (file in FEMA_FILENAMES) {
-
-  STAGING_FILES_TO_DELETE <- c()
-
-  # Convert FGB to GeoJSON
-  local_fema_path   <- paste0(FEMA_FGB_PATH, "/", file)
-  geojson_filename  <- gsub(".fgb", ".geojson", file)
-  geojson_save_path <- paste0(FEMA_GPKG_PATH, "/", geojson_filename)
-
-  message("FEMA filename: '", file, "'")
-  message("Converting \n > '", file, "' to geojson '", geojson_filename, "'")
-
-  geojson_exists <- file.exists(geojson_save_path)
-
-  message(" >>> '", geojson_filename, "' already exists? ", geojson_exists)
-  message(" >>> Overwrite? 
", OVERWRITE_FEMA_FILES) - - ogr2ogr_command <- paste0("ogr2ogr ", geojson_save_path, " ", local_fema_path) - - if (OVERWRITE_FEMA_FILES || !geojson_exists) { - system(ogr2ogr_command) - message("Writing '", geojson_filename, "' to: \n > '", geojson_save_path, "'") - - STAGING_FILES_TO_DELETE <- c(STAGING_FILES_TO_DELETE, geojson_save_path) - } - - # Clean GeoJSON - message("Simplify, dissolve, explode > '", geojson_filename, "'") - output_clean_filename <- gsub(".geojson", "_clean.geojson", geojson_filename) - output_clean_geojson_path <- paste0(FEMA_GPKG_PATH, "/", output_clean_filename) - - clean_geojson_exists <- file.exists(output_clean_geojson_path) - message(" >>> '", output_clean_filename, "' already exists? ", clean_geojson_exists) - message(" >>> Overwrite? ", OVERWRITE_FEMA_FILES) - - mapshaper_command = paste0('node --max-old-space-size=16000 /opt/homebrew/bin/mapshaper ', geojson_save_path, - ' -dissolve2 FLD_AR_ID \\', - ' -simplify 0.1 visvalingam \\', - ' -snap \\', - ' -o ', output_clean_geojson_path - ) - - - if (OVERWRITE_FEMA_FILES || !clean_geojson_exists) { - message("Running mapshaper 'simplify', 'dissolve', and 'explode' via CLI...") - system(mapshaper_command) - message("Writing '", output_clean_filename, "' to: \n > '", output_clean_geojson_path, "'") - - STAGING_FILES_TO_DELETE <- c(STAGING_FILES_TO_DELETE, output_clean_geojson_path) - } - - # Convert cleaned GeoJSON to GeoPackage - message("Fema 100 year flood plain:\n > '", output_clean_filename, "'") - - output_gpkg_filename <- gsub("_clean.geojson", "_clean.gpkg", output_clean_filename) - output_gpkg_path <- paste0(FEMA_GPKG_PATH, "/", output_gpkg_filename) - - message("Converting GEOJSON file to GPKG:\n > '", output_clean_filename, "' > '", output_gpkg_filename, "'") - - clean_gpkg_exists <- file.exists(output_gpkg_path) - message(" >>> '", output_gpkg_filename, "' already exists? ", clean_gpkg_exists) - message(" >>> Overwrite? ", OVERWRITE_FEMA_FILES) - - ogr2ogr_command <- paste0("ogr2ogr -nlt MULTIPOLYGON ", output_gpkg_path, " ", output_clean_geojson_path) - - if (OVERWRITE_FEMA_FILES || !clean_gpkg_exists) { - system(ogr2ogr_command) - message("Writing '", output_gpkg_filename, "' to: \n > '", output_gpkg_path, "'") - } - - message("Deleting intermediary files\n") - for (delete_file in STAGING_FILES_TO_DELETE) { - if (file.exists(delete_file)) { - message("Deleting >>> '", delete_file, "'") - file.remove(delete_file) - } - - } - - message() - -} - -# ------------------------------------------------------------------------------------------------------------------- -# ---- Apply final dissolve/snap and removal of internal boundaries in FEMA geometries ---- -# ------------------------------------------------------------------------------------------------------------------- - -# paths to FEMA 100 year flood plain files -FEMA_gpkg_paths <- list.files(FEMA_GPKG_PATH, full.names = TRUE) - -for (file_path in FEMA_gpkg_paths) { - message("Resolving internal boundaries, islands, and topology issues:\n > '", basename(file_path), "'") - - fema <- sf::read_sf(file_path) - - fema <- - fema[!sf::st_is_empty(fema), ] %>% - sf::st_transform(5070) - - # TODO: Snap using geos::geos_snap() - # fema <- - # geos::geos_snap( - # geos::as_geos_geometry(fema), - # geos::as_geos_geometry(fema), - # tolerance = 1 - # ) %>% - # geos::geos_make_valid() %>% - # sf::st_as_sf() - - # TODO: we get this error when trying to use the geometry column after geos snapping - # TODO: Error = "Error: Not compatible with STRSXP: [type=NULL]." 
-  # fema %>%
-  #   sf::st_cast("POLYGON")
-
-  # TODO: Snap using sf::st_snap()
-  # fema <- sf::st_snap(
-  #   fema,
-  #   fema,
-  #   tolerance = 2
-  # )
-
-  fema <-
-    fema %>%
-    # fema[!sf::st_is_empty(fema), ] %>%
-    dplyr::select(geometry = geom) %>%
-    add_predicate_group_id(sf::st_intersects) %>%
-    sf::st_make_valid() %>%
-    dplyr::group_by(group_id) %>%
-    dplyr::summarise(
-      geometry = sf::st_combine(sf::st_union(geometry))
-    ) %>%
-    dplyr::ungroup() %>%
-    dplyr::select(-group_id) %>%
-    add_predicate_group_id(sf::st_intersects) %>%
-    rmapshaper::ms_dissolve(sys = TRUE, sys_mem = 16) %>%
-    rmapshaper::ms_explode(sys = TRUE, sys_mem = 16) %>%
-    dplyr::mutate(
-      fema_id = as.character(1:dplyr::n())
-    ) %>%
-    dplyr::select(fema_id, geometry)
-
-  # mapview::mapview(fema, color = 'cyan', col.regions = "cyan") +
-  #   mapview::mapview(end_fema, color = 'red', col.regions = "white")
-
-  fema <-
-    fema %>%
-    dplyr::mutate(
-      source = basename(file_path),
-      state  = gsub("-100yr-flood_valid_clean.gpkg", "", source)
-    ) %>%
-    dplyr::select(fema_id, source, state,
-                  # areasqkm,
-                  geometry)
-
-  message("End time: ", Sys.time())
-
-  if (OVERWRITE_FEMA_FILES) {
-    message("Writing '", basename(file_path), "' to: \n > '", file_path, "'")
-    sf::write_sf(
-      # fema_clean,
-      fema,
-      file_path
-    )
-  }
-  message()
-
-}
-
-# -------------------------------------------------------------------------------------
-# ---- Partition parts of each FEMA GPKG to a Nextgen VPU ----
-# -------------------------------------------------------------------------------------
-
-# Clean FEMA GPKG files
-FEMA_CLEAN_GPKG_PATHS <- list.files(FEMA_GPKG_PATH, full.names = TRUE)
-
-# paths to nextgen datasets and model attribute parquet files
-NEXTGEN_FILENAMES  <- list.files(nextgen_dir, full.names = FALSE)
-NEXTGEN_FILE_PATHS <- paste0(nextgen_dir, NEXTGEN_FILENAMES)
-
-for (file_path in FEMA_CLEAN_GPKG_PATHS) {
-
-  fema_file <- basename(file_path)
-
-  message("Partitioning FEMA polygons by VPU: \n > FEMA gpkg: '", fema_file, "'")
-
-  # read in fema polygons
-  fema <- sf::read_sf(file_path)
-
-  for (nextgen_path in NEXTGEN_FILE_PATHS) {
-    nextgen_basename <- basename(nextgen_path)
-    vpu              <- unlist(regmatches(nextgen_basename, gregexpr("\\d+[A-Za-z]*", nextgen_basename)))
-
-    message("VPU: ", vpu)
-    message("- nextgen gpkg:\n > '", nextgen_path, "'")
-    message(" > Checking if '", fema_file, "' intersects with '", nextgen_basename, "'")
-
-    # read in nextgen flowlines
-    flines <- sf::read_sf(nextgen_path, layer = "flowpaths")
-
-    # get the FEMA polygons that intersect with the nextgen flowlines
-    fema_intersect <- polygons_with_line_intersects(fema, flines)
-
-    fema_in_nextgen <- nrow(fema_intersect) != 0
-
-    message("FEMA intersects with nextgen flowlines? ", fema_in_nextgen)
", fema_in_nextgen) - - if(fema_in_nextgen) { - - # create filepaths - vpu_subfolder <- paste0("VPU_", vpu) - # vpu_subfolder_path <- paste0(FEMA_BY_VPU_PATH, "/", vpu_subfolder, "/states") - vpu_subfolder_path <- paste0(FEMA_BY_VPU_PATH, "/", vpu_subfolder) - - # vpu_subfolder_path <- FEMA_VPU_SUBFOLDERS[grepl(vpu_subfolder, FEMA_VPU_SUBFOLDERS)] - - fema_intersect <- - fema_intersect %>% - dplyr::mutate( - vpu = vpu - ) %>% - dplyr::select(vpu, fema_id, source, state, geom) - - # state <- gsub("-100yr-flood_valid_clean.gpkg", "", fema_file) - - fema_vpu_filename <- gsub(".gpkg", paste0("_", vpu, ".gpkg"), fema_file) - fema_vpu_path <- paste0(vpu_subfolder_path, "/", fema_vpu_filename) - - - if (OVERWRITE_FEMA_FILES) { - message("Writting '", basename(fema_vpu_filename), "' to: \n > '", fema_vpu_path, "'") - - sf::write_sf( - fema_intersect, - fema_vpu_path - ) - } - - - } - message() - } - - - message( - "--------------------------------------------------------------\n", - "Completed all VPU intersections for: \n > '", fema_file, "'", - "\n--------------------------------------------------------------\n" - ) - -} - -# ------------------------------------------------------------------------------------- -# ---- Loop through each VPU subfolder and merge all of the Geopackages into one---- -# ------------------------------------------------------------------------------------- - -DELETE_STAGING_GPKGS <- F - -for (vpu_dir in FEMA_VPU_SUBFOLDERS) { - # for (i in 1:4) { - # vpu_dir = FEMA_VPU_SUBFOLDERS2[12] - message("Merging files in '", basename(vpu_dir), "' directory...") - # } - - vpu_subdirs <- list.files(vpu_dir, full.names = TRUE) - - # path to the merged directory where the final merged geopackge will end up - master_name <- paste0("fema_", gsub("VPU", "vpu", basename(vpu_dir))) - master_gpkg_name <- paste0(master_name, ".gpkg") - master_filepath <- paste0(vpu_dir, "/", master_gpkg_name) - - # fema state geopackages partioned for the specific VPU - fema_state_gpkgs <- list.files(vpu_dir, full.names = TRUE) - - # make sure to ignore the master file if it already exists - fema_state_gpkgs <- fema_state_gpkgs[fema_state_gpkgs != master_filepath] - - for(gpkg_file in fema_state_gpkgs) { - # message(" - Appending '", basename(gpkg_file), "' to master FEMA VPU gpkg:\n > '", - # basename(gpkg_file), " > ", basename(master_filepath), - # "'") - message(" > '", - basename(gpkg_file), " > ", basename(master_filepath), - "'") - - ogr2ogr_merge_command <- paste0("ogr2ogr -f 'gpkg' -append -nln ", master_name, " ", - master_filepath, - " ", gpkg_file - ) - - if (OVERWRITE_FEMA_FILES) { - system(ogr2ogr_merge_command) - } - } - - has_fema_state_gpkgs <- length(fema_state_gpkgs) > 0 - - if(DELETE_STAGING_GPKGS && has_fema_state_gpkgs) { - message(" - Deleting individual gpkgs from '", vpu_dir, "' directory...") - # message("- Deleting individual gpkgs from 'states' directory:\n > '", states_dir, "'") - - remove_gpkg_cmds <- paste0("rm ", fema_state_gpkgs) - - for (remove_cmd in remove_gpkg_cmds) { - message(" > '", remove_cmd, "'") - system(remove_cmd) - } - } - - # message() - message("Merge complete!") - message("Merged '", basename(vpu_dir), "' FEMA output geopackage:\n --> '", master_filepath, "'") - message() -} - -# ------------------------------------------------------------------------------------- -# ---- Union each VPU geopackage (either on state or just touching predicate) ---- -# ------------------------------------------------------------------------------------- - -for (i in 
-for (i in 1:length(FEMA_VPU_SUBFOLDERS)) {
-  vpu_dir <- FEMA_VPU_SUBFOLDERS[i]
-  VPU     <- basename(vpu_dir)
-
-  message(i, " - Attempting to union FEMA polygons for '", VPU, "'...")
-
-  # path to the merged directory where the final merged geopackage will end up
-  master_name      <- paste0("fema_", gsub("VPU", "vpu", basename(vpu_dir)))
-  master_gpkg_name <- paste0(master_name, ".gpkg")
-  master_filepath  <- paste0(vpu_dir, "/", master_gpkg_name)
-
-  message("> Re-unioning and re-exploding geometries in '", basename(master_filepath), "'")
-
-  if(!file.exists(master_filepath)) {
-    message("No FEMA geometries in '", VPU, "'")
-    message()
-    next
-  }
-
-
-  fema_vpu <- sf::read_sf(master_filepath)
-
-  # fema_vpu %>% sf::st_geometry_type() %>% unique()
-
-  fema_vpu <-
-    fema_vpu %>%
-    nngeo::st_remove_holes(max_area = 200) %>%
-    # dplyr::select(geometry = geom) %>%
-    add_predicate_group_id(sf::st_intersects) %>%
-    sf::st_make_valid() %>%
-    dplyr::group_by(group_id) %>%
-    dplyr::summarise(
-      geometry = sf::st_combine(sf::st_union(geometry))
-    ) %>%
-    dplyr::ungroup() %>%
-    dplyr::select(-group_id) %>%
-    add_predicate_group_id(sf::st_intersects)
-
-  geom_type_counts <- table(sf::st_geometry_type(fema_vpu))
-
-  message("Geometry counts before casting all geometries to MULTIPOLYGON:")
-  for (g in seq_along(geom_type_counts)) {
-    message(" > ", names(geom_type_counts[g]), ": ", geom_type_counts[g])
-  }
-
-  message("Keeping only POLYGON and MULTIPOLYGON geometries...")
-  fema_vpu <-
-    fema_vpu %>%
-    dplyr::filter(sf::st_geometry_type(geometry) %in% c("POLYGON", "MULTIPOLYGON")) %>%
-    sf::st_cast("MULTIPOLYGON") %>%
-    sf::st_make_valid() %>%
-    # dplyr::group_by(group_id) %>%
-    rmapshaper::ms_dissolve(sys = TRUE, sys_mem = 16) %>%
-    rmapshaper::ms_explode(sys = TRUE, sys_mem = 16) %>%
-    nngeo::st_remove_holes(max_area = 200) %>%
-    dplyr::mutate(
-      fema_id = as.character(1:dplyr::n())
-    ) %>%
-    dplyr::select(fema_id, geometry)
-
-  # end_geom_type_counts <- table(sf::st_geometry_type(fema_vpu))
-  # message("Geometry counts after all processing steps: ")
-  # for (g in seq_along(end_geom_type_counts)) {
-  #   message(" > ", names(end_geom_type_counts[g]), ": ", end_geom_type_counts[g])
-  # }
-
-  # fema_vpu2 %>% mapview::npts()
-  # fema_vpu2_subset <- fema_vpu2[lengths(sf::st_intersects(fema_vpu2, fema_vpu[1:100, ])) > 1, ]
-
-  # mapview::mapview(fema_vpu, color = 'red', col.regions = 'white')  +
-  # mapview::mapview(fema_vpu2, color = 'green', col.regions = 'white')
-  # # mapview::mapview(fema_vpu2_subset, color = 'green', col.regions = 'white')
-  # # mapview::mapview(fema_vpu2[1:100, ], color = 'green', col.regions = 'white')
-
-  fema_vpu <-
-    fema_vpu %>%
-    # dplyr::group_by(source) %>%
-    dplyr::mutate(
-      # state = tolower(gsub("-100yr-flood_valid_clean.gpkg", "", source)),
-      vpu     = gsub("VPU_", "", VPU),
-      fema_id = 1:dplyr::n()
-    ) %>%
-    dplyr::ungroup() %>%
-    dplyr::select(vpu, fema_id,
-                  # state,
-                  geom = geometry)
-
-  if (OVERWRITE_FEMA_FILES) {
-
-    message("> Overwriting '", basename(master_filepath), "' with final clean version...")
-
-    # union_file_path <- gsub(".gpkg", "_union.gpkg", fema_vpu_file)
-    # message("> writing '", basename(union_file_path), "' (unioned and exploded version)")
-
-    sf::write_sf(
-      fema_vpu,
-      master_filepath
-      # union_file_path
-    )
-  }
-  message()
-}

From 5b449f782b726e27accc1d4fbd1e7c183fb8f504 Mon Sep 17 00:00:00 2001
From: anguswg-ucsb <anguswatters@gmail.com>
Date: Wed, 17 Jul 2024 13:59:48 -0700
Subject: [PATCH 45/64] moved variables for running partition_fema_by_vpu.R and
 renamed
 preprocess_fema.R to partition_fema_by_vpu, added fema download and processing to 04_driver script

---
 runners/cs_runner/01_transects.R              |  2 +-
 runners/cs_runner/04_driver.R                 |  6 +++
 runners/cs_runner/config_vars.R               | 10 +++++
 ...process_fema.R => partition_fema_by_vpu.R} | 43 ++-----------------
 4 files changed, 20 insertions(+), 41 deletions(-)
 rename runners/cs_runner/{preprocess_fema.R => partition_fema_by_vpu.R} (94%)

diff --git a/runners/cs_runner/01_transects.R b/runners/cs_runner/01_transects.R
index e8d0476..f88d216 100644
--- a/runners/cs_runner/01_transects.R
+++ b/runners/cs_runner/01_transects.R
@@ -38,7 +38,7 @@ for(i in 1:nrow(path_df)) {
   # fema_vpu_dir <- paste0(FEMA_VPU_SUBFOLDERS[grepl(paste0("VPU_", vpu), basename(FEMA_VPU_SUBFOLDERS))], "/merged")
   
   vpu_fema_files <- list.files(fema_vpu_dir, full.names = TRUE)
-  vpu_fema_file  <- vpu_fema_files[grepl(paste0(vpu, "_union.gpkg"), vpu_fema_files)]
+  vpu_fema_file  <- vpu_fema_files[grepl(paste0(vpu, "_output.gpkg"), vpu_fema_files)]
 
   
   # # model attributes file and full path
diff --git a/runners/cs_runner/04_driver.R b/runners/cs_runner/04_driver.R
index af8e1ca..a19a7c4 100644
--- a/runners/cs_runner/04_driver.R
+++ b/runners/cs_runner/04_driver.R
@@ -6,6 +6,12 @@ source("runners/cs_runner/config.R")
 # downloads nextgen datasets
 source("runners/cs_runner/download_nextgen.R")
 
+# download FEMA 100yr FGBs
+source("runners/cs_runner/download_fema100.R")
+
+# simplify and dissolve FEMA polygons, then partition them by VPU
+source("runners/cs_runner/partition_fema_by_vpu.R")
+
 # generate and upload transects datasets
 source("runners/cs_runner/01_transects.R")
 
diff --git a/runners/cs_runner/config_vars.R b/runners/cs_runner/config_vars.R
index 47483c9..492611f 100644
--- a/runners/cs_runner/config_vars.R
+++ b/runners/cs_runner/config_vars.R
@@ -38,6 +38,16 @@ FEMA_VPU_SUBFOLDERS <- paste0(FEMA_BY_VPU_PATH, "/VPU_", VPU_IDS)
 #                                 )
 #               )
 
+# -------------------------------------------------------------------------------------
+# ---- OVERWRITE_FEMA_FILES constant logical ----
+# ---- > if TRUE, processing steps will be run again
+#          and overwrite existing previously processed files
+# -------------------------------------------------------------------------------------
+
+# Default is TRUE (i.e. a fresh processing run is done from start to finish)
+OVERWRITE_FEMA_FILES  <- TRUE
+DELETE_STAGING_GPKGS  <- TRUE   # remove intermediary files from the main output folder
+
 # ----------------------------------------------------------------------------
 # ---- Cross section point extraction constant variables ----
 # ----------------------------------------------------------------------------
diff --git a/runners/cs_runner/preprocess_fema.R b/runners/cs_runner/partition_fema_by_vpu.R
similarity index 94%
rename from runners/cs_runner/preprocess_fema.R
rename to runners/cs_runner/partition_fema_by_vpu.R
index ee08e25..48447d8 100644
--- a/runners/cs_runner/preprocess_fema.R
+++ b/runners/cs_runner/partition_fema_by_vpu.R
@@ -35,8 +35,7 @@ library(nngeo)
 
 # Default is TRUE (i.e. a fresh processing run is done from start to finish)
 OVERWRITE_FEMA_FILES  <- TRUE
-DELETE_STAGING_GPKGS  <- FALSE
-# DELETE_STAGING_GPKGS  <- TRUE
+DELETE_STAGING_GPKGS  <- TRUE
 
 # -------------------------------------------------------------------------------------
 # ---- Create directories (if they do NOT exist) ----
@@ -498,32 +497,7 @@ for (i in 5:length(FEMA_VPU_SUBFOLDERS)) {
       )
 
   })
-  
-  
-  
-  # fema_vpu2 <-
-  #   fema_vpu %>%
-  #   nngeo::st_remove_holes(max_area = 200) %>%
-  #   # sf::st_make_valid() %>%
-  #   # dplyr::filter(sf::st_geometry_type(geom) %in% c("POLYGON", "MULTIPOLYGON")) %>%
-  #   add_predicate_group_id(sf::st_intersects) %>%
-  #   dplyr::group_by(group_id) %>%
-  #   dplyr::summarise(
-  #     geometry = sf::st_combine(sf::st_union(geometry))
-  #   ) %>%
-  #   rmapshaper::ms_explode(sys = TRUE, sys_mem = 16) %>%
-  #   dplyr::ungroup() %>%
-  #   nngeo::st_remove_holes(max_area = 200)
-  
-  # fema_vpu <-
-  #   fema_vpu %>%
-  #   dplyr::mutate(
-  #     vpu     = gsub("VPU_", "", VPU),
-  #     fema_id = as.character(1:dplyr::n())
-  #   ) %>%
-  #   dplyr::select(
-  #     vpu, fema_id,
-  #     # state,
-  #     geom = geometry
-  #   )
+  
 
   # fema_vpu <-
@@ -539,17 +513,11 @@ for (i in 5:length(FEMA_VPU_SUBFOLDERS)) {
   #   dplyr::ungroup() %>%
   #   dplyr::select(-group_id) %>%
   #   add_predicate_group_id(sf::st_intersects)
-  # 
-  # fema_vpu %>% sf::st_geometry_type() %>% unique()
-  # fema_vpu %>% mapview::npts()
-  # 
   # geom_type_counts <- table(sf::st_geometry_type(fema_vpu))
-  # 
   # message("Geometry counts before casting all geometries to MULTIPOLYGON:")
   # for (g in seq_along(geom_type_counts)) {
   #   message(" > ", names(geom_type_counts[g]), ": ", geom_type_counts[g])
   # }
-  # 
   # message("Keeping only POLYGON and MULTIPOLYGON geometries...")
   # fema_vpu <-
   #   fema_vpu %>%
@@ -575,9 +543,6 @@ for (i in 5:length(FEMA_VPU_SUBFOLDERS)) {
   # fema_vpu2_subset <- fema_vpu2[lengths(sf::st_intersects(fema_vpu2, fema_vpu[1:100, ])) > 1, ]
   # mapview::mapview(fema_vpu, color = 'red', col.regions = 'white')  +
   # mapview::mapview(fema_vpu2, color = 'green', col.regions = 'white')
-  # # mapview::mapview(fema_vpu2_subset, color = 'green', col.regions = 'white')
-  # # mapview::mapview(fema_vpu2[1:100, ], color = 'green', col.regions = 'white')
-  
   # fema_vpu <-
   #   fema_vpu %>%
   #   # dplyr::group_by(source) %>%
@@ -587,9 +552,7 @@ for (i in 5:length(FEMA_VPU_SUBFOLDERS)) {
   #     fema_id = 1:dplyr::n()
   #   ) %>%
   #   dplyr::ungroup() %>%
-  #   dplyr::select(vpu, fema_id,
-  #                 # state,
-  #                 geom = geometry)
+  #   dplyr::select(vpu, fema_id, geom = geometry)
 
   if (OVERWRITE_FEMA_FILES) {
 

From c81675c73a2225c7ca9e63848a176ff0a1dfebe6 Mon Sep 17 00:00:00 2001
From: anguswg-ucsb <anguswatters@gmail.com>
Date: Thu, 18 Jul 2024 04:56:40 -0700
Subject: [PATCH 46/64] small cleanups

---
 runners/cs_runner/partition_fema_by_vpu.R | 99 +++++------------------
 1 file changed, 22 insertions(+), 77 deletions(-)

diff --git a/runners/cs_runner/partition_fema_by_vpu.R b/runners/cs_runner/partition_fema_by_vpu.R
index 48447d8..03f224f 100644
--- a/runners/cs_runner/partition_fema_by_vpu.R
+++ b/runners/cs_runner/partition_fema_by_vpu.R
@@ -341,7 +341,8 @@ for (file_path in FEMA_CLEAN_GPKG_PATHS) {
 
 for (vpu_dir in FEMA_VPU_SUBFOLDERS) {
   # for (i in 1:4) {
-  # vpu_dir = FEMA_VPU_SUBFOLDERS2[12]
+  # i = 1
+  # vpu_dir = FEMA_VPU_SUBFOLDERS2[i]
   message("Merging files in '", basename(vpu_dir), "' directory...")
   # }
 
@@ -352,25 +353,31 @@ for (vpu_dir in FEMA_VPU_SUBFOLDERS) {
   master_gpkg_name <- paste0(master_name, ".gpkg")
   master_filepath  <- paste0(vpu_dir, "/", master_gpkg_name)
 
+  # if the file already exists, remove it first so we don't append data to the "master file" twice
+  if (file.exists(master_filepath)) {
+    file.remove(master_filepath)
+  }
+  
   # fema state geopackages partitioned for the specific VPU
-  fema_state_gpkgs <- list.files(vpu_dir, full.names = TRUE)
+  fema_state_gpkgs        <- list.files(vpu_dir, full.names = TRUE)
+  master_output_filepath  <- paste0(vpu_dir, "/", gsub(".gpkg", "_output.gpkg", master_gpkg_name))
 
   # make sure to ignore the master file if it already exists
-  fema_state_gpkgs <- fema_state_gpkgs[fema_state_gpkgs != master_filepath]
+  fema_state_gpkgs <- fema_state_gpkgs[fema_state_gpkgs != master_filepath & fema_state_gpkgs != master_output_filepath]
 
   for(gpkg_file in fema_state_gpkgs) {
-    # message(" - Appending '", basename(gpkg_file), "' to master FEMA VPU gpkg:\n > '",
-    #         basename(gpkg_file), " > ", basename(master_filepath),
-    #         "'")
-    message(" > '",
-            basename(gpkg_file), " > ", basename(master_filepath),
-            "'")
-
-    ogr2ogr_merge_command <- paste0("ogr2ogr -f 'gpkg' -append -nln ", master_name, " ",
-                                    master_filepath,
-                                    " ", gpkg_file
-    )
-
+    # message(" - Appending '", basename(gpkg_file), "' to master FEMA VPU gpkg:\n > '", 
+    #         basename(gpkg_file), " > ", basename(master_filepath), 
+    #         "'")
+    message(" > '", 
+            basename(gpkg_file), " > ", basename(master_filepath), 
+            "'")
+    
+    ogr2ogr_merge_command <- paste0("ogr2ogr -f 'gpkg' -append -nln ", master_name, " ", 
+                                    master_filepath, 
+                                    " ", gpkg_file
+    )
+    
     if (OVERWRITE_FEMA_FILES) {
       system(ogr2ogr_merge_command)
     }
@@ -399,15 +406,8 @@ for (vpu_dir in FEMA_VPU_SUBFOLDERS) {
 # -------------------------------------------------------------------------------------
 # ---- Union each VPU geopackage (either on state or just touching predicate) ----
 # -------------------------------------------------------------------------------------
-# for (i in 5:length(FEMA_VPU_SUBFOLDERS)) {
-# 
-#   vpu_dir <- FEMA_VPU_SUBFOLDERS[i]
-#   VPU     <- basename(vpu_dir)
-# 
-#   message(i, " - Attempting to union FEMA polygons for '", VPU, "'...")
-# }
 
-for (i in 5:length(FEMA_VPU_SUBFOLDERS)) {
+for (i in 1:length(FEMA_VPU_SUBFOLDERS)) {
   vpu_dir <- FEMA_VPU_SUBFOLDERS[i]
   VPU     <- basename(vpu_dir)
 
@@ -440,6 +440,7 @@ for (i in 5:length(FEMA_VPU_SUBFOLDERS)) {
   #   sf::st_make_valid() %>%
   #   sf::st_geometry_type() %>%
   #   unique()
+  
   geom_type_counts <- table(sf::st_geometry_type(fema_vpu))
 
   message("Geometry counts before casting all geometries to MULTIPOLYGON:")
@@ -461,10 +462,10 @@ for (i in 5:length(FEMA_VPU_SUBFOLDERS)) {
     fema_vpu <-
       fema_vpu %>%
       nngeo::st_remove_holes(max_area = 200) %>%
-      sf::st_make_valid() %>%
+      # sf::st_make_valid() %>%
      # dplyr::filter(sf::st_geometry_type(geom) %in% c("POLYGON", "MULTIPOLYGON")) %>%
       add_predicate_group_id(sf::st_intersects) %>%
-      dplyr::group_by(group_id) %>%
+      dplyr::group_by(group_id) %>% 
       rmapshaper::ms_dissolve(sys = TRUE, sys_mem = 16) %>%
       rmapshaper::ms_explode(sys = TRUE, sys_mem = 16) %>%
       dplyr::ungroup() %>%
@@ -497,62 +498,6 @@ for (i in 5:length(FEMA_VPU_SUBFOLDERS)) {
       )
 
   })
-  
-  
-  
-  # fema_vpu <-
-  #   fema_vpu %>%
-  #   nngeo::st_remove_holes(max_area = 200) %>%
-  #   # dplyr::select(geometry = geom) %>%
-  #   add_predicate_group_id(sf::st_intersects) %>%
-  #   sf::st_make_valid() %>%
-  #   dplyr::group_by(group_id) %>%
-  #   dplyr::summarise(
-  #     geometry = sf::st_combine(sf::st_union(geometry))
-  #   ) %>%
-  #   dplyr::ungroup() %>%
-  #   dplyr::select(-group_id) %>%
-  #   add_predicate_group_id(sf::st_intersects)
-  # geom_type_counts <- table(sf::st_geometry_type(fema_vpu))
-  # message("Geometry counts before casting all geometries to MULTIPOLYGON:")
-  # for (g in seq_along(geom_type_counts)) {
-  #   message(" > ", names(geom_type_counts[g]), ": ", geom_type_counts[g])
-  # }
-  # message("Keeping only POLYGON and MULTIPOLYGON geometries...")
-  # fema_vpu <-
-  #   fema_vpu %>%
-  #   dplyr::filter(sf::st_geometry_type(geometry) %in% c("POLYGON", "MULTIPOLYGON")) %>%
-  #   sf::st_cast("MULTIPOLYGON") %>%
-  #   sf::st_make_valid() %>%
-  #   # dplyr::group_by(group_id) %>%
-  #   rmapshaper::ms_dissolve(sys = TRUE, sys_mem = 16) %>%
-  #   rmapshaper::ms_explode(sys = TRUE, sys_mem = 16) %>%
-  #   nngeo::st_remove_holes(max_area = 200) %>%
-  #   dplyr::mutate(
-  #     fema_id = as.character(1:dplyr::n())
-  #   ) %>%
-  #   dplyr::select(fema_id, geometry)
-
-  # end_geom_type_counts <- table(sf::st_geometry_type(fema_vpu))
-  # message("Geometry counts after all processing steps: ")
-  # for (g in seq_along(end_geom_type_counts)) {
-  #   message(" > ", names(end_geom_type_counts[g]), ": ", end_geom_type_counts[g])
-  # }
-
-  # fema_vpu2 %>% mapview::npts()
-  # fema_vpu2_subset <- fema_vpu2[lengths(sf::st_intersects(fema_vpu2, fema_vpu[1:100, ])) > 1, ]
-  # mapview::mapview(fema_vpu, color = 'red', col.regions = 'white')  +
-  # mapview::mapview(fema_vpu2, color = 'green', col.regions = 'white')
-  # fema_vpu <-
-  #   fema_vpu %>%
-  #   # dplyr::group_by(source) %>%
-  #   dplyr::mutate(
-  #     # state = tolower(gsub("-100yr-flood_valid_clean.gpkg", "", source)),
-  #     vpu     = gsub("VPU_", "", VPU),
-  #     fema_id = 1:dplyr::n()
-  #   ) %>%
-  #   dplyr::ungroup() %>%
-  #   dplyr::select(vpu, fema_id, geom = geometry)
 
   if (OVERWRITE_FEMA_FILES) {
 

From 16cd482c8dc7260e0c0d837d6ccaad42a6467e65 Mon Sep 17 00:00:00 2001
From: anguswg-ucsb <anguswatters@gmail.com>
Date: Thu, 18 Jul 2024 12:15:06 -0700
Subject: [PATCH 47/64] small cleanups and set FEMA simplification to 1% but
 keep all shapes

---
 runners/cs_runner/01_transects.R | 144 +++----------------------------
 1 file changed, 11 insertions(+), 133 deletions(-)

diff --git a/runners/cs_runner/01_transects.R b/runners/cs_runner/01_transects.R
index f88d216..de1bd90 100644
--- a/runners/cs_runner/01_transects.R
+++ b/runners/cs_runner/01_transects.R
@@ -26,7 +26,7 @@ path_df <- align_files_by_vpu(
 
 # loop over each VPU and generate cross sections, then save locally and upload to S3 bucket
 for(i in 1:nrow(path_df)) {
-  # i = 8
+  
   # nextgen file and full path
   nextgen_file <- path_df$x[i]
   nextgen_path <- paste0(nextgen_dir, nextgen_file)
@@ -34,17 +34,12 @@ for(i in 1:nrow(path_df)) {
   vpu <- path_df$vpu[i]
   
   # Get FEMA by VPU directory and files for current VPU
-  fema_vpu_dir <- paste0(FEMA_VPU_SUBFOLDERS[grepl(paste0("VPU_", vpu), basename(FEMA_VPU_SUBFOLDERS))], "/merged")
+  fema_vpu_dir <- paste0(FEMA_VPU_SUBFOLDERS[grepl(paste0("VPU_", vpu), basename(FEMA_VPU_SUBFOLDERS))])
   # fema_vpu_dir <- paste0(FEMA_VPU_SUBFOLDERS[grepl(paste0("VPU_", vpu), basename(FEMA_VPU_SUBFOLDERS))], "/merged")
   
   vpu_fema_files <- list.files(fema_vpu_dir, full.names = TRUE)
-  vpu_fema_file  <- vpu_fema_files[grepl(paste0(vpu, "_output.gpkg"), vpu_fema_files)]
+  vpu_fema_file  <- vpu_fema_files[grepl(paste0(vpu, "_output.gpkg"), vpu_fema_files)] 
 
-  
-  # # model attributes file and full path
-  # model_attr_file <- path_df$y[i]
-  # model_attr_path <- paste0(model_attr_dir, model_attr_file)
-  
   message("Creating VPU ", vpu, " transects:", 
           "\n - flowpaths: '", nextgen_file, "'",
@@ -57,29 +52,12 @@ for(i in 1:nrow(path_df)) {
   # read in nextgen data
   flines <- sf::read_sf(nextgen_path, layer = "flowpaths")
   
-  # # model attributes
-  # model_attrs <- arrow::read_parquet(model_attr_path)
-  
-  # # join flowlines with model attributes
-  # flines <- dplyr::left_join(
-  #   flines,
-  #   dplyr::select(
-  #     model_attrs,
-  #     id, eTW
-  #   ), by = "id")
-  
   # calculate bankfull width
   flines <-
     flines %>%
     dplyr::mutate(
      bf_width = exp(0.700 + 0.365* log(tot_drainage_areasqkm))
     ) %>%
-    # dplyr::mutate( bf_width = 11 * eTW) %>%
-    # dplyr::mutate( # if there are any NAs, use exp(0.700 + 0.365* log(tot_drainage_areasqkm)) equation to calculate bf_width
-    #   bf_width = dplyr::case_when(
-    #     is.na(bf_width) ~ exp(0.700 + 0.365* log(tot_drainage_areasqkm)),
-    #     TRUE            ~ bf_width
-    #   )) %>%
     dplyr::select(
       hy_id = id,
       lengthkm,
@@ -131,6 +109,7 @@ for(i in 1:nrow(path_df)) {
     dplyr::mutate(
       cs_source = net_source
     )
+  
   # ---------------------------------------------------------------------
   # --- Extend transects out to FEMA 100yr floodplains
   # ---------------------------------------------------------------------
@@ -140,29 +119,15 @@ for(i in 1:nrow(path_df)) {
   # mapview::npts(fema)
   
   message("Simplifying FEMA polygons...")
+  # TODO: this should be a function argument OR removed; we probably shouldn't forcibly and silently simplify the input polygons without the user knowing...
-  # keep 10% of the original points for speed
-  # fema <- rmapshaper::ms_simplify(fema, keep_shapes = F, keep = 0.01)
-  fema <- rmapshaper::ms_simplify(fema, keep_shapes = F, keep = 0.01, sys = TRUE, sys_mem = 16)
-  
+  # keep 1% of the original points for speed
+  fema <- rmapshaper::ms_simplify(fema, keep_shapes = T, keep = 0.01, sys = TRUE, sys_mem = 16)
+  # fema <- rmapshaper::ms_simplify(fema, keep_shapes = F, keep = 0.1, sys = TRUE, sys_mem = 16)
 
   # mapview::npts(fema)
-  
-  # # TODO: the flines argument needs the "hy_id" column to be named "id"
-  # # TODO: probably should fix this in hydrofabric3D::get_transect_extension_distances_to_polygons()
-  # flines <-
-  #   flines %>%
-  #   dplyr::rename(id = hy_id)
-  
+  
   message("Extending transects out to FEMA 100yr floodplain polygon boundaries - (", Sys.time(), ")")
   
-  # # TODO: hacky, need to fix the extend-transects to polygons function to not need these columns
-  # transects <-
-  #   transects %>%
-  #   dplyr::rename(geom = geometry) %>%
-  #   dplyr::mutate(
-  #     is_extended = FALSE
-  #   )
-  
   transects <-
     transects %>%
     dplyr::left_join(
@@ -193,8 +158,8 @@ for(i in 1:nrow(path_df)) {
   transects <- dplyr::select(transects, -tmp_id)
   transects <- hydrofabric3D::add_tmp_id(transects)
   
-  # transects <-
-    transects3 %>%
+  transects <-
+    transects %>%
     # dplyr::select(-cs_lengthm) %>%
     # dplyr::mutate(is_fema_extended = left_is_extended | right_is_extended) %>%
     dplyr::select(
@@ -213,93 +178,6 @@ for(i in 1:nrow(path_df)) {
   
   gc()
   
   # # ---------------------------------------------------------------------
-  # # ---------------------------------------------------------------------
-  # 
-  # transects <- sf::read_sf(out_path)
-  # 
-  # # flines <- 
-  # #   flines %>% 
-  # #   dplyr::filter(hy_id %in% transects$hy_id)
-  # #   dplyr::slice(1:1000)
-  # 
-  # flines <- 
-  #   flines %>% 
-  #   dplyr::slice(seq(1, nrow(flines), 10))
-  # # 
-  # transects <- 
-  #   transects %>% 
-  #   dplyr::filter(hy_id %in% flines$hy_id)
-  # # 
-  # 
-  # # TODO: the flines argument needs the "hy_id" column to be named "id"
-  # # TODO: probably should fix this in hydrofabric3D::get_transect_extension_distances_to_polygons()
-  # flines <- 
-  #   flines %>% 
-  #   dplyr::rename(id = hy_id)
-  # 
-  # 
-  # # fema_keep <- rmapshaper::ms_simplify(fema, keep_shapes = T, keep = 0.01)
-  # 
-  # mapview::mapview(dplyr::filter(fema, fema_id %in% 1:10), col.regions = "red") + 
-  #   mapview::mapview(dplyr::filter(fema_keep, fema_id %in% 1:10), col.regions = 
"green") - # # mapview::mapview(dplyr::filter(fema_nokeep, fema_id %in% 1:10), col.regions = "dodgerblue") - # transects - # - # # system.time({ - # # profvis::profvis({ - # - # # TODO: make sure this 3000m extension distance is appropriate across VPUs - # # TODO: also got to make sure that this will be feasible on memory on the larger VPUs... - # # transects2 <- hydrofabric3D::get_transect_extension_distances_to_polygons( - # # transect_lines = transects, - # # polygons = fema, - # # flines = flines, - # # max_extension_distance = 3000 - # # ) - # transects2 <- hydrofabric3D::extend_transects_to_polygons( - # transect_lines = transects, - # polygons = fema, - # flowlines = flines, - # max_extension_distance = 3000 - # ) - # # }) - # # }) - # - # # mapview::mapview(transects2, color = "green") + - # # mapview::mapview(transects, color = "red") + - # # mapview::mapview(fema_keep, col.regions = "dodgerblue") - # # --------------------------------------------------------------------- - # # ---------------------------------------------------------------------------------------------------------------- - # - # transects2 <- dplyr::select(transects2, -tmp_id) - # transects2 <- hydrofabric3D::add_tmp_id(transects2) - # - # extended_ids <- - # transects2 %>% - # dplyr::filter(left_is_extended | right_is_extended) %>% - # dplyr::pull(hy_id) %>% - # unique() - # - # start_trans <- dplyr::filter(transects, hy_id %in% extended_ids[1:150]) - # end_trans <- dplyr::filter(transects2, hy_id %in% extended_ids[1:150]) - # mapview::mapview(start_trans, color = "red") + - # mapview::mapview(end_trans, color = "green") - # transects <- - # transects %>% - # dplyr::select(-cs_lengthm) %>% - # # dplyr::mutate(is_fema_extended = left_is_extended | right_is_extended) %>% - # dplyr::select( - # hy_id, - # cs_id, - # # cs_lengthm, - # cs_lengthm = new_cs_lengthm, - # cs_source, - # cs_measure, - # # is_extended, - # # is_fema_extended, - # geometry = geom - # ) - message("Saving transects to:\n - filepath: '", out_path, "'") # save transects with only columns to be uploaded to S3 (lynker-spatial/01_transects/transects_<VPU num>.gpkg) From 16cd482c8dc7260e0c0d837d6ccaad42a6467e65 Mon Sep 17 00:00:00 2001 From: anguswg-ucsb <anguswatters@gmail.com> Date: Thu, 18 Jul 2024 12:16:57 -0700 Subject: [PATCH 48/64] removed extra 00_fema.R file --- runners/cs_runner/00_fema.R | 143 ------------------------------------ 1 file changed, 143 deletions(-) delete mode 100644 runners/cs_runner/00_fema.R diff --git a/runners/cs_runner/00_fema.R b/runners/cs_runner/00_fema.R deleted file mode 100644 index 11f55be..0000000 --- a/runners/cs_runner/00_fema.R +++ /dev/null @@ -1,143 +0,0 @@ -library(dplyr) - -# Generate the flowlines layer for the final cross_sections_<VPU>.gpkg for each VPU -source("runners/cs_runner/config.R") - -# transect bucket prefix -transects_prefix <- paste0(s3_bucket, version_prefix, "/3D/transects/") - -# paths to nextgen datasets and model attribute parquet files -nextgen_files <- list.files(nextgen_dir, full.names = FALSE) -FEMA_files <- list.files(FEMA_FGB_PATH, full.names = FALSE) -FEMA_BB_files <- list.files(FEMA_FGB_BB_PATH, full.names = FALSE) -transects_files <- list.files(transects_dir, full.names = FALSE) -transects_files <- transects_files[!grepl("updated", transects_files)] - -# string to fill in "cs_source" column in output datasets -net_source <- "hydrofabric3D" - -# ensure the files are in the same order and matched up by VPU -path_df <- align_files_by_vpu( - x = nextgen_files, - y = 
From 16cd482c8dc7260e0c0d837d6ccaad42a6467e65 Mon Sep 17 00:00:00 2001
From: anguswg-ucsb <anguswatters@gmail.com>
Date: Thu, 18 Jul 2024 12:16:57 -0700
Subject: [PATCH 48/64] removed extra 00_fema.R file

---
 runners/cs_runner/00_fema.R | 143 ------------------------------------
 1 file changed, 143 deletions(-)
 delete mode 100644 runners/cs_runner/00_fema.R

diff --git a/runners/cs_runner/00_fema.R b/runners/cs_runner/00_fema.R
deleted file mode 100644
index 11f55be..0000000
--- a/runners/cs_runner/00_fema.R
+++ /dev/null
@@ -1,143 +0,0 @@
-library(dplyr)
-
-# Generate the flowlines layer for the final cross_sections_<VPU>.gpkg for each VPU
-source("runners/cs_runner/config.R")
-
-# transect bucket prefix
-transects_prefix <- paste0(s3_bucket, version_prefix, "/3D/transects/")
-
-# paths to nextgen datasets and model attribute parquet files
-nextgen_files   <- list.files(nextgen_dir, full.names = FALSE)
-FEMA_files      <- list.files(FEMA_FGB_PATH, full.names = FALSE)
-FEMA_BB_files   <- list.files(FEMA_FGB_BB_PATH, full.names = FALSE)
-transects_files <- list.files(transects_dir, full.names = FALSE)
-transects_files <- transects_files[!grepl("updated", transects_files)]
-
-# string to fill in "cs_source" column in output datasets
-net_source <- "hydrofabric3D"
-
-# ensure the files are in the same order and matched up by VPU
-path_df <- align_files_by_vpu(
-  x    = nextgen_files,
-  y    = transects_files,
-  base = base_dir
-)
-
-path_df
-
-us_states <-
-  USAboundaries::us_states() %>%
-  sf::st_transform(5070)
-
-# loop over each VPU and generate cross sections, then save locally and upload to S3 bucket
-# for(i in 1:nrow(path_df)) {
-
-  i = 8
-
-  # nextgen file and full path
-  nextgen_file <- path_df$x[i]
-  nextgen_path <- paste0(nextgen_dir, nextgen_file)
-
-  transect_file <- path_df$y[i]
-  transect_path <- paste0(transects_dir, transect_file)
-
-  transect_path
-
-  # # model attributes file and full path
-  # model_attr_file <- path_df$y[i]
-  # model_attr_path <- paste0(model_attr_dir, model_attr_file)
-
-  message("Creating VPU ", path_df$vpu[i], "\n - transects: ", transect_file, "\n - flowpaths: '", nextgen_file, "'")
-  # message("Creating VPU ", path_df$vpu[i], " transects:\n - flowpaths: '", nextgen_file, "'\n - model attributes: '", model_attr_file, "'")
-
-  # read in nextgen data
-  flines <- sf::read_sf(nextgen_path, layer = "flowpaths")
-
-  flines_bb <-
-    flines %>%
-    sf::st_bbox() %>%
-    sf::st_as_sfc() %>%
-    sf::st_as_sf()
-
-  transects <- sf::read_sf(transect_path)
-
-
-  # find the states intersecting with the given VPU flowlines
-  intersecting_states <-
-    sf::st_intersection(us_states, flines_bb) %>%
-    sf::st_drop_geometry() %>%
-    .$name %>%
-    gsub(" ", "-", .)
-
-  # get the matching FEMA floodplain FGB file names
-  matching_fema_files <- unlist(lapply(intersecting_states, function(state_name) {
-    FEMA_files[grepl(state_name, FEMA_files)]
-  }))
-
-  # full paths
-  files_of_interest <- paste0(FEMA_FGB_PATH, "/", matching_fema_files)
-
-  # Iterate over each FEMA file and determine optimal widths for cross sections.....
-  # for (file in rev(files_of_interest)) {
-
-  file = "/Users/anguswatters/Desktop/lynker-spatial/FEMA100/Tennessee-100yr-flood_valid.fgb"
-  fema_fgb <-
-    file %>%
-    sf::read_sf() %>%
-    sf::st_transform(5070)
-
-  fema_bb <-
-    fema_fgb %>%
-    sf::st_bbox() %>%
-    sf::st_as_sfc() %>%
-    sf::st_as_sf()
-
-  # fline_subset <-
-  #   flines %>%
-  #   sf::st_intersection(fema_fgb)
-
-  fline_fema_intersects <- sf::st_intersects(flines, fema_fgb)
-  fema_fline_intersects <- sf::st_intersects(fema_fgb, flines)
-  fema_subset$FLD_AR_ID %>% unique() %>% length()
-
-  fema_subset <-
-    fema_fgb[unlist(fema_fline_intersects), ] %>%
-    rmapshaper::ms_simplify()
-
-  fema_subset$FLD_AR_ID %>% unique() %>% length()
-
-  fema_subset <- fema_subset %>%
-    rmapshaper::ms_dissolve(field = "FLD_AR_ID")
-
-
-  fema_subset %>% mapview::npts()
-  unlist(flines_with_fema)[1]
-  fema_fgb
-
-  flines_with_fema <- flines[lengths(fline_fema_intersects) > 0,]
-
-
-  # flines %>%
-  #   sf::st_filter(
-  #     fema_fgb,
-  #     .predicate = st_touches
-  #   )
-  fema_subset %>%
-    dplyr::filter()
-  fema_subset %>% mapview::npts()
-
-  transects_subset <-
-    transects %>%
-    dplyr::filter(hy_id %in% flines_with_fema$id)
-
-  flines %>% sf::st_crs()
-  mapview::mapview(flines) + fema_bb
-
-  # mapview::mapview(fema_fgb, col.regions = "dodgerblue") +
-  mapview::mapview(fema_subset, col.regions = "dodgerblue") +
-    mapview::mapview(transects_subset, color = "red") +
-    mapview::mapview(flines_with_fema, color = "green")
-
-  message("file: ", file)
-
-  # }
\ No newline at end of file
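The scratch script deleted above still documents a useful pattern: choosing which state-level FEMA .fgb files overlap a VPU by intersecting state polygons with the flowline bounding box. A condensed sketch of that logic under the deleted script's assumptions (a `name` column on the states layer and "<State-Name>-100yr-flood_valid.fgb" file naming; both may not match current data):

library(sf)
library(dplyr)

vpu_fema_files <- function(flines, us_states, fema_files) {
  # bounding box of the VPU flowlines as an sf polygon
  flines_bb <- sf::st_as_sf(sf::st_as_sfc(sf::st_bbox(flines)))

  # state names that overlap the VPU, formatted to match the FEMA file names
  state_names <- us_states[lengths(sf::st_intersects(us_states, flines_bb)) > 0, ]$name
  state_names <- gsub(" ", "-", state_names)

  # guard against the "no overlapping states" edge case before building the regex
  if (length(state_names) == 0) return(character(0))

  # keep only the FEMA files whose names contain an intersecting state
  fema_files[grepl(paste(state_names, collapse = "|"), fema_files)]
}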
From 72b3e888a9a33a4e3217fe1de506fe636a321e98 Mon Sep 17 00:00:00 2001
From: anguswg-ucsb <anguswatters@gmail.com>
Date: Fri, 19 Jul 2024 10:06:58 -0700
Subject: [PATCH 49/64] updated final step for processing fema polygons to use
 mapshaper instead of sf dissolve methods

---
 runners/cs_runner/partition_fema_by_vpu.R | 259 +++++++++++++++-------
 1 file changed, 184 insertions(+), 75 deletions(-)

diff --git a/runners/cs_runner/partition_fema_by_vpu.R b/runners/cs_runner/partition_fema_by_vpu.R
index 03f224f..5a2d35b 100644
--- a/runners/cs_runner/partition_fema_by_vpu.R
+++ b/runners/cs_runner/partition_fema_by_vpu.R
@@ -402,12 +402,14 @@ for (vpu_dir in FEMA_VPU_SUBFOLDERS) {
  message("Merged '", basename(vpu_dir), "' FEMA output geopackage:\n --> '", master_filepath, "'")
  message()
 }
-
 # -------------------------------------------------------------------------------------
-# ---- Union each VPU geopackage (either on state or just touching predicate) ----
+# ---- Apply simplify, dissolve, explode on the MERGED polygons ----
 # -------------------------------------------------------------------------------------
+# list.files(FEMA_VPU_SUBFOLDERS, full.names = T)[grepl("_output.gpkg", list.files(FEMA_VPU_SUBFOLDERS, full.names = T))]

 for (i in 1:length(FEMA_VPU_SUBFOLDERS)) {
+  # i = 8
+  STAGING_FILES_TO_DELETE <- c()

  vpu_dir <- FEMA_VPU_SUBFOLDERS[i]
  VPU     <- basename(vpu_dir)
@@ -419,10 +421,14 @@ for (i in 1:length(FEMA_VPU_SUBFOLDERS)) {
  master_gpkg_name <- paste0(master_name, ".gpkg")
  master_filepath  <- paste0(vpu_dir, "/", master_gpkg_name)

+  master_geojson_name     <- paste0(master_name, ".geojson")
+  master_geojson_filepath <- paste0(vpu_dir, "/", master_geojson_name)
+
  updated_gpkg_name <- gsub(".gpkg", "_output.gpkg", master_gpkg_name)
  updated_filepath  <- paste0(vpu_dir, "/", updated_gpkg_name)

-  message("> Re-unioning and re-exploding geometries in '", basename(master_filepath), "'")
+  message("VPU Merged FEMA filename: '", master_gpkg_name, "'")
+  message("> Simplifying, dissolve, exploding VPU aggregated FEMA polygons... '", basename(master_filepath), "'")

  if(!file.exists(master_filepath)) {
    message("No FEMA geometries in '", VPU, "'")
@@ -430,88 +436,191 @@ for (i in 1:length(FEMA_VPU_SUBFOLDERS)) {
    next
  }

+  message("Converting \n > '", basename(master_filepath), "' to geojson '", master_geojson_name, "'")

-  fema_vpu <- sf::read_sf(master_filepath)
+  geojson_exists <- file.exists(master_geojson_filepath)

-  # fema_vpu %>% sf::st_geometry_type() %>% unique()
-  # fema_vpu %>% mapview::npts()
-  # fema_vpu %>% sf::st_is_valid() %>% all()
-  # fema_vpu %>%
-  #   sf::st_make_valid() %>%
-  #   sf::st_geometry_type() %>%
-  #   unique()
+  # message(" >>> '", geojson_filename, "' already exists? ", geojson_exists)
+  # message(" >>> Overwrite? ", OVERWRITE_FEMA_FILES)

-  geom_type_counts <- table(sf::st_geometry_type(fema_vpu))
+  gpkg_to_geojson_cmd <- paste0("ogr2ogr ", master_geojson_filepath, " ", master_filepath)

-  message("Geometry counts before casting all geometries to MULTIPOLYGON:")
-  for (g in seq_along(geom_type_counts)) {
-    message(" > ", names(geom_type_counts[g]), ": ", geom_type_counts[g])
+  if (OVERWRITE_FEMA_FILES || !geojson_exists) {
+    system(gpkg_to_geojson_cmd)
+    message("Writing '", master_geojson_name, "' to: \n > '", master_geojson_filepath, "'")
+
+    STAGING_FILES_TO_DELETE <- c(STAGING_FILES_TO_DELETE, master_geojson_filepath)
  }

-  # mapview::mapview(fema_vpu, color = 'red', col.regions = 'white') +
-  # mapview::mapview(fema_union, color = 'green', col.regions = 'white')
+  # Clean GeoJSON
+  message("Simplify, dissolve, explode > '", master_geojson_name, "'")
+  output_clean_filename     <- gsub(".geojson", "_clean.geojson", master_geojson_name)
+  output_clean_geojson_path <- paste0(vpu_dir, "/", output_clean_filename)

-  # fema_vpu %>%
-  #   sf::st_make_valid() %>%
-  #   dplyr::filter(sf::st_geometry_type(geom) %in% c("POLYGON", "MULTIPOLYGON")) %>%
-  #   sf::st_is_valid() %>%
-  #   all()
+  clean_geojson_exists <- file.exists(output_clean_geojson_path)
+  message(" >>> '", output_clean_filename, "' already exists? ", clean_geojson_exists)
+  message(" >>> Overwrite? ", OVERWRITE_FEMA_FILES)

-  tryCatch({
-
-    fema_vpu <-
-      fema_vpu %>%
-      nngeo::st_remove_holes(max_area = 200) %>%
-      # sf::st_make_valid() %>%
-      # dplyr::filter(sf::st_geometry_type(geom) %in% c("POLYGON", "MULTIPOLYGON")) %>%
-      add_predicate_group_id(sf::st_intersects) %>%
-      dplyr::group_by(group_id) %>%
-      rmapshaper::ms_dissolve(sys = TRUE, sys_mem = 16) %>%
-      rmapshaper::ms_explode(sys = TRUE, sys_mem = 16) %>%
-      dplyr::ungroup() %>%
-      nngeo::st_remove_holes(max_area = 200) %>%
-      dplyr::mutate(
-        vpu     = gsub("VPU_", "", VPU),
-        fema_id = as.character(1:dplyr::n())
-      ) %>%
-      dplyr::select(
-        vpu, fema_id,
-        # state,
-        geom = geometry
-      )
-
-  }, error = function(e) {
-    message(VPU, " threw the following error \n ", e)
-    message(" > Cleaning ", VPU, " using a backup cleaning strategy...")
-
-    fema_vpu <-
-      fema_vpu %>%
-      sf::st_make_valid() %>%
-      dplyr::mutate(
-        vpu     = gsub("VPU_", "", VPU),
-        fema_id = as.character(1:dplyr::n())
-      ) %>%
-      dplyr::select(
-        vpu, fema_id,
-        # state,
-        geom
-      )
-
-  })
+  mapshaper_command = paste0('node --max-old-space-size=16000 /opt/homebrew/bin/mapshaper ', master_geojson_filepath,
+                             ' -dissolve2 \\',
+                             ' -simplify 0.5 visvalingam \\',
+                             ' -snap \\',
+                             ' -explode \\',
+                             ' -o ', output_clean_geojson_path
+                             )

-  if (OVERWRITE_FEMA_FILES) {
-
-    message("> Overwriting '", basename(master_filepath), "' with final clean version...")
-
-    # union_file_path <- gsub(".gpkg", "_union.gpkg", fema_vpu_file)
-    # message("> writing '", basename(union_file_path), "' (unioned and exploded version)")
+  system(mapshaper_command)
+  # message("Writing '", master_geojson_name, "' to: \n > '", master_geojson_filepath, "'")
+
+  STAGING_FILES_TO_DELETE <- c(STAGING_FILES_TO_DELETE, output_clean_geojson_path)
+  output_clean_gpkg_filename <- gsub(".geojson", ".gpkg", master_geojson_name)
+  output_clean_gpkg_path     <- paste0(vpu_dir, "/", output_clean_gpkg_filename)
+
+  # fema_vpu <- sf::read_sf(master_filepath)
+  geojson_to_gpkg_cmd <- paste0("ogr2ogr -nlt MULTIPOLYGON ", updated_filepath, " ", output_clean_geojson_path)
+  updated_gpkg_exists <- file.exists(updated_filepath)
+
+  if (OVERWRITE_FEMA_FILES || !updated_gpkg_exists) {
+    system(geojson_to_gpkg_cmd)
+    message("Writing '", updated_gpkg_name, "' to: \n > '", updated_filepath, "'")

-    sf::write_sf(
-      fema_vpu,
-      updated_filepath
-      # master_filepath
-      # union_file_path
+  }
+
+  fema <-
+    sf::read_sf(updated_filepath) %>%
+    rmapshaper::ms_explode(sys=TRUE, sys_mem = 16) %>%
+    dplyr::mutate(
+      vpu     = gsub("VPU_", "", VPU),
+      fema_id = as.character(1:dplyr::n())
+    ) %>%
+    dplyr::select(
+      fema_id,
+      geom
    )
+
+  sf::write_sf(
+    fema,
+    updated_filepath
+  )
+
+  message("Deleting intermediary files\n")
+  for (delete_file in STAGING_FILES_TO_DELETE) {
+    if (file.exists(delete_file)) {
+      message("Deleting >>> '", delete_file, "'")
+      file.remove(delete_file)
+    }
+  }
-  message()
 }
+# # -------------------------------------------------------------------------------------
+# # ---- Union each VPU geopackage (either on state or just touching predicate) ----
+# # -------------------------------------------------------------------------------------
+#
+# for (i in 1:length(FEMA_VPU_SUBFOLDERS)) {
+#
+#   vpu_dir <- FEMA_VPU_SUBFOLDERS[i]
+#   VPU     <- basename(vpu_dir)
+#
+#   message(i, " - Attempting to union FEMA polygons for '", VPU, "'...")
+#
+#   # path to the merged directory where the final merged geopackage will end up
+#   master_name      <- paste0("fema_", gsub("VPU", "vpu", basename(vpu_dir)))
+#   master_gpkg_name <- paste0(master_name, ".gpkg")
+#   master_filepath  <- paste0(vpu_dir, "/", master_gpkg_name)
+#
+#   updated_gpkg_name <- gsub(".gpkg", "_output.gpkg", master_gpkg_name)
+#   updated_filepath  <- paste0(vpu_dir, "/", updated_gpkg_name)
+#
+#   message("> Re-unioning and re-exploding geometries in '", basename(master_filepath), "'")
+#
+#   if(!file.exists(master_filepath)) {
+#     message("No FEMA geometries in '", VPU, "'")
+#     message()
+#     next
+#   }
+#
+#
+#   fema_vpu <- sf::read_sf(master_filepath)
+#
+#   # fema_vpu %>% sf::st_geometry_type() %>% unique()
+#   # fema_vpu %>% mapview::npts()
+#   # fema_vpu %>% sf::st_is_valid() %>% all()
+#   # fema_vpu %>%
+#   #   sf::st_make_valid() %>%
+#   #   sf::st_geometry_type() %>%
+#   #   unique()
+#
+#   geom_type_counts <- table(sf::st_geometry_type(fema_vpu))
+#
+#   message("Geometry counts before casting all geometries to MULTIPOLYGON:")
+#   for (g in seq_along(geom_type_counts)) {
+#     message(" > ", names(geom_type_counts[g]), ": ", geom_type_counts[g])
+#   }
+#
+#   # mapview::mapview(fema_vpu, color = 'red', col.regions = 'white') +
+#   # mapview::mapview(fema_union, color = 'green', col.regions = 'white')
+#
+#   # fema_vpu %>%
+#   #   sf::st_make_valid() %>%
+#   #   dplyr::filter(sf::st_geometry_type(geom) %in% c("POLYGON", "MULTIPOLYGON")) %>%
+#   #   sf::st_is_valid() %>%
+#   #   all()
+#
+#   tryCatch({
+#
+#     fema_vpu <-
+#       fema_vpu %>%
+#       nngeo::st_remove_holes(max_area = 200) %>%
+#       # sf::st_make_valid() %>%
+#       # dplyr::filter(sf::st_geometry_type(geom) %in% c("POLYGON", "MULTIPOLYGON")) %>%
+#       add_predicate_group_id(sf::st_intersects) %>%
+#       dplyr::group_by(group_id) %>%
+#       rmapshaper::ms_dissolve(sys = TRUE, sys_mem = 16) %>%
+#       rmapshaper::ms_explode(sys = TRUE, sys_mem = 16) %>%
+#       dplyr::ungroup() %>%
+#       nngeo::st_remove_holes(max_area = 200) %>%
+#       dplyr::mutate(
+#         vpu     = gsub("VPU_", "", VPU),
+#         fema_id = as.character(1:dplyr::n())
+#       ) %>%
+#       dplyr::select(
+#         vpu, fema_id,
+#         # state,
+#         geom = geometry
+#       )
+#
+#   }, error = function(e) {
+#     message(VPU, " threw the following error \n ", e)
+#     message(" > Cleaning ", VPU, " using a backup cleaning strategy...")
+#
+#     fema_vpu <-
+#       fema_vpu %>%
+#       sf::st_make_valid() %>%
+#       dplyr::mutate(
+#         vpu     = gsub("VPU_", "", VPU),
+#         fema_id = as.character(1:dplyr::n())
+#       ) %>%
+#       dplyr::select(
+#         vpu, fema_id,
+#         # state,
+#         geom
+#       )
+#
+#   })
+#
+#   if (OVERWRITE_FEMA_FILES) {
+#
+#     message("> Overwriting '", basename(master_filepath), "' with final clean version...")
+#
+#     # union_file_path <- gsub(".gpkg", "_union.gpkg", fema_vpu_file)
+#     # message("> writing '", basename(union_file_path), "' (unioned and exploded version)")
+#
+#     sf::write_sf(
+#       fema_vpu,
+#       updated_filepath
+#       # master_filepath
+#       # union_file_path
+#     )
+#   }
+#   message()
+# }
", OVERWRITE_FEMA_FILES) - - gpkg_to_geojson_cmd <- paste0("ogr2ogr ", master_geojson_filepath, " ", master_filepath) + gpkg_to_geojson_cmd <- paste0("ogr2ogr -s_srs EPSG:5070 -t_srs EPSG:5070 ", master_geojson_filepath, " ", master_filepath) + # gpkg_to_geojson_cmd <- paste0("ogr2ogr -f GEOJSON -s_srs EPSG:5070 -t_srs EPSG:5070 ", master_geojson_filepath, " ", master_filepath) + # gpkg_to_geojson_cmd <- paste0("ogr2ogr ", master_geojson_filepath, " ", master_filepath) + # file.remove(master_geojson_filepath) if (OVERWRITE_FEMA_FILES || !geojson_exists) { system(gpkg_to_geojson_cmd) @@ -452,6 +459,9 @@ for (i in 1:length(FEMA_VPU_SUBFOLDERS)) { STAGING_FILES_TO_DELETE <- c(STAGING_FILES_TO_DELETE, master_geojson_filepath) } + # master_gj <- sf::read_sf(master_geojson_filepath) + # master_gpkg <- sf::read_sf(master_filepath) + # Clean GeoJSON message("Simplify, dissolve, explode > '", master_geojson_name, "'") output_clean_filename <- gsub(".geojson", "_clean.geojson", master_geojson_name) @@ -460,12 +470,19 @@ for (i in 1:length(FEMA_VPU_SUBFOLDERS)) { clean_geojson_exists <- file.exists(output_clean_geojson_path) message(" >>> '", output_clean_filename, "' already exists? ", clean_geojson_exists) message(" >>> Overwrite? ", OVERWRITE_FEMA_FILES) - - mapshaper_command = paste0('node --max-old-space-size=16000 /opt/homebrew/bin/mapshaper ', master_geojson_filepath, - ' -dissolve2 \\', - ' -simplify 0.5 visvalingam \\', + + # file.remove(output_clean_geojson_path) + + mapshaper_command = paste0('node --max-old-space-size=16000 /opt/homebrew/bin/mapshaper ', + master_geojson_filepath, + # ' -clean \\', + # ' -explode \\', + # ' -dissolve2 \\', + ' -simplify 0.3 visvalingam \\', ' -snap \\', ' -explode \\', + ' -clean \\', + # ' -proj EPSG:5070 \\', ' -o ', output_clean_geojson_path ) @@ -477,30 +494,49 @@ for (i in 1:length(FEMA_VPU_SUBFOLDERS)) { output_clean_gpkg_path <- paste0(vpu_dir, "/", output_clean_gpkg_filename) # fema_vpu <- sf::read_sf(master_filepath) - geojson_to_gpkg_cmd <- paste0("ogr2ogr -nlt MULTIPOLYGON ", updated_filepath, " ", output_clean_geojson_path) + # geojson_to_gpkg_cmd <- paste0("ogr2ogr -f GPKG ", updated_filepath, " ", output_clean_geojson_path) + geojson_to_gpkg_cmd <- paste0("ogr2ogr -nlt MULTIPOLYGON -s_srs EPSG:5070 -t_srs EPSG:5070 ", updated_filepath, " ", output_clean_geojson_path) + # geojson_to_gpkg_cmd <- paste0("ogr2ogr ", updated_filepath, " ", output_clean_geojson_path) + updated_gpkg_exists <- file.exists(updated_filepath) + # updated_gpkg_exists + # file.remove(updated_filepath) if (OVERWRITE_FEMA_FILES || !updated_gpkg_exists) { system(geojson_to_gpkg_cmd) message("Writing '", updated_gpkg_name, "' to: \n > '", updated_filepath, "'") - } + + # sf::st_layers(updated_filepath) - fema <- - sf::read_sf(updated_filepath) %>% - rmapshaper::ms_explode(sys=TRUE, sys_mem = 16) %>% + # mapview::npts(fema) + + fema <- + sf::read_sf(updated_filepath) %>% + # sf::read_sf(output_clean_geojson_path) %>% + # rmapshaper::ms_explode(sys=TRUE, sys_mem = 16) %>% dplyr::mutate( vpu = gsub("VPU_", "", VPU), fema_id = as.character(1:dplyr::n()) ) %>% dplyr::select( + vpu, fema_id, geom ) + # fema %>% + # rmapshaper::ms_simplify(keep = 0.5, keep_shapes = T) %>% + # dplyr::group_by(fema_id) %>% + # dplyr::mutate(pts = mapview::npts(geom)) %>% + # dplyr::arrange(-pts) + + file.remove(updated_filepath) + sf::write_sf( fema, - updated_filepath + updated_filepath, + append = FALSE ) message("Deleting intermediary files\n") From 550c85c66b07ab99b69f7c3d2fa2b4d3127b1b37 Mon Sep 
From 550c85c66b07ab99b69f7c3d2fa2b4d3127b1b37 Mon Sep 17 00:00:00 2001
From: anguswg-ucsb <anguswatters@gmail.com>
Date: Fri, 26 Jul 2024 12:42:41 -0700
Subject: [PATCH 51/64] testing out new fema simplification amounts...

---
 runners/cs_runner/01_transects.R | 58 +++++++++++++++++++++++++++-----
 1 file changed, 50 insertions(+), 8 deletions(-)

diff --git a/runners/cs_runner/01_transects.R b/runners/cs_runner/01_transects.R
index 31467ae..3bf707a 100644
--- a/runners/cs_runner/01_transects.R
+++ b/runners/cs_runner/01_transects.R
@@ -25,9 +25,9 @@ path_df <- align_files_by_vpu(
 )

 # loop over each VPU and generate cross sections, then save locally and upload to S3 bucket
-for(i in 1:nrow(path_df)) {
-  # i = 8
-
+for(i in 4:nrow(path_df)) {
+  # i = 2
+  # i = 4
  # nextgen file and full path
  nextgen_file <- path_df$x[i]
  nextgen_path <- paste0(nextgen_dir, nextgen_file)
@@ -103,7 +103,6 @@ for(i in 4:nrow(path_df)) {
  out_file <- paste0("nextgen_", path_df$vpu[i], "_transects.gpkg")
  out_path <- paste0(transects_dir, out_file)

-
  # add cs_source column and rename cs_widths to cs_lengthm
  transects <-
    transects %>%
@@ -119,15 +118,51 @@ for(i in 4:nrow(path_df)) {
  # fema polygons and transect lines
  fema <- sf::read_sf(vpu_fema_file)

-  mapview::npts(fema)
+  # mapview::npts(fema)
+
+  # mapview::npts(fema)

  message("Simplifying FEMA polygons...")
+  message(" - Number of points BEFORE simplifying: ", mapview::npts(fema))
+  message(" - Number of geoms BEFORE simplifying: ", nrow(fema))
  # TODO: this should be a function argument OR removed; we probably shouldn't forcibly and silently simplify the input polygons without the user knowing..

  # keep 1% of the original points for speed
-  fema <- rmapshaper::ms_simplify(fema, keep_shapes = T, keep = 0.01, sys = TRUE, sys_mem = 16)
+  fema <- rmapshaper::ms_simplify(fema, keep_shapes = T, keep = 0.02, sys = TRUE, sys_mem = 16)
+
+  message(" - Number of points AFTER simplifying: ", mapview::npts(fema))
+  message(" - Number of geoms AFTER simplifying: ", nrow(fema))
+  # fema_keep <- rmapshaper::ms_simplify(fema, keep_shapes = T, explode = TRUE, keep = 0.05, snap = F, sys = TRUE, sys_mem = 16)
+  # fema_nokeep <- rmapshaper::ms_simplify(fema, keep_shapes = F, keep = 0.05, sys = TRUE, sys_mem = 16)
+  # mapview::npts(fema, by_feature = F)
+  # mapview::npts(fema_keep, by_feature = F)
+  # mapview::npts(fema_keep, by_feature = T)
+  #
+  # mapview::npts(fema_nokeep, by_feature = F)
+  # mapview::npts(fema_nokeep, by_feature = T)
+  #
+  # fema_keep <-
+  #   fema_keep %>%
+  #   dplyr::mutate(nid = 1:dplyr::n()) %>%
+  #   dplyr::group_by(nid) %>%
+  #   dplyr::mutate(npts = mapview::npts(geom)) %>%
+  #   dplyr::arrange(-npts)
+  #
+  # fema_nokeep <-
+  #   fema_nokeep %>%
+  #   dplyr::mutate(nid = 1:dplyr::n()) %>%
+  #   dplyr::group_by(nid) %>%
+  #   dplyr::mutate(npts = mapview::npts(geom)) %>%
+  #   dplyr::arrange(-npts)
+
+  # fema <- rmapshaper::ms_simplify(fema, keep_shapes = F, keep = 0.1, sys = TRUE, sys_mem = 16)
+
+  # mapview::npts(fema_nokeep, by_feature = T)
+
+  # fema <-
+  #   fema %>%
+  #   sf::st_make_valid()
+  # fema %>% sf::st_is_valid() %>% all()
+
  message("Extending transects out to FEMA 100yr floodplain polygon boundaries - (", Sys.time(), ")")

  transects <-
@@ -139,6 +175,15 @@ for(i in 4:nrow(path_df)) {
    by = "hy_id"
  )

+  # TODO: TESTING DATA
+  # sf::write_sf(transects2, "/Users/anguswatters/Desktop/test_transects_02.gpkg")
+  # sf::write_sf(flines, "/Users/anguswatters/Desktop/test_flines_02.gpkg")
+  # fema <- sf::read_sf("/Users/anguswatters/Desktop/lynker-spatial/FEMA_BY_VPU/VPU_02/fema_vpu_02_output.gpkg")
+  # transects3 <- sf::read_sf("/Users/anguswatters/Desktop/test_transects_02.gpkg")
+  # flines2 <- sf::read_sf("/Users/anguswatters/Desktop/test_flines_02.gpkg")
+
  # system.time({

  # TODO: make sure this 3000m extension distance is appropriate across VPUs
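Patch 51's before/after point-count logging is a pattern worth keeping around when experimenting with `keep` values. A small sketch that wraps the same logging around the simplify call; the function name is hypothetical, and it assumes rmapshaper with system mapshaper support (the sys/sys_mem arguments) plus mapview for npts():

simplify_and_report <- function(fema, keep = 0.01) {
  message(" - Number of points BEFORE simplifying: ", mapview::npts(fema))
  message(" - Number of geoms  BEFORE simplifying: ", nrow(fema))

  # same call the runner makes, with the experiment knob exposed as an argument
  fema <- rmapshaper::ms_simplify(fema, keep_shapes = TRUE, keep = keep,
                                  sys = TRUE, sys_mem = 16)

  message(" - Number of points AFTER simplifying: ", mapview::npts(fema))
  message(" - Number of geoms  AFTER simplifying: ", nrow(fema))
  fema
}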
From b99bbd9deead721cba025f76ed5f91fa6b08a04c Mon Sep 17 00:00:00 2001
From: anguswg-ucsb <anguswatters@gmail.com>
Date: Wed, 31 Jul 2024 15:12:17 -0700
Subject: [PATCH 52/64] small cleanups

---
 runners/cs_runner/01_transects.R | 62 +++++---------------------------
 1 file changed, 9 insertions(+), 53 deletions(-)

diff --git a/runners/cs_runner/01_transects.R b/runners/cs_runner/01_transects.R
index 3bf707a..804e2dc 100644
--- a/runners/cs_runner/01_transects.R
+++ b/runners/cs_runner/01_transects.R
@@ -25,9 +25,8 @@ path_df <- align_files_by_vpu(
 )

 # loop over each VPU and generate cross sections, then save locally and upload to S3 bucket
-for(i in 4:nrow(path_df)) {
-  # i = 2
-  # i = 4
+for(i in 1:nrow(path_df)) {
+
  # nextgen file and full path
  nextgen_file <- path_df$x[i]
  nextgen_path <- paste0(nextgen_dir, nextgen_file)
@@ -49,6 +48,10 @@ for(i in 1:nrow(path_df)) {
  )
  # message("Creating VPU ", path_df$vpu[i], " transects:\n - flowpaths: '", nextgen_file, "'\n - model attributes: '", model_attr_file, "'")

+  # sf::write_sf(
+  #   dplyr::slice(dplyr::filter(flines, order == 2), 2),
+  #   "/Users/anguswatters/Desktop/example_flowline.gpkg"
+  # )
  # read in nextgen data
  flines <- sf::read_sf(nextgen_path, layer = "flowpaths")

@@ -118,51 +121,15 @@ for(i in 1:nrow(path_df)) {
  # fema polygons and transect lines
  fema <- sf::read_sf(vpu_fema_file)

-  # mapview::npts(fema)
-
-  # mapview::npts(fema)

  message("Simplifying FEMA polygons...")
-  message(" - Number of points BEFORE simplifying: ", mapview::npts(fema))
  message(" - Number of geoms BEFORE simplifying: ", nrow(fema))
  # TODO: this should be a function argument OR removed; we probably shouldn't forcibly and silently simplify the input polygons without the user knowing..

  # keep 1% of the original points for speed
-  fema <- rmapshaper::ms_simplify(fema, keep_shapes = T, keep = 0.02, sys = TRUE, sys_mem = 16)
+  fema <- rmapshaper::ms_simplify(fema, keep_shapes = T, keep = 0.01, sys = TRUE, sys_mem = 16)
+  # fema <- rmapshaper::ms_simplify(fema, keep_shapes = T, keep = 0.1, sys = TRUE, sys_mem = 16)

-  message(" - Number of points AFTER simplifying: ", mapview::npts(fema))
  message(" - Number of geoms AFTER simplifying: ", nrow(fema))
-  # fema_keep <- rmapshaper::ms_simplify(fema, keep_shapes = T, explode = TRUE, keep = 0.05, snap = F, sys = TRUE, sys_mem = 16)
-  # fema_nokeep <- rmapshaper::ms_simplify(fema, keep_shapes = F, keep = 0.05, sys = TRUE, sys_mem = 16)
-  # mapview::npts(fema, by_feature = F)
-  # mapview::npts(fema_keep, by_feature = F)
-  # mapview::npts(fema_keep, by_feature = T)
-  #
-  # mapview::npts(fema_nokeep, by_feature = F)
-  # mapview::npts(fema_nokeep, by_feature = T)
-  #
-  # fema_keep <-
-  #   fema_keep %>%
-  #   dplyr::mutate(nid = 1:dplyr::n()) %>%
-  #   dplyr::group_by(nid) %>%
-  #   dplyr::mutate(npts = mapview::npts(geom)) %>%
-  #   dplyr::arrange(-npts)
-  #
-  # fema_nokeep <-
-  #   fema_nokeep %>%
-  #   dplyr::mutate(nid = 1:dplyr::n()) %>%
-  #   dplyr::group_by(nid) %>%
-  #   dplyr::mutate(npts = mapview::npts(geom)) %>%
-  #   dplyr::arrange(-npts)
-
-  # fema <- rmapshaper::ms_simplify(fema, keep_shapes = F, keep = 0.1, sys = TRUE, sys_mem = 16)
-
-  # mapview::npts(fema_nokeep, by_feature = T)
-
-  # fema <-
-  #   fema %>%
-  #   sf::st_make_valid()
-  # fema %>% sf::st_is_valid() %>% all()
-
  message("Extending transects out to FEMA 100yr floodplain polygon boundaries - (", Sys.time(), ")")

  transects <-
@@ -175,15 +142,6 @@ for(i in 1:nrow(path_df)) {
    by = "hy_id"
  )

-  # TODO: TESTING DATA
-  # sf::write_sf(transects2, "/Users/anguswatters/Desktop/test_transects_02.gpkg")
-  # sf::write_sf(flines, "/Users/anguswatters/Desktop/test_flines_02.gpkg")
-  # fema <- sf::read_sf("/Users/anguswatters/Desktop/lynker-spatial/FEMA_BY_VPU/VPU_02/fema_vpu_02_output.gpkg")
-  # transects3 <- sf::read_sf("/Users/anguswatters/Desktop/test_transects_02.gpkg")
-  # flines2 <- sf::read_sf("/Users/anguswatters/Desktop/test_flines_02.gpkg")
-
-  # system.time({
-
  # TODO: make sure this 3000m extension distance is appropriate across VPUs
  # TODO: also got to make sure that this will be feasible on memory on the larger VPUs...
  transects <- hydrofabric3D::extend_transects_to_polygons(
    transect_lines         = transects,
    polygons               = fema,
    flowlines              = flines,
    crosswalk_id           = "hy_id",
-    intersect_group_id     = "mainstem",
+    grouping_id            = "mainstem",
    max_extension_distance = 3000
  )
-  # })
-
  message("FEMA extensions complete! - ( ", Sys.time(), " )")

  transects <- dplyr::select(transects, -tmp_id)
From 92fad96457d04d56d2fa0d760bc14ea023ad3679 Mon Sep 17 00:00:00 2001
From: anguswg-ucsb <anguswatters@gmail.com>
Date: Fri, 2 Aug 2024 09:04:47 -0700
Subject: [PATCH 53/64] huge overhaul of variable declarations and path
 variable creations for cs_runners/, split download_nextgen.R into separate
 scripts for downloading nextgen, ref_features, and ML outputs, moved util
 functions to a utils.R script, updated 04_driver.R script, still more
 cleanups needed

---
 runners/cs_runner/01_transects.R          |  26 ++--
 runners/cs_runner/02_cs_pts.R             |  50 +++---
 runners/cs_runner/03_inject_ml.R          |  27 ++--
 runners/cs_runner/04_driver.R             |  12 +-
 runners/cs_runner/config.R                | 108 ++++++-----
 runners/cs_runner/config_vars.R           | 158 ++++++++++++-----
 runners/cs_runner/download_fema100.R      |   8 +-
 runners/cs_runner/download_ml_outputs.R   |  16 ++
 runners/cs_runner/download_nextgen.R      | 178 +++++---------------
 runners/cs_runner/download_ref_features.R |  35 +++++
 runners/cs_runner/partition_fema_by_vpu.R |   4 +-
 runners/cs_runner/set_path_variables.R    |  10 ++
 runners/cs_runner/utils.R                 | 149 ++++++++++++++++++
 13 files changed, 521 insertions(+), 260 deletions(-)
 create mode 100644 runners/cs_runner/download_ml_outputs.R
 create mode 100644 runners/cs_runner/download_ref_features.R
 create mode 100644 runners/cs_runner/set_path_variables.R

diff --git a/runners/cs_runner/01_transects.R b/runners/cs_runner/01_transects.R
index 804e2dc..2c2ce2f 100644
--- a/runners/cs_runner/01_transects.R
+++ b/runners/cs_runner/01_transects.R
@@ -1,5 +1,6 @@
 # Generate the flowlines layer for the final cross_sections_<VPU>.gpkg for each VPU
 source("runners/cs_runner/config.R")
+source("runners/cs_runner/utils.R")

 # # # # load libraries
 # library(hydrofabric3D)
@@ -8,20 +9,17 @@ source("runners/cs_runner/config.R")
 # install.packages("devtools")

 # transect bucket prefix
-transects_prefix <- paste0(s3_bucket, version_prefix, "/3D/transects/")
+S3_TRANSECTS_DIR <- paste0(S3_BUCKET_URI, VERSION, "/3D/transects/")

 # paths to nextgen datasets and model attribute parquet files
-nextgen_files <- list.files(nextgen_dir, full.names = FALSE)
-model_attr_files <- list.files(model_attr_dir, full.names = FALSE)
-
-# string to fill in "cs_source" column in output datasets
-net_source <- "hydrofabric3D"
+NEXTGEN_FILES <- list.files(NEXTGEN_DIR, full.names = FALSE)
+# model_attr_files <- list.files(MODEL_ATTR_DIR, full.names = FALSE)

 # ensure the files are in the same order and matched up by VPU
 path_df <- align_files_by_vpu(
-  x    = nextgen_files,
-  y    = model_attr_files,
-  base = base_dir
+  x    = NEXTGEN_FILES,
+  y    = NEXTGEN_FILES,
+  base = BASE_DIR
 )

 # loop over each VPU and generate cross sections, then save locally and upload to S3 bucket
@@ -29,7 +27,7 @@ for(i in 1:nrow(path_df)) {

  # nextgen file and full path
  nextgen_file <- path_df$x[i]
-  nextgen_path <- paste0(nextgen_dir, nextgen_file)
+  nextgen_path <- paste0(NEXTGEN_DIR, nextgen_file)

  vpu <- path_df$vpu[i]

@@ -104,13 +102,13 @@ for(i in 1:nrow(path_df)) {
  # name of file and path to save transects gpkg to
  out_file <- paste0("nextgen_", path_df$vpu[i], "_transects.gpkg")
-  out_path <- paste0(transects_dir, out_file)
+  out_path <- paste0(TRANSECTS_DIR, out_file)

  # add cs_source column and rename cs_widths to cs_lengthm
  transects <-
    transects %>%
    dplyr::mutate(
-      cs_source = net_source
+      cs_source = CS_SOURCE
    )

@@ -196,8 +194,8 @@ for(i in 1:nrow(path_df)) {
  # command to copy transects geopackage to S3
-  copy_to_s3 <- paste0("aws s3 cp ", out_path, " ", transects_prefix, out_file,
-                       ifelse(is.null(aws_profile), "", paste0(" --profile ", aws_profile))
+  copy_to_s3 <- paste0("aws s3 cp ", out_path, " ", S3_TRANSECTS_DIR, out_file,
+                       ifelse(is.null(AWS_PROFILE), "", paste0(" --profile ", AWS_PROFILE))
                      )

  message("Copy VPU ", path_df$vpu[i], " transects to S3:\n - S3 copy command:\n'",
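The same "aws s3 cp ... [--profile X]" string is rebuilt in several runners with the identical ifelse() guard. A small helper showing the pattern; the function name is hypothetical and not part of the repo, but the command it builds mirrors the lines in the diffs:

s3_copy_command <- function(local_path, s3_uri, aws_profile = NULL) {
  paste0(
    "aws s3 cp ", local_path, " ", s3_uri,
    # append --profile only when a profile is configured
    ifelse(is.null(aws_profile), "", paste0(" --profile ", aws_profile))
  )
}

# usage:
# system(s3_copy_command(out_path, paste0(S3_TRANSECTS_DIR, out_file), AWS_PROFILE))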
diff --git a/runners/cs_runner/02_cs_pts.R b/runners/cs_runner/02_cs_pts.R
index 4b18b93..d28e43e 100644
--- a/runners/cs_runner/02_cs_pts.R
+++ b/runners/cs_runner/02_cs_pts.R
@@ -11,33 +11,32 @@ library(hydrofabric3D)
 library(dplyr)
 library(sf)

-# cross section bucket prefix
-cs_pts_prefix <- paste0(s3_bucket, version_prefix, "/3D/dem-cross-sections/")
-
-# transect bucket prefix
-transects_prefix <- paste0(s3_bucket, version_prefix, "/3D/transects/")
+# # cross section bucket prefix
+# S3_CS_PTS_DIR <- paste0(S3_BUCKET_URI, VERSION, "/3D/dem-cross-sections/")
+#
+# # transect bucket prefix
+# S3_TRANSECTS_DIR <- paste0(S3_BUCKET_URI, VERSION, "/3D/transects/")

 # paths to nextgen datasets
-nextgen_files <- list.files(nextgen_dir, full.names = FALSE)
+NEXTGEN_FILES <- list.files(NEXTGEN_DIR, full.names = FALSE)

 # paths to nextgen datasets
-transect_files <- list.files(transects_dir, full.names = FALSE)
+transect_files <- list.files(TRANSECTS_DIR, full.names = FALSE)
 transect_files <- transect_files[!grepl("updated_", transect_files)]

-# string to fill in "cs_source" column in output datasets
-cs_source <- "hydrofabric3D"
+REF_FEATURES <- list.files(REF_FEATURES_GPKG_DIR, full.names = FALSE)

 # reference features dataframe
 ref_df <- data.frame(
-  vpu      = sapply(strsplit(ref_features, "_", fixed = TRUE), function(i) { i[1] }),
-  ref_file = ref_features
+  vpu      = sapply(strsplit(REF_FEATURES, "_", fixed = TRUE), function(i) { i[1] }),
+  ref_file = REF_FEATURES
 )

 # ensure the files are in the same order and matched up by VPU
 path_df <- align_files_by_vpu(
-  x    = nextgen_files,
+  x    = NEXTGEN_FILES,
  y    = transect_files,
-  base = base_dir
+  base = BASE_DIR
 ) %>%
  dplyr::left_join(
    ref_df,
@@ -48,21 +47,21 @@ path_df <- align_files_by_vpu(
 # Save parquet locally and upload to specified S3 bucket
 for (i in 1:nrow(path_df)) {
-
+  # i = 8
  start <- Sys.time()

  # nextgen file and full path
  nextgen_file <- path_df$x[i]
-  nextgen_path <- paste0(nextgen_dir, nextgen_file)
+  nextgen_path <- paste0(NEXTGEN_DIR, nextgen_file)

  # model attributes file and full path
  transect_file <- path_df$y[i]
-  transect_path <- paste0(transects_dir, transect_file)
+  transect_path <- paste0(TRANSECTS_DIR, transect_file)

  # model attributes file and full path
  ref_file <- path_df$ref_file[i]
-  ref_path <- paste0(ref_features_dir, "gpkg/", ref_file)
+  ref_path <- paste0(REF_FEATURES_DIR, "gpkg/", ref_file)

  # current VPU being processed
  VPU = path_df$vpu[i]
@@ -76,6 +75,7 @@ for (i in 1:nrow(path_df)) {

  ###################

+
  # read in transects data
  transects <- sf::read_sf(transect_path)

@@ -339,7 +339,7 @@ for (i in 1:nrow(path_df)) {
  fixed_pts <-
    fixed_pts %>%
    dplyr::mutate(
-      Z_source = cs_source
+      Z_source = CS_SOURCE
    ) %>%
    dplyr::relocate(hy_id, cs_id, pt_id,
                    cs_lengthm, relative_distance, X, Y, Z, Z_source,
                    class, point_type,
@@ -399,8 +399,8 @@ for (i in 1:nrow(path_df)) {
  )

  # command to copy transects geopackage to S3
-  trans_to_s3 <- paste0("aws s3 cp ", updated_path, " ", transects_prefix, transect_file,
-                        ifelse(is.null(aws_profile), "", paste0(" --profile ", aws_profile)))
+  trans_to_s3 <- paste0("aws s3 cp ", updated_path, " ", S3_TRANSECTS_DIR, transect_file,
+                        ifelse(is.null(AWS_PROFILE), "", paste0(" --profile ", AWS_PROFILE)))

  message("Copy VPU ", path_df$vpu[i], " transects to S3:\n - S3 copy command:\n'",
          trans_to_s3,
@@ -416,7 +416,7 @@ for (i in 1:nrow(path_df)) {
  # name of file and path to save transects gpkg to
  out_file <- paste0("nextgen_", path_df$vpu[i], "_cross_sections.parquet")
-  out_path <- paste0(cs_pts_dir, out_file)
+  out_path <- paste0(CS_PTS_DIR, out_file)

  message("Saving cross section points to:\n - filepath: '", out_path, "'")

@@ -424,12 +424,12 @@ for (i in 1:nrow(path_df)) {
  arrow::write_parquet(fixed_pts, out_path)

  # command to copy cross section points parquet to S3
-  copy_cs_pts_to_s3 <- paste0("aws s3 cp ", out_path, " ", cs_pts_prefix, out_file,
-                              ifelse(is.null(aws_profile), "", paste0(" --profile ", aws_profile)))
+  copy_cs_pts_to_s3 <- paste0("aws s3 cp ", out_path, " ", S3_CS_PTS_DIR, out_file,
+                              ifelse(is.null(AWS_PROFILE), "", paste0(" --profile ", AWS_PROFILE)))

  message("Copy VPU ", path_df$vpu[i], " cross sections to S3:\n - S3 copy command:\n'",
-          paste0("aws s3 cp ", out_path, " ", cs_pts_prefix, out_file,
-                 ifelse(is.null(aws_profile), "", paste0(" --profile ", aws_profile))),
+          paste0("aws s3 cp ", out_path, " ", S3_CS_PTS_DIR, out_file,
+                 ifelse(is.null(AWS_PROFILE), "", paste0(" --profile ", AWS_PROFILE))),
          "'\n==========================")

  system(copy_cs_pts_to_s3, intern = TRUE)
diff --git a/runners/cs_runner/03_inject_ml.R b/runners/cs_runner/03_inject_ml.R
index c607e9f..0321003 100644
--- a/runners/cs_runner/03_inject_ml.R
+++ b/runners/cs_runner/03_inject_ml.R
@@ -8,24 +8,25 @@ library(patchwork)

 # Generate the flowlines layer for the final cross_sections_<VPU>.gpkg for each VPU
 source("runners/cs_runner/config.R")
+source("runners/cs_runner/utils.R")

 # cross section bucket prefix
-CS_ML_PTS_S3_PREFIX <- paste0(s3_bucket, version_prefix, "/3D/cross-sections/")
-# cs_pts_prefix <- paste0(s3_bucket, "v20/3D/dem-cross-sections/")
+S3_CS_ML_PTS_DIR <- paste0(S3_BUCKET_URI, VERSION, "/3D/cross-sections/")
+# S3_CS_PTS_DIR <- paste0(S3_BUCKET_URI, "v20/3D/dem-cross-sections/")

 ML_OUTPUTS_PATH <- list.files(ML_OUTPUTS_DIR, full.names = TRUE)

 # paths to nextgen datasets
-nextgen_files <- list.files(nextgen_dir, full.names = FALSE)
+NEXTGEN_FILES <- list.files(NEXTGEN_DIR, full.names = FALSE)

 # paths to nextgen datasets
-cs_files <- list.files(cs_pts_dir, full.names = FALSE)
+cs_files <- list.files(CS_PTS_DIR, full.names = FALSE)

 # ensure the files are in the same order and matched up by VPU
 path_df <- align_files_by_vpu(
-  x    = nextgen_files,
+  x    = NEXTGEN_FILES,
  y    = cs_files,
-  base = base_dir
+  base = BASE_DIR
 )
 # dplyr::left_join(
 #   ref_df,
@@ -44,11 +45,11 @@ for (i in 1:nrow(path_df)) {

  # nextgen file and full path
  nextgen_file <- path_df$x[i]
-  nextgen_path <- paste0(nextgen_dir, nextgen_file)
+  nextgen_path <- paste0(NEXTGEN_DIR, nextgen_file)

  # model attributes file and full path
  cs_file <- path_df$y[i]
-  cs_pts_path <- paste0(cs_pts_dir, cs_file)
+  cs_pts_path <- paste0(CS_PTS_DIR, cs_file)

  # current VPU being processed
  VPU = path_df$vpu[i]
@@ -56,8 +57,8 @@ for (i in 1:nrow(path_df)) {
  message("Augmenting DEM cross sections with ML estimated widths/depths: ", VPU, " cross section points:",
          "'\n - cross section points: '", cs_file, "'",
-          "'\n - ML estimated widths/depths: '", ML_OUTPUTS_FILE, "'",
-          # "'\n - ML estimated widths/depths: '", ML_OUTPUTS_URI, "'",
+          "'\n - ML estimated widths/depths: '", ML_OUTPUTS_S3_FILE, "'",
+          # "'\n - ML estimated widths/depths: '", ML_OUTPUTS_S3_URI, "'",
          "\n - CONUS network file: '", CONUS_NETWORK_URI, "'",
          "\n - flowpaths: '", nextgen_file,
          # "\n - waterbodies: '", ref_file, "'",
@@ -326,21 +327,21 @@ for (i in 1:nrow(path_df)) {

  # name of file and path to save transects gpkg to
  out_file <- paste0("nextgen_", path_df$vpu[i], "_cross_sections.parquet")
-  out_path <- paste0(final_dir, out_file)
+  out_path <- paste0(CS_OUTPUT_DIR, out_file)

  message(round(Sys.time()), " - Saving ML augmented cross section points to:\n - filepath: '", out_path, "'")

  # save cross section points as a parquet to out_path (lynker-spatial/02_cs_pts/cs_pts_<VPU num>.parquet)
  arrow::write_parquet(final_cs, out_path)

-  s3_save_uri <- paste0(CS_ML_PTS_S3_PREFIX, out_file)
+  s3_save_uri <- paste0(S3_CS_ML_PTS_DIR, out_file)

  # command to copy cross section points parquet to S3
  copy_cs_pts_to_s3 <- paste0("aws s3 cp ", out_path, " ", s3_save_uri,
-                              ifelse(is.null(aws_profile), "", paste0(" --profile ", aws_profile))
+                              ifelse(is.null(AWS_PROFILE), "", paste0(" --profile ", AWS_PROFILE))
                             )

  message(
diff --git a/runners/cs_runner/04_driver.R b/runners/cs_runner/04_driver.R
index a19a7c4..212e1ac 100644
--- a/runners/cs_runner/04_driver.R
+++ b/runners/cs_runner/04_driver.R
@@ -3,10 +3,14 @@
 # downloads nextgen datasets
 source("runners/cs_runner/config.R")

-# downloads nextgen datasets
+# downloads datasets
+# - Nextgen data
+# - Reference features (for waterbody filtering)
+# - ML outputs
+# - FEMA 100 year floodplain polygons (FGBs)
 source("runners/cs_runner/download_nextgen.R")
-
-# download FEMA100 year FGBs
+source("runners/cs_runner/download_ref_features.R")
+source("runners/cs_runner/download_ml_outputs.R")
 source("runners/cs_runner/download_fema100.R")

 # simplify, dissolve, FEMA polygons and partition FEMA polygons by VPU
@@ -19,4 +23,4 @@ source("runners/cs_runner/01_transects.R")
 source("runners/cs_runner/02_cs_pts.R")

 # Apply machine learning topwidths and depths estimates to DEM cross section points
-source("runners/cs_runner/02_cs_pts.R")
+source("runners/cs_runner/03_inject_ml.R")
diff --git a/runners/cs_runner/config.R b/runners/cs_runner/config.R
index 8d22554..a839f61 100644
--- a/runners/cs_runner/config.R
+++ b/runners/cs_runner/config.R
@@ -10,69 +10,99 @@ pacman::p_load(

 # load root directory
 source("runners/cs_runner/config_vars.R")
+source("runners/cs_runner/utils.R")

 sf::sf_use_s2(FALSE)

 ### Cross section point

-### S3 names
+# # -------------------------------------------------------------------------------------
+# # ----- S3 names ------
+# # -------------------------------------------------------------------------------------

-# name of S3 bucket
-s3_bucket <- "s3://lynker-spatial/"
+# # AWS S3 bucket URI
+# S3_BUCKET_URI <- "s3://lynker-spatial/"

-# name of bucket with nextgen data
-nextgen_bucket <- "lynker-spatial"
+# # name of bucket with nextgen data
+# S3_BUCKET_NAME <- "lynker-spatial"

-# nextgen bucket folder name
-nextgen_bucket_folder <- "v20.1/gpkg/"
+# # the name of the folder in the S3 bucket with the nextgen data
+# S3_BUCKET_NEXTGEN_DIR <- "v20.1/gpkg/"

-# nextgen bucket name
-nextgen_prefix <- paste0(s3_bucket, nextgen_bucket_folder)
+# # full URI to the S3 bucket folder with the nextgen data
+# S3_BUCKET_NEXTGEN_DIR_URI <- paste0(S3_BUCKET_URI, S3_BUCKET_NEXTGEN_DIR)

-# reference features S3 bucket prefix
-ref_features_prefix <- "s3://lynker-spatial/00_reference_features/gpkg/"
+# # reference features S3 bucket prefix
+# S3_BUCKET_REF_FEATURES_URI <- "s3://lynker-spatial/00_reference_features/gpkg/"

-# S3 prefix/folder of version run
-version_prefix <- "v20.1"
-# version_prefix <- "v20"
+# # S3 prefix/folder of version run
+# VERSION <- "v20.1"

-### LOCAL DIRS
+# # -------------------------------------------------------------------------------------

-# directory to copy nextgen bucket data to
-nextgen_dir <- paste0(base_dir, "/", nextgen_bucket_folder)
-# nextgen_dir <- paste0(base_dir, "/pre-release/")
+# # -------------------------------------------------------------------------------------
+# # ----- Local directories ------
+# # -------------------------------------------------------------------------------------

-# model attributes directory
-model_attr_dir <- paste0(base_dir, "/model_attributes/")
+# ### LOCAL DIRS

-# cross-section data model data directories
-transects_dir <- paste0(base_dir, "/01_transects/")
-cs_pts_dir <- paste0(base_dir, "/02_cs_pts/")
+# # directory to copy nextgen bucket data to
+# NEXTGEN_DIR <- paste0(BASE_DIR, "/", S3_BUCKET_NEXTGEN_DIR)
+# # NEXTGEN_DIR <- paste0(BASE_DIR, "/pre-release/")

-# final output directory with geopackages per VPU
-final_dir <- paste0(base_dir, "/cross_sections/")
+# # model attributes directory
+# MODEL_ATTR_DIR <- paste0(BASE_DIR, "/model_attributes/")

-# directory to copy nextgen bucket data to
-ref_features_dir <- paste0(base_dir, "/00_reference_features/")
+# # cross-section data model data directories
+# TRANSECTS_DIR <- paste0(BASE_DIR, "/01_transects/")
+# CS_PTS_DIR    <- paste0(BASE_DIR, "/02_cs_pts/")

-# make a directory for the ML outputs data
-ML_OUTPUTS_DIR <- paste0(base_dir, "/ml-outputs/")
+# # final output directory with geopackages per VPU
+# CS_OUTPUT_DIR <- paste0(BASE_DIR, "/cross_sections/")

+# # directory to copy nextgen bucket data to
+# REF_FEATURES_DIR <- paste0(BASE_DIR, "/00_reference_features/")
+
+# # make a directory for the ML outputs data
+# ML_OUTPUTS_DIR <- paste0(BASE_DIR, "/ml-outputs/")
+
+# -------------------------------------------------------------------------------------
+# ----- Create local directories ------
+# -------------------------------------------------------------------------------------

 # create directories
-dir.create(transects_dir, showWarnings = FALSE)
-dir.create(cs_pts_dir, showWarnings = FALSE)
-dir.create(ref_features_dir, showWarnings = FALSE)
-dir.create(paste0(ref_features_dir, "gpkg/"), showWarnings = FALSE)
-dir.create(final_dir, showWarnings = FALSE)
+dir.create(TRANSECTS_DIR, showWarnings = FALSE)
+dir.create(CS_PTS_DIR, showWarnings = FALSE)
+dir.create(REF_FEATURES_DIR, showWarnings = FALSE)
+dir.create(paste0(REF_FEATURES_DIR, "gpkg/"), showWarnings = FALSE)
+dir.create(CS_OUTPUT_DIR, showWarnings = FALSE)
 dir.create(ML_OUTPUTS_DIR, showWarnings = FALSE)
-# dir.create(model_attr_dir, showWarnings = FALSE)
+
+# create the directory if it does NOT exist
+if(!dir.exists(NEXTGEN_DIR)) {
+  message("Directory does not exist at: \n\t'", NEXTGEN_DIR, "'\nCreating directory at: \n\t'", NEXTGEN_DIR, "'")
+
+  dir.create(NEXTGEN_DIR)
+}
+
+# # create the directory if it does NOT exist
+# if(!dir.exists(MODEL_ATTR_DIR)) {
+#   message("Directory does not exist at: \n\t'", MODEL_ATTR_DIR, "'\nCreating directory at: \n\t'", MODEL_ATTR_DIR, "'")
+#   dir.create(MODEL_ATTR_DIR)
+# }
+# dir.create(MODEL_ATTR_DIR, showWarnings = FALSE)
+
+# -------------------------------------------------------------------------------------
+
+# -------------------------------------------------------------------------------------
+# ----- Get the paths / locations of reference_features data ------
+# -------------------------------------------------------------------------------------

 ## Go get a list of the reference features geopackages from S3 and create a save path using the S3 file names to save reference features to local directory

 # list objects in S3 bucket, and regular expression match to nextgen_.gpkg pattern
 list_ref_features <- paste0('#!/bin/bash
            # AWS S3 Bucket and Directory information
-            S3_BUCKET="', ref_features_prefix , '"
+            S3_BUCKET="', S3_BUCKET_REF_FEATURES_URI , '"

            # Regular expression pattern to match object keys
            PATTERN="reference_features.gpkg"
@@ -88,8 +118,8 @@ list_ref_features <- paste0('#!/bin/bash
 ref_features <- system(list_ref_features, intern = TRUE)

 # ref features datasets
-ref_features_keys <- paste0(ref_features_prefix, ref_features)
-ref_features_files <- paste0(ref_features_dir, "gpkg/", ref_features)
+ref_features_keys <- paste0(S3_BUCKET_REF_FEATURES_URI, ref_features)
+ref_features_files <- paste0(REF_FEATURES_DIR, "gpkg/", ref_features)

 ###
 ### UTILITY FUNCTION FOR MATCHING FILES BASED ON VPU STRING
 ###
diff --git a/runners/cs_runner/config_vars.R b/runners/cs_runner/config_vars.R
index 492611f..5c4a713 100644
--- a/runners/cs_runner/config_vars.R
+++ b/runners/cs_runner/config_vars.R
@@ -1,47 +1,142 @@
-### EDIT base_dir, aws_profile, and DEM_URL ###
+### EDIT BASE_DIR, AWS_PROFILE, and DEM_URL ###

-# ----------------------------------------------------------------------------
+# ---------------------------------------------------------------------------------
 # ---- General paths and constants variables ----
-# ----------------------------------------------------------------------------
-base_dir <- '/Users/anguswatters/Desktop/lynker-spatial'
+# - edit to match your local environment
+# - BASE_DIR: base directory for local file storage
+# - AWS_PROFILE: AWS profile to run CLI commands
+# - VERSION: S3 prefix/folder of version to run / generate hydrofabric data for
+# ---------------------------------------------------------------------------------

-# AWS profile to run CLI commands
-aws_profile <- "angus-lynker"
+# Base directory for local file storage
+BASE_DIR <- '/Users/anguswatters/Desktop/lynker-spatial'

-# name of S3 bucket
-s3_bucket <- "s3://lynker-spatial/"
+# AWS profile to run CLI commands
+AWS_PROFILE <- "angus-lynker"

 # S3 prefix/folder of version run
-version_prefix <- "v20.1"
+VERSION <- "v20.1"
+
+# string to fill in "CS_SOURCE" column in output datasets
+CS_SOURCE <- "hydrofabric3D"
+
+# name of bucket with nextgen data
+S3_BUCKET_NAME   <- "lynker-spatial"
+S3_BUCKET_SUBDIR <- "hydrofabric"
+
+# AWS S3 bucket URI
+S3_BUCKET_BASE_URI <- paste0("s3://", S3_BUCKET_NAME, "/")
+S3_BUCKET_URI      <- paste0(S3_BUCKET_BASE_URI, S3_BUCKET_SUBDIR, "/")
+# S3_BUCKET_URI <- "s3://lynker-spatial/"
+
+# -------------------------------------------------------------------------------------
+# ---- S3 output directories -----
+# - transects
+# - cross section points
+# - ML cross section points
+# -------------------------------------------------------------------------------------
+
+# transect bucket prefix
+S3_TRANSECTS_DIR <- paste0(S3_BUCKET_URI, VERSION, "/3D/transects/")
+
+# cross section bucket prefix
+S3_CS_PTS_DIR <- paste0(S3_BUCKET_URI, VERSION, "/3D/dem-cross-sections/")
+
+# cross section bucket prefix
+S3_CS_ML_PTS_DIR <- paste0(S3_BUCKET_URI, VERSION, "/3D/cross-sections/")
+
+# -------------------------------------------------------------------------------------
+# ---- S3 nextgen data paths / directories -----
+# -------------------------------------------------------------------------------------
+
+# the name of the folder in the S3 bucket with the nextgen data
+S3_BUCKET_NEXTGEN_DIR <- paste0(VERSION, "/gpkg/")
+# S3_BUCKET_NEXTGEN_DIR <- "v20.1/gpkg/"
+
+# full URI to the S3 bucket folder with the nextgen data
+S3_BUCKET_NEXTGEN_DIR_URI <- paste0(S3_BUCKET_URI, S3_BUCKET_NEXTGEN_DIR)
+
+# reference features S3 bucket prefix
+S3_BUCKET_REF_FEATURES_URI <- paste0("s3://", S3_BUCKET_NAME, "/00_reference_features/gpkg/")
+# S3_BUCKET_REF_FEATURES_URI <- "s3://lynker-spatial/00_reference_features/gpkg/"
+
+# ----------------------------------------------------------------------------
+# ---- Machine learning data path variables ----
+# ----------------------------------------------------------------------------
+
+ML_OUTPUTS_S3_FILE <- "channel_ml_outputs.parquet"
+
+# ML_OUTPUTS_S3_DIR <- paste0(VERSION, "/3D/ml-outputs/")
+# ML_OUTPUTS_S3_DIR <- "v20.1/3D/ml-outputs/"
+
+ML_OUTPUTS_S3_URI <- paste0(S3_BUCKET_URI, VERSION, "/3D/ml-outputs/", ML_OUTPUTS_S3_FILE)
+# ML_OUTPUTS_S3_URI <- paste0(S3_BUCKET_URI, ML_OUTPUTS_S3_DIR, ML_OUTPUTS_S3_FILE)
+
+ML_OUTPUTS_PATH <- paste0(BASE_DIR, "/ml-outputs/", ML_OUTPUTS_S3_FILE)
+
+# path to the remote CONUS net parquet file
+CONUS_NETWORK_FILE <- "conus_net.parquet"
+CONUS_NETWORK_URI  <- paste0(S3_BUCKET_URI, VERSION, "/", CONUS_NETWORK_FILE)
+
+# ----------------------------------------------------------------------------
+# -------------------------------------------------------------------------------------
+# ---- Local directory / path variables ----
+# -------------------------------------------------------------------------------------
+
+# directory to copy nextgen bucket data to
+NEXTGEN_DIR <- paste0(BASE_DIR, "/", S3_BUCKET_NEXTGEN_DIR)
+# NEXTGEN_DIR <- paste0(BASE_DIR, "/pre-release/")
+
+# # model attributes directory
+# MODEL_ATTR_DIR <- paste0(BASE_DIR, "/model_attributes/")
+
+# cross-section data model data directories
+TRANSECTS_DIR <- paste0(BASE_DIR, "/01_transects/")
+CS_PTS_DIR    <- paste0(BASE_DIR, "/02_cs_pts/")
+
+# final output directory with geopackages per VPU
+CS_OUTPUT_DIR <- paste0(BASE_DIR, "/cross_sections/")
+
+# directory to copy nextgen bucket data to
+REF_FEATURES_DIR      <- paste0(BASE_DIR, "/00_reference_features/")
+REF_FEATURES_GPKG_DIR <- paste0(REF_FEATURES_DIR, "gpkg/")
+
+# make a directory for the ML outputs data
+ML_OUTPUTS_DIR <- paste0(BASE_DIR, "/ml-outputs/")
+
+# -------------------------------------------------------------------------------------
+# ---- Create local directory / path variables (FEMA data) ----
+# -------------------------------------------------------------------------------------

 # location of FEMA 100 year flood plain FGB files
-FEMA_S3_BUCKET <- "s3://lynker-hydrofabric/"
-FEMA_S3_BUCKET_PREFIX <- "FEMA100/"
-FEMA_S3_DIR <- paste0(FEMA_S3_BUCKET, FEMA_S3_BUCKET_PREFIX)
+FEMA_S3_BUCKET        <- "s3://lynker-hydrofabric/"
+FEMA_S3_BUCKET_PREFIX <- "FEMA100/"
+FEMA_S3_DIR           <- paste0(FEMA_S3_BUCKET, FEMA_S3_BUCKET_PREFIX)

 # FEMA100 year flood map FGB save location (temporary, will be deleted after processing)
-FEMA_FGB_PATH <- paste0(base_dir, "/FEMA100")
-FEMA_GEOJSON_PATH <- paste0(base_dir, "/FEMA100_geojson")
-FEMA_CLEAN_PATH <- paste0(base_dir, "/FEMA100_clean")
-FEMA_GPKG_PATH <- paste0(base_dir, "/FEMA100_gpkg")
-FEMA_GPKG_BB_PATH <- paste0(base_dir, "/FEMA100_bounding_box") # TODO: Probably can be deleted too, not sure yet
+FEMA_FGB_PATH     <- paste0(BASE_DIR, "/FEMA100")
+FEMA_GEOJSON_PATH <- paste0(BASE_DIR, "/FEMA100_geojson")
+FEMA_CLEAN_PATH   <- paste0(BASE_DIR, "/FEMA100_clean")
+FEMA_GPKG_PATH    <- paste0(BASE_DIR, "/FEMA100_gpkg")
+FEMA_GPKG_BB_PATH <- paste0(BASE_DIR, "/FEMA100_bounding_box") # TODO: Probably can be deleted too, not sure yet

-FEMA_BY_VPU_PATH <- paste0(base_dir, "/FEMA_BY_VPU")
+FEMA_BY_VPU_PATH <- paste0(BASE_DIR, "/FEMA_BY_VPU")
 VPU_IDS <- sf::st_drop_geometry(nhdplusTools::get_boundaries())$VPUID

-FEMA_VPU_SUBFOLDERS <- paste0(FEMA_BY_VPU_PATH, "/VPU_", VPU_IDS)
+FEMA_VPU_SUBFOLDERS <- paste0(FEMA_BY_VPU_PATH, "/VPU_", VPU_IDS)
 # FEMA_VPU_SUBFOLDERS <- paste0(
 #   FEMA_BY_VPU_PATH, "/VPU_",
 #   unlist(
-#     lapply(list.files(nextgen_dir, full.names = FALSE), function(vpu_file_names) {
+#     lapply(list.files(NEXTGEN_DIR, full.names = FALSE), function(vpu_file_names) {
 #       unlist(regmatches(vpu_file_names, gregexpr("\\d+[A-Za-z]*", vpu_file_names)))})
 #   )
 # )

 # -------------------------------------------------------------------------------------
-# ---- OVERWRITE_FEMA_FILES constant logical ----
+# ---- OVERWRITE_FEMA_FILES constant logicals ----
 # ---- > if TRUE, processing steps will be run again
 #          and overwrite existing previously processed files
+# TODO: Describe these variables
 # -------------------------------------------------------------------------------------

 # Default is TRUE (i.e. a fresh processing run is done from start to finish)
@@ -53,12 +148,12 @@ DELETE_STAGING_GPKGS <- TRUE # remove intermediary files from the main output f
 # ----------------------------------------------------------------------------

 # DEM URL
-DEM_URL <- "/vsicurl/https://prd-tnm.s3.amazonaws.com/StagedProducts/Elevation/1/TIFF/USGS_Seamless_DEM_1.vrt"
+DEM_URL <- "/vsicurl/https://prd-tnm.s3.amazonaws.com/StagedProducts/Elevation/1/TIFF/USGS_Seamless_DEM_1.vrt"

 # scale argument for cross_section_pts() function.
 # The percentage of the length of the transect line to try and extend a transect to see if viable Z values can be found by extending transect line
 # Default setting is 50% of the original transect lines length (0.5)
-EXTENSION_PCT <- 0.5
+EXTENSION_PCT <- 0.5

 # percentage of the length each cross section that should be used as a threshold for classifying a cross section as having relief or not
 # 1% of the cross sections length is the default value we are using
@@ -72,22 +167,7 @@ COLLECT_META <- TRUE

 # Where should meta data CSVs be saved to?
 # Local path to save CSVs of cross section meta data during each iteration
 # TODO: Probably delete this
-META_PATH <- '/Users/anguswatters/Desktop/cs_meta/'
+META_PATH <- '/Users/anguswatters/Desktop/cs_meta/'
 # META_PATH <- "/local/path/to/save/cross_section_meta_data/"

-# ----------------------------------------------------------------------------
-# ---- Machine learning data path variables ----
-# ----------------------------------------------------------------------------
-
-ML_OUTPUTS_FILE   = "channel_ml_outputs.parquet"
-ML_OUTPUTS_PREFIX = "v20.1/3D/ml-outputs/"
-ML_OUTPUTS_URI    = paste0(s3_bucket, ML_OUTPUTS_PREFIX, ML_OUTPUTS_FILE)
-# ML_OUTPUTS_URI = "s3://lynker-spatial/v20.1/3D/ml-outputs/channel_ml_outputs.parquet"
-
-ML_OUTPUTS_PATH <- paste0(base_dir, "/ml-outputs/", ML_OUTPUTS_FILE)
-
-# path to the remote CONUS net parquet file
-CONUS_NETWORK_FILENAME <- "conus_net.parquet"
-CONUS_NETWORK_URI      <- paste0(s3_bucket, version_prefix, "/", CONUS_NETWORK_FILENAME)
-
-### EDIT ###
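The URI constants introduced above compose by plain string concatenation, so it is worth seeing what they expand to. A quick check under the defaults set in this file (values shown are just the paste0() results, not additional configuration):

S3_BUCKET_BASE_URI <- "s3://lynker-spatial/"
S3_BUCKET_URI      <- paste0(S3_BUCKET_BASE_URI, "hydrofabric", "/")
VERSION            <- "v20.1"

S3_TRANSECTS_DIR <- paste0(S3_BUCKET_URI, VERSION, "/3D/transects/")
# expands to: "s3://lynker-spatial/hydrofabric/v20.1/3D/transects/"

S3_CS_PTS_DIR <- paste0(S3_BUCKET_URI, VERSION, "/3D/dem-cross-sections/")
# expands to: "s3://lynker-spatial/hydrofabric/v20.1/3D/dem-cross-sections/"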
directory within "BASE_DIR" + +# load config variables +source("runners/cs_runner/config_vars.R") + +# --------------------------------------------------------------------------- +# ---- Get ML outputs data from S3 bucket ---- +# --------------------------------------------------------------------------- + +ml_copy_cmd <- paste0('aws s3 cp ', ML_OUTPUTS_S3_URI, ' ', paste0(ML_OUTPUTS_DIR, basename(ML_OUTPUTS_S3_URI))) + +message("Copying S3 object:\n", ML_OUTPUTS_S3_URI) +system(ml_copy_cmd) +message("Download '", paste0(ML_OUTPUTS_DIR, basename(ML_OUTPUTS_S3_URI)), "' complete!") +message("------------------") diff --git a/runners/cs_runner/download_nextgen.R b/runners/cs_runner/download_nextgen.R index dd0b807..9c455ee 100644 --- a/runners/cs_runner/download_nextgen.R +++ b/runners/cs_runner/download_nextgen.R @@ -1,43 +1,17 @@ -# Running this script goes and pulls the desired NextGen geopackage datasets from http://www.lynker-spatial.com/, saves them into a directory within "base_dir" -# base_dir is defined within runners/workflow/root_dir.R +# Running this script goes and pulls the desired NextGen geopackage datasets from http://www.lynker-spatial.com/, saves them into a directory within "BASE_DIR" +# BASE_DIR is defined within runners/workflow/root_dir.R # load config variables source("runners/cs_runner/config_vars.R") -# name of S3 bucket -s3_bucket <- "s3://lynker-spatial/" - -# nextgen bucket folder name -nextgen_bucket_folder <- "v20.1/gpkg/" - -# nextgen bucket name -nextgen_prefix <- paste0(s3_bucket, nextgen_bucket_folder) - -# prerelease_prefix <- "s3://lynker-spatial/pre-release/" - -# # reference features S3 bucket prefix -# ref_features_prefix <- "s3://lynker-spatial/00_reference_features/gpkg/" - -# nextgen model attributes folder in S3 bucket with parquet files -model_attr_prefix <- paste0(s3_bucket, "v20/3D/model_attributes/") - # directory to copy nextgen bucket data too -nextgen_dir <- paste0(base_dir, "/", nextgen_bucket_folder) +NEXTGEN_DIR <- paste0(BASE_DIR, "/", S3_BUCKET_NEXTGEN_DIR) # create the directory if it does NOT exist -if(!dir.exists(nextgen_dir)) { - message("Directory does not exist at: \n\t'", nextgen_dir, "'\nCreating directory at: \n\t'", nextgen_dir, "'") +if(!dir.exists(NEXTGEN_DIR)) { + message("Directory does not exist at: \n\t'", NEXTGEN_DIR, "'\nCreating directory at: \n\t'", NEXTGEN_DIR, "'") - dir.create(nextgen_dir) -} - -# model attributes directory -model_attr_dir <- paste0(base_dir, "/model_attributes/") - -# create the directory if it does NOT exist -if(!dir.exists(model_attr_dir)) { - message("Directory does not exist at: \n\t'", model_attr_dir, "'\nCreating directory at: \n\t'", model_attr_dir, "'") - dir.create(model_attr_dir) + dir.create(NEXTGEN_DIR) } # --------------------------------------------------------------------------- @@ -47,8 +21,8 @@ if(!dir.exists(model_attr_dir)) { # list objects in S3 bucket, and regular expression match to nextgen_.gpkg pattern command <- paste0('#!/bin/bash # AWS S3 Bucket and Directory information - S3_BUCKET="', nextgen_prefix, '" - DESTINATION_DIR=', nextgen_dir, ' + S3_BUCKET="', S3_BUCKET_NEXTGEN_DIR_URI, '" + DESTINATION_DIR=', NEXTGEN_DIR, ' # Regular expression pattern to match object keys PATTERN="^nextgen_[0-9][0-9][A-Za-z]*\\.gpkg$" @@ -66,8 +40,8 @@ bucket_keys <- system(command, intern = TRUE) # Parse the selected S3 objects keys and copy them to the destination directory for (key in bucket_keys) { - copy_cmd <- paste0('aws s3 cp ', nextgen_prefix, key, " ", nextgen_dir, key) - 
message("Copying S3 object:\n", paste0(nextgen_prefix, key)) + copy_cmd <- paste0('aws s3 cp ', S3_BUCKET_NEXTGEN_DIR_URI, key, " ", NEXTGEN_DIR, key) + message("Copying S3 object:\n", paste0(S3_BUCKET_NEXTGEN_DIR_URI, key)) system(copy_cmd) @@ -75,87 +49,51 @@ for (key in bucket_keys) { message("------------------") } -# --------------------------------------------------------------------------- -# ---- List/Get nextgen model attributes parquets from S3 bucket ---- -# --------------------------------------------------------------------------- - -# aws s3 ls s3://lynker-spatial/v20/3D/model_attributes/ - -# list objects in S3 bucket, and regular expression match to nextgen_.gpkg pattern -list_model_attr_cmd <- paste0('#!/bin/bash - # AWS S3 Bucket and Directory information - S3_BUCKET="', model_attr_prefix, '" - - # Regular expression pattern to match object keys - PATTERN="^nextgen_[0-9][0-9][A-Za-z]*\\_model_attributes.parquet$" - - # AWS CLI command to list objects in the S3 bucket and use grep to filter them - S3_OBJECTS=$(aws s3 ls "$S3_BUCKET" | awk \'{print $4}\' | grep -E "$PATTERN") - - echo "$S3_OBJECTS"' -) - -# get a list of the model attributes objects in S3 -model_attr_keys <- system(list_model_attr_cmd, intern = TRUE) - -# Parse the selected S3 objects keys and copy them to the destination directory -for (key in model_attr_keys) { - - copy_cmd <- paste0('aws s3 cp ', model_attr_prefix, key, ' ', model_attr_dir, key) - - message("Copying S3 object:\n", paste0(model_attr_prefix, key)) - - system(copy_cmd) - - message("Download '", paste0(model_attr_prefix, key), "' complete!") - message("------------------") -} - -# --------------------------------------------------------------------------- -# ---- List/Get reference features from S3 bucket ---- -# --------------------------------------------------------------------------- - -## Go get a list of the reference features geopackages from S3 and create a save path using the S3 file names to save reference features to local directory - -# list objects in S3 bucket, and regular expression match to nextgen_.gpkg pattern -list_ref_features <- paste0('#!/bin/bash - # AWS S3 Bucket and Directory information - S3_BUCKET="', ref_features_prefix , '" - - # Regular expression pattern to match object keys - PATTERN="reference_features.gpkg" - - S3_OBJECTS=$(aws s3 ls "$S3_BUCKET" | awk \'{print $4}\' | grep -E "$PATTERN") - - echo "$S3_OBJECTS"' -) - -# ---- Get a list of reference features geopackages geopackages ---- -# Run the script to get a list of the nextgen geopackages that matched the regular expression above -ref_features <- system(list_ref_features, intern = TRUE) - -## Download reference features geopackages and save them to a local directory -# Parse the selected S3 objects keys and copy them to the destination directory -for (key in ref_features) { - # paste0(ref_features_dir, "gpkg/") - copy_cmd <- paste0('aws s3 cp ', ref_features_prefix, key, ' ', paste0(ref_features_dir, "gpkg/"), key) - - message("Copying S3 object:\n", paste0(ref_features_prefix, key)) - system(copy_cmd) - - message("Download '", paste0(ref_features_dir, "gpkg/", key), "' complete!") - message("------------------") -} - -# --------------------------------------------------------------------------- -# ---- Get ML outputs data from S3 bucket ---- -# --------------------------------------------------------------------------- - -ml_copy_cmd <- paste0('aws s3 cp ', ML_OUTPUTS_URI, ' ', paste0(ML_OUTPUTS_DIR, basename(ML_OUTPUTS_URI))) - -message("Copying S3 
object:\n", ML_OUTPUTS_URI) -system(ml_copy_cmd) - -message("Download '", paste0(ML_OUTPUTS_DIR, basename(ML_OUTPUTS_URI)), "' complete!") -message("------------------") - +# # --------------------------------------------------------------------------- +# # ---- List/Get reference features from S3 bucket ---- +# # --------------------------------------------------------------------------- +# +# ## Go get a list of the reference features geopackages from S3 and create a save path using the S3 file names to save reference features to local directory +# +# # list objects in S3 bucket, and regular expression match to nextgen_.gpkg pattern +# list_ref_features <- paste0('#!/bin/bash +# # AWS S3 Bucket and Directory information +# S3_BUCKET="', S3_BUCKET_REF_FEATURES_URI , '" +# +# # Regular expression pattern to match object keys +# PATTERN="reference_features.gpkg" +# +# S3_OBJECTS=$(aws s3 ls "$S3_BUCKET" | awk \'{print $4}\' | grep -E "$PATTERN") +# +# echo "$S3_OBJECTS"' +# ) +# +# # ---- Get a list of reference features geopackages geopackages ---- +# # Run the script to get a list of the nextgen geopackages that matched the regular expression above +# ref_features <- system(list_ref_features, intern = TRUE) +# +# ## Download reference features geopackages and save them to a local directory +# # Parse the selected S3 objects keys and copy them to the destination directory +# for (key in ref_features) { +# # paste0(REF_FEATURES_DIR, "gpkg/") +# copy_cmd <- paste0('aws s3 cp ', S3_BUCKET_REF_FEATURES_URI, key, ' ', paste0(REF_FEATURES_DIR, "gpkg/"), key) +# +# message("Copying S3 object:\n", paste0(S3_BUCKET_REF_FEATURES_URI, key)) +# system(copy_cmd) +# +# message("Download '", paste0(REF_FEATURES_DIR, "gpkg/", key), "' complete!") +# message("------------------") +# } +# +# # --------------------------------------------------------------------------- +# # ---- Get ML outputs data from S3 bucket ---- +# # --------------------------------------------------------------------------- +# +# ml_copy_cmd <- paste0('aws s3 cp ', ML_OUTPUTS_S3_URI, ' ', paste0(ML_OUTPUTS_DIR, basename(ML_OUTPUTS_S3_URI))) +# +# message("Copying S3 object:\n", ML_OUTPUTS_S3_URI) +# system(ml_copy_cmd) +# +# message("Download '", paste0(ML_OUTPUTS_DIR, basename(ML_OUTPUTS_S3_URI)), "' complete!") +# message("------------------") +# diff --git a/runners/cs_runner/download_ref_features.R b/runners/cs_runner/download_ref_features.R new file mode 100644 index 0000000..fc10e80 --- /dev/null +++ b/runners/cs_runner/download_ref_features.R @@ -0,0 +1,35 @@ +# download_ref_features.R +# This script pulls the reference features geopackage datasets from http://www.lynker-spatial.com/, saves them into a directory within "BASE_DIR" + +# load config variables +source("runners/cs_runner/config_vars.R") + +# --------------------------------------------------------------------------- +# ---- List/Get reference features from S3 bucket ---- +# --------------------------------------------------------------------------- + +# list objects in S3 bucket, and regular expression match to reference_features.gpkg pattern +list_ref_features <- paste0('#!/bin/bash + # AWS S3 Bucket and Directory information + S3_BUCKET="', S3_BUCKET_REF_FEATURES_URI, '" + + # Regular expression pattern to match object keys + PATTERN="reference_features.gpkg" + + S3_OBJECTS=$(aws s3 ls "$S3_BUCKET" | awk \'{print $4}\' | grep -E "$PATTERN") + + echo "$S3_OBJECTS"' +) + +# ---- Get a list of reference features geopackages ---- +# Run the script to get a list of the 
reference features geopackages that matched the regular expression above +ref_features <- system(list_ref_features, intern = TRUE) + +# Parse the selected S3 objects keys and copy them to the destination directory +for (key in ref_features) { + copy_cmd <- paste0('aws s3 cp ', S3_BUCKET_REF_FEATURES_URI, key, ' ', paste0(REF_FEATURES_DIR, "gpkg/"), key) + message("Copying S3 object:\n", paste0(S3_BUCKET_REF_FEATURES_URI, key)) + system(copy_cmd) + message("Download '", paste0(REF_FEATURES_DIR, "gpkg/", key), "' complete!") + message("------------------") +} diff --git a/runners/cs_runner/partition_fema_by_vpu.R b/runners/cs_runner/partition_fema_by_vpu.R index 13ea3ba..8e413eb 100644 --- a/runners/cs_runner/partition_fema_by_vpu.R +++ b/runners/cs_runner/partition_fema_by_vpu.R @@ -261,8 +261,8 @@ for (file_path in FEMA_gpkg_paths) { FEMA_CLEAN_GPKG_PATHS <- list.files(FEMA_GPKG_PATH, full.names = TRUE) # paths to nextgen datasets and model attribute parquet files -NEXTGEN_FILENAMES <- list.files(nextgen_dir, full.names = FALSE) -NEXTGEN_FILE_PATHS <- paste0(nextgen_dir, NEXTGEN_FILENAMES) +NEXTGEN_FILENAMES <- list.files(NEXTGEN_DIR, full.names = FALSE) +NEXTGEN_FILE_PATHS <- paste0(NEXTGEN_DIR, NEXTGEN_FILENAMES) for (file_path in FEMA_CLEAN_GPKG_PATHS) { diff --git a/runners/cs_runner/set_path_variables.R b/runners/cs_runner/set_path_variables.R new file mode 100644 index 0000000..b590024 --- /dev/null +++ b/runners/cs_runner/set_path_variables.R @@ -0,0 +1,10 @@ +# Generate the list of file paths for locally stored nextgen datasets: +# - NEXTGEN_FILES --> (list NEXTGEN_DIR files that were downloaded via download_nextgen.R) + + +# load config variables +source("runners/cs_runner/config_vars.R") + +# NOTE: SET VARIABLE FOR REST OF PROCESSING +# paths to nextgen datasets +NEXTGEN_FILES <- list.files(NEXTGEN_DIR, full.names = FALSE) diff --git a/runners/cs_runner/utils.R b/runners/cs_runner/utils.R index 8bfd145..c4495d7 100644 --- a/runners/cs_runner/utils.R +++ b/runners/cs_runner/utils.R @@ -1,3 +1,152 @@ +# Given 2 character vectors of filenames both including VPU strings after a "nextgen_" string, match them together to +# make sure they are aligned and in the same order +# x is a character vector of file paths with a VPU ID preceeded by a "nextgen_" string +# y is a character vector of file paths with a VPU ID preceeded by a "nextgen_" string +# base is a character vector of the base directory of the files. 
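+#
+# For intuition, a tiny self-contained sketch of this VPU matching
+# (the filenames below are hypothetical examples):
+#
+#   regex_pattern <- "nextgen_(\\d+[A-Za-z]?).*"
+#   x <- c("nextgen_01.gpkg", "nextgen_03W.gpkg")
+#   y <- c("nextgen_03W_transects.gpkg", "nextgen_01_transects.gpkg")
+#   gsub(regex_pattern, "\\1", x)   # "01"  "03W"
+#   gsub(regex_pattern, "\\1", y)   # "03W" "01"
+#   # joining on these extracted VPU IDs aligns x and y regardless of input order
+#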
Defaults to NULL +# Returns a dataframe with VPU, x, and y columns +align_files_by_vpu <- function( + x, + y, + base = NULL +) { + + # Regular expression pattern to match numeric pattern after "nextgen_" and remove everything after the ending period + regex_pattern <- "nextgen_(\\d+[A-Za-z]?).*" + + # path dataframe for X filepaths + x_paths <- data.frame(x = x) + + # path dataframe for Y filepaths + y_paths <- data.frame(y = y) + + # generate VPU IDs based on file path regular expression matching with "regex_pattern" above + x_paths$vpu <- gsub(regex_pattern, "\\1", x_paths$x) + y_paths$vpu <- gsub(regex_pattern, "\\1", y_paths$y) + + # match paths based on VPU column + matched_paths <- dplyr::left_join( + x_paths, + y_paths, + by = "vpu" + ) + + # reorder columns + matched_paths <- dplyr::relocate(matched_paths, vpu, x, y) + + if(!is.null(base)) { + matched_paths$base_dir <- base + } + + return(matched_paths) + +} + +# Update flowlines and transects to remove flowlines and transects that intersect with reference_features waterbodies +# flowlines: flowlines linestring sf object +# trans: transects linestring sf object +# waterbodies: waterbodies polygon sf object +# Returns a list of length 2 with logical vectors that subsets the "flowlines" and "transects" sf objects to remove flowlines and transects that intersect waterbodies +### Returns a list of length 2 with updated "flowlines" and "transects" sf objects +wb_intersects <- function(flowlines, trans, waterbodies) { + + ######## ######## ######## ######## ######## ######## + + flowlines_geos <- geos::as_geos_geometry(flowlines) + wbs_geos <- geos::as_geos_geometry(waterbodies) + + # temporary ID for transects that is the "hy_id", underscore, "cs_id", used for subsetting in future steps + trans$tmp_id <- paste0(trans$hy_id, "_", trans$cs_id) + + message("Checking flowlines against waterbodies...") + + # create an index between flowlines and waterbodies + wb_index <- geos::geos_intersects_matrix(flowlines_geos, wbs_geos) + + # remove any flowlines that cross more than 1 waterbody + to_keep <- flowlines[lengths(wb_index) == 0, ] + to_check <- flowlines[lengths(wb_index) != 0, ] + + # subset transects to the hy_ids in "to_check" set of flowlines + trans_check <- trans[trans$hy_id %in% unique(to_check$id), ] + # trans_check <- trans_geos[trans$hy_id %in% unique(to_check$id)] + + # check where the transects linestrings intersect with the waterbodies + trans_geos_check <- geos::as_geos_geometry(trans_check) + + message("Checking transects against waterbodies (v2) ...") + wb_trans_index <- geos::geos_intersects_matrix(trans_geos_check, wbs_geos) # (NEW METHOD) + # wb_trans_index <- geos::geos_intersects_any(trans_geos_check, wbs_geos[unlist(wb_index)]) # (OLD METHOD) + + # sum(lengths(wb_trans_index) == 0) + # length(wb_trans_index) + + # within the transects lines that are on a flowline that crosses a waterbody, + # check if any of these transects line DO NOT CROSS A WATERBODY AT ALL + trans_keep <- trans_check[lengths(wb_trans_index) == 0, ] # (NEW METHOD) + # trans_keep <- trans_check[!wb_trans_index, ] # (OLD METHOD) + + # preserve any flowlines that CROSS A WATERBODY BUT ALSO HAVE A TRANSECT LINE that does NOT cross any waterbodies + to_check <- to_check[to_check$id %in% unique(trans_keep$hy_id), ] + + # update flowlines to keep with flowlines that intersect a waterbody BUT STILL, + # have transects that are NOT in the waterbody + to_keep <- dplyr::bind_rows(to_keep, to_check) + + # 'tmp_ids' of transects that are being checked and also 
the transects within trans_check + # that were determined to be valid (are being kept) + check_ids <- unique(trans_check$tmp_id) + keep_ids <- unique(trans_keep$tmp_id) + + # logical vectors of which flowlines/transects to keep (KEEP == TRUE) + # - Remove any transects that are on flowlines that cross a waterbody AND the transect crosses the waterbody too. + # - Keep original transects that are not on flowlines that intersect waterbodies AND + # also the transects that do NOT intersect waterbodies but are on a flowline that DOES intersect a waterbody + valid_flowlines <- flowlines$id %in% to_keep$id + valid_transects <- trans$tmp_id %in% dplyr::filter(trans, + !tmp_id %in% check_ids[!check_ids %in% keep_ids])$tmp_id + + # return alist of updated flowlines and transects + return( + list( + "valid_flowlines" = valid_flowlines, + "valid_transects" = valid_transects + ) + ) + + # # within the transects lines that are on a flowline that crosses a waterbody, + # # check if any of these transects line DO NOT CROSS A WATERBODY AT ALL + # trans_keep <- trans_check[!trans_wb_index, ] + # # trans_keep <- trans_check[lengths(trans_wb_index2) == 0, ] + # + # # preserve any flowlines that CROSS A WATERBODY BUT ALSO HAVE A TRANSECT LINE that does NOT cross any waterbodies + # to_check <- to_check[to_check$id %in% unique(trans_keep$hy_id), ] + # + # # update flowlines to keep with flowlines that intersect a waterbody BUT STILL, + # # have transects that are NOT in the waterbody + # to_keep <- dplyr::bind_rows(to_keep, to_check) + # + # # 'tmp_ids' of transects that are being checked and also the transects within trans_check + # # that were determined to be valid (are being kept) + # check_ids <- unique(trans_check$tmp_id) + # keep_ids <- unique(trans_keep$tmp_id) + # + # # logical vectors of which flowlines/transects to keep (KEEP == TRUE) + # # - Remove any transects that are on flowlines that cross a waterbody AND the transect crosses the waterbody too. + # # - Keep original transects that are not on flowlines that intersect waterbodies AND + # # also the transects that do NOT intersect waterbodies but are on a flowline that DOES intersect a waterbody + # valid_flowlines <- flowlines$id %in% to_keep$id + # valid_transects <- trans$tmp_id %in% dplyr::filter(trans, + # !tmp_id %in% check_ids[!check_ids %in% keep_ids])$tmp_id + # + # # return alist of updated flowlines and transects + # return( + # list( + # "valid_flowlines" = valid_flowlines, + # "valid_transects" = valid_transects + # ) + # ) +} + #' Get the polygons that interesect with any of the linestring geometries #' This is just a wrapper around geos::geos_intersects_matrix. Takes in sf dataframes, uses geos, then outputs sf dataframes #' @param polygons polygon sf object. 
Default is NULL From 6c6b7a559a1bbdbda3bb0e4655f3e2b64a530d50 Mon Sep 17 00:00:00 2001 From: anguswg-ucsb <anguswatters@gmail.com> Date: Fri, 2 Aug 2024 09:05:44 -0700 Subject: [PATCH 54/64] removed duplicate S3_TRANSECTS_DIR variable declaration --- runners/cs_runner/01_transects.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/runners/cs_runner/01_transects.R b/runners/cs_runner/01_transects.R index 2c2ce2f..01bc78a 100644 --- a/runners/cs_runner/01_transects.R +++ b/runners/cs_runner/01_transects.R @@ -8,8 +8,8 @@ source("runners/cs_runner/utils.R") # library(sf) # install.packages("devtools") -# transect bucket prefix -S3_TRANSECTS_DIR <- paste0(S3_BUCKET_URI, VERSION, "/3D/transects/") +# # transect bucket prefix +# S3_TRANSECTS_DIR <- paste0(S3_BUCKET_URI, VERSION, "/3D/transects/") # paths to nextgen datasets and model attribute parquet files NEXTGEN_FILES <- list.files(NEXTGEN_DIR, full.names = FALSE) From 967f40b1bd7e2991e86a6a3e1fd4667066270160 Mon Sep 17 00:00:00 2001 From: anguswg-ucsb <anguswatters@gmail.com> Date: Mon, 26 Aug 2024 14:25:16 -0700 Subject: [PATCH 55/64] updated 02_cs_pts.R to use new ID based hydrofabric3D updates --- runners/cs_runner/02_cs_pts.R | 94 +++++++++++++++++++++++--------- runners/cs_runner/03_inject_ml.R | 10 +++- 2 files changed, 76 insertions(+), 28 deletions(-) diff --git a/runners/cs_runner/02_cs_pts.R b/runners/cs_runner/02_cs_pts.R index d28e43e..f91991b 100644 --- a/runners/cs_runner/02_cs_pts.R +++ b/runners/cs_runner/02_cs_pts.R @@ -1,8 +1,3 @@ - -############################################################################################################################## -################################################ REDO EVERYTHING ####################################################### -############################################################################################################################## - # Generate the flowlines layer for the final cross_sections_<VPU>.gpkg for each VPU source("runners/cs_runner/config.R") @@ -11,20 +6,14 @@ library(hydrofabric3D) library(dplyr) library(sf) -# # cross section bucket prefix -# S3_CS_PTS_DIR <- paste0(S3_BUCKET_URI, VERSION, "/3D/dem-cross-sections/") -# -# # transect bucket prefix -# S3_TRANSECTS_DIR <- paste0(S3_BUCKET_URI, VERSION, "/3D/transects/") - # paths to nextgen datasets -NEXTGEN_FILES <- list.files(NEXTGEN_DIR, full.names = FALSE) +NEXTGEN_FILES <- list.files(NEXTGEN_DIR, full.names = FALSE) # paths to nextgen datasets transect_files <- list.files(TRANSECTS_DIR, full.names = FALSE) transect_files <- transect_files[!grepl("updated_", transect_files)] -REF_FEATURES <- list.files(REF_FEATURES_GPKG_DIR, full.names = FALSE) +REF_FEATURES <- list.files(REF_FEATURES_GPKG_DIR, full.names = FALSE) # reference features dataframe ref_df <- data.frame( @@ -46,11 +35,10 @@ path_df <- align_files_by_vpu( # loop over the nextgen and transect datasets (by VPU) and extract point elevations across points on each transect line, # then classify the points, and create a parquet file with hy_id, cs_id, pt_id, X, Y, Z data. 
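 #
 # For orientation, the extraction call this loop makes (same arguments as the
 # call added further down in this diff); the result has one row per point:
 #
 #   cs_pts <- hydrofabric3D::cross_section_pts(
 #     cs             = transects,   # transect linestrings for one VPU
 #     crosswalk_id   = "hy_id",
 #     points_per_cs  = NULL,
 #     min_pts_per_cs = 10,          # at least 10 points per cross section
 #     dem            = DEM_URL      # DEM to sample Z values from
 #   )
 #   # -> hy_id | cs_id | pt_id | X | Y | Z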
# Save parquet locally and upload to specified S3 bucket -for (i in 1:nrow(path_df)) { - # i = 8 - +for (i in 11:nrow(path_df)) { start <- Sys.time() - + # i = 8 + message("Using newest Hydrofabric3D!!!") # nextgen file and full path nextgen_file <- path_df$x[i] nextgen_path <- paste0(NEXTGEN_DIR, nextgen_file) @@ -64,7 +52,9 @@ for (i in 1:nrow(path_df)) { ref_path <- paste0(REF_FEATURES_DIR, "gpkg/", ref_file) # current VPU being processed - VPU = path_df$vpu[i] + VPU <- path_df$vpu[i] + + start <- Sys.time() message("Creating VPU ", VPU, " cross section points:\n - flowpaths: '", nextgen_file, @@ -75,7 +65,6 @@ for (i in 1:nrow(path_df)) { ################### - # read in transects data transects <- sf::read_sf(transect_path) @@ -96,46 +85,94 @@ for (i in 1:nrow(path_df)) { gc() start_cs_pts <- Sys.time() + # # ------------------------------------------------------------------------ + # # ------ TESTING DATA ------- + # # ------------------------------------------------------------------------ + # flines <- + # flines %>% + # dplyr::slice(1:3500) + # + # transects <- + # transects %>% + # dplyr::filter(hy_id %in% flines$id) + + # ------------------------------------------------------------------------ + message("Extracting cross section points (", start_cs_pts, ")") # ---------------------------------------------------------------------------------------------------------------- # ---- STEP 1: Extract cs points from DEM ---- # ---------------------------------------------------------------------------------------------------------------- - + # system.time({ + # get cross section point elevations cs_pts <- hydrofabric3D::cross_section_pts( + cs = transects, + crosswalk_id = "hy_id", points_per_cs = NULL, min_pts_per_cs = 10, dem = DEM_URL ) + # }) + # ---------------------------------------------------------------------------------------------------------------- # ---- STEP 2: Remove any cross section that has ANY missing (NA) Z values, and classify the points ---- # ---------------------------------------------------------------------------------------------------------------- # system.time({ - + # STEP 2: Remove any cross section that has ANY missing (NA) Z values, and classify the points cs_pts <- cs_pts %>% dplyr::group_by(hy_id, cs_id) %>% dplyr::filter(!any(is.na(Z))) %>% dplyr::ungroup() %>% - hydrofabric3D::classify_points(pct_of_length_for_relief = PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF) + hydrofabric3D::classify_points( + crosswalk_id = "hy_id", + pct_of_length_for_relief = PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF + ) # }) ids_original_cs_pts <- hydrofabric3D::add_tmp_id(cs_pts)$tmp_id + # sf::write_sf(cs_pts, "/Users/anguswatters/Desktop/test_improve_cs_pts_06.gpkg") + # sf::write_sf(flines, "/Users/anguswatters/Desktop/test_improve_flines_06.gpkg") + # sf::write_sf(transects, "/Users/anguswatters/Desktop/test_improve_transects_06.gpkg") + # # + + # ---------------------------------------------------------------------------------------------------------------- # ---- STEP 3: Try to rectify any no relief and invalid banks cross sections ---- # ---------------------------------------------------------------------------------------------------------------- + # dplyr::rename(flines, hy_id = id) + # profvis::profvis({ # system.time({ - # NOTE: new inplace method for improving (rectifying) any invalid cross sections where we dont have banks and relief - fixed_pts <- hydrofabric3D::improve_invalid_cs( + + # # NOTE: new inplace method for improving (rectifying) any invalid cross sections 
where we dont have banks and relief + # fixed_pts <- hydrofabric3D::improve_invalid_cs2( + # cs_pts = cs_pts, # cross section points generated from hydrofabric3D::cross_section_pts() + # net = dplyr::rename(flines, hy_id = id), # original flowline network + # # net = flines, # original flowline network + # transects = transects, # original transect lines + # points_per_cs = NULL, + # min_pts_per_cs = 10, # number of points per cross sections + # dem = DEM_URL, # DEM to extract points from + # scale = EXTENSION_PCT, # How far to extend transects if the points need to be rechecked + # pct_of_length_for_relief = PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF, # percent of cross sections length to be needed in relief calculation to consider cross section to "have relief" + # fix_ids = FALSE, + # verbose = TRUE + # ) + + + # system.time({ + fixed_pts <- hydrofabric3D::get_improved_cs_pts( cs_pts = cs_pts, # cross section points generated from hydrofabric3D::cross_section_pts() - net = flines, # original flowline network + net = dplyr::rename(flines, hy_id = id), # original flowline network + # net = flines, # original flowline network transects = transects, # original transect lines + crosswalk_id = "hy_id", points_per_cs = NULL, min_pts_per_cs = 10, # number of points per cross sections dem = DEM_URL, # DEM to extract points from @@ -146,6 +183,8 @@ for (i in 1:nrow(path_df)) { ) # }) + # fixed_pts2$is_extended %>% sum() + ids_after_fixed_pts <- hydrofabric3D::add_tmp_id(fixed_pts)$tmp_id # # TODO: This is taking A LOT time to process as inputs get larger, an improvement should be looked into more @@ -354,8 +393,9 @@ for (i in 1:nrow(path_df)) { message("Reclassifying cross section points...") fixed_pts <- hydrofabric3D::classify_points( - cs_pts = fixed_pts, - pct_of_length_for_relief = PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF + cs_pts = fixed_pts, + crosswalk_id = "hy_id", + pct_of_length_for_relief = PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF ) ids_after_reclassify <- hydrofabric3D::add_tmp_id(fixed_pts)$tmp_id diff --git a/runners/cs_runner/03_inject_ml.R b/runners/cs_runner/03_inject_ml.R index 0321003..834dec1 100644 --- a/runners/cs_runner/03_inject_ml.R +++ b/runners/cs_runner/03_inject_ml.R @@ -225,6 +225,10 @@ for (i in 1:nrow(path_df)) { DINGMAN_R = owp_dingman_r ) + # bankful_cs %>% + # dplyr::slice(1000:1200) %>% + # hydrofabric3D::plot_cs_pts(color = "point_type") + # sanity check that all rows are accounted for after splitting up data split_kept_all_rows <- nrow(cs_pts) == nrow(bankful_cs) + nrow(inchannel_cs) + nrow(missing_cs) # split_kept_all_rows <- nrow(cs_pts) == nrow(bankful_cs) + nrow(inchannel_cs) @@ -242,7 +246,11 @@ for (i in 1:nrow(path_df)) { inchannel_cs <- hydrofabric3D::add_cs_bathymetry( cross_section_pts = inchannel_cs ) - + # arrow::write_parquet(inchannel_cs, "/Users/anguswatters/Desktop/test_ml_cs_pts_06.parquet") + # ml_subset %>% + # dplyr::filter(hy_id == "wb-1005207") %>% + # dplyr::select(owp_y_inchan, owp_tw_inchan) %>% + # .$owp_y_inchan message(round(Sys.time()), " - Adding cross section bathymetry using bankful widths/depths estimates...") # Add bathymetry using "bankful" estimates From 1998c4282c8090a80e2ed7787e21898c2ea52ba5 Mon Sep 17 00:00:00 2001 From: anguswg-ucsb <anguswatters@gmail.com> Date: Mon, 26 Aug 2024 14:26:47 -0700 Subject: [PATCH 56/64] added crosswalk_id paramater to classify_points function in ml_inject workflow --- runners/cs_runner/03_inject_ml.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/runners/cs_runner/03_inject_ml.R b/runners/cs_runner/03_inject_ml.R
index 834dec1..9b34c03 100644
--- a/runners/cs_runner/03_inject_ml.R
+++ b/runners/cs_runner/03_inject_ml.R
@@ -288,7 +288,7 @@ for (i in 1:nrow(path_df)) {
   message(round(Sys.time()), " - Reclassifying cross section point types...")

   # reclassify
-  final_cs <- hydrofabric3D::classify_points(final_cs)
+  final_cs <- hydrofabric3D::classify_points(final_cs, crosswalk_id = "hy_id")

   # final_uids <- final_cs %>% hydrofabric3D::get_unique_tmp_ids()
   # random_uids <- sample(x=final_uids, size=12)

From 3ac563da09a9138fe55f856bf215f269f1ce9ccf Mon Sep 17 00:00:00 2001
From: anguswg-ucsb <anguswatters@gmail.com>
Date: Wed, 25 Sep 2024 15:50:51 -0700
Subject: [PATCH 57/64] set up new independent file (domain_with_fema.R) for
 generating a FEMA-informed set of transect lines and cross section points,
 given a CONUS network and an AOI for subsetting that network

---
 runners/cs_runner/02_cs_pts.R        |  75 ++--
 runners/cs_runner/config.R           | 245 ++-----------
 runners/cs_runner/config_vars.R      |  69 ++++
 runners/cs_runner/domain_with_fema.R | 471 +++++++++++++++++++++++++
 runners/cs_runner/new_domain.R       | 510 +++++++++++++++++++++++++++
 runners/cs_runner/utils.R            | 229 ++++++++++++
 6 files changed, 1346 insertions(+), 253 deletions(-)
 create mode 100644 runners/cs_runner/domain_with_fema.R
 create mode 100644 runners/cs_runner/new_domain.R

diff --git a/runners/cs_runner/02_cs_pts.R b/runners/cs_runner/02_cs_pts.R
index f91991b..4713c4b 100644
--- a/runners/cs_runner/02_cs_pts.R
+++ b/runners/cs_runner/02_cs_pts.R
@@ -35,10 +35,10 @@ path_df <- align_files_by_vpu(
 # loop over the nextgen and transect datasets (by VPU) and extract point elevations across points on each transect line,
 # then classify the points, and create a parquet file with hy_id, cs_id, pt_id, X, Y, Z data.
 # Save parquet locally and upload to specified S3 bucket
-for (i in 11:nrow(path_df)) {
+for (i in 20:nrow(path_df)) {
+
   start <- Sys.time()
-  # i = 8
-  message("Using newest Hydrofabric3D!!!")
+
   # nextgen file and full path
   nextgen_file <- path_df$x[i]
   nextgen_path <- paste0(NEXTGEN_DIR, nextgen_file)
@@ -64,13 +64,15 @@ for (i in 11:nrow(path_df)) {
   )

   ###################
-
+  message("Reading in transects...\n > ", transect_file)
   # read in transects data
   transects <- sf::read_sf(transect_path)

+  message("Reading in flowlines... 
\n > ", ref_file) # read in waterbodies reference features layer waterbodies <- sf::read_sf(ref_path, layer = "waterbodies") @@ -119,15 +121,31 @@ for (i in 11:nrow(path_df)) { # ---------------------------------------------------------------------------------------------------------------- # ---- STEP 2: Remove any cross section that has ANY missing (NA) Z values, and classify the points ---- # ---------------------------------------------------------------------------------------------------------------- - + + # sf::write_sf(cs_pts, "/Users/anguswatters/Desktop/test_improve_cs_pts_11.gpkg") + # sf::write_sf(flines, "/Users/anguswatters/Desktop/test_improve_flines_11.gpkg") + # sf::write_sf(transects, "/Users/anguswatters/Desktop/test_improve_transects_11.gpkg") + + # sf::write_sf(flines, "/Users/anguswatters/Desktop/test_improve_flines_11_2.gpkg") + # sf::write_sf(transects, "/Users/anguswatters/Desktop/test_improve_transects_11_2.gpkg") + # cs_pts %>% + # dplyr::group_by(hy_id, cs_id) %>% + # dplyr::filter(!any(is.na(Z))) %>% + # dplyr::ungroup() + # + # cs_pts %>% + # hydrofabric3D::drop_incomplete_cs_pts("hy_id") + # system.time({ # STEP 2: Remove any cross section that has ANY missing (NA) Z values, and classify the points cs_pts <- + # cs_pts2 <- cs_pts %>% - dplyr::group_by(hy_id, cs_id) %>% - dplyr::filter(!any(is.na(Z))) %>% - dplyr::ungroup() %>% + # dplyr::group_by(hy_id, cs_id) %>% + # dplyr::filter(!any(is.na(Z))) %>% + # dplyr::ungroup() %>% + hydrofabric3D::drop_incomplete_cs_pts("hy_id") %>% hydrofabric3D::classify_points( crosswalk_id = "hy_id", pct_of_length_for_relief = PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF @@ -135,12 +153,11 @@ for (i in 11:nrow(path_df)) { # }) - ids_original_cs_pts <- hydrofabric3D::add_tmp_id(cs_pts)$tmp_id + ids_original_cs_pts <- hydrofabric3D::add_tmp_id(cs_pts)$tmp_id + # ids_original_cs_pts <- hydrofabric3D::add_tmp_id(cs_pts2)$tmp_id - # sf::write_sf(cs_pts, "/Users/anguswatters/Desktop/test_improve_cs_pts_06.gpkg") - # sf::write_sf(flines, "/Users/anguswatters/Desktop/test_improve_flines_06.gpkg") - # sf::write_sf(transects, "/Users/anguswatters/Desktop/test_improve_transects_06.gpkg") - # # + # sf::write_sf(cs_pts2, "/Users/anguswatters/Desktop/test_improve_cs_pts_classified_11.gpkg") + # sf::write_sf(cs_pts, "/Users/anguswatters/Desktop/test_improve_cs_pts_classified_11_2.gpkg") # ---------------------------------------------------------------------------------------------------------------- @@ -167,20 +184,20 @@ for (i in 11:nrow(path_df)) { # system.time({ - fixed_pts <- hydrofabric3D::get_improved_cs_pts( - cs_pts = cs_pts, # cross section points generated from hydrofabric3D::cross_section_pts() - net = dplyr::rename(flines, hy_id = id), # original flowline network - # net = flines, # original flowline network - transects = transects, # original transect lines - crosswalk_id = "hy_id", - points_per_cs = NULL, - min_pts_per_cs = 10, # number of points per cross sections - dem = DEM_URL, # DEM to extract points from - scale = EXTENSION_PCT, # How far to extend transects if the points need to be rechecked - pct_of_length_for_relief = PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF, # percent of cross sections length to be needed in relief calculation to consider cross section to "have relief" - fix_ids = FALSE, - verbose = TRUE - ) + fixed_pts <- hydrofabric3D::get_improved_cs_pts( + cs_pts = cs_pts, # cross section points generated from hydrofabric3D::cross_section_pts() + net = dplyr::rename(flines, hy_id = id), # original flowline network + # 
net = flines, # original flowline network + transects = transects, # original transect lines + crosswalk_id = "hy_id", + points_per_cs = NULL, + min_pts_per_cs = 10, # number of points per cross sections + dem = DEM_URL, # DEM to extract points from + scale = EXTENSION_PCT, # How far to extend transects if the points need to be rechecked + pct_of_length_for_relief = PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF, # percent of cross sections length to be needed in relief calculation to consider cross section to "have relief" + fix_ids = FALSE, + verbose = TRUE + ) # }) # fixed_pts2$is_extended %>% sum() @@ -219,6 +236,7 @@ for (i in 11:nrow(path_df)) { # get the counts of each point type to add this data to the transects dataset point_type_counts <- hydrofabric3D::get_point_type_counts(fixed_pts) + # point_type_counts <- hydrofabric3D::get_point_type_counts(fixed_pts, crosswalk_id = "hy_id") # # check the number of cross sections that were extended # fixed_pts$is_extended %>% table() @@ -387,7 +405,8 @@ for (i in 11:nrow(path_df)) { ids_before_align <- hydrofabric3D::add_tmp_id(fixed_pts)$tmp_id message("Aligning banks and smoothing bottoms...") - fixed_pts <- hydrofabric3D::align_banks_and_bottoms(fixed_pts) + fixed_pts <- hydrofabric3D::align_banks_and_bottoms(cs_pts = fixed_pts) + # fixed_pts <- hydrofabric3D::align_banks_and_bottoms(cs_pts = fixed_pts, crosswalk_id = "hy_id") ids_after_align <- hydrofabric3D::add_tmp_id(fixed_pts)$tmp_id diff --git a/runners/cs_runner/config.R b/runners/cs_runner/config.R index a839f61..5a435b3 100644 --- a/runners/cs_runner/config.R +++ b/runners/cs_runner/config.R @@ -112,7 +112,27 @@ list_ref_features <- paste0('#!/bin/bash echo "$S3_OBJECTS"' ) +# -------------------------------------------------------------------------- +# ---- Create empty file structure for a "new_domain" ---- +# -------------------------------------------------------------------------- + +create_new_domain_dirs(BASE_DIR, NEW_DOMAIN_DIRNAME) + +# -------------------------------------------------------------------------- +# ---- Create empty file structure for a "domain_with_fema" ---- +# -------------------------------------------------------------------------- + +create_new_domain_dirs(BASE_DIR, DOMAIN_WITH_FEMA_DIRNAME) + +# -------------------------------------------------------------------------- +# ---- Create empty file structure for a "new_conus_domain" ---- +# -------------------------------------------------------------------------- + +create_new_domain_dirs(BASE_DIR, NEW_CONUS_DOMAIN_DIRNAME) + +# -------------------------------------------------------------------------- # ---- Get a list of reference features geopackages ---- +# -------------------------------------------------------------------------- # Run the script to get a list of the nextgen geopackages that matched the regular expression above ref_features <- system(list_ref_features, intern = TRUE) @@ -120,228 +140,3 @@ ref_features <- system(list_ref_features, intern = TRUE) # ref features datasets ref_features_keys <- paste0(S3_BUCKET_REF_FEATURES_URI, ref_features) ref_features_files <- paste0(REF_FEATURES_DIR, "gpkg/", ref_features) - -### -### UTILITY FUNCTION FOR MATCHING FILES BASED ON VPU STRING ### -### - -# Given 2 character vectors of filenames both including VPU strings after a "nextgen_" string, match them together to -# make sure they are aligned and in the same order -# x is a character vector of file paths with a VPU ID preceeded by a "nextgen_" string -# y is a character vector of file paths with a VPU 
ID preceeded by a "nextgen_" string -# base is a character vector of the base directory of the files. Defaults to NULL -# Returns a dataframe with VPU, x, and y columns -align_files_by_vpu <- function( - x, - y, - base = NULL -) { - - # Regular expression pattern to match numeric pattern after "nextgen_" and remove everything after the ending period - regex_pattern <- "nextgen_(\\d+[A-Za-z]?).*" - - # path dataframe for X filepaths - x_paths <- data.frame(x = x) - - # path dataframe for Y filepaths - y_paths <- data.frame(y = y) - - # generate VPU IDs based on file path regular expression matching with "regex_pattern" above - x_paths$vpu <- gsub(regex_pattern, "\\1", x_paths$x) - y_paths$vpu <- gsub(regex_pattern, "\\1", y_paths$y) - - # match paths based on VPU column - matched_paths <- dplyr::left_join( - x_paths, - y_paths, - by = "vpu" - ) - - # reorder columns - matched_paths <- dplyr::relocate(matched_paths, vpu, x, y) - - if(!is.null(base)) { - matched_paths$base_dir <- base - } - - return(matched_paths) - -} - -# Update flowlines and transects to remove flowlines and transects that intersect with reference_features waterbodies -# flowlines: flowlines linestring sf object -# trans: transects linestring sf object -# waterbodies: waterbodies polygon sf object -# Returns a list of length 2 with logical vectors that subsets the "flowlines" and "transects" sf objects to remove flowlines and transects that intersect waterbodies -### Returns a list of length 2 with updated "flowlines" and "transects" sf objects -wb_intersects <- function(flowlines, trans, waterbodies) { - - ######## ######## ######## ######## ######## ######## - - flowlines_geos <- geos::as_geos_geometry(flowlines) - wbs_geos <- geos::as_geos_geometry(waterbodies) - - # temporary ID for transects that is the "hy_id", underscore, "cs_id", used for subsetting in future steps - trans$tmp_id <- paste0(trans$hy_id, "_", trans$cs_id) - - message("Checking flowlines against waterbodies...") - - # create an index between flowlines and waterbodies - wb_index <- geos::geos_intersects_matrix(flowlines_geos, wbs_geos) - - # remove any flowlines that cross more than 1 waterbody - to_keep <- flowlines[lengths(wb_index) == 0, ] - to_check <- flowlines[lengths(wb_index) != 0, ] - - # subset transects to the hy_ids in "to_check" set of flowlines - trans_check <- trans[trans$hy_id %in% unique(to_check$id), ] - # trans_check <- trans_geos[trans$hy_id %in% unique(to_check$id)] - - # check where the transects linestrings intersect with the waterbodies - trans_geos_check <- geos::as_geos_geometry(trans_check) - - message("Checking transects against waterbodies (v2) ...") - wb_trans_index <- geos::geos_intersects_matrix(trans_geos_check, wbs_geos) # (NEW METHOD) - # wb_trans_index <- geos::geos_intersects_any(trans_geos_check, wbs_geos[unlist(wb_index)]) # (OLD METHOD) - - # sum(lengths(wb_trans_index) == 0) - # length(wb_trans_index) - - # within the transects lines that are on a flowline that crosses a waterbody, - # check if any of these transects line DO NOT CROSS A WATERBODY AT ALL - trans_keep <- trans_check[lengths(wb_trans_index) == 0, ] # (NEW METHOD) - # trans_keep <- trans_check[!wb_trans_index, ] # (OLD METHOD) - - # preserve any flowlines that CROSS A WATERBODY BUT ALSO HAVE A TRANSECT LINE that does NOT cross any waterbodies - to_check <- to_check[to_check$id %in% unique(trans_keep$hy_id), ] - - # update flowlines to keep with flowlines that intersect a waterbody BUT STILL, - # have transects that are NOT in the waterbody - to_keep 
<- dplyr::bind_rows(to_keep, to_check) - - # 'tmp_ids' of transects that are being checked and also the transects within trans_check - # that were determined to be valid (are being kept) - check_ids <- unique(trans_check$tmp_id) - keep_ids <- unique(trans_keep$tmp_id) - - # logical vectors of which flowlines/transects to keep (KEEP == TRUE) - # - Remove any transects that are on flowlines that cross a waterbody AND the transect crosses the waterbody too. - # - Keep original transects that are not on flowlines that intersect waterbodies AND - # also the transects that do NOT intersect waterbodies but are on a flowline that DOES intersect a waterbody - valid_flowlines <- flowlines$id %in% to_keep$id - valid_transects <- trans$tmp_id %in% dplyr::filter(trans, - !tmp_id %in% check_ids[!check_ids %in% keep_ids])$tmp_id - - # return alist of updated flowlines and transects - return( - list( - "valid_flowlines" = valid_flowlines, - "valid_transects" = valid_transects - ) - ) - - # # within the transects lines that are on a flowline that crosses a waterbody, - # # check if any of these transects line DO NOT CROSS A WATERBODY AT ALL - # trans_keep <- trans_check[!trans_wb_index, ] - # # trans_keep <- trans_check[lengths(trans_wb_index2) == 0, ] - # - # # preserve any flowlines that CROSS A WATERBODY BUT ALSO HAVE A TRANSECT LINE that does NOT cross any waterbodies - # to_check <- to_check[to_check$id %in% unique(trans_keep$hy_id), ] - # - # # update flowlines to keep with flowlines that intersect a waterbody BUT STILL, - # # have transects that are NOT in the waterbody - # to_keep <- dplyr::bind_rows(to_keep, to_check) - # - # # 'tmp_ids' of transects that are being checked and also the transects within trans_check - # # that were determined to be valid (are being kept) - # check_ids <- unique(trans_check$tmp_id) - # keep_ids <- unique(trans_keep$tmp_id) - # - # # logical vectors of which flowlines/transects to keep (KEEP == TRUE) - # # - Remove any transects that are on flowlines that cross a waterbody AND the transect crosses the waterbody too. 
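# (A behavior-equivalent reading of the "valid_transects" logic above, written as
#  plain set operations on the same check_ids / keep_ids vectors:)
#
#   dropped_ids     <- setdiff(check_ids, keep_ids)      # checked and found invalid
#   valid_transects <- !(trans$tmp_id %in% dropped_ids)  # everything else is kept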
- # # - Keep original transects that are not on flowlines that intersect waterbodies AND - # # also the transects that do NOT intersect waterbodies but are on a flowline that DOES intersect a waterbody - # valid_flowlines <- flowlines$id %in% to_keep$id - # valid_transects <- trans$tmp_id %in% dplyr::filter(trans, - # !tmp_id %in% check_ids[!check_ids %in% keep_ids])$tmp_id - # - # # return alist of updated flowlines and transects - # return( - # list( - # "valid_flowlines" = valid_flowlines, - # "valid_transects" = valid_transects - # ) - # ) -} - -# Update flowlines and transects to remove flowlines and transects that intersect with reference_features waterbodies -# flowlines: flowlines linestring sf object -# trans: transects linestring sf object -# waterbodies: waterbodies polygon sf object -# Returns a list of length 2 with logical vectors that subsets the "flowlines" and "transects" sf objects to remove flowlines and transects that intersect waterbodies -### Returns a list of length 2 with updated "flowlines" and "transects" sf objects -wb_intersects_v1 <- function(flowlines, trans, waterbodies) { - - ######## ######## ######## ######## ######## ######## - - flowlines_geos <- geos::as_geos_geometry(flowlines) - wbs_geos <- geos::as_geos_geometry(waterbodies) - - # temporary ID for transects that is the "hy_id", underscore, "cs_id", used for subsetting in future steps - trans$tmp_id <- paste0(trans$hy_id, "_", trans$cs_id) - - message("Checking flowlines against waterbodies...") - - # create an index between flowlines and waterbodies - wb_index <- geos::geos_intersects_matrix(flowlines_geos, wbs_geos) - - # remove any flowlines that cross more than 1 waterbody - to_keep <- flowlines[lengths(wb_index) == 0, ] - to_check <- flowlines[lengths(wb_index) != 0, ] - - # subset transects to the hy_ids in "to_check" set of flowlines - trans_check <- trans[trans$hy_id %in% unique(to_check$id), ] - # trans_check <- trans_geos[trans$hy_id %in% unique(to_check$id)] - - # check where the transects linestrings intersect with the waterbodies - trans_geos_check <- geos::as_geos_geometry(trans_check) - - message("Checking transects against waterbodies...") - trans_wb_index <- geos::geos_intersects_any( - trans_geos_check, - wbs_geos[unlist(wb_index)] - ) - - # within the transects lines that are on a flowline that crosses a waterbody, - # check if any of these transects line DO NOT CROSS A WATERBODY AT ALL - trans_keep <- trans_check[!trans_wb_index, ] - # trans_keep <- trans_check[lengths(trans_wb_index2) == 0, ] - - # preserve any flowlines that CROSS A WATERBODY BUT ALSO HAVE A TRANSECT LINE that does NOT cross any waterbodies - to_check <- to_check[to_check$id %in% unique(trans_keep$hy_id), ] - - # update flowlines to keep with flowlines that intersect a waterbody BUT STILL, - # have transects that are NOT in the waterbody - to_keep <- dplyr::bind_rows(to_keep, to_check) - - # 'tmp_ids' of transects that are being checked and also the transects within trans_check - # that were determined to be valid (are being kept) - check_ids <- unique(trans_check$tmp_id) - keep_ids <- unique(trans_keep$tmp_id) - - # logical vectors of which flowlines/transects to keep (KEEP == TRUE) - # - Remove any transects that are on flowlines that cross a waterbody AND the transect crosses the waterbody too. 
- # - Keep original transects that are not on flowlines that intersect waterbodies AND - # also the transects that do NOT intersect waterbodies but are on a flowline that DOES intersect a waterbody - valid_flowlines <- flowlines$id %in% to_keep$id - valid_transects <- trans$tmp_id %in% dplyr::filter(trans, - !tmp_id %in% check_ids[!check_ids %in% keep_ids])$tmp_id - - # return alist of updated flowlines and transects - return( - list( - "valid_flowlines" = valid_flowlines, - "valid_transects" = valid_transects - ) - ) -} diff --git a/runners/cs_runner/config_vars.R b/runners/cs_runner/config_vars.R index 5c4a713..dab3c25 100644 --- a/runners/cs_runner/config_vars.R +++ b/runners/cs_runner/config_vars.R @@ -170,4 +170,73 @@ COLLECT_META <- TRUE META_PATH <- '/Users/anguswatters/Desktop/cs_meta/' # META_PATH <- "/local/path/to/save/cross_section_meta_data/" +# ------------------------------------------------------------------------------------- +# ---- (New single domain) Local directory / path variables ---- +# ------------------------------------------------------------------------------------- + +# directory for new domain data +NEW_DOMAIN_DIRNAME <- "new_domain" +NEW_DOMAIN_DIR <- paste0(BASE_DIR, "/", NEW_DOMAIN_DIRNAME) + +NEW_DOMAIN_FLOWLINES_DIRNAME <- "flowlines" +NEW_DOMAIN_FLOWLINES_DIR <- paste0(NEW_DOMAIN_DIR, "/", NEW_DOMAIN_FLOWLINES_DIRNAME) + +NEW_DOMAIN_DEM_DIRNAME <- "dem" +NEW_DOMAIN_DEM_DIR <- paste0(NEW_DOMAIN_DIR, "/", NEW_DOMAIN_DEM_DIRNAME) + +NEW_DOMAIN_FLOWLINES_FILE <- "AllDiffusiveCombined.gpkg" +NEW_DOMAIN_FLOWLINES_PATH <- paste0(NEW_DOMAIN_FLOWLINES_DIR, "/", NEW_DOMAIN_FLOWLINES_FILE) + +# # Local DEM file +# NEW_DOMAIN_DEM_FILE <- "hi_dem.tif" +# NEW_DOMAIN_DEM_PATH <- paste0(NEW_DOMAIN_DEM_DIR, "/", NEW_DOMAIN_DEM_FILE) + +# Remote DEM file +NEW_DOMAIN_DEM_PATH <- "/vsicurl/https://prd-tnm.s3.amazonaws.com/StagedProducts/Elevation/1/TIFF/USGS_Seamless_DEM_1.vrt" + +NEW_DOMAIN_TRANSECTS_DIRNAME <- "transects" +NEW_DOMAIN_CS_PTS_DIRNAME <- "cs_pts" +NEW_DOMAIN_CROSS_SECTIONS_DIRNAME <- "cross_sections" + +NEW_DOMAIN_TRANSECTS_DIR <- paste0(NEW_DOMAIN_DIR, "/", NEW_DOMAIN_TRANSECTS_DIRNAME) +NEW_DOMAIN_CS_PTS_DIR <- paste0(NEW_DOMAIN_DIR, "/", NEW_DOMAIN_CS_PTS_DIRNAME) +NEW_DOMAIN_CROSS_SECTIONS_DIR <- paste0(NEW_DOMAIN_DIR, "/", NEW_DOMAIN_CROSS_SECTIONS_DIRNAME) + +# ------------------------------------------------------------------------------------- +# ---- (New single domain) Local directory / path variables ---- +# ------------------------------------------------------------------------------------- + +# directory for new domain data +DOMAIN_WITH_FEMA_DIRNAME <- "domain_with_fema" +DOMAIN_WITH_FEMA_DIR <- paste0(BASE_DIR, "/", DOMAIN_WITH_FEMA_DIRNAME) + +DOMAIN_WITH_FEMA_FLOWLINES_DIRNAME <- "flowlines" +DOMAIN_WITH_FEMA_FLOWLINES_DIR <- paste0(DOMAIN_WITH_FEMA_DIR, "/", DOMAIN_WITH_FEMA_FLOWLINES_DIRNAME) + +DOMAIN_WITH_FEMA_SUBSET_DIRNAME <- "domain_subset" +DOMAIN_WITH_FEMA_SUBSET_DIR <- paste0(DOMAIN_WITH_FEMA_DIR, "/", DOMAIN_WITH_FEMA_SUBSET_DIRNAME) + +DOMAIN_WITH_FEMA_DEM_DIRNAME <- "dem" +DOMAIN_WITH_FEMA_DEM_DIR <- paste0(DOMAIN_WITH_FEMA_DIR, "/", DOMAIN_WITH_FEMA_DEM_DIRNAME) + +DOMAIN_WITH_FEMA_FLOWLINES_FILE <- "ls_conus.gpkg" +DOMAIN_WITH_FEMA_FLOWLINES_PATH <- paste0(DOMAIN_WITH_FEMA_FLOWLINES_DIR, "/", DOMAIN_WITH_FEMA_FLOWLINES_FILE) + +# Geopackage containing area to subset flowlines to before processing +DOMAIN_WITH_FEMA_SUBSET_FILE <- "AllDiffusiveCombined.gpkg" +DOMAIN_WITH_FEMA_SUBSET_PATH <- paste0(DOMAIN_WITH_FEMA_SUBSET_DIR, 
"/", DOMAIN_WITH_FEMA_SUBSET_FILE) + +# # Local DEM file +# DOMAIN_WITH_FEMA_DEM_FILE <- "hi_dem.tif" +# DOMAIN_WITH_FEMA_DEM_PATH <- paste0(DOMAIN_WITH_FEMA_DEM_DIR, "/", DOMAIN_WITH_FEMA_DEM_FILE) + +# Remote DEM file +DOMAIN_WITH_FEMA_DEM_PATH <- "/vsicurl/https://prd-tnm.s3.amazonaws.com/StagedProducts/Elevation/1/TIFF/USGS_Seamless_DEM_1.vrt" + +DOMAIN_WITH_FEMA_TRANSECTS_DIRNAME <- "transects" +DOMAIN_WITH_FEMA_CS_PTS_DIRNAME <- "cs_pts" +DOMAIN_WITH_FEMA_CROSS_SECTIONS_DIRNAME <- "cross_sections" +DOMAIN_WITH_FEMA_TRANSECTS_DIR <- paste0(DOMAIN_WITH_FEMA_DIR, "/", DOMAIN_WITH_FEMA_TRANSECTS_DIRNAME) +DOMAIN_WITH_FEMA_CS_PTS_DIR <- paste0(DOMAIN_WITH_FEMA_DIR, "/", DOMAIN_WITH_FEMA_CS_PTS_DIRNAME) +DOMAIN_WITH_FEMA_CROSS_SECTIONS_DIR <- paste0(DOMAIN_WITH_FEMA_DIR, "/", DOMAIN_WITH_FEMA_CROSS_SECTIONS_DIRNAME) diff --git a/runners/cs_runner/domain_with_fema.R b/runners/cs_runner/domain_with_fema.R new file mode 100644 index 0000000..b425de5 --- /dev/null +++ b/runners/cs_runner/domain_with_fema.R @@ -0,0 +1,471 @@ +# Generate the transects + cs_pts + cross sections layers for a single flowlines domain file and DEM file +source("runners/cs_runner/config.R") +source("runners/cs_runner/utils.R") + +# # # # load libraries +# library(hydrofabric3D) +# library(dplyr) +# library(sf) +# install.packages("devtools") + +# # transect bucket prefix +# S3_TRANSECTS_DIR <- paste0(S3_BUCKET_URI, VERSION, "/3D/transects/") + +# paths to NEW DOMAIN datasets +# NEW_DOMAIN_FILES <- list.files(NEW_DOMAIN_FLOWLINES_DIR, full.names = TRUE) + +# Unique Flowline ID column name +CROSSWALK_ID <- "id" + +# --------------------------------------------------------------------- +# --- Read in flowlines +# --------------------------------------------------------------------- +# sf::st_layers(DOMAIN_WITH_FEMA_SUBSET_PATH) + +# Subsetting area +aoi <- sf::read_sf(DOMAIN_WITH_FEMA_SUBSET_PATH, layer = "divides") +aoi <- rmapshaper::ms_simplify(aoi, keep = 0.05) + +id_col <- "id" +# id_col <- "divide_id" + +# query the conus.gpkg for matching IDs +ids <- unique(aoi[[id_col]]) +query_ids <- paste0(paste0("'", ids, "'"), collapse= ", ") + +gpkg_layers <- sf::st_layers(DOMAIN_WITH_FEMA_FLOWLINES_PATH) +layer <- gpkg_layers$name[gpkg_layers$name == "flowpaths"] + +wkt <- + aoi %>% + rmapshaper::ms_dissolve() %>% + # rmapshaper::ms_explode() %>% + # sf::st_as_sfc() %>% + sf::st_sf() %>% + sf::st_geometry() %>% + sf::st_as_text() + +# wkt <- sf::st_as_text(sf::st_geometry(aoi)) +# wkt <- +# aoi %>% +# # sf::st_bbox() %>% +# sf::st_as_sfc() %>% +# sf::st_sf() %>% +# sf::st_geometry() %>% +# sf::st_as_text() + +# length(wkt) + +# read in flowlines based on IDs in AOI +flines <- sf::read_sf(DOMAIN_WITH_FEMA_FLOWLINES_PATH, layer = "flowpaths", + wkt_filter = wkt + # query = sprintf("SELECT * FROM \"%s\" WHERE %s IN (%s)", layer, id_col, query_ids), + # query = query + ) + +# --------------------------------------------------------------------- +# --- Split flowlines by VPU +# --------------------------------------------------------------------- + +# VPUs polygons +VPU_boundaries <- sf::st_transform(nhdplusTools::vpu_boundaries, sf::st_crs(flines)) + +# add a VPU ID column to each flowline +flines <- add_intersects_ids(x = flines, y = VPU_boundaries, id_col = "VPUID") + +# set of unique VPUs +VPU_IDS <- unnest_ids(flines$VPUID) + +# all possible FEMA dirs +AOI_FEMA_DIRS <- FEMA_VPU_SUBFOLDERS[basename(FEMA_VPU_SUBFOLDERS) %in% paste0("VPU_", VPU_IDS) ] + +# flines %>% +# sf::st_drop_geometry() %>% +# dplyr::group_by(VPUID) %>% 
+# dplyr::count() +# unique(flines$VPUID) + +# calculate bankfull width +flines <- + flines %>% + dplyr::mutate( + bf_width = hydrofabric3D::calc_powerlaw_bankful_width(tot_drainage_areasqkm) + # bf_width = exp(0.700 + 0.365* log(tot_drainage_areasqkm)) + ) %>% + dplyr::select( + dplyr::any_of(CROSSWALK_ID), + VPUID, + # hy_id = id, + lengthkm, + tot_drainage_areasqkm, + bf_width, + mainstem + ) %>% + hydroloom::rename_geometry("geometry") + +# save the flowlines subset +DOMAIN_WITH_FEMA_FLOWLINE_SUBSET_PATH <- paste0(DOMAIN_WITH_FEMA_FLOWLINES_DIR, "/flowlines_subset.gpkg") +sf::write_sf( + flines, + DOMAIN_WITH_FEMA_FLOWLINE_SUBSET_PATH +) + +# split the flowlines into groups by VPU +fline_groups <- dplyr::group_split(flines, VPUID) + +# loop through each VPU group of flowlines and save a local copy +for (i in seq_along(fline_groups)) { + flowlines <- fline_groups[[i]] + VPU <- unique(flowlines$VPUID) + + # save the flowlines subset + vpu_flowlines_path <- paste0(DOMAIN_WITH_FEMA_FLOWLINES_DIR, "/", gsub(", ", "_", VPU), "_flowlines.gpkg") + + sf::write_sf( + flowlines, + vpu_flowlines_path + ) +} + +for (i in seq_along(fline_groups)) { + + flowlines <- fline_groups[[i]] + VPU <- unique(flowlines$VPUID) + + message("(", i, "/", length(fline_groups), ")\n", + " > Processing flowlines in VPU group: '", VPU, "'") + + # unique VPUs for group + GROUP_VPU_IDS <- unnest_ids(flowlines$VPUID) + + # all FEMA dirs for the current area + GROUP_FEMA_DIRS <- FEMA_VPU_SUBFOLDERS[basename(FEMA_VPU_SUBFOLDERS) %in% paste0("VPU_", GROUP_VPU_IDS) ] + GROUP_FEMA_FILES <- list.files(GROUP_FEMA_DIRS, full.names = T)[grepl("_output.gpkg", list.files(GROUP_FEMA_DIRS, full.names = T))] + # GROUP_FEMA_FILES <- list.files(GROUP_FEMA_DIRS, full.names = T) + # GROUP_FEMA_FILES <- GROUP_FEMA_FILES[grepl("_output.gpkg", GROUP_FEMA_FILES)] + + # create transect lines + transects <- hydrofabric3D::cut_cross_sections( + net = flowlines, # flowlines network + id = CROSSWALK_ID, # Unique feature ID + cs_widths = pmax(50, flowlines$bf_width * 11), # cross section width of each "id" linestring ("hy_id") + # cs_widths = pmax(50, flines$bf_width), # cross section width of each "id" linestring ("hy_id") + num = 10, # number of cross sections per "id" linestring ("hy_id") + smooth = TRUE, # smooth lines + densify = 3, # densify linestring points + rm_self_intersect = TRUE, # remove self intersecting transects + fix_braids = FALSE, # whether to fix braided flowlines or not + #### Arguments used for when fix_braids = TRUE # TODO: these methods need revision in hydrofabric3D to allow for more flexible processing for data that is NOT COMID based (i.e. 
hy_id)
+    # terminal_id = NULL,
+    # braid_threshold = NULL,
+    # version = 2,
+    # braid_method = "comid",
+    # precision = 1,
+    add = TRUE # whether to add back the original data
+  )
+  
+  # add cs_source column and rename cs_widths to cs_lengthm
+  transects <- 
+    transects %>% 
+    dplyr::mutate(
+      cs_source = CS_SOURCE
+    )
+  
+  # ---------------------------------------------------------------------
+  # --- Extend transects out to FEMA 100yr floodplains
+  # ---------------------------------------------------------------------
+  message("Reading in FEMA polygons...")
+  
+  # unique(flowlines$VPUID)
+  
+  # fema polygons and transect lines
+  # fema <- sf::read_sf(vpu_fema_file)
+  
+  # read each FEMA geopackage into a list
+  fema <- lapply(GROUP_FEMA_FILES, function(gpkg) sf::read_sf(gpkg))
+  
+  fema <- 
+    fema %>% 
+    dplyr::bind_rows() %>% 
+    dplyr::mutate(
+      fema_id = 1:dplyr::n()
+    )
+  
+  message("Simplifying FEMA polygons...")
+  message(" - Number of geoms BEFORE simplifying: ", nrow(fema))
+  
+  # TODO: this should be a function argument OR removed; we probably shouldn't forcibly and silently simplify the input polygons without the user knowing...
+  # keep 1% of the original points for speed
+  fema <- rmapshaper::ms_simplify(fema, keep_shapes = T, keep = 0.01, sys = TRUE, sys_mem = 16)
+  # fema <- rmapshaper::ms_simplify(fema, keep_shapes = T, keep = 0.1, sys = TRUE, sys_mem = 16)
+  
+  message(" - Number of geoms AFTER simplifying: ", nrow(fema))
+  message("Extending transects out to FEMA 100yr floodplain polygon boundaries - (", Sys.time(), ")")
+  
+  transects <- 
+    transects %>% 
+    dplyr::left_join(
+      dplyr::select(sf::st_drop_geometry(flowlines),
+                    dplyr::any_of(CROSSWALK_ID),
+                    mainstem
+      ),
+      by = CROSSWALK_ID
+    )
+  
+  # TODO: make sure this 3000m extension distance is appropriate across VPUs
+  # TODO: also need to make sure this will be feasible memory-wise on the larger VPUs...
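+  # Illustrative aside (a sketch, not part of the original runner): a cheap way
+  # to gauge how much work (and memory) the extension step below will need is to
+  # count how many transects actually touch a floodplain polygon first. This
+  # assumes the `transects` and `fema` objects built above:
+  # touches_fema <- lengths(sf::st_intersects(transects, fema)) > 0
+  # message(sum(touches_fema), " of ", length(touches_fema),
+  #         " transects intersect a FEMA floodplain polygon")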
+  transects <- hydrofabric3D::extend_transects_to_polygons(
+    transect_lines         = transects,
+    polygons               = fema,
+    flowlines              = flowlines,
+    crosswalk_id           = CROSSWALK_ID,
+    grouping_id            = "mainstem",
+    max_extension_distance = 3000
+  )
+  
+  # mapview::mapview(transects, color = "green") + 
+  # mapview::mapview(transects2, color = "red")
+  
+  transects <- 
+    transects %>% 
+    hydrofabric3D::add_tmp_id(x = CROSSWALK_ID) %>% 
+    dplyr::mutate(is_extended = FALSE) %>% 
+    dplyr::select(
+      dplyr::any_of(CROSSWALK_ID),
+      cs_id,
+      cs_lengthm,
+      cs_source,
+      cs_measure,
+      is_extended,
+      geometry
+    )
+  
+  out_path <- paste0(DOMAIN_WITH_FEMA_TRANSECTS_DIR, "/",
+                     paste(GROUP_VPU_IDS, collapse = "_"), "_transects.gpkg"
+  )
+  
+  message("Writing transect lines for VPU group: '", VPU, "'",
+          "\n > '", out_path, "'")
+  
+  sf::write_sf(transects, out_path)
+  
+  message("Finished writing transects!")
+  
+}
+# ---------------------------------------------------------------------
+# --- Bind together transects into single dataset
+# ---------------------------------------------------------------------
+
+transect_paths <- list.files(DOMAIN_WITH_FEMA_TRANSECTS_DIR, full.names = T)
+flowline_paths <- list.files(DOMAIN_WITH_FEMA_FLOWLINES_DIR, full.names = T)
+flowline_paths <- flowline_paths[!basename(flowline_paths) %in% c("flowlines_subset.gpkg", "ls_conus.gpkg")]
+
+# read each transect geopackage into a list
+transects <- lapply(transect_paths, function(gpkg) sf::read_sf(gpkg))
+transects[[5]]
+
+transects <- 
+  transects %>% 
+  dplyr::bind_rows()
+
+paths_df <- data.frame(
+  t = transect_paths,
+  f = flowline_paths
+) %>% 
+  dplyr::mutate(
+    vpu = gsub("_transects.gpkg", "", basename(t))
+  )
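+
+# Illustrative aside (a sketch, not part of the original runner): data.frame()
+# above assumes transect_paths and flowline_paths are the same length and in
+# matching VPU order, so a quick guard like this can catch mispaired files
+# before the loop starts (names mirror the objects defined above):
+# stopifnot(length(transect_paths) == length(flowline_paths))
+# stopifnot(identical(
+#   gsub("_transects.gpkg", "", basename(transect_paths)),
+#   gsub("_flowlines.gpkg", "", basename(flowline_paths))
+# ))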
+
+for (i in 1:nrow(paths_df)) {
+  
+  VPU    <- paths_df$vpu[i]
+  t_path <- paths_df$t[i]
+  f_path <- paths_df$f[i]
+  
+  message("Creating VPU ", VPU, " cross section points:",
+          "\n - flowpaths: '", f_path, "'",
+          "\n - transects: '", t_path, "'"
+  )
+  
+  ###################
+  message("Reading in transects...\n > ", t_path)
+  # read in transects data
+  transects <- sf::read_sf(t_path)
+  
+  message("Reading in flowlines... \n > ", f_path)
+  # read in nextgen data
+  flines <- sf::read_sf(f_path)
+  
+  message("Extracting cross section points...")
+  # ----------------------------------------------------------------------------------------------------------------
+  # ---- STEP 1: Extract cs points from DEM ----
+  # ----------------------------------------------------------------------------------------------------------------
+  # system.time({
+  
+  # get cross section point elevations
+  cs_pts <- hydrofabric3D::cross_section_pts(
+    cs             = transects,
+    crosswalk_id   = CROSSWALK_ID,
+    points_per_cs  = NULL,
+    min_pts_per_cs = 10,
+    dem            = DEM_URL
+  )
+  
+  # ----------------------------------------------------------------------------------------------------------------
+  # ---- STEP 2: Remove any cross section that has ANY missing (NA) Z values, and classify the points ----
+  # ----------------------------------------------------------------------------------------------------------------
+  # cs_pts2 %>% 
+  #   dplyr::slice(1:200) %>% 
+  #   dplyr::rename(hy_id = id) %>% 
+  #   hydrofabric3D::plot_cs_pts(x = "pt_id", color = "point_type")
+  
+  cs_pts <- 
+    # cs_pts2 <- 
+    cs_pts %>% 
+    hydrofabric3D::drop_incomplete_cs_pts(CROSSWALK_ID) %>% 
+    hydrofabric3D::classify_points(
+      crosswalk_id             = CROSSWALK_ID,
+      pct_of_length_for_relief = PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF
+    )
+  
+  # })
+  
+  ids_original_cs_pts <- hydrofabric3D::add_tmp_id(cs_pts, x = CROSSWALK_ID)$tmp_id
+  # ids_original_cs_pts <- hydrofabric3D::add_tmp_id(cs_pts2)$tmp_id
+  
+  # sf::write_sf(cs_pts2, "/Users/anguswatters/Desktop/test_improve_cs_pts_classified_11.gpkg")
+  # sf::write_sf(cs_pts, "/Users/anguswatters/Desktop/test_improve_cs_pts_classified_11_2.gpkg")
+  
+  
+  # ----------------------------------------------------------------------------------------------------------------
+  # ---- STEP 3: Try to rectify any no relief and invalid banks cross sections ----
+  # ----------------------------------------------------------------------------------------------------------------
+  # system.time({
+  fixed_pts <- hydrofabric3D::get_improved_cs_pts(
+    cs_pts         = cs_pts,    # cross section points generated from hydrofabric3D::cross_section_pts()
+    net            = flines,    # original flowline network
+    # net            = flines,    # original flowline network
+    transects      = transects, # original transect lines
+    crosswalk_id   = CROSSWALK_ID,
+    points_per_cs  = NULL,
+    min_pts_per_cs = 10, # number of points per cross sections
+    dem            = DEM_URL, # DEM to extract points from
+    scale          = EXTENSION_PCT, # How far to extend transects if the points need to be rechecked
+    pct_of_length_for_relief = PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF, # percent of cross sections length to be needed in relief calculation to consider cross section to "have relief"
+    fix_ids = FALSE,
+    verbose = TRUE
+  )
+  # })
+  
+  ids_after_fixed_pts <- hydrofabric3D::add_tmp_id(fixed_pts, x = CROSSWALK_ID)$tmp_id
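+  
+  # Illustrative aside (sketch only): add_tmp_id() builds a composite
+  # "<crosswalk_id>_<cs_id>" key, so the ID vectors captured before and after a
+  # processing step can be diffed to see exactly which cross sections dropped out:
+  # dropped <- setdiff(ids_original_cs_pts, ids_after_fixed_pts)
+  # if (length(dropped) > 0) {
+  #   message(length(dropped), " cross sections dropped, e.g.: ",
+  #           paste(utils::head(dropped), collapse = ", "))
+  # }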
+  
+  # ----------------------------------------------------------------------------------------------------------------
+  # ---- Update transects with extended transects (if exists) ----
+  # ----------------------------------------------------------------------------------------------------------------
+  
+  out_transects <- match_transects_to_extended_cs_pts(
+    transect_lines = transects,
+    fixed_cs_pts   = fixed_pts,
+    crosswalk_id   = CROSSWALK_ID
+  )
+  
+  # ----------------------------------------------------------------------------------------------------------------
+  # ---- Re enumerate the transects & cross section points "cs_id" ----
+  # ----------------------------------------------------------------------------------------------------------------
+  
+  # fixed_pts <- hydrofabric3D:::renumber_cs_ids(df = fixed_pts, crosswalk_id = "hy_id")
+  # out_transects <- hydrofabric3D:::renumber_cs_ids(
+  #   df = dplyr::mutate(out_transects, pt_id = 1),
+  #   crosswalk_id = "hy_id"
+  # ) %>%
+  #   dplyr::select(-pt_id)
+  
+  fixed_pts     <- hydrofabric3D:::renumber_cs_ids(df = fixed_pts, crosswalk_id = CROSSWALK_ID)
+  out_transects <- hydrofabric3D:::renumber_cs_ids(df = out_transects, crosswalk_id = CROSSWALK_ID)
+  
+  # ----------------------------------------------------------------------------------------------------------------
+  # ---- STEP 4: Update transects with extended transects (if exists) ----
+  # ----------------------------------------------------------------------------------------------------------------
+  
+  # classify the cross section points
+  fixed_pts <- 
+    fixed_pts %>% 
+    dplyr::mutate(
+      X = sf::st_coordinates(.)[,1],
+      Y = sf::st_coordinates(.)[,2]
+    ) %>% 
+    sf::st_drop_geometry() %>% 
+    dplyr::select(
+      dplyr::any_of(CROSSWALK_ID),
+      cs_id,
+      pt_id,
+      cs_lengthm,
+      relative_distance,
+      X, Y, Z,
+      class, point_type,
+      bottom, left_bank, right_bank, valid_banks, has_relief # newly added columns (03/06/2024)
+    )
+  
+  # add Z_source column for source of elevation data
+  fixed_pts <- 
+    fixed_pts %>% 
+    dplyr::mutate(
+      Z_source = CS_SOURCE
+    ) %>% 
+    dplyr::relocate(
+      dplyr::any_of(CROSSWALK_ID),
+      cs_id, pt_id, cs_lengthm, relative_distance, X, Y, Z, Z_source,
+      class, point_type,
+      bottom, left_bank, right_bank, valid_banks, has_relief)
+  
+  ids_before_align <- hydrofabric3D::add_tmp_id(fixed_pts, x = CROSSWALK_ID)$tmp_id
+  
+  message("Aligning banks and smoothing bottoms...")
+  fixed_pts <- hydrofabric3D::align_banks_and_bottoms(cs_pts = fixed_pts, crosswalk_id = CROSSWALK_ID)
+  
+  ids_after_align <- hydrofabric3D::add_tmp_id(fixed_pts, x = CROSSWALK_ID)$tmp_id
+  
+  message("Reclassifying cross section points...")
+  
+  fixed_pts <- hydrofabric3D::classify_points(
+    cs_pts                   = fixed_pts,
+    crosswalk_id             = CROSSWALK_ID,
+    pct_of_length_for_relief = PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF
+  )
+  
+  ids_after_reclassify <- hydrofabric3D::add_tmp_id(fixed_pts, x = CROSSWALK_ID)$tmp_id
+  
+  if(all(ids_original_cs_pts %in% ids_after_fixed_pts)) {
+    message("All hy_id/cs_ids in ORIGINAL DEM point extraction were found in the FIXED points")
+  } else {
+    message(" >>> Missing hy_id/cs_ids in ORIGINAL DEM point extraction compared to the FIXED points")
+  }
+  
+  if(all(ids_before_align %in% ids_after_align)) {
+    message("All hy_id/cs_ids are kept intact after bank alignment and bottom smoothing")
+  } else {
+    message(" >>> Missing hy_id/cs_ids after bank alignment and bottom smoothing")
+  }
+  
+  if(all(ids_after_align %in% ids_after_reclassify)) {
+    message("All hy_id/cs_ids are kept intact after RECLASSIFICATION")
+  } else {
+    message(" >>> Missing hy_id/cs_ids after RECLASSIFICATION")
+  }
+  
+  sf::write_sf(
+    out_transects,
+    paste0(DOMAIN_WITH_FEMA_TRANSECTS_DIR, "/", VPU, "_transects.gpkg")
+  )
+  
+  arrow::write_parquet(
+    fixed_pts,
+    paste0(DOMAIN_WITH_FEMA_CS_PTS_DIR, "/", VPU, "_cs_pts.gpkg")
+  )
+  
+}
+
+# TODO: Save all cross section points + transects into single geopackages (transects.gpkg, cs_pts.gpkg)
+
+
diff --git a/runners/cs_runner/new_domain.R b/runners/cs_runner/new_domain.R
new file mode 100644
index 0000000..5aae38e
--- /dev/null
+++ b/runners/cs_runner/new_domain.R
@@ -0,0 +1,510 @@
+# Generate the transects + cs_pts + cross
sections layers for a single flowlines domain file and DEM file +source("runners/cs_runner/config.R") +source("runners/cs_runner/utils.R") + +# # # # load libraries +# library(hydrofabric3D) +# library(dplyr) +# library(sf) +# install.packages("devtools") + +# # transect bucket prefix +# S3_TRANSECTS_DIR <- paste0(S3_BUCKET_URI, VERSION, "/3D/transects/") + +# paths to NEW DOMAIN datasets +# NEW_DOMAIN_FILES <- list.files(NEW_DOMAIN_FLOWLINES_DIR, full.names = TRUE) + +# --------------------------------------------------------------------- +# --- Read in flowlines +# --------------------------------------------------------------------- +# read in nextgen data +flines <- sf::read_sf(NEW_DOMAIN_FLOWLINES_PATH, layer = "flowpaths") + +VPUS <- sf::st_transform(nhdplusTools::vpu_boundaries, sf::st_crs(flines)) +vpu_flines_int <- sf::st_intersects(VPUS, flines) +vpu_aoi <- VPUS[lengths(vpu_flines_int) > 0, ] + +# VPU IDs of interest +VPU_IDS <- vpu_aoi$VPUID + +# sf::write_sf(aoi, "all_diffusive_combined_vpus.gpkg") +# VPUS[lengths(vpu_fl_int) > 0, ] %>% mapview::mapview() +# nhdplusTools::vpu_boundaries + +# flines %>% +# dplyr::filter(order >= 3) %>% +# # flines %>% +# sf::st_transform(4326) %>% +# mapview::mapview() +# # ggplot2::ggplot() + +# # ggplot2::geom_sf() +# +# +# flines %>% +# sf::st_transform(4326) %>% +# sf::st_bbox() %>% +# sf::st_as_sfc() %>% +# sf::st_as_sf() %>% +# mapview::mapview() + +# --------------------------------------------------------------------- +# --- Add bankful width estimate +# --------------------------------------------------------------------- + +# calculate bankfull width +flines <- + flines %>% + sf::st_transform(26904) %>% + dplyr::mutate( + bf_width = exp(0.700 + 0.365* log(tot_drainage_areasqkm)) + ) %>% + dplyr::select( + hy_id = id, + lengthkm, + tot_drainage_areasqkm, + bf_width, + mainstem, + geometry = geom + ) + +# --------------------------------------------------------------------- +# --- Create transect lines +# --------------------------------------------------------------------- +system.time({ + +# create transect lines +transects <- hydrofabric3D::cut_cross_sections( + net = flines, # flowlines network + id = "hy_id", # Unique feature ID + cs_widths = pmax(50, flines$bf_width * 11), # cross section width of each "id" linestring ("hy_id") + # cs_widths = pmax(50, flines$bf_width), # cross section width of each "id" linestring ("hy_id") + num = 10, # number of cross sections per "id" linestring ("hy_id") + smooth = TRUE, # smooth lines + densify = 3, # densify linestring points + rm_self_intersect = TRUE, # remove self intersecting transects + fix_braids = FALSE, # whether to fix braided flowlines or not + #### Arguments used for when fix_braids = TRUE # TODO: these methods need revision in hydrofabric3D to allow for more flexible processing for data that is NOT COMID based (i.e. 
hy_id) + # terminal_id = NULL, + # braid_threshold = NULL, + # version = 2, + # braid_method = "comid", + # precision = 1, + add = TRUE # whether to add back the original data +) + +}) + +# add cs_source column and rename cs_widths to cs_lengthm +transects <- + transects %>% + dplyr::mutate( + cs_source = CS_SOURCE + ) + +# --------------------------------------------------------------------- +# --- Extract cross section points from DEM +# --------------------------------------------------------------------- +# dem <- terra::rast(NEW_DOMAIN_DEM_PATH) + +system.time({ + +# get cross section point elevations +cs_pts <- hydrofabric3D::cross_section_pts( + + cs = transects, + crosswalk_id = "hy_id", + points_per_cs = NULL, + min_pts_per_cs = 10, + dem = NEW_DOMAIN_DEM_PATH +) + +}) + + +# --------------------------------------------------------------------- +# --- Classify cross section points +# --------------------------------------------------------------------- +# STEP 2: Remove any cross section that has ANY missing (NA) Z values, and classify the points + +cs_pts <- +# cs_pts2 <- + cs_pts %>% + # dplyr::group_by(hy_id, cs_id) %>% + # dplyr::filter(!any(is.na(Z))) %>% + # dplyr::ungroup() %>% + hydrofabric3D::drop_incomplete_cs_pts("hy_id") %>% + hydrofabric3D::classify_points( + crosswalk_id = "hy_id", + pct_of_length_for_relief = PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF + ) + +# cs_pts3 <- +# cs_pts %>% +# # dplyr::group_by(hy_id, cs_id) %>% +# # dplyr::filter(!any(is.na(Z))) %>% +# # dplyr::ungroup() %>% +# hydrofabric3D::drop_incomplete_cs_pts("hy_id") %>% +# hydrofabric3D::classify_points2( +# crosswalk_id = "hy_id", +# pct_of_length_for_relief = PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF +# ) +# +# old_plot <- +# cs_pts2 %>% +# dplyr::slice(1:100) %>% +# hydrofabric3D::plot_cs_pts(color = 'point_type', size = 4)+ +# ggplot2::labs(title = "OLD CLASSES") +# +# new_plot <- +# cs_pts3 %>% +# dplyr::slice(1:100) %>% +# hydrofabric3D::plot_cs_pts(color = 'point_type', size = 4) + +# ggplot2::labs(title = "NEW CLASSES") +# +# library(patchwork) +# old_plot / new_plot +# }) + +ids_original_cs_pts <- hydrofabric3D::add_tmp_id(cs_pts)$tmp_id + +# ---------------------------------------------------------------------------------------------------------------- +# ---- Improve cross section points based on the bank validity & amount of relief in a cross section +# ---------------------------------------------------------------------------------------------------------------- + +# system.time({ +fixed_pts <- hydrofabric3D::get_improved_cs_pts( + cs_pts = cs_pts, # cross section points generated from hydrofabric3D::cross_section_pts() + net = flines, # original flowline network + transects = transects, # original transect lines + crosswalk_id = "hy_id", + points_per_cs = NULL, + min_pts_per_cs = 10, # number of points per cross sections + dem = NEW_DOMAIN_DEM_PATH, # DEM to extract points from + scale = EXTENSION_PCT, # How far to extend transects if the points need to be rechecked + pct_of_length_for_relief = PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF, # percent of cross sections length to be needed in relief calculation to consider cross section to "have relief" + fix_ids = FALSE, + verbose = TRUE +) +# }) + +# ---------------------------------------------------------------------------------------------------------------- +# ---- Update transects with extended transects (if exists) ---- +# ---------------------------------------------------------------------------------------------------------------- + 
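+# Illustrative aside (a sketch, not part of the original runner): the matching
+# helper below (added to runners/cs_runner/utils.R in this patch) swaps extended
+# geometries back into the transect set. Tallying the flag first previews how
+# many cross sections were extended during rectification:
+# table(fixed_pts$is_extended)
+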
+# capture IDs immediately after rectification (before renumbering) for the checks below
+ids_after_fixed_pts <- hydrofabric3D::add_tmp_id(fixed_pts)$tmp_id
+
+out_transects <- match_transects_to_extended_cs_pts(
+  transect_lines = transects,
+  fixed_cs_pts   = fixed_pts,
+  crosswalk_id   = "hy_id"
+)
+
+# ----------------------------------------------------------------------------------------------------------------
+# ---- Re enumerate the transects & cross section points "cs_id" ----
+# ----------------------------------------------------------------------------------------------------------------
+
+# fixed_pts <- hydrofabric3D:::renumber_cs_ids(df = fixed_pts, crosswalk_id = "hy_id")
+# out_transects <- hydrofabric3D:::renumber_cs_ids(
+#   df = dplyr::mutate(out_transects, pt_id = 1),
+#   crosswalk_id = "hy_id"
+# ) %>%
+#   dplyr::select(-pt_id)
+
+fixed_pts     <- hydrofabric3D:::renumber_cs_ids(df = fixed_pts, crosswalk_id = "hy_id")
+out_transects <- hydrofabric3D:::renumber_cs_ids(df = out_transects, crosswalk_id = "hy_id")
+
+# ----------------------------------------------------------------------------------------------------------------
+# ---- STEP 4: Update transects with extended transects (if exists) ----
+# ----------------------------------------------------------------------------------------------------------------
+
+# classify the cross section points
+fixed_pts <- 
+  fixed_pts %>% 
+  dplyr::mutate(
+    X = sf::st_coordinates(.)[,1],
+    Y = sf::st_coordinates(.)[,2]
+  ) %>% 
+  sf::st_drop_geometry() %>% 
+  dplyr::select(
+    hy_id,
+    cs_id,
+    pt_id,
+    cs_lengthm,
+    relative_distance,
+    X, Y, Z,
+    class, point_type,
+    bottom, left_bank, right_bank, valid_banks, has_relief # newly added columns (03/06/2024)
+  )
+
+# add Z_source column for source of elevation data
+fixed_pts <- 
+  fixed_pts %>% 
+  dplyr::mutate(
+    Z_source = CS_SOURCE
+  ) %>% 
+  dplyr::relocate(hy_id, cs_id, pt_id, cs_lengthm, relative_distance, X, Y, Z, Z_source,
+                  class, point_type,
+                  bottom, left_bank, right_bank, valid_banks, has_relief)
+
+ids_before_align <- hydrofabric3D::add_tmp_id(fixed_pts)$tmp_id
+
+message("Aligning banks and smoothing bottoms...")
+fixed_pts <- hydrofabric3D::align_banks_and_bottoms(fixed_pts)
+
+ids_after_align <- hydrofabric3D::add_tmp_id(fixed_pts)$tmp_id
+
+message("Reclassifying cross section points...")
+fixed_pts <- hydrofabric3D::classify_points(
+  cs_pts                   = fixed_pts,
+  crosswalk_id             = "hy_id",
+  pct_of_length_for_relief = PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF
+)
+
+ids_after_reclassify <- hydrofabric3D::add_tmp_id(fixed_pts)$tmp_id
+
+if(all(ids_original_cs_pts %in% ids_after_fixed_pts)) {
+  message("All hy_id/cs_ids in ORIGINAL DEM point extraction were found in the FIXED points")
+} else {
+  message(" >>> Missing hy_id/cs_ids in ORIGINAL DEM point extraction compared to the FIXED points")
+}
+
+if(all(ids_before_align %in% ids_after_align)) {
+  message("All hy_id/cs_ids are kept intact after bank alignment and bottom smoothing")
+} else {
+  message(" >>> Missing hy_id/cs_ids after bank alignment and bottom smoothing")
+}
+
+if(all(ids_after_align %in% ids_after_reclassify)) {
+  message("All hy_id/cs_ids are kept intact after RECLASSIFICATION")
+} else {
+  message(" >>> Missing hy_id/cs_ids after RECLASSIFICATION")
+}
+
+sf::write_sf(
+  out_transects,
+  paste0(NEW_DOMAIN_TRANSECTS_DIR, "/hi_transects.gpkg")
+)
+
+arrow::write_parquet(
+  fixed_pts,
+  paste0(NEW_DOMAIN_CS_PTS_DIR, "/hi_cs_pts.parquet")
+)
+
+# ----------------------------------------------------------------------------------------------------------------
+# ----------------------------------------------------------------------------------------------------------------
+# 
---------------------------------------------------------------------------------------------------------------- + +# ---------------------------------------------------------------------------------------------------------------- +# ---- STEP 4: Update transects with extended transects (if exists) ---- +# ---------------------------------------------------------------------------------------------------------------- + +# get the counts of each point type to add this data to the transects dataset +point_type_counts <- hydrofabric3D::get_point_type_counts(fixed_pts) + +# # check the number of cross sections that were extended +message("Subsetting cross section points generated after extending transects...") + +# extract cross section points that have an "is_extended" value of TRUE +extended_pts <- + fixed_pts %>% + dplyr::filter(is_extended) %>% + hydrofabric3D::add_tmp_id() + +# extract transects that have a "hy_id" in the "extended_pts" dataset +update_transects <- + transects %>% + hydrofabric3D::add_tmp_id() %>% + dplyr::filter(tmp_id %in% unique(extended_pts$tmp_id)) + +# if any transects were extended, update the transects dataset, and overwrite local and S3 transects geopackages +if (nrow(update_transects) > 0) { + message("Updating ", nrow(update_transects), " transects") + + update_transects <- + update_transects %>% + # apply extend_by_percent function to each transect line: + hydrofabric3D:::extend_by_percent( + pct = EXTENSION_PCT, + length_col = "cs_lengthm" + ) + + # Filter down to ONLY points that were finalized and rectified from rectify_cs_pts() + # remove old transects that have "tmp_id" in "extended_pts" (transects that were unchanged and are "good_to_go") + # and then replace with old transects with the "update_transects" + out_transects <- + transects %>% + hydrofabric3D::add_tmp_id() %>% + # dplyr::filter(!tmp_id %in% unique(extended_pts$tmp_id)) %>% + dplyr::filter(tmp_id %in% unique(hydrofabric3D::add_tmp_id(fixed_pts)$tmp_id)) %>% # Subset down to the remaining tmp_ids in the fixed points + dplyr::filter(!tmp_id %in% unique(extended_pts$tmp_id)) %>% # remove the tmp_ids that we are going add back in with the extended versions of those tmp_ids + dplyr::bind_rows( # bring in the new updated extended transects + dplyr::mutate( + update_transects, + is_extended = TRUE + ) + ) + +} else { + + out_transects <- + transects %>% + hydrofabric3D::add_tmp_id() %>% + dplyr::filter(tmp_id %in% unique(hydrofabric3D::add_tmp_id(fixed_pts)$tmp_id)) %>% + dplyr::filter(!tmp_id %in% unique(extended_pts$tmp_id)) +} + +# finalize new transects +out_transects <- + out_transects %>% + dplyr::left_join( + point_type_counts, + by = c("hy_id", "cs_id") + ) %>% + dplyr::left_join( + dplyr::ungroup( + dplyr::slice( + + dplyr::group_by( + dplyr::select(sf::st_drop_geometry(fixed_pts), + hy_id, cs_id, bottom, left_bank, right_bank, valid_banks, has_relief), + hy_id, cs_id), + 1) + ), + by = c("hy_id", "cs_id") + ) %>% + dplyr::select(hy_id, cs_source, cs_id, cs_measure, cs_lengthm, + # sinuosity, + is_extended, + left_bank_count, right_bank_count, channel_count, bottom_count, + bottom, left_bank, right_bank, valid_banks, has_relief, + geom + ) + +# ---------------------------------------------------------------------------------------------------------------- +# ---- Re enumerate the transects & cross section points "cs_id" ---- +# ---------------------------------------------------------------------------------------------------------------- +renumb <- 
hydrofabric3D:::renumber_cs_ids(fixed_pts, crosswalk_id = "hy_id")
+unumb <- renumb %>% hydrofabric3D::get_unique_tmp_ids()
+nnumb <- fixed_pts %>% hydrofabric3D::get_unique_tmp_ids()
+all(unumb %in% nnumb)
+all(nnumb %in% unumb)
+
+
+length(unumb)
+length(nnumb)
+
+# make a dataframe that has a new_cs_id column that has
+# the cs_id renumbered to fill in any missing IDs,
+# so each hy_id has cs_ids that go from 1 - number of cross sections on hy_id
+# The dataframe below will be used to join the "new_cs_id" with
+# the original "cs_ids" in the final cross section POINTS and UPDATED TRANSECTS output datasets
+renumbered_ids <- 
+  fixed_pts %>% 
+  sf::st_drop_geometry() %>% 
+  # dplyr::filter(hy_id %in% c("wb-2402800", "wb-2398282", "wb-2400351")) %>%
+  dplyr::select(hy_id, cs_id, pt_id, cs_measure) %>% 
+  dplyr::group_by(hy_id, cs_id) %>% 
+  dplyr::slice(1) %>% 
+  dplyr::ungroup() %>% 
+  hydrofabric3D::add_tmp_id() %>% 
+  dplyr::group_by(hy_id) %>% 
+  dplyr::mutate(
+    new_cs_id = 1:dplyr::n()
+  ) %>% 
+  dplyr::ungroup() %>% 
+  dplyr::select(new_cs_id, tmp_id)
+
+# Renumber the transects to have correct CS IDs
+out_transects2 <- dplyr::left_join(
+  hydrofabric3D::add_tmp_id(out_transects),
+  renumbered_ids,
+  by = "tmp_id"
+) %>% 
+  dplyr::select(-cs_id, -tmp_id) %>% 
+  dplyr::select(hy_id, cs_source,
+                cs_id = new_cs_id,
+                cs_measure, cs_lengthm,
+                # sinuosity,
+                is_extended,
+                left_bank_count, right_bank_count, channel_count, bottom_count,
+                bottom, left_bank, right_bank, valid_banks, has_relief,
+                geometry = geom
+  )
+
+# Renumber the cross sections points to have correct CS IDs
+fixed_pts <- 
+  dplyr::left_join(
+    hydrofabric3D::add_tmp_id(fixed_pts),
+    renumbered_ids,
+    by = "tmp_id"
+  ) %>% 
+  dplyr::select(-cs_id, -tmp_id) %>% 
+  dplyr::rename(cs_id = new_cs_id)
+
+# NOTE: the block below is a generalized draft of the renumbering logic above;
+# `df` and `crosswalk_id` are placeholders that are never defined in this script,
+# so it is kept commented out for reference only
+# renumbered_ids <-
+#   df %>%
+#   sf::st_drop_geometry() %>%
+#   dplyr::select(
+#     # hy_id,
+#     dplyr::any_of(crosswalk_id),
+#     cs_id, pt_id, cs_measure
+#   ) %>%
+#   dplyr::group_by(dplyr::across(dplyr::any_of(c(crosswalk_id, "cs_id")))) %>%
+#   # dplyr::group_by(hy_id, cs_id) %>%
+#   dplyr::slice(1) %>%
+#   dplyr::ungroup() %>%
+#   dplyr::group_by(dplyr::across(dplyr::any_of(c(crosswalk_id)))) %>%
+#   # dplyr::group_by(hy_id) %>%
+#   dplyr::mutate(
+#     new_cs_id = 1:dplyr::n()
+#     # tmp_id = paste0(hy_id, "_", cs_id)
+#   ) %>%
+#   add_tmp_id(x = get(crosswalk_id)) %>%
+#   dplyr::ungroup() %>%
+#   dplyr::select(new_cs_id, tmp_id)
+
+# Join the new cs_ids back with the final output data to replace the old cs_ids
+# df <- dplyr::left_join(
+#   add_tmp_id(df, x = get(crosswalk_id)),
+#   # dplyr::mutate(df, tmp_id = paste0(hy_id, "_", cs_id)),
+#   renumbered_ids,
+#   by = "tmp_id"
+# ) %>%
+#   dplyr::select(-cs_id, -tmp_id) %>%
+#   dplyr::rename("cs_id" = "new_cs_id") %>%
+#   dplyr::relocate(dplyr::any_of(crosswalk_id), cs_id)
+# # dplyr::relocate(hy_id, cs_id)
+
+# ----------------------------------------------------------------------------------------------------------------
+# ---- Cross section points parquet to S3 ----
+# ----------------------------------------------------------------------------------------------------------------
+
+# classify the cross section points
+fixed_pts <- 
+  fixed_pts %>% 
+  dplyr::mutate(
+    X = sf::st_coordinates(.)[,1],
+    Y = sf::st_coordinates(.)[,2]
+  ) %>% 
+  sf::st_drop_geometry() %>% 
+  dplyr::select(
+    hy_id, cs_id, pt_id,
+    cs_lengthm,
+    relative_distance,
+    X, Y, Z,
+    class, point_type,
+    bottom, left_bank, right_bank, valid_banks, has_relief # newly added columns (03/06/2024)
+  )
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/runners/cs_runner/utils.R b/runners/cs_runner/utils.R
index 
c4495d7..9d27a61 100644
--- a/runners/cs_runner/utils.R
+++ b/runners/cs_runner/utils.R
@@ -1,3 +1,73 @@
+# Create an empty file structure
+# base_dir: character, top level directory path
+# domain_dirname: character, name of the intended new domain directory; if the folder already exists, then only the required subdirectories are created (if they DO NOT exist)
+
+# Directory tree:
+# base_dir/
+#   └── domain_dirname/
+#     ├── flowlines/
+#     ├── dem/
+#     ├── transects/
+#     ├── cross_sections/
+#     └── cs_pts/
+create_new_domain_dirs <- function(base_dir, domain_dirname) {
+  
+  # build paths
+  domain_dir         <- paste0(base_dir, "/", domain_dirname)
+  flowlines_dir      <- paste0(domain_dir, "/flowlines")
+  domain_subset_dir  <- paste0(domain_dir, "/domain_subset")
+  dem_dir            <- paste0(domain_dir, "/dem")
+  transects_dir      <- paste0(domain_dir, "/transects")
+  cross_sections_dir <- paste0(domain_dir, "/cross_sections")
+  cs_pts_dir         <- paste0(domain_dir, "/cs_pts")
+  
+  create_if_not_exists <- function(dir_path) {
+    if (!dir.exists(dir_path)) {
+      dir.create(dir_path, recursive = TRUE)
+      message("Created directory: '", dir_path, "'\n")
+    }
+  }
+  
+  # create directories
+  create_if_not_exists(domain_dir)
+  create_if_not_exists(flowlines_dir)
+  create_if_not_exists(domain_subset_dir)
+  create_if_not_exists(dem_dir)
+  create_if_not_exists(transects_dir)
+  create_if_not_exists(cross_sections_dir)
+  create_if_not_exists(cs_pts_dir)
+}
+
+# get path strings for a domain dir (based off a base dir and domain dirname)
+# NOTE: this does NOT guarantee that these folders exist,
+# NOTE: it just gets the paths if they were created by create_new_domain_dirs()
+get_new_domain_paths <- function(base_dir, domain_dirname) {
+  
+  # build paths
+  domain_dir         <- paste0(base_dir, "/", domain_dirname)
+  flowlines_dir      <- paste0(domain_dir, "/flowlines")
+  domain_subset_dir  <- paste0(domain_dir, "/domain_subset")
+  dem_dir            <- paste0(domain_dir, "/dem")
+  transects_dir      <- paste0(domain_dir, "/transects")
+  cross_sections_dir <- paste0(domain_dir, "/cross_sections")
+  cs_pts_dir         <- paste0(domain_dir, "/cs_pts")
+  
+  # named list of file paths
+  return(
+    list(
+      base_dir           = base_dir,
+      domain_dir         = domain_dir,
+      flowlines_dir      = flowlines_dir,
+      domain_subset_dir  = domain_subset_dir,
+      dem_dir            = dem_dir,
+      transects_dir      = transects_dir,
+      cross_sections_dir = cross_sections_dir,
+      cs_pts_dir         = cs_pts_dir
+    )
+  )
+  
+}
+
 # Given 2 character vectors of filenames both including VPU strings after a "nextgen_" string, match them together to 
 # make sure they are aligned and in the same order
 # x is a character vector of file paths with a VPU ID preceded by a "nextgen_" string
@@ -147,6 +217,31 @@ wb_intersects <- function(flowlines, trans, waterbodies) {
   #   )
 }
 
+add_intersects_ids <- function(x, y, id_col) {
+  # make sure the CRS are the same
+  y <- sf::st_transform(y, sf::st_crs(x))
+  
+  # Perform the intersection
+  intersections <- sf::st_intersects(x, y)
+  
+  # add the intersected values to the first dataframe
+  x[[id_col]] <- unlist(lapply(intersections, function(idx) {
+    if (length(idx) > 0) {
+      paste0(unlist(y[[id_col]][idx]), collapse = ", ")
+    } else {
+      NA
+    }
+  }))
+  
+  return(x)
+}
+
+unnest_ids <- function(ids) {
+  return(
+    unique(unlist(strsplit(unique(ids), ", ")))
+  )
+}
+
 #' Get the polygons that intersect with any of the linestring geometries
 #' This is just a wrapper around geos::geos_intersects_matrix. Takes in sf dataframes, uses geos, then outputs sf dataframes
 #' @param polygons polygon sf object. 
Default is NULL @@ -292,3 +387,137 @@ add_predicate_group_id <- function(polys, predicate) { } + +# utility function for getting transects extended and +# matching cross section points that went through "get_improved_cs_pts()" and that were extended for improvement +# returns the extended version of the transects +match_transects_to_extended_cs_pts <- function(transect_lines, fixed_cs_pts, crosswalk_id) { + # transect_lines = transects + # fixed_cs_pts = fixed_pts + # crosswalk_id = CROSSWALK_ID + + fixed_cs_pts <- nhdplusTools::rename_geometry(fixed_cs_pts, "geometry") + transect_lines <- nhdplusTools::rename_geometry(transect_lines, "geometry") + + # get the counts of each point type to add this data to the transect_lines dataset + point_type_counts <- hydrofabric3D::get_point_type_counts(classified_pts = fixed_cs_pts, + crosswalk_id = crosswalk_id) + + # Check the number of cross sections that were extended + message("Subsetting cross section points generated after extending transect_lines...") + + # extract cross section points that have an "is_extended" value of TRUE + extended_pts <- + fixed_cs_pts %>% + dplyr::filter(is_extended) %>% + hydrofabric3D::add_tmp_id(x = crosswalk_id) + + # extended_pts %>% + # get_unique_tmp_ids() %>% + # length() + + # extract transect_lines that have a "crosswalk_id" in the "extended_pts" dataset + update_transect_lines <- + transect_lines %>% + hydrofabric3D::add_tmp_id(x = crosswalk_id) %>% + dplyr::filter(tmp_id %in% unique(extended_pts$tmp_id)) + + cs_pt_uids <- unique(hydrofabric3D::add_tmp_id(fixed_cs_pts, x = crosswalk_id)$tmp_id) + + # If any transect_lines were extended, update the transect_lines dataset, and overwrite local and S3 transect_lines geopackages + if (nrow(update_transect_lines) > 0) { + message("Updating ", nrow(update_transect_lines), " transect_lines") + + + update_transect_lines <- + update_transect_lines %>% + dplyr::rename(hy_id := !!sym(crosswalk_id)) + + update_transect_lines <- + update_transect_lines %>% + # apply extend_by_percent function to each transect line: + hydrofabric3D:::extend_by_percent( + pct = EXTENSION_PCT, + length_col = "cs_lengthm" + ) + + update_transect_lines <- + update_transect_lines %>% + dplyr::rename(!!sym(crosswalk_id) := hy_id) + + # cs_pt_uids <- unique(hydrofabric3D::add_tmp_id(fixed_cs_pts, x = get(crosswalk_id))$tmp_id) + # transect_uids <- unique(hydrofabric3D::add_tmp_id(transect_lines, x = get(crosswalk_id))$tmp_id) + + # Filter down to ONLY points that were finalized and rectified from rectify_cs_pts() + # Remove old transect_lines that have "tmp_id" in "extended_pts" (transect_lines that were unchanged and are "good_to_go") + # and then replace with old transect_lines with the "update_transect_lines" + out_transect_lines <- + transect_lines %>% + hydrofabric3D::add_tmp_id(x = crosswalk_id) %>% + dplyr::filter(tmp_id %in% cs_pt_uids) %>% + dplyr::filter(!tmp_id %in% unique(extended_pts$tmp_id)) %>% + dplyr::bind_rows( + dplyr::mutate(update_transect_lines, is_extended = TRUE) + ) + + # transect_lines %>% + # hydrofabric3D::add_tmp_id(x = "hy_id") %>% + # # dplyr::filter(!tmp_id %in% unique(extended_pts$tmp_id)) %>% + # dplyr::filter(tmp_id %in% unique(hydrofabric3D::add_tmp_id(fixed_pts, x = "hy_id")$tmp_id)) %>% # Subset down to the remaining tmp_ids in the fixed points + # dplyr::filter(!tmp_id %in% unique(extended_pts$tmp_id)) %>% # remove the tmp_ids that we are going add back in with the extended versions of those tmp_ids + # dplyr::bind_rows( # bring in the new updated extended 
transect_lines + # dplyr::mutate( + # update_transect_lines, + # is_extended = TRUE + # ) + # ) + } else { + # If no transect_lines were extended + out_transect_lines <- + transect_lines %>% + hydrofabric3D::add_tmp_id(x = crosswalk_id) %>% + dplyr::filter(tmp_id %in% cs_pt_uids) %>% + # dplyr::filter(tmp_id %in% unique(hydrofabric3D::add_tmp_id(fixed_cs_pts, x = get(crosswalk_id))$tmp_id)) %>% + dplyr::filter(!tmp_id %in% unique(extended_pts$tmp_id)) + } + + # Finalize new transect_lines + out_transect_lines <- + out_transect_lines %>% + dplyr::left_join( + point_type_counts, + by = c(crosswalk_id, "cs_id") + ) %>% + dplyr::left_join( + dplyr::ungroup( + dplyr::slice( + dplyr::group_by( + dplyr::select(sf::st_drop_geometry(fixed_cs_pts), + dplyr::any_of(crosswalk_id), + cs_id, bottom, left_bank, right_bank, valid_banks, has_relief + ), + dplyr::across(dplyr::any_of(c(crosswalk_id, "cs_id"))) + ), + 1 + ) + ), + by = c(crosswalk_id, "cs_id") + ) %>% + dplyr::select( + dplyr::any_of(crosswalk_id), + cs_source, cs_id, cs_measure, cs_lengthm, + # sinuosity, + is_extended, + left_bank_count, right_bank_count, channel_count, bottom_count, + bottom, left_bank, right_bank, valid_banks, has_relief, + geometry + ) %>% + dplyr::mutate( + is_extended = ifelse(is.na(is_extended), FALSE, is_extended) + ) + + return(out_transect_lines) +} + + + From f8ff3d499ec5693c66a0f36bb3fb287d5388e658 Mon Sep 17 00:00:00 2001 From: anguswg-ucsb <anguswatters@gmail.com> Date: Fri, 27 Sep 2024 16:23:34 -0700 Subject: [PATCH 58/64] updated new domain runner to use new ML inputs and added a loop for replacing DEM points with diffusive DEM points where needed --- runners/cs_runner/03_inject_ml.R | 5 +- runners/cs_runner/config.R | 163 +++++- runners/cs_runner/config_vars.R | 43 ++ runners/cs_runner/domain_with_fema.R | 765 ++++++++++++++++++++++++++- runners/cs_runner/download_fema100.R | 2 +- runners/cs_runner/utils.R | 53 +- 6 files changed, 989 insertions(+), 42 deletions(-) diff --git a/runners/cs_runner/03_inject_ml.R b/runners/cs_runner/03_inject_ml.R index 9b34c03..8c4940b 100644 --- a/runners/cs_runner/03_inject_ml.R +++ b/runners/cs_runner/03_inject_ml.R @@ -296,8 +296,8 @@ for (i in 1:nrow(path_df)) { # tmp_id %in% random_uids) # hydrofabric3D::classify_points(cs_subset) %>% hydrofabric3D::plot_cs_pts(color = "point_type") - starting_uids <- hydrofabric3D::get_unique_tmp_ids(cs_pts) - ending_uids <- hydrofabric3D::get_unique_tmp_ids(final_cs) + starting_uids <- hydrofabric3D::get_unique_tmp_ids(cs_pts, x = "hy_id") + ending_uids <- hydrofabric3D::get_unique_tmp_ids(final_cs, x = "hy_id") has_same_number_of_uids <- length(starting_uids) == length(ending_uids) all_starting_uids_in_ending_uids <- all(starting_uids %in% ending_uids) @@ -374,5 +374,6 @@ for (i in 1:nrow(path_df)) { } + \ No newline at end of file diff --git a/runners/cs_runner/config.R b/runners/cs_runner/config.R index 5a435b3..c70320d 100644 --- a/runners/cs_runner/config.R +++ b/runners/cs_runner/config.R @@ -112,31 +112,174 @@ list_ref_features <- paste0('#!/bin/bash echo "$S3_OBJECTS"' ) +# -------------------------------------------------------------------------- +# ---- Get a list of reference features geopackages ---- +# -------------------------------------------------------------------------- + +# Run the script to get a list of the nextgen geopackages that matched the regular expression above +ref_features <- system(list_ref_features, intern = TRUE) + +# ref features datasets +ref_features_keys <- paste0(S3_BUCKET_REF_FEATURES_URI, 
ref_features) +ref_features_files <- paste0(REF_FEATURES_DIR, "gpkg/", ref_features) + # -------------------------------------------------------------------------- # ---- Create empty file structure for a "new_domain" ---- # -------------------------------------------------------------------------- -create_new_domain_dirs(BASE_DIR, NEW_DOMAIN_DIRNAME) +create_new_domain_dirs(BASE_DIR, NEW_DOMAIN_DIRNAME, with_output = TRUE) # -------------------------------------------------------------------------- # ---- Create empty file structure for a "domain_with_fema" ---- # -------------------------------------------------------------------------- -create_new_domain_dirs(BASE_DIR, DOMAIN_WITH_FEMA_DIRNAME) +create_new_domain_dirs(BASE_DIR, DOMAIN_WITH_FEMA_DIRNAME, with_output = TRUE) # -------------------------------------------------------------------------- -# ---- Create empty file structure for a "new_conus_domain" ---- +# ---- Get locations of ML parquet files in S3 --- # -------------------------------------------------------------------------- -create_new_domain_dirs(BASE_DIR, NEW_CONUS_DOMAIN_DIRNAME) +VPU_ML_BATHYMETRY_URIS <- unlist( + lapply(VPU_ML_BATHYMETRY_S3_DIRS, function(s3_dir) { + s3_file <- list_s3_objects(s3_dir, ".parquet$", AWS_PROFILE) + paste0(s3_dir, s3_file) + }) +) + +# ------------------------------------------------------------------------------------- +# ---- Download ML parquets for DOMAIN_WITH_FEMA ---- +# ------------------------------------------------------------------------------------- + +# Parse the selected S3 objects keys from the FEMA100 bucket directory copy them to the local destination directory if the file does NOT exist yet +for (s3_uri in VPU_ML_BATHYMETRY_URIS) { + # message(s3_uri) + # s3_uri <- "s3://lynker-hydrofabric/hydrofabric/nextgen/bathymetry/multisource_river_attributes/vpuid=18/part-0.parquet" + # s3_uri <- "s3://lynker-hydrofabric/hydrofabric/nextgen/bathymetry/multisource_river_attributes/vpuid=03W/part-0.parquet" + # s3_uri <- "s3://lynker-hydrofabric/hydrofabric/nextgen/bathymetry/multisource_river_attributes/vpuid=21/" + + is_parquet <- endsWith(s3_uri, ".parquet") + vpu_id <- gsub(".*vpuid=([a-zA-Z0-9]+).*", "\\1", s3_uri) + + message("Checking S3 bucket for VPU ", vpu_id, " ML data...") + + if (is_parquet) { + s3_file <- basename(s3_uri) + # vpu_id <- gsub(".*vpuid=([a-zA-Z0-9]+).*", "\\1", s3_uri) + new_file_name <- paste0(vpu_id, "_ml.parquet") + # new_file_name <- gsub("-", "_", paste0(vpu_id, "_", s3_file)) + + local_save_path <- paste0(DOMAIN_WITH_FEMA_ML_DIR, "/", new_file_name) + + if(!file.exists(local_save_path)) { + copy_cmd <- paste0('aws s3 cp ', s3_uri, + " ", local_save_path, " --profile ", AWS_PROFILE) + + message("S3 object:\n > '", s3_uri, "'") + message("Downloading S3 object to:\n > '", local_save_path, "'") + # message("Copy command:\n > '", copy_cmd, "'") + + system(copy_cmd) + + message(" > '", new_file_name, "' download complete!") + message("----------------------------------") + } else { + message("File already exists at:\n > '", local_save_path, "'") + } + + } else { + message("No S3 bucket ML data for VPU ", vpu_id, "...") + } + +} + +# VPU_ML_BATHYMETRY_PATHS <- list.files(DOMAIN_WITH_FEMA_ML_DIR, full.names = T) +# +# ml_outputs <- lapply(VPU_ML_BATHYMETRY_PATHS, function(prq) { +# vpu_id <- gsub(".*ml/([a-zA-Z0-9]+).*", "\\1", prq) +# arrow::read_parquet(prq) %>% +# dplyr::mutate(vpu_id = vpu_id) +# } +# ) %>% +# dplyr::bind_rows() # 
-------------------------------------------------------------------------- -# ---- Get a list of reference features geopackages ---- +# ---- Get locations of diffusive domain DEM files in S3 ---- # -------------------------------------------------------------------------- -# Run the script to get a list of the nextgen geopackages that matched the regular expression above -ref_features <- system(list_ref_features, intern = TRUE) +COASTAL_BATHY_DEM_S3_URIS <- paste0(COASTAL_BATHY_DEM_S3_DIR_URI, + list_s3_objects(COASTAL_BATHY_DEM_S3_DIR_URI, ".tif$", AWS_PROFILE) + ) + +# ------------------------------------------------------------------------------------- +# ---- Download diffusive domain DEM files from S3 for DOMAIN_WITH_FEMA ---- +# ------------------------------------------------------------------------------------- + +# Parse the selected S3 objects keys from the FEMA100 bucket directory copy them to the local destination directory if the file does NOT exist yet +for (s3_uri in COASTAL_BATHY_DEM_S3_URIS) { + message(s3_uri) + + is_tif <- endsWith(s3_uri, ".tif") + # vpu_id <- gsub(".*vpuid=([a-zA-Z0-9]+).*", "\\1", s3_uri) + + message("Checking S3 bucket for DEM data...") + + if (is_tif) { + + s3_file <- basename(s3_uri) + # vpu_id <- gsub(".*vpuid=([a-zA-Z0-9]+).*", "\\1", s3_uri) + # new_file_name <- paste0(vpu_id, "_ml.parquet") + # new_file_name <- gsub("-", "_", paste0(vpu_id, "_", s3_file)) + + local_save_path <- paste0(DOMAIN_WITH_FEMA_DEM_DIR, "/", s3_file) + + if (!file.exists(local_save_path)) { + copy_cmd <- paste0('aws s3 cp ', s3_uri, + " ", local_save_path, " --profile ", AWS_PROFILE) + + message("S3 object:\n > '", s3_uri, "'") + message("Downloading S3 object to:\n > '", local_save_path, "'") + # message("Copy command:\n > '", copy_cmd, "'") + + system(copy_cmd) + + message(" > '", s3_file, "' download complete!") + message("----------------------------------") + } else { + message("File already exists at:\n > '", local_save_path, "'") + } + + } else { + message("No S3 bucket DEM data... 
") + } + +} + +COASTAL_BATHY_DEM_PATHS <- list.files(DOMAIN_WITH_FEMA_DEM_DIR, full.names = TRUE) + + + + + + + + + + + + + + + + + + + + + + + + + + + -# ref features datasets -ref_features_keys <- paste0(S3_BUCKET_REF_FEATURES_URI, ref_features) -ref_features_files <- paste0(REF_FEATURES_DIR, "gpkg/", ref_features) diff --git a/runners/cs_runner/config_vars.R b/runners/cs_runner/config_vars.R index dab3c25..7b86b59 100644 --- a/runners/cs_runner/config_vars.R +++ b/runners/cs_runner/config_vars.R @@ -236,7 +236,50 @@ DOMAIN_WITH_FEMA_DEM_PATH <- "/vsicurl/https://prd-tnm.s3.amazonaws.com/S DOMAIN_WITH_FEMA_TRANSECTS_DIRNAME <- "transects" DOMAIN_WITH_FEMA_CS_PTS_DIRNAME <- "cs_pts" DOMAIN_WITH_FEMA_CROSS_SECTIONS_DIRNAME <- "cross_sections" +DOMAIN_WITH_FEMA_OUTPUT_DIRNAME <- "outputs" +DOMAIN_WITH_FEMA_VPU_SUBSETS_DIRNAME <- "vpu-subsets" +DOMAIN_WITH_FEMA_ML_DIRNAME <- "ml" DOMAIN_WITH_FEMA_TRANSECTS_DIR <- paste0(DOMAIN_WITH_FEMA_DIR, "/", DOMAIN_WITH_FEMA_TRANSECTS_DIRNAME) DOMAIN_WITH_FEMA_CS_PTS_DIR <- paste0(DOMAIN_WITH_FEMA_DIR, "/", DOMAIN_WITH_FEMA_CS_PTS_DIRNAME) DOMAIN_WITH_FEMA_CROSS_SECTIONS_DIR <- paste0(DOMAIN_WITH_FEMA_DIR, "/", DOMAIN_WITH_FEMA_CROSS_SECTIONS_DIRNAME) +DOMAIN_WITH_FEMA_OUTPUT_DIR <- paste0(DOMAIN_WITH_FEMA_DIR, "/", DOMAIN_WITH_FEMA_OUTPUT_DIRNAME) +DOMAIN_WITH_FEMA_VPU_SUBSETS_DIR <- paste0(DOMAIN_WITH_FEMA_DIR, "/", DOMAIN_WITH_FEMA_VPU_SUBSETS_DIRNAME) +DOMAIN_WITH_FEMA_ML_DIR <- paste0(DOMAIN_WITH_FEMA_DIR, "/", DOMAIN_WITH_FEMA_ML_DIRNAME) + +ML_AUXILIARY_DATA_S3_URI <- paste0(S3_BUCKET_BASE_URI, "bathymetry/ml_auxiliary_data") +S3_BUCKET_BASE_URI + +LYNKER_HYDROFABRIC_S3_BUCKET_BASE_URI <- "s3://lynker-hydrofabric/" +ML_BATHYMETRY_S3_DATA_DIR <- "hydrofabric/nextgen/bathymetry/multisource_river_attributes/" +ML_BATHYMETRY_S3_URI <- paste0(LYNKER_HYDROFABRIC_S3_BUCKET_BASE_URI, ML_BATHYMETRY_S3_DATA_DIR) +VPU_ML_BATHYMETRY_S3_DIRS <- paste0(ML_BATHYMETRY_S3_URI, "vpuid=", nhdplusTools::vpu_boundaries$VPUID, "/") + +COASTAL_BATHY_DEM_S3_DIR <- "coastal_bathy/diffusive_domain/" +COASTAL_BATHY_DEM_S3_DIR_URI <- paste0(LYNKER_HYDROFABRIC_S3_BUCKET_BASE_URI, COASTAL_BATHY_DEM_S3_DIR) +# COASTAL_BATHY_DEM_S3_URIS <- paste0(COASTAL_BATHY_DEM_S3_DIR_URI, list_s3_objects(COASTAL_BATHY_DEM_S3_DIR_URI, ".tif$", AWS_PROFILE)) + + +# # # -------------------------------------------------------------------------- +# # # ---- Get locations of diffusive domain DEM files in S3 ---- +# # # -------------------------------------------------------------------------- +# COASTAL_BATHY_DEM_S3_URIS <- paste0(COASTAL_BATHY_DEM_S3_DIR_URI, list_s3_objects(COASTAL_BATHY_DEM_S3_DIR_URI, ".tif$", AWS_PROFILE)) +# +# # COASTAL_BATHY_DEM_S3_URIS <- paste0(COASTAL_BATHY_DEM_S3_DIR_URI, +# # list_s3_objects(COASTAL_BATHY_DEM_S3_DIR_URI, ".tif$", AWS_PROFILE) +# # ) + + + + + + + + + + + + + + + diff --git a/runners/cs_runner/domain_with_fema.R b/runners/cs_runner/domain_with_fema.R index b425de5..51ed175 100644 --- a/runners/cs_runner/domain_with_fema.R +++ b/runners/cs_runner/domain_with_fema.R @@ -2,17 +2,10 @@ source("runners/cs_runner/config.R") source("runners/cs_runner/utils.R") -# # # # load libraries +# load libraries # library(hydrofabric3D) # library(dplyr) # library(sf) -# install.packages("devtools") - -# # transect bucket prefix -# S3_TRANSECTS_DIR <- paste0(S3_BUCKET_URI, VERSION, "/3D/transects/") - -# paths to NEW DOMAIN datasets -# NEW_DOMAIN_FILES <- list.files(NEW_DOMAIN_FLOWLINES_DIR, full.names = TRUE) # Unique Flowline ID column name CROSSWALK_ID <- "id" @@ 
-63,6 +56,10 @@ flines <- sf::read_sf(DOMAIN_WITH_FEMA_FLOWLINES_PATH, layer = "flowpaths",
                       # query = query
                       )
 
+# bad_ids <- c("wb-14538", "wb-14686", "wb-14687")
+# flines <- 
+#   flines %>% 
+#   dplyr::filter(id %in% bad_ids)
 # ---------------------------------------------------------------------
 # --- Split flowlines by VPU
 # ---------------------------------------------------------------------
@@ -73,16 +70,31 @@ VPU_boundaries <- sf::st_transform(nhdplusTools::vpu_boundaries, sf::st_crs(fl
 # add a VPU ID column to each flowline
 flines <- add_intersects_ids(x = flines, y = VPU_boundaries, id_col = "VPUID")
 
+# TODO: improve this; manually remap some VPUIDs so that there are
+# TODO: fewer small subsets of flowlines when only a small bit of a different VPU is intersected
+flines <- 
+  flines %>% 
+  dplyr::mutate(
+    VPUID = dplyr::case_when(
+      VPUID == "12" ~ "11, 12, 13",
+      VPUID == "12, 13" ~ "11, 12, 13",
+      VPUID == "11, 12" ~ "11, 12, 13",
+      VPUID == "01" ~ "01, 02",
+      TRUE ~ VPUID
+    )
+  )
+
 # set of unique VPUs
 VPU_IDS <- unnest_ids(flines$VPUID)
+VPU_IDS
 
 # all possible FEMA dirs
 AOI_FEMA_DIRS <- FEMA_VPU_SUBFOLDERS[basename(FEMA_VPU_SUBFOLDERS) %in% paste0("VPU_", VPU_IDS) ]
 
-# flines %>%
-#   sf::st_drop_geometry() %>%
-#   dplyr::group_by(VPUID) %>%
-#   dplyr::count()
+flines %>%
+  sf::st_drop_geometry() %>%
+  dplyr::group_by(VPUID) %>%
+  dplyr::count()
 # unique(flines$VPUID)
 
 # calculate bankfull width
@@ -92,6 +104,7 @@ flines <-
     bf_width = hydrofabric3D::calc_powerlaw_bankful_width(tot_drainage_areasqkm)
     # bf_width = exp(0.700 + 0.365* log(tot_drainage_areasqkm))
   ) %>%
+  # hydrofabric3D::add_powerlaw_bankful_width("tot_drainage_areasqkm", 50) %>%
   dplyr::select(
     dplyr::any_of(CROSSWALK_ID),
     VPUID,
@@ -115,11 +128,12 @@ fline_groups <- dplyr::group_split(flines, VPUID)
 
 # loop through each VPU group of flowlines and save a local copy
 for (i in seq_along(fline_groups)) {
+  
   flowlines <- fline_groups[[i]]
   VPU <- unique(flowlines$VPUID)
 
   # save the flowlines subset
-  vpu_flowlines_path <- paste0(DOMAIN_WITH_FEMA_FLOWLINES_DIR, "/", gsub(", ", "_", VPU), "_flowlines.gpkg")
+  vpu_flowlines_path <- paste0(DOMAIN_WITH_FEMA_VPU_SUBSETS_DIR, "/", gsub(", ", "_", VPU), "_flowlines.gpkg")
 
   sf::write_sf(
     flowlines,
@@ -149,7 +163,7 @@ for (i in seq_along(fline_groups)) {
     net = flowlines, # flowlines network
     id = CROSSWALK_ID, # Unique feature ID
     cs_widths = pmax(50, flowlines$bf_width * 11), # cross section width of each "id" linestring ("hy_id")
-    # cs_widths = pmax(50, flines$bf_width), # cross section width of each "id" linestring ("hy_id")
+    # cs_widths = pmax(50, flowlines$bf_width), # cross section width of each "id" linestring ("hy_id")
     num = 10, # number of cross sections per "id" linestring ("hy_id")
     smooth = TRUE, # smooth lines
     densify = 3, # densify linestring points
@@ -241,7 +255,7 @@ for (i in seq_along(fline_groups)) {
     geometry
   )
 
-  out_path <- paste0(DOMAIN_WITH_FEMA_TRANSECTS_DIR, "/",
+  out_path <- paste0(DOMAIN_WITH_FEMA_VPU_SUBSETS_DIR, "/",
                      paste(GROUP_VPU_IDS, collapse = "_"), "_transects.gpkg"
   )
 
@@ -253,17 +267,18 @@ for (i in seq_along(fline_groups)) {
   message("Finished writing transects!")
 
 }
+
 # ---------------------------------------------------------------------
 # --- Bind together transects into single dataset
 # ---------------------------------------------------------------------
+vpu_subset_paths <- list.files(DOMAIN_WITH_FEMA_VPU_SUBSETS_DIR, full.names = T)
 
-transect_paths <- list.files(DOMAIN_WITH_FEMA_TRANSECTS_DIR, full.names = T)
-flowline_paths <- 
list.files(DOMAIN_WITH_FEMA_FLOWLINES_DIR, full.names = T) -flowline_paths <- flowline_paths[!basename(flowline_paths) %in% c("flowlines_subset.gpkg", "ls_conus.gpkg")] +transect_paths <- vpu_subset_paths[grepl("_transects.gpkg", vpu_subset_paths)] +flowline_paths <- vpu_subset_paths[grepl("_flowlines.gpkg", vpu_subset_paths)] # read each transect geopackages into a list transects <- lapply(transect_paths, function(gpkg) sf::read_sf(gpkg)) -transects[[5]] +# transects[[5]] transects <- transects %>% @@ -278,7 +293,7 @@ paths_df <- data.frame( ) for (i in 1:nrow(paths_df)) { - + # i = 2 VPU <- paths_df$vpu[i] t_path <- paths_df$t[i] f_path <- paths_df$f[i] @@ -295,13 +310,14 @@ for (i in 1:nrow(paths_df)) { message("Reading in flowlines... \n > ", f_path) # read in nextgen data - flines <- sf::read_sf(f_path) + flowlines <- sf::read_sf(f_path) message("Extracting cross section points...") # ---------------------------------------------------------------------------------------------------------------- # ---- STEP 1: Extract cs points from DEM ---- # ---------------------------------------------------------------------------------------------------------------- # system.time({ + # get cross section point elevations cs_pts <- hydrofabric3D::cross_section_pts( cs = transects, @@ -344,8 +360,8 @@ for (i in 1:nrow(paths_df)) { # system.time({ fixed_pts <- hydrofabric3D::get_improved_cs_pts( cs_pts = cs_pts, # cross section points generated from hydrofabric3D::cross_section_pts() - net = flines, # original flowline network - # net = flines, # original flowline network + net = flowlines, # original flowline network + # net = flowlines, # original flowline network transects = transects, # original transect lines crosswalk_id = CROSSWALK_ID, points_per_cs = NULL, @@ -456,16 +472,715 @@ for (i in 1:nrow(paths_df)) { sf::write_sf( out_transects, - paste0(DOMAIN_WITH_FEMA_TRANSECTS_DIR, "/", VPU, "_transects.gpkg") + paste0(DOMAIN_WITH_FEMA_VPU_SUBSETS_DIR, "/", VPU, "_transects.gpkg") ) arrow::write_parquet( fixed_pts, - paste0(DOMAIN_WITH_FEMA_CS_PTS_DIR, "/", VPU, "_cs_pts.gpkg") + paste0(DOMAIN_WITH_FEMA_VPU_SUBSETS_DIR, "/", VPU, "_cs_pts.parquet") ) } # TODO: Save all cross section points + transects into single geopackages (transects.gpkg, cs_pts.gpkg) +# --------------------------------------------------------------------------------- +# ---- Merge all the transects / cs points into single files +# --------------------------------------------------------------------------------- + +vpu_subset_paths <- list.files(DOMAIN_WITH_FEMA_VPU_SUBSETS_DIR, full.names = T) + +# --------------------------------------------------------------------------------- +# ---- Read all transects and save to single gpkg ---- +# --------------------------------------------------------------------------------- + +transect_paths <- vpu_subset_paths[grepl("_transects.gpkg", vpu_subset_paths)] + +transects <- lapply(transect_paths, function(gpkg) {sf::read_sf(gpkg) }) %>% + dplyr::bind_rows() %>% + hydroloom::rename_geometry("geometry") + +TRANSECTS_OUTPUT_PATH <- paste0(DOMAIN_WITH_FEMA_OUTPUT_DIR, "/transects.gpkg") + +sf::write_sf( + transects, + TRANSECTS_OUTPUT_PATH +) + +# --------------------------------------------------------------------------------- +# ---- Read all CS PTs and save to single parquet ---- +# --------------------------------------------------------------------------------- + +cs_pts_paths <- vpu_subset_paths[grepl("_cs_pts.parquet", vpu_subset_paths)] +cs_pts <- lapply(cs_pts_paths, function(prq) 
arrow::read_parquet(prq)) %>% 
+  dplyr::bind_rows()
+
+CS_PTS_OUTPUT_PATH <- paste0(DOMAIN_WITH_FEMA_OUTPUT_DIR, "/cs_pts.parquet")
+
+arrow::write_parquet(
+  cs_pts,
+  CS_PTS_OUTPUT_PATH
+)
+
+# ---------------------------------------------------------------------------------
+# ---- Inject ML predicted top widths / Dingman's R ----
+# ---------------------------------------------------------------------------------
+
+# ---------------------------------------------------------------------------------
+# ---- Read in ML data ----
+# ---------------------------------------------------------------------------------
+VPU_ML_BATHYMETRY_PATHS <- list.files(DOMAIN_WITH_FEMA_ML_DIR, full.names = T)
+
+ML_CROSSWALK_ID <- "id"
+
+ml_outputs <- lapply(VPU_ML_BATHYMETRY_PATHS, function(prq) {
+  vpu_id <- gsub(".*ml/([a-zA-Z0-9]+).*", "\\1", prq)
+  arrow::read_parquet(prq) %>% 
+    dplyr::mutate(vpu_id = vpu_id)
+}
+) %>% 
+  dplyr::bind_rows() %>% 
+  dplyr::select(
+    dplyr::any_of(ML_CROSSWALK_ID),
+    vpu_id,
+    owp_y_bf, owp_y_inchan,
+    owp_tw_bf, owp_tw_inchan,
+    owp_dingman_r
+  )
+
+# rename ML_CROSSWALK_ID (unique ID) to match the CROSSWALK_ID in CS PTS
+# TODO: This assumes the IDs do correspond with each other... (built from same flowlines network)
+names(ml_outputs)[names(ml_outputs) == ML_CROSSWALK_ID] = CROSSWALK_ID
+
+# Keep only distinct ID rows
+ml_outputs <- 
+  ml_outputs %>% 
+  dplyr::distinct(
+    dplyr::across(dplyr::any_of(CROSSWALK_ID)),
+    vpu_id,
+    owp_y_bf, owp_y_inchan,
+    owp_tw_bf, owp_tw_inchan,
+    owp_dingman_r
+  )
+
+# ---------------------------------------------------------------------------------
+# ---- Read in CS PTS data ----
+# ---------------------------------------------------------------------------------
+CS_PTS_OUTPUT_PATH <- paste0(DOMAIN_WITH_FEMA_OUTPUT_DIR, "/cs_pts.parquet")
+
+cs_pts <- arrow::read_parquet(CS_PTS_OUTPUT_PATH)
+
+# ---------------------------------------------------------------------------------
+# ---- Join CS PTS with ML data ---
+# ---------------------------------------------------------------------------------
+message(round(Sys.time()), " - Joining ML width/depth estimates to cross section points...")
+
+# ml_outputs %>% 
+#   dplyr::group_by(id) %>% 
+#   dplyr::count(id) %>% 
+#   dplyr::arrange(-n)
+
+# join the ML outputs data to the cross section points
+cs_pts <- 
+  cs_pts %>% 
+  dplyr::left_join(
+    dplyr::select(ml_outputs,
+                  dplyr::any_of(CROSSWALK_ID),
+                  owp_tw_inchan,
+                  owp_y_inchan,
+                  owp_tw_bf,
+                  owp_y_bf,
+                  owp_dingman_r
+    ),
+    by = CROSSWALK_ID
+  )
+
+# ---------------------------------------------------------------------------------
+# ---- Fixing negative depth/width estimates ----
+# ---------------------------------------------------------------------------------
+message(round(Sys.time()), " - Replacing any negative width/depth estimates with cross section bottom lengths...")
+
+cs_bottom_lengths <- hydrofabric3D::get_cs_bottom_length(cross_section_pts = cs_pts, crosswalk_id = CROSSWALK_ID)
+
+# TODO: for now we replace any negative TW values with the length of the bottom of the cross section
+# TODO: This method + the negative model output values both need to be looked into (04/05/2024)
+cs_pts <- 
+  cs_pts %>% 
+  dplyr::left_join(
+    cs_bottom_lengths,
+    by = c(CROSSWALK_ID, "cs_id")
+    # by = c("hy_id", "cs_id")
+  ) %>% 
+  dplyr::mutate(
+    owp_tw_inchan = dplyr::case_when(
+      owp_tw_inchan <= 0 ~ bottom_length,
+      TRUE ~ owp_tw_inchan
+    ),
+    owp_tw_bf = dplyr::case_when(
+      owp_tw_bf <= 0 ~ bottom_length,
+      TRUE ~ owp_tw_bf
+    )
+  ) %>% 
+  
+
+# extract any cross sections that didn't get matched with a "hf_id" and/or have no ML data
+# TODO: look at this stuff with Arash (04/09/2024)
+missing_cs <-
+  cs_pts %>%
+  dplyr::filter(
+    is.na(.data[[CROSSWALK_ID]]) |
+      is.na(owp_tw_inchan) | is.na(owp_y_inchan) |
+      is.na(owp_tw_bf) | is.na(owp_y_bf) |
+      is.na(owp_dingman_r)
+  ) %>%
+  hydrofabric3D::add_tmp_id(x = CROSSWALK_ID)
+
+# TODO: Delete this, but for the time being keeping this to inspect the mismatch between "hy_id" and "hf_id"
+# readr::write_csv(
+#   dplyr::select(missing_cs, -tmp_id),
+#   paste0(META_PATH, "nextgen_", path_df$vpu[i], "_cross_sections_missing_hf_ids.csv")
+# )
+
+# Split the cross sections into 2 groups:
+# - "Inchannel cs" group are points with BOTH valid banks AND relief --> These get the INCHANNEL TW and Y values from the ML model
+# - "Bankful cs" group are points WITHOUT valid banks OR any relief --> These get the BANKFUL TW and Y values from the ML model
+inchannel_cs <-
+  cs_pts %>%
+  hydrofabric3D::add_tmp_id(x = CROSSWALK_ID) %>%
+  dplyr::filter(!tmp_id %in% unique(missing_cs$tmp_id)) %>% # NOTE: makes sure to remove any of the "missing" cross sections without data
+  dplyr::select(-tmp_id) %>%
+  dplyr::filter(valid_banks & has_relief) %>%
+  # NOTE: temporarily rename the top width, depth, and Dingman's R columns so they
+  # work nicely with the "hydrofabric3D::add_cs_bathymetry()" function, which takes a dataframe of cross section points
+  # with "TW", "DEPTH", and "DINGMAN_R" columns for each cross section
+  dplyr::rename(
+    TW = owp_tw_inchan,
+    DEPTH = owp_y_inchan,
+    DINGMAN_R = owp_dingman_r
+  )
+
+bankful_cs <-
+  cs_pts %>%
+  hydrofabric3D::add_tmp_id(x = CROSSWALK_ID) %>%
+  dplyr::filter(!tmp_id %in% unique(missing_cs$tmp_id)) %>%
+  dplyr::select(-tmp_id) %>%
+  dplyr::filter(!valid_banks | !has_relief) %>%
+  dplyr::rename(
+    TW = owp_tw_bf,
+    DEPTH = owp_y_bf,
+    DINGMAN_R = owp_dingman_r
+  )
+
+# sanity check that all rows are accounted for after splitting up data
+split_kept_all_rows <- nrow(cs_pts) == (nrow(bankful_cs) + nrow(inchannel_cs) + nrow(missing_cs))
+# split_kept_all_rows <- nrow(cs_pts) == nrow(bankful_cs) + nrow(inchannel_cs)
+
+if (!split_kept_all_rows) {
+  warning(paste0("When splitting cross section points into 'bankful' and 'inchannel' groups,",
+                 "\nsome points were not put in either group.",
+                 "\nLikely due to the 'valid_banks' and/or 'has_relief' columns having missing ",
+                 "values or values other than TRUE/FALSE")
+  )
+}
+
+message(round(Sys.time()), " - Adding cross section bathymetry using inchannel width/depth estimates...")
+# tmp <-
+#   inchannel_cs %>%
+#   dplyr::slice(1:10000)
+system.time({
+
+# Add bathymetry using "inchannel" estimates
+inchannel_cs <- hydrofabric3D::add_cs_bathymetry(
+  cross_section_pts = inchannel_cs,
+  # cross_section_pts = tmp,
+  crosswalk_id = CROSSWALK_ID
+)
+
+})
+
+# arrow::write_parquet(inchannel_cs, "/Users/anguswatters/Desktop/test_ml_cs_pts_06.parquet")
+# ml_subset %>%
+#   dplyr::filter(hy_id == "wb-1005207") %>%
+#   dplyr::select(owp_y_inchan, owp_tw_inchan) %>%
+#   .$owp_y_inchan
+message(round(Sys.time()), " - Adding cross section bathymetry using bankful width/depth estimates...")
+system.time({
+
+# Add bathymetry using "bankful" estimates
+bankful_cs <- hydrofabric3D::add_cs_bathymetry(
+  cross_section_pts = bankful_cs,
+  # cross_section_pts = dplyr::slice(bankful_cs, 1:10000),
+  crosswalk_id = CROSSWALK_ID
+)
+
+})
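# The inchannel/bankful split above determines which ML estimate feeds
# hydrofabric3D::add_cs_bathymetry(). A minimal sketch of the two filter
# predicates on toy flags (hypothetical data, not the real cs_pts columns):
toy_flags <- data.frame(
  cs          = 1:4,
  valid_banks = c(TRUE, TRUE, FALSE, NA),
  has_relief  = c(TRUE, FALSE, FALSE, TRUE)
)
dplyr::filter(toy_flags, valid_banks & has_relief)   # "inchannel" -> cs 1
dplyr::filter(toy_flags, !valid_banks | !has_relief) # "bankful"   -> cs 2 and 3
# cs 4 lands in neither group because its NA flag fails both predicates, which
# is exactly the case the "split_kept_all_rows" warning above guards against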
+
+# combine the inchannel and bankful cross section points back together, fill out missing values, and reclassify the points
+final_cs <- dplyr::bind_rows(
+    dplyr::select(
+      inchannel_cs,
+      # inchannel_cs2,
+      # -hf_id,
+      -TW, -DEPTH, -DINGMAN_R,
+      # -is_dem_point,
+      -dplyr::starts_with("owp")
+    ),
+    dplyr::select(
+      bankful_cs,
+      # bankful_cs2,
+      # -hf_id,
+      -TW, -DEPTH, -DINGMAN_R,
+      # -is_dem_point,
+      -dplyr::starts_with("owp")
+    ),
+    dplyr::select(
+      dplyr::mutate(
+        missing_cs,
+        is_dem_point = FALSE
+      ),
+      # -hf_id,
+      # -is_dem_point,
+      -dplyr::starts_with("owp"),
+      -tmp_id
+    )
+  ) %>%
+  dplyr::group_by(dplyr::across(dplyr::any_of(c(CROSSWALK_ID, "cs_id")))) %>%
+  # dplyr::group_by(hy_id, cs_id) %>%
+  tidyr::fill(
+    c(cs_lengthm, Z_source)
+  ) %>%
+  dplyr::ungroup() %>%
+  dplyr::select(
+    -point_type,
+    -class,
+    -bottom, -left_bank, -right_bank,
+    -has_relief, -valid_banks
+  )
+
+# arrow::write_parquet(final_cs, "/Users/anguswatters/Desktop/tmp.parquet")
+# final_cs <- arrow::read_parquet("/Users/anguswatters/Desktop/tmp.parquet")
+
+message(round(Sys.time()), " - Reclassifying cross section point types...")
+
+# reclassify
+final_cs <- hydrofabric3D::classify_points(cs_pts = final_cs,
+                                           crosswalk_id = CROSSWALK_ID,
+                                           pct_of_length_for_relief = PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF
+                                           )
+
+starting_uids <- hydrofabric3D::get_unique_tmp_ids(cs_pts, x = CROSSWALK_ID)
+ending_uids   <- hydrofabric3D::get_unique_tmp_ids(final_cs, x = CROSSWALK_ID)
+
+has_same_number_of_uids          <- length(starting_uids) == length(ending_uids)
+all_starting_uids_in_ending_uids <- all(starting_uids %in% ending_uids)
+all_ending_uids_in_starting_uids <- all(ending_uids %in% starting_uids)
+
+# throw some warnings if:
+# - the number of uids in the input is different from the output
+# - there are missing hy_id/cs_id
+if (!has_same_number_of_uids) {
+  warning(paste0("The number of unique hy_id/cs_id differs between the ",
+                 "starting cross section points and the final cross section points:",
+                 "\n > Starting number of unique hy_id/cs_id: ", length(starting_uids),
+                 "\n > Ending number of unique hy_id/cs_id: ", length(ending_uids)
+  ))
+}
+
+if (!all_starting_uids_in_ending_uids) {
+  number_uids_not_in_ending_uids <- length(starting_uids[!starting_uids %in% ending_uids])
+
+  # starting_uids %in% ending_uids
+  warning(
+    paste0("Missing hy_id/cs_id in the output that are in the starting input cross section points: ",
+           "\n > Number of hy_id/cs_id missing: ", number_uids_not_in_ending_uids
+    )
+  )
+
+  # warning(paste0(number_uids_not_in_ending_uids, " hy_id/cs_id from the input cross section points ",
+  #                "are missing from the output cross section points"))
+
+}
+
+# ---------------------------------------------------------------------------------
+# ---- Write final cross section points data ----
+# ---------------------------------------------------------------------------------
+
+CROSS_SECTIONS_OUTPUT_PATH <- paste0(DOMAIN_WITH_FEMA_OUTPUT_DIR, "/cross_sections.parquet")
+message(round(Sys.time()), " - Saving ML augmented cross section points to:\n - filepath: '", CROSS_SECTIONS_OUTPUT_PATH, "'")
+
+# save cross section points as a parquet to out_path (domain/outputs/cross_sections.parquet)
+arrow::write_parquet(
+  dplyr::select(final_cs,
+                -is_dem_point
+  ),
+  CROSS_SECTIONS_OUTPUT_PATH
+)
+
+INTERNAL_CROSS_SECTIONS_PATH <- "/Users/anguswatters/Desktop/cross_sections_is_dem_point.parquet"
+arrow::write_parquet(
+  final_cs,
+  INTERNAL_CROSS_SECTIONS_PATH
+)
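# A quick way to sanity check the file that was just written is to read it back
# and confirm the unique id/cs_id pairs survived the round trip. A sketch,
# reusing CROSS_SECTIONS_OUTPUT_PATH from above and assuming (as the commented
# code elsewhere in this runner suggests) that tmp ids are built as
# paste0(<crosswalk id>, "_", cs_id):
chk      <- arrow::read_parquet(CROSS_SECTIONS_OUTPUT_PATH)
chk_uids <- unique(paste0(chk[[CROSSWALK_ID]], "_", chk$cs_id))
length(chk_uids) == length(ending_uids) # expect TRUE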
+
+# ---------------------------------------------------------------------------------
+# ---- Substitute diffusive domain DEM Z values ----
+# ---------------------------------------------------------------------------------
+final_cs <- arrow::read_parquet(CROSS_SECTIONS_OUTPUT_PATH)
+# INTERNAL_CROSS_SECTIONS_PATH <- paste0("/Users/anguswatters/Desktop/cross_sections2.parquet")
+
+bb_df <- lapply(COASTAL_BATHY_DEM_PATHS, function(path) {
+  r <- terra::rast(path)
+  extent <- terra::ext(r)
+  # r <- terra::project(r, "EPSG:5070")
+  # terra::set.crs(r, "EPSG:5070")
+  ext_df <- data.frame(lapply(extent, function(i) {i}))
+  ext_df$crs  <- terra::crs(r)
+  ext_df$file <- basename(path)
+  ext_df$path <- path
+
+  return(ext_df)
+  }) %>%
+  dplyr::bind_rows()
+
+# INTERNAL_CROSS_SECTIONS_PATH <- paste0("/Users/anguswatters/Desktop/cross_sections_is_dem_point.parquet")
+# arrow::write_parquet(
+#   final_cs,
+#   INTERNAL_CROSS_SECTIONS_PATH
+# )
+
+# CROSS_SECTIONS_OUTPUT_PATH <- paste0(DOMAIN_WITH_FEMA_OUTPUT_DIR, "/cross_sections.parquet")
+# final_cs <- arrow::read_parquet(CROSS_SECTIONS_OUTPUT_PATH)
+
+# the loop below reads this working copy of the cross section points and
+# overwrites it after each DEM pass, so write the ML augmented points to it first
+START_PATH <- "/Users/anguswatters/Desktop/cross_sections2.parquet"
+
+arrow::write_parquet(
+  final_cs,
+  START_PATH
+)
+
+# rm(i, cs, df, pts_subset, has_pts_in_bb, bb, updated_depths)
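# The loop below hinges on one spatial test: turn each DEM's extent into a
# polygon and keep only the cross section points that fall inside it. A
# self-contained sketch of that test with a fake extent and two points (all
# coordinates hypothetical, in EPSG:5070):
bb_sketch <- sf::st_as_sf(
  sf::st_as_sfc(
    sf::st_bbox(
      c(xmin = 0, ymin = 0, xmax = 10, ymax = 10),
      crs = sf::st_crs(5070)
    )
  )
)
pts_sketch <- sf::st_as_sf(
  data.frame(X = c(5, 50), Y = c(5, 50)),
  coords = c("X", "Y"),
  crs = 5070
)
sf::st_filter(pts_sketch, bb_sketch) # keeps only the point at (5, 5)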
+for (i in 1:nrow(bb_df)) {
+
+  # i = 1
+
+  message(i, " - Checking DEM '", bb_df$file[i], "' for CS PTS...")
+
+  df <- bb_df[i, ]
+
+  START_EPSG_CODE <- 5070
+
+  cs <- arrow::read_parquet(START_PATH)
+  cs <-
+    cs %>%
+    # dplyr::slice(1:10000) %>%
+    sf::st_as_sf(coords = c("X", "Y"), crs = START_EPSG_CODE)
+
+  # convert to bounding box CRS
+  cs <-
+    cs %>%
+    sf::st_transform(df$crs[1])
+
+  # create bounding box shape
+  bb <- sf::st_as_sf(
+    sf::st_as_sfc(
+      sf::st_bbox(
+        c(xmin = df$xmin,
+          xmax = df$xmax,
+          ymax = df$ymax,
+          ymin = df$ymin
+        ),
+        crs = sf::st_crs(df$crs)
+      )
+    )
+  )
+
+  # get pts that are in the bounding box
+  pts_subset <- sf::st_filter(
+    cs,
+    bb
+  )
+
+  has_pts_in_bb <- nrow(pts_subset) > 0
+
+  message(nrow(pts_subset), " cs points found within the '", df$file, "' DEM's bounding box")
+
+  if(!has_pts_in_bb) {
+    message(" > No points to update!")
+    next
+  }
+
+  if(has_pts_in_bb) {
+
+    message(" > Loading Raster")
+
+    # load DEM
+    dem <- terra::rast(df$path)
+
+    message(" > Extracting new cross section depth values from DEM...")
+
+    updated_depths <-
+      pts_subset %>%
+      dplyr::mutate(
+        Z2 = hydrofabric3D:::extract_pt_val(dem, .),
+        Z_source2 = df$file
+        # Z = extract_pt_val(terra::rast(dem), .)
+      ) %>%
+      sf::st_drop_geometry() %>%
+      dplyr::select(
+        dplyr::any_of(CROSSWALK_ID),
+        cs_id, pt_id,
+        # Z,
+        Z2,
+        # Z_source,
+        Z_source2
+      )
+
+    message(" > Replacing old depth values with new depth values...")
+    cs <-
+      cs %>%
+      dplyr::left_join(
+        updated_depths,
+        by = c(CROSSWALK_ID, "cs_id", "pt_id")
+      ) %>%
+      dplyr::mutate(
+        Z = dplyr::case_when(
+          !is.na(Z2) ~ Z2,
+          TRUE ~ Z
+        ),
+        Z_source = dplyr::case_when(
+          !is.na(Z_source2) ~ Z_source2,
+          TRUE ~ Z_source
+        )
+      ) %>%
+      dplyr::select(-Z2, -Z_source2)
+
+    message(" > Projecting CS PTs back to starting CRS (", START_EPSG_CODE, ") ...")
+    cs <-
+      cs %>%
+      sf::st_transform(START_EPSG_CODE)
+
+    message(" > Dropping point geometries and preserving X / Y coordinates...")
+
+    cs <-
+      cs %>%
+      dplyr::mutate(
+        X = sf::st_coordinates(.)[,1],
+        Y = sf::st_coordinates(.)[,2]
+      ) %>%
+      sf::st_drop_geometry() %>%
+      dplyr::select(
+        # hy_id,
+        dplyr::any_of(CROSSWALK_ID),
+        cs_id, pt_id,
+        cs_lengthm,
+        relative_distance,
+        X, Y,
+        Z,
+        class, point_type,
+        Z_source,
+        bottom, left_bank, right_bank, valid_banks,
+        has_relief
+        # newly added columns (03/06/2024)
+      )
+
+    message(" > Overwriting original cross section points parquet with updated depth values ...")
+    message(" > '", START_PATH, "'")
+
+    arrow::write_parquet(cs, START_PATH)
+
+  }
+
+}
+
+cross_sections <- arrow::read_parquet(START_PATH)
+
+cross_sections <-
+  cross_sections %>%
+  dplyr::select(
+    -point_type,
+    -class,
+    -bottom, -left_bank, -right_bank,
+    -has_relief, -valid_banks
+  )
+
+# reclassify
+# system.time({
+cross_sections <- hydrofabric3D::classify_points(cs_pts = cross_sections,
+                                                 crosswalk_id = CROSSWALK_ID,
+                                                 pct_of_length_for_relief = PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF
+                                                 )
+# })
+
+# final_cross_sections %>%
+#   dplyr::filter(id == "wb-1000") %>%
+#   dplyr::rename(hy_id = id) %>%
+#   hydrofabric3D::plot_cs_pts(color = "point_type")
+
+# ---------------------------------------------------------------------------------
+# ---- Write final cross section points data ----
+# ---- Diffusive Domain DEM + FEMA + ML
+# ---------------------------------------------------------------------------------
+
+CROSS_SECTIONS_ML_OUTPUT_PATH <- paste0(DOMAIN_WITH_FEMA_OUTPUT_DIR, "/cross_sections.parquet")
+
+message(round(Sys.time()), " - Saving Diffusive DEM + FEMA + ML augmented cross section points to:\n - filepath: '", CROSS_SECTIONS_ML_OUTPUT_PATH, "'")
+
+# sum(is.na(final_cross_sections$id))
+# sum(is.na(final_cross_sections$cs_id))
+# sum(is.na(final_cross_sections$pt_id))
+# sum(is.na(final_cross_sections$X))
+
+# save cross section points as a parquet to out_path (domain/outputs/cross_sections.parquet)
+arrow::write_parquet(
+  # dplyr::select(final_cs,
+  #               -is_dem_point
+  #               ),
+  cross_sections,
+  CROSS_SECTIONS_ML_OUTPUT_PATH
+)
+
+# cross_sections %>%
+#   dplyr::select(id, Z_source) %>%
+#   dplyr::group_by(Z_source) %>%
+#   dplyr::count(Z_source) %>%
+#   dplyr::arrange(-n)
+
+# CROSS_SECTIONS_ML_OUTPUT_PATH
+#
+# CROSS_SECTIONS_OUTPUT_PATH
+#
+# CROSS_SECTIONS_ML_OUTPUT_PATH <- paste0(DOMAIN_WITH_FEMA_OUTPUT_DIR, "/cross_sections.parquet")
+#
+# CROSS_SECTIONS_PATH <- paste0(DOMAIN_WITH_FEMA_OUTPUT_DIR, "/cross_sections.parquet")
+# DEM_CROSS_SECTIONS_PATH <- paste0(DOMAIN_WITH_FEMA_OUTPUT_DIR, "/dem_cross_sections.parquet")
+#
+# cs <- arrow::read_parquet(CROSS_SECTIONS_PATH) %>%
+#   dplyr::rename(hy_id = id)
+#
+# dem_cs <- arrow::read_parquet(DEM_CROSS_SECTIONS_PATH) %>%
+#   dplyr::rename(hy_id = id)
+#
+# z_ids <-
+#   cs %>%
+#   dplyr::group_by(Z_source) %>%
+# 
dplyr::slice(1:4) %>% +# dplyr::pull(hy_id) +# +# # subset_ids <- z_ids[1:1] +# # z_ids <- z_ids[1:2] +# # for (i in seq_along(z_ids)) { +# for (i in seq_along(z_ids)) { +# message(i) +# # i = 1 +# subset_id <- z_ids[i] +# +# new_cs <- +# cs %>% +# dplyr::filter(hy_id %in% subset_id) +# +# old_cs <- +# dem_cs %>% +# dplyr::filter(hy_id %in% subset_id) +# +# title_str <- paste0( +# unique(new_cs$Z_source)[1], " vs. ", +# unique(old_cs$Z_source)[1] +# ) +# +# save_path = paste0( +# "/Users/anguswatters/Desktop/z_source_plots/", +# gsub(".tif", "", unique(new_cs$Z_source)[1]), +# "_vs_", +# unique(old_cs$Z_source)[1], "_", i, ".png" +# ) +# +# message("making plot at \n > '", save_path, "'") +# +# comp_plot <- dplyr::bind_rows( +# new_cs, +# old_cs +# ) %>% +# ggplot2::ggplot() + +# ggplot2::geom_point(ggplot2::aes(x = pt_id, +# y = Z, +# color = Z_source +# ), +# size = 3, +# alpha = 0.9 +# ) + +# ggplot2::labs(title = title_str) + +# ggplot2::facet_wrap(hy_id~cs_id) + +# ggplot2::theme_bw() + +# ggplot2::theme( +# plot.title = ggplot2::element_text(size = 14), +# legend.text = ggplot2::element_text(size = 14), +# legend.title = ggplot2::element_text(size = 14, face = "bold") +# ) +# +# ggplot2::ggsave(plot = comp_plot, +# filename = save_path, +# scale = 1 +# ) +# +# } +# +# +# dplyr::bind_rows( +# cs %>% +# dplyr::filter(hy_id %in% subset_ids), +# dem_cs %>% +# dplyr::filter(hy_id %in% subset_ids) +# +# ) %>% +# ggplot2::ggplot() + +# ggplot2::geom_point(ggplot2::aes(x = pt_id, +# y = Z, +# color = Z_source +# +# )) + +# ggplot2::labs(title = "al_nwfl_ncei_1 vs hydrofabric3D") + +# ggplot2::facet_wrap(hy_id~cs_id) +# +# cs %>% +# dplyr::filter(hy_id %in% subset_ids) %>% +# ggplot2::ggplot() + +# ggplot2::geom_point(ggplot2::aes(x = pt_id, +# y = Z, +# color = point_type +# +# )) + +# ggplot2::facet_wrap(hy_id~cs_id) +# +# +# dem_cs %>% +# dplyr::filter(hy_id %in% subset_ids) %>% +# ggplot2::ggplot() + +# ggplot2::geom_point(ggplot2::aes(x = pt_id, +# y = Z, +# color = point_type +# +# )) + +# ggplot2::facet_wrap(hy_id~cs_id) +# +# +# +# +# +# + + + + + + + + + + + + diff --git a/runners/cs_runner/download_fema100.R b/runners/cs_runner/download_fema100.R index 9fa2f8a..b23b438 100644 --- a/runners/cs_runner/download_fema100.R +++ b/runners/cs_runner/download_fema100.R @@ -105,7 +105,7 @@ for (key in FEMA_BUCKET_KEYS) { message("Downloading S3 object to:\n > '", local_save_path, "'") # message("Copy command:\n > '", copy_cmd, "'") - # system(copy_cmd) + system(copy_cmd) message(" > '", key, "' download complete!") message("----------------------------------") diff --git a/runners/cs_runner/utils.R b/runners/cs_runner/utils.R index 9d27a61..e52352b 100644 --- a/runners/cs_runner/utils.R +++ b/runners/cs_runner/utils.R @@ -10,7 +10,7 @@ # ├── transects/ # ├── cross_sections/ # └── cs_pts/ -create_new_domain_dirs <- function(base_dir, domain_dirname) { +create_new_domain_dirs <- function(base_dir, domain_dirname, with_output = FALSE) { # build paths domain_dir <- paste0(base_dir, "/", domain_dirname) @@ -20,6 +20,11 @@ create_new_domain_dirs <- function(base_dir, domain_dirname) { transects_dir <- paste0(domain_dir, "/transects") cross_sections_dir <- paste0(domain_dir, "/cross_sections") cs_pts_dir <- paste0(domain_dir, "/cs_pts") + vpu_subsets_dir <- paste0(domain_dir, "/vpu-subsets") + + if(with_output) { + output_dir <- paste0(domain_dir, "/outputs") + } create_if_not_exists <- function(dir_path) { if (!dir.exists(dir_path)) { @@ -36,12 +41,18 @@ create_new_domain_dirs <- function(base_dir, 
domain_dirname) { create_if_not_exists(transects_dir) create_if_not_exists(cross_sections_dir) create_if_not_exists(cs_pts_dir) + create_if_not_exists(vpu_subsets_dir) + + if(with_output) { + create_if_not_exists(output_dir) + } + } # get path strings for a domain dir (based of a base dir and domain dirname) # NOTE: this does NOT guarentee that these folders exist, # NOTE: it just gets the paths if they were created by create_new_domain_dirs() -get_new_domain_paths <- function(base_dir, domain_dirname) { +get_new_domain_paths <- function(base_dir, domain_dirname, with_output = FALSE) { # build paths domain_dir <- paste0(base_dir, "/", domain_dirname) @@ -51,6 +62,9 @@ get_new_domain_paths <- function(base_dir, domain_dirname) { transects_dir <- paste0(domain_dir, "/transects") cross_sections_dir <- paste0(domain_dir, "/cross_sections") cs_pts_dir <- paste0(domain_dir, "/cs_pts") + vpu_subsets_dir <- paste0(domain_dir, "/vpu-subsets") + output_dir <- ifelse(with_output, paste0(domain_dir, "/outputs"), NA) + # named list of file paths return( @@ -62,12 +76,41 @@ get_new_domain_paths <- function(base_dir, domain_dirname) { dem_dir = dem_dir, transects_dir = transects_dir, cross_sections_dir = cross_sections_dir, - cs_pts_dir = cs_pts_dir + cs_pts_dir = cs_pts_dir, + vpu_subsets_dir = vpu_subsets_dir, + output_dir = output_dir ) ) } +list_s3_objects <- function(s3_bucket, pattern = NULL, aws_profile = NULL) { + + profile_option <- if (!is.null(aws_profile)) paste0("--profile ", aws_profile) else "" + + if (is.null(pattern) || pattern == "") { + grep_command <- "" # no filtering if empty or NULL + } else { + grep_command <- paste0(" | grep -E \"", pattern, "\"") # grep if a pattern is given + } + + cmd <- paste0( + '#!/bin/bash\n', + 'S3_BUCKET="', s3_bucket, '"\n', + 'PATTERN="', pattern, '"\n', + 'S3_OBJECTS=$(aws s3 ls "$S3_BUCKET" ', profile_option, ' | awk \'{print $4}\' | grep -E "$PATTERN")\n', + 'echo "$S3_OBJECTS"' + ) + # cmd <- paste0( + # '#!/bin/bash\n', + # 'S3_BUCKET="', s3_bucket, '"\n', + # 'S3_OBJECTS=$(aws s3 ls "$S3_BUCKET" ', profile_option, ' | awk \'{print $4}\'', grep_command, ')\n', + # 'echo "$S3_OBJECTS"' + # ) + ls_output <- system(cmd, intern = TRUE) + return(ls_output) +} + # Given 2 character vectors of filenames both including VPU strings after a "nextgen_" string, match them together to # make sure they are aligned and in the same order # x is a character vector of file paths with a VPU ID preceeded by a "nextgen_" string @@ -392,6 +435,7 @@ add_predicate_group_id <- function(polys, predicate) { # matching cross section points that went through "get_improved_cs_pts()" and that were extended for improvement # returns the extended version of the transects match_transects_to_extended_cs_pts <- function(transect_lines, fixed_cs_pts, crosswalk_id) { + # transect_lines = transects # fixed_cs_pts = fixed_pts # crosswalk_id = CROSSWALK_ID @@ -402,7 +446,6 @@ match_transects_to_extended_cs_pts <- function(transect_lines, fixed_cs_pts, cro # get the counts of each point type to add this data to the transect_lines dataset point_type_counts <- hydrofabric3D::get_point_type_counts(classified_pts = fixed_cs_pts, crosswalk_id = crosswalk_id) - # Check the number of cross sections that were extended message("Subsetting cross section points generated after extending transect_lines...") @@ -441,6 +484,8 @@ match_transects_to_extended_cs_pts <- function(transect_lines, fixed_cs_pts, cro length_col = "cs_lengthm" ) + update_transect_lines <- 
hydroloom::rename_geometry(update_transect_lines, "geometry") + update_transect_lines <- update_transect_lines %>% dplyr::rename(!!sym(crosswalk_id) := hy_id) From 3b61de2221ae193665d48748cf73bb71ce7863d5 Mon Sep 17 00:00:00 2001 From: anguswg-ucsb <anguswatters@gmail.com> Date: Mon, 30 Sep 2024 21:38:11 -0700 Subject: [PATCH 59/64] made a temp bathy.R file that ill delete tomorrow that was for running cs point generation from terminal --- runners/cs_runner/02_cs_pts.R | 4 +- runners/cs_runner/bathy.R | 337 +++++++++++++++++++++++++++ runners/cs_runner/config_vars.R | 3 +- runners/cs_runner/domain_with_fema.R | 86 +++++-- 4 files changed, 407 insertions(+), 23 deletions(-) create mode 100644 runners/cs_runner/bathy.R diff --git a/runners/cs_runner/02_cs_pts.R b/runners/cs_runner/02_cs_pts.R index 4713c4b..264a2ab 100644 --- a/runners/cs_runner/02_cs_pts.R +++ b/runners/cs_runner/02_cs_pts.R @@ -200,8 +200,10 @@ for (i in 20:nrow(path_df)) { ) # }) + # transects %>% + # sf::st_drop_geometry() %>% + # dplyr::group_by(id, cs_id) # fixed_pts2$is_extended %>% sum() - ids_after_fixed_pts <- hydrofabric3D::add_tmp_id(fixed_pts)$tmp_id # # TODO: This is taking A LOT time to process as inputs get larger, an improvement should be looked into more diff --git a/runners/cs_runner/bathy.R b/runners/cs_runner/bathy.R new file mode 100644 index 0000000..7a550f9 --- /dev/null +++ b/runners/cs_runner/bathy.R @@ -0,0 +1,337 @@ +# Generate the transects + cs_pts + cross sections layers for a single flowlines domain file and DEM file +source("runners/cs_runner/config.R") +source("runners/cs_runner/utils.R") + +# load libraries +# library(hydrofabric3D) +# library(dplyr) +# library(sf) + +# Unique Flowline ID column name +CROSSWALK_ID <- "id" + +VPU_ML_BATHYMETRY_PATHS <- list.files(DOMAIN_WITH_FEMA_ML_DIR, full.names = T) + +ML_CROSSWALK_ID <- "id" + +# ml_outputs <- lapply(VPU_ML_BATHYMETRY_PATHS, function(prq) { +# vpu_id <- gsub(".*ml/([a-zA-Z0-9]+).*", "\\1", prq) +# arrow::read_parquet(prq) %>% +# dplyr::mutate(vpu_id = vpu_id) +# } +# ) %>% +# dplyr::bind_rows() %>% +# dplyr::select( +# dplyr::any_of(ML_CROSSWALK_ID), +# vpu_id, +# owp_y_bf, owp_y_inchan, +# owp_tw_bf, owp_tw_inchan, +# owp_dingman_r +# ) +# +# # rename ML_CROSSWALK_ID (unique ID) to match the CROSSWALK_ID in CS PTS +# # TODO: This assumes the IDs do correspond with eachother... (built from same flowlines network) +# names(ml_outputs)[names(ml_outputs) == ML_CROSSWALK_ID] = CROSSWALK_ID +# +# # Keep only distinct ID rows +# ml_outputs <- +# ml_outputs %>% +# dplyr::distinct( +# dplyr::across(dplyr::any_of(CROSSWALK_ID)), +# vpu_id, +# owp_y_bf, owp_y_inchan, +# owp_tw_bf, owp_tw_inchan, +# owp_dingman_r +# ) + +# sf::st_layers(DOMAIN_WITH_FEMA_FLOWLINES_PATH) +# rm(ml_outputs, ml) +ml_outputs <- sf::read_sf(DOMAIN_WITH_FEMA_FLOWLINES_PATH, layer = "flowpath-attributes-ml") + +ml_outputs <- + ml_outputs %>% + dplyr::select( + dplyr::any_of(ML_CROSSWALK_ID), + vpuid, + owp_y_bf = YCC, + owp_y_inchan = Y, + owp_tw_bf = TopWdthCC, + owp_tw_inchan = TopWdth, + owp_dingman_r = dingman_r + ) +# +# # rename ML_CROSSWALK_ID (unique ID) to match the CROSSWALK_ID in CS PTS +# # TODO: This assumes the IDs do correspond with eachother... 
(built from same flowlines network)
+names(ml_outputs)[names(ml_outputs) == ML_CROSSWALK_ID] = CROSSWALK_ID
+#
+# # Keep only distinct ID rows
+ml_outputs <-
+  ml_outputs %>%
+  dplyr::distinct(
+    dplyr::across(dplyr::any_of(CROSSWALK_ID)),
+    vpuid, # NOTE: the column is named "vpuid" by the select() above (it was "vpu_id" in the parquet-based version)
+    owp_y_bf, owp_y_inchan,
+    owp_tw_bf, owp_tw_inchan,
+    owp_dingman_r
+  )
+
+# ---------------------------------------------------------------------------------
+# ---- Read in CS PTS data ----
+# ---------------------------------------------------------------------------------
+CS_PTS_OUTPUT_PATH <- paste0(DOMAIN_WITH_FEMA_OUTPUT_DIR, "/cs_pts.parquet")
+
+cs_pts <- arrow::read_parquet(CS_PTS_OUTPUT_PATH)
+
+# ---------------------------------------------------------------------------------
+# ---- Join CS PTS with ML data ----
+# ---------------------------------------------------------------------------------
+message(round(Sys.time()), " - Joining ML width/depth estimates to cross section points...")
+
+# ml_outputs %>%
+#   dplyr::group_by(id) %>%
+#   dplyr::count(id) %>%
+#   dplyr::arrange(-n)
+
+# join the ML outputs data to the cross section points
+cs_pts <-
+  cs_pts %>%
+  dplyr::left_join(
+    dplyr::select(ml_outputs,
+                  dplyr::any_of(CROSSWALK_ID),
+                  owp_tw_inchan,
+                  owp_y_inchan,
+                  owp_tw_bf,
+                  owp_y_bf,
+                  owp_dingman_r
+    ),
+    by = CROSSWALK_ID
+  )
+
+# ---------------------------------------------------------------------------------
+# ---- Fixing negative depth/width estimates ----
+# ---------------------------------------------------------------------------------
+message(round(Sys.time()), " - Replacing any negative width/depth estimates with cross section bottom lengths...")
+
+cs_bottom_lengths <- hydrofabric3D::get_cs_bottom_length(cross_section_pts = cs_pts, crosswalk_id = CROSSWALK_ID)
+
+# TODO: for now we replace any negative TW values with the length of the bottom of the cross section
+# TODO: This method + the negative model output values both need to be looked into (04/05/2024)
+cs_pts <-
+  cs_pts %>%
+  dplyr::left_join(
+    cs_bottom_lengths,
+    by = c(CROSSWALK_ID, "cs_id")
+    # by = c("hy_id", "cs_id")
+  ) %>%
+  dplyr::mutate(
+    owp_tw_inchan = dplyr::case_when(
+      owp_tw_inchan <= 0 ~ bottom_length,
+      TRUE ~ owp_tw_inchan
+    ),
+    owp_tw_bf = dplyr::case_when(
+      owp_tw_bf <= 0 ~ bottom_length,
+      TRUE ~ owp_tw_bf
+    )
+  ) %>%
+  dplyr::select(-bottom_length)
+
+# extract any cross sections that didn't get matched with a "hf_id" and/or have no ML data
+# TODO: look at this stuff with Arash (04/09/2024)
+missing_cs <-
+  cs_pts %>%
+  dplyr::filter(
+    is.na(.data[[CROSSWALK_ID]]) |
+      is.na(owp_tw_inchan) | is.na(owp_y_inchan) |
+      is.na(owp_tw_bf) | is.na(owp_y_bf) |
+      is.na(owp_dingman_r)
+  ) %>%
+  hydrofabric3D::add_tmp_id(x = CROSSWALK_ID)
+
+# TODO: Delete this, but for the time being keeping this to inspect the mismatch between "hy_id" and "hf_id"
+# readr::write_csv(
+#   dplyr::select(missing_cs, -tmp_id),
+#   paste0(META_PATH, "nextgen_", path_df$vpu[i], "_cross_sections_missing_hf_ids.csv")
+# )
+
+# Split the cross sections into 2 groups:
+# - "Inchannel cs" group are points with BOTH valid banks AND relief --> These get the INCHANNEL TW and Y values from the ML model
+# - "Bankful cs" group are points WITHOUT valid banks OR any relief --> These get the BANKFUL TW and Y values from the ML model
+inchannel_cs <-
+  cs_pts %>%
+  hydrofabric3D::add_tmp_id(x = CROSSWALK_ID) %>%
+  dplyr::filter(!tmp_id %in% unique(missing_cs$tmp_id)) %>% # NOTE: makes sure to remove any of the "missing" cross sections without data
+  dplyr::select(-tmp_id) %>%
+  dplyr::filter(valid_banks & has_relief) %>%
+  # NOTE: temporarily rename the top width, depth, and Dingman's R columns so they
+  # work nicely with the "hydrofabric3D::add_cs_bathymetry()" function, which takes a dataframe of cross section points
+  # with "TW", "DEPTH", and "DINGMAN_R" columns for each cross section
+  dplyr::rename(
+    TW = owp_tw_inchan,
+    DEPTH = owp_y_inchan,
+    DINGMAN_R = owp_dingman_r
+  )
+  # dplyr::slice(1:1000)
+
+bankful_cs <-
+  cs_pts %>%
+  hydrofabric3D::add_tmp_id(x = CROSSWALK_ID) %>%
+  dplyr::filter(!tmp_id %in% unique(missing_cs$tmp_id)) %>%
+  dplyr::select(-tmp_id) %>%
+  dplyr::filter(!valid_banks | !has_relief) %>%
+  dplyr::rename(
+    TW = owp_tw_bf,
+    DEPTH = owp_y_bf,
+    DINGMAN_R = owp_dingman_r
+  )
+  # dplyr::slice(1:1000)
+
+# # Replace any topwidth values that are GREATER than the actual cross section length (meters)
+# bankful_cs2 <- hydrofabric3D:::fix_oversized_topwidths(
+#   cross_section_pts = bankful_cs2,
+#   crosswalk_id = CROSSWALK_ID
+# )
+# bankful_cs %>%
+#   dplyr::filter(TW >= cs_lengthm) %>%
+#   dplyr::select(TW)
+
+# sanity check that all rows are accounted for after splitting up data
+split_kept_all_rows <- nrow(cs_pts) == (nrow(bankful_cs) + nrow(inchannel_cs) + nrow(missing_cs))
+# split_kept_all_rows <- nrow(cs_pts) == nrow(bankful_cs) + nrow(inchannel_cs)
+
+if (!split_kept_all_rows) {
+  warning(paste0("When splitting cross section points into 'bankful' and 'inchannel' groups,",
+                 "\nsome points were not put in either group.",
+                 "\nLikely due to the 'valid_banks' and/or 'has_relief' columns having missing ",
+                 "values or values other than TRUE/FALSE")
+  )
+}
+
+message(round(Sys.time()), " - Adding cross section bathymetry using inchannel width/depth estimates...")
+# tmp <-
+#   inchannel_cs %>%
+#   dplyr::slice(1:10000)
+# system.time({
+
+  # Add bathymetry using "inchannel" estimates
+  inchannel_cs <- hydrofabric3D::add_cs_bathymetry(
+    cross_section_pts = inchannel_cs,
+    # cross_section_pts = tmp,
+    crosswalk_id = CROSSWALK_ID
+  )
+
+# })
+
+  gc()
+
+# arrow::write_parquet(inchannel_cs, "/Users/anguswatters/Desktop/test_ml_cs_pts_06.parquet")
+# ml_subset %>%
+#   dplyr::filter(hy_id == "wb-1005207") %>%
+#   dplyr::select(owp_y_inchan, owp_tw_inchan) %>%
+#   .$owp_y_inchan
+message(round(Sys.time()), " - Adding cross section bathymetry using bankful width/depth estimates...")
+# system.time({
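# Per the NOTE above, add_cs_bathymetry() keys off per cross section "TW",
# "DEPTH", and "DINGMAN_R" columns, which is why both groups were renamed onto
# that shared interface even though the values come from different ML estimates
# (owp_*_inchan vs owp_*_bf). A skeleton of just those estimate columns
# (hypothetical values, alongside the usual id/cs_id/point columns):
estimate_skeleton <- data.frame(
  id        = "wb-1",
  cs_id     = 1L,
  TW        = 30.0, # ML estimated top width (m)
  DEPTH     = 1.5,  # ML estimated channel depth (m)
  DINGMAN_R = 2.1   # Dingman's channel shape parameter
)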
+
+  # Add bathymetry using "bankful" estimates
+  bankful_cs <- hydrofabric3D::add_cs_bathymetry(
+    cross_section_pts = bankful_cs,
+    # cross_section_pts = dplyr::slice(bankful_cs, 1:10000),
+    crosswalk_id = CROSSWALK_ID
+  )
+
+# })
+
+# combine the inchannel and bankful cross section points back together, fill out missing values, and reclassify the points
+final_cs <- dplyr::bind_rows(
+  dplyr::select(
+    inchannel_cs,
+    # inchannel_cs2,
+    # -hf_id,
+    -TW, -DEPTH, -DINGMAN_R,
+    # -is_dem_point,
+    -dplyr::starts_with("owp")
+  ),
+  dplyr::select(
+    bankful_cs,
+    # bankful_cs2,
+    # -hf_id,
+    -TW, -DEPTH, -DINGMAN_R,
+    # -is_dem_point,
+    -dplyr::starts_with("owp")
+  ),
+  dplyr::select(
+    dplyr::mutate(
+      missing_cs,
+      is_dem_point = FALSE
+    ),
+    # -hf_id,
+    # -is_dem_point,
+    -dplyr::starts_with("owp"),
+    -tmp_id
+  )
+) %>%
+  dplyr::group_by(dplyr::across(dplyr::any_of(c(CROSSWALK_ID, "cs_id")))) %>%
+  # dplyr::group_by(hy_id, cs_id) %>%
+  tidyr::fill(
+    c(cs_lengthm, Z_source)
+  ) %>%
+  dplyr::ungroup() %>%
+  dplyr::select(
+    -point_type,
+    -class,
+    -bottom, -left_bank, -right_bank,
+    -has_relief, -valid_banks
+  )
+
+# arrow::write_parquet(final_cs, "/Users/anguswatters/Desktop/tmp.parquet")
+# final_cs <- arrow::read_parquet("/Users/anguswatters/Desktop/tmp.parquet")
+
+message(round(Sys.time()), " - Reclassifying cross section point types...")
+
+# reclassify
+final_cs <- hydrofabric3D::classify_points(cs_pts = final_cs,
+                                           crosswalk_id = CROSSWALK_ID,
+                                           pct_of_length_for_relief = PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF
+)
+
+starting_uids <- hydrofabric3D::get_unique_tmp_ids(cs_pts, x = CROSSWALK_ID)
+ending_uids   <- hydrofabric3D::get_unique_tmp_ids(final_cs, x = CROSSWALK_ID)
+
+has_same_number_of_uids          <- length(starting_uids) == length(ending_uids)
+all_starting_uids_in_ending_uids <- all(starting_uids %in% ending_uids)
+all_ending_uids_in_starting_uids <- all(ending_uids %in% starting_uids)
+
+# throw some warnings if:
+# - the number of uids in the input is different from the output
+# - there are missing hy_id/cs_id
+if (!has_same_number_of_uids) {
+  warning(paste0("The number of unique hy_id/cs_id differs between the ",
+                 "starting cross section points and the final cross section points:",
+                 "\n > Starting number of unique hy_id/cs_id: ", length(starting_uids),
+                 "\n > Ending number of unique hy_id/cs_id: ", length(ending_uids)
+  ))
+}
+
+if (!all_starting_uids_in_ending_uids) {
+  number_uids_not_in_ending_uids <- length(starting_uids[!starting_uids %in% ending_uids])
+
+  # starting_uids %in% ending_uids
+  warning(
+    paste0("Missing hy_id/cs_id in the output that are in the starting input cross section points: ",
+           "\n > Number of hy_id/cs_id missing: ", number_uids_not_in_ending_uids
+    )
+  )
+
+  # warning(paste0(number_uids_not_in_ending_uids, " hy_id/cs_id from the input cross section points ",
+  #                "are missing from the output cross section points"))
+
+}
+
+tmp_path <- paste0(DOMAIN_WITH_FEMA_OUTPUT_DIR, "/final_cs.parquet")
+message("saving file: ", tmp_path)
+
+arrow::write_parquet(
+  final_cs,
+  tmp_path
+)
+
diff --git a/runners/cs_runner/config_vars.R b/runners/cs_runner/config_vars.R
index 7b86b59..d95e534 100644
--- a/runners/cs_runner/config_vars.R
+++ b/runners/cs_runner/config_vars.R
@@ -248,8 +248,7 @@ DOMAIN_WITH_FEMA_VPU_SUBSETS_DIR <- paste0(DOMAIN_WITH_FEMA_DIR, "/", DOM
 DOMAIN_WITH_FEMA_ML_DIR <- paste0(DOMAIN_WITH_FEMA_DIR, "/", DOMAIN_WITH_FEMA_ML_DIRNAME)
 
 ML_AUXILIARY_DATA_S3_URI <- paste0(S3_BUCKET_BASE_URI, "bathymetry/ml_auxiliary_data")
-S3_BUCKET_BASE_URI
-
+# S3_BUCKET_BASE_URI
LYNKER_HYDROFABRIC_S3_BUCKET_BASE_URI <- "s3://lynker-hydrofabric/" ML_BATHYMETRY_S3_DATA_DIR <- "hydrofabric/nextgen/bathymetry/multisource_river_attributes/" ML_BATHYMETRY_S3_URI <- paste0(LYNKER_HYDROFABRIC_S3_BUCKET_BASE_URI, ML_BATHYMETRY_S3_DATA_DIR) diff --git a/runners/cs_runner/domain_with_fema.R b/runners/cs_runner/domain_with_fema.R index 51ed175..ec13e6e 100644 --- a/runners/cs_runner/domain_with_fema.R +++ b/runners/cs_runner/domain_with_fema.R @@ -533,32 +533,63 @@ VPU_ML_BATHYMETRY_PATHS <- list.files(DOMAIN_WITH_FEMA_ML_DIR, full.names = T) ML_CROSSWALK_ID <- "id" -ml_outputs <- lapply(VPU_ML_BATHYMETRY_PATHS, function(prq) { - vpu_id <- gsub(".*ml/([a-zA-Z0-9]+).*", "\\1", prq) - arrow::read_parquet(prq) %>% - dplyr::mutate(vpu_id = vpu_id) -} -) %>% - dplyr::bind_rows() %>% +# ml_outputs <- lapply(VPU_ML_BATHYMETRY_PATHS, function(prq) { +# vpu_id <- gsub(".*ml/([a-zA-Z0-9]+).*", "\\1", prq) +# arrow::read_parquet(prq) %>% +# dplyr::mutate(vpu_id = vpu_id) +# } +# ) %>% +# dplyr::bind_rows() %>% +# dplyr::select( +# dplyr::any_of(ML_CROSSWALK_ID), +# vpu_id, +# owp_y_bf, owp_y_inchan, +# owp_tw_bf, owp_tw_inchan, +# owp_dingman_r +# ) +# +# # rename ML_CROSSWALK_ID (unique ID) to match the CROSSWALK_ID in CS PTS +# # TODO: This assumes the IDs do correspond with eachother... (built from same flowlines network) +# names(ml_outputs)[names(ml_outputs) == ML_CROSSWALK_ID] = CROSSWALK_ID +# +# # Keep only distinct ID rows +# ml_outputs <- + # ml_outputs %>% + # dplyr::distinct( + # dplyr::across(dplyr::any_of(CROSSWALK_ID)), + # vpu_id, + # owp_y_bf, owp_y_inchan, + # owp_tw_bf, owp_tw_inchan, + # owp_dingman_r + # ) + +# sf::st_layers(DOMAIN_WITH_FEMA_FLOWLINES_PATH) +# rm(ml_outputs, ml) +ml_outputs <- sf::read_sf(DOMAIN_WITH_FEMA_FLOWLINES_PATH, layer = "flowpath-attributes-ml") + +ml_outputs <- + ml_outputs %>% dplyr::select( dplyr::any_of(ML_CROSSWALK_ID), - vpu_id, - owp_y_bf, owp_y_inchan, - owp_tw_bf, owp_tw_inchan, - owp_dingman_r + vpuid, + owp_y_bf = YCC, + owp_y_inchan = Y, + owp_tw_bf = TopWdthCC, + owp_tw_inchan = TopWdth, + owp_dingman_r = dingman_r ) - -# rename ML_CROSSWALK_ID (unique ID) to match the CROSSWALK_ID in CS PTS -# TODO: This assumes the IDs do correspond with eachother... (built from same flowlines network) +# +# # rename ML_CROSSWALK_ID (unique ID) to match the CROSSWALK_ID in CS PTS +# # TODO: This assumes the IDs do correspond with eachother... 
(built from same flowlines network)
 names(ml_outputs)[names(ml_outputs) == ML_CROSSWALK_ID] = CROSSWALK_ID
-
-# Keep only distinct ID rows
-ml_outputs <-
-  ml_outputs %>%
+#
+# # Keep only distinct ID rows
+ml_outputs <-
+  ml_outputs %>%
   dplyr::distinct(
     dplyr::across(dplyr::any_of(CROSSWALK_ID)),
-    vpu_id,
-    owp_y_bf, owp_y_inchan,
+    vpuid, # NOTE: the column is named "vpuid" by the select() above
+    owp_y_bf, owp_y_inchan,
     owp_tw_bf, owp_tw_inchan,
     owp_dingman_r
   )
@@ -699,6 +730,8 @@ inchannel_cs <- hydrofabric3D::add_cs_bathymetry(
 
 })
 
+
+
 # arrow::write_parquet(inchannel_cs, "/Users/anguswatters/Desktop/test_ml_cs_pts_06.parquet")
 # ml_subset %>%
 #   dplyr::filter(hy_id == "wb-1005207") %>%
@@ -803,6 +836,19 @@ if (!all_starting_uids_in_ending_uids) {
 
 }
 
+# tmp_path <- paste0(DOMAIN_WITH_FEMA_OUTPUT_DIR, "/final_cs2.parquet")
+# message("saving file: ", tmp_path)
+#
+# final_cs <- arrow::read_parquet(tmp_path)
+#
+# # save cross section points as a parquet to out_path (domain/outputs/cross_sections.parquet)
+# arrow::write_parquet(
+#   dplyr::select(final_cs,
+#                 -is_dem_point
+#   ),
+#   paste0(DOMAIN_WITH_FEMA_OUTPUT_DIR, "/final_cs2.parquet")
+# )
+
 # ---------------------------------------------------------------------------------
 # ---- Write final cross section points data ----
 # ---------------------------------------------------------------------------------

From 7c8ad1026901fbfe2eae4c2d7bbe792a1b9b7a8a Mon Sep 17 00:00:00 2001
From: anguswg-ucsb <anguswatters@gmail.com>
Date: Tue, 8 Oct 2024 10:16:24 -0700
Subject: [PATCH 60/64] removed excess code from new_domain.R cs runner file

---
 runners/cs_runner/config_vars.R |   2 +-
 runners/cs_runner/new_domain.R  | 219 +------------------------------
 2 files changed, 2 insertions(+), 219 deletions(-)

diff --git a/runners/cs_runner/config_vars.R b/runners/cs_runner/config_vars.R
index d95e534..1a834a4 100644
--- a/runners/cs_runner/config_vars.R
+++ b/runners/cs_runner/config_vars.R
@@ -239,7 +239,7 @@ DOMAIN_WITH_FEMA_CROSS_SECTIONS_DIRNAME <- "cross_sections"
 DOMAIN_WITH_FEMA_OUTPUT_DIRNAME <- "outputs"
 DOMAIN_WITH_FEMA_VPU_SUBSETS_DIRNAME <- "vpu-subsets"
 DOMAIN_WITH_FEMA_ML_DIRNAME <- "ml"
-
+# aws s3 cp s3://prd-tnm/StagedProducts/Elevation/1/TIFF/USGS_Seamless_DEM_1.vrt /Users/anguswatters/Desktop/3DEP/3DEP.vrt
 DOMAIN_WITH_FEMA_TRANSECTS_DIR <- paste0(DOMAIN_WITH_FEMA_DIR, "/", DOMAIN_WITH_FEMA_TRANSECTS_DIRNAME)
 DOMAIN_WITH_FEMA_CS_PTS_DIR <- paste0(DOMAIN_WITH_FEMA_DIR, "/", DOMAIN_WITH_FEMA_CS_PTS_DIRNAME)
 DOMAIN_WITH_FEMA_CROSS_SECTIONS_DIR <- paste0(DOMAIN_WITH_FEMA_DIR, "/", DOMAIN_WITH_FEMA_CROSS_SECTIONS_DIRNAME)
diff --git a/runners/cs_runner/new_domain.R b/runners/cs_runner/new_domain.R
index 5aae38e..4d2f223 100644
--- a/runners/cs_runner/new_domain.R
+++ b/runners/cs_runner/new_domain.R
@@ -290,221 +290,4 @@ arrow::write_parquet(
 # ----------------------------------------------------------------------------------------------------------------
 # ----------------------------------------------------------------------------------------------------------------
-# ----------------------------------------------------------------------------------------------------------------
-
-# ----------------------------------------------------------------------------------------------------------------
-# ---- STEP 4: Update transects with extended transects (if exists) ----
-# ----------------------------------------------------------------------------------------------------------------
-
-# get the counts of each point type to add this data to the transects dataset
-point_type_counts <- 
hydrofabric3D::get_point_type_counts(fixed_pts) - -# # check the number of cross sections that were extended -message("Subsetting cross section points generated after extending transects...") - -# extract cross section points that have an "is_extended" value of TRUE -extended_pts <- - fixed_pts %>% - dplyr::filter(is_extended) %>% - hydrofabric3D::add_tmp_id() - -# extract transects that have a "hy_id" in the "extended_pts" dataset -update_transects <- - transects %>% - hydrofabric3D::add_tmp_id() %>% - dplyr::filter(tmp_id %in% unique(extended_pts$tmp_id)) - -# if any transects were extended, update the transects dataset, and overwrite local and S3 transects geopackages -if (nrow(update_transects) > 0) { - message("Updating ", nrow(update_transects), " transects") - - update_transects <- - update_transects %>% - # apply extend_by_percent function to each transect line: - hydrofabric3D:::extend_by_percent( - pct = EXTENSION_PCT, - length_col = "cs_lengthm" - ) - - # Filter down to ONLY points that were finalized and rectified from rectify_cs_pts() - # remove old transects that have "tmp_id" in "extended_pts" (transects that were unchanged and are "good_to_go") - # and then replace with old transects with the "update_transects" - out_transects <- - transects %>% - hydrofabric3D::add_tmp_id() %>% - # dplyr::filter(!tmp_id %in% unique(extended_pts$tmp_id)) %>% - dplyr::filter(tmp_id %in% unique(hydrofabric3D::add_tmp_id(fixed_pts)$tmp_id)) %>% # Subset down to the remaining tmp_ids in the fixed points - dplyr::filter(!tmp_id %in% unique(extended_pts$tmp_id)) %>% # remove the tmp_ids that we are going add back in with the extended versions of those tmp_ids - dplyr::bind_rows( # bring in the new updated extended transects - dplyr::mutate( - update_transects, - is_extended = TRUE - ) - ) - -} else { - - out_transects <- - transects %>% - hydrofabric3D::add_tmp_id() %>% - dplyr::filter(tmp_id %in% unique(hydrofabric3D::add_tmp_id(fixed_pts)$tmp_id)) %>% - dplyr::filter(!tmp_id %in% unique(extended_pts$tmp_id)) -} - -# finalize new transects -out_transects <- - out_transects %>% - dplyr::left_join( - point_type_counts, - by = c("hy_id", "cs_id") - ) %>% - dplyr::left_join( - dplyr::ungroup( - dplyr::slice( - - dplyr::group_by( - dplyr::select(sf::st_drop_geometry(fixed_pts), - hy_id, cs_id, bottom, left_bank, right_bank, valid_banks, has_relief), - hy_id, cs_id), - 1) - ), - by = c("hy_id", "cs_id") - ) %>% - dplyr::select(hy_id, cs_source, cs_id, cs_measure, cs_lengthm, - # sinuosity, - is_extended, - left_bank_count, right_bank_count, channel_count, bottom_count, - bottom, left_bank, right_bank, valid_banks, has_relief, - geom - ) - -# ---------------------------------------------------------------------------------------------------------------- -# ---- Re enumerate the transects & cross section points "cs_id" ---- -# ---------------------------------------------------------------------------------------------------------------- -renumb <- hydrofabric3D:::renumber_cs_ids(fixed_pts, crosswalk_id = "hy_id") -unumb <- renumb %>% get_unique_tmp_ids() -nnumb <- fixed_pts %>% get_unique_tmp_ids() -all(unumb %in% nnumb) -all(nnumb %in% unumb) - - -length(unumb) -length(nnumb) - -# make a dataframe that has a new_cs_id column that has -# the cs_id renumbered to fill in any missing IDs, -# so each hy_id has cs_ids that go from 1 - number of cross sections on hy_id -# The dataframe below will be used to join the "new_cs_id" with -# the original "cs_ids" in the final cross section POINTS and UPDATED 
TRANSECTS output datasets -renumbered_ids <- - fixed_pts %>% - sf::st_drop_geometry() %>% - # dplyr::filter(hy_id %in% c("wb-2402800", "wb-2398282", "wb-2400351")) %>% - dplyr::select(hy_id, cs_id, pt_id, cs_measure) %>% - dplyr::group_by(hy_id, cs_id) %>% - dplyr::slice(1) %>% - dplyr::ungroup() %>% - hydrofabric3D::add_tmp_id() %>% - dplyr::group_by(hy_id) %>% - dplyr::mutate( - new_cs_id = 1:dplyr::n() - ) %>% - dplyr::ungroup() %>% - dplyr::select(new_cs_id, tmp_id) - -# Renumber the transects to have correct CS IDs -out_transects2 <- dplyr::left_join( - hydrofabric3D::add_tmp_id(out_transects), - renumbered_ids, - by = "tmp_id" -) %>% - dplyr::select(-cs_id, -tmp_id) %>% - dplyr::select(hy_id, cs_source, - cs_id = new_cs_id, - cs_measure, cs_lengthm, - # sinuosity, - is_extended, - left_bank_count, right_bank_count, channel_count, bottom_count, - bottom, left_bank, right_bank, valid_banks, has_relief, - geometry = geom - ) - -# Renumber the cross sections points to have correct CS IDs -fixed_pts <- - dplyr::left_join( - hydrofabric3D::add_tmp_id(fixed_pts), - renumbered_ids, - by = "tmp_id" - ) %>% - dplyr::select(-cs_id, -tmp_id) %>% - dplyr::rename(cs_id = new_cs_id) - -renumbered_ids <- - df %>% - sf::st_drop_geometry() %>% - dplyr::select( - # hy_id, - dplyr::any_of(crosswalk_id), - cs_id, pt_id, cs_measure - ) %>% - dplyr::group_by(dplyr::across(dplyr::any_of(c(crosswalk_id, "cs_id")))) %>% - # dplyr::group_by(hy_id, cs_id) %>% - dplyr::slice(1) %>% - dplyr::ungroup() %>% - dplyr::group_by(dplyr::across(dplyr::any_of(c(crosswalk_id)))) %>% - # dplyr::group_by(hy_id) %>% - dplyr::mutate( - new_cs_id = 1:dplyr::n() - # tmp_id = paste0(hy_id, "_", cs_id) - ) %>% - add_tmp_id(x = get(crosswalk_id)) %>% - dplyr::ungroup() %>% - dplyr::select(new_cs_id, tmp_id) - -# Join the new cs_ids back with the final output data to replace the old cs_ids -df <- dplyr::left_join( - add_tmp_id(df, x = get(crosswalk_id)), - # dplyr::mutate(df,tmp_id = paste0(hy_id, "_", cs_id)), - renumbered_ids, - by = "tmp_id" -) %>% - dplyr::select(-cs_id, -tmp_id) %>% - dplyr::rename("cs_id" = "new_cs_id") %>% - dplyr::relocate(dplyr::any_of(crosswalk_id), cs_id) -# dplyr::relocate(hy_id, cs_id) - -# ---------------------------------------------------------------------------------------------------------------- -# ---- Cross section points parquet to S3 ---- -# ---------------------------------------------------------------------------------------------------------------- - -# classify the cross section points -fixed_pts <- - fixed_pts %>% - dplyr::mutate( - X = sf::st_coordinates(.)[,1], - Y = sf::st_coordinates(.)[,2] - ) %>% - sf::st_drop_geometry() %>% - dplyr::select( - hy_id, cs_id, pt_id, - cs_lengthm, - relative_distance, - X, Y, Z, - class, point_type, - bottom, left_bank, right_bank, valid_banks, has_relief # newly added columns (03/06/2024) - ) - - - - - - - - - - - - - - - +# ---------------------------------------------------------------------------------------------------------------- \ No newline at end of file From 3441b885ff7e9a3fa01ca21d528bb5765ada4e8f Mon Sep 17 00:00:00 2001 From: anguswg-ucsb <anguswatters@gmail.com> Date: Wed, 16 Oct 2024 15:54:01 -0700 Subject: [PATCH 61/64] updated inject ml script to use new version of hydrofabric3D --- runners/cs_runner/03_inject_ml.R | 35 +++++++++++++++++++++----------- 1 file changed, 23 insertions(+), 12 deletions(-) diff --git a/runners/cs_runner/03_inject_ml.R b/runners/cs_runner/03_inject_ml.R index 8c4940b..ec777ae 100644 --- 
a/runners/cs_runner/03_inject_ml.R +++ b/runners/cs_runner/03_inject_ml.R @@ -40,7 +40,7 @@ ml_output <- arrow::read_parquet(ML_OUTPUTS_PATH) # then classify the points, and create a parquet file with hy_id, cs_id, pt_id, X, Y, Z data. # Save parquet locally and upload to specified S3 bucket for (i in 1:nrow(path_df)) { - + # i = 8 start <- round(Sys.time()) # nextgen file and full path @@ -157,7 +157,7 @@ for (i in 1:nrow(path_df)) { message(round(Sys.time()), " - Replacing any negative width/depth estimates with cross section bottom lengths...") - cs_bottom_lengths <- hydrofabric3D::get_cs_bottom_length(cs_pts) + cs_bottom_lengths <- hydrofabric3D::get_cs_bottom_length(cross_section_pts = cs_pts, crosswalk_id = "hy_id") # TODO: for now we replace any negative TW values with the length of the bottom of the cross section # TODO: This method + the negative model output values both need to be looked into (04/05/2024) @@ -241,23 +241,31 @@ for (i in 1:nrow(path_df)) { ) } message(round(Sys.time()), " - Adding cross section bathymetry using inchannel widths/depths estimates...") - - # Add bathymetry using "inchannel" estimates - inchannel_cs <- hydrofabric3D::add_cs_bathymetry( - cross_section_pts = inchannel_cs - ) + + # system.time({ + + # Add bathymetry using "inchannel" estimates + inchannel_cs <- hydrofabric3D::add_cs_bathymetry( + cross_section_pts = inchannel_cs, + crosswalk_id = "hy_id" + ) + + # }) # arrow::write_parquet(inchannel_cs, "/Users/anguswatters/Desktop/test_ml_cs_pts_06.parquet") # ml_subset %>% # dplyr::filter(hy_id == "wb-1005207") %>% # dplyr::select(owp_y_inchan, owp_tw_inchan) %>% # .$owp_y_inchan message(round(Sys.time()), " - Adding cross section bathymetry using bankful widths/depths estimates...") - + + # system.time({ # Add bathymetry using "bankful" estimates bankful_cs <- hydrofabric3D::add_cs_bathymetry( - cross_section_pts = bankful_cs + cross_section_pts = bankful_cs, + crosswalk_id = "hy_id" ) - + # }) + # combine the inchannel and bankful cross section points back together, fill out missing values and reclassify the points final_cs <- dplyr::bind_rows( dplyr::select( @@ -287,9 +295,12 @@ for (i in 1:nrow(path_df)) { message(round(Sys.time()), " - Reclassifying cross section point types...") + # system.time({ # reclassify - final_cs <- hydrofabric3D::classify_points(final_cs, crosswalk_id = "hy_id") - + final_cs <- hydrofabric3D::classify_points(cs_pts = final_cs, + crosswalk_id = "hy_id") + # }) + # final_uids <- final_cs %>% hydrofabric3D::get_unique_tmp_ids() # random_uids <- sample(x=final_uids, size=12) # cs_subset <- dplyr::filter(hydrofabric3D::add_tmp_id(final_cs), From 22cabf362dffec94d2a948ebf02ad61bcb759219 Mon Sep 17 00:00:00 2001 From: anguswg-ucsb <anguswatters@gmail.com> Date: Fri, 15 Nov 2024 14:54:50 -0800 Subject: [PATCH 62/64] slowly moving all cs runner files and code over to a new folder for cleanliness, going to delete old folder eventually when done moving stuff over/making changes --- runners/cs_runner/01_transects.R | 25 +- runners/cs_runner/02_cs_pts.R | 10 +- runners/cs_runner/03_inject_ml.R | 4 +- runners/cs_runner/config.R | 6 +- runners/cs_runner/config_vars.R | 51 +- runners/cs_runner/domain_with_fema.R | 363 ++- runners/cs_runner/domain_with_fema2.R | 2245 +++++++++++++++ runners/cs_runner/download_fema100.R | 123 - runners/cs_runner/new_domain.R | 2 +- runners/cs_runner/test_cs_pts.R | 256 ++ runners/cs_runner/test_fix_cs_pts.R | 278 ++ runners/cs_runner/utils.R | 2315 +++++++++++++-- runners/cs_runner2/base_variables.R | 396 
+++ runners/cs_runner2/config_env.R | 26 + runners/cs_runner2/download_conus_nextgen.R | 28 + .../cs_runner2/download_conus_ref_features.R | 31 + runners/cs_runner2/download_dem_from_vrt.R | 32 + runners/cs_runner2/download_fema100.R | 123 + .../partition_fema_by_vpu.R | 384 ++- runners/cs_runner2/utils.R | 2562 +++++++++++++++++ 20 files changed, 8764 insertions(+), 496 deletions(-) create mode 100644 runners/cs_runner/domain_with_fema2.R delete mode 100644 runners/cs_runner/download_fema100.R create mode 100644 runners/cs_runner/test_cs_pts.R create mode 100644 runners/cs_runner/test_fix_cs_pts.R create mode 100644 runners/cs_runner2/base_variables.R create mode 100644 runners/cs_runner2/config_env.R create mode 100644 runners/cs_runner2/download_conus_nextgen.R create mode 100644 runners/cs_runner2/download_conus_ref_features.R create mode 100644 runners/cs_runner2/download_dem_from_vrt.R create mode 100644 runners/cs_runner2/download_fema100.R rename runners/{cs_runner => cs_runner2}/partition_fema_by_vpu.R (69%) create mode 100644 runners/cs_runner2/utils.R diff --git a/runners/cs_runner/01_transects.R b/runners/cs_runner/01_transects.R index 01bc78a..ee609aa 100644 --- a/runners/cs_runner/01_transects.R +++ b/runners/cs_runner/01_transects.R @@ -9,7 +9,7 @@ source("runners/cs_runner/utils.R") # install.packages("devtools") # # transect bucket prefix -# S3_TRANSECTS_DIR <- paste0(S3_BUCKET_URI, VERSION, "/3D/transects/") +# S3_TRANSECTS_DIR <- paste0(LYNKER_SPATIAL_HF_S3_URI, VERSION, "/3D/transects/") # paths to nextgen datasets and model attribute parquet files NEXTGEN_FILES <- list.files(NEXTGEN_DIR, full.names = FALSE) @@ -57,9 +57,10 @@ for(i in 1:nrow(path_df)) { # calculate bankfull width flines <- flines %>% - dplyr::mutate( - bf_width = exp(0.700 + 0.365* log(tot_drainage_areasqkm)) - ) %>% + hydrofabric3D::add_powerlaw_bankful_width( + total_drainage_area_sqkm_col = "tot_drainage_areasqkm", + min_bf_width = 50 + ) %>% dplyr::select( hy_id = id, lengthkm, @@ -68,6 +69,17 @@ for(i in 1:nrow(path_df)) { mainstem, geometry = geom ) + # dplyr::mutate( + # bf_width = exp(0.700 + 0.365* log(tot_drainage_areasqkm)) + # ) %>% + # dplyr::select( + # hy_id = id, + # lengthkm, + # tot_drainage_areasqkm, + # bf_width, + # mainstem, + # geometry = geom + # ) # flines$bf_width <- ifelse(is.na(flines$bf_width), exp(0.700 + 0.365* log(flines$tot_drainage_areasqkm)), flines$bf_width) @@ -76,8 +88,9 @@ for(i in 1:nrow(path_df)) { # create transect lines transects <- hydrofabric3D::cut_cross_sections( net = flines, # flowlines network - id = "hy_id", # Unique feature ID - cs_widths = pmax(50, flines$bf_width * 11), # cross section width of each "id" linestring ("hy_id") + crosswalk_id = "hy_id", # Unique feature ID + cs_widths = flines$bf_width, # cross section width of each "id" linestring ("hy_id") + # cs_widths = pmax(50, flines$bf_width * 11), # cross section width of each "id" linestring ("hy_id") # cs_widths = pmax(50, flines$bf_width), # cross section width of each "id" linestring ("hy_id") num = 10, # number of cross sections per "id" linestring ("hy_id") smooth = TRUE, # smooth lines diff --git a/runners/cs_runner/02_cs_pts.R b/runners/cs_runner/02_cs_pts.R index 264a2ab..1614e10 100644 --- a/runners/cs_runner/02_cs_pts.R +++ b/runners/cs_runner/02_cs_pts.R @@ -113,7 +113,7 @@ for (i in 20:nrow(path_df)) { crosswalk_id = "hy_id", points_per_cs = NULL, min_pts_per_cs = 10, - dem = DEM_URL + dem = DEM_PATH ) # }) @@ -175,7 +175,7 @@ for (i in 20:nrow(path_df)) { # transects = transects, 
# original transect lines # points_per_cs = NULL, # min_pts_per_cs = 10, # number of points per cross sections - # dem = DEM_URL, # DEM to extract points from + # dem = DEM_PATH, # DEM to extract points from # scale = EXTENSION_PCT, # How far to extend transects if the points need to be rechecked # pct_of_length_for_relief = PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF, # percent of cross sections length to be needed in relief calculation to consider cross section to "have relief" # fix_ids = FALSE, @@ -192,7 +192,7 @@ for (i in 20:nrow(path_df)) { crosswalk_id = "hy_id", points_per_cs = NULL, min_pts_per_cs = 10, # number of points per cross sections - dem = DEM_URL, # DEM to extract points from + dem = DEM_PATH, # DEM to extract points from scale = EXTENSION_PCT, # How far to extend transects if the points need to be rechecked pct_of_length_for_relief = PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF, # percent of cross sections length to be needed in relief calculation to consider cross section to "have relief" fix_ids = FALSE, @@ -214,7 +214,7 @@ for (i in 20:nrow(path_df)) { # transects = transects, # original transect lines # points_per_cs = NULL, # min_pts_per_cs = 10, # number of points per cross sections - # dem = DEM_URL, # DEM to extract points from + # dem = DEM_PATH, # DEM to extract points from # scale = EXTENSION_PCT, # How far to extend transects if the points need to be rechecked # pct_of_length_for_relief = PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF, # percent of cross sections length to be needed in relief calculation to consider cross section to "have relief" # fix_ids = FALSE, @@ -407,7 +407,7 @@ for (i in 20:nrow(path_df)) { ids_before_align <- hydrofabric3D::add_tmp_id(fixed_pts)$tmp_id message("Aligning banks and smoothing bottoms...") - fixed_pts <- hydrofabric3D::align_banks_and_bottoms(cs_pts = fixed_pts) + fixed_pts <- hydrofabric3D::align_banks_and_bottoms(cs_pts = fixed_pts, crosswalk_id = "hy_id") # fixed_pts <- hydrofabric3D::align_banks_and_bottoms(cs_pts = fixed_pts, crosswalk_id = "hy_id") ids_after_align <- hydrofabric3D::add_tmp_id(fixed_pts)$tmp_id diff --git a/runners/cs_runner/03_inject_ml.R b/runners/cs_runner/03_inject_ml.R index ec777ae..f29efc7 100644 --- a/runners/cs_runner/03_inject_ml.R +++ b/runners/cs_runner/03_inject_ml.R @@ -11,8 +11,8 @@ source("runners/cs_runner/config.R") source("runners/cs_runner/utils.R") # cross section bucket prefix -S3_CS_ML_PTS_DIR <- paste0(S3_BUCKET_URI, VERSION, "/3D/cross-sections/") -# S3_CS_PTS_DIR <- paste0(S3_BUCKET_URI, "v20/3D/dem-cross-sections/") +S3_CS_ML_PTS_DIR <- paste0(LYNKER_SPATIAL_HF_S3_URI, VERSION, "/3D/cross-sections/") +# S3_CS_PTS_DIR <- paste0(LYNKER_SPATIAL_HF_S3_URI, "v20/3D/dem-cross-sections/") ML_OUTPUTS_PATH <- list.files(ML_OUTPUTS_DIR, full.names = TRUE) diff --git a/runners/cs_runner/config.R b/runners/cs_runner/config.R index c70320d..b70ef37 100644 --- a/runners/cs_runner/config.R +++ b/runners/cs_runner/config.R @@ -21,16 +21,16 @@ sf::sf_use_s2(FALSE) # # ------------------------------------------------------------------------------------- # # AWS S3 bucket URI -# S3_BUCKET_URI <- "s3://lynker-spatial/" +# LYNKER_SPATIAL_HF_S3_URI <- "s3://lynker-spatial/" # # name of bucket with nextgen data -# S3_BUCKET_NAME <- "lynker-spatial" +# LYNKER_SPATIAL_S3_BUCKET_NAME <- "lynker-spatial" # # the name of the folder in the S3 bucket with the nextgen data # S3_BUCKET_NEXTGEN_DIR <- "v20.1/gpkg/" # # full URI to the S3 bucket folder with the nextgen data -# S3_BUCKET_NEXTGEN_DIR_URI <- 
paste0(S3_BUCKET_URI, S3_BUCKET_NEXTGEN_DIR) +# S3_BUCKET_NEXTGEN_DIR_URI <- paste0(LYNKER_SPATIAL_HF_S3_URI, S3_BUCKET_NEXTGEN_DIR) # # reference features S3 bucket prefix # S3_BUCKET_REF_FEATURES_URI <- "s3://lynker-spatial/00_reference_features/gpkg/" diff --git a/runners/cs_runner/config_vars.R b/runners/cs_runner/config_vars.R index 1a834a4..bee5b45 100644 --- a/runners/cs_runner/config_vars.R +++ b/runners/cs_runner/config_vars.R @@ -1,4 +1,4 @@ -### EDIT BASE_DIR, AWS_PROFILE, and DEM_URL ### +### EDIT BASE_DIR, AWS_PROFILE, and DEM_PATH ### # --------------------------------------------------------------------------------- # ---- General paths and constants variables ---- @@ -7,7 +7,6 @@ # - AWS_PROFILE: AWS profile to run CLI commands # - VERSION: S3 prefix/folder of version to run / generate hydrofabric data for # --------------------------------------------------------------------------------- - # Base directory for local file storage BASE_DIR <- '/Users/anguswatters/Desktop/lynker-spatial' @@ -18,16 +17,16 @@ AWS_PROFILE <- "angus-lynker" VERSION <- "v20.1" # string to fill in "CS_SOURCE" column in output datasets -CS_SOURCE <- "hydrofabric3D" +CS_SOURCE <- "hydrofabric3D" # name of bucket with nextgen data -S3_BUCKET_NAME <- "lynker-spatial" -S3_BUCKET_SUBDIR <- "hydrofabric" +LYNKER_SPATIAL_S3_BUCKET_NAME <- "lynker-spatial" +LYNKER_SPATIAL_HF_S3_PREFIX <- "hydrofabric" # AWS S3 bucket URI -S3_BUCKET_BASE_URI <- paste0("s3://", S3_BUCKET_NAME, "/") -S3_BUCKET_URI <- paste0(S3_BUCKET_BASE_URI, S3_BUCKET_SUBDIR, "/") -# S3_BUCKET_URI <- "s3://lynker-spatial/" +LYNKER_SPATIAL_BASE_S3_URI <- paste0("s3://", LYNKER_SPATIAL_S3_BUCKET_NAME, "/") +LYNKER_SPATIAL_HF_S3_URI <- paste0(LYNKER_SPATIAL_BASE_S3_URI, LYNKER_SPATIAL_HF_S3_PREFIX, "/") +# LYNKER_SPATIAL_HF_S3_URI <- "s3://lynker-spatial/" # ------------------------------------------------------------------------------------- # ---- S3 output directories ----- @@ -37,13 +36,13 @@ S3_BUCKET_URI <- paste0(S3_BUCKET_BASE_URI, S3_BUCKET_SUBDIR, "/") # ------------------------------------------------------------------------------------- # transect bucket prefix -S3_TRANSECTS_DIR <- paste0(S3_BUCKET_URI, VERSION, "/3D/transects/") +S3_TRANSECTS_DIR <- paste0(LYNKER_SPATIAL_HF_S3_URI, VERSION, "/3D/transects/") # cross section bucket prefix -S3_CS_PTS_DIR <- paste0(S3_BUCKET_URI, VERSION, "/3D/dem-cross-sections/") +S3_CS_PTS_DIR <- paste0(LYNKER_SPATIAL_HF_S3_URI, VERSION, "/3D/dem-cross-sections/") # cross section bucket prefix -S3_CS_ML_PTS_DIR <- paste0(S3_BUCKET_URI, VERSION, "/3D/cross-sections/") +S3_CS_ML_PTS_DIR <- paste0(LYNKER_SPATIAL_HF_S3_URI, VERSION, "/3D/cross-sections/") # ------------------------------------------------------------------------------------- # ---- S3 nextgen data paths / directories ----- @@ -54,10 +53,10 @@ S3_BUCKET_NEXTGEN_DIR <- paste0(VERSION, "/gpkg/") # S3_BUCKET_NEXTGEN_DIR <- "v20.1/gpkg/" # full URI to the S3 bucket folder with the nextgen data -S3_BUCKET_NEXTGEN_DIR_URI <- paste0(S3_BUCKET_URI, S3_BUCKET_NEXTGEN_DIR) +S3_BUCKET_NEXTGEN_DIR_URI <- paste0(LYNKER_SPATIAL_HF_S3_URI, S3_BUCKET_NEXTGEN_DIR) # reference features S3 bucket prefix -S3_BUCKET_REF_FEATURES_URI <- paste0("s3://", S3_BUCKET_NAME, "/00_reference_features/gpkg/") +S3_BUCKET_REF_FEATURES_URI <- paste0("s3://", LYNKER_SPATIAL_S3_BUCKET_NAME, "/00_reference_features/gpkg/") # S3_BUCKET_REF_FEATURES_URI <- "s3://lynker-spatial/00_reference_features/gpkg/" # 
---------------------------------------------------------------------------- @@ -69,16 +68,18 @@ ML_OUTPUTS_S3_FILE <- "channel_ml_outputs.parquet" # ML_OUTPUTS_S3_DIR <- paste0(VERSION, "/3D/ml-outputs/") # ML_OUTPUTS_S3_DIR <- "v20.1/3D/ml-outputs/" -ML_OUTPUTS_S3_URI <- paste0(S3_BUCKET_URI, VERSION, "/3D/ml-outputs/", ML_OUTPUTS_S3_FILE) -# ML_OUTPUTS_S3_URI <- paste0(S3_BUCKET_URI, ML_OUTPUTS_S3_DIR, ML_OUTPUTS_S3_FILE) +ML_OUTPUTS_S3_URI <- paste0(LYNKER_SPATIAL_HF_S3_URI, VERSION, "/3D/ml-outputs/", ML_OUTPUTS_S3_FILE) +# ML_OUTPUTS_S3_URI <- paste0(LYNKER_SPATIAL_HF_S3_URI, ML_OUTPUTS_S3_DIR, ML_OUTPUTS_S3_FILE) ML_OUTPUTS_PATH <- paste0(BASE_DIR, "/ml-outputs/", ML_OUTPUTS_S3_FILE) # path to the remote CONUS net parquet file CONUS_NETWORK_FILE <- "conus_net.parquet" -CONUS_NETWORK_URI <- paste0(S3_BUCKET_URI, VERSION, "/", CONUS_NETWORK_FILE) +CONUS_NETWORK_URI <- paste0(LYNKER_SPATIAL_HF_S3_URI, VERSION, "/", CONUS_NETWORK_FILE) # ---------------------------------------------------------------------------- + + # ------------------------------------------------------------------------------------- # ---- Local directory / path variables ---- # ------------------------------------------------------------------------------------- @@ -104,14 +105,18 @@ REF_FEATURES_GPKG_DIR <- paste0(REF_FEATURES_DIR, "gpkg/") # make a directory for the ML outputs data ML_OUTPUTS_DIR <- paste0(BASE_DIR, "/ml-outputs/") +DEM_DIR <- paste0(BASE_DIR, "/dem") +DEM_VRT_DIR <- paste0(DEM_DIR, "/vrt") +DEM_TIF_DIR <- paste0(DEM_DIR, "/tif") + # ------------------------------------------------------------------------------------- # ---- Create local directory / path variables (FEMA data) ---- # ------------------------------------------------------------------------------------- # location of FEMA 100 year flood plain FGB files FEMA_S3_BUCKET <- "s3://lynker-hydrofabric/" -FEMA_S3_BUCKET_PREFIX <- "FEMA100/" -FEMA_S3_DIR <- paste0(FEMA_S3_BUCKET, FEMA_S3_BUCKET_PREFIX) +LYNKER_HF_FEMA_S3_PREFIX <- "FEMA100/" +LYNKER_HF_FEMA_S3_URI <- paste0(FEMA_S3_BUCKET, LYNKER_HF_FEMA_S3_PREFIX) # FEMA100 year flood map FGB save location (temporary, will be deleted after processing) FEMA_FGB_PATH <- paste0(BASE_DIR, "/FEMA100") @@ -132,6 +137,10 @@ FEMA_VPU_SUBFOLDERS <- paste0(FEMA_BY_VPU_PATH, "/VPU_", VPU_IDS) # ) # ) +CS_EXTENSION_POLYGONS_DIR <- paste0(BASE_DIR, "/cs-extension-polygons") +CONUS_FEMA_GPKG_PATH <- file.path(CS_EXTENSION_POLYGONS_DIR, 'conus_fema.gpkg') + +# paste0(BASE_DIR/ # ------------------------------------------------------------------------------------- # ---- OVERWRITE_FEMA_FILES constant logicals---- # ---- > if TRUE, processing steps will be run again @@ -148,7 +157,7 @@ DELETE_STAGING_GPKGS <- TRUE # remove intermediary files from the main output f # ---------------------------------------------------------------------------- # DEM URL -DEM_URL <- "/vsicurl/https://prd-tnm.s3.amazonaws.com/StagedProducts/Elevation/1/TIFF/USGS_Seamless_DEM_1.vrt" +DEM_PATH <- "/vsicurl/https://prd-tnm.s3.amazonaws.com/StagedProducts/Elevation/1/TIFF/USGS_Seamless_DEM_1.vrt" # scale argument for cross_section_pts() function. 
# The percentage of the length of the transect line to try and extend a transect to see if viable Z values can be found by extending transect line @@ -247,8 +256,8 @@ DOMAIN_WITH_FEMA_OUTPUT_DIR <- paste0(DOMAIN_WITH_FEMA_DIR, "/", DOM DOMAIN_WITH_FEMA_VPU_SUBSETS_DIR <- paste0(DOMAIN_WITH_FEMA_DIR, "/", DOMAIN_WITH_FEMA_VPU_SUBSETS_DIRNAME) DOMAIN_WITH_FEMA_ML_DIR <- paste0(DOMAIN_WITH_FEMA_DIR, "/", DOMAIN_WITH_FEMA_ML_DIRNAME) -ML_AUXILIARY_DATA_S3_URI <- paste0(S3_BUCKET_BASE_URI, "bathymetry/ml_auxiliary_data") -# S3_BUCKET_BASE_URI +ML_AUXILIARY_DATA_S3_URI <- paste0(LYNKER_SPATIAL_BASE_S3_URI, "bathymetry/ml_auxiliary_data") +# LYNKER_SPATIAL_BASE_S3_URI LYNKER_HYDROFABRIC_S3_BUCKET_BASE_URI <- "s3://lynker-hydrofabric/" ML_BATHYMETRY_S3_DATA_DIR <- "hydrofabric/nextgen/bathymetry/multisource_river_attributes/" ML_BATHYMETRY_S3_URI <- paste0(LYNKER_HYDROFABRIC_S3_BUCKET_BASE_URI, ML_BATHYMETRY_S3_DATA_DIR) diff --git a/runners/cs_runner/domain_with_fema.R b/runners/cs_runner/domain_with_fema.R index ec13e6e..f673ffb 100644 --- a/runners/cs_runner/domain_with_fema.R +++ b/runners/cs_runner/domain_with_fema.R @@ -83,9 +83,13 @@ flines <- TRUE ~ VPUID ) ) +# rm(flines2) +# unnest_ids(flines2$VPUID2) # set of unique VPUs + VPU_IDS <- unnest_ids(flines$VPUID) +# VPU_IDS <- unnest_ids(flines2$VPUID2) VPU_IDS # all possible FEMA dirs @@ -98,13 +102,9 @@ flines %>% # unique(flines$VPUID) # calculate bankfull width -flines <- - flines %>% - dplyr::mutate( - bf_width = hydrofabric3D::calc_powerlaw_bankful_width(tot_drainage_areasqkm) - # bf_width = exp(0.700 + 0.365* log(tot_drainage_areasqkm)) - ) %>% - # hydrofabric3D::add_powerlaw_bankful_width("tot_drainage_areasqkm", 50) %>% +flines <- + flines %>% + hydrofabric3D::add_powerlaw_bankful_width("tot_drainage_areasqkm", 50) %>% dplyr::select( dplyr::any_of(CROSSWALK_ID), VPUID, @@ -116,8 +116,309 @@ flines <- ) %>% hydroloom::rename_geometry("geometry") +transects <- hydrofabric3D::cut_cross_sections( + net = flines, # flowlines network + crosswalk_id = CROSSWALK_ID, # Unique feature ID + cs_widths = flines$bf_width, + # cs_widths = pmax(50, flowlines$bf_width * 11), # cross section width of each "id" linestring ("hy_id") + # cs_widths = pmax(50, flowlines$bf_width), # cross section width of each "id" linestring ("hy_id") + num = 10, # number of cross sections per "id" linestring ("hy_id") + smooth = TRUE, # smooth lines + densify = 3, # densify linestring points + rm_self_intersect = TRUE, # remove self intersecting transects + fix_braids = FALSE, # whether to fix braided flowlines or not + #### Arguments used for when fix_braids = TRUE # TODO: these methods need revision in hydrofabric3D to allow for more flexible processing for data that is NOT COMID based (i.e. 
hy_id)
+  # terminal_id = NULL,
+  # braid_threshold = NULL,
+  # version = 2,
+  # braid_method = "comid",
+  # precision = 1,
+  add = TRUE # whether to add back the original data
+)
+
+sf::write_sf(
+  transects,
+  "/Users/anguswatters/Desktop/tmp.gpkg"
+)
+
+# flines <-
+#   flines %>%
+#   dplyr::mutate(
+#     bf_width = hydrofabric3D::calc_powerlaw_bankful_width(tot_drainage_areasqkm)
+#     # bf_width = exp(0.700 + 0.365* log(tot_drainage_areasqkm))
+#   ) %>%
+#   # hydrofabric3D::add_powerlaw_bankful_width("tot_drainage_areasqkm", 50) %>%
+#   dplyr::select(
+#     dplyr::any_of(CROSSWALK_ID),
+#     VPUID,
+#     # hy_id = id,
+#     lengthkm,
+#     tot_drainage_areasqkm,
+#     bf_width,
+#     mainstem
+#   ) %>%
+#   hydroloom::rename_geometry("geometry")
+
+
+transects <-
+  transects %>%
+  dplyr::left_join(
+    flines %>%
+      dplyr::select(id, VPUID, tot_drainage_areasqkm) %>%
+      sf::st_drop_geometry(),
+    by = "id"
+  )
+
+# transects$VPUID %>% unique()
+
+GROUP_VPU_IDS <- unnest_ids(transects$VPUID)
+
+# all FEMA dirs for the current area
+GROUP_FEMA_DIRS <- FEMA_VPU_SUBFOLDERS[basename(FEMA_VPU_SUBFOLDERS) %in% paste0("VPU_", GROUP_VPU_IDS) ]
+GROUP_FEMA_FILES <- list.files(GROUP_FEMA_DIRS, full.names = T)[grepl("_output.gpkg", list.files(GROUP_FEMA_DIRS, full.names = T))]
+
+# read each FEMA geopackage into a list
+fema <- lapply(GROUP_FEMA_FILES, function(gpkg) sf::read_sf(gpkg))
+
+fema <-
+  fema %>%
+  dplyr::bind_rows() %>%
+  dplyr::mutate(
+    fema_id = 1:dplyr::n()
+  )
+
+message("Simplifying FEMA polygons...")
+message(" - Number of geoms BEFORE simplifying: ", nrow(fema))
+
+# TODO: this should be a function argument OR removed, shouldn't probably forcibly and silently simplify the input polygons without user knowing..
+# keep 1% of the original points for speed
+fema <- rmapshaper::ms_simplify(fema, keep_shapes = T, keep = 0.01, sys = TRUE, sys_mem = 16)
+# fema <- rmapshaper::ms_simplify(fema, keep_shapes = T, keep = 0.1, sys = TRUE, sys_mem = 16)
+
+message(" - Number of geoms AFTER simplifying: ", nrow(fema))
+message("Extending transects out to FEMA 100yr floodplain polygon boundaries - (", Sys.time(), ")")
+
+transects <-
+  transects %>%
+  dplyr::left_join(
+    dplyr::select(sf::st_drop_geometry(flines),
+                  dplyr::any_of(CROSSWALK_ID),
+                  mainstem
+    ),
+    by = CROSSWALK_ID
+  )
+
+# TODO: make sure this 3000m extension distance is appropriate across VPUs
+# TODO: also got to make sure that this will be feasible on memory on the larger VPUs...
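+# NOTE: sketch of a quick sanity check (not run as part of this script) - assuming
+# extend_transects_to_polygons() returns the same transects, in the same row order,
+# with geometries extended out to the FEMA polygon boundaries, the 3000m cap used
+# in the call below can be checked afterwards:
+# extension_m <- as.numeric(sf::st_length(ext_transects)) - as.numeric(sf::st_length(transects))
+# summary(extension_m) # should be >= 0 and roughly bounded by 2 * max_extension_distance
+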
+ext_transects <- hydrofabric3D::extend_transects_to_polygons(
+  transect_lines         = transects,
+  polygons               = fema,
+  flowlines              = flines,
+  crosswalk_id           = CROSSWALK_ID,
+  grouping_id            = "mainstem",
+  max_extension_distance = 3000
+)
+
+sf::write_sf(
+  ext_transects,
+  "/Users/anguswatters/Desktop/tmp_ext.gpkg"
+)
+
+ext_transects <-
+  ext_transects %>%
+  dplyr::select(id, cs_id, cs_lengthm, cs_measure, ds_distance, lengthm, sinuosity, geometry)
+
+# get cross section point elevations
+cs_pts <- hydrofabric3D::cross_section_pts(
+  cs             = ext_transects,
+  crosswalk_id   = CROSSWALK_ID,
+  points_per_cs  = NULL,
+  min_pts_per_cs = 10,
+  dem            = DEM_PATH
+)
+
+sf::write_sf(
+  cs_pts,
+  "/Users/anguswatters/Desktop/tmp_cs_pts.gpkg"
+)
+# ----------------------------------------------------------------------------------------------------------------
+# ---- STEP 2: Remove any cross section that has ANY missing (NA) Z values, and classify the points ----
+# ----------------------------------------------------------------------------------------------------------------
+# cs_pts2 %>%
+#   dplyr::slice(1:200) %>%
+#   dplyr::rename(hy_id = id) %>%
+#   hydrofabric3D::plot_cs_pts(x = "pt_id", color = "point_type")
+
+cs_pts <-
+  # cs_pts2 <-
+  cs_pts %>%
+  hydrofabric3D::drop_incomplete_cs_pts(CROSSWALK_ID) %>%
+  hydrofabric3D::classify_points(
+    crosswalk_id             = CROSSWALK_ID,
+    pct_of_length_for_relief = PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF
+  )
+
+# })
+
+ids_original_cs_pts <- hydrofabric3D::add_tmp_id(cs_pts, x = CROSSWALK_ID)$tmp_id
+# ids_original_cs_pts <- hydrofabric3D::add_tmp_id(cs_pts2)$tmp_id
+
+# sf::write_sf(cs_pts2, "/Users/anguswatters/Desktop/test_improve_cs_pts_classified_11.gpkg")
+# sf::write_sf(cs_pts, "/Users/anguswatters/Desktop/test_improve_cs_pts_classified_11_2.gpkg")
+
+
+# ----------------------------------------------------------------------------------------------------------------
+# ---- STEP 3: Try to rectify any no relief and invalid banks cross sections ----
+# ----------------------------------------------------------------------------------------------------------------
+
+# system.time({
+fixed_pts <- hydrofabric3D::get_improved_cs_pts(
+  cs_pts         = cs_pts,        # cross section points generated from hydrofabric3D::cross_section_pts()
+  net            = flines,        # original flowline network
+  # net            = flowlines,   # original flowline network
+  transects      = ext_transects, # original transect lines
+  crosswalk_id   = CROSSWALK_ID,
+  points_per_cs  = NULL,
+  min_pts_per_cs = 10, # number of points per cross sections
+  dem            = DEM_PATH, # DEM to extract points from
+  scale          = EXTENSION_PCT, # How far to extend transects if the points need to be rechecked
+  pct_of_length_for_relief = PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF, # percent of cross sections length to be needed in relief calculation to consider cross section to "have relief"
+  fix_ids = FALSE,
+  verbose = TRUE
+)
+# })
+
+ids_after_fixed_pts <- hydrofabric3D::add_tmp_id(fixed_pts, x = CROSSWALK_ID)$tmp_id
+
+# ----------------------------------------------------------------------------------------------------------------
+# ---- Update transects with extended transects (if exists) ----
+# ----------------------------------------------------------------------------------------------------------------
+ext_transects <-
+  ext_transects %>%
+  dplyr::mutate(cs_source = CS_SOURCE)
+
+out_transects <- match_transects_to_extended_cs_pts(
+  transect_lines = ext_transects,
+  fixed_cs_pts   = fixed_pts,
+  crosswalk_id   = CROSSWALK_ID,
+  extension_pct  = EXTENSION_PCT
+)
+
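+# NOTE: sketch of an optional check (not run as part of this script) - assuming
+# match_transects_to_extended_cs_pts() keeps exactly the id/cs_id pairs found in
+# the fixed cross section points, the two tmp id sets should be identical,
+# mirroring the pairwise %in% comparisons below:
+# stopifnot(
+#   setequal(
+#     hydrofabric3D::get_unique_tmp_ids(out_transects, "id"),
+#     hydrofabric3D::get_unique_tmp_ids(fixed_pts, "id")
+#   )
+# )
+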
+sf::write_sf(
+  out_transects,
+  "/Users/anguswatters/Desktop/tmp_trans_improved.gpkg"
+)
+
+trans_uids     <- hydrofabric3D::get_unique_tmp_ids(out_transects, "id")
+ext_trans_uids <- hydrofabric3D::get_unique_tmp_ids(ext_transects, "id")
+fixed_pts_uids <- hydrofabric3D::get_unique_tmp_ids(fixed_pts, "id")
+
+all(trans_uids %in% ext_trans_uids)
+all(ext_trans_uids %in% trans_uids)
+
+all(trans_uids %in% fixed_pts_uids)
+all(fixed_pts_uids %in% trans_uids)
+
+all(ext_trans_uids %in% fixed_pts_uids)
+all(fixed_pts_uids %in% ext_trans_uids)
+
+# ----------------------------------------------------------------------------------------------------------------
+# ---- Re enumerate the transects & cross section points "cs_id" ----
+# ----------------------------------------------------------------------------------------------------------------
+
+# fixed_pts <- hydrofabric3D:::renumber_cs_ids(df = fixed_pts, crosswalk_id = "hy_id")
+# out_transects <- hydrofabric3D:::renumber_cs_ids(
+#   df = dplyr::mutate(out_transects, pt_id = 1),
+#   crosswalk_id = "hy_id"
+# ) %>%
+#   dplyr::select(-pt_id)
+
+fixed_pts2     <- hydrofabric3D:::renumber_cs_ids(df = fixed_pts, crosswalk_id = CROSSWALK_ID)
+out_transects2 <- hydrofabric3D:::renumber_cs_ids(df = out_transects, crosswalk_id = CROSSWALK_ID)
+sf::write_sf(
+  out_transects2,
+  "/Users/anguswatters/Desktop/tmp_trans_improved2.gpkg"
+)
+object.size(out_transects2)
+# classify the cross section points
+fixed_pts <-
+  fixed_pts %>%
+  dplyr::mutate(
+    X = sf::st_coordinates(.)[,1],
+    Y = sf::st_coordinates(.)[,2]
+  ) %>%
+  sf::st_drop_geometry() %>%
+  dplyr::select(
+    dplyr::any_of(CROSSWALK_ID),
+    cs_id,
+    pt_id,
+    cs_lengthm,
+    relative_distance,
+    X, Y, Z,
+    class, point_type,
+    bottom, left_bank, right_bank, valid_banks, has_relief # newly added columns (03/06/2024)
+  )
+
+# add Z_source column for source of elevation data
+fixed_pts <-
+  fixed_pts %>%
+  dplyr::mutate(
+    Z_source = CS_SOURCE
+  ) %>%
+  dplyr::relocate(
+    dplyr::any_of(CROSSWALK_ID),
+    cs_id, pt_id, cs_lengthm, relative_distance, X, Y, Z, Z_source,
+    class, point_type,
+    bottom, left_bank, right_bank, valid_banks, has_relief)
+
+ids_before_align <- hydrofabric3D::add_tmp_id(fixed_pts, x = CROSSWALK_ID)$tmp_id
+
+message("Aligning banks and smoothing bottoms...")
+fixed_pts <- hydrofabric3D::align_banks_and_bottoms(cs_pts = fixed_pts, crosswalk_id = CROSSWALK_ID)
+
+ids_after_align <- hydrofabric3D::add_tmp_id(fixed_pts, x = CROSSWALK_ID)$tmp_id
+
+message("Reclassifying cross section points...")
+
+fixed_pts <- hydrofabric3D::classify_points(
+  cs_pts                   = fixed_pts,
+  crosswalk_id             = CROSSWALK_ID,
+  pct_of_length_for_relief = PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF
+)
+
+ids_after_reclassify <- hydrofabric3D::add_tmp_id(fixed_pts, x = CROSSWALK_ID)$tmp_id
+
+if(all(ids_original_cs_pts %in% ids_after_fixed_pts)) {
+  message("All hy_id/cs_ids in ORIGINAL DEM point extraction were found in the FIXED points")
+} else {
+  message(" >>> Missing hy_id/cs_ids in ORIGINAL DEM point extraction compared to the FIXED points")
+}
+
+if(all(ids_before_align %in% ids_after_align)) {
+  message("All hy_id/cs_ids are kept intact after bank alignment and bottom smoothing")
+} else {
+  message(" >>> Missing hy_id/cs_ids after bank alignment and bottom smoothing")
+}
+
+if(all(ids_after_align %in% ids_after_reclassify)) {
+  message("All hy_id/cs_ids are kept intact after RECLASSIFICATION")
+} else {
+  message(" >>> Missing hy_id/cs_ids after RECLASSIFICATION")
+}
+
+sf::write_sf(
+  out_transects,
+ 
paste0(DOMAIN_WITH_FEMA_VPU_SUBSETS_DIR, "/", VPU, "_transects.gpkg") +) + +arrow::write_parquet( + fixed_pts, + paste0(DOMAIN_WITH_FEMA_VPU_SUBSETS_DIR, "/", VPU, "_cs_pts.parquet") +) # save the flowlines subset DOMAIN_WITH_FEMA_FLOWLINE_SUBSET_PATH <- paste0(DOMAIN_WITH_FEMA_FLOWLINES_DIR, "/flowlines_subset.gpkg") + sf::write_sf( flines, DOMAIN_WITH_FEMA_FLOWLINE_SUBSET_PATH @@ -142,7 +443,8 @@ for (i in seq_along(fline_groups)) { } for (i in seq_along(fline_groups)) { - + + # i = 3 flowlines <- fline_groups[[i]] VPU <- unique(flowlines$VPUID) @@ -158,11 +460,14 @@ for (i in seq_along(fline_groups)) { # GROUP_FEMA_FILES <- list.files(GROUP_FEMA_DIRS, full.names = T) # GROUP_FEMA_FILES <- GROUP_FEMA_FILES[grepl("_output.gpkg", GROUP_FEMA_FILES)] + # ((flowlines$bf_width) / 11)[1] * 11 + # create transect lines transects <- hydrofabric3D::cut_cross_sections( net = flowlines, # flowlines network - id = CROSSWALK_ID, # Unique feature ID - cs_widths = pmax(50, flowlines$bf_width * 11), # cross section width of each "id" linestring ("hy_id") + crosswalk_id = CROSSWALK_ID, # Unique feature ID + cs_widths = flowlines$bf_width, + # cs_widths = pmax(50, flowlines$bf_width * 11), # cross section width of each "id" linestring ("hy_id") # cs_widths = pmax(50, flowlines$bf_width), # cross section width of each "id" linestring ("hy_id") num = 10, # number of cross sections per "id" linestring ("hy_id") smooth = TRUE, # smooth lines @@ -229,7 +534,7 @@ for (i in seq_along(fline_groups)) { # TODO: make sure this 3000m extension distance is appropriate across VPUs # TODO: also got to make sure that this will be feasible on memory on the larger VPUs... - transects <- hydrofabric3D::extend_transects_to_polygons( + ext_transects <- hydrofabric3D::extend_transects_to_polygons( transect_lines = transects, polygons = fema, flowlines = flowlines, @@ -241,8 +546,10 @@ for (i in seq_along(fline_groups)) { # mapview::mapview(transects, color = "green") + # mapview::mapview(transects2, color = "red") - transects <- - transects %>% + # transects <- + # transects %>% + ext_transects <- + ext_transects %>% hydrofabric3D::add_tmp_id(x = CROSSWALK_ID) %>% dplyr::mutate(is_extended = FALSE) %>% dplyr::select( @@ -259,10 +566,28 @@ for (i in seq_along(fline_groups)) { paste(GROUP_VPU_IDS, collapse = "_"), "_transects.gpkg" ) + # ext_transects %>% + # dplyr::filter(id == "wb-2414904") %>% + # .$geometry %>% + # mapview::mapview() + + # ext_transects %>% + # dplyr::slice(90000:100000) %>% + # .$geometry %>% + # mapview::mapview() + + # wb-2414904 + + # length_data <- + # ext_transects %>% + # dplyr::mutate( + # line_len = as.numeric(sf::st_length(geometry)) + # ) + message("Writting transect lines for VPU group: '", VPU, "'", "\n > '", out_path, "'") - sf::write_sf(transects, out_path) + sf::write_sf(ext_transects, out_path) message("Finished writting transects!") @@ -293,6 +618,7 @@ paths_df <- data.frame( ) for (i in 1:nrow(paths_df)) { + # i =3 # i = 2 VPU <- paths_df$vpu[i] t_path <- paths_df$t[i] @@ -324,7 +650,7 @@ for (i in 1:nrow(paths_df)) { crosswalk_id = CROSSWALK_ID, points_per_cs = NULL, min_pts_per_cs = 10, - dem = DEM_URL + dem = DEM_PATH ) # ---------------------------------------------------------------------------------------------------------------- @@ -366,7 +692,7 @@ for (i in 1:nrow(paths_df)) { crosswalk_id = CROSSWALK_ID, points_per_cs = NULL, min_pts_per_cs = 10, # number of points per cross sections - dem = DEM_URL, # DEM to extract points from + dem = DEM_PATH, # DEM to extract points from scale 
= EXTENSION_PCT, # How far to extend transects if the points need to be rechecked pct_of_length_for_relief = PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF, # percent of cross sections length to be needed in relief calculation to consider cross section to "have relief" fix_ids = FALSE, @@ -383,7 +709,8 @@ for (i in 1:nrow(paths_df)) { out_transects <- match_transects_to_extended_cs_pts( transect_lines = transects, fixed_cs_pts = fixed_pts, - crosswalk_id = CROSSWALK_ID + crosswalk_id = CROSSWALK_ID, + extension_pct = EXTENSION_PCT ) # ---------------------------------------------------------------------------------------------------------------- @@ -400,6 +727,8 @@ for (i in 1:nrow(paths_df)) { fixed_pts <- hydrofabric3D:::renumber_cs_ids(df = fixed_pts, crosswalk_id = CROSSWALK_ID) out_transects <- hydrofabric3D:::renumber_cs_ids(df = out_transects, crosswalk_id = CROSSWALK_ID) + # out_transects %>% + # dplyr::filter(id == "wb-2425607") %>% .$geometry %>% plot() # ---------------------------------------------------------------------------------------------------------------- # ---- STEP 4: Update transects with extended transects (if exists) ---- # ---------------------------------------------------------------------------------------------------------------- diff --git a/runners/cs_runner/domain_with_fema2.R b/runners/cs_runner/domain_with_fema2.R new file mode 100644 index 0000000..3d574e0 --- /dev/null +++ b/runners/cs_runner/domain_with_fema2.R @@ -0,0 +1,2245 @@ +# Generate the transects + cs_pts + cross sections layers for a single flowlines domain file and DEM file +source("runners/cs_runner/config.R") +source("runners/cs_runner/utils.R") + +# load libraries +# library(hydrofabric3D) +# library(dplyr) +# library(sf) + +# Unique Flowline ID column name +CROSSWALK_ID <- "id" + +# --------------------------------------------------------------------- +# --- Read in flowlines +# --------------------------------------------------------------------- + +# Subsetting area +aoi <- sf::read_sf(DOMAIN_WITH_FEMA_SUBSET_PATH, layer = "divides") +aoi <- rmapshaper::ms_simplify(aoi, keep = 0.05) + +id_col <- "id" +# id_col <- "divide_id" + +# query the conus.gpkg for matching IDs +ids <- unique(aoi[[id_col]]) +query_ids <- paste0(paste0("'", ids, "'"), collapse= ", ") + +gpkg_layers <- sf::st_layers(DOMAIN_WITH_FEMA_FLOWLINES_PATH) +layer <- gpkg_layers$name[gpkg_layers$name == "flowpaths"] + +wkt <- + aoi %>% + rmapshaper::ms_dissolve() %>% + # rmapshaper::ms_explode() %>% + # sf::st_as_sfc() %>% + sf::st_sf() %>% + sf::st_geometry() %>% + sf::st_as_text() + +# read in flowlines based on IDs in AOI +flines <- sf::read_sf(DOMAIN_WITH_FEMA_FLOWLINES_PATH, layer = "flowpaths", + wkt_filter = wkt + # query = sprintf("SELECT * FROM \"%s\" WHERE %s IN (%s)", layer, id_col, query_ids), + # query = query + ) + +# flines <- +# flines %>% +# dplyr::slice(1:1000) + +# bad_ids <- c("wb-14538", "wb-14686", "wb-14687") +# flines <- +# flines %>% +# dplyr::filter(id %in% bad_ids) +# --------------------------------------------------------------------- +# --- Split flowlines by VPU +# --------------------------------------------------------------------- + +# VPUs polygons +VPU_boundaries <- sf::st_transform(nhdplusTools::vpu_boundaries, sf::st_crs(flines)) + +# add a VPU ID column to each flowline +flines <- add_intersects_ids(x = flines, y = VPU_boundaries, id_col = "VPUID") + +# TODO: improve this, manual remap some VPUIDs so that there are +# TODO: less small subsets of flowlines because a small bit of a different 
VPU is intersected +flines <- + flines %>% + dplyr::mutate( + VPUID = dplyr::case_when( + VPUID == "12" ~ "11, 12, 13", + VPUID == "12, 13" ~ "11, 12, 13", + VPUID == "11, 12" ~ "11, 12, 13", + VPUID == "01" ~ "01, 02", + TRUE ~ VPUID + ) + ) + +# rm(flines2) +# unnest_ids(flines2$VPUID2) + +# set of unique VPUs + +VPU_IDS <- unnest_ids(flines$VPUID) +# VPU_IDS <- unnest_ids(flines2$VPUID2) +VPU_IDS + +# all possible FEMA dirs +AOI_FEMA_DIRS <- FEMA_VPU_SUBFOLDERS[basename(FEMA_VPU_SUBFOLDERS) %in% paste0("VPU_", VPU_IDS) ] + +flines %>% + sf::st_drop_geometry() %>% + dplyr::group_by(VPUID) %>% + dplyr::count() + +flines %>% + sf::st_drop_geometry() %>% + dplyr::group_by(order) %>% + dplyr::count() +# unique(flines$VPUID) + +# calculate bankfull width +flines <- + flines %>% + hydrofabric3D::add_powerlaw_bankful_width("tot_drainage_areasqkm", 50) %>% + dplyr::select( + dplyr::any_of(CROSSWALK_ID), + VPUID, + # hy_id = id, + lengthkm, + tot_drainage_areasqkm, + bf_width, + order, + mainstem + ) %>% + hydroloom::rename_geometry("geometry") + + +sf::write_sf( + flines, + paste0(DOMAIN_WITH_FEMA_FLOWLINES_DIR, "/flowlines_subset.gpkg") +) + +transects <- hydrofabric3D::cut_cross_sections( + net = flines, # flowlines network + crosswalk_id = CROSSWALK_ID, # Unique feature ID + cs_widths = flines$bf_width, + # cs_widths = pmax(50, flowlines$bf_width * 11), # cross section width of each "id" linestring ("hy_id") + # cs_widths = pmax(50, flowlines$bf_width), # cross section width of each "id" linestring ("hy_id") + num = 10, # number of cross sections per "id" linestring ("hy_id") + smooth = TRUE, # smooth lines + densify = 3, # densify linestring points + rm_self_intersect = TRUE, # remove self intersecting transects + fix_braids = FALSE, # whether to fix braided flowlines or not + #### Arguments used for when fix_braids = TRUE # TODO: these methods need revision in hydrofabric3D to allow for more flexible processing for data that is NOT COMID based (i.e. 
hy_id) + # terminal_id = NULL, + # braid_threshold = NULL, + # version = 2, + # braid_method = "comid", + # precision = 1, + add = TRUE # whether to add back the original data +) + +transects <- + transects %>% + dplyr::mutate(cs_source = CS_SOURCE) %>% + dplyr::select(id, + cs_source, + cs_id, + cs_measure, + ds_distance, + cs_lengthm, + sinuosity, + geometry + ) + +# paste0(DOMAIN_WITH_FEMA_TRANSECTS_DIR, "/pre_extension_transects.gpkg") + +sf::write_sf( + transects, + paste0(DOMAIN_WITH_FEMA_TRANSECTS_DIR, "/pre-extension-transects.gpkg") +) + +# [1] "wb-2416492" "wb-2414869" "wb-2415765" "wb-2416657" "wb-2415479" "wb-2420307" "wb-2425952" "wb-2421269" "wb-2425750" +# [10] "wb-2422028" "wb-2419212" "wb-2426092" "wb-2414971" "wb-2416120" "wb-2414960" "wb-2433259" "wb-14381" "wb-18022" +# [19] "wb-15173" "wb-15160" "wb-571" "wb-2413739" "wb-508015" "wb-507770" "wb-2424752" "wb-2424545" "wb-2422849" +# [28] "wb-2417939" "wb-2425090" "wb-2422212" "wb-2422714" "wb-2419650" "wb-2419650" "wb-2416492" "wb-2414869" "wb-2415765" +# [37] "wb-2416657" "wb-2415479" "wb-2420307" "wb-2419694" "wb-2419694" "wb-2419974" "wb-2419974" "wb-2420065" "wb-2420065" +# [46] "wb-2425952" "wb-2420678" "wb-2420678" "wb-2421269" "wb-2419695" "wb-2419695" "wb-2425750" "wb-2422028" "wb-2419212" +# [55] "wb-2426091" "wb-2414971" "wb-2407999" "wb-2407999" "wb-2426131" "wb-2426131" "wb-2416120" "wb-2433525" "wb-2433525" +# [64] "wb-2433259" "wb-14381" "wb-14576" "wb-14576" "wb-18022" "wb-15173" "wb-15160" "wb-11097" "wb-11097" +# [73] "wb-14804" "wb-14804" "wb-571" "wb-2408123" "wb-2408123" "wb-2413739" "wb-508015" "wb-507770" "wb-2424752" +# [82] "wb-2435791" "wb-2435791" "wb-2435798" "wb-2435798" "wb-2435779" "wb-2435779" "wb-2435781" "wb-2435781" "wb-2424545" +# [91] "wb-2422849" "wb-2417939" "wb-2425090" "wb-2422212" "wb-2422714" + +# transects <- sf::read_sf(paste0(DOMAIN_WITH_FEMA_TRANSECTS_DIR, "/pre-extension-transects.gpkg")) + +# out_transects +# out_transects2 + +transects <- + transects %>% + dplyr::left_join( + flines %>% + dplyr::select(id, VPUID, mainstem, tot_drainage_areasqkm) %>% + sf::st_drop_geometry(), + by = "id" + ) + +# sf::write_sf( +# transects, +# paste0("/Users/anguswatters/Desktop/tmp_trans.gpkg") +# ) + +GROUP_VPU_IDS <- unnest_ids(transects$VPUID) + +# all FEMA dirs for the current area +GROUP_FEMA_DIRS <- FEMA_VPU_SUBFOLDERS[basename(FEMA_VPU_SUBFOLDERS) %in% paste0("VPU_", GROUP_VPU_IDS) ] +GROUP_FEMA_FILES <- list.files(GROUP_FEMA_DIRS, full.names = T)[grepl("_output.gpkg", list.files(GROUP_FEMA_DIRS, full.names = T))] + + +# read each FEMA geopackage into a list +fema <- lapply(GROUP_FEMA_FILES, function(gpkg) sf::read_sf(gpkg)) +# transects$VPUID %>% unique() + +fema <- + fema %>% + dplyr::bind_rows() %>% + dplyr::mutate( + fema_id = 1:dplyr::n() + ) + +message("Simplifying FEMA polygons...") +message(" - Number of geoms BEFORE simplifying: ", nrow(fema)) + +# TODO: this should be a function argument OR removed, shouldn't probably forcibly and silently simplify the input polygons without user knowing.. 
+# keep 1% of the original points for speed +fema <- rmapshaper::ms_simplify(fema, keep_shapes = T, keep = 0.01, sys = TRUE, sys_mem = 16) +# fema <- rmapshaper::ms_simplify(fema, keep_shapes = T, keep = 0.1, sys = TRUE, sys_mem = 16) + +# sf::write_sf( +# fema, +# paste0("/Users/anguswatters/Desktop/tmp_fema.gpkg") +# ) + +message(" - Number of geoms AFTER simplifying: ", nrow(fema)) +message("Extending transects out to FEMA 100yr floodplain polygon boundaries - (", Sys.time(), ")") + +# transects <- +# transects %>% +# dplyr::left_join( +# dplyr::select(sf::st_drop_geometry(flines), +# dplyr::any_of(CROSSWALK_ID), +# mainstem +# ), +# by = CROSSWALK_ID +# ) + +# test_ids <- c("wb-2416492", "wb-2414869", "wb-2415765" , "wb-2416657" , "wb-2415479", +# "wb-2420307" , "wb-2425952" , "wb-2421269" , "wb-2425750", +# "wb-14804", "wb-14804", "wb-571", "wb-2408123", "wb-2408123" +# ) +# test_trans <- +# transects %>% +# dplyr::filter(id %in% test_ids) +# +# test_flines <- +# flines %>% +# dplyr::filter(id %in% test_ids) + +# test_fema +# test_trans$VPUID + +# TODO: make sure this 3000m extension distance is appropriate across VPUs +# TODO: also got to make sure that this will be feasible on memory on the larger VPUs... +# transects <- hydrofabric3D::extend_transects_to_polygons( +ext_transects <- hydrofabric3D::extend_transects_to_polygons( + transect_lines = transects, + polygons = fema, + flowlines = flines, + # transect_lines = test_trans, + # polygons = fema, + # flowlines = test_flines, + crosswalk_id = CROSSWALK_ID, + grouping_id = "mainstem", + max_extension_distance = 3000 +) + +# sf::write_sf( +# flines, +# paste0("/Users/anguswatters/Desktop/tmp_flines.gpkg") +# ) + +# flines <- sf::read_sf( paste0("/Users/anguswatters/Desktop/tmp_flines.gpkg")) +# transects <- sf::read_sf(paste0("/Users/anguswatters/Desktop/tmp_trans.gpkg")) +# fema <- sf::read_sf( paste0("/Users/anguswatters/Desktop/tmp_fema.gpkg")) +# +# transect_lines <- transects +# polygons <- fema +# flowlines <- flines +# bad_id <- "wb-10813" +# mainstem <- "1977479" +# crosswalk_id = "id" +# grouping_id = "mainstem" +# max_extension_distance = 3000 + + +# hydrofabric3D:::rm_self_intersections(ext_transects) +# transects <- +# transects %>% +ext_transects <- + ext_transects %>% + dplyr::mutate(cs_source = CS_SOURCE) %>% + dplyr::select(id, + cs_source, + cs_id, + cs_measure, + ds_distance, + cs_lengthm, + sinuosity, + geometry + ) + +# ext_transects <- +# ext_transects %>% +# dplyr::select(id, cs_id, cs_lengthm, cs_measure, ds_distance, sinuosity, +# # tot_drainage_areasqkm, +# geometry) + +sf::write_sf( + # transects, + ext_transects, + paste0(DOMAIN_WITH_FEMA_TRANSECTS_DIR, "/post-extension-transects.gpkg") +) + +# get cross section point elevations +cs_pts <- hydrofabric3D::cross_section_pts( + # cs = transects, + cs = ext_transects, + crosswalk_id = CROSSWALK_ID, + points_per_cs = NULL, + min_pts_per_cs = 10, + dem = DEM_PATH +) + +# mapview::mapview(cs_pts) + test_trans + +sf::write_sf( + cs_pts, + paste0(DOMAIN_WITH_FEMA_CS_PTS_DIR, "/raw-cs-pts.gpkg") +) + +# DOMAIN_WITH_FEMA_CS_PTS_DIR +# ---------------------------------------------------------------------------------------------------------------- +# ---- STEP 2: Remove any cross section that has ANY missing (NA) Z values, and classify the points ---- +# ---------------------------------------------------------------------------------------------------------------- +# cs_pts2 %>% +# dplyr::slice(1:200) %>% +# dplyr::rename(hy_id = id) %>% +# 
hydrofabric3D::plot_cs_pts(x = "pt_id", color = "point_type") + +cs_pts <- + # cs_pts2 <- + cs_pts %>% + hydrofabric3D::drop_incomplete_cs_pts(CROSSWALK_ID) %>% + hydrofabric3D::classify_points( + crosswalk_id = CROSSWALK_ID, + pct_of_length_for_relief = PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF + ) + +# cs_pts %>% +# dplyr::slice(150:250) %>% +# hydrofabric3D::plot_cs_pts("id", color = "point_type", size = 4) + +# }) + +ids_original_cs_pts <- hydrofabric3D::add_tmp_id(cs_pts, x = CROSSWALK_ID)$tmp_id +# ids_original_cs_pts <- hydrofabric3D::add_tmp_id(cs_pts2)$tmp_id + +# sf::write_sf(cs_pts2, "/Users/anguswatters/Desktop/test_improve_cs_pts_classified_11.gpkg") +# sf::write_sf(cs_pts, "/Users/anguswatters/Desktop/test_improve_cs_pts_classified_11_2.gpkg") + + +# ---------------------------------------------------------------------------------------------------------------- +# ---- STEP 3: Try to rectify any no relief and invalid banks cross sections ---- +# ---------------------------------------------------------------------------------------------------------------- + +system.time({ +fixed_pts <- hydrofabric3D::get_improved_cs_pts( + cs_pts = cs_pts, # cross section points generated from hydrofabric3D::cross_section_pts() + net = flines, # original flowline network + # net = test_flines, + # net = flowlines, # original flowline network + # transects = transects, # original transect lines + transects = ext_transects, # original transect lines + crosswalk_id = CROSSWALK_ID, + points_per_cs = NULL, + min_pts_per_cs = 10, # number of points per cross sections + dem = DEM_PATH, # DEM to extract points from + scale = EXTENSION_PCT, # How far to extend transects if the points need to be rechecked + pct_of_length_for_relief = PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF, # percent of cross sections length to be needed in relief calculation to consider cross section to "have relief" + fix_ids = FALSE, + verbose = TRUE +) +}) + +# sf::write_sf( +# test_flines, +# "/Users/anguswatters/Desktop/test_flines.gpkg" +# ) +# +# sf::write_sf( +# test_trans, +# "/Users/anguswatters/Desktop/test_start_trans.gpkg" +# ) +# +# sf::write_sf( +# ext_transects, +# "/Users/anguswatters/Desktop/test_ext_trans.gpkg" +# ) +# +# sf::write_sf( +# cs_pts, +# "/Users/anguswatters/Desktop/test_cs_pts.gpkg" +# ) + +# mapview::mapview(fixed_pts) + +# mapview::mapview(ext_transects, color = "green") + +# mapview::mapview(out_transects, color = "red") + +fixed_pts$Z %>% is.null() %>% any() +fixed_pts$Z %>% is.na() %>% any() + +ids_after_fixed_pts <- hydrofabric3D::add_tmp_id(fixed_pts, x = CROSSWALK_ID)$tmp_id + +# ---------------------------------------------------------------------------------------------------------------- +# ---- Update transects with extended transects (if exists) ---- +# ---------------------------------------------------------------------------------------------------------------- +# ext_transects <- +# ext_transects %>% +# dplyr::mutate(cs_source = CS_SOURCE) + +# end_pts <- +# fixed_pts %>% +# dplyr::filter(id == "wb-2435034", cs_id == 1) %>% +# # hydrofabric3D::plot_cs_pts("id") +# dplyr::slice( +# which.min(pt_id), +# which.max(pt_id) +# ) +# mapview::mapview(end_pts) + tt +# tt <- +# ext_transects %>% +# dplyr::filter(id == "wb-2435034", cs_id == 1) +# hydrofabric3D:::match_transects_to_extended_cs_pts() + +out_transects <- hydrofabric3D:::match_transects_to_extended_cs_pts( + # transect_lines = transects, + transect_lines = ext_transects, + fixed_cs_pts = fixed_pts, + crosswalk_id = CROSSWALK_ID + # 
extension_pct = EXTENSION_PCT +) + +# sf::write_sf( +# out_transects, +# paste0(DOMAIN_WITH_FEMA_TRANSECTS_DIR, "/cs-point-extension-transects.gpkg") +# # "/Users/anguswatters/Desktop/tmp_trans_improved.gpkg" +# ) + +# fixed_pts2 <- hydrofabric3D:::renumber_cs_ids(df = fixed_pts, crosswalk_id = CROSSWALK_ID) +# out_transects2 <- hydrofabric3D:::renumber_cs_ids(df = out_transects, crosswalk_id = CROSSWALK_ID) + +fixed_pts <- + fixed_pts %>% +# fixed_pts2 <- + # fixed_pts %>% + hydrofabric3D:::renumber_cs_ids(crosswalk_id = CROSSWALK_ID) %>% + dplyr::group_by(id) %>% + dplyr::arrange(cs_id, .by_group = TRUE) %>% + dplyr::ungroup() %>% + dplyr::group_by(id, cs_id) %>% + dplyr::arrange(pt_id, .by_group = TRUE) %>% + dplyr::ungroup() + + +out_transects <- + out_transects %>% +# out_transects2 <- +# out_transects %>% + hydrofabric3D:::renumber_cs_ids(crosswalk_id = CROSSWALK_ID) %>% + dplyr::group_by(id) %>% + dplyr::arrange(cs_id, .by_group = TRUE) %>% + dplyr::ungroup() %>% + dplyr::select( + -left_bank_count, + -right_bank_count, + -channel_count, + -bottom_count, + -bottom, + -left_bank, + -right_bank, + -valid_banks, + -has_relief, + -is_extended + ) + +sf::write_sf( + out_transects, + # out_transects2, + paste0(DOMAIN_WITH_FEMA_TRANSECTS_DIR, "/cs-point-extension-transects.gpkg") + # "/Users/anguswatters/Desktop/tmp_trans_improved.gpkg" +) + +# sf::write_sf( +# fixed_pts2, +# paste0(DOMAIN_WITH_FEMA_CS_PTS_DIR, "/fixed_cs_pts.gpkg") +# ) + +# transects %>% +# dplyr::group_by(id) %>% +# dplyr::arrange(cs_id, .by_group = TRUE) %>% +# dplyr::ungroup() +# +# out_transects2 %>% +# # dplyr::filter(id %in% c("wb-2435034", "wb-2435045")) %>% +# dplyr::group_by(id) %>% +# dplyr::arrange(cs_id, .by_group = TRUE) %>% +# dplyr::ungroup() + +# sf::write_sf( +# out_transects2, +# paste0(DOMAIN_WITH_FEMA_TRANSECTS_DIR, "/cs-point-extension-transects.gpkg") +# # "/Users/anguswatters/Desktop/tmp_trans_improved.gpkg" +# ) +# +# sf::write_sf( +# fixed_pts2, +# paste0(DOMAIN_WITH_FEMA_CS_PTS_DIR, "/fixed_cs_pts.gpkg") +# ) + +# trans_uids <- hydrofabric3D::get_unique_tmp_ids(out_transects, "id") +# ext_trans_uids <- hydrofabric3D::get_unique_tmp_ids(ext_transects, "id") +# fixed_pts_uids <- hydrofabric3D::get_unique_tmp_ids(fixed_pts, "id") + +# all(trans_uids %in% ext_trans_uids) +# all(ext_trans_uids %in% trans_uids) +# all(trans_uids %in% fixed_pts_uids) +# all(fixed_pts_uids %in% trans_uids) +# all(ext_trans_uids %in% fixed_pts_uids) +# all(fixed_pts_uids %in% ext_trans_uids) + +# ---------------------------------------------------------------------------------------------------------------- +# ---- Re enumerate the transects & cross section points "cs_id" ---- +# ---------------------------------------------------------------------------------------------------------------- + +# fixed_pts <- hydrofabric3D:::renumber_cs_ids(df = fixed_pts, crosswalk_id = "hy_id") +# out_transects <- hydrofabric3D:::renumber_cs_ids( +# df = dplyr::mutate(out_transects, pt_id = 1), +# crosswalk_id = "hy_id" +# ) %>% +# dplyr::select(-pt_id) +# length(out_transects_uids) +# length(fixed_pts_uids) +# +# out_transects_uids <- hydrofabric3D::get_unique_tmp_ids(out_transects, "id") +# fixed_pts_uids <- hydrofabric3D::get_unique_tmp_ids(fixed_pts, "id") +# +# length(out_transects_uids) == length(fixed_pts_uids) & all(out_transects_uids %in% fixed_pts_uids) & all(fixed_pts_uids %in% out_transects_uids) +# +# fixed_pts2 <- hydrofabric3D:::renumber_cs_ids(df = fixed_pts, crosswalk_id = CROSSWALK_ID) +# out_transects2 <- 
hydrofabric3D:::renumber_cs_ids(df = out_transects, crosswalk_id = CROSSWALK_ID)
+#
+# out_transects2_uids <- hydrofabric3D::get_unique_tmp_ids(out_transects2, "id")
+# fixed_pts2_uids     <- hydrofabric3D::get_unique_tmp_ids(fixed_pts2, "id")
+#
+# length(out_transects2_uids) == length(fixed_pts2_uids) & all(out_transects2_uids %in% fixed_pts2_uids) & all(fixed_pts2_uids %in% out_transects2_uids)
+
+  # paste0(DOMAIN_WITH_FEMA_TRANSECTS_DIR, "/extended_transects.gpkg")
+
+# sf::write_sf(
+#   out_transects2,
+#   # "/Users/anguswatters/Desktop/tmp_trans_improved2.gpkg"
+#   paste0(DOMAIN_WITH_FEMA_TRANSECTS_DIR, "/final_transects.gpkg")
+# )
+
+# object.size(out_transects2)
+
+# classify the cross section points
+fixed_pts <-
+  fixed_pts %>%
+# fixed_pts2 <-
+  # fixed_pts2 %>%
+  dplyr::mutate(
+    X = sf::st_coordinates(.)[,1],
+    Y = sf::st_coordinates(.)[,2]
+  ) %>%
+  sf::st_drop_geometry() %>%
+  dplyr::select(
+    dplyr::any_of(CROSSWALK_ID),
+    cs_id,
+    pt_id,
+    cs_lengthm,
+    relative_distance,
+    X, Y, Z,
+    class, point_type,
+    bottom, left_bank, right_bank, valid_banks, has_relief # newly added columns (03/06/2024)
+  ) %>%
+  dplyr::mutate(
+    Z_source = CS_SOURCE
+  ) %>%
+  dplyr::relocate(
+    dplyr::any_of(CROSSWALK_ID),
+    cs_id, pt_id, cs_lengthm, relative_distance, X, Y, Z, Z_source,
+    class, point_type,
+    bottom, left_bank, right_bank, valid_banks, has_relief
+  )
+
+# # add Z_source column for source of elevation data
+# fixed_pts2 <-
+#   fixed_pts2 %>%
+#   dplyr::mutate(
+#     Z_source = CS_SOURCE
+#   ) %>%
+#   dplyr::relocate(
+#     dplyr::any_of(CROSSWALK_ID),
+#     cs_id, pt_id, cs_lengthm, relative_distance, X, Y, Z, Z_source
+#     # class, point_type,
+#     # bottom, left_bank, right_bank, valid_banks, has_relief
+#   )
+
+ids_before_align <- hydrofabric3D::add_tmp_id(fixed_pts, x = CROSSWALK_ID)$tmp_id
+
+message("Aligning banks and smoothing bottoms...")
+fixed_pts <- hydrofabric3D::align_banks_and_bottoms(cs_pts = fixed_pts, crosswalk_id = CROSSWALK_ID)
+
+ids_after_align <- hydrofabric3D::add_tmp_id(fixed_pts, x = CROSSWALK_ID)$tmp_id
+
+message("Reclassifying cross section points...")
+
+fixed_pts <- hydrofabric3D::classify_points(
+  cs_pts                   = fixed_pts,
+  crosswalk_id             = CROSSWALK_ID,
+  pct_of_length_for_relief = PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF
+)
+
+ids_after_reclassify <- hydrofabric3D::add_tmp_id(fixed_pts, x = CROSSWALK_ID)$tmp_id
+
+if(all(ids_original_cs_pts %in% ids_after_fixed_pts)) {
+  message("All hy_id/cs_ids in ORIGINAL DEM point extraction were found in the FIXED points")
+} else {
+  message(" >>> Missing hy_id/cs_ids in ORIGINAL DEM point extraction compared to the FIXED points")
+}
+
+if(all(ids_before_align %in% ids_after_align)) {
+  message("All hy_id/cs_ids are kept intact after bank alignment and bottom smoothing")
+} else {
+  message(" >>> Missing hy_id/cs_ids after bank alignment and bottom smoothing")
+}
+
+if(all(ids_after_align %in% ids_after_reclassify)) {
+  message("All hy_id/cs_ids are kept intact after RECLASSIFICATION")
+} else {
+  message(" >>> Missing hy_id/cs_ids after RECLASSIFICATION")
+}
+
+# sf::write_sf(
+#   out_transects,
+#   paste0(DOMAIN_WITH_FEMA_VPU_SUBSETS_DIR, "/", VPU, "_transects.gpkg")
+# )
+#
+
+arrow::write_parquet(
+  fixed_pts,
+  paste0(DOMAIN_WITH_FEMA_CS_PTS_DIR, "/final-cs-pts.parquet")
+)
+# paste0(DOMAIN_WITH_FEMA_CS_PTS_DIR, "/final-cs-pts.parquet")
+
+
+# ---------------------------------------------------------------------------------
+# ---- Inject ML predicted top widths / Dingman's R ----
+# 
---------------------------------------------------------------------------------
+
+# ---------------------------------------------------------------------------------
+# ---- Read in ML data ----
+# ---------------------------------------------------------------------------------
+
+VPU_ML_BATHYMETRY_PATHS <- list.files(DOMAIN_WITH_FEMA_ML_DIR, full.names = T)
+
+ML_CROSSWALK_ID <- "id"
+
+# ml_outputs <- lapply(VPU_ML_BATHYMETRY_PATHS, function(prq) {
+#   vpu_id <- gsub(".*ml/([a-zA-Z0-9]+).*", "\\1", prq)
+#   arrow::read_parquet(prq) %>%
+#     dplyr::mutate(vpu_id = vpu_id)
+#   }
+# ) %>%
+#   dplyr::bind_rows() %>%
+#   dplyr::select(
+#     dplyr::any_of(ML_CROSSWALK_ID),
+#     vpu_id,
+#     owp_y_bf, owp_y_inchan,
+#     owp_tw_bf, owp_tw_inchan,
+#     owp_dingman_r
+#   )
+#
+# # rename ML_CROSSWALK_ID (unique ID) to match the CROSSWALK_ID in CS PTS
+# # TODO: This assumes the IDs do correspond with each other... (built from same flowlines network)
+# names(ml_outputs)[names(ml_outputs) == ML_CROSSWALK_ID] = CROSSWALK_ID
+#
+# # Keep only distinct ID rows
+# ml_outputs <-
+#   ml_outputs %>%
+#   dplyr::distinct(
+#     dplyr::across(dplyr::any_of(CROSSWALK_ID)),
+#     vpu_id,
+#     owp_y_bf, owp_y_inchan,
+#     owp_tw_bf, owp_tw_inchan,
+#     owp_dingman_r
+#   )
+
+# sf::st_layers(DOMAIN_WITH_FEMA_FLOWLINES_PATH)
+# rm(ml_outputs, ml)
+ml_outputs <- sf::read_sf(DOMAIN_WITH_FEMA_FLOWLINES_PATH, layer = "flowpath-attributes-ml")
+
+ml_outputs <-
+  ml_outputs %>%
+  dplyr::select(
+    dplyr::any_of(ML_CROSSWALK_ID),
+    vpuid,
+    owp_y_bf      = YCC,
+    owp_y_inchan  = Y,
+    owp_tw_bf     = TopWdthCC,
+    owp_tw_inchan = TopWdth,
+    owp_dingman_r = dingman_r
+  )
+
+
+# # rename ML_CROSSWALK_ID (unique ID) to match the CROSSWALK_ID in CS PTS
+# # TODO: This assumes the IDs do correspond with each other... (built from same flowlines network)
+names(ml_outputs)[names(ml_outputs) == ML_CROSSWALK_ID] = CROSSWALK_ID
+#
+# # Keep only distinct ID rows
+ml_outputs <-
+  ml_outputs %>%
+  dplyr::distinct(
+    dplyr::across(dplyr::any_of(CROSSWALK_ID)),
+    vpuid,
+    owp_y_bf, owp_y_inchan,
+    owp_tw_bf, owp_tw_inchan,
+    owp_dingman_r
+  )
+
+# ---------------------------------------------------------------------------------
+# ---- Read in CS PTS data ----
+# ---------------------------------------------------------------------------------
+
+# CS_PTS_OUTPUT_PATH <- paste0(DOMAIN_WITH_FEMA_OUTPUT_DIR, "/cs_pts.parquet")
+CS_PTS_OUTPUT_PATH <- paste0(DOMAIN_WITH_FEMA_CS_PTS_DIR, "/final-cs-pts.parquet")
+
+cs_pts <- arrow::read_parquet(CS_PTS_OUTPUT_PATH)
+
+# ---------------------------------------------------------------------------------
+# ---- Join CS PTS with ML data ----
+# ---------------------------------------------------------------------------------
+message(round(Sys.time()), " - Joining ML width/depth estimates to cross section points...")
+
+# ml_outputs %>%
+#   dplyr::group_by(id) %>%
+#   dplyr::count(id) %>%
+#   dplyr::arrange(-n)
+
+# join the ML outputs data to the cross section points
+cs_pts <-
+  cs_pts %>%
+  dplyr::left_join(
+    dplyr::select(ml_outputs,
+                  dplyr::any_of(CROSSWALK_ID),
+                  owp_tw_inchan,
+                  owp_y_inchan,
+                  owp_tw_bf,
+                  owp_y_bf,
+                  owp_dingman_r
+    ),
+    by = CROSSWALK_ID
+  )
+
+# ---------------------------------------------------------------------------------
+# ---- Fixing negative depths/widths estimates ----
+# ---------------------------------------------------------------------------------
+message(round(Sys.time()), " - Replacing any negative width/depth estimates with cross section bottom lengths...")
+
+cs_bottom_lengths <- 
hydrofabric3D::get_cs_bottom_length(cross_section_pts = cs_pts, crosswalk_id = CROSSWALK_ID) + +# TODO: for now we replace any negative TW values with the length of the bottom of the cross section +# TODO: This method + the negative model output values both need to be looked into (04/05/2024) +cs_pts <- + cs_pts %>% + dplyr::left_join( + cs_bottom_lengths, + by = c(CROSSWALK_ID, "cs_id") + # by = c("hy_id", "cs_id") + ) %>% + dplyr::mutate( + owp_tw_inchan = dplyr::case_when( + owp_tw_inchan <= 0 ~ bottom_length, + TRUE ~ owp_tw_inchan + ), + owp_tw_bf = dplyr::case_when( + owp_tw_bf <= 0 ~ bottom_length, + TRUE ~ owp_tw_bf + ) + ) %>% + dplyr::select(-bottom_length) + +# extract any cross sections that didn't get matched with a "hf_id" and (or?) no ML data +# TODO: look at this stuff with Arash (04/09/2024) +missing_cs <- + cs_pts %>% + dplyr::filter( + is.na(.data[[CROSSWALK_ID]]) | + is.na(owp_tw_inchan) | is.na(owp_y_inchan) | + is.na(owp_tw_bf) | is.na(owp_y_bf) | + is.na(owp_dingman_r) + ) %>% + hydrofabric3D::add_tmp_id(x = CROSSWALK_ID) + +# TODO: Delete this, but time being keeping this to inspect mismatch in between "hy_id" and "hf_id" +# readr::write_csv( +# dplyr::select(missing_cs, -tmp_id), +# paste0(META_PATH, "nextgen_", path_df$vpu[i], "_cross_sections_missing_hf_ids.csv") +# ) + +# Split the cross sections into 2 groups: +# - "Inchannel cs" group are points with BOTH valid banks AND relief --> These get the INCHANNEL TW and Y values from the ML model +# - "Bankful cs" group are points WITHOUT valid banks OR any relief --> These get the BANKFUL TW and Y values from the ML model +inchannel_cs <- + cs_pts %>% + hydrofabric3D::add_tmp_id(x = CROSSWALK_ID) %>% + dplyr::filter(!tmp_id %in% unique(missing_cs$tmp_id)) %>% # NOTE: makes sure to remove any of the "missing" cross sections without data + dplyr::select(-tmp_id) %>% + dplyr::filter(valid_banks & has_relief) %>% + # NOTE: temporarily rename the top widths, depths, and dingman's R columns so they + # work nicely with the "hydrofabric3D::add_cs_bathymetry()" function which takes a dataframe of cross section points + # with "TW", "DEPTH", and "DINGMAN_R" columns for each cross section + dplyr::rename( + TW = owp_tw_inchan, + DEPTH = owp_y_inchan, + DINGMAN_R = owp_dingman_r + ) + +bankful_cs <- + cs_pts %>% + hydrofabric3D::add_tmp_id(x = CROSSWALK_ID) %>% + dplyr::filter(!tmp_id %in% unique(missing_cs$tmp_id)) %>% + dplyr::select(-tmp_id) %>% + dplyr::filter(!valid_banks | !has_relief) %>% + dplyr::rename( + TW = owp_tw_bf, + DEPTH = owp_y_bf, + DINGMAN_R = owp_dingman_r + ) + + +# sanity check that all rows are accounted for after splitting up data +split_kept_all_rows <- nrow(cs_pts) == (nrow(bankful_cs) + nrow(inchannel_cs) + nrow(missing_cs)) +# split_kept_all_rows <- nrow(cs_pts) == nrow(bankful_cs) + nrow(inchannel_cs) + +if (!split_kept_all_rows) { + warning(paste0("When splitting cross section points into 'bankful' and 'inchannel' groups,", + "\nsome points were not put in either group.", + "\nLikely due to 'valid_banks' and/or 'has_relief' columns have either missing ", + "values or contain values other than TRUE/FALSE") + ) +} + +message(round(Sys.time()), " - Adding cross section bathymetry using inchannel widths/depths estimates...") +# tmp <- +# inchannel_cs %>% +# dplyr::slice(1:10000) +system.time({ + + # Add bathymetry using "inchannel" estimates + inchannel_cs <- hydrofabric3D::add_cs_bathymetry( + cross_section_pts = inchannel_cs, + # cross_section_pts = tmp, + crosswalk_id = CROSSWALK_ID + ) + +}) + + + 
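+# NOTE: sketch only (not run as part of this script) - assuming add_cs_bathymetry()
+# distinguishes the original DEM-extracted points from the injected bathymetry
+# points via the `is_dem_point` column referenced further below, a quick way to
+# eyeball how many points were injected:
+# inchannel_cs %>%
+#   dplyr::count(is_dem_point)
+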
+# arrow::write_parquet(inchannel_cs, "/Users/anguswatters/Desktop/test_ml_cs_pts_06.parquet") +# ml_subset %>% +# dplyr::filter(hy_id == "wb-1005207") %>% +# dplyr::select(owp_y_inchan, owp_tw_inchan) %>% +# .$owp_y_inchan +message(round(Sys.time()), " - Adding cross section bathymetry using bankful widths/depths estimates...") + +system.time({ + + # Add bathymetry using "bankful" estimates + bankful_cs <- hydrofabric3D::add_cs_bathymetry( + cross_section_pts = bankful_cs, + # cross_section_pts = dplyr::slice(bankful_cs, 1:10000), + crosswalk_id = CROSSWALK_ID + ) + +}) + +# combine the inchannel and bankful cross section points back together, fill out missing values and reclassify the points +final_cs <- dplyr::bind_rows( + dplyr::select( + inchannel_cs, + # inchannel_cs2, + # -hf_id, + -TW, -DEPTH, -DINGMAN_R, + # -is_dem_point, + -dplyr::starts_with("owp") + ), + dplyr::select( + bankful_cs, + # bankful_cs2, + # -hf_id, + -TW, -DEPTH, -DINGMAN_R, + # -is_dem_point, + -dplyr::starts_with("owp") + ), + dplyr::select( + dplyr::mutate( + missing_cs, + is_dem_point = FALSE + ), + # -hf_id, + # -is_dem_point, + -dplyr::starts_with("owp"), + -tmp_id + ) + ) %>% + dplyr::group_by(dplyr::across(dplyr::any_of(c(CROSSWALK_ID, "cs_id")))) %>% + # dplyr::group_by(hy_id, cs_id) %>% + tidyr::fill( + c(cs_lengthm, Z_source) + ) %>% + dplyr::ungroup() %>% + dplyr::select( + -point_type, + -class, + -bottom, -left_bank, -right_bank, + -has_relief, -valid_banks + ) + +# arrow::write_parquet(final_cs, "/Users/anguswatters/Desktop/tmp.parquet") +# final_cs <- arrow::read_parquet("/Users/anguswatters/Desktop/tmp.parquet") + + +message(round(Sys.time()), " - Reclassifying cross section point types...") + +# reclassify +final_cs <- hydrofabric3D::classify_points(cs_pts = final_cs, + crosswalk_id = CROSSWALK_ID, + pct_of_length_for_relief = PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF +) + +starting_uids <- hydrofabric3D::get_unique_tmp_ids(cs_pts, x = CROSSWALK_ID) +ending_uids <- hydrofabric3D::get_unique_tmp_ids(final_cs, x = CROSSWALK_ID) + +has_same_number_of_uids <- length(starting_uids) == length(ending_uids) +all_starting_uids_in_ending_uids <- all(starting_uids %in% ending_uids) +all_ending_uids_in_starting_uids <- all(ending_uids %in% starting_uids) + +# throw some warnings if: +# - the number of uids in the input is different from the output +# - there are missing hy_id/cs_id +if (!has_same_number_of_uids) { + warning(paste0("The number of unique hy_id/cs_id is different in the ", + "starting cross section points from the final cross section points:", + "\n > Starting number of unique hy_id/cs_id: ", length(starting_uids), + "\n > Ending number of unique hy_id/cs_id: ", length(ending_uids) + )) +} + +if (!all_starting_uids_in_ending_uids) { + number_uids_not_in_ending_uids <- length(starting_uids[!starting_uids %in% ending_uids]) + + # starting_uids %in% ending_uids + warning( + paste0("Missing hy_id/cs_id in output that are in the starting input cross section points: ", + "\n > Number of hy_id/cs_id missing: ", number_uids_not_in_ending_uids + ) + ) + + # warning(paste0(number_uids_not_in_ending_uids, " hy_id/cs_id from the input cross section points ", + # "is missing from the output cross section points")) + +} + +# tmp_path <- paste0(DOMAIN_WITH_FEMA_OUTPUT_DIR, "/final_cs2.parquet") +# message("saving file", tmp_path) +# +# final_cs <- arrow::read_parquet(tmp_path) +# +# # save cross section points as a parquet to out_path (domain/outputs/cross-sections.parquet) +# arrow::write_parquet( +# 
dplyr::select(final_cs,
+#                        -is_dem_point
+#   ),
+#   paste0(DOMAIN_WITH_FEMA_OUTPUT_DIR, "/final_cs2.parquet")
+# )
+
+# ---------------------------------------------------------------------------------
+# ---- Write final cross section points data ----
+# ---------------------------------------------------------------------------------
+
+# CROSS_SECTIONS_OUTPUT_PATH <- paste0(DOMAIN_WITH_FEMA_OUTPUT_DIR, "/cross-sections.parquet")
+CROSS_SECTIONS_OUTPUT_PATH <- paste0(DOMAIN_WITH_FEMA_CROSS_SECTIONS_DIR, "/cross-sections.parquet")
+message(round(Sys.time()), " - Saving ML augmented cross section points to:\n - filepath: '", CROSS_SECTIONS_OUTPUT_PATH, "'")
+
+# save cross section points as a parquet to out_path (domain/outputs/cross-sections.parquet)
+arrow::write_parquet(
+  dplyr::select(final_cs,
+                -is_dem_point
+  ),
+  CROSS_SECTIONS_OUTPUT_PATH
+)
+
+INTERNAL_CROSS_SECTIONS_PATH <- paste0(DOMAIN_WITH_FEMA_CROSS_SECTIONS_DIR, "/cross-sections-is-dem-point.parquet")
+
+arrow::write_parquet(
+  final_cs,
+  INTERNAL_CROSS_SECTIONS_PATH
+)
+
+# ---------------------------------------------------------------------------------
+# ---- Substitute diffusive domain DEM Z values ----
+# ---------------------------------------------------------------------------------
+
+final_cs <- arrow::read_parquet(CROSS_SECTIONS_OUTPUT_PATH)
+# INTERNAL_CROSS_SECTIONS_PATH <- paste0("/Users/anguswatters/Desktop/cross_sections2.parquet")
+
+bb_df <- lapply(COASTAL_BATHY_DEM_PATHS, function(path) {
+  r <- terra::rast(path)
+  extent <- terra::ext(r)
+  # r <- terra::project(r, "EPSG:5070")
+  # terra::set.crs(r, "EPSG:5070")
+  ext_df <- data.frame(lapply(extent, function(i) {i}))
+  ext_df$crs  <- terra::crs(r)
+  ext_df$file <- basename(path)
+  ext_df$path <- path
+
+  return(ext_df)
+}) %>%
+  dplyr::bind_rows()
+
+# final_cs
+
+# INTERNAL_CROSS_SECTIONS_PATH <- paste0("/Users/anguswatters/Desktop/cross-sections-is-dem-point.parquet")
+# arrow::write_parquet(
+#   final_cs,
+#   INTERNAL_CROSS_SECTIONS_PATH
+# )
+
+# CROSS_SECTIONS_OUTPUT_PATH <- paste0(DOMAIN_WITH_FEMA_OUTPUT_DIR, "/cross-sections.parquet")
+# final_cs <- arrow::read_parquet(CROSS_SECTIONS_OUTPUT_PATH)
+# START_PATH <- paste0("/Users/anguswatters/Desktop/cross_sections2.parquet")
+#
+# arrow::write_parquet(
+#   final_cs,
+#   START_PATH
+# )
+
+# rm(i, count, cs, df, tmp, aoi, new_tmp, pts_subset, dem, has_pts_in_bb, bb, updated_depths)
+
+
+# CROSS_SECTIONS_OUTPUT_PATH
+# START_CROSS_SECTIONS_OUTPUT_PATH <- paste0(DOMAIN_WITH_FEMA_CROSS_SECTIONS_DIR, "/cross-sections.parquet")
+START_PATH   <- paste0(DOMAIN_WITH_FEMA_CROSS_SECTIONS_DIR, "/cross-sections.parquet")
+UPDATED_PATH <- paste0(DOMAIN_WITH_FEMA_CROSS_SECTIONS_DIR, "/coastal-bathy_cross-sections.parquet")
+
+count <- 0
+
+for (i in 1:nrow(bb_df)) {
+
+  # i = 1
+
+  is_first_iter <- count == 0
+  count         <- count + 1
+  CURR_PATH     <- ifelse(is_first_iter, START_PATH, UPDATED_PATH)
+
+  # CURR_PATH
+
+  message(i, " - Checking DEM '", bb_df$file[i], "' for CS PTS...")
+
+  df <- bb_df[i, ]
+
+  START_EPSG_CODE <- 5070
+
+  cs <- arrow::read_parquet(CURR_PATH)
+
+  cs <-
+    cs %>%
+    # dplyr::slice(1:10000) %>%
+    sf::st_as_sf(coords = c("X", "Y"), crs = START_EPSG_CODE)
+
+  # convert to bounding box CRS
+  cs <-
+    cs %>%
+    sf::st_transform(df$crs[1])
+
+  # create bounding box shape
+  bb <- sf::st_as_sf(
+    sf::st_as_sfc(
+      sf::st_bbox(
+        c(xmin = df$xmin,
+          xmax = df$xmax,
+          ymax = df$ymax,
+          ymin = df$ymin
+        ),
+        crs = sf::st_crs(df$crs)
+      )
+    )
+  )
+
+  # get pts that are in the bounding box
+  pts_subset <- 
sf::st_filter(
+    cs,
+    bb
+  )
+
+  has_pts_in_bb <- nrow(pts_subset) > 0
+
+  message(nrow(pts_subset), " cs points found within '", df$file, "' DEM's bounding box")
+
+  if(!has_pts_in_bb) {
+
+    message(" > No points to update!")
+    message(" > Overwriting original cross section points parquet with updated depth values ...")
+    message(" > '", UPDATED_PATH, "'")
+
+    cs <-
+      cs %>%
+      sf::st_transform(START_EPSG_CODE) %>%
+      dplyr::mutate(
+        X = sf::st_coordinates(.)[,1],
+        Y = sf::st_coordinates(.)[,2]
+      ) %>%
+      sf::st_drop_geometry() %>%
+      dplyr::select(
+        # hy_id,
+        dplyr::any_of(CROSSWALK_ID),
+        cs_id, pt_id,
+        cs_lengthm,
+        relative_distance,
+        X, Y,
+        Z,
+        class, point_type,
+        Z_source,
+        bottom, left_bank, right_bank, valid_banks,
+        has_relief
+      )
+
+    arrow::write_parquet(cs, UPDATED_PATH)
+
+    next
+  }
+
+  if(has_pts_in_bb) {
+
+    message(" > Loading Raster")
+
+    # load DEM
+    dem <- terra::rast(df$path)
+
+    message(" > Extracting new cross section depth values from DEM...")
+
+    updated_depths <-
+      pts_subset %>%
+      dplyr::mutate(
+        Z2        = hydrofabric3D:::extract_pt_val(dem, .),
+        Z_source2 = df$file
+        # Z = extract_pt_val(terra::rast(dem), .)
+      ) %>%
+      sf::st_drop_geometry() %>%
+      dplyr::select(
+        dplyr::any_of(CROSSWALK_ID),
+        cs_id, pt_id,
+        # Z,
+        Z2,
+        # Z_source,
+        Z_source2
+      )
+
+    message(" > Replacing old depth values with new depth values...")
+    cs <-
+      cs %>%
+      dplyr::left_join(
+        updated_depths,
+        by = c(CROSSWALK_ID, "cs_id", "pt_id")
+      ) %>%
+      dplyr::mutate(
+        Z = dplyr::case_when(
+          !is.na(Z2) ~ Z2,
+          TRUE       ~ Z
+        ),
+        Z_source = dplyr::case_when(
+          !is.na(Z_source2) ~ Z_source2,
+          TRUE              ~ Z_source
+        )
+      ) %>%
+      dplyr::select(-Z2, -Z_source2)
+
+    message(" > Projecting CS PTs back to starting CRS (", START_EPSG_CODE, ") ...")
+    cs <-
+      cs %>%
+      sf::st_transform(START_EPSG_CODE)
+
+    message(" > Dropping point geometries and preserving X / Y coordinates...")
+
+    cs <-
+      cs %>%
+      dplyr::mutate(
+        X = sf::st_coordinates(.)[,1],
+        Y = sf::st_coordinates(.)[,2]
+      ) %>%
+      sf::st_drop_geometry() %>%
+      dplyr::select(
+        # hy_id,
+        dplyr::any_of(CROSSWALK_ID),
+        cs_id, pt_id,
+        cs_lengthm,
+        relative_distance,
+        X, Y,
+        Z,
+        class, point_type,
+        Z_source,
+        bottom, left_bank, right_bank, valid_banks,
+        has_relief
+
+        # newly added columns (03/06/2024)
+      )
+
+
+    # cs %>%
+    #   dplyr::slice(1:27) %>%
+    #   hydrofabric3D::plot_cs_pts(crosswalk_id = "id", color = "point_type", size = 4)
+    #   hydrofabric3D::classify_points(crosswalk_id = "id")
+    #   hydrofabric3D::plot_cs_pts(crosswalk_id = "id", color = "point_type", size = 4)
+
+    # cs <-
+    #   cs %>%
+    #   # dplyr::slice(1:27) %>%
+    #   dplyr::select(-class, -point_type,
+    #                 -valid_banks, -has_relief,
+    #                 -bottom, -left_bank, -right_bank
+    #   ) %>%
+    #   hydrofabric3D::classify_points(crosswalk_id = "id") %>%
+    #   dplyr::select(
+    #     # hy_id,
+    #     dplyr::any_of(CROSSWALK_ID),
+    #     cs_id, pt_id,
+    #     Z,
+    #     relative_distance,
+    #     cs_lengthm,
+    #     class,
+    #     point_type,
+    #     X, Y,
+    #     Z_source,
+    #     bottom, left_bank, right_bank,
+    #     valid_banks, has_relief
+    #     # newly added columns (03/06/2024)
+    #   )
+    #   hydrofabric3D::plot_cs_pts(crosswalk_id = "id", color = "point_type", size = 4)
+
+    # tmp     <- final_cs %>% dplyr::filter(id == "wb-507785", cs_id == 4)
+    # new_tmp <- cs %>% dplyr::filter(id == "wb-507785", cs_id == 4)
+
+    # hydrofabric3D::plot_cs_pts(tmp, crosswalk_id = "id", color = "point_type", size = 4)
+    # hydrofabric3D::plot_cs_pts(new_tmp, crosswalk_id = "id", color = "point_type", size = 4)
+
+    message(" > Overwriting original cross section points parquet with 
updated depth values ...") + message(" > '", UPDATED_PATH, "'") + + arrow::write_parquet(cs, UPDATED_PATH) + + } + +} + +cross_sections <- arrow::read_parquet(UPDATED_PATH) + +# cross_sections %>% +# dplyr::select(Z_source) %>% +# dplyr::group_by(Z_source) %>% +# dplyr::count() + +cross_sections <- + cross_sections %>% + dplyr::select( + -point_type, + -class, + -bottom, -left_bank, -right_bank, + -has_relief, -valid_banks + ) + +# reclassify +# system.time({ +cross_sections <- hydrofabric3D::classify_points(cs_pts = cross_sections, + crosswalk_id = CROSSWALK_ID, + pct_of_length_for_relief = PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF +) + +cross_sections <- + cross_sections %>% + dplyr::select( + # hy_id, + dplyr::any_of(CROSSWALK_ID), + cs_id, pt_id, + Z, + relative_distance, + cs_lengthm, + class, + point_type, + X, Y, + Z_source, + bottom, left_bank, right_bank, + valid_banks, has_relief + # newly added columns (03/06/2024) + ) +# }) + +# START_PATH <- paste0(DOMAIN_WITH_FEMA_CROSS_SECTIONS_DIR, "/cross-sections.parquet") +# UPDATED_PATH <- paste0(DOMAIN_WITH_FEMA_CROSS_SECTIONS_DIR, "/coastal-bathy_cross-sections.parquet") + +arrow::write_parquet( + cross_sections, + paste0(DOMAIN_WITH_FEMA_CROSS_SECTIONS_DIR, "/coastal-bathy_cross-sections.parquet") + ) + +# ------------------------------------------------- +# ---- Move final datasets to outputs/ ---- +# ------------------------------------------------- + +select_cs_pts <- function(cs_pts, crosswalk_id = NULL) { + + if(is.null(crosswalk_id)) { + # crosswalk_id <- 'hydrofabric_id' + stop("Please provide a valid 'crosswalk_id' which uniquely identifies each cross section in 'cs_pts'") + } + + cs_pts <- + cs_pts %>% + dplyr::select( + dplyr::any_of(c( + crosswalk_id, + "cs_id", + "pt_id", + "relative_distance", + "cs_lengthm", + "X", + "Y", + "Z", + "Z_source", + "class", + "point_type", + "valid_banks", + "has_relief" + ) + ) + ) + + return(cs_pts) +} + +select_transects <- function(transects, crosswalk_id = NULL) { + + if(is.null(crosswalk_id)) { + # crosswalk_id <- 'hydrofabric_id' + stop("Please provide a valid 'crosswalk_id' which uniquely identifies the flowline associated with each transect in 'transects'") + } + + transects <- hydroloom::rename_geometry(transects, "geometry") + + transects <- + transects %>% + dplyr::select( + dplyr::any_of(c( + crosswalk_id, + "cs_source", + "cs_id", + "cs_measure", + "cs_lengthm", + "geometry" + ) + ) + ) + + return(transects) +} + + +# arrow::read_parquet(paste0(DOMAIN_WITH_FEMA_CS_PTS_DIR, "/final-cs-pts.parquet")) %>% +# select_cs_pts("id") + +# DEM based cross section points parquet +# - FEMA extended +# - CS based extensions +arrow::write_parquet( + arrow::read_parquet(paste0(DOMAIN_WITH_FEMA_CS_PTS_DIR, "/final-cs-pts.parquet")) %>% + select_cs_pts("id"), + # arrow::read_parquet(paste0(DOMAIN_WITH_FEMA_CS_PTS_DIR, "/final-cs-pts.parquet")), + paste0(DOMAIN_WITH_FEMA_OUTPUT_DIR, "/dem_cross-sections.parquet") +) + +# DEM based transect geometry gpkg +# - FEMA extended +# - CS based extensions +sf::write_sf( + sf::read_sf(paste0(DOMAIN_WITH_FEMA_TRANSECTS_DIR, "/cs-point-extension-transects.gpkg")) %>% + select_transects("id"), + # dplyr::rename(geometry = geom), + paste0(DOMAIN_WITH_FEMA_OUTPUT_DIR, "/transects.gpkg") +) + + +# DEM + ML cross section points parquet +# - FEMA extended +# - CS based extensions +# - ML based Z value updates +arrow::write_parquet( + arrow::read_parquet(paste0(DOMAIN_WITH_FEMA_CROSS_SECTIONS_DIR, "/cross-sections.parquet")) %>% + select_cs_pts("id"), + 
paste0(DOMAIN_WITH_FEMA_OUTPUT_DIR, "/ml_cross-sections.parquet") +) + +# DEM + COASTAL BATHY cross section points parquet +# - FEMA extended +# - CS based extensions +# - ML based Z value updates +# - Coastal bathy DEM Z value updates (where applicable) +arrow::write_parquet( + arrow::read_parquet(paste0(DOMAIN_WITH_FEMA_CROSS_SECTIONS_DIR, "/coastal-bathy_cross-sections.parquet")) %>% + select_cs_pts("id"), + paste0(DOMAIN_WITH_FEMA_OUTPUT_DIR, "/coastal-bathy_cross-sections.parquet") +) + +# ------------------------------------------------- +# ---- Data validation ---- +# ------------------------------------------------- + +# paste0(DOMAIN_WITH_FEMA_TRANSECTS_DIR, "/cs-point-extension-transects.gpkg") +# new_trans_path <- paste0(DOMAIN_WITH_FEMA_OUTPUT_DIR, "/transects.gpkg") + +# sf::st_layers(paste0(DOMAIN_WITH_FEMA_TRANSECTS_DIR, "/cs-point-extension-transects.gpkg")) +# sf::st_layers(paste0(DOMAIN_WITH_FEMA_OUTPUT_DIR, "/transects.gpkg")) + +# og_transects <- sf::read_sf(paste0(DOMAIN_WITH_FEMA_TRANSECTS_DIR, "/cs-point-extension-transects.gpkg")) +# new_transects <- sf::read_sf(paste0(DOMAIN_WITH_FEMA_OUTPUT_DIR, "/transects.gpkg")) + +# DOMAIN_WITH_FEMA_CROSS_SECTIONS_DIR + +output_files <- list.files(DOMAIN_WITH_FEMA_OUTPUT_DIR, full.names = T) +# rm(transects, dropped_trans, cs_pts, flowlines, lengths_check, transect_lengths, rel_dist_check, wrong_lengths, mismatches, duplicate_ids, cs_pt_lengths, cs_pts_ids) + +tmp_ids_list <- lapply(output_files, function(i) { + is_gpkg <- endsWith(i, ".gpkg") + is_parquet <- endsWith(i, ".parquet") + + if(is_gpkg) { + x <- + i %>% + sf::read_sf() %>% + hydrofabric3D::get_unique_tmp_ids(x = "id") + return(x) + } + + if(is_parquet) { + x <- + i %>% + arrow::read_parquet() %>% + hydrofabric3D::get_unique_tmp_ids(x = "id") + return(x) + } + return(NULL) + + + }) + + +tmp_ids_list + +# tmp_ids_list[[1]] %in% tmp_ids_list[[2]] + +res <- list() +idxs <- seq_along(tmp_ids_list) + +for (i in seq_along(tmp_ids_list)) { + # idxs <- seq_along(tmp_ids_list) + # i = 1 + curr <- tmp_ids_list[[i]] + + other_idxs <- idxs[idxs != i] + + in_all_others <- lapply(tmp_ids_list[other_idxs], function(k) { + all(k %in% curr) & all(curr %in% k) + }) %>% + unlist() %>% + all() + res[[i]] <- in_all_others +} + +all_ids_are_matching <- + res %>% + unlist() %>% + all() + +# validate_transects(transects, "id") +if (!all_ids_are_matching) { + warning("Not all id/cs_id are included in all transects / cross section point datasets") +} else { + + message("All id / cs_id are correctly within all transects / cross section points datasets") +} + +trans_path <- paste0(DOMAIN_WITH_FEMA_OUTPUT_DIR, "/transects.gpkg") +transects <- sf::read_sf(trans_path) %>% + dplyr::rename(geometry = geom) + +is_validated_transects <- validate_transects(transects, "id") + +is_flowline_validated_transects <- validate_transects_against_flowlines(transects, flines, "id") + +if (!(is_validated_transects && is_flowline_validated_transects)) { + stop(trans_path, " transects failed validity check:", + "\n > is_validated_transects: ", is_validated_transects, + "\n > is_flowline_validated_transects: ", is_flowline_validated_transects + ) +} else { + message("Transects look good to go!") +} + +output_files <- list.files(DOMAIN_WITH_FEMA_OUTPUT_DIR, full.names = T) +cs_pts_files <- output_files[grepl(".parquet", output_files)] + +for (i in seq_along(cs_pts_files)) { + path <- cs_pts_files[i] + message(i, " - Checking cross section points validity for > '", basename(path), "'") + + cs_pts 
<-arrow::read_parquet(path) + + is_validated_cs_pts <- validate_cs_pts(cs_pts, crosswalk_id = "id") + is_transect_validated_cs_pts <- validate_cs_pts_against_transects(cs_pts, transects, crosswalk_id = "id") + + is_valid <- is_validated_cs_pts & is_transect_validated_cs_pts + + if(!is_valid) { + stop(path, " cs_pts failed validity check:", + "\n > is_validated_cs_pts: ", is_validated_cs_pts, + "\n > is_transect_validated_cs_pts: ", is_transect_validated_cs_pts + ) + } else { + message("'", basename(path), "' is valid and is good to go!") + } + + # message("'", basename(path), "' is validated") + + } + +# og_trans <- sf::read_sf(paste0(DOMAIN_WITH_FEMA_TRANSECTS_DIR, "/pre-extension-transects.gpkg")) +# post_trans <- sf::read_sf(paste0(DOMAIN_WITH_FEMA_TRANSECTS_DIR, "/post-extension-transects.gpkg")) +# dplyr::filter(transects, id == "wb-10813") %>% +# sf::st_buffer(2500) +# +# bb <- +# dplyr::filter(transects, id == "wb-10813") %>% +# sf::st_buffer(5000) %>% +# sf::st_bbox() %>% +# sf::st_as_sfc() %>% +# sf::st_sf() +# +# sf::st_filter(transects, bb) +# sf::st_filter(transects, bb) +# sf::st_filter(transects, bb) +# +# mapview::mapview(dplyr::filter(flines, id == "wb-10813")) + +# mapview::mapview( sf::st_filter(transects, bb) , color = "green") + +# mapview::mapview( sf::st_filter(og_trans, bb) , color = "red") + +# mapview::mapview( sf::st_filter(post_trans, bb) , color = "gold") +# +# transects %>% +# sf::st_filter( +# bb +# ) %>% mapview::mapview() +# +# mapview::mapview() + mapview::mapview(dplyr::filter(flines, id == "wb-10813")) + +# mapview::mapview( dplyr::filter(transects, id == "wb-10813"), color = "green") + +# mapview::mapview(dplyr::filter(og_trans, id == "wb-10813"), color = "red") +# +# +# dplyr::filter(og_trans, id == "wb-10813") +# dplyr::filter(post_trans, id == "wb-10813") +# dplyr::filter(transects, id == "wb-10813") +# +# mapview::mapview(dplyr::filter(flines, id == "wb-10813")) + +# mapview::mapview( dplyr::filter(transects, id == "wb-10813"), color = "green") + +# mapview::mapview(dplyr::filter(og_trans, id == "wb-10813"), color = "red") +# +# transects %>% +# dplyr::group_by(id) %>% +# dplyr::filter(cs_id == max(cs_id)) %>% +# dplyr::arrange(cs_id) +# +# # cs_pts_files <- +# cs_pts <-arrow::read_parquet(paste0(DOMAIN_WITH_FEMA_OUTPUT_DIR, "/dem_cross-sections.parquet")) +# +# is_validated_transects <- validate_transects(transects, "id") +# is_flowline_validated_transects <- validate_transects_against_flowlines(transects, flines, "id") +# +# is_validated_cs_pts <- validate_cs_pts(cs_pts, crosswalk_id = "id") +# is_transect_validated_cs_pts <- validate_cs_pts_against_transects(cs_pts, transects, crosswalk_id = "id") + +# sf::st_layers(paste0(DOMAIN_WITH_FEMA_OUTPUT_DIR, "/transects.gpkg"))_ + +validate_transects_self_intersections <- function(transects) { + return( + nrow(transects) == nrow(hydrofabric3D:::rm_self_intersections(transects)) + ) +} + +validate_transects_cs_id_enumeration <- function(transects, crosswalk_id = NULL) { + + # reenumerate the cs_ids for each transect based on cs_measure sorting, and make sure all cross sections are correctly numbered + mismatches <- + transects %>% + sf::st_drop_geometry() %>% + dplyr::group_by(dplyr::across(dplyr::any_of(c(crosswalk_id)))) %>% + dplyr::arrange(cs_measure, .by_group = TRUE) %>% + dplyr::mutate( + new_cs_id = 1:dplyr::n() + ) %>% + dplyr::ungroup() %>% + dplyr::filter(cs_id != new_cs_id) + + # FALSE if there are any transects with different cs_ids to the newly created cs_id + # Otherwise TRUE + 
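+  # e.g. a toy check on a plain data.frame (hypothetical values): transects at
+  # cs_measure 10 / 50 / 90 must carry cs_id 1 / 2 / 3 once sorted by cs_measure,
+  # so cs_id = c(1, 3, 2) produces two mismatch rows:
+  # data.frame(id = "wb-1", cs_id = c(1, 3, 2), cs_measure = c(10, 50, 90)) %>%
+  #   dplyr::group_by(id) %>%
+  #   dplyr::arrange(cs_measure, .by_group = TRUE) %>%
+  #   dplyr::mutate(new_cs_id = 1:dplyr::n()) %>%
+  #   dplyr::ungroup() %>%
+  #   dplyr::filter(cs_id != new_cs_id)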
has_valid_cs_ids <- !(nrow(mismatches) > 0)
+
+  return(
+    has_valid_cs_ids
+  )
+
+}
+
+validate_transects_cs_length <- function(transects, crosswalk_id = NULL) {
+
+  # recalculate transect geometry length and compare to cs_lengthm column
+  wrong_lengths <-
+    transects %>%
+    dplyr::mutate(
+      new_cs_length = as.numeric(sf::st_length(.))
+    ) %>%
+    dplyr::filter(cs_lengthm != new_cs_length)
+
+  # FALSE if there are any transects with different cs_lengthm than the freshly calculated new_cs_length
+  has_correct_lengths <- !(nrow(wrong_lengths) > 0)
+
+  return(
+    has_correct_lengths
+  )
+
+}
+
+validate_transects_unique_ids <- function(transects, crosswalk_id = NULL) {
+
+  duplicate_ids <-
+    transects %>%
+    sf::st_drop_geometry() %>%
+    hydrofabric3D::add_tmp_id(x = crosswalk_id) %>%
+    dplyr::select(tmp_id) %>%
+    dplyr::group_by(tmp_id) %>%
+    dplyr::count() %>%
+    dplyr::ungroup() %>%
+    dplyr::filter(n > 1)
+
+  # FALSE if there are ANY rows in the duplicate_ids dataframe above
+  # (i.e. a count of greater than 1 for any tmp_id (<crosswalk_id>_cs_id))
+  has_unique_ids <- !(nrow(duplicate_ids) > 0)
+
+  return(
+    has_unique_ids
+  )
+
+}
+
+validate_transects_cs_measure <- function(transects) {
+
+  min_cs_measure <-
+    transects %>%
+    sf::st_drop_geometry() %>%
+    dplyr::pull(cs_measure) %>%
+    min()
+
+  max_cs_measure <-
+    transects %>%
+    sf::st_drop_geometry() %>%
+    dplyr::pull(cs_measure) %>%
+    max()
+
+  # cs_measure should always be:
+  # greater than or equal to 0 AND
+  # less than or equal to 100 as it's a percentage along a flowline
+  has_valid_cs_measure <- min_cs_measure >= 0 & max_cs_measure <= 100
+
+  return(
+    has_valid_cs_measure
+  )
+
+}
+
+validate_transects_has_complete_geometries <- function(transects, crosswalk_id = NULL) {
+
+  has_empty_geoms <-
+    transects %>%
+    sf::st_is_empty() %>%
+    any()
+
+  return(
+    !has_empty_geoms
+  )
+
+}
+
+validate_transects_has_crs <- function(transects) {
+
+  missing_crs <-
+    transects %>%
+    sf::st_crs() %>%
+    is.na()
+
+  return(
+    !missing_crs
+  )
+
+}
+
+validate_transects <- function(transects,
+                               crosswalk_id = NULL
+                               ) {
+
+  # # standardize geometry name
+  # transects <- hydroloom::rename_geometry(transects, "geometry")
+
+  REQUIRED_COLS <- c(crosswalk_id, "cs_id", "cs_source", "cs_measure", "cs_lengthm", "geometry")
+
+  # validate dataframe has all correct columns
+  has_all_valid_cols <- hydrofabric3D:::validate_df(
+    x        = transects,
+    cols     = REQUIRED_COLS,
+    obj_name = "transects"
+  )
+
+  # Validate every flowline (id) has a cs_id of 1:number of transects
+  has_valid_cs_ids <- validate_transects_cs_id_enumeration(transects, crosswalk_id = crosswalk_id)
+
+  # Validate there are no self intersections
+  has_no_self_intersections <- validate_transects_self_intersections(transects)
+
+  # validate the cs_lengthm column equals the actual transect geometry length
+  has_correct_lengths <- validate_transects_cs_length(transects, crosswalk_id = crosswalk_id)
+
+  # validate no duplicate id / cs_id combos
+  has_unique_ids <- validate_transects_unique_ids(transects, crosswalk_id = crosswalk_id)
+
+  # validate cs_measure is always within [0, 100], as it is a percent along the flowline
+  has_valid_cs_measure <- validate_transects_cs_measure(transects)
+
+  # make sure transects have no empty geometries
+  has_complete_geoemetries <- validate_transects_has_complete_geometries(transects)
+
+  # make sure transects have a CRS
+  has_crs <- validate_transects_has_crs(transects)
+
+  # if everything is TRUE, return true, otherwise return FALSE (or throw an error...?) 
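+  # e.g. collapsing the individual checks with all() means one failing check
+  # fails the whole transects dataset:
+  # all(c(TRUE, TRUE, FALSE))   # FALSE -> transects are NOT validated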
+ is_validated_transects <- all( + c( + has_all_valid_cols, + has_valid_cs_ids, + has_no_self_intersections, + has_correct_lengths, + has_unique_ids, + has_valid_cs_measure, + has_complete_geoemetries, + has_crs + ) + ) + + return(is_validated_transects) + +} + +# all ids in transects are in flowlines +validate_transects_ids_in_flowlines <- function(transects, flowlines, crosswalk_id = NULL) { + + # flowlines <- flines + + transect_ids <- + transects %>% + sf::st_drop_geometry() %>% + dplyr::select(dplyr::any_of(crosswalk_id)) %>% + dplyr::pull(dplyr::any_of(crosswalk_id)) %>% + unique() + + flowline_ids <- + flowlines %>% + sf::st_drop_geometry() %>% + dplyr::select(dplyr::any_of(crosswalk_id)) %>% + dplyr::pull(dplyr::any_of(crosswalk_id)) %>% + unique() + + all_transect_ids_in_flowline_ids <- all(transect_ids %in% flowline_ids) + + return( + all_transect_ids_in_flowline_ids + ) + +} + +# make sure no transect crosses more than a single flowline, a single time +validate_transects_flowline_intersections <- function(transects, flowlines) { + + # flowlines <- flines + return ( + nrow(transects) == nrow(hydrofabric3D:::rm_multiflowline_intersections(transects, flowlines)) + ) +} + +# validate 2 SF objects have the same CRS +validate_same_crs <- function(x, y) { + + return ( + sf::st_crs(x) == sf::st_crs(y) + ) + +} + +validate_transects_against_flowlines <- function(transects, + flowlines, + crosswalk_id = NULL + ) { + + # # standardize geometry name + # transects <- hydroloom::rename_geometry(transects, "geometry") + + REQUIRED_COLS <- c(crosswalk_id, "cs_id", "cs_source", "cs_measure", "cs_lengthm", "geometry") + + # validate dataframe has all correct columns + has_all_valid_cols <- hydrofabric3D:::validate_df( + x = transects, + cols = REQUIRED_COLS, + obj_name = "transects" + ) + + # all ids in transects are in flowlines + all_transect_ids_in_flowline_ids <- validate_transects_ids_in_flowlines(transects, flowlines, crosswalk_id = crosswalk_id) + + # transects only intersects a single flowline, a single time + has_valid_flowline_intersects <- validate_transects_flowline_intersections(transects, flowlines) + + # transects and flowlines have the same CRS + has_same_crs <- validate_same_crs(transects, flowlines) + + + # if everything is TRUE, return true, otherwise return FALSE (or throw an error...?) 
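+  # e.g. (toy id vectors): the id containment check above reduces to a %in% test:
+  # all(c("wb-1", "wb-2") %in% c("wb-1", "wb-2", "wb-3"))   # TRUE
+  # all(c("wb-1", "wb-9") %in% c("wb-1", "wb-2", "wb-3"))   # FALSE -> orphan transect id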
+ is_flowline_validated_transects <- all( + c( + has_all_valid_cols, + all_transect_ids_in_flowline_ids, + has_valid_flowline_intersects, + has_same_crs + ) + ) + + return(is_flowline_validated_transects) + +} + +# validate_transects_against_cs_pts <- function( +# transects, +# cs_pts, +# crosswalk_id = NULL +# ) { +# +# # # standardize geometry name +# # transects <- hydroloom::rename_geometry(transects, "geometry") +# +# REQUIRED_COLS <- c(crosswalk_id, "cs_id", "cs_source", "cs_measure", "cs_lengthm", "geometry") +# +# # validate dataframe has all correct columns +# has_all_valid_cols <- hydrofabric3D:::validate_df( +# x = transects, +# cols = REQUIRED_COLS, +# obj_name = "transects" +# ) +# } + +validate_cs_pts_cs_id_enumeration <- function(cs_pts, crosswalk_id = NULL) { + + # reenumerate the cs_ids for each transect based on cs_measure sorting, and make sure all cross sections are correctly numbered + mismatches <- + cs_pts %>% + # dplyr::slice(1:150) %>% + sf::st_drop_geometry() %>% + dplyr::select(dplyr::any_of(crosswalk_id), cs_id) %>% + dplyr::group_by(dplyr::across(dplyr::any_of(c(crosswalk_id, "cs_id")))) %>% + dplyr::slice(1) %>% + dplyr::ungroup() %>% + dplyr::group_by(dplyr::across(dplyr::any_of(c(crosswalk_id)))) %>% + dplyr::arrange(cs_id, .by_group = TRUE) %>% + dplyr::mutate( + new_cs_id = 1:dplyr::n() + ) %>% + dplyr::ungroup() %>% + dplyr::filter(cs_id != new_cs_id) + + # FALSE if there are any transects with different cs_ids to the newly created cs_id + # Otherwise TRUE + has_valid_cs_ids <- !(nrow(mismatches) > 0) + + return( + has_valid_cs_ids + ) + +} + +validate_cs_pts_pt_id_enumeration <- function(cs_pts, crosswalk_id = NULL) { + + # reenumerate the pt_ids to make sure the pt_ids are valid values of 1:number of points in cross section + mismatches <- + cs_pts %>% + sf::st_drop_geometry() %>% + dplyr::select(dplyr::any_of(crosswalk_id), cs_id, pt_id) %>% + dplyr::group_by(dplyr::across(dplyr::any_of(c(crosswalk_id, "cs_id")))) %>% + dplyr::arrange(pt_id, .by_group = TRUE) %>% + dplyr::mutate( + new_pt_id = 1:dplyr::n() + ) %>% + dplyr::ungroup() %>% + dplyr::filter(pt_id != new_pt_id) + + # FALSE if there are any cs_pts with pt_ids different from the newly created new_pt_id + # Otherwise TRUE + has_valid_pt_ids <- !(nrow(mismatches) > 0) + + return( + has_valid_pt_ids + ) + +} + +validate_cs_pts_relative_distance <- function(cs_pts, crosswalk_id = NULL) { + + # make sure relative distance is greater than or equal to 0 + min_relative_distance <- + cs_pts %>% + # dplyr::slice(1:50) %>% + sf::st_drop_geometry() %>% + dplyr::pull(relative_distance) %>% + min() + + + # reenumerate the pt_ids to make sure the pt_ids are valid values of 1:number of points in cross section + rel_dist_check <- + cs_pts %>% + # dplyr::slice(1:50) %>% + sf::st_drop_geometry() %>% + dplyr::select(dplyr::any_of(crosswalk_id), cs_id, pt_id, relative_distance, cs_lengthm) %>% + dplyr::group_by(dplyr::across(dplyr::any_of(c(crosswalk_id, "cs_id")))) %>% + dplyr::summarise( + cs_lengthm = max(cs_lengthm), + # min_rel_dist = min(relative_distance), + max_rel_dist = max(relative_distance) + ) %>% + dplyr::ungroup() %>% + dplyr::mutate( + # TODO: as long as the lengths are within 1 meter, thats equal + is_valid_relative_dist = abs(cs_lengthm - max_rel_dist) <= 1 + # approx_equal_lengths = all.equal(cs_lengthm, cs_pts_lengthm, tolerance = 0.01) + ) + + # dplyr::filter(max_rel_dist > cs_lengthm) + + # relative distance is always greater than or equal to 0 and less than the cross sections length + 
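+  # e.g. (toy numbers): with the 1 meter tolerance used above, a 100 m cross
+  # section whose furthest point sits at relative_distance = 100.4 still passes:
+  # abs(100 - 100.4) <= 1   # TRUE  -> treated as matching
+  # abs(100 - 102.0) <= 1   # FALSE -> flagged as invalid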
has_valid_relative_dist_min <- min_relative_distance >= 0 + has_valid_relative_dist_maximums <- all(rel_dist_check$is_valid_relative_dist) + + return( + has_valid_relative_dist_min && has_valid_relative_dist_maximums + ) + +} + +validate_cs_pts_point_types <- function(cs_pts) { + + # make sure only "left_bank", "right_bank", "channel", and "bottom" values exist in cs_pts point_type column + valid_point_types <- c("left_bank", "right_bank", "channel", "bottom") + + # unique point types in cs_pts + unique_point_types <- unique(cs_pts$point_type) + + has_only_valid_point_types <- all(unique_point_types %in% valid_point_types) + + return( + has_only_valid_point_types + ) + +} + +validate_cs_pts <- function( + cs_pts, + crosswalk_id = NULL +) { + + # # standardize geometry name + # transects <- hydroloom::rename_geometry(transects, "geometry") + + REQUIRED_COLS <- c(crosswalk_id, "cs_id", "pt_id", + "relative_distance", "cs_lengthm", "X", "Y", "Z", "Z_source", + "class", "point_type", "valid_banks", "has_relief" + ) + + # validate dataframe has all correct columns + has_all_valid_cols <- hydrofabric3D:::validate_df( + x = cs_pts, + cols = REQUIRED_COLS, + obj_name = "cs_pts" + ) + + # make sure valid cs_ids + has_valid_cs_pts_cs_ids <- validate_cs_pts_cs_id_enumeration(cs_pts, crosswalk_id = crosswalk_id) + + # make sure valid pt_ids + has_valid_cs_pts_pt_ids <- validate_cs_pts_pt_id_enumeration(cs_pts, crosswalk_id = crosswalk_id) + + # check cs_pts have only valid relative_distance values + has_valid_relative_distances <- validate_cs_pts_relative_distance(cs_pts, crosswalk_id = crosswalk_id) + + has_valid_point_types <- validate_cs_pts_point_types(cs_pts) + + # if everything is TRUE, return true, otherwise return FALSE (or throw an error...?) + is_validated_cs_pts <- all( + c( + has_all_valid_cols, + has_valid_cs_pts_cs_ids, + has_valid_cs_pts_pt_ids, + has_valid_relative_distances, + has_valid_point_types + ) + ) + + return(is_validated_cs_pts) + +} + +# validate all cs_pts id/cs_ids are in the transects +validate_cs_pt_ids_in_transects <- function(cs_pts, transects, crosswalk_id = NULL) { + + # flowlines <- flines + cs_pts_ids <- + cs_pts %>% + sf::st_drop_geometry() %>% + hydrofabric3D::get_unique_tmp_ids(x = crosswalk_id) + + transect_ids <- + transects %>% + sf::st_drop_geometry() %>% + hydrofabric3D::get_unique_tmp_ids(x = crosswalk_id) + + + all_cs_pts_ids_in_transects <- all(cs_pts_ids %in% transect_ids) + all_transect_ids_in_cs_pts <- all(transect_ids %in% cs_pts_ids) + same_number_of_ids <- length(cs_pts_ids) == length(transect_ids) + + is_valid_cs_pts_ids <- all( + c( + all_cs_pts_ids_in_transects, + all_transect_ids_in_cs_pts, + same_number_of_ids + ) + ) + + return( + is_valid_cs_pts_ids + ) + +} + +validate_cs_pts_length_against_transects <- function(cs_pts, transects, crosswalk_id = NULL) { + + cs_pt_lengths <- + cs_pts %>% + sf::st_drop_geometry() %>% + dplyr::select(dplyr::any_of(crosswalk_id), cs_id, cs_lengthm) %>% + dplyr::group_by(dplyr::across(dplyr::any_of(c(crosswalk_id, "cs_id")))) %>% + dplyr::slice(1) %>% + dplyr::ungroup() %>% + dplyr::rename( + cs_pts_lengthm = cs_lengthm + ) + + transect_lengths <- + transects %>% + sf::st_drop_geometry() %>% + dplyr::select(dplyr::any_of(crosswalk_id), cs_id, cs_lengthm) %>% + dplyr::group_by(dplyr::across(dplyr::any_of(c(crosswalk_id, "cs_id")))) %>% + dplyr::slice(1) %>% + dplyr::ungroup() + + lengths_check <- + dplyr::left_join( + transect_lengths, + cs_pt_lengths, + by = c(crosswalk_id, "cs_id") + ) %>% + dplyr::mutate( + # 
TODO: as long as the lengths are within 1 meter, that's equal
+      approx_equal_lengths = abs(cs_lengthm - cs_pts_lengthm) <= 1
+      # approx_equal_lengths = all.equal(cs_lengthm, cs_pts_lengthm, tolerance = 0.01)
+    )
+
+  all_lengths_are_equal <- all(lengths_check$approx_equal_lengths)
+
+  return(
+    all_lengths_are_equal
+  )
+
+}
+
+validate_cs_pts_against_transects <- function(
+    cs_pts,
+    transects,
+    crosswalk_id = NULL
+) {
+
+  # # standardize geometry name
+  # transects <- hydroloom::rename_geometry(transects, "geometry")
+
+  # make sure all id/cs_id combos are in both transects and cs_pts
+  has_valid_cs_pts_ids <- validate_cs_pt_ids_in_transects(cs_pts, transects, crosswalk_id = crosswalk_id)
+
+  # make sure cs_lengthm matches from transects to cs_pts
+  has_matching_lengths <- validate_cs_pts_length_against_transects(cs_pts, transects, crosswalk_id = crosswalk_id)
+
+  # if everything is TRUE, return true, otherwise return FALSE (or throw an error...?)
+  is_transect_validated_cs_pts <- all(
+    c(
+      has_valid_cs_pts_ids,
+      has_matching_lengths
+    )
+  )
+
+  return(is_transect_validated_cs_pts)
+
+}
+
+# og_transects  <- sf::read_sf(paste0(DOMAIN_WITH_FEMA_TRANSECTS_DIR, "/cs-point-extension-transects.gpkg"))
+# new_transects <- sf::read_sf(paste0(DOMAIN_WITH_FEMA_OUTPUT_DIR, "/transects.gpkg"))
+
+# new_transects %>%
+#   dplyr::arrange(cs_lengthm) %>%
+#   dplyr::slice(1:1000) %>%
+#   mapview::mapview()
+#
+# hydrofabric3D:::rm_self_intersections(new_transects)
+
+# --------------------------------------------------------------------------------------------------------------------------
+# --------------------------------------------------------------------------------------------------------------------------
+# --------------------------------------------------------------------------------------------------------------------------
+#
+og_trans <- sf::read_sf( "/Users/anguswatters/Desktop/lynker-spatial/domain_with_fema/transects/pre_extension_transects.gpkg")
+
+transects <- sf::read_sf(output_files[[4]])
+
+transects2 <- hydrofabric3D:::rm_self_intersections(transects)
+
+old_ids      <- transects %>% hydrofabric3D::get_unique_tmp_ids("id")
+new_ids      <- transects2 %>% hydrofabric3D::get_unique_tmp_ids("id")
+diff_tmp_ids <- old_ids[!old_ids %in% new_ids]
+
+diff_ids <- lapply(diff_tmp_ids, function(i) {
+  strsplit(i, "_")[[1]][1]
+}) %>% unlist()
+
+diff_trans <-
+  transects %>%
+  # hydrofabric3D::add_tmp_id(x = "id") %>%
+  dplyr::filter(id %in% diff_ids)
+  # sf::st_buffer(50) %>%
+  # sf::st_bbox() %>%
+  # sf::st_as_sfc() %>%
+  # sf::st_sf()
+
+og_diff_trans <-
+  og_trans %>%
+  # hydrofabric3D::add_tmp_id(x = "id") %>%
+  dplyr::filter(id %in% diff_ids)
+
+mapview::mapview(diff_trans, color = "green") +
+  mapview::mapview(og_diff_trans, color = "red")
+
+# bounding box around the differing transects (diff_bb was previously undefined;
+# built here from the commented recipe above)
+diff_bb <-
+  diff_trans %>%
+  sf::st_buffer(50) %>%
+  sf::st_bbox() %>%
+  sf::st_as_sfc() %>%
+  sf::st_sf()
+
+old_trans <- transects[lengths(sf::st_intersects(transects, diff_bb)) > 0, ]
+
+# final_cross_sections %>%
+#   dplyr::filter(id == "wb-1000") %>%
+#   dplyr::rename(hy_id = id) %>%
+#   hydrofabric3D::plot_cs_pts(color = "point_type")
+
+# ---------------------------------------------------------------------------------
+# ---- Write final cross section points data ----
+# ---- Diffusive Domain DEM + FEMA + ML
+# ---------------------------------------------------------------------------------
+
+CROSS_SECTIONS_ML_OUTPUT_PATH <- paste0(DOMAIN_WITH_FEMA_OUTPUT_DIR, "/cross-sections.parquet")
+
+message(round(Sys.time()), " - Saving Diffusive DEM + FEMA + ML-augmented cross section points to:\n - filepath: '", 
CROSS_SECTIONS_ML_OUTPUT_PATH, "'") +# transects <- +# sum(is.na(final_cross_sections$id)) +# sum(is.na(final_cross_sections$cs_id)) +# sum(is.na(final_cross_sections$pt_id)) +# sum(is.na(final_cross_sections$X)) + +# save cross section points as a parquet to out_path (domain/outputs/cross-sections.parquet) +arrow::write_parquet( + # dplyr::select(final_cs, + # -is_dem_point + # ), + cross_sections, + CROSS_SECTIONS_ML_OUTPUT_PATH +) + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/runners/cs_runner/download_fema100.R b/runners/cs_runner/download_fema100.R deleted file mode 100644 index b23b438..0000000 --- a/runners/cs_runner/download_fema100.R +++ /dev/null @@ -1,123 +0,0 @@ -# Running this script goes and pulls the desired FEMA100 flood fgb datasets from the lynker-hydrofabric S3 bucket then saves them into a directory within "BASE_DIR" -# BASE_DIR is defined within runners/workflow/root_dir.R - -# NOTE: The lynker-hydrofabric S3 bucket is private at the moment - -# load config variables -source("runners/cs_runner/config_vars.R") - -# ------------------------------------------------------------------------------------- -# ---- Create FEMA100/ directory and bounding box dir (if it does NOT exist) ---- -# ------------------------------------------------------------------------------------- - -if (!dir.exists(FEMA_FGB_PATH)) { - message(paste0("FEMA100/ directory does not exist...\nCreating directory:\n > '", FEMA_FGB_PATH, "'")) - dir.create(FEMA_FGB_PATH) -} - -# create geojsons directory (if not exists) -if (!dir.exists(FEMA_GEOJSON_PATH)) { - message(paste0(FEMA_GEOJSON_PATH, " directory does not exist...\nCreating directory:\n > '", FEMA_GEOJSON_PATH, "'")) - dir.create(FEMA_GEOJSON_PATH) -} - -# create directory for cleaned FEMA geometries (if not exists) -if (!dir.exists(FEMA_CLEAN_PATH)) { - message(paste0(FEMA_CLEAN_PATH, " directory does not exist...\nCreating directory:\n > '", FEMA_CLEAN_PATH, "'")) - dir.create(FEMA_CLEAN_PATH) -} - -# create directory for cleaned FEMA geometries as geopackages (if not exists) -if (!dir.exists(FEMA_GPKG_PATH)) { - message(paste0(FEMA_GPKG_PATH, " directory does not exist...\nCreating directory:\n > '", FEMA_GPKG_PATH, "'")) - dir.create(FEMA_GPKG_PATH) -} - -# create simplified geojsons directory (if not exists) -if (!dir.exists(FEMA_SIMPLIFIED_PATH)) { - message(paste0(FEMA_SIMPLIFIED_PATH, " directory does not exist...\nCreating directory:\n > '", FEMA_SIMPLIFIED_PATH, "'")) - dir.create(FEMA_SIMPLIFIED_PATH) -} - -# create simplified geojsons directory (if not exists) -if (!dir.exists(FEMA_DISSOLVED_PATH)) { - message(paste0(FEMA_DISSOLVED_PATH, " directory does not exist...\nCreating directory:\n > '", FEMA_DISSOLVED_PATH, "'")) - dir.create(FEMA_DISSOLVED_PATH) -} - -# create exploded geojsons directory (if not exists) -if (!dir.exists(FEMA_EXPLODED_PATH)) { - message(paste0(FEMA_EXPLODED_PATH, " directory does not exist...\nCreating directory:\n > '", FEMA_EXPLODED_PATH, "'")) - dir.create(FEMA_EXPLODED_PATH) -} - -# create FEMA GPKG Bounding Boxes directory (if not exists) -if (!dir.exists(FEMA_GPKG_BB_PATH)) { - message(paste0(FEMA_GPKG_BB_PATH, " directory does not exist...\nCreating directory:\n > '", FEMA_GPKG_BB_PATH, "'")) - dir.create(FEMA_GPKG_BB_PATH) -} - -if (!dir.exists(FEMA_FGB_BB_PATH)) { - message(paste0(FEMA_FGB_BB_PATH, " directory does not exist...\nCreating directory:\n > '", FEMA_FGB_BB_PATH, "'")) - dir.create(FEMA_FGB_BB_PATH) -} - -# 
------------------------------------------------------------------------------------- -# ---- Get list of FEMA FGB files in S3 bucket ---- -# ------------------------------------------------------------------------------------- - -# list objects in S3 bucket, and regular expression match to nextgen_.gpkg pattern -fema_list_command <- paste0('#!/bin/bash - # AWS S3 Bucket and Directory information - S3_BUCKET="', FEMA_S3_DIR, '" - - # Regular expression pattern to match object keys - PATTERN=".fgb$" - - # AWS CLI command to list objects in the S3 bucket and use grep to filter them - S3_OBJECTS=$(aws s3 ls "$S3_BUCKET" --profile ', AWS_PROFILE, ' | awk \'{print $4}\' | grep -E "$PATTERN") - - echo "$S3_OBJECTS"' -) - -# ------------------------------------------------------------------------------------- -# ---- Get the S3 buckets object keys for FEMA 100 FGB files ---- -# ------------------------------------------------------------------------------------- - -# Run the script to get a list of the nextgen geopackages that matched the regular expression above -FEMA_BUCKET_KEYS <- system(fema_list_command, intern = TRUE) - -# create bucket object URIs -# FEMA_BUCKET_OBJECTS <- paste0(FEMA_S3_BUCKET, FEMA_S3_BUCKET_PREFIX, FEMA_BUCKET_KEYS) - -# ------------------------------------------------------------------------------------- -# ---- Download FEMA 100 year FGB files from S3 ---- -# ------------------------------------------------------------------------------------- - -# Parse the selected S3 objects keys from the FEMA100 bucket directory copy them to the local destination directory if the file does NOT exist yet -for (key in FEMA_BUCKET_KEYS) { - local_save_path <- paste0(FEMA_FGB_PATH, "/", key) - - if(!file.exists(local_save_path)) { - copy_cmd <- paste0('aws s3 cp ', FEMA_S3_BUCKET, FEMA_S3_BUCKET_PREFIX, key, " ", local_save_path, " --profile ", AWS_PROFILE) - - message("S3 object:\n > '", FEMA_S3_BUCKET, FEMA_S3_BUCKET_PREFIX, key, "'") - message("Downloading S3 object to:\n > '", local_save_path, "'") - # message("Copy command:\n > '", copy_cmd, "'") - - system(copy_cmd) - - message(" > '", key, "' download complete!") - message("----------------------------------") - } else { - message("File already exists at:\n > '", local_save_path, "'") - } -} - - - - - - - - diff --git a/runners/cs_runner/new_domain.R b/runners/cs_runner/new_domain.R index 4d2f223..9dda641 100644 --- a/runners/cs_runner/new_domain.R +++ b/runners/cs_runner/new_domain.R @@ -9,7 +9,7 @@ source("runners/cs_runner/utils.R") # install.packages("devtools") # # transect bucket prefix -# S3_TRANSECTS_DIR <- paste0(S3_BUCKET_URI, VERSION, "/3D/transects/") +# S3_TRANSECTS_DIR <- paste0(LYNKER_SPATIAL_HF_S3_URI, VERSION, "/3D/transects/") # paths to NEW DOMAIN datasets # NEW_DOMAIN_FILES <- list.files(NEW_DOMAIN_FLOWLINES_DIR, full.names = TRUE) diff --git a/runners/cs_runner/test_cs_pts.R b/runners/cs_runner/test_cs_pts.R new file mode 100644 index 0000000..7031bb6 --- /dev/null +++ b/runners/cs_runner/test_cs_pts.R @@ -0,0 +1,256 @@ +# Generate the flowlines layer for the final cross_sections_<VPU>.gpkg for each VPU +source("runners/cs_runner/config.R") + +# # load libraries +library(hydrofabric3D) +library(dplyr) +library(sf) + +# paths to nextgen datasets +NEXTGEN_FILES <- list.files(NEXTGEN_DIR, full.names = FALSE) + +# paths to nextgen datasets +transect_files <- list.files(TRANSECTS_DIR, full.names = FALSE) +transect_files <- transect_files[!grepl("updated_", transect_files)] + +REF_FEATURES <- 
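+# NOTE (illustrative, commented): the "vpu" column in ref_df below is just the
+# text before the first underscore of each reference features file name; the
+# file name used here is hypothetical:
+# sapply(strsplit("01_reference_features.gpkg", "_", fixed = TRUE), function(i) { i[1] })
+# > "01"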
list.files(REF_FEATURES_GPKG_DIR, full.names = FALSE) + +# reference features dataframe +ref_df <- data.frame( + vpu = sapply(strsplit(REF_FEATURES, "_", fixed = TRUE), function(i) { i[1] }), + ref_file = REF_FEATURES +) + +# ensure the files are in the same order and matched up by VPU +path_df <- align_files_by_vpu( + x = NEXTGEN_FILES, + y = transect_files, + base = BASE_DIR +) %>% + dplyr::left_join( + ref_df, + by = "vpu" + ) + +# loop over the nextgen and transect datasets (by VPU) and extract point elevations across points on each transect line, +# then classify the points, and create a parquet file with hy_id, cs_id, pt_id, X, Y, Z data. +# Save parquet locally and upload to specified S3 bucket + +# output_path <- paste0(BASE_DIR, "/test_out/") + +for (i in 20:nrow(path_df)) { + + # i = 8 + + start <- Sys.time() + + # nextgen file and full path + nextgen_file <- path_df$x[i] + nextgen_path <- paste0(NEXTGEN_DIR, nextgen_file) + + # model attributes file and full path + transect_file <- path_df$y[i] + transect_path <- paste0(TRANSECTS_DIR, transect_file) + + # model attributes file and full path + ref_file <- path_df$ref_file[i] + ref_path <- paste0(REF_FEATURES_DIR, "gpkg/", ref_file) + + # current VPU being processed + VPU <- path_df$vpu[i] + + start <- Sys.time() + + message("Creating VPU ", VPU, + " cross section points:\n - flowpaths: '", nextgen_file, + "'\n - transects: '", transect_file, "'", + "\n - waterbodies: '", ref_file, "'", + "'\n - start time: '", start, "'" + ) + + ################### + message("Reading in transects...\n > ", transect_file) + # read in transects data + transects <- sf::read_sf(transect_path) + + message("Reading in flowlines... \n > ", nextgen_file) + # read in nextgen data + flines <- sf::read_sf(nextgen_path, layer = "flowpaths") + + message("Reading in waterbodies... 
\n > ", ref_file) + # read in waterbodies reference features layer + waterbodies <- sf::read_sf(ref_path, layer = "waterbodies") + + # Update flowlines and transects to remove flowlines and transects that intersect with reference_features waterbodies + feature_subsets <- wb_intersects(flines, transects, waterbodies) + + # replace flowlines and transects objects with updated versions in "updated_features" + flines <- flines[feature_subsets$valid_flowlines, ] + transects <- transects[feature_subsets$valid_transects, ] + + rm(waterbodies) + gc() + + start_cs_pts <- Sys.time() + + # # ------------------------------------------------------------------------ + # # ------ TESTING DATA ------- + # # ------------------------------------------------------------------------ + # flines <- + # flines %>% + # dplyr::slice(1:3500) + # + # transects <- + # transects %>% + # dplyr::filter(hy_id %in% flines$id) + + # ------------------------------------------------------------------------ + + message("Extracting cross section points (", start_cs_pts, ")") + # ---------------------------------------------------------------------------------------------------------------- + # ---- STEP 1: Extract cs points from DEM ---- + # ---------------------------------------------------------------------------------------------------------------- + # system.time({ + # tmp <- transects[1:10,] + # + # get cross section point elevations + cs_pts <- hydrofabric3D::cross_section_pts( + + cs = transects, + crosswalk_id = "hy_id", + points_per_cs = NULL, + min_pts_per_cs = 10, + dem = DEM_PATH + ) + + # }) + + # ---------------------------------------------------------------------------------------------------------------- + # ---- STEP 2: Remove any cross section that has ANY missing (NA) Z values, and classify the points ---- + # ---------------------------------------------------------------------------------------------------------------- + + # sf::write_sf(cs_pts, "/Users/anguswatters/Desktop/test_improve_cs_pts_11.gpkg") + # sf::write_sf(flines, "/Users/anguswatters/Desktop/test_improve_flines_11.gpkg") + # sf::write_sf(transects, "/Users/anguswatters/Desktop/test_improve_transects_11.gpkg") + + # sf::write_sf(flines, "/Users/anguswatters/Desktop/test_improve_flines_11_2.gpkg") + # sf::write_sf(transects, "/Users/anguswatters/Desktop/test_improve_transects_11_2.gpkg") + # cs_pts %>% + # dplyr::group_by(hy_id, cs_id) %>% + # dplyr::filter(!any(is.na(Z))) %>% + # dplyr::ungroup() + # + # cs_pts %>% + # hydrofabric3D::drop_incomplete_cs_pts("hy_id") + + # system.time({ + + # STEP 2: Remove any cross section that has ANY missing (NA) Z values, and classify the points + cs_pts <- + # cs_pts2 <- + cs_pts %>% + # dplyr::group_by(hy_id, cs_id) %>% + # dplyr::filter(!any(is.na(Z))) %>% + # dplyr::ungroup() %>% + hydrofabric3D::drop_incomplete_cs_pts("hy_id") %>% + hydrofabric3D::classify_points( + crosswalk_id = "hy_id", + pct_of_length_for_relief = PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF + ) + + # }) + + ids_original_cs_pts <- hydrofabric3D::add_tmp_id(cs_pts)$tmp_id + + # ids_original_cs_pts <- hydrofabric3D::add_tmp_id(cs_pts2)$tmp_id + + # output_path <- paste0(BASE_DIR, "/test_out/") + # ---------------------------------------------------------------------------------------------------------------- + # ---- Cross section points parquet to S3 ---- + # ---------------------------------------------------------------------------------------------------------------- + + # classify the cross section points + # fixed_pts <- + cs_pts 
<-
+    cs_pts %>%
+    hydrofabric3D:::pts_to_XY() %>%
+    dplyr::select(
+      hy_id, cs_id, pt_id,
+      cs_lengthm,
+      relative_distance,
+      X, Y, Z,
+      class, point_type,
+      bottom, left_bank, right_bank, valid_banks, has_relief # newly added columns (03/06/2024)
+    ) %>%
+    dplyr::mutate(
+      Z_source = CS_SOURCE
+    ) %>%
+    dplyr::relocate(hy_id, cs_id, pt_id, cs_lengthm, relative_distance, X, Y, Z, Z_source,
+                    class, point_type,
+                    bottom, left_bank, right_bank, valid_banks, has_relief)
+
+  ids_before_align <- hydrofabric3D::add_tmp_id(cs_pts)$tmp_id
+
+  message("Aligning banks and smoothing bottoms...")
+  cs_pts <- hydrofabric3D::align_banks_and_bottoms(cs_pts = cs_pts, crosswalk_id = "hy_id")
+  # fixed_pts <- hydrofabric3D::align_banks_and_bottoms(cs_pts = fixed_pts, crosswalk_id = "hy_id")
+
+  ids_after_align <- hydrofabric3D::add_tmp_id(cs_pts)$tmp_id
+
+  message("Reclassifying cross section points...")
+  cs_pts <- hydrofabric3D::classify_points(
+    cs_pts                    = cs_pts,
+    crosswalk_id              = "hy_id",
+    pct_of_length_for_relief  = PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF
+  )
+
+  ids_after_reclassify <- hydrofabric3D::add_tmp_id(cs_pts)$tmp_id
+
+  # if(all(ids_original_cs_pts %in% ids_after_fixed_pts)) {
+  #   message("All hy_id/cs_ids in ORIGINAL DEM point extraction were found in the FIXED points")
+  # } else {
+  #   message(" >>> Missing hy_id/cs_ids in ORIGINAL DEM point extraction compared to the FIXED points")
+  # }
+  #
+  if(all(ids_before_align %in% ids_after_align)) {
+    message("All hy_id/cs_ids are kept intact after bank alignment and bottom smoothing")
+  } else {
+    message(" >>> Missing hy_id/cs_ids after bank alignment and bottom smoothing")
+  }
+
+  if(all(ids_after_align %in% ids_after_reclassify)) {
+    message("All hy_id/cs_ids are kept intact after RECLASSIFICATION")
+  } else {
+    message(" >>> Missing hy_id/cs_ids after RECLASSIFICATION")
+  }
+
+  # all(hydrofabric3D::add_tmp_id(fixed_pts2)$tmp_id %in% hydrofabric3D::add_tmp_id(fixed_pts)$tmp_id)
+  # all(hydrofabric3D::add_tmp_id(fixed_pts4)$tmp_id %in% hydrofabric3D::add_tmp_id(fixed_pts)$tmp_id)
+
+  ##############################################################################
+
+  # ----------------------------------------------------------------------------------------------------------------
+  # ---- Upload the cross section points parquet to S3 ----
+  # ----------------------------------------------------------------------------------------------------------------
+
+  # # name of file and path to save transects gpkg to
+  out_file <- paste0("nextgen_", path_df$vpu[i], "_cross_sections.parquet")
+  out_path <- paste0(BASE_DIR, "/test_out/", out_file)
+
+  message("Saving cross section points to:\n - filepath: '", out_path, "'")
+
+  # save cross section points as a parquet to out_path (lynker-spatial/02_cs_pts/cs_pts_<VPU num>.parquet)
+  arrow::write_parquet(cs_pts, out_path)
+  # sf::write_sf(cs_pts2, "/Users/anguswatters/Desktop/test_improve_cs_pts_classified_11.gpkg")
+  # sf::write_sf(cs_pts, "/Users/anguswatters/Desktop/test_improve_cs_pts_classified_11_2.gpkg")
+
+  end <- Sys.time()
+
+  message("Finished cross section point generation for VPU ", VPU)
+  message("- Completed at: ", end)
+  message("==========================")
+
+  rm(cs_pts)
+  gc()
+  gc()
+}
diff --git a/runners/cs_runner/test_fix_cs_pts.R b/runners/cs_runner/test_fix_cs_pts.R
new file mode 100644
index 0000000..cbfaae8
--- /dev/null
+++ b/runners/cs_runner/test_fix_cs_pts.R
@@ -0,0 +1,278 @@
+# Generate the flowlines layer for the final cross_sections_<VPU>.gpkg for each VPU 
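+# NOTE (illustrative, commented): the id bookkeeping in this script keys on
+# hydrofabric3D tmp_ids, which combine the crosswalk id and cs_id as
+# "<id>_<cs_id>". A minimal sketch, assuming the default crosswalk column is
+# "hy_id":
+# hydrofabric3D::add_tmp_id(data.frame(hy_id = "wb-1", cs_id = 2))$tmp_id
+# > "wb-1_2"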
+source("runners/cs_runner/config.R") + +# # load libraries +library(hydrofabric3D) +library(dplyr) +library(sf) + +# paths to nextgen datasets +NEXTGEN_FILES <- list.files(NEXTGEN_DIR, full.names = FALSE) + +# paths to nextgen datasets +transect_files <- list.files(TRANSECTS_DIR, full.names = FALSE) +transect_files <- transect_files[!grepl("updated_", transect_files)] + +REF_FEATURES <- list.files(REF_FEATURES_GPKG_DIR, full.names = FALSE) + +START_CS_PTS_DIR <- '/Users/anguswatters/Desktop/lynker-spatial/test_out' +start_cs_pts_paths <- list.files(START_CS_PTS_DIR, full.names = T) + +END_CS_PTS_DIR <- '/Users/anguswatters/Desktop/lynker-spatial/test_fix_out' + +# reference features dataframe +ref_df <- data.frame( + vpu = sapply(strsplit(REF_FEATURES, "_", fixed = TRUE), function(i) { i[1] }), + ref_file = REF_FEATURES +) + +# ensure the files are in the same order and matched up by VPU +path_df <- align_files_by_vpu( + x = NEXTGEN_FILES, + y = transect_files, + base = BASE_DIR +) %>% + dplyr::left_join( + ref_df, + by = "vpu" + ) +path_df$cs_pts_path <- start_cs_pts_paths + +# path_df + +# loop over the nextgen and transect datasets (by VPU) and extract point elevations across points on each transect line, +# then classify the points, and create a parquet file with hy_id, cs_id, pt_id, X, Y, Z data. +# Save parquet locally and upload to specified S3 bucket + +# output_path <- paste0(BASE_DIR, "/test_out/") + +for (i in 15:nrow(path_df)) { + # i = 8 + + start <- Sys.time() + + start_cs_pts_path <- path_df$cs_pts_path[i] + + # nextgen file and full path + nextgen_file <- path_df$x[i] + nextgen_path <- paste0(NEXTGEN_DIR, nextgen_file) + + # model attributes file and full path + transect_file <- path_df$y[i] + transect_path <- paste0(TRANSECTS_DIR, transect_file) + + # model attributes file and full path + ref_file <- path_df$ref_file[i] + ref_path <- paste0(REF_FEATURES_DIR, "gpkg/", ref_file) + + # current VPU being processed + VPU <- path_df$vpu[i] + + start <- Sys.time() + + message("Creating VPU ", VPU, + " cross section points:\n - flowpaths: '", nextgen_file, + "'\n - start cs pts: '", basename(start_cs_pts_path), "'", + "'\n - transects: '", transect_file, "'", + "\n - waterbodies: '", ref_file, "'", + "'\n - start time: '", start, "'" + ) + + ################### + message("Reading in starting CS points...\n > ", basename(start_cs_pts_path)) + cs_pts <- arrow::read_parquet(start_cs_pts_path) + + message("Reading in transects...\n > ", transect_file) + # read in transects data + transects <- sf::read_sf(transect_path) + + message("Reading in flowlines... \n > ", nextgen_file) + # read in nextgen data + flines <- sf::read_sf(nextgen_path, layer = "flowpaths") + + message("Reading in waterbodies... 
\n > ", ref_file) + # read in waterbodies reference features layer + waterbodies <- sf::read_sf(ref_path, layer = "waterbodies") + + # Update flowlines and transects to remove flowlines and transects that intersect with reference_features waterbodies + feature_subsets <- wb_intersects(flines, transects, waterbodies) + + # replace flowlines and transects objects with updated versions in "updated_features" + flines <- flines[feature_subsets$valid_flowlines, ] + transects <- transects[feature_subsets$valid_transects, ] + + rm(waterbodies) + gc() + + start_cs_pts <- Sys.time() + flines <- dplyr::rename(flines, hy_id = id) + + # test_ids <- + # flines %>% + # dplyr::slice(1:100) %>% + # .$hy_id %>% + # unique() + # flines <- + # flines %>% + # dplyr::filter( + # hy_id %in% test_ids + # ) + # transects <- + # transects %>% + # dplyr::filter( + # hy_id %in% test_ids + # ) + # cs_pts <- + # cs_pts %>% + # dplyr::filter( + # hy_id %in% test_ids + # ) + + ids_original_cs_pts <- hydrofabric3D::add_tmp_id(cs_pts)$tmp_id + + # # ------------------------------------------------------------------------ + # # ------ TESTING DATA ------- + # # ------------------------------------------------------------------------ + # system.time({ + fixed_pts <- hydrofabric3D::get_improved_cs_pts( + cs_pts = cs_pts, # cross section points generated from hydrofabric3D::cross_section_pts() + net = flines, # original flowline network + # net = flines, # original flowline network + transects = transects, # original transect lines + crosswalk_id = "hy_id", + points_per_cs = NULL, + min_pts_per_cs = 10, # number of points per cross sections + dem = DEM_PATH, # DEM to extract points from + scale = EXTENSION_PCT, # How far to extend transects if the points need to be rechecked + pct_of_length_for_relief = PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF, # percent of cross sections length to be needed in relief calculation to consider cross section to "have relief" + fix_ids = FALSE, + verbose = TRUE + ) + # }) + # fixed_pts + + # fixed_pts %>% + # # cs_pts %>% + # sf::st_drop_geometry() %>% + # # dplyr::left_join( + # # stream_order, + # # by = "hy_id" + # # ) %>% + # dplyr::select(dplyr::any_of("hy_id"), cs_id, + # # vpu, order, + # valid_banks, has_relief) %>% + # dplyr::group_by( + # dplyr::across(dplyr::any_of(c("hy_id", "cs_id"))) + # ) %>% + # dplyr::slice(1) %>% + # dplyr::ungroup() %>% + # # dplyr::group_by( + # # dplyr::across(dplyr::any_of(c("vpu", "order"))) + # # ) %>% + # dplyr::count(valid_banks, has_relief) + + + # transects %>% + # sf::st_drop_geometry() %>% + # dplyr::group_by(id, cs_id) + # fixed_pts2$is_extended %>% sum() + ids_after_fixed_pts <- hydrofabric3D::add_tmp_id(fixed_pts)$tmp_id + # ---------------------------------------------------------------------------------------------------------------- + # ---- Cross section points parquet to S3 ---- + # ---------------------------------------------------------------------------------------------------------------- + + # classify the cross section points + fixed_pts <- + fixed_pts %>% + # dplyr::mutate( + # X = sf::st_coordinates(.)[,1], + # Y = sf::st_coordinates(.)[,2] + # ) %>% + sf::st_drop_geometry() %>% + dplyr::select( + hy_id, cs_id, pt_id, + cs_lengthm, + relative_distance, + X, Y, Z, + class, point_type, + bottom, left_bank, right_bank, valid_banks, has_relief # newly added columns (03/06/2024) + ) + + # # Drop point geometries, leaving just X, Y, Z values + # fixed_pts <- sf::st_drop_geometry(fixed_pts) + + # add Z_source column for source of elevation 
data
+  fixed_pts <-
+    fixed_pts %>%
+    dplyr::mutate(
+      Z_source = CS_SOURCE
+    ) %>%
+    dplyr::relocate(hy_id, cs_id, pt_id, cs_lengthm, relative_distance, X, Y, Z, Z_source,
+                    class, point_type,
+                    bottom, left_bank, right_bank, valid_banks, has_relief)
+
+  ids_before_align <- hydrofabric3D::add_tmp_id(fixed_pts)$tmp_id
+
+  message("Aligning banks and smoothing bottoms...")
+  fixed_pts <- hydrofabric3D::align_banks_and_bottoms(cs_pts = fixed_pts, crosswalk_id = "hy_id")
+  # fixed_pts <- hydrofabric3D::align_banks_and_bottoms(cs_pts = fixed_pts, crosswalk_id = "hy_id")
+
+  ids_after_align <- hydrofabric3D::add_tmp_id(fixed_pts)$tmp_id
+
+  message("Reclassifying cross section points...")
+  fixed_pts <- hydrofabric3D::classify_points(
+    cs_pts                    = fixed_pts,
+    crosswalk_id              = "hy_id",
+    pct_of_length_for_relief  = PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF
+  )
+
+  ids_after_reclassify <- hydrofabric3D::add_tmp_id(fixed_pts)$tmp_id
+
+  if(all(ids_original_cs_pts %in% ids_after_fixed_pts)) {
+    message("All hy_id/cs_ids in ORIGINAL DEM point extraction were found in the FIXED points")
+  } else {
+    message(" >>> Missing hy_id/cs_ids in ORIGINAL DEM point extraction compared to the FIXED points")
+  }
+
+  if(all(ids_before_align %in% ids_after_align)) {
+    message("All hy_id/cs_ids are kept intact after bank alignment and bottom smoothing")
+  } else {
+    message(" >>> Missing hy_id/cs_ids after bank alignment and bottom smoothing")
+  }
+
+  if(all(ids_after_align %in% ids_after_reclassify)) {
+    message("All hy_id/cs_ids are kept intact after RECLASSIFICATION")
+  } else {
+    message(" >>> Missing hy_id/cs_ids after RECLASSIFICATION")
+  }
+
+  # all(hydrofabric3D::add_tmp_id(fixed_pts2)$tmp_id %in% hydrofabric3D::add_tmp_id(fixed_pts)$tmp_id)
+  # all(hydrofabric3D::add_tmp_id(fixed_pts4)$tmp_id %in% hydrofabric3D::add_tmp_id(fixed_pts)$tmp_id)
+
+  ##############################################################################
+
+  # ----------------------------------------------------------------------------------------------------------------
+  # ---- Upload the cross section points parquet to S3 ----
+  # ----------------------------------------------------------------------------------------------------------------
+
+  # # name of file and path to save transects gpkg to
+  out_file <- paste0("nextgen_fixed_", path_df$vpu[i], "_cross_sections.parquet")
+  out_path <- paste0(BASE_DIR, "/test_fix_out/", out_file)
+
+  message("Saving cross section points to:\n - filepath: '", out_path, "'")
+
+  # save cross section points as a parquet to out_path (lynker-spatial/02_cs_pts/cs_pts_<VPU num>.parquet)
+  arrow::write_parquet(fixed_pts, out_path)
+  # sf::write_sf(cs_pts2, "/Users/anguswatters/Desktop/test_improve_cs_pts_classified_11.gpkg")
+  # sf::write_sf(cs_pts, "/Users/anguswatters/Desktop/test_improve_cs_pts_classified_11_2.gpkg")
+
+  end <- Sys.time()
+
+  message("Finished cross section point generation for VPU ", VPU)
+  message("- Completed at: ", end)
+  message("==========================")
+
+  rm(cs_pts, fixed_pts)
+  gc()
+  gc()
+}
diff --git a/runners/cs_runner/utils.R b/runners/cs_runner/utils.R
index e52352b..ef43e18 100644
--- a/runners/cs_runner/utils.R
+++ b/runners/cs_runner/utils.R
@@ -1,47 +1,160 @@
 # Create an empty file structure
 # base_dir: character, top level directory path
-# domain_dirname: character, name of the intended new domain directory, if folder exists, then the required subdirectories are created (if they DO NOT exist)
-
 # Directory tree:
 # base_dir/
-# └── domain_dirname/
-# ├── flowlines/ 
+# └── lynker-spatial/ +# ├── hydrofabric/ # ├── dem/ -# ├── transects/ -# ├── cross_sections/ -# └── cs_pts/ -create_new_domain_dirs <- function(base_dir, domain_dirname, with_output = FALSE) { +# ├── vrt/ +# ├── tif/ +# ├── cs-extension-polygons/ +create_local_hydrofabric_base_dirs <- function(base_dir) { + # build paths - domain_dir <- paste0(base_dir, "/", domain_dirname) - flowlines_dir <- paste0(domain_dir, "/flowlines") - domain_subset_dir <- paste0(domain_dir, "/domain_subset") - dem_dir <- paste0(domain_dir, "/dem") - transects_dir <- paste0(domain_dir, "/transects") - cross_sections_dir <- paste0(domain_dir, "/cross_sections") - cs_pts_dir <- paste0(domain_dir, "/cs_pts") - vpu_subsets_dir <- paste0(domain_dir, "/vpu-subsets") + hydrofabric_dir <- paste0(base_dir, "/hydrofabric") - if(with_output) { - output_dir <- paste0(domain_dir, "/outputs") + # DEM dirs + dem_dir <- file.path(base_dir, "dem") + dem_vrt_dir <- file.path(dem_dir, "vrt") + dem_tif_dir <- file.path(dem_dir, "tif") + + # polygons for transect extensions + cs_extension_polygons_dir <- file.path(base_dir, "cs-extension-polygons") + + # FEMA data + fema_dir <- file.path(base_dir, "fema") + + fema_fgb_dir <- file.path(fema_dir, "fema-fgb") + fema_geojson_dir <- file.path(fema_dir, "fema-geojson") + fema_clean_dir <- file.path(fema_dir, "fema-clean") + fema_gpkg_dir <- file.path(fema_dir, "fema-gpkg") + + # BY VPU folders + VPU_IDS <- get_vpu_ids() + fema_by_vpu_dir <- file.path(fema_dir, "fema-by-vpu") + fema_by_vpu_subdirs <- paste0(fema_by_vpu_dir, "/vpu-", VPU_IDS) + + # create base directories + create_if_not_exists(base_dir) + create_if_not_exists(hydrofabric_dir) + + # DEM dirs + create_if_not_exists(dem_dir) + create_if_not_exists(dem_vrt_dir) + create_if_not_exists(dem_tif_dir) + + # extension polygons + create_if_not_exists(cs_extension_polygons_dir) + + + # Create FEMA folders + create_if_not_exists(fema_dir) + create_if_not_exists(fema_fgb_dir) + create_if_not_exists(fema_geojson_dir) + create_if_not_exists(fema_clean_dir) + create_if_not_exists(fema_gpkg_dir) + create_if_not_exists(fema_by_vpu_dir) + + for (path in fema_by_vpu_subdirs) { + create_if_not_exists(path) } - create_if_not_exists <- function(dir_path) { - if (!dir.exists(dir_path)) { - dir.create(dir_path, recursive = TRUE) - message("Created directory: '", dir_path, "'\n") - } +} + +get_vpu_ids <- function() { + VPU_IDS <- c('01', '02', '03N', '03S', '03W', '04', '05', '06', '07', '08', '09', + '10L', '10U', '11', '12', '13', '14', '15', '16', '17', '18', '20', '21') + # VPU_IDS <- sf::st_drop_geometry(nhdplusTools::get_boundaries())$VPUID + + return(VPU_IDS) + +} + +# # Base directory for local file storage +# BASE_DIR <- '/Volumes/T7SSD/lynker-spatial' +# base_dir <- BASE_DIR +# # FEMA100 year flood map FGB save location (temporary, will be deleted after processing) +# FEMA_FGB_PATH <- file.path(BASE_DIRS_LIST$fema_dir, "fema_fgb") +# FEMA_GEOJSON_PATH <- file.path(BASE_DIRS_LIST$fema_dir, "fema_geojson") +# FEMA_CLEAN_PATH <- file.path(BASE_DIRS_LIST$fema_dir, "fema_clean") +# FEMA_GPKG_PATH <- file.path(BASE_DIRS_LIST$fema_dir, "fema_gpkg") +# FEMA_BY_VPU_PATH <- file.path(BASE_DIRS_LIST$fema_dir, "FEMA_BY_VPU") +# +# +# VPU_IDS <- c('01', '02', '03N', '03S', '03W', '04', '05', '06', '07', '08', '09', +# '10L', '10U', '11', '12', '13', '14', '15', '16', '17', '18', '20', '21') +# # VPU_IDS <- sf::st_drop_geometry(nhdplusTools::get_boundaries())$VPUID +# +# paste0("'", sf::st_drop_geometry(nhdplusTools::get_boundaries())$VPUID, "'", 
collapse = ", ") +# FEMA_VPU_SUBFOLDERS <- paste0(FEMA_BY_VPU_PATH, "/VPU_", VPU_IDS) + +# Create an empty file structure for a new version within a specified base_dir +# base_dir: character, top level directory path +# Directory tree: +# base_dir/ +# └── lynker-spatial/ +# ├── hydrofabric/ + # ├── version_number/ + # ├── network/ + # ├── transects/ + # ├── cross-sections/ + # ├── dem/ + # ├── dem-ml/ + # ├── dem-coastal-bathy/ + # ├── dem-points/ +create_new_version_dirs <- function(base_dir, version, with_output = FALSE) { + # version = "v3.0" + # base_dir <- BASE_DIR + + # build paths + hydrofabric_dir <- paste0(base_dir, "/hydrofabric") + version_base_dir <- paste0(hydrofabric_dir, "/", version) + + # polygons for transect extensions + ml_dir <- paste0(version_base_dir, "/ml") + + # reference features + ref_features_dir <- paste0(version_base_dir, "/reference-features") + + # conus network gpkg + network_dir <- paste0(version_base_dir, "/network") + + # transects + transects_dir <- paste0(version_base_dir, "/transects") + + # cross sections dirs + cross_sections_dir <- paste0(version_base_dir, "/cross-sections") + cross_sections_dem_dir <- paste0(cross_sections_dir, "/dem") + cross_sections_ml_dir <- paste0(cross_sections_dir, "/dem-ml") + cross_sections_coastal_bathy_dir <- paste0(cross_sections_dir, "/dem-coastal-bathy") + cross_sections_dem_pts_dir <- paste0(cross_sections_dir, "/dem-points") + + if(with_output) { + output_dir <- paste0(version_base_dir, "/outputs") } - # create directories - create_if_not_exists(domain_dir) - create_if_not_exists(flowlines_dir) - create_if_not_exists(domain_subset_dir) - create_if_not_exists(dem_dir) + # create version BASE dir + create_if_not_exists(version_base_dir) + + # CONUS dir + create_if_not_exists(network_dir) + + # ML data + create_if_not_exists(ml_dir) + + # reference features data + create_if_not_exists(ref_features_dir) + + # transects create_if_not_exists(transects_dir) + + # CS pts create_if_not_exists(cross_sections_dir) - create_if_not_exists(cs_pts_dir) - create_if_not_exists(vpu_subsets_dir) + create_if_not_exists(cross_sections_dem_dir) + create_if_not_exists(cross_sections_ml_dir) + create_if_not_exists(cross_sections_coastal_bathy_dir) + create_if_not_exists(cross_sections_dem_pts_dir) if(with_output) { create_if_not_exists(output_dir) @@ -49,39 +162,101 @@ create_new_domain_dirs <- function(base_dir, domain_dirname, with_output = FALSE } -# get path strings for a domain dir (based of a base dir and domain dirname) -# NOTE: this does NOT guarentee that these folders exist, -# NOTE: it just gets the paths if they were created by create_new_domain_dirs() -get_new_domain_paths <- function(base_dir, domain_dirname, with_output = FALSE) { +create_if_not_exists <- function(dir_path) { + if (!dir.exists(dir_path)) { + dir.create(dir_path, recursive = TRUE) + message("Created directory: '", dir_path, "'\n") + } +} + +# get a list of top level directories for main directory +get_base_dir_paths <- function(base_dir) { + # base_dir = BASE_DIR + # version = "v3.0" - # build paths - domain_dir <- paste0(base_dir, "/", domain_dirname) - flowlines_dir <- paste0(domain_dir, "/flowlines") - domain_subset_dir <- paste0(domain_dir, "/domain_subset") - dem_dir <- paste0(domain_dir, "/dem") - transects_dir <- paste0(domain_dir, "/transects") - cross_sections_dir <- paste0(domain_dir, "/cross_sections") - cs_pts_dir <- paste0(domain_dir, "/cs_pts") - vpu_subsets_dir <- paste0(domain_dir, "/vpu-subsets") - output_dir <- ifelse(with_output, 
+
+  hydrofabric_dir <- file.path(base_dir, "hydrofabric")
+  
+  dem_dir     <- file.path(base_dir, "dem")
+  dem_vrt_dir <- file.path(base_dir, "dem", "vrt")
+  dem_tif_dir <- file.path(base_dir, "dem", "tif")
+  
+  cs_extension_polygons_dir <- file.path(base_dir, "cs-extension-polygons")
+  
+  # FEMA data
+  fema_dir <- file.path(base_dir, "fema")
+  
+  fema_fgb_dir     <- file.path(fema_dir, "fema-fgb")
+  fema_geojson_dir <- file.path(fema_dir, "fema-geojson")
+  fema_clean_dir   <- file.path(fema_dir, "fema-clean")
+  fema_gpkg_dir    <- file.path(fema_dir, "fema-gpkg")
+  
+  # BY VPU folders
+  VPU_IDS             <- get_vpu_ids()
+  fema_by_vpu_dir     <- file.path(fema_dir, "fema-by-vpu")
+  fema_by_vpu_subdirs <- paste0(fema_by_vpu_dir, "/vpu-", VPU_IDS)
+  
+  return(
+    list(
+      hydrofabric_dir           = hydrofabric_dir,
+      dem_dir                   = dem_dir,
+      dem_vrt_dir               = dem_vrt_dir,
+      dem_tif_dir               = dem_tif_dir,
+      cs_extension_polygons_dir = cs_extension_polygons_dir,
+      fema_dir                  = fema_dir,
+      fema_fgb_dir              = fema_fgb_dir,
+      fema_geojson_dir          = fema_geojson_dir,
+      fema_clean_dir            = fema_clean_dir,
+      fema_gpkg_dir             = fema_gpkg_dir,
+      fema_by_vpu_dir           = fema_by_vpu_dir,
+      fema_by_vpu_subdirs       = fema_by_vpu_subdirs
+    )
+  )
+}
+
+# get a list of specific directories in a version directory
+get_version_base_dir_paths <- function(base_dir, version) {
+  # base_dir = BASE_DIR
+  # version = "v3.0"
+  
+  hydrofabric_dir <- file.path(base_dir, "hydrofabric")
+  
+  version_base_dir <- file.path(hydrofabric_dir, version)
+  
+  # ML (machine learning) data
+  ml_dir <- file.path(version_base_dir, "ml")
+  
+  # reference features
+  ref_features_dir <- file.path(version_base_dir, "reference-features")
+  
+  # conus network gpkg
+  network_dir <- file.path(version_base_dir, "network")
+  
+  # transects
+  transects_dir <- file.path(version_base_dir, "transects")
+  
+  # cross sections dirs
+  cross_sections_dir               <- file.path(version_base_dir, "cross-sections")
+  cross_sections_dem_dir           <- file.path(cross_sections_dir, "dem")
+  cross_sections_ml_dir            <- file.path(cross_sections_dir, "dem-ml")
+  cross_sections_coastal_bathy_dir <- file.path(cross_sections_dir, "dem-coastal-bathy")
+  cross_sections_dem_pts_dir       <- file.path(cross_sections_dir, "dem-points")
 
-  # named list of file paths
   return(
     list(
-      base_dir = base_dir,
-      domain_dir = domain_dir,
-      flowlines_dir = flowlines_dir,
-      domain_subset_dir = domain_subset_dir,
-      dem_dir = dem_dir,
+      hydrofabric_dir = hydrofabric_dir,
+      version_base_dir = version_base_dir,
+      ref_features_dir = ref_features_dir,
+      network_dir = network_dir,
+      ml_dir = ml_dir,
       transects_dir = transects_dir,
      cross_sections_dir = cross_sections_dir,
-      cs_pts_dir = cs_pts_dir,
-      vpu_subsets_dir = vpu_subsets_dir,
-      output_dir = output_dir
+      cross_sections_dem_dir = cross_sections_dem_dir,
+      cross_sections_dem_pts_dir = cross_sections_dem_pts_dir,
+      cross_sections_ml_dir = cross_sections_ml_dir,
+      cross_sections_coastal_bathy_dir = cross_sections_coastal_bathy_dir
    )
  )
-
 }
 
 list_s3_objects <- function(s3_bucket, pattern = NULL, aws_profile = NULL) {
@@ -111,6 +286,49 @@ list_s3_objects <- function(s3_bucket, pattern = NULL, aws_profile = NULL)
   return(ls_output)
 }
 
+download_tiles <- function(tile_paths, output_dir) {
+  # output_dir <- DEM_TIF_DIR
+  # tile_paths
+  tif_save_paths <- paste0(output_dir, "/", basename(tile_paths))
+
+  error_tiles <- data.frame(
+    tile   = basename(tile_paths),
+    status = TRUE
+  )
+
+  for (i in seq_along(tile_paths)) {
+    # i = 1
+    tile_path     <- tile_paths[i]
+    tif_save_path <- tif_save_paths[i]
+
+    message("[", i, "]",
message("[", i, "]", + "\n > Tile: ", basename(tile_path), + "\n > Output path: ", tif_save_path + ) + + download_tif_cmd <- paste0("curl -o ", tif_save_path, " ", tile_path) + + tryCatch({ + + tif_download_output <- system(download_tif_cmd, intern = TRUE) + message(" > Succesfully downloaded tile: ", basename(tile_path)) + + }, error = function(e) { + + message("Error downloading tile: ", basename(tile_path)) + message("ERROW below: \n ", e) + + error_tiles[error_tiles$tile == basename(tile_path), ] <- FALSE + + }) + + } + + return(error_tiles) + +} + # Given 2 character vectors of filenames both including VPU strings after a "nextgen_" string, match them together to # make sure they are aligned and in the same order # x is a character vector of file paths with a VPU ID preceeded by a "nextgen_" string @@ -430,139 +648,1872 @@ add_predicate_group_id <- function(polys, predicate) { } +# TODO: DElete these NEW DOMAIN functions... +# Create an empty file structure +# base_dir: character, top level directory path +# domain_dirname: character, name of the intended new domain directory, if folder exists, then the required subdirectories are created (if they DO NOT exist) -# utility function for getting transects extended and -# matching cross section points that went through "get_improved_cs_pts()" and that were extended for improvement -# returns the extended version of the transects -match_transects_to_extended_cs_pts <- function(transect_lines, fixed_cs_pts, crosswalk_id) { - - # transect_lines = transects - # fixed_cs_pts = fixed_pts - # crosswalk_id = CROSSWALK_ID - - fixed_cs_pts <- nhdplusTools::rename_geometry(fixed_cs_pts, "geometry") - transect_lines <- nhdplusTools::rename_geometry(transect_lines, "geometry") - - # get the counts of each point type to add this data to the transect_lines dataset - point_type_counts <- hydrofabric3D::get_point_type_counts(classified_pts = fixed_cs_pts, - crosswalk_id = crosswalk_id) - # Check the number of cross sections that were extended - message("Subsetting cross section points generated after extending transect_lines...") - - # extract cross section points that have an "is_extended" value of TRUE - extended_pts <- - fixed_cs_pts %>% - dplyr::filter(is_extended) %>% - hydrofabric3D::add_tmp_id(x = crosswalk_id) - - # extended_pts %>% - # get_unique_tmp_ids() %>% - # length() - - # extract transect_lines that have a "crosswalk_id" in the "extended_pts" dataset - update_transect_lines <- - transect_lines %>% - hydrofabric3D::add_tmp_id(x = crosswalk_id) %>% - dplyr::filter(tmp_id %in% unique(extended_pts$tmp_id)) - - cs_pt_uids <- unique(hydrofabric3D::add_tmp_id(fixed_cs_pts, x = crosswalk_id)$tmp_id) - - # If any transect_lines were extended, update the transect_lines dataset, and overwrite local and S3 transect_lines geopackages - if (nrow(update_transect_lines) > 0) { - message("Updating ", nrow(update_transect_lines), " transect_lines") - - - update_transect_lines <- - update_transect_lines %>% - dplyr::rename(hy_id := !!sym(crosswalk_id)) - - update_transect_lines <- - update_transect_lines %>% - # apply extend_by_percent function to each transect line: - hydrofabric3D:::extend_by_percent( - pct = EXTENSION_PCT, - length_col = "cs_lengthm" - ) - - update_transect_lines <- hydroloom::rename_geometry(update_transect_lines, "geometry") - - update_transect_lines <- - update_transect_lines %>% - dplyr::rename(!!sym(crosswalk_id) := hy_id) - - # cs_pt_uids <- unique(hydrofabric3D::add_tmp_id(fixed_cs_pts, x = get(crosswalk_id))$tmp_id) - # 
transect_uids <- unique(hydrofabric3D::add_tmp_id(transect_lines, x = get(crosswalk_id))$tmp_id) - - # Filter down to ONLY points that were finalized and rectified from rectify_cs_pts() - # Remove old transect_lines that have "tmp_id" in "extended_pts" (transect_lines that were unchanged and are "good_to_go") - # and then replace with old transect_lines with the "update_transect_lines" - out_transect_lines <- - transect_lines %>% - hydrofabric3D::add_tmp_id(x = crosswalk_id) %>% - dplyr::filter(tmp_id %in% cs_pt_uids) %>% - dplyr::filter(!tmp_id %in% unique(extended_pts$tmp_id)) %>% - dplyr::bind_rows( - dplyr::mutate(update_transect_lines, is_extended = TRUE) - ) - - # transect_lines %>% - # hydrofabric3D::add_tmp_id(x = "hy_id") %>% - # # dplyr::filter(!tmp_id %in% unique(extended_pts$tmp_id)) %>% - # dplyr::filter(tmp_id %in% unique(hydrofabric3D::add_tmp_id(fixed_pts, x = "hy_id")$tmp_id)) %>% # Subset down to the remaining tmp_ids in the fixed points - # dplyr::filter(!tmp_id %in% unique(extended_pts$tmp_id)) %>% # remove the tmp_ids that we are going add back in with the extended versions of those tmp_ids - # dplyr::bind_rows( # bring in the new updated extended transect_lines - # dplyr::mutate( - # update_transect_lines, - # is_extended = TRUE - # ) - # ) - } else { - # If no transect_lines were extended - out_transect_lines <- - transect_lines %>% - hydrofabric3D::add_tmp_id(x = crosswalk_id) %>% - dplyr::filter(tmp_id %in% cs_pt_uids) %>% - # dplyr::filter(tmp_id %in% unique(hydrofabric3D::add_tmp_id(fixed_cs_pts, x = get(crosswalk_id))$tmp_id)) %>% - dplyr::filter(!tmp_id %in% unique(extended_pts$tmp_id)) +# Directory tree: +# base_dir/ +# └── domain_dirname/ +# ├── flowlines/ +# ├── dem/ +# ├── transects/ +# ├── cross_sections/ +# └── cs_pts/ +create_new_domain_dirs <- function(base_dir, domain_dirname, with_output = FALSE) { + + # build paths + domain_dir <- paste0(base_dir, "/", domain_dirname) + flowlines_dir <- paste0(domain_dir, "/flowlines") + domain_subset_dir <- paste0(domain_dir, "/domain_subset") + dem_dir <- paste0(domain_dir, "/dem") + transects_dir <- paste0(domain_dir, "/transects") + cross_sections_dir <- paste0(domain_dir, "/cross_sections") + cs_pts_dir <- paste0(domain_dir, "/cs_pts") + vpu_subsets_dir <- paste0(domain_dir, "/vpu-subsets") + + if(with_output) { + output_dir <- paste0(domain_dir, "/outputs") } - # Finalize new transect_lines - out_transect_lines <- - out_transect_lines %>% - dplyr::left_join( - point_type_counts, - by = c(crosswalk_id, "cs_id") - ) %>% - dplyr::left_join( - dplyr::ungroup( - dplyr::slice( - dplyr::group_by( - dplyr::select(sf::st_drop_geometry(fixed_cs_pts), - dplyr::any_of(crosswalk_id), - cs_id, bottom, left_bank, right_bank, valid_banks, has_relief - ), - dplyr::across(dplyr::any_of(c(crosswalk_id, "cs_id"))) - ), - 1 - ) - ), - by = c(crosswalk_id, "cs_id") - ) %>% - dplyr::select( - dplyr::any_of(crosswalk_id), - cs_source, cs_id, cs_measure, cs_lengthm, - # sinuosity, - is_extended, - left_bank_count, right_bank_count, channel_count, bottom_count, - bottom, left_bank, right_bank, valid_banks, has_relief, - geometry - ) %>% - dplyr::mutate( - is_extended = ifelse(is.na(is_extended), FALSE, is_extended) - ) + # create directories + create_if_not_exists(domain_dir) + create_if_not_exists(flowlines_dir) + create_if_not_exists(domain_subset_dir) + create_if_not_exists(dem_dir) + create_if_not_exists(transects_dir) + create_if_not_exists(cross_sections_dir) + create_if_not_exists(cs_pts_dir) + 
create_if_not_exists(vpu_subsets_dir) + + if(with_output) { + create_if_not_exists(output_dir) + } - return(out_transect_lines) } +# get path strings for a domain dir (based of a base dir and domain dirname) +# NOTE: this does NOT guarentee that these folders exist, +# NOTE: it just gets the paths if they were created by create_new_domain_dirs() +get_new_domain_paths <- function(base_dir, domain_dirname, with_output = FALSE) { + + # build paths + domain_dir <- paste0(base_dir, "/", domain_dirname) + flowlines_dir <- paste0(domain_dir, "/flowlines") + domain_subset_dir <- paste0(domain_dir, "/domain_subset") + dem_dir <- paste0(domain_dir, "/dem") + transects_dir <- paste0(domain_dir, "/transects") + cross_sections_dir <- paste0(domain_dir, "/cross_sections") + cs_pts_dir <- paste0(domain_dir, "/cs_pts") + vpu_subsets_dir <- paste0(domain_dir, "/vpu-subsets") + output_dir <- ifelse(with_output, paste0(domain_dir, "/outputs"), NA) + + + # named list of file paths + return( + list( + base_dir = base_dir, + domain_dir = domain_dir, + flowlines_dir = flowlines_dir, + domain_subset_dir = domain_subset_dir, + dem_dir = dem_dir, + transects_dir = transects_dir, + cross_sections_dir = cross_sections_dir, + cs_pts_dir = cs_pts_dir, + vpu_subsets_dir = vpu_subsets_dir, + output_dir = output_dir + ) + ) + +} + +download_3dep_vrt <- function(base_dir) { + + ## Cmd+A/Cmd+C from: http://prd-tnm.s3.amazonaws.com/index.html?prefix=StagedProducts/Elevation/13/TIFF/current/ + ### paste w/ `datapasta::vector_paste_vertical()` + ### Some manual cleaning of header and footer mess... + ### Reason? Un-scrapable page, and no index.gpkg... + + t <- c( + "0 n06e162/", + "0 n06e163/", + "0 n07e134/", + "0 n07e151/", + "0 n07e152/", + "0 n07e158/", + "0 n08e134/", + "0 n08e151/", + "0 n08e152/", + "0 n08e158/", + "0 n09e134/", + "0 n10e138/", + "0 n14e144/", + "0 n15e145/", + "0 n16e145/", + "0 n18w065/", + "0 n18w066/", + "0 n18w067/", + "0 n18w068/", + "0 n19w065/", + "0 n19w066/", + "0 n19w067/", + "0 n19w068/", + "0 n19w156/", + "0 n20w155/", + "0 n20w156/", + "0 n20w157/", + "0 n21w156/", + "0 n21w157/", + "0 n21w158/", + "0 n22w157/", + "0 n22w158/", + "0 n22w159/", + "0 n22w160/", + "0 n22w161/", + "0 n23w160/", + "0 n23w161/", + "0 n25w081/", + "0 n25w082/", + "0 n25w083/", + "0 n26w081/", + "0 n26w082/", + "0 n26w098/", + "0 n26w099/", + "0 n27w081/", + "0 n27w082/", + "0 n27w083/", + "0 n27w098/", + "0 n27w099/", + "0 n27w100/", + "0 n28w081/", + "0 n28w082/", + "0 n28w083/", + "0 n28w097/", + "0 n28w098/", + "0 n28w099/", + "0 n28w100/", + "0 n28w101/", + "0 n29w081/", + "0 n29w082/", + "0 n29w083/", + "0 n29w090/", + "0 n29w096/", + "0 n29w097/", + "0 n29w098/", + "0 n29w099/", + "0 n29w100/", + "0 n29w101/", + "0 n29w104/", + "0 n30w081/", + "0 n30w082/", + "0 n30w083/", + "0 n30w084/", + "0 n30w085/", + "0 n30w086/", + "0 n30w089/", + "0 n30w090/", + "0 n30w091/", + "0 n30w092/", + "0 n30w093/", + "0 n30w094/", + "0 n30w095/", + "0 n30w096/", + "0 n30w097/", + "0 n30w098/", + "0 n30w099/", + "0 n30w100/", + "0 n30w101/", + "0 n30w102/", + "0 n30w103/", + "0 n30w104/", + "0 n30w105/", + "0 n31w082/", + "0 n31w083/", + "0 n31w084/", + "0 n31w085/", + "0 n31w086/", + "0 n31w087/", + "0 n31w088/", + "0 n31w089/", + "0 n31w090/", + "0 n31w091/", + "0 n31w092/", + "0 n31w093/", + "0 n31w094/", + "0 n31w095/", + "0 n31w096/", + "0 n31w097/", + "0 n31w098/", + "0 n31w099/", + "0 n31w100/", + "0 n31w101/", + "0 n31w102/", + "0 n31w103/", + "0 n31w104/", + "0 n31w105/", + "0 n31w106/", + "0 n31w107/", + "0 
n32w081/", + "0 n32w082/", + "0 n32w083/", + "0 n32w084/", + "0 n32w085/", + "0 n32w086/", + "0 n32w087/", + "0 n32w088/", + "0 n32w089/", + "0 n32w090/", + "0 n32w091/", + "0 n32w092/", + "0 n32w093/", + "0 n32w094/", + "0 n32w095/", + "0 n32w096/", + "0 n32w097/", + "0 n32w098/", + "0 n32w099/", + "0 n32w100/", + "0 n32w101/", + "0 n32w102/", + "0 n32w103/", + "0 n32w104/", + "0 n32w105/", + "0 n32w106/", + "0 n32w107/", + "0 n32w108/", + "0 n32w109/", + "0 n32w110/", + "0 n32w111/", + "0 n32w112/", + "0 n32w113/", + "0 n32w114/", + "0 n33w080/", + "0 n33w081/", + "0 n33w082/", + "0 n33w083/", + "0 n33w084/", + "0 n33w085/", + "0 n33w086/", + "0 n33w087/", + "0 n33w088/", + "0 n33w089/", + "0 n33w090/", + "0 n33w091/", + "0 n33w092/", + "0 n33w093/", + "0 n33w094/", + "0 n33w095/", + "0 n33w096/", + "0 n33w097/", + "0 n33w098/", + "0 n33w099/", + "0 n33w100/", + "0 n33w101/", + "0 n33w102/", + "0 n33w103/", + "0 n33w104/", + "0 n33w105/", + "0 n33w106/", + "0 n33w107/", + "0 n33w108/", + "0 n33w109/", + "0 n33w110/", + "0 n33w111/", + "0 n33w112/", + "0 n33w113/", + "0 n33w114/", + "0 n33w115/", + "0 n33w116/", + "0 n33w117/", + "0 n33w118/", + "0 n33w119/", + "0 n34w078/", + "0 n34w079/", + "0 n34w080/", + "0 n34w081/", + "0 n34w082/", + "0 n34w083/", + "0 n34w084/", + "0 n34w085/", + "0 n34w086/", + "0 n34w087/", + "0 n34w088/", + "0 n34w089/", + "0 n34w090/", + "0 n34w091/", + "0 n34w092/", + "0 n34w093/", + "0 n34w094/", + "0 n34w095/", + "0 n34w096/", + "0 n34w097/", + "0 n34w098/", + "0 n34w099/", + "0 n34w100/", + "0 n34w101/", + "0 n34w102/", + "0 n34w103/", + "0 n34w104/", + "0 n34w105/", + "0 n34w106/", + "0 n34w107/", + "0 n34w108/", + "0 n34w109/", + "0 n34w110/", + "0 n34w111/", + "0 n34w112/", + "0 n34w113/", + "0 n34w114/", + "0 n34w115/", + "0 n34w116/", + "0 n34w117/", + "0 n34w118/", + "0 n34w119/", + "0 n34w120/", + "0 n34w121/", + "0 n35w076/", + "0 n35w077/", + "0 n35w078/", + "0 n35w079/", + "0 n35w080/", + "0 n35w081/", + "0 n35w082/", + "0 n35w083/", + "0 n35w084/", + "0 n35w085/", + "0 n35w086/", + "0 n35w087/", + "0 n35w088/", + "0 n35w089/", + "0 n35w090/", + "0 n35w091/", + "0 n35w092/", + "0 n35w093/", + "0 n35w094/", + "0 n35w095/", + "0 n35w096/", + "0 n35w097/", + "0 n35w098/", + "0 n35w099/", + "0 n35w100/", + "0 n35w101/", + "0 n35w102/", + "0 n35w103/", + "0 n35w104/", + "0 n35w105/", + "0 n35w106/", + "0 n35w107/", + "0 n35w108/", + "0 n35w109/", + "0 n35w110/", + "0 n35w111/", + "0 n35w112/", + "0 n35w113/", + "0 n35w114/", + "0 n35w115/", + "0 n35w116/", + "0 n35w117/", + "0 n35w118/", + "0 n35w119/", + "0 n35w120/", + "0 n35w121/", + "0 n36w076/", + "0 n36w077/", + "0 n36w078/", + "0 n36w079/", + "0 n36w080/", + "0 n36w081/", + "0 n36w082/", + "0 n36w083/", + "0 n36w084/", + "0 n36w085/", + "0 n36w086/", + "0 n36w087/", + "0 n36w088/", + "0 n36w089/", + "0 n36w090/", + "0 n36w091/", + "0 n36w092/", + "0 n36w093/", + "0 n36w094/", + "0 n36w095/", + "0 n36w096/", + "0 n36w097/", + "0 n36w098/", + "0 n36w099/", + "0 n36w100/", + "0 n36w101/", + "0 n36w102/", + "0 n36w103/", + "0 n36w104/", + "0 n36w105/", + "0 n36w106/", + "0 n36w107/", + "0 n36w108/", + "0 n36w109/", + "0 n36w110/", + "0 n36w111/", + "0 n36w112/", + "0 n36w113/", + "0 n36w114/", + "0 n36w115/", + "0 n36w116/", + "0 n36w117/", + "0 n36w118/", + "0 n36w119/", + "0 n36w120/", + "0 n36w121/", + "0 n36w122/", + "0 n37w076/", + "0 n37w077/", + "0 n37w078/", + "0 n37w079/", + "0 n37w080/", + "0 n37w081/", + "0 n37w082/", + "0 n37w083/", + "0 n37w084/", + "0 n37w085/", + "0 n37w086/", + "0 
n37w087/", + "0 n37w088/", + "0 n37w089/", + "0 n37w090/", + "0 n37w091/", + "0 n37w092/", + "0 n37w093/", + "0 n37w094/", + "0 n37w095/", + "0 n37w096/", + "0 n37w097/", + "0 n37w098/", + "0 n37w099/", + "0 n37w100/", + "0 n37w101/", + "0 n37w102/", + "0 n37w103/", + "0 n37w104/", + "0 n37w105/", + "0 n37w106/", + "0 n37w107/", + "0 n37w108/", + "0 n37w109/", + "0 n37w110/", + "0 n37w111/", + "0 n37w112/", + "0 n37w113/", + "0 n37w114/", + "0 n37w115/", + "0 n37w116/", + "0 n37w117/", + "0 n37w118/", + "0 n37w119/", + "0 n37w120/", + "0 n37w121/", + "0 n37w122/", + "0 n37w123/", + "0 n38w076/", + "0 n38w077/", + "0 n38w078/", + "0 n38w079/", + "0 n38w080/", + "0 n38w081/", + "0 n38w082/", + "0 n38w083/", + "0 n38w084/", + "0 n38w085/", + "0 n38w086/", + "0 n38w087/", + "0 n38w088/", + "0 n38w089/", + "0 n38w090/", + "0 n38w091/", + "0 n38w092/", + "0 n38w093/", + "0 n38w094/", + "0 n38w095/", + "0 n38w096/", + "0 n38w097/", + "0 n38w098/", + "0 n38w099/", + "0 n38w100/", + "0 n38w101/", + "0 n38w102/", + "0 n38w103/", + "0 n38w104/", + "0 n38w105/", + "0 n38w106/", + "0 n38w107/", + "0 n38w108/", + "0 n38w109/", + "0 n38w110/", + "0 n38w111/", + "0 n38w112/", + "0 n38w113/", + "0 n38w114/", + "0 n38w115/", + "0 n38w116/", + "0 n38w117/", + "0 n38w118/", + "0 n38w119/", + "0 n38w120/", + "0 n38w121/", + "0 n38w122/", + "0 n38w123/", + "0 n38w124/", + "0 n39w075/", + "0 n39w076/", + "0 n39w077/", + "0 n39w078/", + "0 n39w079/", + "0 n39w080/", + "0 n39w081/", + "0 n39w082/", + "0 n39w083/", + "0 n39w084/", + "0 n39w085/", + "0 n39w086/", + "0 n39w087/", + "0 n39w088/", + "0 n39w089/", + "0 n39w090/", + "0 n39w091/", + "0 n39w092/", + "0 n39w093/", + "0 n39w094/", + "0 n39w095/", + "0 n39w096/", + "0 n39w097/", + "0 n39w098/", + "0 n39w099/", + "0 n39w100/", + "0 n39w101/", + "0 n39w102/", + "0 n39w103/", + "0 n39w104/", + "0 n39w105/", + "0 n39w106/", + "0 n39w107/", + "0 n39w108/", + "0 n39w109/", + "0 n39w110/", + "0 n39w111/", + "0 n39w112/", + "0 n39w113/", + "0 n39w114/", + "0 n39w115/", + "0 n39w116/", + "0 n39w117/", + "0 n39w118/", + "0 n39w119/", + "0 n39w120/", + "0 n39w121/", + "0 n39w122/", + "0 n39w123/", + "0 n39w124/", + "0 n40w075/", + "0 n40w076/", + "0 n40w077/", + "0 n40w078/", + "0 n40w079/", + "0 n40w080/", + "0 n40w081/", + "0 n40w082/", + "0 n40w083/", + "0 n40w084/", + "0 n40w085/", + "0 n40w086/", + "0 n40w087/", + "0 n40w088/", + "0 n40w089/", + "0 n40w090/", + "0 n40w091/", + "0 n40w092/", + "0 n40w093/", + "0 n40w094/", + "0 n40w095/", + "0 n40w096/", + "0 n40w097/", + "0 n40w098/", + "0 n40w099/", + "0 n40w100/", + "0 n40w101/", + "0 n40w102/", + "0 n40w103/", + "0 n40w104/", + "0 n40w105/", + "0 n40w106/", + "0 n40w107/", + "0 n40w108/", + "0 n40w109/", + "0 n40w110/", + "0 n40w111/", + "0 n40w112/", + "0 n40w113/", + "0 n40w114/", + "0 n40w115/", + "0 n40w116/", + "0 n40w117/", + "0 n40w118/", + "0 n40w119/", + "0 n40w120/", + "0 n40w121/", + "0 n40w122/", + "0 n40w123/", + "0 n40w124/", + "0 n40w125/", + "0 n41w073/", + "0 n41w074/", + "0 n41w075/", + "0 n41w076/", + "0 n41w077/", + "0 n41w078/", + "0 n41w079/", + "0 n41w080/", + "0 n41w081/", + "0 n41w082/", + "0 n41w083/", + "0 n41w084/", + "0 n41w085/", + "0 n41w086/", + "0 n41w087/", + "0 n41w088/", + "0 n41w089/", + "0 n41w090/", + "0 n41w091/", + "0 n41w092/", + "0 n41w093/", + "0 n41w094/", + "0 n41w095/", + "0 n41w096/", + "0 n41w097/", + "0 n41w098/", + "0 n41w099/", + "0 n41w100/", + "0 n41w101/", + "0 n41w102/", + "0 n41w103/", + "0 n41w104/", + "0 n41w105/", + "0 n41w106/", + "0 n41w107/", + "0 
n41w108/", + "0 n41w109/", + "0 n41w110/", + "0 n41w111/", + "0 n41w112/", + "0 n41w113/", + "0 n41w114/", + "0 n41w115/", + "0 n41w116/", + "0 n41w117/", + "0 n41w118/", + "0 n41w119/", + "0 n41w120/", + "0 n41w121/", + "0 n41w122/", + "0 n41w123/", + "0 n41w124/", + "0 n41w125/", + "0 n42w070/", + "0 n42w071/", + "0 n42w072/", + "0 n42w073/", + "0 n42w074/", + "0 n42w075/", + "0 n42w076/", + "0 n42w077/", + "0 n42w078/", + "0 n42w079/", + "0 n42w080/", + "0 n42w081/", + "0 n42w082/", + "0 n42w083/", + "0 n42w084/", + "0 n42w085/", + "0 n42w086/", + "0 n42w087/", + "0 n42w088/", + "0 n42w089/", + "0 n42w090/", + "0 n42w091/", + "0 n42w092/", + "0 n42w093/", + "0 n42w094/", + "0 n42w095/", + "0 n42w096/", + "0 n42w097/", + "0 n42w098/", + "0 n42w099/", + "0 n42w100/", + "0 n42w101/", + "0 n42w102/", + "0 n42w103/", + "0 n42w104/", + "0 n42w105/", + "0 n42w106/", + "0 n42w107/", + "0 n42w108/", + "0 n42w109/", + "0 n42w110/", + "0 n42w111/", + "0 n42w112/", + "0 n42w113/", + "0 n42w114/", + "0 n42w115/", + "0 n42w116/", + "0 n42w117/", + "0 n42w118/", + "0 n42w119/", + "0 n42w120/", + "0 n42w121/", + "0 n42w122/", + "0 n42w123/", + "0 n42w124/", + "0 n42w125/", + "0 n43w071/", + "0 n43w072/", + "0 n43w073/", + "0 n43w074/", + "0 n43w075/", + "0 n43w076/", + "0 n43w077/", + "0 n43w078/", + "0 n43w079/", + "0 n43w080/", + "0 n43w081/", + "0 n43w082/", + "0 n43w083/", + "0 n43w084/", + "0 n43w085/", + "0 n43w086/", + "0 n43w087/", + "0 n43w088/", + "0 n43w089/", + "0 n43w090/", + "0 n43w091/", + "0 n43w092/", + "0 n43w093/", + "0 n43w094/", + "0 n43w095/", + "0 n43w096/", + "0 n43w097/", + "0 n43w098/", + "0 n43w099/", + "0 n43w100/", + "0 n43w101/", + "0 n43w102/", + "0 n43w103/", + "0 n43w104/", + "0 n43w105/", + "0 n43w106/", + "0 n43w107/", + "0 n43w108/", + "0 n43w109/", + "0 n43w110/", + "0 n43w111/", + "0 n43w112/", + "0 n43w113/", + "0 n43w114/", + "0 n43w115/", + "0 n43w116/", + "0 n43w117/", + "0 n43w118/", + "0 n43w119/", + "0 n43w120/", + "0 n43w121/", + "0 n43w122/", + "0 n43w123/", + "0 n43w124/", + "0 n43w125/", + "0 n44w069/", + "0 n44w070/", + "0 n44w071/", + "0 n44w072/", + "0 n44w073/", + "0 n44w074/", + "0 n44w075/", + "0 n44w076/", + "0 n44w077/", + "0 n44w078/", + "0 n44w079/", + "0 n44w080/", + "0 n44w081/", + "0 n44w083/", + "0 n44w084/", + "0 n44w085/", + "0 n44w086/", + "0 n44w087/", + "0 n44w088/", + "0 n44w089/", + "0 n44w090/", + "0 n44w091/", + "0 n44w092/", + "0 n44w093/", + "0 n44w094/", + "0 n44w095/", + "0 n44w096/", + "0 n44w097/", + "0 n44w098/", + "0 n44w099/", + "0 n44w100/", + "0 n44w101/", + "0 n44w102/", + "0 n44w103/", + "0 n44w104/", + "0 n44w105/", + "0 n44w106/", + "0 n44w107/", + "0 n44w108/", + "0 n44w109/", + "0 n44w110/", + "0 n44w111/", + "0 n44w112/", + "0 n44w113/", + "0 n44w114/", + "0 n44w115/", + "0 n44w116/", + "0 n44w117/", + "0 n44w118/", + "0 n44w119/", + "0 n44w120/", + "0 n44w121/", + "0 n44w122/", + "0 n44w123/", + "0 n44w124/", + "0 n44w125/", + "0 n45w067/", + "0 n45w068/", + "0 n45w069/", + "0 n45w070/", + "0 n45w071/", + "0 n45w072/", + "0 n45w073/", + "0 n45w074/", + "0 n45w075/", + "0 n45w076/", + "0 n45w077/", + "0 n45w083/", + "0 n45w084/", + "0 n45w085/", + "0 n45w086/", + "0 n45w087/", + "0 n45w088/", + "0 n45w089/", + "0 n45w090/", + "0 n45w091/", + "0 n45w092/", + "0 n45w093/", + "0 n45w094/", + "0 n45w095/", + "0 n45w096/", + "0 n45w097/", + "0 n45w098/", + "0 n45w099/", + "0 n45w100/", + "0 n45w101/", + "0 n45w102/", + "0 n45w103/", + "0 n45w104/", + "0 n45w105/", + "0 n45w106/", + "0 n45w107/", + "0 n45w108/", + "0 
n45w109/", + "0 n45w110/", + "0 n45w111/", + "0 n45w112/", + "0 n45w113/", + "0 n45w114/", + "0 n45w115/", + "0 n45w116/", + "0 n45w117/", + "0 n45w118/", + "0 n45w119/", + "0 n45w120/", + "0 n45w121/", + "0 n45w122/", + "0 n45w123/", + "0 n45w124/", + "0 n45w125/", + "0 n46w068/", + "0 n46w069/", + "0 n46w070/", + "0 n46w071/", + "0 n46w072/", + "0 n46w073/", + "0 n46w074/", + "0 n46w075/", + "0 n46w084/", + "0 n46w085/", + "0 n46w086/", + "0 n46w087/", + "0 n46w088/", + "0 n46w089/", + "0 n46w090/", + "0 n46w091/", + "0 n46w092/", + "0 n46w093/", + "0 n46w094/", + "0 n46w095/", + "0 n46w096/", + "0 n46w097/", + "0 n46w098/", + "0 n46w099/", + "0 n46w100/", + "0 n46w101/", + "0 n46w102/", + "0 n46w103/", + "0 n46w104/", + "0 n46w105/", + "0 n46w106/", + "0 n46w107/", + "0 n46w108/", + "0 n46w109/", + "0 n46w110/", + "0 n46w111/", + "0 n46w112/", + "0 n46w113/", + "0 n46w114/", + "0 n46w115/", + "0 n46w116/", + "0 n46w117/", + "0 n46w118/", + "0 n46w119/", + "0 n46w120/", + "0 n46w121/", + "0 n46w122/", + "0 n46w123/", + "0 n46w124/", + "0 n46w125/", + "0 n47w068/", + "0 n47w069/", + "0 n47w070/", + "0 n47w071/", + "0 n47w084/", + "0 n47w085/", + "0 n47w086/", + "0 n47w087/", + "0 n47w088/", + "0 n47w089/", + "0 n47w090/", + "0 n47w091/", + "0 n47w092/", + "0 n47w093/", + "0 n47w094/", + "0 n47w095/", + "0 n47w096/", + "0 n47w097/", + "0 n47w098/", + "0 n47w099/", + "0 n47w100/", + "0 n47w101/", + "0 n47w102/", + "0 n47w103/", + "0 n47w104/", + "0 n47w105/", + "0 n47w106/", + "0 n47w107/", + "0 n47w108/", + "0 n47w109/", + "0 n47w110/", + "0 n47w111/", + "0 n47w112/", + "0 n47w113/", + "0 n47w114/", + "0 n47w115/", + "0 n47w116/", + "0 n47w117/", + "0 n47w118/", + "0 n47w119/", + "0 n47w120/", + "0 n47w121/", + "0 n47w122/", + "0 n47w123/", + "0 n47w124/", + "0 n47w125/", + "0 n48w068/", + "0 n48w069/", + "0 n48w070/", + "0 n48w087/", + "0 n48w088/", + "0 n48w089/", + "0 n48w090/", + "0 n48w091/", + "0 n48w092/", + "0 n48w093/", + "0 n48w094/", + "0 n48w095/", + "0 n48w096/", + "0 n48w097/", + "0 n48w098/", + "0 n48w099/", + "0 n48w100/", + "0 n48w101/", + "0 n48w102/", + "0 n48w103/", + "0 n48w104/", + "0 n48w105/", + "0 n48w106/", + "0 n48w107/", + "0 n48w108/", + "0 n48w109/", + "0 n48w110/", + "0 n48w111/", + "0 n48w112/", + "0 n48w113/", + "0 n48w114/", + "0 n48w115/", + "0 n48w116/", + "0 n48w117/", + "0 n48w118/", + "0 n48w119/", + "0 n48w120/", + "0 n48w121/", + "0 n48w122/", + "0 n48w123/", + "0 n48w124/", + "0 n48w125/", + "0 n49w089/", + "0 n49w090/", + "0 n49w091/", + "0 n49w092/", + "0 n49w093/", + "0 n49w094/", + "0 n49w095/", + "0 n49w096/", + "0 n49w097/", + "0 n49w098/", + "0 n49w099/", + "0 n49w100/", + "0 n49w101/", + "0 n49w102/", + "0 n49w103/", + "0 n49w104/", + "0 n49w105/", + "0 n49w106/", + "0 n49w107/", + "0 n49w108/", + "0 n49w109/", + "0 n49w110/", + "0 n49w111/", + "0 n49w112/", + "0 n49w113/", + "0 n49w114/", + "0 n49w115/", + "0 n49w116/", + "0 n49w117/", + "0 n49w118/", + "0 n49w119/", + "0 n49w120/", + "0 n49w121/", + "0 n49w122/", + "0 n49w123/", + "0 n49w124/", + "0 n49w125/", + "0 n50w095/", + "0 n50w096/", + "0 n50w097/", + "0 n50w098/", + "0 n50w099/", + "0 n50w100/", + "0 n50w101/", + "0 n50w107/", + "0 n50w108/", + "0 n50w122/", + "0 n50w123/", + "0 n50w124/", + "0 n52e177/", + "0 n52e178/", + "0 n52e179/", + "0 n52w174/", + "0 n52w176/", + "0 n52w177/", + "0 n52w178/", + "0 n52w179/", + "0 n52w180/", + "0 n53e172/", + "0 n53e173/", + "0 n53e174/", + "0 n53e175/", + "0 n53e177/", + "0 n53e178/", + "0 n53e179/", + "0 n53w169/", + "0 n53w170/", + "0 
n53w171/", + "0 n53w172/", + "0 n53w173/", + "0 n53w174/", + "0 n53w175/", + "0 n53w176/", + "0 n53w177/", + "0 n54e172/", + "0 n54e173/", + "0 n54w167/", + "0 n54w168/", + "0 n54w169/", + "0 n54w170/", + "0 n55w131/", + "0 n55w132/", + "0 n55w133/", + "0 n55w134/", + "0 n55w160/", + "0 n55w161/", + "0 n55w162/", + "0 n55w163/", + "0 n55w164/", + "0 n55w165/", + "0 n55w166/", + "0 n55w167/", + "0 n56w130/", + "0 n56w131/", + "0 n56w132/", + "0 n56w133/", + "0 n56w134/", + "0 n56w135/", + "0 n56w156/", + "0 n56w157/", + "0 n56w159/", + "0 n56w160/", + "0 n56w161/", + "0 n56w162/", + "0 n56w163/", + "0 n56w164/", + "0 n57w131/", + "0 n57w132/", + "0 n57w133/", + "0 n57w134/", + "0 n57w135/", + "0 n57w136/", + "0 n57w153/", + "0 n57w154/", + "0 n57w155/", + "0 n57w156/", + "0 n57w157/", + "0 n57w158/", + "0 n57w159/", + "0 n57w160/", + "0 n57w161/", + "0 n57w162/", + "0 n57w170/", + "0 n57w171/", + "0 n58w133/", + "0 n58w134/", + "0 n58w135/", + "0 n58w136/", + "0 n58w137/", + "0 n58w153/", + "0 n58w154/", + "0 n58w155/", + "0 n58w156/", + "0 n58w157/", + "0 n58w158/", + "0 n58w159/", + "0 n58w170/", + "0 n58w171/", + "0 n59w134/", + "0 n59w135/", + "0 n59w136/", + "0 n59w137/", + "0 n59w138/", + "0 n59w139/", + "0 n59w152/", + "0 n59w153/", + "0 n59w154/", + "0 n59w155/", + "0 n59w156/", + "0 n59w157/", + "0 n59w158/", + "0 n59w159/", + "0 n59w160/", + "0 n59w161/", + "0 n59w162/", + "0 n59w163/", + "0 n60w135/", + "0 n60w136/", + "0 n60w137/", + "0 n60w138/", + "0 n60w139/", + "0 n60w140/", + "0 n60w141/", + "0 n60w142/", + "0 n60w143/", + "0 n60w144/", + "0 n60w145/", + "0 n60w146/", + "0 n60w147/", + "0 n60w148/", + "0 n60w149/", + "0 n60w150/", + "0 n60w151/", + "0 n60w152/", + "0 n60w153/", + "0 n60w154/", + "0 n60w155/", + "0 n60w156/", + "0 n60w157/", + "0 n60w158/", + "0 n60w159/", + "0 n60w160/", + "0 n60w161/", + "0 n60w162/", + "0 n60w163/", + "0 n60w164/", + "0 n60w165/", + "0 n60w166/", + "0 n60w167/", + "0 n60w168/", + "0 n61w140/", + "0 n61w141/", + "0 n61w142/", + "0 n61w143/", + "0 n61w144/", + "0 n61w145/", + "0 n61w146/", + "0 n61w147/", + "0 n61w148/", + "0 n61w149/", + "0 n61w150/", + "0 n61w151/", + "0 n61w152/", + "0 n61w153/", + "0 n61w154/", + "0 n61w155/", + "0 n61w156/", + "0 n61w157/", + "0 n61w158/", + "0 n61w159/", + "0 n61w160/", + "0 n61w161/", + "0 n61w162/", + "0 n61w163/", + "0 n61w164/", + "0 n61w165/", + "0 n61w166/", + "0 n61w167/", + "0 n61w168/", + "0 n61w173/", + "0 n61w174/", + "0 n62w142/", + "0 n62w143/", + "0 n62w144/", + "0 n62w145/", + "0 n62w146/", + "0 n62w147/", + "0 n62w148/", + "0 n62w149/", + "0 n62w150/", + "0 n62w151/", + "0 n62w152/", + "0 n62w153/", + "0 n62w154/", + "0 n62w155/", + "0 n62w156/", + "0 n62w157/", + "0 n62w158/", + "0 n62w159/", + "0 n62w160/", + "0 n62w161/", + "0 n62w162/", + "0 n62w163/", + "0 n62w164/", + "0 n62w165/", + "0 n62w166/", + "0 n62w167/", + "0 n63w142/", + "0 n63w143/", + "0 n63w144/", + "0 n63w145/", + "0 n63w146/", + "0 n63w147/", + "0 n63w148/", + "0 n63w149/", + "0 n63w150/", + "0 n63w151/", + "0 n63w152/", + "0 n63w153/", + "0 n63w154/", + "0 n63w155/", + "0 n63w156/", + "0 n63w157/", + "0 n63w158/", + "0 n63w159/", + "0 n63w160/", + "0 n63w161/", + "0 n63w162/", + "0 n63w163/", + "0 n63w164/", + "0 n63w165/", + "0 n63w166/", + "0 n63w167/", + "0 n63w170/", + "0 n64w141/", + "0 n64w142/", + "0 n64w143/", + "0 n64w144/", + "0 n64w145/", + "0 n64w146/", + "0 n64w147/", + "0 n64w148/", + "0 n64w149/", + "0 n64w150/", + "0 n64w151/", + "0 n64w152/", + "0 n64w153/", + "0 n64w154/", + "0 n64w155/", + "0 
n64w156/", + "0 n64w157/", + "0 n64w158/", + "0 n64w159/", + "0 n64w160/", + "0 n64w161/", + "0 n64w162/", + "0 n64w163/", + "0 n64w164/", + "0 n64w165/", + "0 n64w169/", + "0 n64w170/", + "0 n64w171/", + "0 n64w172/", + "0 n65w141/", + "0 n65w142/", + "0 n65w143/", + "0 n65w144/", + "0 n65w145/", + "0 n65w146/", + "0 n65w147/", + "0 n65w148/", + "0 n65w149/", + "0 n65w150/", + "0 n65w151/", + "0 n65w152/", + "0 n65w153/", + "0 n65w154/", + "0 n65w155/", + "0 n65w156/", + "0 n65w157/", + "0 n65w158/", + "0 n65w159/", + "0 n65w160/", + "0 n65w161/", + "0 n65w162/", + "0 n65w163/", + "0 n65w164/", + "0 n65w165/", + "0 n65w166/", + "0 n65w167/", + "0 n66w141/", + "0 n66w142/", + "0 n66w143/", + "0 n66w144/", + "0 n66w145/", + "0 n66w146/", + "0 n66w147/", + "0 n66w148/", + "0 n66w149/", + "0 n66w150/", + "0 n66w151/", + "0 n66w152/", + "0 n66w153/", + "0 n66w154/", + "0 n66w155/", + "0 n66w156/", + "0 n66w157/", + "0 n66w158/", + "0 n66w159/", + "0 n66w160/", + "0 n66w161/", + "0 n66w162/", + "0 n66w163/", + "0 n66w164/", + "0 n66w165/", + "0 n66w166/", + "0 n66w167/", + "0 n66w168/", + "0 n66w169/", + "0 n67w141/", + "0 n67w142/", + "0 n67w143/", + "0 n67w144/", + "0 n67w145/", + "0 n67w146/", + "0 n67w147/", + "0 n67w148/", + "0 n67w149/", + "0 n67w150/", + "0 n67w151/", + "0 n67w152/", + "0 n67w153/", + "0 n67w154/", + "0 n67w155/", + "0 n67w156/", + "0 n67w157/", + "0 n67w158/", + "0 n67w159/", + "0 n67w160/", + "0 n67w161/", + "0 n67w162/", + "0 n67w163/", + "0 n67w164/", + "0 n67w165/", + "0 n67w166/", + "0 n67w167/", + "0 n67w168/", + "0 n68w141/", + "0 n68w142/", + "0 n68w143/", + "0 n68w144/", + "0 n68w145/", + "0 n68w146/", + "0 n68w147/", + "0 n68w148/", + "0 n68w149/", + "0 n68w150/", + "0 n68w151/", + "0 n68w152/", + "0 n68w153/", + "0 n68w154/", + "0 n68w155/", + "0 n68w156/", + "0 n68w157/", + "0 n68w158/", + "0 n68w159/", + "0 n68w160/", + "0 n68w161/", + "0 n68w162/", + "0 n68w163/", + "0 n68w164/", + "0 n68w165/", + "0 n68w166/", + "0 n69w141/", + "0 n69w142/", + "0 n69w143/", + "0 n69w144/", + "0 n69w145/", + "0 n69w146/", + "0 n69w147/", + "0 n69w148/", + "0 n69w149/", + "0 n69w150/", + "0 n69w151/", + "0 n69w152/", + "0 n69w153/", + "0 n69w154/", + "0 n69w155/", + "0 n69w156/", + "0 n69w157/", + "0 n69w158/", + "0 n69w159/", + "0 n69w160/", + "0 n69w161/", + "0 n69w162/", + "0 n69w163/", + "0 n69w164/", + "0 n69w165/", + "0 n69w166/", + "0 n69w167/", + "0 n70w141/", + "0 n70w142/", + "0 n70w143/", + "0 n70w144/", + "0 n70w145/", + "0 n70w146/", + "0 n70w147/", + "0 n70w148/", + "0 n70w149/", + "0 n70w150/", + "0 n70w151/", + "0 n70w152/", + "0 n70w153/", + "0 n70w154/", + "0 n70w155/", + "0 n70w156/", + "0 n70w157/", + "0 n70w158/", + "0 n70w159/", + "0 n70w160/", + "0 n70w161/", + "0 n70w162/", + "0 n70w163/", + "0 n70w164/", + "0 n70w165/", + "0 n71w143/", + "0 n71w144/", + "0 n71w145/", + "0 n71w146/", + "0 n71w147/", + "0 n71w148/", + "0 n71w149/", + "0 n71w150/", + "0 n71w151/", + "0 n71w152/", + "0 n71w153/", + "0 n71w154/", + "0 n71w155/", + "0 n71w156/", + "0 n71w157/", + "0 n71w158/", + "0 n71w159/", + "0 n71w160/", + "0 n71w161/", + "0 n71w162/", + "0 n71w163/", + "0 n71w164/", + "0 n72w155/", + "0 n72w156/", + "0 n72w157/", + "0 n72w158/", + "0 s14w170/", + "0 s14w171/" + ) + + + # sub out HTML copy pattern with vsi URL + t2 <- gsub("0 ", + "/vsicurl/https://prd-tnm.s3.amazonaws.com/StagedProducts/Elevation/13/TIFF/current/", + t) + + # add file paths following NED scheme + t3 <- paste0(t2, + "USGS_13_", + basename(t2), + ".tif") + + # Write table to data-raw + 
write.table(t3, + paste0(base_dir, "/ned_list_USGS_13.txt"), + row.names = FALSE, + col.names = FALSE, + quote = FALSE) + + +} +# dem_base_dir <- "/Users/anguswatters/Desktop/transects_paper/data/dem" +# download_3dep_vrt(dem_base_dir) +# +# # Create meta data object of three NED resoruces +# ned <- data.frame(rbind( +# # c(id = "USGS_1", +# # URL = "/vsicurl/https://prd-tnm.s3.amazonaws.com/StagedProducts/Elevation/1", +# # varname = "30m elevation", +# # long_name = "30m (1 arcsec) National Elevation Dataset", +# # units = "m"), +# # +# # c(id = "USGS_2", +# # URL = "/vsicurl/https://prd-tnm.s3.amazonaws.com/StagedProducts/Elevation/2", +# # varname = "60m elevation", +# # long_name = "60m (2 arcsec) National Elevation Dataset Alaska", +# # units = "m"), +# +# c(id = "USGS_13", +# URL = "/vsicurl/https://prd-tnm.s3.amazonaws.com/StagedProducts/Elevation/13", +# varname = "10m elevation", +# long_name = "10m (1/3th arcsec) National Elevation Dataset", +# units = "m") +# )) +# +# +# +# +# # Loop over the three resolutions +# for (i in 1:length(ned)) { +# i = 1 +# +# # Define output text file path +# txt_file <- paste0(dem_base_dir, "/ned_list_", ned$id[i], "_2.txt") +# +# # Define output VRT path +# vrt_file <- paste0(paste0(dem_base_dir, "/ned_", ned$id[i], ".vrt")) +# +# # If VRT does NOT exist, build VRT +# if (!file.exists(vrt_file)) { +# +# # read the corresponding index.gpkg +# files <- sf::read_sf(ned$domain_url[i]) +# DEM_UR +# DEM_PATH <- "/vsicurl/https://prd-tnm.s3.amazonaws.com/StagedProducts/Elevation/1/TIFF/USGS_Seamless_DEM_1.vrt" +# # Build full HTTPS paths to "./current/" +# files <- c(file.path(ned$URL[i], "TIFF/current", gsub("[.]/", "", files$location))) +# +# files <- "/vsicurl/https://prd-tnm.s3.amazonaws.com/StagedProducts/Elevation/1/TIFF/USGS_Seamless_DEM_1.vrt" +# +# # write list of files to text file +# write.table(files, txt_file, row.names = FALSE, col.names = FALSE, quote = FALSE) +# +# # build VRT from text file input using GDAL system call ... 
+# system(paste("gdalbuildvrt -input_file_list", txt_file, vrt_file)) +# } +# +# logger::log_info("Finished ", ned$id[i], "...") +# } + +# +# match_transects_to_extended_cs_pts <- function(transect_lines, fixed_cs_pts, crosswalk_id, extension_pct = 0.5 ) { +# +# # transect_lines = transects +# # fixed_cs_pts = fixed_pts +# # crosswalk_id = CROSSWALK_ID +# +# fixed_cs_pts <- nhdplusTools::rename_geometry(fixed_cs_pts, "geometry") +# transect_lines <- nhdplusTools::rename_geometry(transect_lines, "geometry") +# +# # get the counts of each point type to add this data to the transect_lines dataset +# point_type_counts <- hydrofabric3D::get_point_type_counts(classified_pts = fixed_cs_pts, +# crosswalk_id = crosswalk_id) +# # Check the number of cross sections that were extended +# message("Subsetting cross section points generated after extending transect_lines...") +# +# # extract cross section points that have an "is_extended" value of TRUE +# extended_pts <- +# fixed_cs_pts %>% +# dplyr::filter(is_extended) %>% +# hydrofabric3D::add_tmp_id(x = crosswalk_id) +# +# # extended_pts %>% +# # get_unique_tmp_ids() %>% +# # length() +# +# # extract transect_lines that have a "crosswalk_id" in the "extended_pts" dataset +# update_transect_lines <- +# transect_lines %>% +# hydrofabric3D::add_tmp_id(x = crosswalk_id) %>% +# dplyr::filter(tmp_id %in% unique(extended_pts$tmp_id)) +# +# cs_pt_uids <- unique(hydrofabric3D::add_tmp_id(fixed_cs_pts, x = crosswalk_id)$tmp_id) +# +# # If any transect_lines were extended, update the transect_lines dataset, and overwrite local and S3 transect_lines geopackages +# if (nrow(update_transect_lines) > 0) { +# message("Updating ", nrow(update_transect_lines), " transect_lines") +# +# +# # update_transect_lines <- +# # update_transect_lines %>% +# # dplyr::rename(hy_id := !!sym(crosswalk_id)) +# # +# update_transect_lines <- +# update_transect_lines %>% +# # apply extend_by_percent function to each transect line: +# hydrofabric3D:::extend_by_percent( +# crosswalk_id = crosswalk_id, +# pct = extension_pct, +# length_col = "cs_lengthm" +# ) +# +# update_transect_lines <- hydroloom::rename_geometry(update_transect_lines, "geometry") +# +# # update_transect_lines <- +# # update_transect_lines %>% +# # dplyr::rename(!!sym(crosswalk_id) := hy_id) +# +# # cs_pt_uids <- unique(hydrofabric3D::add_tmp_id(fixed_cs_pts, x = get(crosswalk_id))$tmp_id) +# # transect_uids <- unique(hydrofabric3D::add_tmp_id(transect_lines, x = get(crosswalk_id))$tmp_id) +# +# # Filter down to ONLY points that were finalized and rectified from rectify_cs_pts() +# # Remove old transect_lines that have "tmp_id" in "extended_pts" (transect_lines that were unchanged and are "good_to_go") +# # and then replace with old transect_lines with the "update_transect_lines" +# out_transect_lines <- +# transect_lines %>% +# hydrofabric3D::add_tmp_id(x = crosswalk_id) %>% +# dplyr::filter(tmp_id %in% cs_pt_uids) %>% +# dplyr::filter(!tmp_id %in% unique(extended_pts$tmp_id)) %>% +# dplyr::bind_rows( +# dplyr::mutate(update_transect_lines, is_extended = TRUE) +# ) +# +# # transect_lines %>% +# # hydrofabric3D::add_tmp_id(x = "hy_id") %>% +# # # dplyr::filter(!tmp_id %in% unique(extended_pts$tmp_id)) %>% +# # dplyr::filter(tmp_id %in% unique(hydrofabric3D::add_tmp_id(fixed_pts, x = "hy_id")$tmp_id)) %>% # Subset down to the remaining tmp_ids in the fixed points +# # dplyr::filter(!tmp_id %in% unique(extended_pts$tmp_id)) %>% # remove the tmp_ids that we are going add back in with the extended versions of those tmp_ids 
+# # dplyr::bind_rows( # bring in the new updated extended transect_lines +# # dplyr::mutate( +# # update_transect_lines, +# # is_extended = TRUE +# # ) +# # ) +# } else { +# # If no transect_lines were extended +# out_transect_lines <- +# transect_lines %>% +# hydrofabric3D::add_tmp_id(x = crosswalk_id) %>% +# dplyr::filter(tmp_id %in% cs_pt_uids) %>% +# # dplyr::filter(tmp_id %in% unique(hydrofabric3D::add_tmp_id(fixed_cs_pts, x = get(crosswalk_id))$tmp_id)) %>% +# dplyr::filter(!tmp_id %in% unique(extended_pts$tmp_id)) +# } +# +# # Finalize new transect_lines +# out_transect_lines <- +# out_transect_lines %>% +# dplyr::left_join( +# point_type_counts, +# by = c(crosswalk_id, "cs_id") +# ) %>% +# dplyr::left_join( +# dplyr::ungroup( +# dplyr::slice( +# dplyr::group_by( +# dplyr::select(sf::st_drop_geometry(fixed_cs_pts), +# dplyr::any_of(crosswalk_id), +# cs_id, bottom, left_bank, right_bank, valid_banks, has_relief +# ), +# dplyr::across(dplyr::any_of(c(crosswalk_id, "cs_id"))) +# ), +# 1 +# ) +# ), +# by = c(crosswalk_id, "cs_id") +# ) %>% +# dplyr::select( +# dplyr::any_of(crosswalk_id), +# cs_source, cs_id, cs_measure, cs_lengthm, +# # sinuosity, +# is_extended, +# left_bank_count, right_bank_count, channel_count, bottom_count, +# bottom, left_bank, right_bank, valid_banks, has_relief, +# geometry +# ) %>% +# dplyr::mutate( +# is_extended = ifelse(is.na(is_extended), FALSE, is_extended) +# ) +# +# return(out_transect_lines) +# } + +# utility function for getting transects extended and +# matching cross section points that went through "get_improved_cs_pts()" and that were extended for improvement +# returns the extended version of the transects +# match_transects_to_extended_cs_pts <- function(transect_lines, fixed_cs_pts, crosswalk_id) { +# +# # transect_lines = transects +# # fixed_cs_pts = fixed_pts +# # crosswalk_id = CROSSWALK_ID +# +# fixed_cs_pts <- nhdplusTools::rename_geometry(fixed_cs_pts, "geometry") +# transect_lines <- nhdplusTools::rename_geometry(transect_lines, "geometry") +# +# # get the counts of each point type to add this data to the transect_lines dataset +# point_type_counts <- hydrofabric3D::get_point_type_counts(classified_pts = fixed_cs_pts, +# crosswalk_id = crosswalk_id) +# # Check the number of cross sections that were extended +# message("Subsetting cross section points generated after extending transect_lines...") +# +# # extract cross section points that have an "is_extended" value of TRUE +# extended_pts <- +# fixed_cs_pts %>% +# dplyr::filter(is_extended) %>% +# hydrofabric3D::add_tmp_id(x = crosswalk_id) +# +# # extended_pts %>% +# # get_unique_tmp_ids() %>% +# # length() +# +# # extract transect_lines that have a "crosswalk_id" in the "extended_pts" dataset +# update_transect_lines <- +# transect_lines %>% +# hydrofabric3D::add_tmp_id(x = crosswalk_id) %>% +# dplyr::filter(tmp_id %in% unique(extended_pts$tmp_id)) +# +# cs_pt_uids <- unique(hydrofabric3D::add_tmp_id(fixed_cs_pts, x = crosswalk_id)$tmp_id) +# +# # If any transect_lines were extended, update the transect_lines dataset, and overwrite local and S3 transect_lines geopackages +# if (nrow(update_transect_lines) > 0) { +# message("Updating ", nrow(update_transect_lines), " transect_lines") +# +# +# update_transect_lines <- +# update_transect_lines %>% +# dplyr::rename(hy_id := !!sym(crosswalk_id)) +# +# update_transect_lines <- +# update_transect_lines %>% +# # apply extend_by_percent function to each transect line: +# hydrofabric3D:::extend_by_percent( +# pct = EXTENSION_PCT, +# length_col 
= "cs_lengthm" +# ) +# +# update_transect_lines <- hydroloom::rename_geometry(update_transect_lines, "geometry") +# +# update_transect_lines <- +# update_transect_lines %>% +# dplyr::rename(!!sym(crosswalk_id) := hy_id) +# +# # cs_pt_uids <- unique(hydrofabric3D::add_tmp_id(fixed_cs_pts, x = get(crosswalk_id))$tmp_id) +# # transect_uids <- unique(hydrofabric3D::add_tmp_id(transect_lines, x = get(crosswalk_id))$tmp_id) +# +# # Filter down to ONLY points that were finalized and rectified from rectify_cs_pts() +# # Remove old transect_lines that have "tmp_id" in "extended_pts" (transect_lines that were unchanged and are "good_to_go") +# # and then replace with old transect_lines with the "update_transect_lines" +# out_transect_lines <- +# transect_lines %>% +# hydrofabric3D::add_tmp_id(x = crosswalk_id) %>% +# dplyr::filter(tmp_id %in% cs_pt_uids) %>% +# dplyr::filter(!tmp_id %in% unique(extended_pts$tmp_id)) %>% +# dplyr::bind_rows( +# dplyr::mutate(update_transect_lines, is_extended = TRUE) +# ) +# +# # transect_lines %>% +# # hydrofabric3D::add_tmp_id(x = "hy_id") %>% +# # # dplyr::filter(!tmp_id %in% unique(extended_pts$tmp_id)) %>% +# # dplyr::filter(tmp_id %in% unique(hydrofabric3D::add_tmp_id(fixed_pts, x = "hy_id")$tmp_id)) %>% # Subset down to the remaining tmp_ids in the fixed points +# # dplyr::filter(!tmp_id %in% unique(extended_pts$tmp_id)) %>% # remove the tmp_ids that we are going add back in with the extended versions of those tmp_ids +# # dplyr::bind_rows( # bring in the new updated extended transect_lines +# # dplyr::mutate( +# # update_transect_lines, +# # is_extended = TRUE +# # ) +# # ) +# } else { +# # If no transect_lines were extended +# out_transect_lines <- +# transect_lines %>% +# hydrofabric3D::add_tmp_id(x = crosswalk_id) %>% +# dplyr::filter(tmp_id %in% cs_pt_uids) %>% +# # dplyr::filter(tmp_id %in% unique(hydrofabric3D::add_tmp_id(fixed_cs_pts, x = get(crosswalk_id))$tmp_id)) %>% +# dplyr::filter(!tmp_id %in% unique(extended_pts$tmp_id)) +# } +# +# # Finalize new transect_lines +# out_transect_lines <- +# out_transect_lines %>% +# dplyr::left_join( +# point_type_counts, +# by = c(crosswalk_id, "cs_id") +# ) %>% +# dplyr::left_join( +# dplyr::ungroup( +# dplyr::slice( +# dplyr::group_by( +# dplyr::select(sf::st_drop_geometry(fixed_cs_pts), +# dplyr::any_of(crosswalk_id), +# cs_id, bottom, left_bank, right_bank, valid_banks, has_relief +# ), +# dplyr::across(dplyr::any_of(c(crosswalk_id, "cs_id"))) +# ), +# 1 +# ) +# ), +# by = c(crosswalk_id, "cs_id") +# ) %>% +# dplyr::select( +# dplyr::any_of(crosswalk_id), +# cs_source, cs_id, cs_measure, cs_lengthm, +# # sinuosity, +# is_extended, +# left_bank_count, right_bank_count, channel_count, bottom_count, +# bottom, left_bank, right_bank, valid_banks, has_relief, +# geometry +# ) %>% +# dplyr::mutate( +# is_extended = ifelse(is.na(is_extended), FALSE, is_extended) +# ) +# +# return(out_transect_lines) +# } + diff --git a/runners/cs_runner2/base_variables.R b/runners/cs_runner2/base_variables.R new file mode 100644 index 0000000..79792c2 --- /dev/null +++ b/runners/cs_runner2/base_variables.R @@ -0,0 +1,396 @@ +### EDIT BASE_DIR, AWS_PROFILE, and DEM_PATH ### +# load utils +source("runners/cs_runner2/utils.R") + +# --------------------------------------------------------------------------------- +# ---- General paths and constants variables ---- +# - edit to match your local environment +# - BASE_DIR: base directory for local file storage +# - AWS_PROFILE: AWS profile to run CLI commands +# - VERSION: S3 prefix/folder 
of version to run / generate hydrofabric data for
+# ---------------------------------------------------------------------------------
+# Base directory for local file storage
+BASE_DIR <- '/Volumes/T7SSD/lynker-spatial'
+# BASE_DIR <- '/Users/anguswatters/Desktop/lynker-spatial'
+
+BASE_DIRS_LIST <- get_base_dir_paths(BASE_DIR)
+
+# AWS profile to run CLI commands
+AWS_PROFILE <- "angus-lynker"
+
+# S3 prefix/folder of the version to run
+VERSION       <- "v3.0"
+CONUS_VERSION <- "v2.2"
+
+VERSION_DIRS_LIST <- get_version_base_dir_paths(BASE_DIR, VERSION)
+
+# string to fill in "cs_source" column in output datasets
+CS_SOURCE <- "hydrofabric3D"
+
+# -------------------------------------------------------------------------------------
+# ---- S3 BUCKET NAMES ----
+# -------------------------------------------------------------------------------------
+
+# name of bucket with nextgen data
+LYNKER_SPATIAL_S3_BUCKET_NAME <- "lynker-spatial"
+LYNKER_SPATIAL_HF_S3_PREFIX   <- "hydrofabric"
+
+# AWS S3 bucket URI
+LYNKER_SPATIAL_BASE_S3_URI <- paste0("s3://", LYNKER_SPATIAL_S3_BUCKET_NAME, "/")
+LYNKER_SPATIAL_HF_S3_URI   <- paste0(LYNKER_SPATIAL_BASE_S3_URI, LYNKER_SPATIAL_HF_S3_PREFIX, "/")
+
+# name of bucket with FEMA 100 year flood plain data
+LYNKER_HF_S3_BUCKET_NAME <- "lynker-hydrofabric"
+
+# -------------------------------------------------------------------------------------
+# ---- VPU IDs ----
+# -------------------------------------------------------------------------------------
+VPU_IDS <- c('01', '02', '03N', '03S', '03W',
+             '04', '05', '06', '07', '08', '09',
+             '10L', '10U', '11', '12', '13', '14',
+             '15', '16', '17', '18', '20', '21')
+# VPU_IDS <- sf::st_drop_geometry(nhdplusTools::get_boundaries())$VPUID
+
+# -------------------------------------------------------------------------------------
+# ---- CONUS NEXTGEN ----
+# -------------------------------------------------------------------------------------
+
+CONUS_NEXTGEN_S3_URI    <- paste0("s3://", LYNKER_SPATIAL_S3_BUCKET_NAME, "/", LYNKER_SPATIAL_HF_S3_PREFIX, "/", CONUS_VERSION, "/conus/conus_nextgen.gpkg")
+CONUS_NEXTGEN_GPKG_PATH <- file.path(VERSION_DIRS_LIST$network_dir, "conus_nextgen.gpkg")
+
+# -------------------------------------------------------------------------------------
+# ---- CONUS REFERENCE FEATURES ----
+# -------------------------------------------------------------------------------------
+
+CONUS_REF_FEATURES_S3_URI    <- paste0("s3://", LYNKER_SPATIAL_S3_BUCKET_NAME, "/", LYNKER_SPATIAL_HF_S3_PREFIX, "/", CONUS_VERSION, "/conus/conus_reference.gpkg")
+CONUS_REF_FEATURES_GPKG_PATH <- file.path(VERSION_DIRS_LIST$ref_features_dir, "conus_reference.gpkg")
+
+# -------------------------------------------------------------------------------------
+# ---- CONUS ML DATA ----
+# -------------------------------------------------------------------------------------
+
+CONUS_ML_S3_URI       <- paste0("s3://", LYNKER_SPATIAL_S3_BUCKET_NAME, "/", LYNKER_SPATIAL_HF_S3_PREFIX, "/", CONUS_VERSION, "/conus/bathymetry/ml_auxiliary_data.parquet")
+CONUS_ML_PARQUET_PATH <- file.path(VERSION_DIRS_LIST$ml_dir, "ml_auxiliary_data.parquet")
+
+# -------------------------------------------------------------------------------------
+# ---- FEMA 100 year flood plain data ----
+# -------------------------------------------------------------------------------------
+
+LYNKER_HF_FEMA_S3_PREFIX <- "FEMA100"
+LYNKER_HF_FEMA_S3_URI    <- paste0("s3://", LYNKER_HF_S3_BUCKET_NAME, "/", LYNKER_HF_FEMA_S3_PREFIX, "/")
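+
+# Example (a sketch, not executed when this file is sourced): the URI above can be
+# handed to the list_s3_objects() helper from runners/cs_runner2/utils.R to inspect
+# the available FEMA FGB objects; the ".fgb$" pattern is illustrative:
+# fema_fgb_keys <- list_s3_objects(
+#   s3_bucket   = LYNKER_HF_FEMA_S3_URI,
+#   pattern     = ".fgb$",
+#   aws_profile = AWS_PROFILE
+# )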
+
+FEMA_FGB_PATH       <- BASE_DIRS_LIST$fema_fgb_dir
+FEMA_GEOJSON_PATH   <- BASE_DIRS_LIST$fema_geojson_dir
+FEMA_CLEAN_PATH     <- BASE_DIRS_LIST$fema_clean_dir
+FEMA_GPKG_PATH      <- BASE_DIRS_LIST$fema_gpkg_dir
+FEMA_BY_VPU_PATH    <- BASE_DIRS_LIST$fema_by_vpu_dir
+FEMA_VPU_SUBFOLDERS <- BASE_DIRS_LIST$fema_by_vpu_subdirs
+
+# -------------------------------------------------------------------------------------
+# ---- CONUS CS extension polygons ----
+# Data derived from FEMA 100 year flood plain data; puts all FEMA VPU datasets into a
+# single geopackage for CONUS
+# -------------------------------------------------------------------------------------
+
+CS_EXTENSION_POLYGONS_DIR <- BASE_DIRS_LIST$cs_extension_polygons_dir
+CONUS_FEMA_GPKG_PATH      <- file.path(CS_EXTENSION_POLYGONS_DIR, 'conus_fema.gpkg')
+
+# -------------------------------------------------------------------------------------
+# ---- DEM directories and source ----
+# -------------------------------------------------------------------------------------
+
+DEM_DIR     <- BASE_DIRS_LIST$dem_dir
+DEM_VRT_DIR <- BASE_DIRS_LIST$dem_vrt_dir
+DEM_TIF_DIR <- BASE_DIRS_LIST$dem_tif_dir
+
+# DEM URL
+DEM_PATH <- "/vsicurl/https://prd-tnm.s3.amazonaws.com/StagedProducts/Elevation/1/TIFF/USGS_Seamless_DEM_1.vrt"
+
+# ----------------------------------------------------------------------------
+# ---- Cross section point extraction constant variables ----
+# ----------------------------------------------------------------------------
+
+# Scale argument for the cross_section_pts() function:
+# the percentage of a transect line's length by which to try extending the transect
+# when searching for viable Z values. The default is 50% of the original transect
+# line's length (0.5).
+EXTENSION_PCT <- 0.5
+
+# Percentage of each cross section's length used as the threshold for classifying a
+# cross section as having relief or not. 1% of the cross section's length is the
+# default value we are using (i.e. a 100m long cross section needs a minimum of
+# 1 meter (1%) of relief in its cross section points to be classified as "having relief").
+PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF <- 0.01
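+
+# Worked example of how these two constants behave (illustrative only; the variable
+# names below are hypothetical and the actual checks run inside hydrofabric3D):
+# cs_lengthm      <- 250
+# min_relief_m    <- cs_lengthm * PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF  # 250 * 0.01 = 2.5m
+# max_extension_m <- cs_lengthm * EXTENSION_PCT                           # 250 * 0.50 = 125m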
+
+# # -------------------------------------------------------------------------------------
+# # ---- Create local directory / path variables (FEMA data) ----
+# # -------------------------------------------------------------------------------------

+# LYNKER_HF_FEMA_S3_PREFIX <- "FEMA100"
+# LYNKER_HF_FEMA_S3_URI <- paste0("s3://", LYNKER_HF_S3_BUCKET_NAME, "/", LYNKER_HF_FEMA_S3_PREFIX, "/")

+# # location of FEMA 100 year flood plain FGB files
+# FEMA_S3_BUCKET <- "s3://lynker-hydrofabric/"
+# LYNKER_HF_FEMA_S3_PREFIX <- "FEMA100/"
+# LYNKER_HF_FEMA_S3_URI <- paste0(FEMA_S3_BUCKET, LYNKER_HF_FEMA_S3_PREFIX)

+# # FEMA100 year flood map FGB save location (temporary, will be deleted after processing)
+# FEMA_FGB_PATH <- file.path(BASE_DIRS_LIST$fema_dir, "fema_fgb")
+# FEMA_GEOJSON_PATH <- file.path(BASE_DIRS_LIST$fema_dir, "fema_geojson")
+# FEMA_CLEAN_PATH <- file.path(BASE_DIRS_LIST$fema_dir, "fema_clean")
+# FEMA_GPKG_PATH <- file.path(BASE_DIRS_LIST$fema_dir, "fema_gpkg")
+# FEMA_BY_VPU_PATH <- file.path(BASE_DIRS_LIST$fema_dir, "FEMA_BY_VPU")

+# VPU_IDS <- sf::st_drop_geometry(nhdplusTools::get_boundaries())$VPUID

+# FEMA_VPU_SUBFOLDERS <- paste0(FEMA_BY_VPU_PATH, "/VPU_", VPU_IDS)

+# CS_EXTENSION_POLYGONS_DIR <- paste0(BASE_DIR, "/cs-extension-polygons")
+# CONUS_FEMA_GPKG_PATH <- file.path(CS_EXTENSION_POLYGONS_DIR, 'conus_fema.gpkg')

+# DEM_DIR <- BASE_DIRS_LIST$dem_dir
+# DEM_VRT_DIR <- BASE_DIRS_LIST$dem_vrt_dir
+# DEM_TIF_DIR <- BASE_DIRS_LIST$dem_tif_dir

+# # ----------------------------------------------------------------------------
+# # ---- Cross section point extraction constant variables ----
+# # ----------------------------------------------------------------------------

+# # DEM URL
+# DEM_PATH <- "/vsicurl/https://prd-tnm.s3.amazonaws.com/StagedProducts/Elevation/1/TIFF/USGS_Seamless_DEM_1.vrt"

+# # scale argument for cross_section_pts() function.
+# # The percentage of the length of the transect line to try and extend a transect to see if viable Z values can be found by extending transect line
+# # Default setting is 50% of the original transect lines length (0.5)
+# EXTENSION_PCT <- 0.5

+# # percentage of the length each cross section that should be used as a threshold for classifying a cross section as having relief or not
+# # 1% of the cross sections length is the default value we are using
+# # (i.e. 
a 100m long cross section needs a minimum of 1 meter (1%) of relief in its cross section points to be classified as "having relief") +# PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF <- 0.01 + + +# # ------------------------------------------------------------------------------------- +# # ---- S3 output directories ----- +# # - transects +# # - cross section points +# # - ML cross section points +# # ------------------------------------------------------------------------------------- +# +# # transect bucket prefix +# S3_TRANSECTS_DIR <- paste0(LYNKER_SPATIAL_HF_S3_URI, VERSION, "/3D/transects/") +# +# # cross section bucket prefix +# S3_CS_PTS_DIR <- paste0(LYNKER_SPATIAL_HF_S3_URI, VERSION, "/3D/dem-cross-sections/") +# +# # cross section bucket prefix +# S3_CS_ML_PTS_DIR <- paste0(LYNKER_SPATIAL_HF_S3_URI, VERSION, "/3D/cross-sections/") +# +# # ------------------------------------------------------------------------------------- +# # ---- S3 nextgen data paths / directories ----- +# # ------------------------------------------------------------------------------------- +# +# # the name of the folder in the S3 bucket with the nextgen data +# S3_BUCKET_NEXTGEN_DIR <- paste0(VERSION, "/gpkg/") +# # S3_BUCKET_NEXTGEN_DIR <- "v20.1/gpkg/" +# +# # full URI to the S3 bucket folder with the nextgen data +# S3_BUCKET_NEXTGEN_DIR_URI <- paste0(LYNKER_SPATIAL_HF_S3_URI, S3_BUCKET_NEXTGEN_DIR) +# +# # reference features S3 bucket prefix +# S3_BUCKET_REF_FEATURES_URI <- paste0("s3://", LYNKER_SPATIAL_S3_BUCKET_NAME, "/00_reference_features/gpkg/") +# # S3_BUCKET_REF_FEATURES_URI <- "s3://lynker-spatial/00_reference_features/gpkg/" +# +# # ---------------------------------------------------------------------------- +# # ---- Machine learning data path variables ---- +# # ---------------------------------------------------------------------------- +# +# ML_OUTPUTS_S3_FILE <- "channel_ml_outputs.parquet" +# +# # ML_OUTPUTS_S3_DIR <- paste0(VERSION, "/3D/ml-outputs/") +# # ML_OUTPUTS_S3_DIR <- "v20.1/3D/ml-outputs/" +# +# ML_OUTPUTS_S3_URI <- paste0(LYNKER_SPATIAL_HF_S3_URI, VERSION, "/3D/ml-outputs/", ML_OUTPUTS_S3_FILE) +# # ML_OUTPUTS_S3_URI <- paste0(LYNKER_SPATIAL_HF_S3_URI, ML_OUTPUTS_S3_DIR, ML_OUTPUTS_S3_FILE) +# +# ML_OUTPUTS_PATH <- paste0(BASE_DIR, "/ml-outputs/", ML_OUTPUTS_S3_FILE) +# +# # path to the remote CONUS net parquet file +# CONUS_NETWORK_FILE <- "conus_net.parquet" +# CONUS_NETWORK_URI <- paste0(LYNKER_SPATIAL_HF_S3_URI, VERSION, "/", CONUS_NETWORK_FILE) +# +# # ---------------------------------------------------------------------------- +# +# +# # ------------------------------------------------------------------------------------- +# # ---- Local directory / path variables ---- +# # ------------------------------------------------------------------------------------- +# +# # directory to copy nextgen bucket data too +# NEXTGEN_DIR <- paste0(BASE_DIR, "/", S3_BUCKET_NEXTGEN_DIR) +# # NEXTGEN_DIR <- paste0(BASE_DIR, "/pre-release/") +# +# # # model attributes directory +# # MODEL_ATTR_DIR <- paste0(BASE_DIR, "/model_attributes/") +# +# # cross-section data model data directories +# TRANSECTS_DIR <- paste0(BASE_DIR, "/01_transects/") +# CS_PTS_DIR <- paste0(BASE_DIR, "/02_cs_pts/") +# +# # final output directory with geopackages per VPU +# CS_OUTPUT_DIR <- paste0(BASE_DIR, "/cross_sections/") +# +# # directory to copy nextgen bucket data too +# REF_FEATURES_DIR <- paste0(BASE_DIR, "/00_reference_features/") +# REF_FEATURES_GPKG_DIR <- paste0(REF_FEATURES_DIR, "gpkg/") +# +# # make a 
directory for the ML outputs data +# ML_OUTPUTS_DIR <- paste0(BASE_DIR, "/ml-outputs/") +# +# DEM_DIR <- paste0(BASE_DIR, "/dem") +# DEM_VRT_DIR <- paste0(DEM_DIR, "/vrt") +# DEM_TIF_DIR <- paste0(DEM_DIR, "/tif") +# +# # ------------------------------------------------------------------------------------- +# # ---- Create local directory / path variables (FEMA data) ---- +# # ------------------------------------------------------------------------------------- +# +# # location of FEMA 100 year flood plain FGB files +# FEMA_S3_BUCKET <- "s3://lynker-hydrofabric/" +# LYNKER_HF_FEMA_S3_PREFIX <- "FEMA100/" +# LYNKER_HF_FEMA_S3_URI <- paste0(FEMA_S3_BUCKET, LYNKER_HF_FEMA_S3_PREFIX) +# +# # FEMA100 year flood map FGB save location (temporary, will be deleted after processing) +# FEMA_FGB_PATH <- paste0(BASE_DIR, "/FEMA100") +# FEMA_GEOJSON_PATH <- paste0(BASE_DIR, "/FEMA100_geojson") +# FEMA_CLEAN_PATH <- paste0(BASE_DIR, "/FEMA100_clean") +# FEMA_GPKG_PATH <- paste0(BASE_DIR, "/FEMA100_gpkg") +# FEMA_GPKG_BB_PATH <- paste0(BASE_DIR, "/FEMA100_bounding_box") # TODO: Probably can be deleted too, not sure yet +# +# FEMA_BY_VPU_PATH <- paste0(BASE_DIR, "/FEMA_BY_VPU") +# VPU_IDS <- sf::st_drop_geometry(nhdplusTools::get_boundaries())$VPUID +# +# FEMA_VPU_SUBFOLDERS <- paste0(FEMA_BY_VPU_PATH, "/VPU_", VPU_IDS) +# # FEMA_VPU_SUBFOLDERS <- paste0( +# # FEMA_BY_VPU_PATH, "/VPU_", +# # unlist( +# # lapply(list.files(NEXTGEN_DIR, full.names = FALSE), function(vpu_file_names) { +# # unlist(regmatches(vpu_file_names, gregexpr("\\d+[A-Za-z]*", vpu_file_names)))}) +# # ) +# # ) +# +# # ------------------------------------------------------------------------------------- +# # ---- OVERWRITE_FEMA_FILES constant logicals---- +# # ---- > if TRUE, processing steps will be run again +# # and overwrite existing previously processed files +# # TODO: Describe these variables +# # ------------------------------------------------------------------------------------- +# +# # Default is TRUE (i.e. 
a fresh processing run is done from start to finish) +# OVERWRITE_FEMA_FILES <- TRUE +# DELETE_STAGING_GPKGS <- TRUE # remove intermediary files from the main output folder +# + +# # ------------------------------------------------------------------------------------- +# # ---- (New single domain) Local directory / path variables ---- +# # ------------------------------------------------------------------------------------- +# +# # directory for new domain data +# NEW_DOMAIN_DIRNAME <- "new_domain" +# NEW_DOMAIN_DIR <- paste0(BASE_DIR, "/", NEW_DOMAIN_DIRNAME) +# +# NEW_DOMAIN_FLOWLINES_DIRNAME <- "flowlines" +# NEW_DOMAIN_FLOWLINES_DIR <- paste0(NEW_DOMAIN_DIR, "/", NEW_DOMAIN_FLOWLINES_DIRNAME) +# +# NEW_DOMAIN_DEM_DIRNAME <- "dem" +# NEW_DOMAIN_DEM_DIR <- paste0(NEW_DOMAIN_DIR, "/", NEW_DOMAIN_DEM_DIRNAME) +# +# NEW_DOMAIN_FLOWLINES_FILE <- "AllDiffusiveCombined.gpkg" +# NEW_DOMAIN_FLOWLINES_PATH <- paste0(NEW_DOMAIN_FLOWLINES_DIR, "/", NEW_DOMAIN_FLOWLINES_FILE) +# +# # # Local DEM file +# # NEW_DOMAIN_DEM_FILE <- "hi_dem.tif" +# # NEW_DOMAIN_DEM_PATH <- paste0(NEW_DOMAIN_DEM_DIR, "/", NEW_DOMAIN_DEM_FILE) +# +# # Remote DEM file +# NEW_DOMAIN_DEM_PATH <- "/vsicurl/https://prd-tnm.s3.amazonaws.com/StagedProducts/Elevation/1/TIFF/USGS_Seamless_DEM_1.vrt" +# +# NEW_DOMAIN_TRANSECTS_DIRNAME <- "transects" +# NEW_DOMAIN_CS_PTS_DIRNAME <- "cs_pts" +# NEW_DOMAIN_CROSS_SECTIONS_DIRNAME <- "cross_sections" +# +# NEW_DOMAIN_TRANSECTS_DIR <- paste0(NEW_DOMAIN_DIR, "/", NEW_DOMAIN_TRANSECTS_DIRNAME) +# NEW_DOMAIN_CS_PTS_DIR <- paste0(NEW_DOMAIN_DIR, "/", NEW_DOMAIN_CS_PTS_DIRNAME) +# NEW_DOMAIN_CROSS_SECTIONS_DIR <- paste0(NEW_DOMAIN_DIR, "/", NEW_DOMAIN_CROSS_SECTIONS_DIRNAME) +# +# # ------------------------------------------------------------------------------------- +# # ---- (New single domain) Local directory / path variables ---- +# # ------------------------------------------------------------------------------------- +# +# # directory for new domain data +# DOMAIN_WITH_FEMA_DIRNAME <- "domain_with_fema" +# DOMAIN_WITH_FEMA_DIR <- paste0(BASE_DIR, "/", DOMAIN_WITH_FEMA_DIRNAME) +# +# DOMAIN_WITH_FEMA_FLOWLINES_DIRNAME <- "flowlines" +# DOMAIN_WITH_FEMA_FLOWLINES_DIR <- paste0(DOMAIN_WITH_FEMA_DIR, "/", DOMAIN_WITH_FEMA_FLOWLINES_DIRNAME) +# +# DOMAIN_WITH_FEMA_SUBSET_DIRNAME <- "domain_subset" +# DOMAIN_WITH_FEMA_SUBSET_DIR <- paste0(DOMAIN_WITH_FEMA_DIR, "/", DOMAIN_WITH_FEMA_SUBSET_DIRNAME) +# +# DOMAIN_WITH_FEMA_DEM_DIRNAME <- "dem" +# DOMAIN_WITH_FEMA_DEM_DIR <- paste0(DOMAIN_WITH_FEMA_DIR, "/", DOMAIN_WITH_FEMA_DEM_DIRNAME) +# +# DOMAIN_WITH_FEMA_FLOWLINES_FILE <- "ls_conus.gpkg" +# DOMAIN_WITH_FEMA_FLOWLINES_PATH <- paste0(DOMAIN_WITH_FEMA_FLOWLINES_DIR, "/", DOMAIN_WITH_FEMA_FLOWLINES_FILE) +# +# # Geopackage containing area to subset flowlines to before processing +# DOMAIN_WITH_FEMA_SUBSET_FILE <- "AllDiffusiveCombined.gpkg" +# DOMAIN_WITH_FEMA_SUBSET_PATH <- paste0(DOMAIN_WITH_FEMA_SUBSET_DIR, "/", DOMAIN_WITH_FEMA_SUBSET_FILE) +# +# # # Local DEM file +# # DOMAIN_WITH_FEMA_DEM_FILE <- "hi_dem.tif" +# # DOMAIN_WITH_FEMA_DEM_PATH <- paste0(DOMAIN_WITH_FEMA_DEM_DIR, "/", DOMAIN_WITH_FEMA_DEM_FILE) +# +# # Remote DEM file +# DOMAIN_WITH_FEMA_DEM_PATH <- "/vsicurl/https://prd-tnm.s3.amazonaws.com/StagedProducts/Elevation/1/TIFF/USGS_Seamless_DEM_1.vrt" +# +# DOMAIN_WITH_FEMA_TRANSECTS_DIRNAME <- "transects" +# DOMAIN_WITH_FEMA_CS_PTS_DIRNAME <- "cs_pts" +# DOMAIN_WITH_FEMA_CROSS_SECTIONS_DIRNAME <- "cross_sections" +# DOMAIN_WITH_FEMA_OUTPUT_DIRNAME <- "outputs" +# 
DOMAIN_WITH_FEMA_VPU_SUBSETS_DIRNAME <- "vpu-subsets" +# DOMAIN_WITH_FEMA_ML_DIRNAME <- "ml" +# # aws s3 cp s3://prd-tnm/StagedProducts/Elevation/1/TIFF/USGS_Seamless_DEM_1.vrt /Users/anguswatters/Desktop/3DEP/3DEP.vrt +# DOMAIN_WITH_FEMA_TRANSECTS_DIR <- paste0(DOMAIN_WITH_FEMA_DIR, "/", DOMAIN_WITH_FEMA_TRANSECTS_DIRNAME) +# DOMAIN_WITH_FEMA_CS_PTS_DIR <- paste0(DOMAIN_WITH_FEMA_DIR, "/", DOMAIN_WITH_FEMA_CS_PTS_DIRNAME) +# DOMAIN_WITH_FEMA_CROSS_SECTIONS_DIR <- paste0(DOMAIN_WITH_FEMA_DIR, "/", DOMAIN_WITH_FEMA_CROSS_SECTIONS_DIRNAME) +# DOMAIN_WITH_FEMA_OUTPUT_DIR <- paste0(DOMAIN_WITH_FEMA_DIR, "/", DOMAIN_WITH_FEMA_OUTPUT_DIRNAME) +# DOMAIN_WITH_FEMA_VPU_SUBSETS_DIR <- paste0(DOMAIN_WITH_FEMA_DIR, "/", DOMAIN_WITH_FEMA_VPU_SUBSETS_DIRNAME) +# DOMAIN_WITH_FEMA_ML_DIR <- paste0(DOMAIN_WITH_FEMA_DIR, "/", DOMAIN_WITH_FEMA_ML_DIRNAME) +# +# ML_AUXILIARY_DATA_S3_URI <- paste0(LYNKER_SPATIAL_BASE_S3_URI, "bathymetry/ml_auxiliary_data") +# # LYNKER_SPATIAL_BASE_S3_URI +# LYNKER_HYDROFABRIC_S3_BUCKET_BASE_URI <- "s3://lynker-hydrofabric/" +# ML_BATHYMETRY_S3_DATA_DIR <- "hydrofabric/nextgen/bathymetry/multisource_river_attributes/" +# ML_BATHYMETRY_S3_URI <- paste0(LYNKER_HYDROFABRIC_S3_BUCKET_BASE_URI, ML_BATHYMETRY_S3_DATA_DIR) +# VPU_ML_BATHYMETRY_S3_DIRS <- paste0(ML_BATHYMETRY_S3_URI, "vpuid=", nhdplusTools::vpu_boundaries$VPUID, "/") +# +# COASTAL_BATHY_DEM_S3_DIR <- "coastal_bathy/diffusive_domain/" +# COASTAL_BATHY_DEM_S3_DIR_URI <- paste0(LYNKER_HYDROFABRIC_S3_BUCKET_BASE_URI, COASTAL_BATHY_DEM_S3_DIR) +# # COASTAL_BATHY_DEM_S3_URIS <- paste0(COASTAL_BATHY_DEM_S3_DIR_URI, list_s3_objects(COASTAL_BATHY_DEM_S3_DIR_URI, ".tif$", AWS_PROFILE)) +# +# +# # # # -------------------------------------------------------------------------- +# # # # ---- Get locations of diffusive domain DEM files in S3 ---- +# # # # -------------------------------------------------------------------------- +# # COASTAL_BATHY_DEM_S3_URIS <- paste0(COASTAL_BATHY_DEM_S3_DIR_URI, list_s3_objects(COASTAL_BATHY_DEM_S3_DIR_URI, ".tif$", AWS_PROFILE)) +# # +# # # COASTAL_BATHY_DEM_S3_URIS <- paste0(COASTAL_BATHY_DEM_S3_DIR_URI, +# # # list_s3_objects(COASTAL_BATHY_DEM_S3_DIR_URI, ".tif$", AWS_PROFILE) +# # # ) +# +# + + + + + + diff --git a/runners/cs_runner2/config_env.R b/runners/cs_runner2/config_env.R new file mode 100644 index 0000000..a9b0b17 --- /dev/null +++ b/runners/cs_runner2/config_env.R @@ -0,0 +1,26 @@ +# load required packages +pacman::p_load( + archive, + hydrofabric, + hydrofabric3D, + dplyr, + sf +) + +# # install.packages("devtools") +# devtools::install_github("anguswg-ucsb/hydrofabric3D") + +# load root directory +source("runners/cs_runner2/base_variables.R") +source("runners/cs_runner2/utils.R") + +sf::sf_use_s2(FALSE) + +# create empty base directories +create_local_hydrofabric_base_dirs(base_dir = BASE_DIR) + +# create a new version directory +create_new_version_dirs(base_dir = BASE_DIR, + version = VERSION, + with_output = TRUE) + diff --git a/runners/cs_runner2/download_conus_nextgen.R b/runners/cs_runner2/download_conus_nextgen.R new file mode 100644 index 0000000..80de910 --- /dev/null +++ b/runners/cs_runner2/download_conus_nextgen.R @@ -0,0 +1,28 @@ +# Running this script goes and pulls the desired NextGen geopackage datasets from http://www.lynker-spatial.com/, saves them into a directory within "BASE_DIR" +# BASE_DIR is defined within runners/workflow/root_dir.R + +# load config variables +source("runners/cs_runner2/base_variables.R") +# source("runners/cs_runner/config_vars.R") + +# 
---------------------------------------------------------------------------
+# ---- Download conus_nextgen.gpkg
+# ---------------------------------------------------------------------------
+
+copy_cmd <- paste0('aws s3 cp ', CONUS_NEXTGEN_S3_URI, " ", CONUS_NEXTGEN_GPKG_PATH)
+message("Copying S3 object:\n", CONUS_NEXTGEN_S3_URI)
+
+if (!file.exists(CONUS_NEXTGEN_GPKG_PATH)) {
+  tryCatch({
+    system(copy_cmd)
+    message("Download '", basename(CONUS_NEXTGEN_GPKG_PATH), "' complete!")
+    message("------------------")
+  }, error = function(e) {
+    message("Error downloading conus_nextgen.gpkg")
+    message(e)
+    stop()
+  })
+
+} else {
+  message("conus_nextgen.gpkg file already exists at\n > '", CONUS_NEXTGEN_GPKG_PATH, "'")
+}
diff --git a/runners/cs_runner2/download_conus_ref_features.R b/runners/cs_runner2/download_conus_ref_features.R
new file mode 100644
index 0000000..b6f6a06
--- /dev/null
+++ b/runners/cs_runner2/download_conus_ref_features.R
@@ -0,0 +1,31 @@
+# Running this script downloads the desired CONUS reference features geopackage dataset from http://www.lynker-spatial.com/ and saves it into a directory within "BASE_DIR"
+# BASE_DIR is defined within runners/cs_runner2/base_variables.R
+
+# load config variables
+source("runners/cs_runner2/base_variables.R")
+# source("runners/cs_runner/config_vars.R")
+
+# ---------------------------------------------------------------------------
+# ---- Download conus_reference.gpkg
+# ---------------------------------------------------------------------------
+
+copy_cmd <- paste0('aws s3 cp ', CONUS_REF_FEATURES_S3_URI, " ", CONUS_REF_FEATURES_GPKG_PATH)
+message("Copying S3 object:\n", CONUS_REF_FEATURES_S3_URI)
+
+if (!file.exists(CONUS_REF_FEATURES_GPKG_PATH)) {
+  tryCatch({
+    system(copy_cmd)
+    message("Download '", basename(CONUS_REF_FEATURES_GPKG_PATH), "' complete!")
+    message("------------------")
+  }, error = function(e) {
+    message("Error downloading conus_reference.gpkg")
+    message(e)
+    stop()
+  })
+
+} else {
+  message("conus_reference.gpkg file already exists at\n > '", CONUS_REF_FEATURES_GPKG_PATH, "'")
+}
+
+
+# }
\ No newline at end of file
diff --git a/runners/cs_runner2/download_dem_from_vrt.R b/runners/cs_runner2/download_dem_from_vrt.R
new file mode 100644
index 0000000..edd77bb
--- /dev/null
+++ b/runners/cs_runner2/download_dem_from_vrt.R
@@ -0,0 +1,32 @@
+library(raster)
+library(httr)
+library(terra)
+library(dplyr)
+
+# get the main config variables / paths
+# source("runners/cs_runner/config_vars.R")
+source("runners/cs_runner2/base_variables.R")
+source("runners/cs_runner2/utils.R")
+
+base_dirs <- get_base_dir_paths(BASE_DIR)
+
+DEM_VRT_DIR <- base_dirs$dem_vrt_dir
+DEM_TIF_DIR <- base_dirs$dem_tif_dir
+
+# "aws s3 sync s3://prd-tnm/StagedProducts/Elevation/1/TIFF/ /Volumes/T7SSD/lynker-spatial/dem/tif/ --no-sign-request --only-show-errors"
+
+# Parse the VRT file
+vrt_file <- list.files(DEM_VRT_DIR, full.names = TRUE)
+
+vrt_tiles <- terra::vrt_tiles(vrt_file)
+
+# tile_path <- vrt_tiles[500]
+tile_paths <- gsub("/vsicurl/", "", vrt_tiles)
+
+# TODO: use AWS S3 sync command like below
+s3_sync_cmd <- paste0("aws s3 sync s3://prd-tnm/StagedProducts/Elevation/1/TIFF/ ", DEM_TIF_DIR, " --profile ", AWS_PROFILE, " --no-sign-request --only-show-errors")
+
+# "aws s3 sync s3://prd-tnm/StagedProducts/Elevation/1/TIFF/ /Volumes/T7SSD/lynker-spatial/dem/tif/ --no-sign-request --only-show-errors"
+
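Note that system() returns the command's exit status rather than signaling an R error, so the tryCatch() patterns above will not fire when the AWS CLI itself fails; a stricter sketch (illustrative only, reusing the s3_sync_cmd built above):

# illustrative: treat a non-zero exit status from the AWS CLI as a hard error
sync_status <- system(s3_sync_cmd)
if (sync_status != 0) {
  stop("aws s3 sync failed with exit status: ", sync_status)
}

+# TODO: Old method curl request each file individually.... 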
super slow
+# error_tiles <- download_tiles(tile_paths, DEM_TIF_DIR)
\ No newline at end of file
diff --git a/runners/cs_runner2/download_fema100.R b/runners/cs_runner2/download_fema100.R
new file mode 100644
index 0000000..86b7e6d
--- /dev/null
+++ b/runners/cs_runner2/download_fema100.R
@@ -0,0 +1,123 @@
+# Running this script downloads the desired FEMA100 flood FGB datasets from the lynker-hydrofabric S3 bucket and saves them into a directory within "BASE_DIR"
+# BASE_DIR is defined within runners/cs_runner2/base_variables.R
+
+# NOTE: The lynker-hydrofabric S3 bucket is private at the moment
+
+# load config variables
+# source("runners/cs_runner/config_vars.R")
+source("runners/cs_runner2/base_variables.R")
+
+# -------------------------------------------------------------------------------------
+# ---- Create FEMA100/ directory and bounding box dir (if it does NOT exist) ----
+# -------------------------------------------------------------------------------------
+
+# if (!dir.exists(FEMA_FGB_PATH)) {
+#   message(paste0("FEMA100/ directory does not exist...\nCreating directory:\n > '", FEMA_FGB_PATH, "'"))
+#   dir.create(FEMA_FGB_PATH)
+# }
+#
+# # create geojsons directory (if not exists)
+# if (!dir.exists(FEMA_GEOJSON_PATH)) {
+#   message(paste0(FEMA_GEOJSON_PATH, " directory does not exist...\nCreating directory:\n > '", FEMA_GEOJSON_PATH, "'"))
+#   dir.create(FEMA_GEOJSON_PATH)
+# }
+#
+# # create directory for cleaned FEMA geometries (if not exists)
+# if (!dir.exists(FEMA_CLEAN_PATH)) {
+#   message(paste0(FEMA_CLEAN_PATH, " directory does not exist...\nCreating directory:\n > '", FEMA_CLEAN_PATH, "'"))
+#   dir.create(FEMA_CLEAN_PATH)
+# }
+#
+# # create directory for cleaned FEMA geometries as geopackages (if not exists)
+# if (!dir.exists(FEMA_GPKG_PATH)) {
+#   message(paste0(FEMA_GPKG_PATH, " directory does not exist...\nCreating directory:\n > '", FEMA_GPKG_PATH, "'"))
+#   dir.create(FEMA_GPKG_PATH)
+# }
+#
+# # create simplified geojsons directory (if not exists)
+# if (!dir.exists(FEMA_SIMPLIFIED_PATH)) {
+#   message(paste0(FEMA_SIMPLIFIED_PATH, " directory does not exist...\nCreating directory:\n > '", FEMA_SIMPLIFIED_PATH, "'"))
+#   dir.create(FEMA_SIMPLIFIED_PATH)
+# }
+#
+# # create dissolved geojsons directory (if not exists)
+# if (!dir.exists(FEMA_DISSOLVED_PATH)) {
+#   message(paste0(FEMA_DISSOLVED_PATH, " directory does not exist...\nCreating directory:\n > '", FEMA_DISSOLVED_PATH, "'"))
+#   dir.create(FEMA_DISSOLVED_PATH)
+# }
+#
+# # create exploded geojsons directory (if not exists)
+# if (!dir.exists(FEMA_EXPLODED_PATH)) {
+#   message(paste0(FEMA_EXPLODED_PATH, " directory does not exist...\nCreating directory:\n > '", FEMA_EXPLODED_PATH, "'"))
+#   dir.create(FEMA_EXPLODED_PATH)
+# }
+#
+# # create FEMA GPKG Bounding Boxes directory (if not exists)
+# if (!dir.exists(FEMA_GPKG_BB_PATH)) {
+#   message(paste0(FEMA_GPKG_BB_PATH, " directory does not exist...\nCreating directory:\n > '", FEMA_GPKG_BB_PATH, "'"))
+#   dir.create(FEMA_GPKG_BB_PATH)
+# }
+#
+# if (!dir.exists(FEMA_FGB_BB_PATH)) {
+#   message(paste0(FEMA_FGB_BB_PATH, " directory does not exist...\nCreating directory:\n > '", FEMA_FGB_BB_PATH, "'"))
+#   dir.create(FEMA_FGB_BB_PATH)
+# }
+
+# -------------------------------------------------------------------------------------
+# ---- Get list of FEMA FGB files in S3 bucket ----
+# -------------------------------------------------------------------------------------
+
+# list objects in the S3 bucket, and regular expression match to the ".fgb$" pattern
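The same listing can be produced with the list_s3_objects() helper defined in runners/cs_runner2/utils.R (a sketch, assuming LYNKER_HF_FEMA_S3_URI and AWS_PROFILE are set by base_variables.R):

# illustrative alternative to the inline bash script below
# FEMA_BUCKET_KEYS <- list_s3_objects(LYNKER_HF_FEMA_S3_URI, ".fgb$", AWS_PROFILE)

+fema_list_command 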
<- paste0('#!/bin/bash + # AWS S3 Bucket and Directory information + S3_BUCKET="', LYNKER_HF_FEMA_S3_URI, '" + + # Regular expression pattern to match object keys + PATTERN=".fgb$" + + # AWS CLI command to list objects in the S3 bucket and use grep to filter them + S3_OBJECTS=$(aws s3 ls "$S3_BUCKET" --profile ', AWS_PROFILE, ' | awk \'{print $4}\' | grep -E "$PATTERN") + + echo "$S3_OBJECTS"' +) + +# ------------------------------------------------------------------------------------- +# ---- Get the S3 buckets object keys for FEMA 100 FGB files ---- +# ------------------------------------------------------------------------------------- + +# Run the script to get a list of the nextgen geopackages that matched the regular expression above +FEMA_BUCKET_KEYS <- system(fema_list_command, intern = TRUE) + +# create bucket object URIs +# FEMA_BUCKET_OBJECTS <- paste0(FEMA_S3_BUCKET, LYNKER_HF_FEMA_S3_PREFIX, FEMA_BUCKET_KEYS) + +# ------------------------------------------------------------------------------------- +# ---- Download FEMA 100 year FGB files from S3 ---- +# ------------------------------------------------------------------------------------- + +# Parse the selected S3 objects keys from the FEMA100 bucket directory copy them to the local destination directory if the file does NOT exist yet +for (key in FEMA_BUCKET_KEYS) { + local_save_path <- paste0(FEMA_FGB_PATH, "/", key) + + if (!file.exists(local_save_path)) { + copy_cmd <- paste0('aws s3 cp ', LYNKER_HF_FEMA_S3_URI, key, " ", local_save_path, " --profile ", AWS_PROFILE) + + message("S3 object:\n > '", LYNKER_HF_FEMA_S3_URI, key, "'") + message("Downloading S3 object to:\n > '", local_save_path, "'") + + system(copy_cmd) + + message(" > '", key, "' download complete!") + message("----------------------------------") + } else { + message("File already exists at:\n > '", local_save_path, "'") + } +} + + + + + + + + diff --git a/runners/cs_runner/partition_fema_by_vpu.R b/runners/cs_runner2/partition_fema_by_vpu.R similarity index 69% rename from runners/cs_runner/partition_fema_by_vpu.R rename to runners/cs_runner2/partition_fema_by_vpu.R index 8e413eb..2292588 100644 --- a/runners/cs_runner/partition_fema_by_vpu.R +++ b/runners/cs_runner2/partition_fema_by_vpu.R @@ -12,9 +12,10 @@ # - Get FEMA bounding box geometries (maybe) # load config variables -source("runners/cs_runner/config_vars.R") -source("runners/cs_runner/config.R") -source("runners/cs_runner/utils.R") +# source("runners/cs_runner/config_vars.R") +# source("runners/cs_runner/config.R") +source("runners/cs_runner2/base_variables.R") +source("runners/cs_runner2/utils.R") library(dplyr) library(sf) @@ -38,46 +39,6 @@ OVERWRITE_FEMA_FILES <- TRUE DELETE_STAGING_GPKGS <- TRUE Sys.setenv(OGR_GEOJSON_MAX_OBJ_SIZE=0) -# ------------------------------------------------------------------------------------- -# ---- Create directories (if they do NOT exist) ---- -# ------------------------------------------------------------------------------------- - -# create directory for cleaned FEMA geometries as geopackages (if not exists) -if (!dir.exists(FEMA_GPKG_PATH)) { - message(paste0(FEMA_GPKG_PATH, " directory does not exist...\nCreating directory:\n > '", FEMA_GPKG_PATH, "'")) - dir.create(FEMA_GPKG_PATH) -} - -# create directory for FEMA geomteries partioned by VPU -if (!dir.exists(FEMA_BY_VPU_PATH)) { - message(paste0(FEMA_BY_VPU_PATH, " directory does not exist...\nCreating directory:\n > '", FEMA_BY_VPU_PATH, "'")) - dir.create(FEMA_BY_VPU_PATH) -} - -for (VPU_SUBFOLDER in 
FEMA_VPU_SUBFOLDERS) { - # create directory for FEMA geomteries by VPU - # message(VPU_SUBFOLDER) - - # state_dir = paste0(VPU_SUBFOLDER, "/states/") - # merged_dir = paste0(VPU_SUBFOLDER, "/merged/") - - if (!dir.exists(VPU_SUBFOLDER)) { - message("Creating FEMA VPU subfolder...") - message(paste0("'/", basename(VPU_SUBFOLDER), "' directory does not exist...\n Creating directory:\n > '", VPU_SUBFOLDER, "'")) - dir.create(VPU_SUBFOLDER) - } - # if (!dir.exists(state_dir)) { - # message("Creating FEMA VPU states subfolder...") - # message(paste0("'/", basename(state_dir), "' directory does not exist...\n Creating directory:\n > '", state_dir, "'")) - # dir.create(state_dir) - # } - # if (!dir.exists(merged_dir)) { - # message("Creating FEMA VPU merged subfolder...") - # message(paste0("'/", basename(merged_dir), "' directory does not exist...\n Creating directory:\n > '", merged_dir, "'")) - # dir.create(merged_dir) - # } -} - # ------------------------------------------------------------------------------------- # ---- Get paths to downloaded FEMA 100 FGBs ---- # ------------------------------------------------------------------------------------- @@ -86,10 +47,14 @@ FEMA_FILENAMES <- list.files(FEMA_FGB_PATH, full.names = FALSE) FEMA_FILE_PATHS <- paste0(FEMA_FGB_PATH, "/", FEMA_FILENAMES) for (file in FEMA_FILENAMES) { + # message(file) STAGING_FILES_TO_DELETE <- c() - # Convert FGB to GeoJSON + # ------------------------------------------------------------------------------------------------------------------- + # ---- Step 1: Convert FGB to GeoJSON + # ------------------------------------------------------------------------------------------------------------------- + local_fema_path <- paste0(FEMA_FGB_PATH, "/", file) geojson_filename <- gsub(".fgb", ".geojson", file) geojson_save_path <- paste0(FEMA_GPKG_PATH, "/", geojson_filename) @@ -102,6 +67,7 @@ for (file in FEMA_FILENAMES) { message(" >>> '", geojson_filename, "' already exists? ", geojson_exists) message(" >>> Overwrite? 
", OVERWRITE_FEMA_FILES) + # Step 1.1 Run FGDB to GeoJSON conversion ogr2ogr_command <- paste0("ogr2ogr ", geojson_save_path, " ", local_fema_path) if (OVERWRITE_FEMA_FILES || !geojson_exists) { @@ -111,7 +77,10 @@ for (file in FEMA_FILENAMES) { STAGING_FILES_TO_DELETE <- c(STAGING_FILES_TO_DELETE, geojson_save_path) } - # Clean GeoJSON + # ------------------------------------------------------------------------------------------------------------------- + # ---- # Step 2: Clean GeoJSON + # ------------------------------------------------------------------------------------------------------------------- + message("Simplify, dissolve, explode > '", geojson_filename, "'") output_clean_filename <- gsub(".geojson", "_clean.geojson", geojson_filename) output_clean_geojson_path <- paste0(FEMA_GPKG_PATH, "/", output_clean_filename) @@ -124,10 +93,9 @@ for (file in FEMA_FILENAMES) { ' -dissolve2 FLD_AR_ID \\', ' -simplify 0.1 visvalingam \\', ' -snap \\', - ' -o ', output_clean_geojson_path - ) - - + ' -o ', output_clean_geojson_path) + + # Step 2.1 Run simplify, dissolve, explode on cleaned GeoJSON if (OVERWRITE_FEMA_FILES || !clean_geojson_exists) { message("Running mapshaper 'simplify', 'dissolve', and 'explode' via CLI...") system(mapshaper_command) @@ -136,9 +104,11 @@ for (file in FEMA_FILENAMES) { STAGING_FILES_TO_DELETE <- c(STAGING_FILES_TO_DELETE, output_clean_geojson_path) } - # Convert cleaned GeoJSON to GeoPackage + # ------------------------------------------------------------------------------------------------------------------- + # ---- # Step 3: Convert cleaned GeoJSON to GeoPackage + # ------------------------------------------------------------------------------------------------------------------- + message("Fema 100 year flood plain:\n > '", output_clean_filename, "'") - output_gpkg_filename <- gsub("_clean.geojson", "_clean.gpkg", output_clean_filename) output_gpkg_path <- paste0(FEMA_GPKG_PATH, "/", output_gpkg_filename) @@ -155,6 +125,32 @@ for (file in FEMA_FILENAMES) { message("Writing '", output_gpkg_filename, "' to: \n > '", output_gpkg_path, "'") } + # ------------------------------------------------------------------------------------------------------------------- + # ---- Step 4: Apply final dissolve/snap and removal of internal boundaries in FEMA geometries ---- + # ------------------------------------------------------------------------------------------------------------------- + + message("Resolving internal boundaries, islands, and topology issues:\n > '", basename(output_gpkg_path), "'") + + fema <- sf::read_sf(output_gpkg_path) + fema <- resolve_internal_fema_boundaries(fema, output_gpkg_path) + + message("End time: ", Sys.time()) + + if (OVERWRITE_FEMA_FILES) { + message("Writting '", basename(output_gpkg_path), "' to: \n > '", output_gpkg_path, "'") + sf::write_sf( + fema, + # fema_clean, + output_gpkg_path + ) + } + + + + # ------------------------------------------------------------------------------------------------------------------- + # ---- Step 5: Delete intermediary files + # ------------------------------------------------------------------------------------------------------------------- + message("Deleting intermediary files\n") for (delete_file in STAGING_FILES_TO_DELETE) { if (file.exists(delete_file)) { @@ -163,7 +159,9 @@ for (file in FEMA_FILENAMES) { } } - + + rm(fema) + message() } @@ -171,87 +169,71 @@ for (file in FEMA_FILENAMES) { # 
-------------------------------------------------------------------------------------------------------------------
 # ---- Apply final dissolve/snap and removal of internal boundaries in FEMA geometries ----
 # -------------------------------------------------------------------------------------------------------------------
-# paths to FEMA 100 year flood plain files
-FEMA_gpkg_paths <- list.files(FEMA_GPKG_PATH, full.names = TRUE)
-for (file_path in FEMA_gpkg_paths) {
-  message("Resolving internal boundaries, islands, and topology issues:\n > '", basename(file_path), "'")
-
-  fema <- sf::read_sf(file_path)
-
-  fema <-
-    fema[!sf::st_is_empty(fema), ] %>%
-    sf::st_transform(5070)
-
-  # TODO: Snap using geos::geos_snap()
-  # fema <-
-  #   geos::geos_snap(
-  #     geos::as_geos_geometry(fema),
-  #     geos::as_geos_geometry(fema),
-  #     tolerance = 1
-  #   ) %>%
-  #   geos::geos_make_valid() %>%
-  #   sf::st_as_sf()
-
-  # TODO: we get this error when trying to use the geometry column after geos snapping
-  # TODO: Error = "Error: Not compatible with STRSXP: [type=NULL]."
-  # fema %>%
-  #   sf::st_cast("POLYGON")
-
-  # TODO: Snap using sf::st_snap()
-  # fema <- sf::st_snap(
-  #   fema,
-  #   fema,
-  #   tolerance = 2
-  # )
-
-  fema <-
-    fema %>%
-    # fema[!sf::st_is_empty(fema), ] %>%
-    dplyr::select(geometry = geom) %>%
-    add_predicate_group_id(sf::st_intersects) %>%
-    sf::st_make_valid() %>%
-    dplyr::group_by(group_id) %>%
-    dplyr::summarise(
-      geometry = sf::st_combine(sf::st_union(geometry))
-    ) %>%
-    dplyr::ungroup() %>%
-    dplyr::select(-group_id) %>%
-    add_predicate_group_id(sf::st_intersects) %>%
-    rmapshaper::ms_dissolve(sys = TRUE, sys_mem = 16) %>%
-    rmapshaper::ms_explode(sys = TRUE, sys_mem = 16) %>%
-    dplyr::mutate(
-      fema_id = as.character(1:dplyr::n())
-    ) %>%
-    dplyr::select(fema_id, geometry)
+
+# # paths to FEMA 100 year flood plain files
+# FEMA_gpkg_paths <- list.files(FEMA_GPKG_PATH, full.names = TRUE)
+
+# for (file_path in FEMA_gpkg_paths) {
+#   message("Resolving internal boundaries, islands, and topology issues:\n > '", basename(file_path), "'")
 
-  # mapview::mapview(fema, color = 'cyan', col.regions = "cyan") +
-  #   mapview::mapview(end_fema, color = 'red', col.regions = "white")
+#   fema <- sf::read_sf(file_path)
+
+#   fema <- resolve_internal_fema_boundaries(fema)
 
-  fema <-
-    fema %>%
-    dplyr::mutate(
-      source = basename(file_path),
-      state  = gsub("-100yr-flood_valid_clean.gpkg", "", source)
-    ) %>%
-    dplyr::select(fema_id, source, state,
-                  # areasqkm,
-                  geometry)
+#   # fema <-
+#   #   fema[!sf::st_is_empty(fema), ] %>%
+#   #   sf::st_transform(5070)
 
-  message("End time: ", Sys.time())
+#   # fema <-
+#   #   fema %>%
+#   #   # fema[!sf::st_is_empty(fema), ] %>%
+#   #   dplyr::select(geometry = geom) %>%
+#   #   add_predicate_group_id(sf::st_intersects) %>%
+#   #   sf::st_make_valid() %>%
+#   #   dplyr::group_by(group_id) %>%
+#   #   dplyr::summarise(
+#   #     geometry = sf::st_combine(sf::st_union(geometry))
+#   #   ) %>%
+#   #   dplyr::ungroup() %>%
+#   #   dplyr::select(-group_id) %>%
+#   #   add_predicate_group_id(sf::st_intersects) %>%
+#   #   rmapshaper::ms_dissolve(sys = TRUE, sys_mem = 16) %>%
+#   #   rmapshaper::ms_explode(sys = TRUE, sys_mem = 16) %>%
+#   #   dplyr::mutate(
+#   #     fema_id = as.character(1:dplyr::n())
+#   #   ) %>%
+#   #   dplyr::select(fema_id, geometry)
+
+# # # mapview::mapview(fema, color = 'cyan', col.regions = 
"cyan") + +# # # mapview::mapview(end_fema, color = 'red', col.regions = "white") + +# # fema <- +# # fema %>% +# # dplyr::mutate( +# # source = basename(file_path), +# # state = gsub("-100yr-flood_valid_clean.gpkg", "", source) +# # ) %>% +# # dplyr::select(fema_id, source, state, +# # # areasqkm, +# # geometry) + +# message("End time: ", Sys.time()) - if (OVERWRITE_FEMA_FILES) { - message("Writting '", basename(file_path), "' to: \n > '", file_path, "'") - sf::write_sf( - # fema_clean, - fema, - file_path - ) - } - message() +# if (OVERWRITE_FEMA_FILES) { +# message("Writting '", basename(file_path), "' to: \n > '", file_path, "'") +# sf::write_sf( +# # fema_clean, +# fema, +# file_path +# ) +# } +# message() -} +# } # ------------------------------------------------------------------------------------- # ---- Partion parts of each FEMA GPKGs to a Nextgen VPU ---- @@ -261,11 +243,33 @@ for (file_path in FEMA_gpkg_paths) { FEMA_CLEAN_GPKG_PATHS <- list.files(FEMA_GPKG_PATH, full.names = TRUE) # paths to nextgen datasets and model attribute parquet files -NEXTGEN_FILENAMES <- list.files(NEXTGEN_DIR, full.names = FALSE) -NEXTGEN_FILE_PATHS <- paste0(NEXTGEN_DIR, NEXTGEN_FILENAMES) +# CONUS_NEXTGEN_GPKG_PATH + +# layer_info = sf::st_layers(CONUS_NEXTGEN_GPKG_PATH) +# layer_info[layer_info$name == "flowpaths", "fields"] +# layer_inf + +# flines = sf::read_sf(CONUS_NEXTGEN_GPKG_PATH, layer = "flowpaths") +# flines %>% names() +# (flines$vpuid %>% unique()) %in% VPU_IDS +# VPU_IDS +# VPU_IDS %in% (flines$vpuid %>% unique()) + +# query the unique VPU_IDS from the flowlines layer from sf::read_sf(CONUS_NEXTGEN_GPKG_PATH, layer = "flowpaths") +CONUS_VPU_IDS <- + CONUS_NEXTGEN_GPKG_PATH %>% + sf::read_sf(query = "SELECT DISTINCT vpuid FROM flowpaths") %>% + dplyr::pull() + + +# NEXTGEN_FILENAMES <- list.files(NEXTGEN_DIR, full.names = FALSE) +# NEXTGEN_FILE_PATHS <- paste0(NEXTGEN_DIR, NEXTGEN_FILENAMES) + for (file_path in FEMA_CLEAN_GPKG_PATHS) { + # file_path = FEMA_CLEAN_GPKG_PATHS[25] + fema_file <- basename(file_path) message("Partioning FEMA polygons by VPU: \n > FEMA gpkg: '", fema_file, "'") @@ -273,16 +277,23 @@ for (file_path in FEMA_CLEAN_GPKG_PATHS) { # read in fema polygons fema <- sf::read_sf(file_path) - for (nextgen_path in NEXTGEN_FILE_PATHS) { - nextgen_basename <- basename(nextgen_path) - vpu <- unlist(regmatches(nextgen_basename, gregexpr("\\d+[A-Za-z]*", nextgen_basename))) + for (vpu in CONUS_VPU_IDS) { + + # vpu = CONUS_VPU_IDS[12] + + # nextgen_basename <- basename(nextgen_path) + # vpu <- unlist(regmatches(nextgen_basename, gregexpr("\\d+[A-Za-z]*", nextgen_basename))) message("VPU: ", vpu) - message("- nextgen gpkg:\n > '", nextgen_path, "'") - message(" > Checking if '", fema_file, "' intersects with '", nextgen_basename, "'") + # message("- nextgen gpkg:\n > '", nextgen_path, "'") + message(" > Checking if '", fema_file, "' intersects with CONUS flowpaths in VPU '", vpu, "'") # read in nextgen flowlines - flines <- sf::read_sf(nextgen_path, layer = "flowpaths") + flines <- + CONUS_NEXTGEN_GPKG_PATH %>% + sf::read_sf(query = paste0("SELECT * FROM flowpaths WHERE vpuid = '", vpu, "'")) + + # flines <- sf::read_sf(nextgen_path, layer = "flowpaths") # get the FEMA polygons that intersect with the nextgen flowlines fema_intersect <- polygons_with_line_intersects(fema, flines) @@ -294,7 +305,7 @@ for (file_path in FEMA_CLEAN_GPKG_PATHS) { if(fema_in_nextgen) { # create filepaths - vpu_subfolder <- paste0("VPU_", vpu) + vpu_subfolder <- paste0("vpu-", vpu) # vpu_subfolder_path 
      vpu_subfolder_path <- paste0(FEMA_BY_VPU_PATH, "/", vpu_subfolder)
@@ -325,7 +336,60 @@ for (file_path in FEMA_CLEAN_GPKG_PATHS) {
     }
     message()
-  }
+  }
+  # for (nextgen_path in NEXTGEN_FILE_PATHS) {
+  #   nextgen_basename <- basename(nextgen_path)
+  #   vpu              <- unlist(regmatches(nextgen_basename, gregexpr("\\d+[A-Za-z]*", nextgen_basename)))
+
+  #   message("VPU: ", vpu)
+  #   message("- nextgen gpkg:\n > '", nextgen_path, "'")
+  #   message(" > Checking if '", fema_file, "' intersects with '", nextgen_basename, "'")
+
+  #   # read in nextgen flowlines
+  #   flines <- sf::read_sf(nextgen_path, layer = "flowpaths")
+
+  #   # get the FEMA polygons that intersect with the nextgen flowlines
+  #   fema_intersect <- polygons_with_line_intersects(fema, flines)
+
+  #   fema_in_nextgen <- nrow(fema_intersect) != 0
+
+  #   message("FEMA intersects with nextgen flowlines? ", fema_in_nextgen)
+
+  #   if(fema_in_nextgen) {
+
+  #     # create filepaths
+  #     vpu_subfolder      <- paste0("VPU_", vpu)
+  #     # vpu_subfolder_path <- paste0(FEMA_BY_VPU_PATH, "/", vpu_subfolder, "/states")
+  #     vpu_subfolder_path <- paste0(FEMA_BY_VPU_PATH, "/", vpu_subfolder)
+
+  #     # vpu_subfolder_path <- FEMA_VPU_SUBFOLDERS[grepl(vpu_subfolder, FEMA_VPU_SUBFOLDERS)]
+
+  #     fema_intersect <-
+  #       fema_intersect %>%
+  #       dplyr::mutate(
+  #         vpu = vpu
+  #       ) %>%
+  #       dplyr::select(vpu, fema_id, source, state, geom)
+
+  #     # state <- gsub("-100yr-flood_valid_clean.gpkg", "", fema_file)
+
+  #     fema_vpu_filename <- gsub(".gpkg", paste0("_", vpu, ".gpkg"), fema_file)
+  #     fema_vpu_path     <- paste0(vpu_subfolder_path, "/", fema_vpu_filename)
+
+
+  #     if (OVERWRITE_FEMA_FILES) {
+  #       message("Writing '", basename(fema_vpu_filename), "' to: \n > '", fema_vpu_path, "'")
+
+  #       sf::write_sf(
+  #         fema_intersect,
+  #         fema_vpu_path
+  #       )
+  #     }
+
+
+  #   }
+  #   message()
+  # }
 
 
   message(
@@ -548,6 +612,54 @@ for (i in 1:length(FEMA_VPU_SUBFOLDERS)) {
   }
 }
+
+
+# -------------------------------------------------------------------------------------
+# ---- Store all FEMA layers in a single conus_fema.gpkg
+# -------------------------------------------------------------------------------------
+
+# define combine_gpkg_files() before its first use so this script can be sourced top to bottom
+combine_gpkg_files <- function(gpkg_paths, output_gpkg) {
+
+  layer_counter <- list()
+
+  for (gpkg_path in gpkg_paths) {
+
+    base_name <- tools::file_path_sans_ext(basename(gpkg_path))
+    # base_name <- gsub("_output.gpkg", "", basename(gpkg_path))
+
+    if (base_name %in% names(layer_counter)) {
+      layer_counter[[base_name]] <- layer_counter[[base_name]] + 1
+      layer_name                 <- paste0(base_name, "_", layer_counter[[base_name]])
+    } else {
+
+      layer_counter[[base_name]] <- 1
+      layer_name                 <- base_name
+    }
+
+    tryCatch({
+      sf_layer <- sf::st_read(gpkg_path, quiet = TRUE)
+
+      sf::st_write(sf_layer,
+                   dsn    = output_gpkg,
+                   layer  = layer_name,
+                   append = TRUE,
+                   quiet  = TRUE)
+
+      message("Successfully added '", basename(gpkg_path), "' as layer: '", layer_name, "' to\n > '", output_gpkg, "'")
+
+    }, error = function(e) {
+      warning("Error processing: ", basename(gpkg_path))
+      warning(e)
+    })
+  }
+}
+
+fema_vpu_layers <- list.files(FEMA_VPU_SUBFOLDERS, full.names = T)[grepl("_output.gpkg", list.files(FEMA_VPU_SUBFOLDERS))]
+
+combine_gpkg_files(fema_vpu_layers, '/Volumes/T7SSD/lynker-spatial/cs-extension-polygons/conus_fema.gpkg')
+
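A quick post-hoc check of the combined geopackage (a sketch; the hard-coded path mirrors the combine_gpkg_files() call above):

# illustrative: list the per-VPU layers written into the combined output
sf::st_layers('/Volumes/T7SSD/lynker-spatial/cs-extension-polygons/conus_fema.gpkg')

 
 # # -------------------------------------------------------------------------------------
 # # ---- Union each VPU geopackage (either on state or just touching predicate) ----
 # # 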
------------------------------------------------------------------------------------- diff --git a/runners/cs_runner2/utils.R b/runners/cs_runner2/utils.R new file mode 100644 index 0000000..6feff0b --- /dev/null +++ b/runners/cs_runner2/utils.R @@ -0,0 +1,2562 @@ +# Create an empty file structure +# base_dir: character, top level directory path +# Directory tree: +# base_dir/ +# └── lynker-spatial/ +# ├── hydrofabric/ +# ├── dem/ +# ├── vrt/ +# ├── tif/ +# ├── cs-extension-polygons/ +create_local_hydrofabric_base_dirs <- function(base_dir) { + + + # build paths + hydrofabric_dir <- paste0(base_dir, "/hydrofabric") + + # DEM dirs + dem_dir <- file.path(base_dir, "dem") + dem_vrt_dir <- file.path(dem_dir, "vrt") + dem_tif_dir <- file.path(dem_dir, "tif") + + # polygons for transect extensions + cs_extension_polygons_dir <- file.path(base_dir, "cs-extension-polygons") + + # FEMA data + fema_dir <- file.path(base_dir, "fema") + + fema_fgb_dir <- file.path(fema_dir, "fema-fgb") + fema_geojson_dir <- file.path(fema_dir, "fema-geojson") + fema_clean_dir <- file.path(fema_dir, "fema-clean") + fema_gpkg_dir <- file.path(fema_dir, "fema-gpkg") + + # BY VPU folders + VPU_IDS <- get_vpu_ids() + fema_by_vpu_dir <- file.path(fema_dir, "fema-by-vpu") + fema_by_vpu_subdirs <- paste0(fema_by_vpu_dir, "/vpu-", VPU_IDS) + + # create base directories + create_if_not_exists(base_dir) + create_if_not_exists(hydrofabric_dir) + + # DEM dirs + create_if_not_exists(dem_dir) + create_if_not_exists(dem_vrt_dir) + create_if_not_exists(dem_tif_dir) + + # extension polygons + create_if_not_exists(cs_extension_polygons_dir) + + + # Create FEMA folders + create_if_not_exists(fema_dir) + create_if_not_exists(fema_fgb_dir) + create_if_not_exists(fema_geojson_dir) + create_if_not_exists(fema_clean_dir) + create_if_not_exists(fema_gpkg_dir) + create_if_not_exists(fema_by_vpu_dir) + + for (path in fema_by_vpu_subdirs) { + create_if_not_exists(path) + } + +} + +get_vpu_ids <- function() { + VPU_IDS <- c('01', '02', '03N', '03S', '03W', '04', '05', '06', '07', '08', '09', + '10L', '10U', '11', '12', '13', '14', '15', '16', '17', '18', '20', '21') + # VPU_IDS <- sf::st_drop_geometry(nhdplusTools::get_boundaries())$VPUID + + return(VPU_IDS) + +} + +# # Base directory for local file storage +# BASE_DIR <- '/Volumes/T7SSD/lynker-spatial' +# base_dir <- BASE_DIR +# # FEMA100 year flood map FGB save location (temporary, will be deleted after processing) +# FEMA_FGB_PATH <- file.path(BASE_DIRS_LIST$fema_dir, "fema_fgb") +# FEMA_GEOJSON_PATH <- file.path(BASE_DIRS_LIST$fema_dir, "fema_geojson") +# FEMA_CLEAN_PATH <- file.path(BASE_DIRS_LIST$fema_dir, "fema_clean") +# FEMA_GPKG_PATH <- file.path(BASE_DIRS_LIST$fema_dir, "fema_gpkg") +# FEMA_BY_VPU_PATH <- file.path(BASE_DIRS_LIST$fema_dir, "FEMA_BY_VPU") +# +# +# VPU_IDS <- c('01', '02', '03N', '03S', '03W', '04', '05', '06', '07', '08', '09', +# '10L', '10U', '11', '12', '13', '14', '15', '16', '17', '18', '20', '21') +# # VPU_IDS <- sf::st_drop_geometry(nhdplusTools::get_boundaries())$VPUID +# +# paste0("'", sf::st_drop_geometry(nhdplusTools::get_boundaries())$VPUID, "'", collapse = ", ") +# FEMA_VPU_SUBFOLDERS <- paste0(FEMA_BY_VPU_PATH, "/VPU_", VPU_IDS) + +# Create an empty file structure for a new version within a specified base_dir +# base_dir: character, top level directory path +# Directory tree: +# base_dir/ +# └── lynker-spatial/ +# ├── hydrofabric/ + # ├── version_number/ + # ├── network/ + # ├── transects/ + # ├── cross-sections/ + # ├── dem/ + # ├── dem-ml/ + # ├── 
dem-coastal-bathy/ + # ├── dem-points/ +create_new_version_dirs <- function(base_dir, version, with_output = FALSE) { + # version = "v3.0" + # base_dir <- BASE_DIR + + # build paths + hydrofabric_dir <- paste0(base_dir, "/hydrofabric") + version_base_dir <- paste0(hydrofabric_dir, "/", version) + + # polygons for transect extensions + ml_dir <- paste0(version_base_dir, "/ml") + + # reference features + ref_features_dir <- paste0(version_base_dir, "/reference-features") + + # conus network gpkg + network_dir <- paste0(version_base_dir, "/network") + + # transects + transects_dir <- paste0(version_base_dir, "/transects") + + # cross sections dirs + cross_sections_dir <- paste0(version_base_dir, "/cross-sections") + cross_sections_dem_dir <- paste0(cross_sections_dir, "/dem") + cross_sections_ml_dir <- paste0(cross_sections_dir, "/dem-ml") + cross_sections_coastal_bathy_dir <- paste0(cross_sections_dir, "/dem-coastal-bathy") + cross_sections_dem_pts_dir <- paste0(cross_sections_dir, "/dem-points") + + if(with_output) { + output_dir <- paste0(version_base_dir, "/outputs") + } + + # create version BASE dir + create_if_not_exists(version_base_dir) + + # CONUS dir + create_if_not_exists(network_dir) + + # ML data + create_if_not_exists(ml_dir) + + # reference features data + create_if_not_exists(ref_features_dir) + + # transects + create_if_not_exists(transects_dir) + + # CS pts + create_if_not_exists(cross_sections_dir) + create_if_not_exists(cross_sections_dem_dir) + create_if_not_exists(cross_sections_ml_dir) + create_if_not_exists(cross_sections_coastal_bathy_dir) + create_if_not_exists(cross_sections_dem_pts_dir) + + if(with_output) { + create_if_not_exists(output_dir) + } + +} + +create_if_not_exists <- function(dir_path) { + if (!dir.exists(dir_path)) { + dir.create(dir_path, recursive = TRUE) + message("Created directory: '", dir_path, "'\n") + } +} + +# get a list of top level directories for main directory +get_base_dir_paths <- function(base_dir) { + # base_dir = BASE_DIR + # version = "v3.0" + + hydrofabric_dir <- file.path(base_dir, "hydrofabric") + + dem_dir <- file.path(base_dir, "dem") + dem_vrt_dir <- file.path(base_dir, "dem", "vrt") + dem_tif_dir <- file.path(base_dir, "dem", "tif") + + cs_extension_polygons_dir <- file.path(base_dir, "cs-extension-polygons") + + # FEMA data + + fema_dir <- file.path(base_dir, "fema") + + fema_fgb_dir <- file.path(fema_dir, "fema-fgb") + fema_geojson_dir <- file.path(fema_dir, "fema-geojson") + fema_clean_dir <- file.path(fema_dir, "fema-clean") + fema_gpkg_dir <- file.path(fema_dir, "fema-gpkg") + + # BY VPU folders + VPU_IDS <- get_vpu_ids() + fema_by_vpu_dir <- file.path(fema_dir, "fema-by-vpu") + fema_by_vpu_subdirs <- paste0(fema_by_vpu_dir, "/vpu-", VPU_IDS) + + return( + list( + hydrofabric_dir = hydrofabric_dir, + dem_dir = dem_dir, + dem_vrt_dir = dem_vrt_dir, + dem_tif_dir = dem_tif_dir, + cs_extension_polygons_dir = cs_extension_polygons_dir, + fema_dir = fema_dir, + fema_fgb_dir = fema_fgb_dir, + fema_geojson_dir = fema_geojson_dir, + fema_clean_dir = fema_clean_dir, + fema_gpkg_dir = fema_gpkg_dir, + fema_by_vpu_dir = fema_by_vpu_dir, + fema_by_vpu_subdirs = fema_by_vpu_subdirs + ) + ) +} + +# get list of a specific directories in a version directory +get_version_base_dir_paths <- function(base_dir, version) { + # base_dir = BASE_DIR + # version = "v3.0" + + hydrofabric_dir <- file.path(base_dir, "hydrofabric") + + version_base_dir <- file.path(hydrofabric_dir, version) + + # polygons for transect extensions + ml_dir <- 
file.path(version_base_dir, "ml") + + # reference features + ref_features_dir <- file.path(version_base_dir, "reference-features") + + # conus network gpkg + network_dir <- file.path(version_base_dir, "network") + + # transects + transects_dir <- file.path(version_base_dir, "transects") + + # cross sections dirs + cross_sections_dir <- file.path(version_base_dir, "cross-sections") + cross_sections_dem_dir <- file.path(cross_sections_dir, "dem") + cross_sections_ml_dir <- file.path(cross_sections_dir, "dem-ml") + cross_sections_coastal_bathy_dir <- file.path(cross_sections_dir, "dem-coastal-bathy") + cross_sections_dem_pts_dir <- file.path(cross_sections_dir, "dem-points") + + return( + list( + hydrofabric_dir = hydrofabric_dir, + version_base_dir = version_base_dir, + ref_features_dir = ref_features_dir, + network_dir = network_dir, + ml_dir = ml_dir, + transects_dir = transects_dir, + cross_sections_dir = cross_sections_dir, + cross_sections_dem_dir = cross_sections_dem_dir, + cross_sections_dem_pts_dir = cross_sections_dem_pts_dir, + cross_sections_ml_dir = cross_sections_ml_dir, + cross_sections_coastal_bathy_dir = cross_sections_coastal_bathy_dir + ) + ) +} + +list_s3_objects <- function(s3_bucket, pattern = NULL, aws_profile = NULL) { + + profile_option <- if (!is.null(aws_profile)) paste0("--profile ", aws_profile) else "" + + if (is.null(pattern) || pattern == "") { + grep_command <- "" # no filtering if empty or NULL + } else { + grep_command <- paste0(" | grep -E \"", pattern, "\"") # grep if a pattern is given + } + + cmd <- paste0( + '#!/bin/bash\n', + 'S3_BUCKET="', s3_bucket, '"\n', + 'PATTERN="', pattern, '"\n', + 'S3_OBJECTS=$(aws s3 ls "$S3_BUCKET" ', profile_option, ' | awk \'{print $4}\' | grep -E "$PATTERN")\n', + 'echo "$S3_OBJECTS"' + ) + # cmd <- paste0( + # '#!/bin/bash\n', + # 'S3_BUCKET="', s3_bucket, '"\n', + # 'S3_OBJECTS=$(aws s3 ls "$S3_BUCKET" ', profile_option, ' | awk \'{print $4}\'', grep_command, ')\n', + # 'echo "$S3_OBJECTS"' + # ) + ls_output <- system(cmd, intern = TRUE) + return(ls_output) +} + +download_tiles <- function(tile_paths, output_dir) { + # output_dir <- DEM_TIF_DIR + # tile_paths + # error_tiles <- + tif_save_paths <- paste0(output_dir, "/", basename(tile_paths)) + + error_tiles <- data.frame( + tile = basename(tile_paths), + status = TRUE + ) + + for (i in seq_along(tile_paths)) { + # i = 1 + tile_path <- tile_paths[i] + tif_save_path <- tif_save_paths[i] + + message("[", i, "]", + "\n > Tile: ", basename(tile_path), + "\n > Output path: ", tif_save_path + ) + + download_tif_cmd <- paste0("curl -o ", tif_save_path, " ", tile_path) + + tryCatch({ + + tif_download_output <- system(download_tif_cmd, intern = TRUE) + message(" > Succesfully downloaded tile: ", basename(tile_path)) + + }, error = function(e) { + + message("Error downloading tile: ", basename(tile_path)) + message("ERROW below: \n ", e) + + error_tiles[error_tiles$tile == basename(tile_path), ] <- FALSE + + }) + + } + + return(error_tiles) + +} + +# Given 2 character vectors of filenames both including VPU strings after a "nextgen_" string, match them together to +# make sure they are aligned and in the same order +# x is a character vector of file paths with a VPU ID preceeded by a "nextgen_" string +# y is a character vector of file paths with a VPU ID preceeded by a "nextgen_" string +# base is a character vector of the base directory of the files. 
Defaults to NULL +# Returns a dataframe with VPU, x, and y columns +align_files_by_vpu <- function( + x, + y, + base = NULL +) { + + # Regular expression pattern to match numeric pattern after "nextgen_" and remove everything after the ending period + regex_pattern <- "nextgen_(\\d+[A-Za-z]?).*" + + # path dataframe for X filepaths + x_paths <- data.frame(x = x) + + # path dataframe for Y filepaths + y_paths <- data.frame(y = y) + + # generate VPU IDs based on file path regular expression matching with "regex_pattern" above + x_paths$vpu <- gsub(regex_pattern, "\\1", x_paths$x) + y_paths$vpu <- gsub(regex_pattern, "\\1", y_paths$y) + + # match paths based on VPU column + matched_paths <- dplyr::left_join( + x_paths, + y_paths, + by = "vpu" + ) + + # reorder columns + matched_paths <- dplyr::relocate(matched_paths, vpu, x, y) + + if(!is.null(base)) { + matched_paths$base_dir <- base + } + + return(matched_paths) + +} + +# ------------------------------------------------------------------------------------- +# FEMA processing functions: +# ------------------------------------------------------------------------------------- + +resolve_internal_fema_boundaries <- function(fema, source_file = "") { +# message("Resolving internal boundaries, islands, and topology issues:\n > '", basename(file_path), "'") + +# fema <- sf::read_sf(file_path) + + fema <- + fema[!sf::st_is_empty(fema), ] %>% + sf::st_transform(5070) + + fema <- + fema %>% + dplyr::select(geometry = geom) %>% + add_predicate_group_id(sf::st_intersects) %>% + sf::st_make_valid() %>% + dplyr::group_by(group_id) %>% + dplyr::summarise( + geometry = sf::st_combine(sf::st_union(geometry)) + ) %>% + dplyr::ungroup() %>% + dplyr::select(-group_id) %>% + add_predicate_group_id(sf::st_intersects) %>% + rmapshaper::ms_dissolve(sys = TRUE, sys_mem = 16) %>% + rmapshaper::ms_explode(sys = TRUE, sys_mem = 16) %>% + dplyr::mutate( + fema_id = as.character(1:dplyr::n()) + ) %>% + dplyr::select(fema_id, geometry) + + fema <- + fema %>% + dplyr::mutate( + source = basename(source_file), + state = gsub("-100yr-flood_valid_clean.gpkg", "", source) + ) %>% + dplyr::select(fema_id, source, state, geometry) + + return(fema) +} + +add_predicate_group_id <- function(polys, predicate) { + # GROUP BY SPATIAL PREDICATES + # ----------------------------------------- + # predicate = sf::st_touches + # polys <- sf_df + # ----------------------------------------- + + + relations <- predicate(polys) + + relations <- lapply(seq_along(relations), function(i) { as.character(sort(unique(c(relations[i][[1]], i)))) }) + + group_ids_map <- fastmap::fastmap() + ids_to_groups <- fastmap::fastmap() + + group_id <- 0 + + for (i in seq_along(relations)) { + + predicate_ids <- relations[i][[1]] + + # message("(", i, ") - ", predicate_ids) + # message("Start Group ID: ", group_id) + + id_group_check <- ids_to_groups$has(predicate_ids) + + if(any(id_group_check)) { + + known_groups <- ids_to_groups$mget(predicate_ids) + known_group <- known_groups[unname(sapply(known_groups , function(kg) { + !is.null(kg) + }))][[1]] + + # message("IDs part of past group ID > '", known_group, "'") + + past_group_ids <- group_ids_map$get(known_group)[[1]] + updated_group_ids <- as.character( + sort(as.numeric(unique(c(past_group_ids, predicate_ids)))) + ) + + group_ids_map$set(known_group, list(updated_group_ids)) + + new_ids <- predicate_ids[!predicate_ids %in% past_group_ids] + + # message("Adding ", new_ids, " to seen set...") + + # add any newly added IDs to the seen map + for (seen_id in 
new_ids) {
+        # message(seen_id)
+        ids_to_groups$set(as.character(seen_id), as.character(group_id))
+      }
+
+    } else {
+      # get a new group ID number
+      group_id <- group_id + 1
+      # message("IDs form NEW group > '", group_id, "'")
+
+      # create a new key in the map with the predicate IDs list as the value
+      group_ids_map$set(as.character(group_id), list(predicate_ids))
+
+      # message("Adding ", predicate_ids, " to seen set...")
+
+      # add each predicate ID to the map storing the seen indexes and their respective group IDs
+      for (seen_id in predicate_ids) {
+        # message(seen_id)
+        ids_to_groups$set(as.character(seen_id), as.character(group_id))
+      }
+    }
+    # message("End group ID: ", group_id, "\n")
+  }
+
+  group_ids <- group_ids_map$as_list()
+
+  grouping_df <- lapply(seq_along(group_ids), function(i) {
+    # i = 2
+    grouping <- group_ids[i]
+    group_id <- names(grouping)
+    indices  <- grouping[[1]][[1]]
+
+    data.frame(
+      index    = as.numeric(indices),
+      group_id = rep(group_id, length(indices))
+    )
+
+  }) %>%
+    dplyr::bind_rows() %>%
+    dplyr::arrange(index)
+
+  # count up the number of IDs for each group, we'll use this to determine which group
+  # to put any indices that were a part of MULTIPLE groups (use the group with the most other members)
+  group_id_counts <-
+    grouping_df %>%
+    dplyr::group_by(group_id) %>%
+    dplyr::count() %>%
+    # dplyr::arrange(-n) %>%
+    dplyr::ungroup()
+
+  # select the IDs with the most other members
+  grouping_df <-
+    grouping_df %>%
+    dplyr::left_join(
+      group_id_counts,
+      by = 'group_id'
+    ) %>%
+    dplyr::group_by(index) %>%
+    dplyr::slice_max(n, with_ties = FALSE) %>%
+    dplyr::ungroup() %>%
+    dplyr::select(-n) %>%
+    dplyr::arrange(-index)
+
+  polys$group_id <- grouping_df$group_id
+
+  return(polys)
+
+}
+
+# Update flowlines and transects to remove flowlines and transects that intersect with reference_features waterbodies
+# flowlines: flowlines linestring sf object
+# trans: transects linestring sf object
+# waterbodies: waterbodies polygon sf object
+# Returns a list of length 2 with logical vectors that subsets the "flowlines" and "transects" sf objects to remove flowlines and transects that intersect waterbodies
+### Returns a list of length 2 with updated "flowlines" and "transects" sf objects
+wb_intersects <- function(flowlines, trans, waterbodies) {
+
+  ######## ######## ######## ######## ######## ########
+
+  flowlines_geos <- geos::as_geos_geometry(flowlines)
+  wbs_geos       <- geos::as_geos_geometry(waterbodies)
+
+  # temporary ID for transects that is the "hy_id", underscore, "cs_id", used for subsetting in future steps
+  trans$tmp_id <- paste0(trans$hy_id, "_", trans$cs_id)
+
+  message("Checking flowlines against waterbodies...")
+
+  # create an index between flowlines and waterbodies
+  wb_index <- geos::geos_intersects_matrix(flowlines_geos, wbs_geos)
+
+  # keep flowlines that cross no waterbodies; flag those that cross at least one for further checks
+  to_keep  <- flowlines[lengths(wb_index) == 0, ]
+  to_check <- flowlines[lengths(wb_index) != 0, ]
+
+  # subset transects to the hy_ids in "to_check" set of flowlines
+  trans_check <- trans[trans$hy_id %in% unique(to_check$id), ]
+  # trans_check <- trans_geos[trans$hy_id %in% unique(to_check$id)]
+
+  # check where the transects linestrings intersect with the waterbodies
+  trans_geos_check <- geos::as_geos_geometry(trans_check)
+
+  message("Checking transects against waterbodies (v2) ...")
+  wb_trans_index <- geos::geos_intersects_matrix(trans_geos_check, wbs_geos) # (NEW METHOD)
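+  # NOTE (descriptive, added for clarity): geos_intersects_matrix() returns one integer
+  # vector of intersecting waterbody indices per transect, so lengths(wb_trans_index) == 0
+  # flags transects that touch no waterbody at all; the old geos_intersects_any() approach
+  # below collapsed the same information into a single TRUE/FALSE per transect.
+  # wb_trans_index <- geos::geos_intersects_any(trans_geos_check, 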
wbs_geos[unlist(wb_index)]) # (OLD METHOD) + + # sum(lengths(wb_trans_index) == 0) + # length(wb_trans_index) + + # within the transects lines that are on a flowline that crosses a waterbody, + # check if any of these transects line DO NOT CROSS A WATERBODY AT ALL + trans_keep <- trans_check[lengths(wb_trans_index) == 0, ] # (NEW METHOD) + # trans_keep <- trans_check[!wb_trans_index, ] # (OLD METHOD) + + # preserve any flowlines that CROSS A WATERBODY BUT ALSO HAVE A TRANSECT LINE that does NOT cross any waterbodies + to_check <- to_check[to_check$id %in% unique(trans_keep$hy_id), ] + + # update flowlines to keep with flowlines that intersect a waterbody BUT STILL, + # have transects that are NOT in the waterbody + to_keep <- dplyr::bind_rows(to_keep, to_check) + + # 'tmp_ids' of transects that are being checked and also the transects within trans_check + # that were determined to be valid (are being kept) + check_ids <- unique(trans_check$tmp_id) + keep_ids <- unique(trans_keep$tmp_id) + + # logical vectors of which flowlines/transects to keep (KEEP == TRUE) + # - Remove any transects that are on flowlines that cross a waterbody AND the transect crosses the waterbody too. + # - Keep original transects that are not on flowlines that intersect waterbodies AND + # also the transects that do NOT intersect waterbodies but are on a flowline that DOES intersect a waterbody + valid_flowlines <- flowlines$id %in% to_keep$id + valid_transects <- trans$tmp_id %in% dplyr::filter(trans, + !tmp_id %in% check_ids[!check_ids %in% keep_ids])$tmp_id + + # return alist of updated flowlines and transects + return( + list( + "valid_flowlines" = valid_flowlines, + "valid_transects" = valid_transects + ) + ) + + # # within the transects lines that are on a flowline that crosses a waterbody, + # # check if any of these transects line DO NOT CROSS A WATERBODY AT ALL + # trans_keep <- trans_check[!trans_wb_index, ] + # # trans_keep <- trans_check[lengths(trans_wb_index2) == 0, ] + # + # # preserve any flowlines that CROSS A WATERBODY BUT ALSO HAVE A TRANSECT LINE that does NOT cross any waterbodies + # to_check <- to_check[to_check$id %in% unique(trans_keep$hy_id), ] + # + # # update flowlines to keep with flowlines that intersect a waterbody BUT STILL, + # # have transects that are NOT in the waterbody + # to_keep <- dplyr::bind_rows(to_keep, to_check) + # + # # 'tmp_ids' of transects that are being checked and also the transects within trans_check + # # that were determined to be valid (are being kept) + # check_ids <- unique(trans_check$tmp_id) + # keep_ids <- unique(trans_keep$tmp_id) + # + # # logical vectors of which flowlines/transects to keep (KEEP == TRUE) + # # - Remove any transects that are on flowlines that cross a waterbody AND the transect crosses the waterbody too. 
+  # # - Keep original transects that are not on flowlines that intersect waterbodies AND
+  # #   also the transects that do NOT intersect waterbodies but are on a flowline that DOES intersect a waterbody
+  # valid_flowlines <- flowlines$id %in% to_keep$id
+  # valid_transects <- trans$tmp_id %in% dplyr::filter(trans,
+  #                                                    !tmp_id %in% check_ids[!check_ids %in% keep_ids])$tmp_id
+  #
+  # # return a list of updated flowlines and transects
+  # return(
+  #   list(
+  #     "valid_flowlines" = valid_flowlines,
+  #     "valid_transects" = valid_transects
+  #   )
+  # )
+}
+
+add_intersects_ids <- function(x, y, id_col) {
+
+  # make sure the crs are the same
+  y <- sf::st_transform(y, sf::st_crs(x))
+
+  # Perform the intersection
+  intersections <- sf::st_intersects(x, y)
+
+  # add the intersected values to the first dataframe
+  x[[id_col]] <- unlist(lapply(intersections, function(idx) {
+    if (length(idx) > 0) {
+      paste0(unlist(y[[id_col]][idx]), collapse = ", ")
+    } else {
+      NA
+    }
+  }))
+
+  return(x)
+}
+
+unnest_ids <- function(ids) {
+  return(
+    unique(unlist(strsplit(unique(ids), ", ")))
+  )
+}
+
+#' Get the polygons that intersect with any of the linestring geometries
+#' This is just a wrapper around geos::geos_intersects_matrix. Takes in sf dataframes, uses geos, then outputs sf dataframes
+#' @param polygons polygon sf object. Default is NULL
+#' @param lines linestring sf object. Default is NULL.
+#'
+#' @return sf dataframe of polygons that intersect with the linestrings
+polygons_with_line_intersects <- function(polygons = NULL, lines = NULL) {
+
+  if (is.null(polygons)) {
+    stop("NULL 'polygons' argument, provide an sf dataframe of POLYGON or MULTIPOLYGON geometries")
+  }
+
+  if (is.null(lines)) {
+    stop("NULL 'lines' argument, provide an sf dataframe of LINESTRING or MULTILINESTRING geometries")
+  }
+
+  # Convert the SF geometries to geos geometries
+  polygons_geos <- geos::as_geos_geometry(polygons)
+  lines_geos    <- geos::as_geos_geometry(lines)
+
+  # create an index between the polygons and linestrings
+  lines_index <- geos::geos_intersects_matrix(polygons_geos, lines_geos)
+
+  # get the polygons that have at least 1 intersection with the 'lines'
+  polygons_with_lines <- polygons[lengths(lines_index) != 0, ]
+
+  return(polygons_with_lines)
+}
+
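A usage sketch for polygons_with_line_intersects() (toy geometries, illustrative only):

# one square crossed by a horizontal line, and one far-away square that is not
polys <- sf::st_sf(geometry = sf::st_sfc(
  sf::st_polygon(list(rbind(c(0, 0), c(2, 0), c(2, 2), c(0, 2), c(0, 0)))),
  sf::st_polygon(list(rbind(c(10, 10), c(12, 10), c(12, 12), c(10, 12), c(10, 10))))
))
lines <- sf::st_sf(geometry = sf::st_sfc(sf::st_linestring(rbind(c(-1, 1), c(3, 1)))))

polygons_with_line_intersects(polys, lines)   # keeps only the first square

+# TODO: Delete these NEW DOMAIN functions... 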
+# Create an empty file structure
+# base_dir: character, top level directory path
+# domain_dirname: character, name of the intended new domain directory; if the folder already exists, the required subdirectories are created (if they DO NOT exist)
+
+# Directory tree:
+# base_dir/
+# └── domain_dirname/
+#     ├── flowlines/
+#     ├── domain_subset/
+#     ├── dem/
+#     ├── transects/
+#     ├── cross_sections/
+#     ├── cs_pts/
+#     ├── vpu-subsets/
+#     └── outputs/   (only created when with_output = TRUE)
+create_new_domain_dirs <- function(base_dir, domain_dirname, with_output = FALSE) {
+
+  # build paths
+  domain_dir         <- paste0(base_dir, "/", domain_dirname)
+  flowlines_dir      <- paste0(domain_dir, "/flowlines")
+  domain_subset_dir  <- paste0(domain_dir, "/domain_subset")
+  dem_dir            <- paste0(domain_dir, "/dem")
+  transects_dir      <- paste0(domain_dir, "/transects")
+  cross_sections_dir <- paste0(domain_dir, "/cross_sections")
+  cs_pts_dir         <- paste0(domain_dir, "/cs_pts")
+  vpu_subsets_dir    <- paste0(domain_dir, "/vpu-subsets")
+
+  if(with_output) {
+    output_dir <- paste0(domain_dir, "/outputs")
+  }
+
+  # create directories
+  create_if_not_exists(domain_dir)
+  create_if_not_exists(flowlines_dir)
+  create_if_not_exists(domain_subset_dir)
+  create_if_not_exists(dem_dir)
+  create_if_not_exists(transects_dir)
+  create_if_not_exists(cross_sections_dir)
+  create_if_not_exists(cs_pts_dir)
+  create_if_not_exists(vpu_subsets_dir)
+
+  if(with_output) {
+    create_if_not_exists(output_dir)
+  }
+
+}
+
+# get path strings for a domain dir (based off a base dir and domain dirname)
+# NOTE: this does NOT guarantee that these folders exist,
+# NOTE: it just gets the paths if they were created by create_new_domain_dirs()
+get_new_domain_paths <- function(base_dir, domain_dirname, with_output = FALSE) {
+
+  # build paths
+  domain_dir         <- paste0(base_dir, "/", domain_dirname)
+  flowlines_dir      <- paste0(domain_dir, "/flowlines")
+  domain_subset_dir  <- paste0(domain_dir, "/domain_subset")
+  dem_dir            <- paste0(domain_dir, "/dem")
+  transects_dir      <- paste0(domain_dir, "/transects")
+  cross_sections_dir <- paste0(domain_dir, "/cross_sections")
+  cs_pts_dir         <- paste0(domain_dir, "/cs_pts")
+  vpu_subsets_dir    <- paste0(domain_dir, "/vpu-subsets")
+  output_dir         <- ifelse(with_output, paste0(domain_dir, "/outputs"), NA)
+
+  # named list of file paths
+  return(
+    list(
+      base_dir           = base_dir,
+      domain_dir         = domain_dir,
+      flowlines_dir      = flowlines_dir,
+      domain_subset_dir  = domain_subset_dir,
+      dem_dir            = dem_dir,
+      transects_dir      = transects_dir,
+      cross_sections_dir = cross_sections_dir,
+      cs_pts_dir         = cs_pts_dir,
+      vpu_subsets_dir    = vpu_subsets_dir,
+      output_dir         = output_dir
+    )
+  )
+
+}
+
+download_3dep_vrt <- function(base_dir) {
+
+  ## Cmd+A/Cmd+C from: http://prd-tnm.s3.amazonaws.com/index.html?prefix=StagedProducts/Elevation/13/TIFF/current/
+  ### paste w/ `datapasta::vector_paste_vertical()`
+  ### Some manual cleaning of header and footer mess...
+  ### Reason? Un-scrapable page, and no index.gpkg...
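+  # NOTE (editorial): each string below is one row pasted from that S3 HTML
+  # index, in the form "0 <tile_dir>/"; the leading "0 " (the copied link text)
+  # is swapped for a /vsicurl/ URL prefix by the gsub() call after this vector.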
+ + t <- c( + "0 n06e162/", + "0 n06e163/", + "0 n07e134/", + "0 n07e151/", + "0 n07e152/", + "0 n07e158/", + "0 n08e134/", + "0 n08e151/", + "0 n08e152/", + "0 n08e158/", + "0 n09e134/", + "0 n10e138/", + "0 n14e144/", + "0 n15e145/", + "0 n16e145/", + "0 n18w065/", + "0 n18w066/", + "0 n18w067/", + "0 n18w068/", + "0 n19w065/", + "0 n19w066/", + "0 n19w067/", + "0 n19w068/", + "0 n19w156/", + "0 n20w155/", + "0 n20w156/", + "0 n20w157/", + "0 n21w156/", + "0 n21w157/", + "0 n21w158/", + "0 n22w157/", + "0 n22w158/", + "0 n22w159/", + "0 n22w160/", + "0 n22w161/", + "0 n23w160/", + "0 n23w161/", + "0 n25w081/", + "0 n25w082/", + "0 n25w083/", + "0 n26w081/", + "0 n26w082/", + "0 n26w098/", + "0 n26w099/", + "0 n27w081/", + "0 n27w082/", + "0 n27w083/", + "0 n27w098/", + "0 n27w099/", + "0 n27w100/", + "0 n28w081/", + "0 n28w082/", + "0 n28w083/", + "0 n28w097/", + "0 n28w098/", + "0 n28w099/", + "0 n28w100/", + "0 n28w101/", + "0 n29w081/", + "0 n29w082/", + "0 n29w083/", + "0 n29w090/", + "0 n29w096/", + "0 n29w097/", + "0 n29w098/", + "0 n29w099/", + "0 n29w100/", + "0 n29w101/", + "0 n29w104/", + "0 n30w081/", + "0 n30w082/", + "0 n30w083/", + "0 n30w084/", + "0 n30w085/", + "0 n30w086/", + "0 n30w089/", + "0 n30w090/", + "0 n30w091/", + "0 n30w092/", + "0 n30w093/", + "0 n30w094/", + "0 n30w095/", + "0 n30w096/", + "0 n30w097/", + "0 n30w098/", + "0 n30w099/", + "0 n30w100/", + "0 n30w101/", + "0 n30w102/", + "0 n30w103/", + "0 n30w104/", + "0 n30w105/", + "0 n31w082/", + "0 n31w083/", + "0 n31w084/", + "0 n31w085/", + "0 n31w086/", + "0 n31w087/", + "0 n31w088/", + "0 n31w089/", + "0 n31w090/", + "0 n31w091/", + "0 n31w092/", + "0 n31w093/", + "0 n31w094/", + "0 n31w095/", + "0 n31w096/", + "0 n31w097/", + "0 n31w098/", + "0 n31w099/", + "0 n31w100/", + "0 n31w101/", + "0 n31w102/", + "0 n31w103/", + "0 n31w104/", + "0 n31w105/", + "0 n31w106/", + "0 n31w107/", + "0 n32w081/", + "0 n32w082/", + "0 n32w083/", + "0 n32w084/", + "0 n32w085/", + "0 n32w086/", + "0 n32w087/", + "0 n32w088/", + "0 n32w089/", + "0 n32w090/", + "0 n32w091/", + "0 n32w092/", + "0 n32w093/", + "0 n32w094/", + "0 n32w095/", + "0 n32w096/", + "0 n32w097/", + "0 n32w098/", + "0 n32w099/", + "0 n32w100/", + "0 n32w101/", + "0 n32w102/", + "0 n32w103/", + "0 n32w104/", + "0 n32w105/", + "0 n32w106/", + "0 n32w107/", + "0 n32w108/", + "0 n32w109/", + "0 n32w110/", + "0 n32w111/", + "0 n32w112/", + "0 n32w113/", + "0 n32w114/", + "0 n33w080/", + "0 n33w081/", + "0 n33w082/", + "0 n33w083/", + "0 n33w084/", + "0 n33w085/", + "0 n33w086/", + "0 n33w087/", + "0 n33w088/", + "0 n33w089/", + "0 n33w090/", + "0 n33w091/", + "0 n33w092/", + "0 n33w093/", + "0 n33w094/", + "0 n33w095/", + "0 n33w096/", + "0 n33w097/", + "0 n33w098/", + "0 n33w099/", + "0 n33w100/", + "0 n33w101/", + "0 n33w102/", + "0 n33w103/", + "0 n33w104/", + "0 n33w105/", + "0 n33w106/", + "0 n33w107/", + "0 n33w108/", + "0 n33w109/", + "0 n33w110/", + "0 n33w111/", + "0 n33w112/", + "0 n33w113/", + "0 n33w114/", + "0 n33w115/", + "0 n33w116/", + "0 n33w117/", + "0 n33w118/", + "0 n33w119/", + "0 n34w078/", + "0 n34w079/", + "0 n34w080/", + "0 n34w081/", + "0 n34w082/", + "0 n34w083/", + "0 n34w084/", + "0 n34w085/", + "0 n34w086/", + "0 n34w087/", + "0 n34w088/", + "0 n34w089/", + "0 n34w090/", + "0 n34w091/", + "0 n34w092/", + "0 n34w093/", + "0 n34w094/", + "0 n34w095/", + "0 n34w096/", + "0 n34w097/", + "0 n34w098/", + "0 n34w099/", + "0 n34w100/", + "0 n34w101/", + "0 n34w102/", + "0 n34w103/", + "0 n34w104/", + "0 n34w105/", + "0 n34w106/", + "0 
n34w107/", + "0 n34w108/", + "0 n34w109/", + "0 n34w110/", + "0 n34w111/", + "0 n34w112/", + "0 n34w113/", + "0 n34w114/", + "0 n34w115/", + "0 n34w116/", + "0 n34w117/", + "0 n34w118/", + "0 n34w119/", + "0 n34w120/", + "0 n34w121/", + "0 n35w076/", + "0 n35w077/", + "0 n35w078/", + "0 n35w079/", + "0 n35w080/", + "0 n35w081/", + "0 n35w082/", + "0 n35w083/", + "0 n35w084/", + "0 n35w085/", + "0 n35w086/", + "0 n35w087/", + "0 n35w088/", + "0 n35w089/", + "0 n35w090/", + "0 n35w091/", + "0 n35w092/", + "0 n35w093/", + "0 n35w094/", + "0 n35w095/", + "0 n35w096/", + "0 n35w097/", + "0 n35w098/", + "0 n35w099/", + "0 n35w100/", + "0 n35w101/", + "0 n35w102/", + "0 n35w103/", + "0 n35w104/", + "0 n35w105/", + "0 n35w106/", + "0 n35w107/", + "0 n35w108/", + "0 n35w109/", + "0 n35w110/", + "0 n35w111/", + "0 n35w112/", + "0 n35w113/", + "0 n35w114/", + "0 n35w115/", + "0 n35w116/", + "0 n35w117/", + "0 n35w118/", + "0 n35w119/", + "0 n35w120/", + "0 n35w121/", + "0 n36w076/", + "0 n36w077/", + "0 n36w078/", + "0 n36w079/", + "0 n36w080/", + "0 n36w081/", + "0 n36w082/", + "0 n36w083/", + "0 n36w084/", + "0 n36w085/", + "0 n36w086/", + "0 n36w087/", + "0 n36w088/", + "0 n36w089/", + "0 n36w090/", + "0 n36w091/", + "0 n36w092/", + "0 n36w093/", + "0 n36w094/", + "0 n36w095/", + "0 n36w096/", + "0 n36w097/", + "0 n36w098/", + "0 n36w099/", + "0 n36w100/", + "0 n36w101/", + "0 n36w102/", + "0 n36w103/", + "0 n36w104/", + "0 n36w105/", + "0 n36w106/", + "0 n36w107/", + "0 n36w108/", + "0 n36w109/", + "0 n36w110/", + "0 n36w111/", + "0 n36w112/", + "0 n36w113/", + "0 n36w114/", + "0 n36w115/", + "0 n36w116/", + "0 n36w117/", + "0 n36w118/", + "0 n36w119/", + "0 n36w120/", + "0 n36w121/", + "0 n36w122/", + "0 n37w076/", + "0 n37w077/", + "0 n37w078/", + "0 n37w079/", + "0 n37w080/", + "0 n37w081/", + "0 n37w082/", + "0 n37w083/", + "0 n37w084/", + "0 n37w085/", + "0 n37w086/", + "0 n37w087/", + "0 n37w088/", + "0 n37w089/", + "0 n37w090/", + "0 n37w091/", + "0 n37w092/", + "0 n37w093/", + "0 n37w094/", + "0 n37w095/", + "0 n37w096/", + "0 n37w097/", + "0 n37w098/", + "0 n37w099/", + "0 n37w100/", + "0 n37w101/", + "0 n37w102/", + "0 n37w103/", + "0 n37w104/", + "0 n37w105/", + "0 n37w106/", + "0 n37w107/", + "0 n37w108/", + "0 n37w109/", + "0 n37w110/", + "0 n37w111/", + "0 n37w112/", + "0 n37w113/", + "0 n37w114/", + "0 n37w115/", + "0 n37w116/", + "0 n37w117/", + "0 n37w118/", + "0 n37w119/", + "0 n37w120/", + "0 n37w121/", + "0 n37w122/", + "0 n37w123/", + "0 n38w076/", + "0 n38w077/", + "0 n38w078/", + "0 n38w079/", + "0 n38w080/", + "0 n38w081/", + "0 n38w082/", + "0 n38w083/", + "0 n38w084/", + "0 n38w085/", + "0 n38w086/", + "0 n38w087/", + "0 n38w088/", + "0 n38w089/", + "0 n38w090/", + "0 n38w091/", + "0 n38w092/", + "0 n38w093/", + "0 n38w094/", + "0 n38w095/", + "0 n38w096/", + "0 n38w097/", + "0 n38w098/", + "0 n38w099/", + "0 n38w100/", + "0 n38w101/", + "0 n38w102/", + "0 n38w103/", + "0 n38w104/", + "0 n38w105/", + "0 n38w106/", + "0 n38w107/", + "0 n38w108/", + "0 n38w109/", + "0 n38w110/", + "0 n38w111/", + "0 n38w112/", + "0 n38w113/", + "0 n38w114/", + "0 n38w115/", + "0 n38w116/", + "0 n38w117/", + "0 n38w118/", + "0 n38w119/", + "0 n38w120/", + "0 n38w121/", + "0 n38w122/", + "0 n38w123/", + "0 n38w124/", + "0 n39w075/", + "0 n39w076/", + "0 n39w077/", + "0 n39w078/", + "0 n39w079/", + "0 n39w080/", + "0 n39w081/", + "0 n39w082/", + "0 n39w083/", + "0 n39w084/", + "0 n39w085/", + "0 n39w086/", + "0 n39w087/", + "0 n39w088/", + "0 n39w089/", + "0 n39w090/", + "0 n39w091/", + "0 
n39w092/", + "0 n39w093/", + "0 n39w094/", + "0 n39w095/", + "0 n39w096/", + "0 n39w097/", + "0 n39w098/", + "0 n39w099/", + "0 n39w100/", + "0 n39w101/", + "0 n39w102/", + "0 n39w103/", + "0 n39w104/", + "0 n39w105/", + "0 n39w106/", + "0 n39w107/", + "0 n39w108/", + "0 n39w109/", + "0 n39w110/", + "0 n39w111/", + "0 n39w112/", + "0 n39w113/", + "0 n39w114/", + "0 n39w115/", + "0 n39w116/", + "0 n39w117/", + "0 n39w118/", + "0 n39w119/", + "0 n39w120/", + "0 n39w121/", + "0 n39w122/", + "0 n39w123/", + "0 n39w124/", + "0 n40w075/", + "0 n40w076/", + "0 n40w077/", + "0 n40w078/", + "0 n40w079/", + "0 n40w080/", + "0 n40w081/", + "0 n40w082/", + "0 n40w083/", + "0 n40w084/", + "0 n40w085/", + "0 n40w086/", + "0 n40w087/", + "0 n40w088/", + "0 n40w089/", + "0 n40w090/", + "0 n40w091/", + "0 n40w092/", + "0 n40w093/", + "0 n40w094/", + "0 n40w095/", + "0 n40w096/", + "0 n40w097/", + "0 n40w098/", + "0 n40w099/", + "0 n40w100/", + "0 n40w101/", + "0 n40w102/", + "0 n40w103/", + "0 n40w104/", + "0 n40w105/", + "0 n40w106/", + "0 n40w107/", + "0 n40w108/", + "0 n40w109/", + "0 n40w110/", + "0 n40w111/", + "0 n40w112/", + "0 n40w113/", + "0 n40w114/", + "0 n40w115/", + "0 n40w116/", + "0 n40w117/", + "0 n40w118/", + "0 n40w119/", + "0 n40w120/", + "0 n40w121/", + "0 n40w122/", + "0 n40w123/", + "0 n40w124/", + "0 n40w125/", + "0 n41w073/", + "0 n41w074/", + "0 n41w075/", + "0 n41w076/", + "0 n41w077/", + "0 n41w078/", + "0 n41w079/", + "0 n41w080/", + "0 n41w081/", + "0 n41w082/", + "0 n41w083/", + "0 n41w084/", + "0 n41w085/", + "0 n41w086/", + "0 n41w087/", + "0 n41w088/", + "0 n41w089/", + "0 n41w090/", + "0 n41w091/", + "0 n41w092/", + "0 n41w093/", + "0 n41w094/", + "0 n41w095/", + "0 n41w096/", + "0 n41w097/", + "0 n41w098/", + "0 n41w099/", + "0 n41w100/", + "0 n41w101/", + "0 n41w102/", + "0 n41w103/", + "0 n41w104/", + "0 n41w105/", + "0 n41w106/", + "0 n41w107/", + "0 n41w108/", + "0 n41w109/", + "0 n41w110/", + "0 n41w111/", + "0 n41w112/", + "0 n41w113/", + "0 n41w114/", + "0 n41w115/", + "0 n41w116/", + "0 n41w117/", + "0 n41w118/", + "0 n41w119/", + "0 n41w120/", + "0 n41w121/", + "0 n41w122/", + "0 n41w123/", + "0 n41w124/", + "0 n41w125/", + "0 n42w070/", + "0 n42w071/", + "0 n42w072/", + "0 n42w073/", + "0 n42w074/", + "0 n42w075/", + "0 n42w076/", + "0 n42w077/", + "0 n42w078/", + "0 n42w079/", + "0 n42w080/", + "0 n42w081/", + "0 n42w082/", + "0 n42w083/", + "0 n42w084/", + "0 n42w085/", + "0 n42w086/", + "0 n42w087/", + "0 n42w088/", + "0 n42w089/", + "0 n42w090/", + "0 n42w091/", + "0 n42w092/", + "0 n42w093/", + "0 n42w094/", + "0 n42w095/", + "0 n42w096/", + "0 n42w097/", + "0 n42w098/", + "0 n42w099/", + "0 n42w100/", + "0 n42w101/", + "0 n42w102/", + "0 n42w103/", + "0 n42w104/", + "0 n42w105/", + "0 n42w106/", + "0 n42w107/", + "0 n42w108/", + "0 n42w109/", + "0 n42w110/", + "0 n42w111/", + "0 n42w112/", + "0 n42w113/", + "0 n42w114/", + "0 n42w115/", + "0 n42w116/", + "0 n42w117/", + "0 n42w118/", + "0 n42w119/", + "0 n42w120/", + "0 n42w121/", + "0 n42w122/", + "0 n42w123/", + "0 n42w124/", + "0 n42w125/", + "0 n43w071/", + "0 n43w072/", + "0 n43w073/", + "0 n43w074/", + "0 n43w075/", + "0 n43w076/", + "0 n43w077/", + "0 n43w078/", + "0 n43w079/", + "0 n43w080/", + "0 n43w081/", + "0 n43w082/", + "0 n43w083/", + "0 n43w084/", + "0 n43w085/", + "0 n43w086/", + "0 n43w087/", + "0 n43w088/", + "0 n43w089/", + "0 n43w090/", + "0 n43w091/", + "0 n43w092/", + "0 n43w093/", + "0 n43w094/", + "0 n43w095/", + "0 n43w096/", + "0 n43w097/", + "0 n43w098/", + "0 n43w099/", + "0 
n43w100/", + "0 n43w101/", + "0 n43w102/", + "0 n43w103/", + "0 n43w104/", + "0 n43w105/", + "0 n43w106/", + "0 n43w107/", + "0 n43w108/", + "0 n43w109/", + "0 n43w110/", + "0 n43w111/", + "0 n43w112/", + "0 n43w113/", + "0 n43w114/", + "0 n43w115/", + "0 n43w116/", + "0 n43w117/", + "0 n43w118/", + "0 n43w119/", + "0 n43w120/", + "0 n43w121/", + "0 n43w122/", + "0 n43w123/", + "0 n43w124/", + "0 n43w125/", + "0 n44w069/", + "0 n44w070/", + "0 n44w071/", + "0 n44w072/", + "0 n44w073/", + "0 n44w074/", + "0 n44w075/", + "0 n44w076/", + "0 n44w077/", + "0 n44w078/", + "0 n44w079/", + "0 n44w080/", + "0 n44w081/", + "0 n44w083/", + "0 n44w084/", + "0 n44w085/", + "0 n44w086/", + "0 n44w087/", + "0 n44w088/", + "0 n44w089/", + "0 n44w090/", + "0 n44w091/", + "0 n44w092/", + "0 n44w093/", + "0 n44w094/", + "0 n44w095/", + "0 n44w096/", + "0 n44w097/", + "0 n44w098/", + "0 n44w099/", + "0 n44w100/", + "0 n44w101/", + "0 n44w102/", + "0 n44w103/", + "0 n44w104/", + "0 n44w105/", + "0 n44w106/", + "0 n44w107/", + "0 n44w108/", + "0 n44w109/", + "0 n44w110/", + "0 n44w111/", + "0 n44w112/", + "0 n44w113/", + "0 n44w114/", + "0 n44w115/", + "0 n44w116/", + "0 n44w117/", + "0 n44w118/", + "0 n44w119/", + "0 n44w120/", + "0 n44w121/", + "0 n44w122/", + "0 n44w123/", + "0 n44w124/", + "0 n44w125/", + "0 n45w067/", + "0 n45w068/", + "0 n45w069/", + "0 n45w070/", + "0 n45w071/", + "0 n45w072/", + "0 n45w073/", + "0 n45w074/", + "0 n45w075/", + "0 n45w076/", + "0 n45w077/", + "0 n45w083/", + "0 n45w084/", + "0 n45w085/", + "0 n45w086/", + "0 n45w087/", + "0 n45w088/", + "0 n45w089/", + "0 n45w090/", + "0 n45w091/", + "0 n45w092/", + "0 n45w093/", + "0 n45w094/", + "0 n45w095/", + "0 n45w096/", + "0 n45w097/", + "0 n45w098/", + "0 n45w099/", + "0 n45w100/", + "0 n45w101/", + "0 n45w102/", + "0 n45w103/", + "0 n45w104/", + "0 n45w105/", + "0 n45w106/", + "0 n45w107/", + "0 n45w108/", + "0 n45w109/", + "0 n45w110/", + "0 n45w111/", + "0 n45w112/", + "0 n45w113/", + "0 n45w114/", + "0 n45w115/", + "0 n45w116/", + "0 n45w117/", + "0 n45w118/", + "0 n45w119/", + "0 n45w120/", + "0 n45w121/", + "0 n45w122/", + "0 n45w123/", + "0 n45w124/", + "0 n45w125/", + "0 n46w068/", + "0 n46w069/", + "0 n46w070/", + "0 n46w071/", + "0 n46w072/", + "0 n46w073/", + "0 n46w074/", + "0 n46w075/", + "0 n46w084/", + "0 n46w085/", + "0 n46w086/", + "0 n46w087/", + "0 n46w088/", + "0 n46w089/", + "0 n46w090/", + "0 n46w091/", + "0 n46w092/", + "0 n46w093/", + "0 n46w094/", + "0 n46w095/", + "0 n46w096/", + "0 n46w097/", + "0 n46w098/", + "0 n46w099/", + "0 n46w100/", + "0 n46w101/", + "0 n46w102/", + "0 n46w103/", + "0 n46w104/", + "0 n46w105/", + "0 n46w106/", + "0 n46w107/", + "0 n46w108/", + "0 n46w109/", + "0 n46w110/", + "0 n46w111/", + "0 n46w112/", + "0 n46w113/", + "0 n46w114/", + "0 n46w115/", + "0 n46w116/", + "0 n46w117/", + "0 n46w118/", + "0 n46w119/", + "0 n46w120/", + "0 n46w121/", + "0 n46w122/", + "0 n46w123/", + "0 n46w124/", + "0 n46w125/", + "0 n47w068/", + "0 n47w069/", + "0 n47w070/", + "0 n47w071/", + "0 n47w084/", + "0 n47w085/", + "0 n47w086/", + "0 n47w087/", + "0 n47w088/", + "0 n47w089/", + "0 n47w090/", + "0 n47w091/", + "0 n47w092/", + "0 n47w093/", + "0 n47w094/", + "0 n47w095/", + "0 n47w096/", + "0 n47w097/", + "0 n47w098/", + "0 n47w099/", + "0 n47w100/", + "0 n47w101/", + "0 n47w102/", + "0 n47w103/", + "0 n47w104/", + "0 n47w105/", + "0 n47w106/", + "0 n47w107/", + "0 n47w108/", + "0 n47w109/", + "0 n47w110/", + "0 n47w111/", + "0 n47w112/", + "0 n47w113/", + "0 n47w114/", + "0 n47w115/", + "0 
n47w116/", + "0 n47w117/", + "0 n47w118/", + "0 n47w119/", + "0 n47w120/", + "0 n47w121/", + "0 n47w122/", + "0 n47w123/", + "0 n47w124/", + "0 n47w125/", + "0 n48w068/", + "0 n48w069/", + "0 n48w070/", + "0 n48w087/", + "0 n48w088/", + "0 n48w089/", + "0 n48w090/", + "0 n48w091/", + "0 n48w092/", + "0 n48w093/", + "0 n48w094/", + "0 n48w095/", + "0 n48w096/", + "0 n48w097/", + "0 n48w098/", + "0 n48w099/", + "0 n48w100/", + "0 n48w101/", + "0 n48w102/", + "0 n48w103/", + "0 n48w104/", + "0 n48w105/", + "0 n48w106/", + "0 n48w107/", + "0 n48w108/", + "0 n48w109/", + "0 n48w110/", + "0 n48w111/", + "0 n48w112/", + "0 n48w113/", + "0 n48w114/", + "0 n48w115/", + "0 n48w116/", + "0 n48w117/", + "0 n48w118/", + "0 n48w119/", + "0 n48w120/", + "0 n48w121/", + "0 n48w122/", + "0 n48w123/", + "0 n48w124/", + "0 n48w125/", + "0 n49w089/", + "0 n49w090/", + "0 n49w091/", + "0 n49w092/", + "0 n49w093/", + "0 n49w094/", + "0 n49w095/", + "0 n49w096/", + "0 n49w097/", + "0 n49w098/", + "0 n49w099/", + "0 n49w100/", + "0 n49w101/", + "0 n49w102/", + "0 n49w103/", + "0 n49w104/", + "0 n49w105/", + "0 n49w106/", + "0 n49w107/", + "0 n49w108/", + "0 n49w109/", + "0 n49w110/", + "0 n49w111/", + "0 n49w112/", + "0 n49w113/", + "0 n49w114/", + "0 n49w115/", + "0 n49w116/", + "0 n49w117/", + "0 n49w118/", + "0 n49w119/", + "0 n49w120/", + "0 n49w121/", + "0 n49w122/", + "0 n49w123/", + "0 n49w124/", + "0 n49w125/", + "0 n50w095/", + "0 n50w096/", + "0 n50w097/", + "0 n50w098/", + "0 n50w099/", + "0 n50w100/", + "0 n50w101/", + "0 n50w107/", + "0 n50w108/", + "0 n50w122/", + "0 n50w123/", + "0 n50w124/", + "0 n52e177/", + "0 n52e178/", + "0 n52e179/", + "0 n52w174/", + "0 n52w176/", + "0 n52w177/", + "0 n52w178/", + "0 n52w179/", + "0 n52w180/", + "0 n53e172/", + "0 n53e173/", + "0 n53e174/", + "0 n53e175/", + "0 n53e177/", + "0 n53e178/", + "0 n53e179/", + "0 n53w169/", + "0 n53w170/", + "0 n53w171/", + "0 n53w172/", + "0 n53w173/", + "0 n53w174/", + "0 n53w175/", + "0 n53w176/", + "0 n53w177/", + "0 n54e172/", + "0 n54e173/", + "0 n54w167/", + "0 n54w168/", + "0 n54w169/", + "0 n54w170/", + "0 n55w131/", + "0 n55w132/", + "0 n55w133/", + "0 n55w134/", + "0 n55w160/", + "0 n55w161/", + "0 n55w162/", + "0 n55w163/", + "0 n55w164/", + "0 n55w165/", + "0 n55w166/", + "0 n55w167/", + "0 n56w130/", + "0 n56w131/", + "0 n56w132/", + "0 n56w133/", + "0 n56w134/", + "0 n56w135/", + "0 n56w156/", + "0 n56w157/", + "0 n56w159/", + "0 n56w160/", + "0 n56w161/", + "0 n56w162/", + "0 n56w163/", + "0 n56w164/", + "0 n57w131/", + "0 n57w132/", + "0 n57w133/", + "0 n57w134/", + "0 n57w135/", + "0 n57w136/", + "0 n57w153/", + "0 n57w154/", + "0 n57w155/", + "0 n57w156/", + "0 n57w157/", + "0 n57w158/", + "0 n57w159/", + "0 n57w160/", + "0 n57w161/", + "0 n57w162/", + "0 n57w170/", + "0 n57w171/", + "0 n58w133/", + "0 n58w134/", + "0 n58w135/", + "0 n58w136/", + "0 n58w137/", + "0 n58w153/", + "0 n58w154/", + "0 n58w155/", + "0 n58w156/", + "0 n58w157/", + "0 n58w158/", + "0 n58w159/", + "0 n58w170/", + "0 n58w171/", + "0 n59w134/", + "0 n59w135/", + "0 n59w136/", + "0 n59w137/", + "0 n59w138/", + "0 n59w139/", + "0 n59w152/", + "0 n59w153/", + "0 n59w154/", + "0 n59w155/", + "0 n59w156/", + "0 n59w157/", + "0 n59w158/", + "0 n59w159/", + "0 n59w160/", + "0 n59w161/", + "0 n59w162/", + "0 n59w163/", + "0 n60w135/", + "0 n60w136/", + "0 n60w137/", + "0 n60w138/", + "0 n60w139/", + "0 n60w140/", + "0 n60w141/", + "0 n60w142/", + "0 n60w143/", + "0 n60w144/", + "0 n60w145/", + "0 n60w146/", + "0 n60w147/", + "0 n60w148/", + "0 
n60w149/", + "0 n60w150/", + "0 n60w151/", + "0 n60w152/", + "0 n60w153/", + "0 n60w154/", + "0 n60w155/", + "0 n60w156/", + "0 n60w157/", + "0 n60w158/", + "0 n60w159/", + "0 n60w160/", + "0 n60w161/", + "0 n60w162/", + "0 n60w163/", + "0 n60w164/", + "0 n60w165/", + "0 n60w166/", + "0 n60w167/", + "0 n60w168/", + "0 n61w140/", + "0 n61w141/", + "0 n61w142/", + "0 n61w143/", + "0 n61w144/", + "0 n61w145/", + "0 n61w146/", + "0 n61w147/", + "0 n61w148/", + "0 n61w149/", + "0 n61w150/", + "0 n61w151/", + "0 n61w152/", + "0 n61w153/", + "0 n61w154/", + "0 n61w155/", + "0 n61w156/", + "0 n61w157/", + "0 n61w158/", + "0 n61w159/", + "0 n61w160/", + "0 n61w161/", + "0 n61w162/", + "0 n61w163/", + "0 n61w164/", + "0 n61w165/", + "0 n61w166/", + "0 n61w167/", + "0 n61w168/", + "0 n61w173/", + "0 n61w174/", + "0 n62w142/", + "0 n62w143/", + "0 n62w144/", + "0 n62w145/", + "0 n62w146/", + "0 n62w147/", + "0 n62w148/", + "0 n62w149/", + "0 n62w150/", + "0 n62w151/", + "0 n62w152/", + "0 n62w153/", + "0 n62w154/", + "0 n62w155/", + "0 n62w156/", + "0 n62w157/", + "0 n62w158/", + "0 n62w159/", + "0 n62w160/", + "0 n62w161/", + "0 n62w162/", + "0 n62w163/", + "0 n62w164/", + "0 n62w165/", + "0 n62w166/", + "0 n62w167/", + "0 n63w142/", + "0 n63w143/", + "0 n63w144/", + "0 n63w145/", + "0 n63w146/", + "0 n63w147/", + "0 n63w148/", + "0 n63w149/", + "0 n63w150/", + "0 n63w151/", + "0 n63w152/", + "0 n63w153/", + "0 n63w154/", + "0 n63w155/", + "0 n63w156/", + "0 n63w157/", + "0 n63w158/", + "0 n63w159/", + "0 n63w160/", + "0 n63w161/", + "0 n63w162/", + "0 n63w163/", + "0 n63w164/", + "0 n63w165/", + "0 n63w166/", + "0 n63w167/", + "0 n63w170/", + "0 n64w141/", + "0 n64w142/", + "0 n64w143/", + "0 n64w144/", + "0 n64w145/", + "0 n64w146/", + "0 n64w147/", + "0 n64w148/", + "0 n64w149/", + "0 n64w150/", + "0 n64w151/", + "0 n64w152/", + "0 n64w153/", + "0 n64w154/", + "0 n64w155/", + "0 n64w156/", + "0 n64w157/", + "0 n64w158/", + "0 n64w159/", + "0 n64w160/", + "0 n64w161/", + "0 n64w162/", + "0 n64w163/", + "0 n64w164/", + "0 n64w165/", + "0 n64w169/", + "0 n64w170/", + "0 n64w171/", + "0 n64w172/", + "0 n65w141/", + "0 n65w142/", + "0 n65w143/", + "0 n65w144/", + "0 n65w145/", + "0 n65w146/", + "0 n65w147/", + "0 n65w148/", + "0 n65w149/", + "0 n65w150/", + "0 n65w151/", + "0 n65w152/", + "0 n65w153/", + "0 n65w154/", + "0 n65w155/", + "0 n65w156/", + "0 n65w157/", + "0 n65w158/", + "0 n65w159/", + "0 n65w160/", + "0 n65w161/", + "0 n65w162/", + "0 n65w163/", + "0 n65w164/", + "0 n65w165/", + "0 n65w166/", + "0 n65w167/", + "0 n66w141/", + "0 n66w142/", + "0 n66w143/", + "0 n66w144/", + "0 n66w145/", + "0 n66w146/", + "0 n66w147/", + "0 n66w148/", + "0 n66w149/", + "0 n66w150/", + "0 n66w151/", + "0 n66w152/", + "0 n66w153/", + "0 n66w154/", + "0 n66w155/", + "0 n66w156/", + "0 n66w157/", + "0 n66w158/", + "0 n66w159/", + "0 n66w160/", + "0 n66w161/", + "0 n66w162/", + "0 n66w163/", + "0 n66w164/", + "0 n66w165/", + "0 n66w166/", + "0 n66w167/", + "0 n66w168/", + "0 n66w169/", + "0 n67w141/", + "0 n67w142/", + "0 n67w143/", + "0 n67w144/", + "0 n67w145/", + "0 n67w146/", + "0 n67w147/", + "0 n67w148/", + "0 n67w149/", + "0 n67w150/", + "0 n67w151/", + "0 n67w152/", + "0 n67w153/", + "0 n67w154/", + "0 n67w155/", + "0 n67w156/", + "0 n67w157/", + "0 n67w158/", + "0 n67w159/", + "0 n67w160/", + "0 n67w161/", + "0 n67w162/", + "0 n67w163/", + "0 n67w164/", + "0 n67w165/", + "0 n67w166/", + "0 n67w167/", + "0 n67w168/", + "0 n68w141/", + "0 n68w142/", + "0 n68w143/", + "0 n68w144/", + "0 n68w145/", + "0 
n68w146/", + "0 n68w147/", + "0 n68w148/", + "0 n68w149/", + "0 n68w150/", + "0 n68w151/", + "0 n68w152/", + "0 n68w153/", + "0 n68w154/", + "0 n68w155/", + "0 n68w156/", + "0 n68w157/", + "0 n68w158/", + "0 n68w159/", + "0 n68w160/", + "0 n68w161/", + "0 n68w162/", + "0 n68w163/", + "0 n68w164/", + "0 n68w165/", + "0 n68w166/", + "0 n69w141/", + "0 n69w142/", + "0 n69w143/", + "0 n69w144/", + "0 n69w145/", + "0 n69w146/", + "0 n69w147/", + "0 n69w148/", + "0 n69w149/", + "0 n69w150/", + "0 n69w151/", + "0 n69w152/", + "0 n69w153/", + "0 n69w154/", + "0 n69w155/", + "0 n69w156/", + "0 n69w157/", + "0 n69w158/", + "0 n69w159/", + "0 n69w160/", + "0 n69w161/", + "0 n69w162/", + "0 n69w163/", + "0 n69w164/", + "0 n69w165/", + "0 n69w166/", + "0 n69w167/", + "0 n70w141/", + "0 n70w142/", + "0 n70w143/", + "0 n70w144/", + "0 n70w145/", + "0 n70w146/", + "0 n70w147/", + "0 n70w148/", + "0 n70w149/", + "0 n70w150/", + "0 n70w151/", + "0 n70w152/", + "0 n70w153/", + "0 n70w154/", + "0 n70w155/", + "0 n70w156/", + "0 n70w157/", + "0 n70w158/", + "0 n70w159/", + "0 n70w160/", + "0 n70w161/", + "0 n70w162/", + "0 n70w163/", + "0 n70w164/", + "0 n70w165/", + "0 n71w143/", + "0 n71w144/", + "0 n71w145/", + "0 n71w146/", + "0 n71w147/", + "0 n71w148/", + "0 n71w149/", + "0 n71w150/", + "0 n71w151/", + "0 n71w152/", + "0 n71w153/", + "0 n71w154/", + "0 n71w155/", + "0 n71w156/", + "0 n71w157/", + "0 n71w158/", + "0 n71w159/", + "0 n71w160/", + "0 n71w161/", + "0 n71w162/", + "0 n71w163/", + "0 n71w164/", + "0 n72w155/", + "0 n72w156/", + "0 n72w157/", + "0 n72w158/", + "0 s14w170/", + "0 s14w171/" + ) + + + # sub out HTML copy pattern with vsi URL + t2 <- gsub("0 ", + "/vsicurl/https://prd-tnm.s3.amazonaws.com/StagedProducts/Elevation/13/TIFF/current/", + t) + + # add file paths following NED scheme + t3 <- paste0(t2, + "USGS_13_", + basename(t2), + ".tif") + + # Write table to data-raw + write.table(t3, + paste0(base_dir, "/ned_list_USGS_13.txt"), + row.names = FALSE, + col.names = FALSE, + quote = FALSE) + + +} +# dem_base_dir <- "/Users/anguswatters/Desktop/transects_paper/data/dem" +# download_3dep_vrt(dem_base_dir) +# +# # Create meta data object of three NED resoruces +# ned <- data.frame(rbind( +# # c(id = "USGS_1", +# # URL = "/vsicurl/https://prd-tnm.s3.amazonaws.com/StagedProducts/Elevation/1", +# # varname = "30m elevation", +# # long_name = "30m (1 arcsec) National Elevation Dataset", +# # units = "m"), +# # +# # c(id = "USGS_2", +# # URL = "/vsicurl/https://prd-tnm.s3.amazonaws.com/StagedProducts/Elevation/2", +# # varname = "60m elevation", +# # long_name = "60m (2 arcsec) National Elevation Dataset Alaska", +# # units = "m"), +# +# c(id = "USGS_13", +# URL = "/vsicurl/https://prd-tnm.s3.amazonaws.com/StagedProducts/Elevation/13", +# varname = "10m elevation", +# long_name = "10m (1/3th arcsec) National Elevation Dataset", +# units = "m") +# )) +# +# +# +# +# # Loop over the three resolutions +# for (i in 1:length(ned)) { +# i = 1 +# +# # Define output text file path +# txt_file <- paste0(dem_base_dir, "/ned_list_", ned$id[i], "_2.txt") +# +# # Define output VRT path +# vrt_file <- paste0(paste0(dem_base_dir, "/ned_", ned$id[i], ".vrt")) +# +# # If VRT does NOT exist, build VRT +# if (!file.exists(vrt_file)) { +# +# # read the corresponding index.gpkg +# files <- sf::read_sf(ned$domain_url[i]) +# DEM_UR +# DEM_PATH <- "/vsicurl/https://prd-tnm.s3.amazonaws.com/StagedProducts/Elevation/1/TIFF/USGS_Seamless_DEM_1.vrt" +# # Build full HTTPS paths to "./current/" +# files <- 
c(file.path(ned$URL[i], "TIFF/current", gsub("[.]/", "", files$location))) +# +# files <- "/vsicurl/https://prd-tnm.s3.amazonaws.com/StagedProducts/Elevation/1/TIFF/USGS_Seamless_DEM_1.vrt" +# +# # write list of files to text file +# write.table(files, txt_file, row.names = FALSE, col.names = FALSE, quote = FALSE) +# +# # build VRT from text file input using GDAL system call ... +# system(paste("gdalbuildvrt -input_file_list", txt_file, vrt_file)) +# } +# +# logger::log_info("Finished ", ned$id[i], "...") +# } + +# +# match_transects_to_extended_cs_pts <- function(transect_lines, fixed_cs_pts, crosswalk_id, extension_pct = 0.5 ) { +# +# # transect_lines = transects +# # fixed_cs_pts = fixed_pts +# # crosswalk_id = CROSSWALK_ID +# +# fixed_cs_pts <- nhdplusTools::rename_geometry(fixed_cs_pts, "geometry") +# transect_lines <- nhdplusTools::rename_geometry(transect_lines, "geometry") +# +# # get the counts of each point type to add this data to the transect_lines dataset +# point_type_counts <- hydrofabric3D::get_point_type_counts(classified_pts = fixed_cs_pts, +# crosswalk_id = crosswalk_id) +# # Check the number of cross sections that were extended +# message("Subsetting cross section points generated after extending transect_lines...") +# +# # extract cross section points that have an "is_extended" value of TRUE +# extended_pts <- +# fixed_cs_pts %>% +# dplyr::filter(is_extended) %>% +# hydrofabric3D::add_tmp_id(x = crosswalk_id) +# +# # extended_pts %>% +# # get_unique_tmp_ids() %>% +# # length() +# +# # extract transect_lines that have a "crosswalk_id" in the "extended_pts" dataset +# update_transect_lines <- +# transect_lines %>% +# hydrofabric3D::add_tmp_id(x = crosswalk_id) %>% +# dplyr::filter(tmp_id %in% unique(extended_pts$tmp_id)) +# +# cs_pt_uids <- unique(hydrofabric3D::add_tmp_id(fixed_cs_pts, x = crosswalk_id)$tmp_id) +# +# # If any transect_lines were extended, update the transect_lines dataset, and overwrite local and S3 transect_lines geopackages +# if (nrow(update_transect_lines) > 0) { +# message("Updating ", nrow(update_transect_lines), " transect_lines") +# +# +# # update_transect_lines <- +# # update_transect_lines %>% +# # dplyr::rename(hy_id := !!sym(crosswalk_id)) +# # +# update_transect_lines <- +# update_transect_lines %>% +# # apply extend_by_percent function to each transect line: +# hydrofabric3D:::extend_by_percent( +# crosswalk_id = crosswalk_id, +# pct = extension_pct, +# length_col = "cs_lengthm" +# ) +# +# update_transect_lines <- hydroloom::rename_geometry(update_transect_lines, "geometry") +# +# # update_transect_lines <- +# # update_transect_lines %>% +# # dplyr::rename(!!sym(crosswalk_id) := hy_id) +# +# # cs_pt_uids <- unique(hydrofabric3D::add_tmp_id(fixed_cs_pts, x = get(crosswalk_id))$tmp_id) +# # transect_uids <- unique(hydrofabric3D::add_tmp_id(transect_lines, x = get(crosswalk_id))$tmp_id) +# +# # Filter down to ONLY points that were finalized and rectified from rectify_cs_pts() +# # Remove old transect_lines that have "tmp_id" in "extended_pts" (transect_lines that were unchanged and are "good_to_go") +# # and then replace with old transect_lines with the "update_transect_lines" +# out_transect_lines <- +# transect_lines %>% +# hydrofabric3D::add_tmp_id(x = crosswalk_id) %>% +# dplyr::filter(tmp_id %in% cs_pt_uids) %>% +# dplyr::filter(!tmp_id %in% unique(extended_pts$tmp_id)) %>% +# dplyr::bind_rows( +# dplyr::mutate(update_transect_lines, is_extended = TRUE) +# ) +# +# # transect_lines %>% +# # hydrofabric3D::add_tmp_id(x = "hy_id") %>% +# # 
# dplyr::filter(!tmp_id %in% unique(extended_pts$tmp_id)) %>% +# # dplyr::filter(tmp_id %in% unique(hydrofabric3D::add_tmp_id(fixed_pts, x = "hy_id")$tmp_id)) %>% # Subset down to the remaining tmp_ids in the fixed points +# # dplyr::filter(!tmp_id %in% unique(extended_pts$tmp_id)) %>% # remove the tmp_ids that we are going add back in with the extended versions of those tmp_ids +# # dplyr::bind_rows( # bring in the new updated extended transect_lines +# # dplyr::mutate( +# # update_transect_lines, +# # is_extended = TRUE +# # ) +# # ) +# } else { +# # If no transect_lines were extended +# out_transect_lines <- +# transect_lines %>% +# hydrofabric3D::add_tmp_id(x = crosswalk_id) %>% +# dplyr::filter(tmp_id %in% cs_pt_uids) %>% +# # dplyr::filter(tmp_id %in% unique(hydrofabric3D::add_tmp_id(fixed_cs_pts, x = get(crosswalk_id))$tmp_id)) %>% +# dplyr::filter(!tmp_id %in% unique(extended_pts$tmp_id)) +# } +# +# # Finalize new transect_lines +# out_transect_lines <- +# out_transect_lines %>% +# dplyr::left_join( +# point_type_counts, +# by = c(crosswalk_id, "cs_id") +# ) %>% +# dplyr::left_join( +# dplyr::ungroup( +# dplyr::slice( +# dplyr::group_by( +# dplyr::select(sf::st_drop_geometry(fixed_cs_pts), +# dplyr::any_of(crosswalk_id), +# cs_id, bottom, left_bank, right_bank, valid_banks, has_relief +# ), +# dplyr::across(dplyr::any_of(c(crosswalk_id, "cs_id"))) +# ), +# 1 +# ) +# ), +# by = c(crosswalk_id, "cs_id") +# ) %>% +# dplyr::select( +# dplyr::any_of(crosswalk_id), +# cs_source, cs_id, cs_measure, cs_lengthm, +# # sinuosity, +# is_extended, +# left_bank_count, right_bank_count, channel_count, bottom_count, +# bottom, left_bank, right_bank, valid_banks, has_relief, +# geometry +# ) %>% +# dplyr::mutate( +# is_extended = ifelse(is.na(is_extended), FALSE, is_extended) +# ) +# +# return(out_transect_lines) +# } + +# utility function for getting transects extended and +# matching cross section points that went through "get_improved_cs_pts()" and that were extended for improvement +# returns the extended version of the transects +# match_transects_to_extended_cs_pts <- function(transect_lines, fixed_cs_pts, crosswalk_id) { +# +# # transect_lines = transects +# # fixed_cs_pts = fixed_pts +# # crosswalk_id = CROSSWALK_ID +# +# fixed_cs_pts <- nhdplusTools::rename_geometry(fixed_cs_pts, "geometry") +# transect_lines <- nhdplusTools::rename_geometry(transect_lines, "geometry") +# +# # get the counts of each point type to add this data to the transect_lines dataset +# point_type_counts <- hydrofabric3D::get_point_type_counts(classified_pts = fixed_cs_pts, +# crosswalk_id = crosswalk_id) +# # Check the number of cross sections that were extended +# message("Subsetting cross section points generated after extending transect_lines...") +# +# # extract cross section points that have an "is_extended" value of TRUE +# extended_pts <- +# fixed_cs_pts %>% +# dplyr::filter(is_extended) %>% +# hydrofabric3D::add_tmp_id(x = crosswalk_id) +# +# # extended_pts %>% +# # get_unique_tmp_ids() %>% +# # length() +# +# # extract transect_lines that have a "crosswalk_id" in the "extended_pts" dataset +# update_transect_lines <- +# transect_lines %>% +# hydrofabric3D::add_tmp_id(x = crosswalk_id) %>% +# dplyr::filter(tmp_id %in% unique(extended_pts$tmp_id)) +# +# cs_pt_uids <- unique(hydrofabric3D::add_tmp_id(fixed_cs_pts, x = crosswalk_id)$tmp_id) +# +# # If any transect_lines were extended, update the transect_lines dataset, and overwrite local and S3 transect_lines geopackages +# if (nrow(update_transect_lines) > 0) 
{ +# message("Updating ", nrow(update_transect_lines), " transect_lines") +# +# +# update_transect_lines <- +# update_transect_lines %>% +# dplyr::rename(hy_id := !!sym(crosswalk_id)) +# +# update_transect_lines <- +# update_transect_lines %>% +# # apply extend_by_percent function to each transect line: +# hydrofabric3D:::extend_by_percent( +# pct = EXTENSION_PCT, +# length_col = "cs_lengthm" +# ) +# +# update_transect_lines <- hydroloom::rename_geometry(update_transect_lines, "geometry") +# +# update_transect_lines <- +# update_transect_lines %>% +# dplyr::rename(!!sym(crosswalk_id) := hy_id) +# +# # cs_pt_uids <- unique(hydrofabric3D::add_tmp_id(fixed_cs_pts, x = get(crosswalk_id))$tmp_id) +# # transect_uids <- unique(hydrofabric3D::add_tmp_id(transect_lines, x = get(crosswalk_id))$tmp_id) +# +# # Filter down to ONLY points that were finalized and rectified from rectify_cs_pts() +# # Remove old transect_lines that have "tmp_id" in "extended_pts" (transect_lines that were unchanged and are "good_to_go") +# # and then replace with old transect_lines with the "update_transect_lines" +# out_transect_lines <- +# transect_lines %>% +# hydrofabric3D::add_tmp_id(x = crosswalk_id) %>% +# dplyr::filter(tmp_id %in% cs_pt_uids) %>% +# dplyr::filter(!tmp_id %in% unique(extended_pts$tmp_id)) %>% +# dplyr::bind_rows( +# dplyr::mutate(update_transect_lines, is_extended = TRUE) +# ) +# +# # transect_lines %>% +# # hydrofabric3D::add_tmp_id(x = "hy_id") %>% +# # # dplyr::filter(!tmp_id %in% unique(extended_pts$tmp_id)) %>% +# # dplyr::filter(tmp_id %in% unique(hydrofabric3D::add_tmp_id(fixed_pts, x = "hy_id")$tmp_id)) %>% # Subset down to the remaining tmp_ids in the fixed points +# # dplyr::filter(!tmp_id %in% unique(extended_pts$tmp_id)) %>% # remove the tmp_ids that we are going add back in with the extended versions of those tmp_ids +# # dplyr::bind_rows( # bring in the new updated extended transect_lines +# # dplyr::mutate( +# # update_transect_lines, +# # is_extended = TRUE +# # ) +# # ) +# } else { +# # If no transect_lines were extended +# out_transect_lines <- +# transect_lines %>% +# hydrofabric3D::add_tmp_id(x = crosswalk_id) %>% +# dplyr::filter(tmp_id %in% cs_pt_uids) %>% +# # dplyr::filter(tmp_id %in% unique(hydrofabric3D::add_tmp_id(fixed_cs_pts, x = get(crosswalk_id))$tmp_id)) %>% +# dplyr::filter(!tmp_id %in% unique(extended_pts$tmp_id)) +# } +# +# # Finalize new transect_lines +# out_transect_lines <- +# out_transect_lines %>% +# dplyr::left_join( +# point_type_counts, +# by = c(crosswalk_id, "cs_id") +# ) %>% +# dplyr::left_join( +# dplyr::ungroup( +# dplyr::slice( +# dplyr::group_by( +# dplyr::select(sf::st_drop_geometry(fixed_cs_pts), +# dplyr::any_of(crosswalk_id), +# cs_id, bottom, left_bank, right_bank, valid_banks, has_relief +# ), +# dplyr::across(dplyr::any_of(c(crosswalk_id, "cs_id"))) +# ), +# 1 +# ) +# ), +# by = c(crosswalk_id, "cs_id") +# ) %>% +# dplyr::select( +# dplyr::any_of(crosswalk_id), +# cs_source, cs_id, cs_measure, cs_lengthm, +# # sinuosity, +# is_extended, +# left_bank_count, right_bank_count, channel_count, bottom_count, +# bottom, left_bank, right_bank, valid_banks, has_relief, +# geometry +# ) %>% +# dplyr::mutate( +# is_extended = ifelse(is.na(is_extended), FALSE, is_extended) +# ) +# +# return(out_transect_lines) +# } + + + From 8c094982a9c7ae193fefa956111a778cc4617768 Mon Sep 17 00:00:00 2001 From: anguswg-ucsb <anguswatters@gmail.com> Date: Mon, 18 Nov 2024 14:42:57 -0800 Subject: [PATCH 63/64] building new cs_runner format in cs_runner2 to replace first 
version of cs_runner --- runners/cs_runner2/00_driver.R | 26 ++ runners/cs_runner2/01_transects.R | 0 .../cs_runner2/02_extend_transects_by_fema.R | 0 .../cs_runner2/03_extract_initial_cs_pts.R | 0 .../04_extend_transects_by_cs_attributes.R | 0 runners/cs_runner2/05_extract_output_cs_pts.R | 0 runners/cs_runner2/06_inject_ml_bathymetry.R | 0 runners/cs_runner2/base_variables.R | 36 ++- runners/cs_runner2/download_dem_from_vrt.R | 145 ++++++++-- .../{download_fema100.R => download_fema.R} | 0 runners/cs_runner2/download_ml_outputs.R | 0 runners/cs_runner2/finalize_outputs.R | 0 ...partition_fema_by_vpu.R => process_fema.R} | 264 ++++++++---------- runners/cs_runner2/utils.R | 58 +++- 14 files changed, 350 insertions(+), 179 deletions(-) create mode 100644 runners/cs_runner2/00_driver.R create mode 100644 runners/cs_runner2/01_transects.R create mode 100644 runners/cs_runner2/02_extend_transects_by_fema.R create mode 100644 runners/cs_runner2/03_extract_initial_cs_pts.R create mode 100644 runners/cs_runner2/04_extend_transects_by_cs_attributes.R create mode 100644 runners/cs_runner2/05_extract_output_cs_pts.R create mode 100644 runners/cs_runner2/06_inject_ml_bathymetry.R rename runners/cs_runner2/{download_fema100.R => download_fema.R} (100%) create mode 100644 runners/cs_runner2/download_ml_outputs.R create mode 100644 runners/cs_runner2/finalize_outputs.R rename runners/cs_runner2/{partition_fema_by_vpu.R => process_fema.R} (79%) diff --git a/runners/cs_runner2/00_driver.R b/runners/cs_runner2/00_driver.R new file mode 100644 index 0000000..cb98b6e --- /dev/null +++ b/runners/cs_runner2/00_driver.R @@ -0,0 +1,26 @@ +### Run this file to have all runner scripts run in order + +# downloads nextgen datasets +source("runners/cs_runner2/config_env.R") + +# downloads datasets +# - Nextgen data +# - Reference features (for waterbody filtering) +# - ML outputs +# - FEMA 100 year floodplain polygons (FGBs) +# - 3DEP DEM VRT +source("runners/cs_runner2/download_conus_nextgen.R") +source("runners/cs_runner2/download_conus_ref_features.R") +source("runners/cs_runner2/download_fema.r") +source("runners/cs_runner2/process_fema.R") +source("runners/cs_runner2/download_dem_from_vrt.R") +source("runners/cs_runner2/download_ml_outputs.R") + +# generate and upload transects datasets +source("runners/cs_runner2/01_transects.R") + +# generate and upload cross sections points datasets +source("runners/cs_runner2/02_cs_pts.R") + +# Apply machine learning topwidths and depths estimates to DEM cross section points +source("runners/cs_runner2/03_inject_ml.R") diff --git a/runners/cs_runner2/01_transects.R b/runners/cs_runner2/01_transects.R new file mode 100644 index 0000000..e69de29 diff --git a/runners/cs_runner2/02_extend_transects_by_fema.R b/runners/cs_runner2/02_extend_transects_by_fema.R new file mode 100644 index 0000000..e69de29 diff --git a/runners/cs_runner2/03_extract_initial_cs_pts.R b/runners/cs_runner2/03_extract_initial_cs_pts.R new file mode 100644 index 0000000..e69de29 diff --git a/runners/cs_runner2/04_extend_transects_by_cs_attributes.R b/runners/cs_runner2/04_extend_transects_by_cs_attributes.R new file mode 100644 index 0000000..e69de29 diff --git a/runners/cs_runner2/05_extract_output_cs_pts.R b/runners/cs_runner2/05_extract_output_cs_pts.R new file mode 100644 index 0000000..e69de29 diff --git a/runners/cs_runner2/06_inject_ml_bathymetry.R b/runners/cs_runner2/06_inject_ml_bathymetry.R new file mode 100644 index 0000000..e69de29 diff --git a/runners/cs_runner2/base_variables.R 
b/runners/cs_runner2/base_variables.R index 79792c2..c716e29 100644 --- a/runners/cs_runner2/base_variables.R +++ b/runners/cs_runner2/base_variables.R @@ -99,13 +99,45 @@ CONUS_FEMA_GPKG_PATH <- file.path(CS_EXTENSION_POLYGONS_DIR, 'conus_fema.g # ---- CONUS CS extension polygons ---- # ------------------------------------------------------------------------------------- +# Meta data object of National Elevation Datasets (NED) +NED_META <- data.frame(rbind( + # c(id = "USGS_1", + # URL = "/vsicurl/https://prd-tnm.s3.amazonaws.com/StagedProducts/Elevation/1", + # varname = "30m elevation", + # long_name = "30m (1 arcsec) National Elevation Dataset", + # units = "m", + # s3_uri = "s3://prd-tnm/StagedProducts/Elevation/1/TIFF/", + # vrt_name = "USGS_Seamless_DEM_1.vrt" + # ) + # + # # c(id = "USGS_2", + # # URL = "/vsicurl/https://prd-tnm.s3.amazonaws.com/StagedProducts/Elevation/2", + # # varname = "60m elevation", + # # long_name = "60m (2 arcsec) National Elevation Dataset Alaska", + # # units = "m", + # # s3_uri = "s3://prd-tnm/StagedProducts/Elevation/2/TIFF/", + # # vrt_name = "USGS_Seamless_DEM_2.vrt" + # # ), + c(id = "USGS_13", + URL = "/vsicurl/https://prd-tnm.s3.amazonaws.com/StagedProducts/Elevation/13", + varname = "10m elevation", + long_name = "10m (1/3th arcsec) National Elevation Dataset", + units = "m", + s3_uri = "s3://prd-tnm/StagedProducts/Elevation/13/TIFF/", + vrt_name = "USGS_Seamless_DEM_13.vrt" + ) + ) +) + + DEM_DIR <- BASE_DIRS_LIST$dem_dir DEM_VRT_DIR <- BASE_DIRS_LIST$dem_vrt_dir -DEM_TIF_DIR <- BASE_DIRS_LIST$dem_tif_dir +# DEM_TIF_DIR <- BASE_DIRS_LIST$dem_tif_dir # DEM URL DEM_PATH <- "/vsicurl/https://prd-tnm.s3.amazonaws.com/StagedProducts/Elevation/1/TIFF/USGS_Seamless_DEM_1.vrt" - +# DEM_PATH <- "/vsicurl/https://prd-tnm.s3.amazonaws.com/StagedProducts/Elevation/13/TIFF/USGS_Seamless_DEM_13.vrt" +# # /vsicurl/https://prd-tnm.s3.amazonaws.com/StagedProducts/Elevation/13/TIFF/current/n70w148/USGS_13_n70w148.tif # ---------------------------------------------------------------------------- # ---- Cross section point extraction constant variables ---- # ---------------------------------------------------------------------------- diff --git a/runners/cs_runner2/download_dem_from_vrt.R b/runners/cs_runner2/download_dem_from_vrt.R index edd77bb..fc18f2c 100644 --- a/runners/cs_runner2/download_dem_from_vrt.R +++ b/runners/cs_runner2/download_dem_from_vrt.R @@ -1,5 +1,3 @@ -library(raster) -library(httr) library(terra) library(dplyr) @@ -8,25 +6,138 @@ library(dplyr) source("runners/cs_runner2/base_variables.R") source("runners/cs_runner2/utils.R") -base_dirs <- get_base_dir_paths(BASE_DIR) +# specify where VRT should go +DEM_VRT_DIR <- BASE_DIRS_LIST$dem_vrt_dir +# # DEM_TIF_DIR <- base_dirs$dem_tif_dir +# DEM_PATH <- "/vsicurl/https://prd-tnm.s3.amazonaws.com/StagedProducts/Elevation/13/TIFF/USGS_Seamless_DEM_13.vrt" +# # /vsicurl/https://prd-tnm.s3.amazonaws.com/StagedProducts/Elevation/13/TIFF/current/n70w148/USGS_13_n70w148.tif +# message("Syncing 3DEP elevation VRTs...") +# s3_cp_cmd <- paste0("aws s3 cp ", "s3://prd-tnm/StagedProducts/Elevation/13/TIFF/USGS_Seamless_DEM_13.vrt", " ", "/Users/anguswatters/Desktop/USGS_Seamless_DEM_13.vrt", " --profile ", AWS_PROFILE, " --no-sign-request") +# s3_cp_cmd +# system(s3_cp_cmd) +# "s3://prd-tnm/StagedProducts/Elevation/13/TIFF/USGS_Seamless_DEM_13.vrt" +# +# VRT_file <- "/Users/anguswatters/Desktop/USGS_Seamless_DEM_13.vrt" +# vrt_tiles <- terra::vrt_tiles(VRT_file) +# +# dem <- terra::rast(vrt_tiles[999]) +# dem 
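+# (editorial note: terra::vrt_tiles() lists the component tile paths referenced
+#  by a .vrt, so the indexing above pulls a single DEM tile out of the mosaic
+#  for inspection)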
+# terra::writeRaster(dem, "/Users/anguswatters/Desktop/tile1_USGS_Seamless_DEM_13.tif", overwrite = T)
+# length(vrt_tiles)
+# s3://lynker-nypa-inputs/ryan_test/
+
-DEM_VRT_DIR <- base_dirs$dem_vrt_dir
-DEM_TIF_DIR <- base_dirs$dem_tif_dir
+# loop over each NED resolution and sync its "current/" tile tree down from S3
+for (i in seq_len(nrow(NED_META))) {
+
+  # NED_META
+  # i = 1
+  ned_uri         <- NED_META$s3_uri[i]
+  ned_current_uri <- paste0(ned_uri, "current/")
+  # ned_current_uri
+
+  output_dir         <- file.path(DEM_VRT_DIR, NED_META$id[i])
+  # output_dir
+  current_output_dir <- file.path(DEM_VRT_DIR, NED_META$id[i], "current")
+
+  # create specific VRT directory if it doesn't already exist
+  create_if_not_exists(output_dir)
+  create_if_not_exists(current_output_dir)
+
+  message("[", i, "] - ", "Syncing\n '", ned_current_uri, "' > '", current_output_dir, "'")
+
+  s3_sync_cmd <- paste0("aws s3 sync ", ned_current_uri, " ", current_output_dir, " --profile ", AWS_PROFILE, " --no-sign-request")
+  # s3_sync_cmd <- paste0("aws s3 sync s3://prd-tnm/StagedProducts/Elevation/1/TIFF/ ", DEM_VRT_DIR, " --profile ", AWS_PROFILE, " --no-sign-request")
+
+  tryCatch({
+
+    system(s3_sync_cmd)
+
+  }, error = function(e) {
+
+    message("Error syncing ", ned_uri, " to local directory...")
+    message(e)
+
+  })
+
+}
-# "aws s3 sync s3://prd-tnm/StagedProducts/Elevation/1/TIFF/ /Volumes/T7SSD/lynker-spatial/dem/tif/ --no-sign-request --only-show-errors"
+# vrt_file <- file.path(DEM_VRT_DIR, "USGS_Seamless_DEM_1.vrt")
+#
+# # TODO: use AWS S3 sync command like below
+# s3_sync_cmd <- paste0("aws s3 sync s3://prd-tnm/StagedProducts/Elevation/1/TIFF/ ", DEM_VRT_DIR, " --profile ", AWS_PROFILE, " --no-sign-request")
+# # s3_sync_cmd <- paste0("aws s3 sync s3://prd-tnm/StagedProducts/Elevation/1/TIFF/ ", DEM_VRT_DIR, " --profile ", AWS_PROFILE, " --no-sign-request --only-show-errors")
+#
+# system(s3_sync_cmd)
-# Parse the VRT file
-vrt_file <- list.files(DEM_VRT_DIR, full.names = TRUE)
+# ------------------------------------
+# ---- Build VRTs ----
+# ------------------------------------
-vrt_tiles <- terra::vrt_tiles(vrt_file)
+message("Building 3DEP elevation '.vrt' files...")
-# tile_path <- vrt_tiles[500]
-tile_paths <- gsub("/vsicurl/", "", vrt_tiles)
+# for each NED resolution, write the list of synced TIFs to a text file and
+# build a single mosaic VRT from that list
+for (i in seq_len(nrow(NED_META))) {
+
+  # i = 1
+  # NED_META$id[i]
+
+  output_dir <- file.path(DEM_VRT_DIR, NED_META$id[i])
+  # output_dir
+
+  # Define output text file path
+  txt_file <- file.path(output_dir, paste0("ned_list_", NED_META$id[i], ".txt"))
+
+  # Define output VRT path
+  vrt_file <- file.path(output_dir, paste0("ned_", NED_META$id[i], ".vrt"))
+  # vrt_file <- paste0(paste0("data-raw/ned_", NED_META$id[i], ".vrt"))
+
+  # If VRT does NOT exist, build VRT
+  if (!file.exists(vrt_file)) {
+
+    # txt_file
+
+    current_dir <- file.path(output_dir, "current")
+
+    # all the current/ directories that have the TIFs
+    tile_dirs <- list.files(current_dir, full.names = TRUE)
+    # list.files(tile_dirs[1:2], full.names = T)
+    # index_gpkg <- sf::read_sf(list.files(tile_dirs[999], full.names = T, pattern = ".gpkg"))
+
+    # get list of all the .tif files in the "current/" dir
+    tif_paths <- list.files(tile_dirs, pattern = ".tif", full.names = T)
+
+    # write list of files to text file
+    write.table(tif_paths, txt_file, row.names = FALSE, col.names = FALSE, quote = FALSE)
+
+    # build VRT from text file input using GDAL system call ...
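+    # (editorial note) gdalbuildvrt is GDAL's virtual-mosaic builder; with
+    # illustrative paths the call below expands to roughly:
+    #   gdalbuildvrt -input_file_list .../USGS_13/ned_list_USGS_13.txt .../USGS_13/ned_USGS_13.vrt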
+ system(paste("gdalbuildvrt -input_file_list", txt_file, vrt_file)) + + } + +} -# TODO: use AWS S3 sync command like below -s3_sync_cmd <- paste0("aws s3 sync s3://prd-tnm/StagedProducts/Elevation/1/TIFF/ ", DEM_TIF_DIR, " --profile ", AWS_PROFILE, " --no-sign-request --only-show-errors") -# "aws s3 sync s3://prd-tnm/StagedProducts/Elevation/1/TIFF/ /Volumes/T7SSD/lynker-spatial/dem/tif/ --no-sign-request --only-show-errors" - -# TODO: Old method curl request each file individually.... super slow -# error_tiles <- download_tiles(tile_paths, DEM_TIF_DIR) \ No newline at end of file +# file.path(DEM_TIF_DIR, "USGS_Seamless_DEM_1.vrt") +# length(list.files(file.path(DEM_TIF_DIR, "current"), full.names = T)) +# vrt_tiles <- terra::vrt_tiles(file.path(DEM_TIF_DIR, "USGS_Seamless_DEM_1.vrt")) +# vrt_tiles[1] +# # "aws s3 sync s3://prd-tnm/StagedProducts/Elevation/1/TIFF/ /Volumes/T7SSD/lynker-spatial/dem/tif/ --no-sign-request --only-show-errors" +# +# # Parse the VRT file +# vrt_file <- list.files(DEM_VRT_DIR, full.names = TRUE) +# vrt_file <- list.files(DEM_VRT_DIR, full.names = TRUE) +# file.path(DEM_TIF_DIR, "USGS_Seamless_DEM_1.vrt") +# vrt_tiles <- terra::vrt_tiles(vrt_file) +# +# # tile_path <- vrt_tiles[500] +# tile_paths <- gsub("/vsicurl/", "", vrt_tiles) +# +# # TODO: use AWS S3 sync command like below +# s3_sync_cmd <- paste0("aws s3 sync s3://prd-tnm/StagedProducts/Elevation/1/TIFF/ ", DEM_VRT_DIR, " --profile ", AWS_PROFILE, " --no-sign-request --only-show-errors") +# # aws s3 sync s3://prd-tnm/StagedProducts/Elevation/1/TIFF/ /Volumes/T7SSD/lynker-spatial/dem/tif/ --no-sign-request --only-show-errors +# # "aws s3 sync s3://prd-tnm/StagedProducts/Elevation/1/TIFF/ /Volumes/T7SSD/lynker-spatial/dem/tif/ --no-sign-request --only-show-errors" +# +# # TODO: Old method curl request each file individually.... 
super slow +# # error_tiles <- download_tiles(tile_paths, DEM_TIF_DIR) \ No newline at end of file diff --git a/runners/cs_runner2/download_fema100.R b/runners/cs_runner2/download_fema.R similarity index 100% rename from runners/cs_runner2/download_fema100.R rename to runners/cs_runner2/download_fema.R diff --git a/runners/cs_runner2/download_ml_outputs.R b/runners/cs_runner2/download_ml_outputs.R new file mode 100644 index 0000000..e69de29 diff --git a/runners/cs_runner2/finalize_outputs.R b/runners/cs_runner2/finalize_outputs.R new file mode 100644 index 0000000..e69de29 diff --git a/runners/cs_runner2/partition_fema_by_vpu.R b/runners/cs_runner2/process_fema.R similarity index 79% rename from runners/cs_runner2/partition_fema_by_vpu.R rename to runners/cs_runner2/process_fema.R index 2292588..522d427 100644 --- a/runners/cs_runner2/partition_fema_by_vpu.R +++ b/runners/cs_runner2/process_fema.R @@ -169,10 +169,6 @@ for (file in FEMA_FILENAMES) { # ------------------------------------------------------------------------------------------------------------------- # ---- Apply final dissolve/snap and removal of internal boundaries in FEMA geometries ---- # ------------------------------------------------------------------------------------------------------------------- -source sh/create_tfstate_bucket.sh "mros-webapp-tfstate-bucket" 645515465214 "angus-lynker" us-west-1 "false" -source sh/create_tfstate_bucket.sh "mros-webapp-tfstate-bucket" 645515465214 "angus-lynker" us-west-1 "false" - - # # paths to FEMA 100 year flood plain files # FEMA_gpkg_paths <- list.files(FEMA_GPKG_PATH, full.names = TRUE) @@ -267,8 +263,8 @@ CONUS_VPU_IDS <- for (file_path in FEMA_CLEAN_GPKG_PATHS) { - - # file_path = FEMA_CLEAN_GPKG_PATHS[25] + # i = 35 + # file_path = FEMA_CLEAN_GPKG_PATHS[i] fema_file <- basename(file_path) @@ -278,8 +274,8 @@ for (file_path in FEMA_CLEAN_GPKG_PATHS) { fema <- sf::read_sf(file_path) for (vpu in CONUS_VPU_IDS) { - - # vpu = CONUS_VPU_IDS[12] + # j = 8 + # vpu = CONUS_VPU_IDS[j] # nextgen_basename <- basename(nextgen_path) # vpu <- unlist(regmatches(nextgen_basename, gregexpr("\\d+[A-Za-z]*", nextgen_basename))) @@ -320,8 +316,10 @@ for (file_path in FEMA_CLEAN_GPKG_PATHS) { # state <- gsub("-100yr-flood_valid_clean.gpkg", "", fema_file) + # BASE_DIRS_LIST$fema_by_vpu_subsets_dirs + fema_vpu_filename <- gsub(".gpkg", paste0("_", vpu, ".gpkg"), fema_file) - fema_vpu_path <- paste0(vpu_subfolder_path, "/", fema_vpu_filename) + fema_vpu_path <- paste0(vpu_subfolder_path, "/subsets/", fema_vpu_filename) if (OVERWRITE_FEMA_FILES) { @@ -337,60 +335,6 @@ for (file_path in FEMA_CLEAN_GPKG_PATHS) { } message() } - # for (nextgen_path in NEXTGEN_FILE_PATHS) { - # nextgen_basename <- basename(nextgen_path) - # vpu <- unlist(regmatches(nextgen_basename, gregexpr("\\d+[A-Za-z]*", nextgen_basename))) - - # message("VPU: ", vpu) - # message("- nextgen gpkg:\n > '", nextgen_path, "'") - # message(" > Checking if '", fema_file, "' intersects with '", nextgen_basename, "'") - - # # read in nextgen flowlines - # flines <- sf::read_sf(nextgen_path, layer = "flowpaths") - - # # get the FEMA polygons that intersect with the nextgen flowlines - # fema_intersect <- polygons_with_line_intersects(fema, flines) - - # fema_in_nextgen <- nrow(fema_intersect) != 0 - - # message("FEMA intersects with nextgen flowlines? 
", fema_in_nextgen) - - # if(fema_in_nextgen) { - - # # create filepaths - # vpu_subfolder <- paste0("VPU_", vpu) - # # vpu_subfolder_path <- paste0(FEMA_BY_VPU_PATH, "/", vpu_subfolder, "/states") - # vpu_subfolder_path <- paste0(FEMA_BY_VPU_PATH, "/", vpu_subfolder) - - # # vpu_subfolder_path <- FEMA_VPU_SUBFOLDERS[grepl(vpu_subfolder, FEMA_VPU_SUBFOLDERS)] - - # fema_intersect <- - # fema_intersect %>% - # dplyr::mutate( - # vpu = vpu - # ) %>% - # dplyr::select(vpu, fema_id, source, state, geom) - - # # state <- gsub("-100yr-flood_valid_clean.gpkg", "", fema_file) - - # fema_vpu_filename <- gsub(".gpkg", paste0("_", vpu, ".gpkg"), fema_file) - # fema_vpu_path <- paste0(vpu_subfolder_path, "/", fema_vpu_filename) - - - # if (OVERWRITE_FEMA_FILES) { - # message("Writting '", basename(fema_vpu_filename), "' to: \n > '", fema_vpu_path, "'") - - # sf::write_sf( - # fema_intersect, - # fema_vpu_path - # ) - # } - - - # } - # message() - # } - message( "--------------------------------------------------------------\n", @@ -400,23 +344,50 @@ for (file_path in FEMA_CLEAN_GPKG_PATHS) { } +# for (i in seq_along(BASE_DIRS_LIST$fema_by_vpu_subdirs)) { +# vpu_subdir <- BASE_DIRS_LIST$fema_by_vpu_subdirs[i] +# +# message(i, " - ", vpu_subdir) +# +# files_to_move = list.files(vpu_subdir, pattern = ".gpkg", full.names = T) +# +# for (file in files_to_move) { +# new_path <- paste0(vpu_subdir, "/subsets/", basename(file)) +# message(" > ", file, "\n > ", new_path, "\n") +# fs::file_move( +# file, +# new_path +# ) +# } +# } + # ------------------------------------------------------------------------------------- # ---- Loop through each VPU subfolder and merge all of the Geopackages into one---- # ------------------------------------------------------------------------------------- +# BASE_DIRS_LIST$fema_by_vpu_subdirs +# BASE_DIRS_LIST$fema_by_vpu_subsets_dirs +# for (vpu_dir in FEMA_VPU_SUBFOLDERS) { +BASE_DIRS_LIST$fema_by_vpu_subdirs -for (vpu_dir in FEMA_VPU_SUBFOLDERS) { +for (i in seq_along(BASE_DIRS_LIST$fema_by_vpu_subdirs)) { # for (i in 1:4) { - # i = 1 - # vpu_dir = FEMA_VPU_SUBFOLDERS2[i] + # i = 8 + + vpu_dir <- BASE_DIRS_LIST$fema_by_vpu_subdirs[i] + # vpu_dir <- FEMA_VPU_SUBFOLDERS[i] + + vpu_subset_dir <- BASE_DIRS_LIST$fema_by_vpu_subsets_dirs[i] + message("Merging files in '", basename(vpu_dir), "' directory...") - # } + vpu_subdirs <- list.files(vpu_subset_dir, full.names = TRUE) + # vpu_subdirs <- list.files(vpu_dir, full.names = TRUE) - vpu_subdirs <- list.files(vpu_dir, full.names = TRUE) + # message(paste0("\n > ", basename(vpu_subdirs), collapse = "")) # path to the merged directory where the final merged geopackge will end up - master_name <- paste0("fema_", gsub("VPU", "vpu", basename(vpu_dir))) + master_name <- paste0("fema-", gsub("VPU", "vpu", basename(vpu_dir))) master_gpkg_name <- paste0(master_name, ".gpkg") - master_filepath <- paste0(vpu_dir, "/", master_gpkg_name) + master_filepath <- paste0(vpu_dir, "/merged/", master_gpkg_name) # if the file already exists, remove it so we dont OVER append data to the "master file" if (file.exists(master_filepath)) { @@ -424,11 +395,11 @@ for (vpu_dir in FEMA_VPU_SUBFOLDERS) { } # fema state geopackages partioned for the specific VPU - fema_state_gpkgs <- list.files(vpu_dir, full.names = TRUE) - master_output_filepath <- paste0(vpu_dir, "/", gsub(".gpkg", "_output.gpkg", master_gpkg_name)) - - # make sure to ignore the master file if it already exists - fema_state_gpkgs <- fema_state_gpkgs[fema_state_gpkgs != master_filepath & 
fema_state_gpkgs != master_output_filepath] + fema_state_gpkgs <- list.files(vpu_subset_dir, full.names = TRUE) + # master_output_filepath <- paste0(vpu_dir, "/", gsub(".gpkg", "_output.gpkg", master_gpkg_name)) + # + # # make sure to ignore the master file if it already exists + # fema_state_gpkgs <- fema_state_gpkgs[fema_state_gpkgs != master_filepath & fema_state_gpkgs != master_output_filepath] for(gpkg_file in fema_state_gpkgs) { # message(" - Appending '", basename(gpkg_file), "' to master FEMA VPU gpkg:\n > '", @@ -443,29 +414,29 @@ for (vpu_dir in FEMA_VPU_SUBFOLDERS) { " ", gpkg_file ) - if (OVERWRITE_FEMA_FILES) { + # if (OVERWRITE_FEMA_FILES) { system(ogr2ogr_merge_command) - } + # } + } - has_fema_state_gpkgs <- length(fema_state_gpkgs) > 0 - - if(DELETE_STAGING_GPKGS && has_fema_state_gpkgs) { - message(" - Deleting individual gpkgs from '", vpu_dir, "' directory...") - # message("- Deleting individual gpkgs from 'states' directory:\n > '", states_dir, "'") - - remove_gpkg_cmds <- paste0("rm ", fema_state_gpkgs) - - for (remove_cmd in remove_gpkg_cmds) { - message(" > '", remove_cmd, "'") - system(remove_cmd) - } - } + # has_fema_state_gpkgs <- length(fema_state_gpkgs) > 0 + # + # if(DELETE_STAGING_GPKGS && has_fema_state_gpkgs) { + # message(" - Deleting individual gpkgs from '", vpu_dir, "' directory...") + # remove_gpkg_cmds <- paste0("rm ", fema_state_gpkgs) + # + # for (remove_cmd in remove_gpkg_cmds) { + # message(" > '", remove_cmd, "'") + # system(remove_cmd) + # } + # } # message() message("Merge complete!") message("Merged '", basename(vpu_dir), "' FEMA output geopackage:\n --> '", master_filepath, "'") message() + } # ------------------------------------------------------------------------------------- # ----Apply simplify, dissolve, explode on the MERGED polygons ---- @@ -475,9 +446,13 @@ for (vpu_dir in FEMA_VPU_SUBFOLDERS) { # for (i in list.files(FEMA_VPU_SUBFOLDERS, full.names = T)[grepl("_output.gpkg", list.files(FEMA_VPU_SUBFOLDERS, full.names = T))]) { # file.remove(i) # } +# seq_along(BASE_DIRS_LIST$fema_by_vpu_subdirs) +# rm(STAGING_FILES_TO_DELETE, i, VPU, vpu_dir) for (i in 1:length(FEMA_VPU_SUBFOLDERS)) { + # i = 8 + # FEMA_VPU_SUBFOLDERS STAGING_FILES_TO_DELETE <- c() vpu_dir <- FEMA_VPU_SUBFOLDERS[i] @@ -486,15 +461,23 @@ for (i in 1:length(FEMA_VPU_SUBFOLDERS)) { message(i, " - Attempting to union FEMA polygons for '", VPU, "'...") # path to the merged directory where the final merged geopackage will end up - master_name <- paste0("fema_", gsub("VPU", "vpu", basename(vpu_dir))) + master_name <- paste0("fema-", gsub("VPU", "vpu", basename(vpu_dir))) master_gpkg_name <- paste0(master_name, ".gpkg") - master_filepath <- paste0(vpu_dir, "/", master_gpkg_name) + master_filepath <- paste0(vpu_dir, "/merged/", master_gpkg_name) master_geojson_name <- paste0(master_name, ".geojson") - master_geojson_filepath <- paste0(vpu_dir, "/", master_geojson_name) + master_geojson_filepath <- paste0(vpu_dir, "/merged/", master_geojson_name) + + # updated_gpkg_name <- gsub(".gpkg", "-output.gpkg", master_gpkg_name) + updated_gpkg_name <- master_gpkg_name + updated_filepath <- paste0(vpu_dir, "/output/", master_gpkg_name) + + updated_gpkg_exists <- file.exists(updated_filepath) - updated_gpkg_name <- gsub(".gpkg", "_output.gpkg", master_gpkg_name) - updated_filepath <- paste0(vpu_dir, "/", updated_gpkg_name) + # remove output file if it already exists + if (updated_gpkg_exists) { + file.remove(updated_filepath) + } message("VPU Merged FEMA filename: '", master_gpkg_name, 
"'") message("> Simplifying, dissolve, exploding VPU aggregated FEMA polygons... '", basename(master_filepath), "'") @@ -516,12 +499,12 @@ for (i in 1:length(FEMA_VPU_SUBFOLDERS)) { # gpkg_to_geojson_cmd <- paste0("ogr2ogr ", master_geojson_filepath, " ", master_filepath) # file.remove(master_geojson_filepath) - if (OVERWRITE_FEMA_FILES || !geojson_exists) { - system(gpkg_to_geojson_cmd) - message("Writing '", master_geojson_name, "' to: \n > '", master_geojson_filepath, "'") - - STAGING_FILES_TO_DELETE <- c(STAGING_FILES_TO_DELETE, master_geojson_filepath) - } + # if (OVERWRITE_FEMA_FILES || !geojson_exists) { + system(gpkg_to_geojson_cmd) + message("Writing '", master_geojson_name, "' to: \n > '", master_geojson_filepath, "'") + + STAGING_FILES_TO_DELETE <- c(STAGING_FILES_TO_DELETE, master_geojson_filepath) + # } # master_gj <- sf::read_sf(master_geojson_filepath) # master_gpkg <- sf::read_sf(master_filepath) @@ -529,13 +512,15 @@ for (i in 1:length(FEMA_VPU_SUBFOLDERS)) { # Clean GeoJSON message("Simplify, dissolve, explode > '", master_geojson_name, "'") output_clean_filename <- gsub(".geojson", "_clean.geojson", master_geojson_name) - output_clean_geojson_path <- paste0(vpu_dir, "/", output_clean_filename) + output_clean_geojson_path <- paste0(vpu_dir, "/merged/", output_clean_filename) clean_geojson_exists <- file.exists(output_clean_geojson_path) message(" >>> '", output_clean_filename, "' already exists? ", clean_geojson_exists) - message(" >>> Overwrite? ", OVERWRITE_FEMA_FILES) + # message(" >>> Overwrite? ", OVERWRITE_FEMA_FILES) - # file.remove(output_clean_geojson_path) + if (clean_geojson_exists) { + file.remove(output_clean_geojson_path) + } mapshaper_command = paste0('node --max-old-space-size=16000 /opt/homebrew/bin/mapshaper ', master_geojson_filepath, @@ -554,33 +539,36 @@ for (i in 1:length(FEMA_VPU_SUBFOLDERS)) { # message("Writing '", master_geojson_name, "' to: \n > '", master_geojson_filepath, "'") STAGING_FILES_TO_DELETE <- c(STAGING_FILES_TO_DELETE, output_clean_geojson_path) - output_clean_gpkg_filename <- gsub(".geojson", ".gpkg", master_geojson_name) - output_clean_gpkg_path <- paste0(vpu_dir, "/", output_clean_gpkg_filename) + # output_clean_gpkg_filename <- gsub(".geojson", ".gpkg", master_geojson_name) + # output_clean_gpkg_path <- paste0(vpu_dir, "/merged/", output_clean_gpkg_filename) # fema_vpu <- sf::read_sf(master_filepath) # geojson_to_gpkg_cmd <- paste0("ogr2ogr -f GPKG ", updated_filepath, " ", output_clean_geojson_path) - geojson_to_gpkg_cmd <- paste0("ogr2ogr -nlt MULTIPOLYGON -s_srs EPSG:5070 -t_srs EPSG:5070 ", updated_filepath, " ", output_clean_geojson_path) + geojson_to_gpkg_cmd <- paste0("ogr2ogr -nlt MULTIPOLYGON -s_srs EPSG:5070 -t_srs EPSG:5070 ", + updated_filepath, " ", + output_clean_geojson_path) # geojson_to_gpkg_cmd <- paste0("ogr2ogr ", updated_filepath, " ", output_clean_geojson_path) - updated_gpkg_exists <- file.exists(updated_filepath) - # updated_gpkg_exists - # file.remove(updated_filepath) + # updated_gpkg_exists <- file.exists(updated_filepath) + # if (updated_gpkg_exists) { + # file.remove(updated_filepath) + # } - if (OVERWRITE_FEMA_FILES || !updated_gpkg_exists) { - system(geojson_to_gpkg_cmd) - message("Writing '", updated_gpkg_name, "' to: \n > '", updated_filepath, "'") - } + + # if (OVERWRITE_FEMA_FILES || !updated_gpkg_exists) { + system(geojson_to_gpkg_cmd) + message("Writing '", updated_gpkg_name, "' to: \n > '", updated_filepath, "'") + # } # sf::st_layers(updated_filepath) # mapview::npts(fema) - fema <- 
sf::read_sf(updated_filepath) %>%
    # sf::read_sf(output_clean_geojson_path) %>%
    # rmapshaper::ms_explode(sys=TRUE, sys_mem = 16) %>%
    dplyr::mutate(
-      vpu = gsub("VPU_", "", VPU),
+      vpu = gsub("vpu-", "", VPU),
       fema_id = as.character(1:dplyr::n())
     ) %>%
     dplyr::select(
@@ -595,6 +583,7 @@ for (i in 1:length(FEMA_VPU_SUBFOLDERS)) {
   #   dplyr::mutate(pts = mapview::npts(geom)) %>%
   #   dplyr::arrange(-pts)
+  # remove before writing updated version
   file.remove(updated_filepath)
   
   sf::write_sf(
@@ -618,47 +607,14 @@ for (i in 1:length(FEMA_VPU_SUBFOLDERS)) {
 # ---- Store all FEMA layers in a single conus_fema.gpkg
 # -------------------------------------------------------------------------------------
 
-fema_vpu_layers <- list.files(FEMA_VPU_SUBFOLDERS, full.names = T)[grepl("_output.gpkg", list.files(FEMA_VPU_SUBFOLDERS))]
+FEMA_VPU_SUBFOLDERS
 
-combine_gpkg_files(fema_vpu_layers, '/Volumes/T7SSD/lynker-spatial/cs-extension-polygons/conus_fema.gpkg')
-
+all_fema_vpu_layers <- list.files(BASE_DIRS_LIST$fema_by_vpu_output_dirs, full.names = TRUE)
 
-combine_gpkg_files <- function(gpkg_paths, output_gpkg) {
-  
-  layer_counter <- list()
-  
-  for (gpkg_path in gpkg_paths) {
-    
-    base_name <- tools::file_path_sans_ext(basename(gpkg_path))
-    # base_name <- gsub("_output.gpkg", "", basename(gpkg_path))
-    
-    if (base_name %in% names(layer_counter)) {
-      layer_counter[[base_name]] <- layer_counter[[base_name]] + 1
-      layer_name <- paste0(base_name, "_", layer_counter[[base_name]])
-    } else {
-      
-      layer_counter[[base_name]] <- 1
-      layer_name <- base_name
-    }
-    
-    tryCatch({
-      sf_layer <- st_read(gpkg_path, quiet = TRUE)
-      
-      sf::st_write(sf_layer,
-                   dsn = output_gpkg,
-                   layer = layer_name,
-                   append = TRUE,
-                   quiet = TRUE)
-      
-      message("Successfully added '", basename(gpkg_path), "' as layer: '", layer_name, "' to\n > '", output_gpkg, "'")
-      
-    }, error = function(e) {
-      warning("Error processing: ", basename(gpkg_path))
-      warning(e)
-    })
-  }
-}
+# fema_vpu_layers <- list.files(FEMA_VPU_SUBFOLDERS, full.names = T)[grepl("_output.gpkg", list.files(FEMA_VPU_SUBFOLDERS))]

+combine_gpkg_files(all_fema_vpu_layers, CONUS_FEMA_GPKG_PATH)
+

# # -------------------------------------------------------------------------------------
# # ---- Union each VPU geopackage (either on state or just touching predicate) ----

diff --git a/runners/cs_runner2/utils.R b/runners/cs_runner2/utils.R
index 6feff0b..cb624ae 100644
--- a/runners/cs_runner2/utils.R
+++ b/runners/cs_runner2/utils.R
@@ -9,7 +9,7 @@
 # ├── tif/
 # ├── cs-extension-polygons/
 create_local_hydrofabric_base_dirs <- function(base_dir) {
-  
+  # base_dir <- BASE_DIR
   # build paths
   hydrofabric_dir <- paste0(base_dir, "/hydrofabric")
@@ -17,7 +17,7 @@ create_local_hydrofabric_base_dirs <- function(base_dir) {
   # DEM dirs
   dem_dir <- file.path(base_dir, "dem")
   dem_vrt_dir <- file.path(dem_dir, "vrt")
-  dem_tif_dir <- file.path(dem_dir, "tif")
+  # dem_tif_dir <- file.path(dem_dir, "tif")
 
   # polygons for transect extensions
   cs_extension_polygons_dir <- file.path(base_dir, "cs-extension-polygons")
@@ -42,7 +42,7 @@
   # DEM dirs
   create_if_not_exists(dem_dir)
   create_if_not_exists(dem_vrt_dir)
-  create_if_not_exists(dem_tif_dir)
+  # create_if_not_exists(dem_tif_dir)
 
   # extension polygons
   create_if_not_exists(cs_extension_polygons_dir)
@@ -58,6 +58,9 @@
 
   for (path in fema_by_vpu_subdirs) {
     create_if_not_exists(path)
+    create_if_not_exists(file.path(path, "subsets"))
+    
create_if_not_exists(file.path(path, "merged")) + create_if_not_exists(file.path(path, "output")) } } @@ -178,7 +181,7 @@ get_base_dir_paths <- function(base_dir) { dem_dir <- file.path(base_dir, "dem") dem_vrt_dir <- file.path(base_dir, "dem", "vrt") - dem_tif_dir <- file.path(base_dir, "dem", "tif") + # dem_tif_dir <- file.path(base_dir, "dem", "tif") cs_extension_polygons_dir <- file.path(base_dir, "cs-extension-polygons") @@ -196,12 +199,16 @@ get_base_dir_paths <- function(base_dir) { fema_by_vpu_dir <- file.path(fema_dir, "fema-by-vpu") fema_by_vpu_subdirs <- paste0(fema_by_vpu_dir, "/vpu-", VPU_IDS) + fema_by_vpu_subsets_dirs <- file.path(fema_by_vpu_subdirs, "subsets") + fema_by_vpu_merged_dirs <- file.path(fema_by_vpu_subdirs, "merged") + fema_by_vpu_output_dirs <- file.path(fema_by_vpu_subdirs, "output") + return( list( hydrofabric_dir = hydrofabric_dir, dem_dir = dem_dir, dem_vrt_dir = dem_vrt_dir, - dem_tif_dir = dem_tif_dir, + # dem_tif_dir = dem_tif_dir, cs_extension_polygons_dir = cs_extension_polygons_dir, fema_dir = fema_dir, fema_fgb_dir = fema_fgb_dir, @@ -209,7 +216,10 @@ get_base_dir_paths <- function(base_dir) { fema_clean_dir = fema_clean_dir, fema_gpkg_dir = fema_gpkg_dir, fema_by_vpu_dir = fema_by_vpu_dir, - fema_by_vpu_subdirs = fema_by_vpu_subdirs + fema_by_vpu_subdirs = fema_by_vpu_subdirs, + fema_by_vpu_subsets_dirs = fema_by_vpu_subsets_dirs, + fema_by_vpu_merged_dirs = fema_by_vpu_merged_dirs, + fema_by_vpu_output_dirs = fema_by_vpu_output_dirs ) ) } @@ -259,6 +269,42 @@ get_version_base_dir_paths <- function(base_dir, version) { ) } +combine_gpkg_files <- function(gpkg_paths, output_gpkg) { + + layer_counter <- list() + + for (gpkg_path in gpkg_paths) { + + base_name <- tools::file_path_sans_ext(basename(gpkg_path)) + # base_name <- gsub("_output.gpkg", "", basename(gpkg_path)) + + if (base_name %in% names(layer_counter)) { + layer_counter[[base_name]] <- layer_counter[[base_name]] + 1 + layer_name <- paste0(base_name, "_", layer_counter[[base_name]]) + } else { + + layer_counter[[base_name]] <- 1 + layer_name <- base_name + } + + tryCatch({ + sf_layer <- st_read(gpkg_path, quiet = TRUE) + + sf::st_write(sf_layer, + dsn = output_gpkg, + layer = layer_name, + append = TRUE, + quiet = TRUE) + + message("Successfully added '", basename(gpkg_path), "' as layer: '", layer_name, "' to\n > '", output_gpkg, "'") + + }, error = function(e) { + warning("Error processing: ", basename(gpkg_path)) + warning(e) + }) + } +} + list_s3_objects <- function(s3_bucket, pattern = NULL, aws_profile = NULL) { profile_option <- if (!is.null(aws_profile)) paste0("--profile ", aws_profile) else "" From 83bb22c12b89ce60d17827ab26298094635db2f4 Mon Sep 17 00:00:00 2001 From: anguswg-ucsb <anguswatters@gmail.com> Date: Tue, 19 Nov 2024 16:27:33 -0800 Subject: [PATCH 64/64] finished 01_transects script and adding extensions script and more util function helpers for managing filepaths/names --- runners/cs_runner/utils.R | 38 +++- runners/cs_runner2/00_driver.R | 5 +- runners/cs_runner2/01_transects.R | 190 ++++++++++++++++++ .../cs_runner2/02_extend_transects_by_fema.R | 118 +++++++++++ runners/cs_runner2/base_variables.R | 7 + runners/cs_runner2/download_ml_outputs.R | 28 +++ runners/cs_runner2/process_fema.R | 17 +- runners/cs_runner2/utils.R | 94 ++++++++- 8 files changed, 474 insertions(+), 23 deletions(-) diff --git a/runners/cs_runner/utils.R b/runners/cs_runner/utils.R index ef43e18..47423c1 100644 --- a/runners/cs_runner/utils.R +++ b/runners/cs_runner/utils.R @@ -121,7 +121,11 @@ 
create_new_version_dirs <- function(base_dir, version, with_output = FALSE) { network_dir <- paste0(version_base_dir, "/network") # transects - transects_dir <- paste0(version_base_dir, "/transects") + transects_dir <- paste0(version_base_dir, "/transects") + transects_base_dir <- paste0(transects_dir, "/base") + transects_fema_extended_dir <- paste0(transects_dir, "/extended-by-fema") + transects_cs_extended_dir <- paste0(transects_dir, "/extended-by-cs-attributes") + transects_output_dir <- paste0(transects_dir, "/output") # cross sections dirs cross_sections_dir <- paste0(version_base_dir, "/cross-sections") @@ -129,7 +133,9 @@ create_new_version_dirs <- function(base_dir, version, with_output = FALSE) { cross_sections_ml_dir <- paste0(cross_sections_dir, "/dem-ml") cross_sections_coastal_bathy_dir <- paste0(cross_sections_dir, "/dem-coastal-bathy") cross_sections_dem_pts_dir <- paste0(cross_sections_dir, "/dem-points") + cross_sections_output_dir <- paste0(cross_sections_dir, "/output") + if(with_output) { output_dir <- paste0(version_base_dir, "/outputs") } @@ -148,13 +154,19 @@ create_new_version_dirs <- function(base_dir, version, with_output = FALSE) { # transects create_if_not_exists(transects_dir) - + create_if_not_exists(transects_base_dir) + create_if_not_exists(transects_fema_extended_dir) + create_if_not_exists(transects_cs_extended_dir) + create_if_not_exists(transects_output_dir) + # CS pts create_if_not_exists(cross_sections_dir) create_if_not_exists(cross_sections_dem_dir) create_if_not_exists(cross_sections_ml_dir) create_if_not_exists(cross_sections_coastal_bathy_dir) create_if_not_exists(cross_sections_dem_pts_dir) + create_if_not_exists(cross_sections_output_dir) + if(with_output) { create_if_not_exists(output_dir) @@ -233,28 +245,44 @@ get_version_base_dir_paths <- function(base_dir, version) { network_dir <- file.path(version_base_dir, "network") # transects - transects_dir <- file.path(version_base_dir, "transects") - + transects_dir <- file.path(version_base_dir, "transects") + transects_base_dir <- paste0(transects_dir, "/base") + transects_fema_extended_dir <- paste0(transects_dir, "/extended-by-fema") + transects_cs_extended_dir <- paste0(transects_dir, "/extended-by-cs-attributes") + transects_output_dir <- paste0(transects_dir, "/output") + # cross sections dirs cross_sections_dir <- file.path(version_base_dir, "cross-sections") cross_sections_dem_dir <- file.path(cross_sections_dir, "dem") cross_sections_ml_dir <- file.path(cross_sections_dir, "dem-ml") cross_sections_coastal_bathy_dir <- file.path(cross_sections_dir, "dem-coastal-bathy") cross_sections_dem_pts_dir <- file.path(cross_sections_dir, "dem-points") + cross_sections_output_dir <- paste0(cross_sections_dir, "/output") return( list( hydrofabric_dir = hydrofabric_dir, version_base_dir = version_base_dir, + ref_features_dir = ref_features_dir, + network_dir = network_dir, + ml_dir = ml_dir, + transects_dir = transects_dir, + transects_base_dir = transects_base_dir, + transects_fema_extended_dir = transects_fema_extended_dir, + transects_cs_extended_dir = transects_cs_extended_dir, + transects_output_dir = transects_output_dir, + cross_sections_dir = cross_sections_dir, cross_sections_dem_dir = cross_sections_dem_dir, cross_sections_dem_pts_dir = cross_sections_dem_pts_dir, cross_sections_ml_dir = cross_sections_ml_dir, - cross_sections_coastal_bathy_dir = cross_sections_coastal_bathy_dir + cross_sections_coastal_bathy_dir = cross_sections_coastal_bathy_dir, + cross_sections_output_dir = 
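+      # For orientation, these entries resolve to paths shaped like the following
+      # (illustrative only — the actual roots come from base_dir and version,
+      # e.g. base_dir = "/Volumes/T7SSD/lynker-spatial", version = "v3.0"):
+      #   transects_base_dir           ->  .../hydrofabric/v3.0/transects/base
+      #   transects_fema_extended_dir  ->  .../hydrofabric/v3.0/transects/extended-by-fema
+      #   cross_sections_output_dir    ->  .../hydrofabric/v3.0/cross-sections/output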
cross_sections_output_dir
+      
     )
   )
 }
 
diff --git a/runners/cs_runner2/00_driver.R b/runners/cs_runner2/00_driver.R
index cb98b6e..9c6eaed 100644
--- a/runners/cs_runner2/00_driver.R
+++ b/runners/cs_runner2/00_driver.R
@@ -6,15 +6,16 @@ source("runners/cs_runner2/config_env.R")
 # downloads datasets
 # - Nextgen data
 # - Reference features (for waterbody filtering)
-# - ML outputs
 # - FEMA 100 year floodplain polygons (FGBs)
+# - Process state level FEMA polygons to CONUS
+# - ML outputs
 # - 3DEP DEM VRT
 source("runners/cs_runner2/download_conus_nextgen.R")
 source("runners/cs_runner2/download_conus_ref_features.R")
 source("runners/cs_runner2/download_fema.R")
+source("runners/cs_runner2/process_fema.R")
-source("runners/cs_runner2/download_dem_from_vrt.R")
 source("runners/cs_runner2/download_ml_outputs.R")
+source("runners/cs_runner2/download_dem_from_vrt.R")
 
 # generate and upload transects datasets
 source("runners/cs_runner2/01_transects.R")
diff --git a/runners/cs_runner2/01_transects.R b/runners/cs_runner2/01_transects.R
index e69de29..e11f46c 100644
--- a/runners/cs_runner2/01_transects.R
+++ b/runners/cs_runner2/01_transects.R
@@ -0,0 +1,190 @@
+
+library(sf)
+library(dplyr)
+
+source("runners/cs_runner2/base_variables.R")
+# source("runners/cs_runner2/utils.R")
+
+VPU_IDS <- get_vpu_ids()
+
+for (i in seq_along(VPU_IDS)) {
+# for (i in 21:length(VPU_IDS)) {
+  
+  # --------------------------------------------------------------
+  # ---- Setup variables / paths ----
+  # --------------------------------------------------------------
+  # VERSION_DIRS_LIST
+  vpu          <- VPU_IDS[i]
+  CROSSWALK_ID <- "id"
+  
+  transect_filenames        <- get_transect_filenames(vpu, sep = "-")
+  base_output_path          <- paste0(VERSION_DIRS_LIST$transects_base_dir, "/", transect_filenames$transects_base_path)
+  base_transect_file_exists <- file.exists(base_output_path)
+  
+  do_process_transects <- !base_transect_file_exists || REGENERATE_TRANSECTS
+  # do_not_reprocess_transects <- base_transect_file_exists && !REGENERATE_TRANSECTS
+  
+  if (do_process_transects) {
+    message("Creating VPU ", vpu, " transects:",
+            "\n - flowpaths: '",
+            basename(CONUS_NEXTGEN_GPKG_PATH), "'"
+    )
+  } else {
+    message(
+      "VPU ", vpu, " transects file already exists at:\n - '", base_output_path, "'",
+      "\n\n >>> NOTE: To regenerate transects:\n\t\t- Set REGENERATE_TRANSECTS = TRUE in 'base_variables.R' \n\t\tOR\n\t\t- delete the '",
+      basename(base_output_path), "' file"
+    )
+    next
+  }
+  
+  # if we are going to create new transects and the transect file already exists,
+  # delete it so we do not cause confusion or accidentally append to an old dataset
+  if (do_process_transects && base_transect_file_exists) {
+    message("Deleting old transect file:\n > '", base_output_path, "'")
+    file.remove(base_output_path)
+  }
+  
+  # --------------------------------------------------------------
+  # ---- Load flowpaths for VPU from conus_nextgen.gpkg ----
+  # --------------------------------------------------------------
+  
+  # read in nextgen flowlines
+  flines <-
+    CONUS_NEXTGEN_GPKG_PATH %>%
+    sf::read_sf(query = paste0("SELECT * FROM flowpaths WHERE vpuid = '", vpu, "'"))
+  
+  has_no_flowlines <- nrow(flines) == 0
+  
+  if (has_no_flowlines) {
+    message("Skipping VPU ", vpu, " as no flowlines were found in '", basename(CONUS_NEXTGEN_GPKG_PATH), "'...")
+    next
+  }
+  
+  # Add an estimated bankfull width based on total downstream drainage area (sqkm, power law equation)
+  flines <-
+    flines %>%
+    hydroloom::rename_geometry("geometry") %>%
+    hydrofabric3D::add_powerlaw_bankful_width(
+      total_drainage_area_sqkm_col = "tot_drainage_areasqkm",
+      min_bf_width = 50
+    ) %>%
+    # NOTE: this keeps only the first 10 flowlines of each stream order
+    dplyr::group_by(order) %>%
+    dplyr::slice(1:10) %>%
+    dplyr::ungroup() %>%
+    dplyr::select(
+      id,
+      lengthkm,
+      tot_drainage_areasqkm,
+      bf_width,
+      mainstem,
+      geometry
+    )
+    # dplyr::group_by(order)
+  
+  # --------------------------------------------------------------
+  # ---- Generate transects ----
+  # --------------------------------------------------------------
+  # flines$geometry %>% sf::st_is_empty() %>% any()
+  
+  # sf::write_sf(
+  #   flines,
+  #   "/Users/anguswatters/Desktop/wrong_cs_ids_flines_error.gpkg"
+  #   # "/Users/anguswatters/Desktop/empty_geom_flines_error.gpkg"
+  # )
+  
+  # create transect lines
+  transects <- hydrofabric3D::cut_cross_sections(
+    net               = flines,          # flowlines network
+    crosswalk_id      = CROSSWALK_ID,    # unique feature ID
+    cs_widths         = flines$bf_width, # cross section width of each "id" linestring ("hy_id")
+    # cs_widths         = 15,
+    num               = 3,               # number of cross sections per "id" linestring ("hy_id")
+    # smooth            = FALSE,           # smooth lines
+    # densify           = NULL,
+    smooth            = TRUE,            # smooth lines
+    densify           = 3,               # densify linestring points
+    rm_self_intersect = TRUE,            # remove self intersecting transects
+    fix_braids        = FALSE,           # whether to fix braided flowlines or not
+    add               = TRUE             # whether to add back the original data
+  )
+  # dplyr::mutate(
+  #   cs_source = CS_SOURCE
+  # )
+  # transects$cs_lengthm
+  # transects$geometry %>% sf::st_length()
+  
+  # transects %>%
+  #   dplyr::mutate(
+  #     new_cs_lengthm = as.numeric(sf::st_length(.))
+  #   ) %>%
+  #   dplyr::filter(!dplyr::near(new_cs_lengthm, cs_lengthm, tol = 2))
+  # .Machine$double.eps^1
+  # crosswalk_id <- "id"
+  # # renumber the cs_ids for each transect based on cs_measure sorting, and make sure all cross sections are correctly numbered
+  # mismatches <-
+  #   transects %>%
+  #   sf::st_drop_geometry() %>%
+  #   dplyr::group_by(dplyr::across(dplyr::any_of(c(crosswalk_id)))) %>%
+  #   dplyr::arrange(cs_measure, .by_group = TRUE) %>%
+  #   dplyr::mutate(
+  #     new_cs_id = 1:dplyr::n()
+  #   ) %>%
+  #   dplyr::ungroup() %>%
+  #   dplyr::filter(cs_id != new_cs_id)
+  # mismatches
+  # transects %>%
+  #   dplyr::filter(id %in% c("wb-1023360", "wb-1023363"))
+  #
+  # transects %>%
+  #   dplyr::filter(id %in% c("wb-1023360", "wb-1023363")) %>%
+  #   mapview::mapview()
+  #
+  # # FALSE if there are any transects with different cs_ids to the newly created cs_id
+  # # Otherwise TRUE
+  # has_valid_cs_ids <- !(nrow(mismatches) > 0)
+  #
+  # # t2 <- transects %>% hydrofabric3D:::renumber_cs_ids("id")
+  # # hydrofabric3D::validate_transects(t2, "id")
+  # transects
+  is_valid_transects <- hydrofabric3D::validate_transects(transects, "id")
+  is_valid_transects_against_flowlines <- hydrofabric3D::validate_transects_against_flowlines(transects, flines, "id")
+  
+  # is_valid_transects <- hydrofabric3D::validate_transects(transects[c(1, 3, 55), ], "id")
+  # is_valid_transects_against_flowlines <- hydrofabric3D::validate_transects_against_flowlines(transects, flines[c(-1), ], "id")
+  # is_valid_transects_against_flowlines
+  # hydrofabric3D:::rm_multiflowline_intersections(transects, flines[c(-1), ])
+  
+  # select relevant columns
+  transects <-
+    transects %>%
+    dplyr::mutate(
+      cs_source = CS_SOURCE
+    ) %>%
+    # hydrofabric3D:::select_transects(CROSSWALK_ID)
+    dplyr::select(
+      dplyr::any_of(CROSSWALK_ID),
+      cs_id,
+      cs_lengthm,
+      # cs_lengthm = new_cs_lengthm,
+      cs_measure,
+      sinuosity,
+      cs_source,
+      geometry
+    )
+  
+  if (is_valid_transects && is_valid_transects_against_flowlines) {
+    message("Saving transects to:\n - filepath: '", base_output_path, "'")
+    sf::write_sf(
+      transects,
+      base_output_path
+    )
+  }
+  
+  message(" --- VPU ", vpu, " transects generation complete! --- ")
+  message()
+  
+}
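The commented exploration block in 01_transects.R above distills to one invariant: within each crosswalk id, cs_ids should run 1..n when the cross sections are sorted by cs_measure. A condensed sketch of that check (same logic and columns as the commented code; not part of the runner itself):

# mismatches <-
#   transects %>%
#   sf::st_drop_geometry() %>%
#   dplyr::group_by(id) %>%
#   dplyr::arrange(cs_measure, .by_group = TRUE) %>%
#   dplyr::mutate(new_cs_id = dplyr::row_number()) %>%
#   dplyr::ungroup() %>%
#   dplyr::filter(cs_id != new_cs_id)
# nrow(mismatches) == 0   # TRUE when every cross section is numbered correctly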
diff --git a/runners/cs_runner2/02_extend_transects_by_fema.R b/runners/cs_runner2/02_extend_transects_by_fema.R
index e69de29..106dc44 100644
--- a/runners/cs_runner2/02_extend_transects_by_fema.R
+++ b/runners/cs_runner2/02_extend_transects_by_fema.R
@@ -0,0 +1,118 @@
+
+library(sf)
+library(dplyr)
+
+source("runners/cs_runner2/base_variables.R")
+# source("runners/cs_runner2/utils.R")
+
+VPU_IDS <- get_vpu_ids()
+
+for (i in seq_along(VPU_IDS)) {
+  # for (i in 21:length(VPU_IDS)) {
+  
+  # --------------------------------------------------------------
+  # ---- Setup variables / paths ----
+  # --------------------------------------------------------------
+  # VERSION_DIRS_LIST
+  # i = 8
+  vpu          <- VPU_IDS[i]
+  CROSSWALK_ID <- "id"
+  # cs_filenames <- get_cross_section_filenames(vpu, sep = "-")
+  transect_filenames <- get_transect_filenames(vpu, sep = "-")
+  
+  # TODO: Add a check that stops this script early if the base_transects do NOT exist
+  base_transects_path        <- paste0(VERSION_DIRS_LIST$transects_base_dir, "/", transect_filenames$transects_base_path)
+  fema_transects_output_path <- paste0(VERSION_DIRS_LIST$transects_fema_extended_dir, "/", transect_filenames$transects_fema_extended_path)
+  
+  base_transect_file_exists  <- file.exists(base_transects_path)
+  fema_transects_file_exists <- file.exists(fema_transects_output_path)
+  
+  do_process_transects <- !fema_transects_file_exists || REGENERATE_TRANSECTS
+  
+  if (do_process_transects) {
+    message("Creating VPU ", vpu, " transects:",
+            "\n - flowpaths: '",
+            basename(CONUS_NEXTGEN_GPKG_PATH), "'",
+            "\n - base transects: '",
+            basename(base_transects_path), "'"
+    )
+  } else {
+    message(
+      "VPU ", vpu, " transects file already exists at:\n - '", fema_transects_output_path, "'",
+      "\n\n >>> NOTE: To regenerate transects:\n\t\t- Set REGENERATE_TRANSECTS = TRUE in 'base_variables.R' \n\t\tOR\n\t\t- delete the '",
+      basename(fema_transects_output_path), "' file"
+    )
+    next
+  }
+  
+  if (do_process_transects && fema_transects_file_exists) {
+    message("Deleting old FEMA extended transect file:\n > '", fema_transects_output_path, "'")
+    file.remove(fema_transects_output_path)
+  }
+  
+  # read in nextgen flowlines
+  flines <-
+    CONUS_NEXTGEN_GPKG_PATH %>%
+    sf::read_sf(query = paste0("SELECT * FROM flowpaths WHERE vpuid = '", vpu, "'")) %>%
+    hydroloom::rename_geometry("geometry")
+  
+  # transects
+  transects <- sf::read_sf(base_transects_path) %>%
+    hydroloom::rename_geometry("geometry")
+  
+  # FEMA polygons
+  fema <-
+    CONUS_FEMA_GPKG_PATH %>%
+    sf::read_sf(layer = get_fema_conus_layer_name(vpu)) %>%
+    rmapshaper::ms_simplify(keep_shapes = TRUE, keep = 0.01, sys = TRUE, sys_mem = 16) %>%
+    hydroloom::rename_geometry("geometry")
+  # fema <- rmapshaper::ms_simplify(fema, keep_shapes = TRUE, keep = 0.1, sys = TRUE, sys_mem = 16)
+  
+  # # add mainstem to
+  # transects <-
+  #   transects %>%
+  #   dplyr::left_join(
+  #     flines %>%
+  #       sf::st_drop_geometry() %>%
+  #       dplyr::select(dplyr::any_of(CROSSWALK_ID), mainstem),
+  #     by = CROSSWALK_ID
+  #   )
+  
+  message("Extending transects out to FEMA 100yr floodplain polygon boundaries - (", Sys.time(), ")")
+  
+  # TODO: make sure this 3000m extension distance is appropriate across VPUs
+  # TODO: also need to make sure this will be feasible memory-wise on the larger VPUs...
+  extended_transects <- hydrofabric3D::extend_transects_to_polygons(
+    transect_lines         = transects,
+    polygons               = fema,
+    flowlines              = flines,
+    crosswalk_id           = CROSSWALK_ID,
+    grouping_id            = CROSSWALK_ID,
+    # grouping_id            = "mainstem",
+    max_extension_distance = 3000,
+    reindex_cs_ids         = TRUE
+  )
+  
+  is_valid_transects <- hydrofabric3D::validate_transects(extended_transects, "id")
+  is_valid_transects_against_flowlines <- hydrofabric3D::validate_transects_against_flowlines(extended_transects, flines, "id")
+  
+  # save the FEMA extended transects when they pass validation (mirrors the save step in 01_transects.R)
+  if (is_valid_transects && is_valid_transects_against_flowlines) {
+    message("Saving FEMA extended transects to:\n - filepath: '", fema_transects_output_path, "'")
+    sf::write_sf(
+      extended_transects,
+      fema_transects_output_path
+    )
+  }
+  
+}
\ No newline at end of file
diff --git a/runners/cs_runner2/base_variables.R b/runners/cs_runner2/base_variables.R
index c716e29..7588bb9 100644
--- a/runners/cs_runner2/base_variables.R
+++ b/runners/cs_runner2/base_variables.R
@@ -27,6 +27,13 @@ VERSION_DIRS_LIST <- get_version_base_dir_paths(BASE_DIR, VERSION)
 # string to fill in "CS_SOURCE" column in output datasets
 CS_SOURCE <- "hydrofabric3D"
 
+# -------------------------------------------------------------------------------------
+# ---- Processing flags (cache data) ----
+# -------------------------------------------------------------------------------------
+
+# cache transects by setting REGENERATE_TRANSECTS to FALSE
+REGENERATE_TRANSECTS <- TRUE
+
 # -------------------------------------------------------------------------------------
 # ---- S3 BUCKET NAMES ----
 # -------------------------------------------------------------------------------------
diff --git a/runners/cs_runner2/download_ml_outputs.R b/runners/cs_runner2/download_ml_outputs.R
index e69de29..4d14634 100644
--- a/runners/cs_runner2/download_ml_outputs.R
+++ b/runners/cs_runner2/download_ml_outputs.R
@@ -0,0 +1,28 @@
+# Running this script pulls the CONUS ML outputs parquet dataset from http://www.lynker-spatial.com/ and saves it into a directory within "BASE_DIR"
+# BASE_DIR is defined within runners/workflow/root_dir.R
+
+# load config variables
+source("runners/cs_runner2/base_variables.R")
+# source("runners/cs_runner/config_vars.R")
+
+# ---------------------------------------------------------------------------
+# ---- Download the ML outputs parquet
+# ---------------------------------------------------------------------------
+
+copy_cmd <- paste0('aws s3 cp ', CONUS_ML_S3_URI, " ", CONUS_ML_PARQUET_PATH)
+
+if (!file.exists(CONUS_ML_PARQUET_PATH)) {
+  tryCatch({
+    message("Copying S3 object:\n", CONUS_ML_S3_URI)
+    system(copy_cmd)
+    message("Download '", basename(CONUS_ML_PARQUET_PATH), "' complete!")
+    message("------------------")
+  }, error = function(e) {
+    message("Error downloading '", basename(CONUS_ML_PARQUET_PATH), "' from:\n", CONUS_ML_S3_URI)
+    message(e)
+    stop()
+  })
+  
+} else {
+  message("'", basename(CONUS_ML_PARQUET_PATH), "' file already exists at\n > '", CONUS_ML_PARQUET_PATH, "'")
+}
diff --git a/runners/cs_runner2/process_fema.R b/runners/cs_runner2/process_fema.R
index 522d427..50b2106 100644
--- a/runners/cs_runner2/process_fema.R
+++ b/runners/cs_runner2/process_fema.R
@@ -23,11 +23,9 @@ library(geos)
 library(fastmap)
 library(nngeo)
 
-# devtools::install_github("anguswg-ucsb/hydrofabric3D")
-
-# TODO: Steps that converts FGB to geojson and then geojson to gpkg can be put into a single loop
-# TODO: Delete old files as needed
-
+# ONLY runs if the main CONUS FEMA gpkg hasn't been created yet
+if (!file.exists(CONUS_FEMA_GPKG_PATH)) {
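+  # For orientation, this script populates each per-VPU folder using the contract
+  # below (sketch; VPU "06" is only an illustrative placeholder):
+  #
+  #   fema-by-vpu/
+  #   └── vpu-06/
+  #       ├── subsets/   # per-state FEMA polygons clipped to this VPU
+  #       ├── merged/    # appended into a single fema-vpu-06.gpkg (plus staging geojson)
+  #       └── output/    # final simplified/dissolved/exploded polygons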
-------------------------------------------------------------------------------------
 # ---- OVERWRITE_FEMA_FILES constant logical ----
 # ---- > if TRUE, processing steps will be run again
@@ -616,6 +614,15 @@ all_fema_vpu_layers <- list.files(BASE_DIRS_LIST$fema_by_vpu_output_dirs, full.n
 combine_gpkg_files(all_fema_vpu_layers, CONUS_FEMA_GPKG_PATH)
 
+# -------------------------------------------------------------------------------------
+
+} else {
+  message("'", basename(CONUS_FEMA_GPKG_PATH), "' already exists at\n > ", CONUS_FEMA_GPKG_PATH)
+}
+
+# -------------------------------------------------------------------------------------
+# -------------------------------------------------------------------------------------
+
 # # -------------------------------------------------------------------------------------
 # # ---- Union each VPU geopackage (either on state or just touching predicate) ----
 # # -------------------------------------------------------------------------------------
diff --git a/runners/cs_runner2/utils.R b/runners/cs_runner2/utils.R
index cb624ae..5413d5c 100644
--- a/runners/cs_runner2/utils.R
+++ b/runners/cs_runner2/utils.R
@@ -98,14 +98,14 @@ get_vpu_ids <- function() {
 # base_dir/
 # └── lynker-spatial/
 #   ├── hydrofabric/
-  # ├── version_number/
-  # ├── network/
-  # ├── transects/
-  # ├── cross-sections/
-  # ├── dem/
-  # ├── dem-ml/
-  # ├── dem-coastal-bathy/
-  # ├── dem-points/
+# ├── version_number/
+# ├── network/
+# ├── transects/
+# ├── cross-sections/
+# ├── dem/
+# ├── dem-ml/
+# ├── dem-coastal-bathy/
+# ├── dem-points/
 create_new_version_dirs <- function(base_dir, version, with_output = FALSE) {
   # version = "v3.0"
   # base_dir <- BASE_DIR
@@ -124,7 +124,11 @@ create_new_version_dirs <- function(base_dir, version, with_output = FALSE) {
   network_dir <- paste0(version_base_dir, "/network")
 
   # transects
-  transects_dir  <- paste0(version_base_dir, "/transects")
+  transects_dir                <- paste0(version_base_dir, "/transects")
+  transects_base_dir           <- paste0(transects_dir, "/base")
+  transects_fema_extended_dir  <- paste0(transects_dir, "/extended-by-fema")
+  transects_cs_extended_dir    <- paste0(transects_dir, "/extended-by-cs-attributes")
+  transects_output_dir         <- paste0(transects_dir, "/output")
 
   # cross sections dirs
   cross_sections_dir <- paste0(version_base_dir, "/cross-sections")
@@ -132,6 +136,8 @@ create_new_version_dirs <- function(base_dir, version, with_output = FALSE) {
   cross_sections_ml_dir <- paste0(cross_sections_dir, "/dem-ml")
   cross_sections_coastal_bathy_dir <- paste0(cross_sections_dir, "/dem-coastal-bathy")
   cross_sections_dem_pts_dir <- paste0(cross_sections_dir, "/dem-points")
+  cross_sections_output_dir <- paste0(cross_sections_dir, "/output")
+  
 
   if(with_output) {
     output_dir <- paste0(version_base_dir, "/outputs")
@@ -151,6 +157,10 @@ create_new_version_dirs <- function(base_dir, version, with_output = FALSE) {
 
   # transects
   create_if_not_exists(transects_dir)
+  create_if_not_exists(transects_base_dir)
+  create_if_not_exists(transects_fema_extended_dir)
+  create_if_not_exists(transects_cs_extended_dir)
+  create_if_not_exists(transects_output_dir)
 
   # CS pts
   create_if_not_exists(cross_sections_dir)
@@ -158,6 +168,8 @@ create_new_version_dirs <- function(base_dir, version, with_output = FALSE) {
   create_if_not_exists(cross_sections_ml_dir)
   create_if_not_exists(cross_sections_coastal_bathy_dir)
   create_if_not_exists(cross_sections_dem_pts_dir)
+  create_if_not_exists(cross_sections_output_dir)
+  
 
   if(with_output) {
     create_if_not_exists(output_dir)
@@ -243,28 +255,41 @@ 
get_version_base_dir_paths <- function(base_dir, version) { network_dir <- file.path(version_base_dir, "network") # transects - transects_dir <- file.path(version_base_dir, "transects") + transects_dir <- file.path(version_base_dir, "transects") + transects_base_dir <- paste0(transects_dir, "/base") + transects_fema_extended_dir <- paste0(transects_dir, "/extended-by-fema") + transects_cs_extended_dir <- paste0(transects_dir, "/extended-by-cs-attributes") + transects_output_dir <- paste0(transects_dir, "/output") # cross sections dirs cross_sections_dir <- file.path(version_base_dir, "cross-sections") @@ -251,24 +267,37 @@ get_version_base_dir_paths <- function(base_dir, version) { cross_sections_ml_dir <- file.path(cross_sections_dir, "dem-ml") cross_sections_coastal_bathy_dir <- file.path(cross_sections_dir, "dem-coastal-bathy") cross_sections_dem_pts_dir <- file.path(cross_sections_dir, "dem-points") + cross_sections_output_dir <- paste0(cross_sections_dir, "/output") return( list( hydrofabric_dir = hydrofabric_dir, version_base_dir = version_base_dir, + ref_features_dir = ref_features_dir, + network_dir = network_dir, + ml_dir = ml_dir, + transects_dir = transects_dir, + transects_base_dir = transects_base_dir, + transects_fema_extended_dir = transects_fema_extended_dir, + transects_cs_extended_dir = transects_cs_extended_dir, + transects_output_dir = transects_output_dir, + cross_sections_dir = cross_sections_dir, cross_sections_dem_dir = cross_sections_dem_dir, cross_sections_dem_pts_dir = cross_sections_dem_pts_dir, cross_sections_ml_dir = cross_sections_ml_dir, - cross_sections_coastal_bathy_dir = cross_sections_coastal_bathy_dir + cross_sections_coastal_bathy_dir = cross_sections_coastal_bathy_dir, + cross_sections_output_dir = cross_sections_output_dir + ) ) } + combine_gpkg_files <- function(gpkg_paths, output_gpkg) { layer_counter <- list() @@ -305,6 +334,49 @@ combine_gpkg_files <- function(gpkg_paths, output_gpkg) { } } +get_transect_filenames <- function(vpu, sep = "-", ext = ".gpkg") { + # ext <- ".gpkg" + # sep = "_" + transects_base_path <- paste0(paste0(c(vpu, "transects", "base"), collapse = sep) , ext) + transects_fema_extended_path <- paste0(paste0(c(vpu, "transects", "extended", "by", "fema"), collapse = sep) , ext) + transects_cs_extended_path <- paste0(paste0(c(vpu, "transects", "extended", "by", "cs", "attributes"), collapse = sep) , ext) + transects_output_path <- paste0(paste0(c(vpu, "transects"), collapse = sep) , ext) + + return( + list( + transects_base_path = transects_base_path, + transects_fema_extended_path = transects_fema_extended_path, + transects_cs_extended_path = transects_cs_extended_path, + transects_output_path = transects_output_path + ) + ) +} + +get_cross_section_filenames <- function(vpu, sep = "-", ext = ".parquet") { + # ext <- ".parquet" + # sep = "_" + cs_pts_base_path <- paste0(paste0(c(vpu, "cross", "sections", "base"), collapse = sep) , ext) + cs_pts_fema_extended_path <- paste0(paste0(c(vpu, "cross", "sections", "extended", "by", "fema"), collapse = sep) , ext) + cs_pts_cs_extended_path <- paste0(paste0(c(vpu, "cross", "sections", "extended", "by", "cs", "attributes"), collapse = sep) , ext) + cs_pts_output_path <- paste0(paste0(c(vpu, "cross", "sections"), collapse = sep) , ext) + + return( + list( + cs_pts_base_path = cs_pts_base_path, + cs_pts_fema_extended_path = cs_pts_fema_extended_path, + cs_pts_cs_extended_path = cs_pts_cs_extended_path, + cs_pts_output_path = cs_pts_output_path + ) + ) +} + +# derive the FEMA VPU layer name 
from the conus_fema.gpkg +get_fema_conus_layer_name <- function(vpu) { + + return(paste0("fema-vpu-", vpu)) + +} + list_s3_objects <- function(s3_bucket, pattern = NULL, aws_profile = NULL) { profile_option <- if (!is.null(aws_profile)) paste0("--profile ", aws_profile) else ""
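# A usage sketch for the filename helpers above (VPU "06" is an illustrative
# placeholder; sep/ext defaults are as given in the definitions):
#   get_transect_filenames("06")$transects_base_path
#   #> "06-transects-base.gpkg"
#   get_cross_section_filenames("06")$cs_pts_output_path
#   #> "06-cross-sections.parquet"
#   get_fema_conus_layer_name("06")
#   #> "fema-vpu-06"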