From 0092719594f53cd379c701f5865d9c0f52d18c22 Mon Sep 17 00:00:00 2001
From: anguswg-ucsb <anguswatters@gmail.com>
Date: Wed, 6 Dec 2023 05:02:00 -0800
Subject: [PATCH 01/64] updated transects and cross section pts runners with
 new hydrofabric3D features, added waterbody reference feature download and
 waterbody intersection removal to 02_cs_pts.R runner

---
 runners/cs_runner/.Rapp.history      |   0
 runners/cs_runner/01_transects.R     |  83 ++++++-----
 runners/cs_runner/02_cs_pts.R        | 189 +++++++++++++++++++-----
 runners/cs_runner/config.R           | 209 ++++++++++++++++++++++++++-
 runners/cs_runner/config_vars.R      |  14 +-
 runners/cs_runner/download_nextgen.R |  32 ++++
 6 files changed, 442 insertions(+), 85 deletions(-)
 delete mode 100644 runners/cs_runner/.Rapp.history

diff --git a/runners/cs_runner/.Rapp.history b/runners/cs_runner/.Rapp.history
deleted file mode 100644
index e69de29..0000000
diff --git a/runners/cs_runner/01_transects.R b/runners/cs_runner/01_transects.R
index 0880d33..456be21 100644
--- a/runners/cs_runner/01_transects.R
+++ b/runners/cs_runner/01_transects.R
@@ -1,23 +1,21 @@
 # Generate the flowlines layer for the final cross_sections_<VPU>.gpkg for each VPU
-# source("runners/cs_runner/config.R")
+source("runners/cs_runner/config.R")
 
-# # load libraries
+# # # load libraries
+library(hydrofabric3D)
 # library(terrainSliceR)
-# library(dplyr)
-# library(sf)
-
-# name of S3 bucket
-s3_bucket <- "s3://lynker-spatial/"
+library(dplyr)
+library(sf)
 
 # transect bucket prefix
-transects_prefix <- paste0(s3_bucket, "v20/3D/transects/")
+transects_prefix <- paste0(s3_bucket, version_prefix, "/3D/transects/")
 
 # paths to nextgen datasets and model attribute parquet files
 nextgen_files <- list.files(nextgen_dir, full.names = FALSE)
 model_attr_files <- list.files(model_attr_dir, full.names = FALSE)
 
 # string to fill in "cs_source" column in output datasets
-net_source <- "terrainSliceR"
+net_source <- "hydrofabric3D"
 
 # ensure the files are in the same order and matched up by VPU
 path_df <- align_files_by_vpu(
@@ -33,40 +31,44 @@ for(i in 1:nrow(path_df)) {
   nextgen_file <- path_df$x[i]
   nextgen_path <- paste0(nextgen_dir, nextgen_file)
   
-  # model attributes file and full path
-  model_attr_file <- path_df$y[i]
-  model_attr_path <- paste0(model_attr_dir, model_attr_file)
+  # # model attributes file and full path
+  # model_attr_file <- path_df$y[i]
+  # model_attr_path <- paste0(model_attr_dir, model_attr_file)
 
-  message("Creating VPU ", path_df$vpu[i], " transects:\n - flowpaths: '", nextgen_file, "'\n - model attributes: '", model_attr_file, "'")
+  message("Creating VPU ", path_df$vpu[i], " transects:\n - flowpaths: '", nextgen_file, "'")
+  # message("Creating VPU ", path_df$vpu[i], " transects:\n - flowpaths: '", nextgen_file, "'\n - model attributes: '", model_attr_file, "'")
   
   # read in nextgen data
   flines <- sf::read_sf(nextgen_path, layer = "flowpaths")
 
-  #  model attributes
-  model_attrs <- arrow::read_parquet(model_attr_path)
+  # #  model attributes
+  # model_attrs <- arrow::read_parquet(model_attr_path)
+
+  # # join flowlines with model attributes
+  # flines <- dplyr::left_join(
+  #   flines,
+  #   dplyr::select(
+  #     model_attrs,
+  #     id, eTW
+  #   ),
+  #   by = "id"
+  # )
 
-  # join flowlines with model atttributes
-  flines <- dplyr::left_join(
-    flines,
-    dplyr::select(
-      model_attrs,
-      id, eTW
-    ),
-    by = "id"
-  )
-  
   # calculate bankfull width
   flines <-
     flines %>%
     dplyr::mutate(
-      bf_width = 11 * eTW
-    ) %>%
-    dplyr::mutate( # if there are any NAs, use exp(0.700    + 0.365* log(tot_drainage_areasqkm)) equation to calculate bf_width
-      bf_width = dplyr::case_when(
-        is.na(bf_width) ~ exp(0.700    + 0.365* log(tot_drainage_areasqkm)),
-        TRUE            ~ bf_width
-      )
+      bf_width = exp(0.700    + 0.365* log(tot_drainage_areasqkm))
     ) %>%
+    # dplyr::mutate(
+    #   bf_width = 11 * eTW
+    # ) %>%
+    # dplyr::mutate( # if there are any NAs, use exp(0.700    + 0.365* log(tot_drainage_areasqkm)) equation to calculate bf_width
+    #   bf_width = dplyr::case_when(
+    #     is.na(bf_width) ~ exp(0.700    + 0.365* log(tot_drainage_areasqkm)),
+    #     TRUE            ~ bf_width
+    #   )
+    # ) %>%
     dplyr::select(
       hy_id = id,
       lengthkm,
@@ -74,16 +76,17 @@ for(i in 1:nrow(path_df)) {
       bf_width,
       geometry = geom
     )
-  
+
   # flines$bf_width <- ifelse(is.na(flines$bf_width),  exp(0.700    + 0.365* log(flines$tot_drainage_areasqkm)), flines$bf_width)
 
   time1 <- Sys.time()
  # system.time({
     # create transect lines
-    transects <- terrainSliceR::cut_cross_sections(
+    transects <- hydrofabric3D::cut_cross_sections(
       net               = flines,                        # flowlines network
       id                = "hy_id",                       # Unique feature ID
-      cs_widths         = pmax(50, flines$bf_width), # cross section width of each "id" linestring ("hy_id")
+      cs_widths         = pmax(50, flines$bf_width * 11),     # cross section width of each "id" linestring ("hy_id")
+      # cs_widths         = pmax(50, flines$bf_width),     # cross section width of each "id" linestring ("hy_id")
       num               = 10,                            # number of cross sections per "id" linestring ("hy_id")
       smooth            = TRUE,                          # smooth lines
       densify           = 3,                             # densify linestring points
@@ -132,13 +135,9 @@ for(i in 1:nrow(path_df)) {
   )
   
   # command to copy transects geopackage to S3
-  if (!is.null(aws_profile)) {
-    copy_to_s3 <- paste0("aws s3 cp ", out_path, " ", transects_prefix, out_file, 
-                             ifelse(is.null(aws_profile), "", paste0(" --profile ", aws_profile)))
-  } else {
-    copy_to_s3 <- paste0("aws s3 cp ", out_path, " ", transects_prefix, out_file)
-  }
-  
+  copy_to_s3 <- paste0("aws s3 cp ", out_path, " ", transects_prefix, out_file, 
+                   ifelse(is.null(aws_profile), "", paste0(" --profile ", aws_profile))
+                   )
 
   message("Copy VPU ", path_df$vpu[i], " transects to S3:\n - S3 copy command:\n'", 
           copy_to_s3, 
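
For reference, the bankfull width estimate that replaces the eTW-based value above is a power law in
drainage area: bf_width = exp(0.700 + 0.365 * log(A)) = exp(0.700) * A^0.365, with A in square
kilometers. A minimal sketch of how it feeds the cs_widths argument of cut_cross_sections()
(toy drainage areas; the 11x multiplier and 50 m floor mirror the call above):

    # power-law bankfull width from drainage area (sq km); toy inputs
    tot_drainage_areasqkm <- c(0.5, 10, 250, 5000)

    # exp(0.700 + 0.365 * log(A)) is equivalent to exp(0.700) * A^0.365
    bf_width <- exp(0.700 + 0.365 * log(tot_drainage_areasqkm))
    # ~ 1.6, 4.7, 15.1, 45.1 (meters, approximately)

    # transect widths as passed to cut_cross_sections(): 11 x bankfull width,
    # with a 50 m minimum
    cs_widths <- pmax(50, bf_width * 11)
    # ~ 50.0, 51.3, 166.2, 496.0
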
diff --git a/runners/cs_runner/02_cs_pts.R b/runners/cs_runner/02_cs_pts.R
index da6e3e8..9c7c237 100644
--- a/runners/cs_runner/02_cs_pts.R
+++ b/runners/cs_runner/02_cs_pts.R
@@ -1,16 +1,18 @@
 # Generate the flowlines layer for the final cross_sections_<VPU>.gpkg for each VPU
-# source("runners/cs_runner/config.R")
+source("runners/cs_runner/config.R")
 
 # # load libraries
-# library(terrainSliceR)
+# library(hydrofabric3D)
+# # library(terrainSliceR)
 # library(dplyr)
 # library(sf)
 
-# name of S3 bucket
-s3_bucket <- "s3://lynker-spatial/"
-
 # cross section bucket prefix
-cs_pts_prefix <- paste0(s3_bucket, "v20/3D/dem-cross-sections/")
+cs_pts_prefix <- paste0(s3_bucket, version_prefix, "/3D/dem-cross-sections/")
+# cs_pts_prefix <- paste0(s3_bucket, "v20/3D/dem-cross-sections/")
+
+# transect bucket prefix
+transects_prefix <- paste0(s3_bucket, version_prefix, "/3D/transects/")
 
 # paths to nextgen datasets
 nextgen_files <- list.files(nextgen_dir, full.names = FALSE)
@@ -19,20 +21,31 @@ nextgen_files <- list.files(nextgen_dir, full.names = FALSE)
 transect_files <- list.files(transects_dir, full.names = FALSE)
 
 # string to fill in "cs_source" column in output datasets
-cs_source <- "terrainSliceR"
+cs_source <- "hydrofabric3D"
+
+# reference features dataframe
+ref_df <- data.frame(
+  vpu      = sapply(strsplit(ref_features, "_", fixed = TRUE), function(i) { i[1] }), 
+  ref_file = ref_features
+)
 
 # ensure the files are in the same order and matched up by VPU
 path_df <- align_files_by_vpu(
-                x    = nextgen_files,
-                y    = transect_files,
-                base = base_dir
-              )
+              x    = nextgen_files,
+              y    = transect_files,
+              base = base_dir
+            ) %>% 
+          dplyr::left_join(
+            ref_df,
+            by = "vpu"
+            )
 
 # loop over the nextgen and transect datasets (by VPU) and extract point elevations across points on each transect line,
 # then classify the points, and create a parquet file with hy_id, cs_id, pt_id, X, Y, Z data.
 # Save parquet locally and upload to specified S3 bucket
 for (i in 1:nrow(path_df)) {
-
+  # i = 15
+  
   # nextgen file and full path
   nextgen_file <- path_df$x[i]
   nextgen_path <- paste0(nextgen_dir, nextgen_file)
@@ -40,8 +53,17 @@ for (i in 1:nrow(path_df)) {
   # model attributes file and full path
   transect_file <- path_df$y[i]
   transect_path <- paste0(transects_dir, transect_file)
-  message("Creating VPU ", path_df$vpu[i], " cross section points:\n - flowpaths: '", nextgen_file, "'\n - transects: '", transect_file, "'")
   
+  # model attributes file and full path
+  ref_file <- path_df$ref_file[i]
+  ref_path <- paste0(ref_features_dir, "gpkg/", ref_file)
+  
+  message("Creating VPU ", path_df$vpu[i], 
+          " cross section points:\n - flowpaths: '", nextgen_file,
+          "'\n - transects: '", transect_file, "'", 
+          "\n - waterbodies: '", ref_file, "'"
+          )
+
   ################### 
   
     # read in transects data
@@ -50,21 +72,45 @@ for (i in 1:nrow(path_df)) {
     # read in nextgen data
     flines <- sf::read_sf(nextgen_path, layer = "flowpaths")
   
-    transects <-
-      transects  %>%
-      dplyr::rename(lengthm = cs_lengthm)
+    # read in waterbodies reference features layer
+    waterbodies <- sf::read_sf(ref_path, layer = "waterbodies")
+
+    ##### subset flowlines and transects to first 5 features for testing #####
+    # flines = dplyr::slice(flines, 1:5) 
+    # transects = dplyr::filter(transects, hy_id %in% unique(flines$id))
+    ##### #####
+
+    # Update flowlines and transects to remove flowlines and transects that intersect with reference_features waterbodies
+    feature_subsets <- wb_intersects(flines, transects, waterbodies)
+
+    # replace flowlines and transects objects with updated versions in "updated_features"
+    flines    <- flines[feature_subsets$valid_flowlines, ]
+    transects <- transects[feature_subsets$valid_transects, ]
 
     # get start time for log messages
     time1 <- Sys.time()
-    
+
     # get cross section point elevations
-    cs_pts <- terrainSliceR::cross_section_pts(
-              cs             = transects[lengths(sf::st_intersects(transects, flines)) == 1, ],
-              points_per_cs  = NULL,
-              min_pts_per_cs = 10,
-              dem            = DEM_URL
-              )
-    
+    cs_pts <- hydrofabric3D::cross_section_pts(
+      cs             = transects,
+      points_per_cs  = NULL,
+      min_pts_per_cs = 10,
+      dem            = DEM_URL
+      )
+
+    # try to extend any cross sections that returned cross section points with 
+    # identical Z values within a certain threshold ("flat" cross sections)
+    cs_pts <- hydrofabric3D::rectify_flat_cs(
+      net            = flines,
+      cs             = transects,
+      cs_pts         = cs_pts, 
+      points_per_cs  = NULL,
+      min_pts_per_cs = 10,
+      dem            = DEM_URL,
+      scale          = EXTENSION_PCT,
+      threshold      = 0
+    )
+
     # get end time for log messages
     time2 <- Sys.time()
     time_diff <- round(as.numeric(time2 - time1 ), 2)
@@ -78,12 +124,83 @@ for (i in 1:nrow(path_df)) {
       dplyr::group_by(hy_id, cs_id) %>% 
       dplyr::filter(!any(is.na(Z))) %>% 
       dplyr::ungroup()
+
+    # # check the number of cross sections that were extended
+    # cs_pts$is_extended %>% table()
+
+    # extract cross section points that have an "is_extended" value of TRUE
+    extended_pts = 
+      cs_pts %>% 
+      dplyr::filter(is_extended) %>% 
+      dplyr::mutate(tmp_id = paste0(hy_id, "_", cs_id)) 
+
+    # extract transects that have a "hy_id" in the "extended_pts" dataset
+    update_transects = 
+      transects %>% 
+      dplyr::mutate(tmp_id = paste0(hy_id, "_", cs_id)) %>%
+      dplyr::filter(tmp_id %in% unique(extended_pts$tmp_id))
+      
+    # if any transects were extended, update the transects dataset, and overwrite local and S3 transects geopackages
+    if (nrow(update_transects) > 0) {
+      message("Updating ", nrow(update_transects), " transects")
+
+      update_transects =
+        update_transects %>%
+        # dplyr::filter(hy_id %in% unique(extended_pts$hy_id)) %>% 
+        # apply extend_by_percent function to each transect line: 
+        hydrofabric3D:::extend_by_percent(
+          pct        = EXTENSION_PCT,
+          length_col = "cs_lengthm"
+        )
+
+    # remove old transects that have "tmp_id" in "extended_pts", and replace with "update_transects"
+    out_transects = 
+      transects %>% 
+      dplyr::mutate(tmp_id = paste0(hy_id, "_", cs_id)) %>%
+      dplyr::filter(!tmp_id %in% unique(extended_pts$tmp_id)) %>%
+      dplyr::bind_rows(
+        update_transects
+        )  %>% 
+      # dplyr::mutate(is_extended = FALSE) %>%
+      # dplyr::bind_rows(
+      #   dplyr::mutate(update_transects, is_extended = TRUE)
+      #   )  %>% 
+      dplyr::select(-tmp_id) 
+
+    # mapview::mapview(transects, color = "red") +
+    #  mapview::mapview(dplyr::filter(out_transects, is_extended), color = "green") +
+    #  mapview::mapview(flines, color = "dodgerblue") 
+
+    ###################################### 
+    
+    ## Save locally and RE-UPLOAD TRANSECTS to S3 to account for any extended cross sections
+    message("Saving updated transects to:\n - filepath: '", transect_path, "'")
+  
+    # save updated transects to transect_path, overwriting the local transects geopackage
+    sf::write_sf(
+      out_transects,
+      transect_path
+    )
+
+    # command to copy transects geopackage to S3
+    trans_to_s3 <- paste0("aws s3 cp ", transect_path, " ", transects_prefix, transect_file, 
+                            ifelse(is.null(aws_profile), "", paste0(" --profile ", aws_profile)))
+
+    message("Copy VPU ", path_df$vpu[i], " transects to S3:\n - S3 copy command:\n'", 
+            trans_to_s3, 
+            "'\n==========================")
     
+    system(trans_to_s3, intern = TRUE)
+
+    ###################################### 
+
+    }
+
     # classify the cross section points
     cs_pts <-
       cs_pts %>%
-      dplyr::rename(cs_widths = lengthm) %>%
-      terrainSliceR::classify_points() %>%
+      dplyr::rename(cs_widths = cs_lengthm) %>%
+      hydrofabric3D::classify_points() %>%
       dplyr::mutate(
         X = sf::st_coordinates(.)[,1],
         Y = sf::st_coordinates(.)[,2]
@@ -95,7 +212,7 @@ for (i in 1:nrow(path_df)) {
         X, Y, Z,
         class
         )
-    
+
   # Drop point geometries, leaving just X, Y, Z values
   cs_pts <- sf::st_drop_geometry(cs_pts)
 
@@ -106,9 +223,10 @@ for (i in 1:nrow(path_df)) {
       Z_source = cs_source
       ) %>%
     dplyr::relocate(hy_id, cs_id, pt_id, cs_lengthm, relative_distance, X, Y, Z, Z_source, class)
-  
-  ################### 
-  
+
+  ###################################### 
+
+  ###################################### 
   # name of file and path to save cross section points parquet to
   out_file <- paste0("nextgen_", path_df$vpu[i], "_cross_sections.parquet")
   out_path <- paste0(cs_pts_dir, out_file)
@@ -119,17 +237,12 @@ for (i in 1:nrow(path_df)) {
   arrow::write_parquet(cs_pts, out_path)
   
   # command to copy cross section points parquet to S3
-  if (!is.null(aws_profile)) {
-    copy_cs_pts_to_s3 <- paste0("aws s3 cp ", out_path, " ", cs_pts_prefix, out_file,
-                                    ifelse(is.null(aws_profile), "", paste0(" --profile ", aws_profile)))
-  } else {
-    copy_cs_pts_to_s3 <- paste0("aws s3 cp ", out_path, " ", cs_pts_prefix, out_file)
-    
-  }
+  copy_cs_pts_to_s3 <- paste0("aws s3 cp ", out_path, " ", cs_pts_prefix, out_file,
+                          ifelse(is.null(aws_profile), "", paste0(" --profile ", aws_profile)))
 
   message("Copy VPU ", path_df$vpu[i], " cross sections to S3:\n - S3 copy command:\n'", 
           paste0("aws s3 cp ", out_path, " ", cs_pts_prefix, out_file,
-                 ifelse(is.null(aws_profile), "", paste0(" --profile ", aws_profile))), 
+                ifelse(is.null(aws_profile), "", paste0(" --profile ", aws_profile))), 
           "'\n==========================")
   
   system(copy_cs_pts_to_s3, intern = TRUE)
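
The runner above repeatedly keys rows on a temporary "<hy_id>_<cs_id>" identifier to match extended
cross section points back to their transects. A self-contained sketch of the pattern with toy data
(column names follow the runner; the rows are hypothetical):

    library(dplyr)

    # toy cross section points, flagged by the rectification step (hypothetical data)
    cs_pts <- data.frame(
      hy_id       = c("wb-1", "wb-1", "wb-2"),
      cs_id       = c(1, 2, 1),
      is_extended = c(FALSE, TRUE, TRUE)
    )

    # toy transects for the same network
    transects <- data.frame(
      hy_id = c("wb-1", "wb-1", "wb-2"),
      cs_id = c(1, 2, 1)
    )

    # tmp_id = "<hy_id>_<cs_id>" for the extended cross sections
    extended_ids <-
      cs_pts %>%
      dplyr::filter(is_extended) %>%
      dplyr::mutate(tmp_id = paste0(hy_id, "_", cs_id)) %>%
      dplyr::pull(tmp_id)

    # transects whose cross sections were extended (to re-extend and re-save)
    update_transects <-
      transects %>%
      dplyr::mutate(tmp_id = paste0(hy_id, "_", cs_id)) %>%
      dplyr::filter(tmp_id %in% extended_ids)
    # -> rows wb-1/2 and wb-2/1
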
diff --git a/runners/cs_runner/config.R b/runners/cs_runner/config.R
index 855cd9b..c72d563 100644
--- a/runners/cs_runner/config.R
+++ b/runners/cs_runner/config.R
@@ -2,7 +2,8 @@
 pacman::p_load(
   archive,
   hydrofabric,
-  terrainSliceR
+  hydrofabric3D
+  # terrainSliceR
 )
 
 # load root directory 
@@ -10,9 +11,24 @@ source("runners/cs_runner/config_vars.R")
 
 sf::sf_use_s2(FALSE)
 
+### Cross section point 
+
+### S3 names
+
+# name of S3 bucket
+s3_bucket <- "s3://lynker-spatial/"
+
 # name of bucket with nextgen data
 nextgen_bucket <- "lynker-spatial"
 
+# reference features S3 bucket prefix
+ref_features_prefix <- "s3://lynker-spatial/00_reference_features/gpkg/"
+
+# S3 prefix/folder of version run
+version_prefix <- "v20.1"
+# version_prefix <- "v20"
+
+### LOCAL DIRS
 # directory to copy nextgen bucket data to
 nextgen_dir <- paste0(base_dir, "/pre-release/")
 
@@ -26,14 +42,45 @@ cs_pts_dir <- paste0(base_dir, "/02_cs_pts/")
 # final output directory with geopackages per VPU
 final_dir <- paste0(base_dir, "/cross_sections/")
 
+# directory to copy reference features data to
+ref_features_dir <- paste0(base_dir, "/00_reference_features/")
+
 # create directories 
 dir.create(transects_dir, showWarnings = FALSE)
 dir.create(cs_pts_dir,    showWarnings = FALSE)
+dir.create(ref_features_dir,     showWarnings = FALSE)
+dir.create(paste0(ref_features_dir, "gpkg/"),     showWarnings = FALSE)
 dir.create(final_dir,     showWarnings = FALSE)
 # dir.create(model_attr_dir,  showWarnings = FALSE)
 
+## Get a list of the reference features geopackages from S3 and build local save paths from the S3 file names
+
+# list objects in the S3 bucket, and regular expression match to the reference_features.gpkg pattern
+list_ref_features <- paste0('#!/bin/bash
+            # AWS S3 Bucket and Directory information
+            S3_BUCKET="', ref_features_prefix , '"
+            
+            # Regular expression pattern to match object keys
+            PATTERN="reference_features.gpkg"
+
+            S3_OBJECTS=$(aws s3 ls "$S3_BUCKET" | awk \'{print $4}\' | grep -E "$PATTERN")
+            
+            echo "$S3_OBJECTS"'
+)
+
+# ---- Get a list of reference features geopackages ----
+
+# Run the script to get a list of the reference features geopackages that matched the regular expression above
+ref_features <- system(list_ref_features, intern = TRUE)
+
+# ref features datasets
+ref_features_keys <- paste0(ref_features_prefix, ref_features)
+ref_features_files <- paste0(ref_features_dir, "gpkg/", ref_features)
+
+###
+### UTILITY FUNCTION FOR MATCHING FILES BASED ON VPU STRING ###
+###
 
-##### UTILITY FUNCTION FOR MATCHING FILES BASED ON VPU STRING ######
 # Given 2 character vectors of filenames both including VPU strings after a "nextgen_" string, match them together to
 # make sure they are aligned and in the same order
 # x is a character vector of file paths with a VPU ID preceded by a "nextgen_" string 
@@ -76,3 +123,161 @@ align_files_by_vpu <- function(
   return(matched_paths)
   
 }
+
+# Update flowlines and transects to remove flowlines and transects that intersect with reference_features waterbodies
+# flowlines: flowlines linestring sf object
+# trans: transects linestring sf object
+# waterbodies: waterbodies polygon sf object
+# Returns a list of length 2 with logical vectors ("valid_flowlines", "valid_transects") that subset the
+# "flowlines" and "trans" sf objects, keeping only features that do NOT intersect reference_features waterbodies
+wb_intersects <- function(flowlines, trans, waterbodies) {
+
+  ########  ########  ########  ########  ########  ########
+  ########  ########  ########  ########  ########  ########
+  # if(type == 1) {
+    
+    flowlines_geos <- geos::as_geos_geometry(flowlines)
+    wbs_geos <- geos::as_geos_geometry(waterbodies)
+    
+    # temporary ID for transects that is the "hy_id", underscore, "cs_id", used for subsetting in future steps
+    trans$tmp_id <- paste0(trans$hy_id, "_", trans$cs_id)
+    
+    message("Checking flowlines against waterbodies...")
+    
+    # create an index between flowlines and waterbodies 
+    wb_index <- geos::geos_intersects_matrix(flowlines_geos, wbs_geos)
+    
+    # keep flowlines that do NOT intersect any waterbodies; set aside those that do for further checks
+    to_keep  <- flowlines[lengths(wb_index) == 0, ]
+    to_check <- flowlines[lengths(wb_index) != 0, ]
+    
+    # subset transects to the hy_ids in "to_check" set of flowlines
+    trans_check <- trans[trans$hy_id %in% unique(to_check$id), ]
+    # trans_check <- trans_geos[trans$hy_id %in% unique(to_check$id)]
+    
+    # trans[trans$hy_id %in% unique(to_check$id), ] %>% nrow()
+    # 
+    # trans_geos[trans$hy_id %in% unique(to_check$id)]
+    
+    # check where the transects linestrings intersect with the waterbodies
+    trans_geos_check <- geos::as_geos_geometry(trans_check)
+    
+    message("Checking transects against waterbodies...")
+    trans_wb_index <- geos::geos_intersects_any(
+      trans_geos_check,  
+      wbs_geos[unlist(wb_index)]
+    )
+  
+    # within the transect lines that are on a flowline that crosses a waterbody, 
+    # check if any of these transect lines DO NOT CROSS A WATERBODY AT ALL
+    trans_keep <- trans_check[!trans_wb_index, ]
+    # trans_keep <- trans_check[lengths(trans_wb_index2) == 0, ]
+  
+    # preserve any flowlines that CROSS A WATERBODY BUT ALSO HAVE A TRANSECT LINE that does NOT cross any waterbodies
+    to_check <- to_check[to_check$id %in% unique(trans_keep$hy_id), ]
+    
+    # update flowlines to keep with flowlines that intersect a waterbody BUT STILL,
+    # have transects that are NOT in the waterbody
+    to_keep <- dplyr::bind_rows(to_keep, to_check)
+    
+    # 'tmp_ids' of transects that are being checked and also the transects within trans_check 
+    # that were determined to be valid (are being kept)
+    check_ids <- unique(trans_check$tmp_id)
+    keep_ids <- unique(trans_keep$tmp_id)
+    
+    # logical vectors of which flowlines/transects to keep (KEEP == TRUE)
+    # - Remove any transects that are on flowlines that cross a waterbody AND the transect crosses the waterbody too.
+    # - Keep original transects that are not on flowlines that intersect waterbodies AND 
+    # also the transects that do NOT intersect waterbodies but are on a flowline that DOES intersect a waterbody
+    valid_flowlines <- flowlines$id %in% to_keep$id
+    valid_transects <- trans$tmp_id %in% dplyr::filter(trans,
+                                                       !tmp_id %in% check_ids[!check_ids %in% keep_ids])$tmp_id
+    
+    # return a list of logical vectors for subsetting the flowlines and transects
+    return(
+      list(
+        "valid_flowlines" =  valid_flowlines,
+        "valid_transects" =  valid_transects
+      )
+    )
+  # }
+  ########  ########  ########  ########  ########  ########
+  ########  ########  ########  ########  ########  ########
+  # if(type == 2) {
+  # 
+  #   # temporary ID for transects that is the "hy_id", underscore, "cs_id", used for subsetting in future steps
+  #   trans$tmp_id <- paste0(trans$hy_id, "_", trans$cs_id)
+  #   
+  #   # trans$order <- 1:nrow(trans)
+  #   
+  #   message("Checking flowlines against waterbodies...")
+  #   
+  #   # create an index between flowlines and waterbodies 
+  #   wb_index <- sf::st_intersects(flowlines, waterbodies)
+  #   
+  #   # remove any flowlines that cross more than 1 waterbody
+  #   to_keep <- flowlines[lengths(wb_index) == 0, ]
+  #   to_check <- flowlines[lengths(wb_index) != 0, ]
+  #   
+  #   # subset transects to the hy_ids in "to_check" set of flowlines
+  #   trans_check <- trans[trans$hy_id %in% unique(to_check$id), ]
+  #   
+  #   message("Checking transects against waterbodies...")
+  #   
+  #   # check where the transects linestrings intersect with the waterbodies
+  #   trans_wb_index <- sf::st_intersects(trans_check, 
+  #                                       waterbodies[unlist(wb_index), ]
+  #   )
+  #   # trans_wb_index <- sf::st_intersects(trans_check, wbs)
+  #   
+  #   # within the transects lines that are on a flowline that crosses a waterbody, 
+  #   # check if any of these transects line DO NOT CROSS A WATERBODY AT ALL
+  #   trans_keep <- trans_check[lengths(trans_wb_index) == 0, ]
+  #   
+  #   # preserve any flowlines that CROSS A WATERBODY BUT ALSO HAVE A TRANSECT LINE that does NOT cross any waterbodies
+  #   to_check <- to_check[to_check$id %in% unique(trans_keep$hy_id), ]
+  #   
+  #   # update flowlines to keep with flowlines that intersect a waterbody BUT STILL,
+  #   # have transects that are NOT in the waterbody
+  #   to_keep <- dplyr::bind_rows(to_keep, to_check)
+  #   
+  #   # 'tmp_ids' of transects that are being checked and also the transects within trans_check 
+  #   # that were determined to be valid (are being kept)
+  #   check_ids <- unique(trans_check$tmp_id)
+  #   keep_ids <- unique(trans_keep$tmp_id)
+  #   
+  #   # logical vectors of which flowlines/transects to keep (KEEP == TRUE)
+  #   # - Remove any transects that are on flowlines that cross a waterbody AND the transect crosses the waterbody too.
+  #   # - Keep original transects that are not on flowlines that intersect waterbodies AND 
+  #   # also the transects that do NOT intersect waterbodies but are on a flowline that DOES intersect a waterbody
+  #   valid_flowlines <- flowlines$id %in% to_keep$id
+  #   valid_transects <- trans$tmp_id %in% dplyr::filter(trans,
+  #                                                      !tmp_id %in% check_ids[!check_ids %in% keep_ids])$tmp_id
+  #   
+  #   # return alist of updated flowlines and transects 
+  #   return(
+  #     list(
+  #       "valid_flowlines" =  valid_flowlines,
+  #       "valid_transects" =  valid_transects
+  #     )
+  #   )
+  #   
+  #   # # - Remove any transects that are on flowlines that cross a waterbody AND the transect crosses the waterbody too.
+  #   # # - Keep original transects that are not on flowlines that intersect waterbodies AND 
+  #   #     # also the transects that do NOT intersect waterbodies but are on a flowline that DOES intersect a waterbody
+  #   # trans <-
+  #   #   trans %>%
+  #   #   dplyr::filter(
+  #   #     !tmp_id %in% check_ids[!check_ids %in% keep_ids]
+  #   #     ) %>%
+  #   #   dplyr::select(-tmp_id)
+  #   # 
+  #   # # return alist of updated flowlines and transects 
+  #   # return(
+  #   #   list(
+  #   #     "flowlines" = to_keep,
+  #   #     "transects" = trans
+  #   #     )
+  #   # )
+  # }
+}
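
wb_intersects() rests on two geos primitives: geos_intersects_matrix(), which returns, for each
left-hand geometry, the indices of the right-hand geometries it intersects, and
geos_intersects_any(), which returns a single logical per geometry. A minimal sketch of the
lengths(index) == 0 idiom used above (toy WKT geometries; assumes only the geos package):

    library(geos)

    # two toy flowlines and one toy waterbody
    flowlines <- geos_read_wkt(c(
      "LINESTRING (0 0, 10 0)",   # passes through the waterbody
      "LINESTRING (0 5, 10 5)"    # misses it
    ))
    waterbodies <- geos_read_wkt("POLYGON ((4 -1, 6 -1, 6 1, 4 1, 4 -1))")

    # per-flowline list of indices of intersecting waterbodies
    wb_index <- geos_intersects_matrix(flowlines, waterbodies)

    lengths(wb_index) == 0
    # [1] FALSE  TRUE   -> keep the second flowline outright, re-check the first

    # one logical per flowline, tested against only the touched waterbodies
    geos_intersects_any(flowlines, waterbodies[unlist(wb_index)])
    # [1]  TRUE FALSE
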
diff --git a/runners/cs_runner/config_vars.R b/runners/cs_runner/config_vars.R
index 9738869..72d87b9 100644
--- a/runners/cs_runner/config_vars.R
+++ b/runners/cs_runner/config_vars.R
@@ -1,11 +1,19 @@
 ### EDIT base_dir, aws_profile, and DEM_URL ###
-base_dir    <- '/Users/anguswatters/Desktop/lynker-spatial'
+base_dir     <- '/Users/anguswatters/Desktop/lynker-spatial'
 
 # AWS profile to run CLI commands 
-aws_profile <- "angus-lynker"
+aws_profile  <- "angus-lynker"
+
+# name of S3 bucket
+s3_bucket    <- "s3://lynker-spatial/"
 
 # DEM URL
-DEM_URL     <- "/vsicurl/https://prd-tnm.s3.amazonaws.com/StagedProducts/Elevation/1/TIFF/USGS_Seamless_DEM_1.vrt"
+DEM_URL      <- "/vsicurl/https://prd-tnm.s3.amazonaws.com/StagedProducts/Elevation/1/TIFF/USGS_Seamless_DEM_1.vrt"
+
+# scale argument for rectify_flat_cs() function. 
+# Percentage of a transect line's length by which to extend the transect when searching for viable Z values
+# Default setting is 50% of the original transect line's length (0.5)
+EXTENSION_PCT <- 0.5
 
 # # create the directory if it does NOT exist
 # if(!dir.exists(base_dir)) {
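
As a worked example of EXTENSION_PCT: with the default 0.5, a 100 m transect that produced flat
cross section points is re-cut at 150 m total length. The sketch below only illustrates the length
bookkeeping (hydrofabric3D:::extend_by_percent() is internal, and splitting the extension evenly
across both ends is an assumption here, not documented behavior):

    EXTENSION_PCT <- 0.5

    # hypothetical transect lengths in meters
    cs_lengthm <- c(100, 240)

    # total length after extending by EXTENSION_PCT of the original length
    extended_lengthm <- cs_lengthm * (1 + EXTENSION_PCT)
    # [1] 150 360

    # extension per end, if split evenly across both ends (assumption)
    (extended_lengthm - cs_lengthm) / 2
    # [1] 25 60
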
diff --git a/runners/cs_runner/download_nextgen.R b/runners/cs_runner/download_nextgen.R
index a1aa30a..39ff31d 100644
--- a/runners/cs_runner/download_nextgen.R
+++ b/runners/cs_runner/download_nextgen.R
@@ -10,6 +10,9 @@ s3_bucket <- "s3://lynker-spatial/"
 # nextgen bucket name
 prerelease_prefix     <- "s3://lynker-spatial/pre-release/"
 
+# # reference features S3 bucket prefix
+# ref_features_prefix <- "s3://lynker-spatial/00_reference_features/gpkg/"
+
 # nextgen model attributes folder in S3 bucket with parquet files
 model_attr_prefix <- paste0(s3_bucket, "v20/3D/model_attributes/")
 
@@ -95,5 +98,34 @@ for (key in model_attr_keys) {
   message("Download '", paste0(model_attr_prefix, key), "' complete!")
   message("------------------")
 }
+## Get a list of the reference features geopackages from S3 and build local save paths from the S3 file names
+
+# list objects in the S3 bucket, and regular expression match to the reference_features.gpkg pattern
+list_ref_features <- paste0('#!/bin/bash
+            # AWS S3 Bucket and Directory information
+            S3_BUCKET="', ref_features_prefix , '"
+            
+            # Regular expression pattern to match object keys
+            PATTERN="reference_features.gpkg"
+
+            S3_OBJECTS=$(aws s3 ls "$S3_BUCKET" | awk \'{print $4}\' | grep -E "$PATTERN")
+            
+            echo "$S3_OBJECTS"'
+)
 
+# ---- Get a list of reference features geopackages ----
+# Run the script to get a list of the reference features geopackages that matched the regular expression above
+ref_features <- system(list_ref_features, intern = TRUE)
 
+## Download reference features geopackages and save them to a local directory
+# Parse the selected S3 objects keys and copy them to the destination directory
+for (key in ref_features) {
+  # paste0(ref_features_dir, "gpkg/")
+  copy_cmd <- paste0('aws s3 cp ', ref_features_prefix, key, ' ', paste0(ref_features_dir, "gpkg/"), key)
+  
+  message("Copying S3 object:\n", paste0(ref_features_prefix, key))
+  system(copy_cmd)
+  
+  message("Download '", paste0(ref_features_dir, "gpkg/", key), "' complete!")
+  message("------------------")
+}
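
config.R and download_nextgen.R share the same listing pattern: build a small shell pipeline as a
string, run it with system(..., intern = TRUE), and receive one object key per element of the
returned character vector. A stripped-down sketch (requires the AWS CLI on PATH; the example key
name is hypothetical but follows the "<vpu>_..." naming that the ref_df code in 02_cs_pts.R
assumes):

    # list keys under an S3 prefix that match a pattern, from R
    s3_prefix <- "s3://lynker-spatial/00_reference_features/gpkg/"
    pattern   <- "reference_features.gpkg"

    list_cmd <- paste0(
      'aws s3 ls "', s3_prefix, '" | awk \'{print $4}\' | grep -E "', pattern, '"'
    )

    # one matched key per element, e.g. "01_reference_features.gpkg" (hypothetical)
    keys <- system(list_cmd, intern = TRUE)

    # VPU taken as the text before the first underscore, as in 02_cs_pts.R
    vpus <- sapply(strsplit(keys, "_", fixed = TRUE), function(i) { i[1] })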

From 16c6637728693928b8640f2d2252622dc71716d2 Mon Sep 17 00:00:00 2001
From: anguswg-ucsb <anguswatters@gmail.com>
Date: Wed, 6 Dec 2023 05:05:38 -0800
Subject: [PATCH 02/64] small cleanups

---
 runners/cs_runner/01_transects.R | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/runners/cs_runner/01_transects.R b/runners/cs_runner/01_transects.R
index 456be21..1d6f70d 100644
--- a/runners/cs_runner/01_transects.R
+++ b/runners/cs_runner/01_transects.R
@@ -50,9 +50,7 @@ for(i in 1:nrow(path_df)) {
   #   dplyr::select(
   #     model_attrs,
   #     id, eTW
-  #   ),
-  #   by = "id"
-  # )
+  #   ), by = "id")
 
   # calculate bankfull width
   flines <-
@@ -80,7 +78,6 @@ for(i in 1:nrow(path_df)) {
   # flines$bf_width <- ifelse(is.na(flines$bf_width),  exp(0.700    + 0.365* log(flines$tot_drainage_areasqkm)), flines$bf_width)
 
   time1 <- Sys.time()
- # system.time({
     # create transect lines
     transects <- hydrofabric3D::cut_cross_sections(
       net               = flines,                        # flowlines network
@@ -100,7 +97,6 @@ for(i in 1:nrow(path_df)) {
       # precision         = 1,
       add               = TRUE                           # whether to add back the original data
     )
-  # })
     
   time2 <- Sys.time()
   time_diff <- round(as.numeric(time2 - time1 ), 2)

From ddecfffc8bfee04214a743e0ef6d3d83112c97b8 Mon Sep 17 00:00:00 2001
From: anguswg-ucsb <anguswatters@gmail.com>
Date: Sat, 9 Dec 2023 06:58:42 -0800
Subject: [PATCH 03/64] Updating config and download_nextgen runner scripts to
 be more flexible about which remote datasets they point to, updating transects
 runner to use power-law bankfull width, and updating cs_pts runner to remove
 waterbodies and try to extend transects with flat cross section Z values

---
 runners/cs_runner/01_transects.R     | 10 +++++-----
 runners/cs_runner/02_cs_pts.R        |  4 ++--
 runners/cs_runner/config.R           | 10 +++++++++-
 runners/cs_runner/download_nextgen.R | 15 ++++++++++-----
 4 files changed, 26 insertions(+), 13 deletions(-)

diff --git a/runners/cs_runner/01_transects.R b/runners/cs_runner/01_transects.R
index 1d6f70d..7cf33ae 100644
--- a/runners/cs_runner/01_transects.R
+++ b/runners/cs_runner/01_transects.R
@@ -1,11 +1,11 @@
 # Generate the flowlines layer for the final cross_sections_<VPU>.gpkg for each VPU
 source("runners/cs_runner/config.R")
 
-# # # load libraries
-library(hydrofabric3D)
-# library(terrainSliceR)
-library(dplyr)
-library(sf)
+# # # # load libraries
+# library(hydrofabric3D)
+# # library(terrainSliceR)
+# library(dplyr)
+# library(sf)
 
 # transect bucket prefix
 transects_prefix <- paste0(s3_bucket, version_prefix, "/3D/transects/")
diff --git a/runners/cs_runner/02_cs_pts.R b/runners/cs_runner/02_cs_pts.R
index 9c7c237..02dad68 100644
--- a/runners/cs_runner/02_cs_pts.R
+++ b/runners/cs_runner/02_cs_pts.R
@@ -44,8 +44,7 @@ path_df <- align_files_by_vpu(
 # then classify the points, and create a parquet file with hy_id, cs_id, pt_id, X, Y, Z data.
 # Save parquet locally and upload to specified S3 bucket
 for (i in 1:nrow(path_df)) {
-  # i = 15
-  
+
   # nextgen file and full path
   nextgen_file <- path_df$x[i]
   nextgen_path <- paste0(nextgen_dir, nextgen_file)
@@ -248,3 +247,4 @@ for (i in 1:nrow(path_df)) {
   system(copy_cs_pts_to_s3, intern = TRUE)
 
 }
+
diff --git a/runners/cs_runner/config.R b/runners/cs_runner/config.R
index c72d563..ffc88e5 100644
--- a/runners/cs_runner/config.R
+++ b/runners/cs_runner/config.R
@@ -21,6 +21,12 @@ s3_bucket <- "s3://lynker-spatial/"
 # name of bucket with nextgen data
 nextgen_bucket <- "lynker-spatial"
 
+# nextgen bucket folder name
+nextgen_bucket_folder <- "v20.1/gpkg/"
+
+# nextgen bucket name
+nextgen_prefix  <- paste0(s3_bucket, nextgen_bucket_folder)
+
 # reference features S3 bucket prefix
 ref_features_prefix <- "s3://lynker-spatial/00_reference_features/gpkg/"
 
@@ -29,8 +35,10 @@ version_prefix <- "v20.1"
 # version_prefix <- "v20"
 
 ### LOCAL DIRS
+
 # directory to copy nextgen bucket data to
-nextgen_dir <- paste0(base_dir, "/pre-release/")
+nextgen_dir <- paste0(base_dir, "/", nextgen_bucket_folder)
+# nextgen_dir <- paste0(base_dir, "/pre-release/")
 
 # model attributes directory
 model_attr_dir <- paste0(base_dir, "/model_attributes/")
diff --git a/runners/cs_runner/download_nextgen.R b/runners/cs_runner/download_nextgen.R
index 39ff31d..c405fa9 100644
--- a/runners/cs_runner/download_nextgen.R
+++ b/runners/cs_runner/download_nextgen.R
@@ -7,8 +7,13 @@ source("runners/cs_runner/config_vars.R")
 # name of S3 bucket
 s3_bucket <- "s3://lynker-spatial/"
 
+# nextgen bucket folder name
+nextgen_bucket_folder <- "v20.1/gpkg/"
+
 # nextgen bucket name
-prerelease_prefix     <- "s3://lynker-spatial/pre-release/"
+nextgen_prefix  <- paste0(s3_bucket, nextgen_bucket_folder)
+
+# prerelease_prefix     <- "s3://lynker-spatial/pre-release/"
 
 # # reference features S3 bucket prefix
 # ref_features_prefix <- "s3://lynker-spatial/00_reference_features/gpkg/"
@@ -17,7 +22,7 @@ prerelease_prefix     <- "s3://lynker-spatial/pre-release/"
 model_attr_prefix <- paste0(s3_bucket, "v20/3D/model_attributes/")
 
 # directory to copy nextgen bucket data to
-nextgen_dir <- paste0(base_dir, "/pre-release/")
+nextgen_dir <- paste0(base_dir, "/", nextgen_bucket_folder)
 
 # create the directory if it does NOT exist
 if(!dir.exists(nextgen_dir)) {
@@ -38,7 +43,7 @@ if(!dir.exists(model_attr_dir)) {
 # list objects in S3 bucket, and regular expression match to nextgen_.gpkg pattern
 command <- paste0('#!/bin/bash
             # AWS S3 Bucket and Directory information
-            S3_BUCKET="', prerelease_prefix, '"
+            S3_BUCKET="', nextgen_prefix, '"
             DESTINATION_DIR=', nextgen_dir, '
             
             # Regular expression pattern to match object keys
@@ -57,8 +62,8 @@ bucket_keys <- system(command, intern = TRUE)
 # Parse the selected S3 objects keys and copy them to the destination directory
 for (key in bucket_keys) {
   
-  copy_cmd <- paste0('aws s3 cp ', prerelease_prefix, key, " ", nextgen_dir, key)
-  message("Copying S3 object:\n", paste0(prerelease_prefix, key))
+  copy_cmd <- paste0('aws s3 cp ', nextgen_prefix, key, " ", nextgen_dir, key)
+  message("Copying S3 object:\n", paste0(nextgen_prefix, key))
   
   system(copy_cmd)
   

From 2567b41cc6abf5f5ed111c8ec98293e63be43fe1 Mon Sep 17 00:00:00 2001
From: anguswg-ucsb <anguswatters@gmail.com>
Date: Mon, 11 Dec 2023 14:14:24 -0800
Subject: [PATCH 04/64] small cleanups to cs_runner/config.R, removing old code

---
 runners/cs_runner/config.R | 88 +-------------------------------------
 1 file changed, 1 insertion(+), 87 deletions(-)

diff --git a/runners/cs_runner/config.R b/runners/cs_runner/config.R
index ffc88e5..58cb4ed 100644
--- a/runners/cs_runner/config.R
+++ b/runners/cs_runner/config.R
@@ -141,8 +141,6 @@ align_files_by_vpu <- function(
 wb_intersects <- function(flowlines, trans, waterbodies) {
 
   ########  ########  ########  ########  ########  ########
-  ########  ########  ########  ########  ########  ########
-  # if(type == 1) {
     
     flowlines_geos <- geos::as_geos_geometry(flowlines)
     wbs_geos <- geos::as_geos_geometry(waterbodies)
@@ -163,10 +161,6 @@ wb_intersects <- function(flowlines, trans, waterbodies) {
     trans_check <- trans[trans$hy_id %in% unique(to_check$id), ]
     # trans_check <- trans_geos[trans$hy_id %in% unique(to_check$id)]
     
-    # trans[trans$hy_id %in% unique(to_check$id), ] %>% nrow()
-    # 
-    # trans_geos[trans$hy_id %in% unique(to_check$id)]
-    
     # check where the transects linestrings intersect with the waterbodies
     trans_geos_check <- geos::as_geos_geometry(trans_check)
     
@@ -199,7 +193,7 @@ wb_intersects <- function(flowlines, trans, waterbodies) {
     # also the transects that do NOT intersect waterbodies but are on a flowline that DOES intersect a waterbody
     valid_flowlines <- flowlines$id %in% to_keep$id
     valid_transects <- trans$tmp_id %in% dplyr::filter(trans,
-                                                       !tmp_id %in% check_ids[!check_ids %in% keep_ids])$tmp_id
+                                                    !tmp_id %in% check_ids[!check_ids %in% keep_ids])$tmp_id
     
     # return a list of logical vectors for subsetting the flowlines and transects
     return(
@@ -208,84 +202,4 @@ wb_intersects <- function(flowlines, trans, waterbodies) {
         "valid_transects" =  valid_transects
       )
     )
-  # }
-  ########  ########  ########  ########  ########  ########
-  ########  ########  ########  ########  ########  ########
-  # if(type == 2) {
-  # 
-  #   # temporary ID for transects that is the "hy_id", underscore, "cs_id", used for subsetting in future steps
-  #   trans$tmp_id <- paste0(trans$hy_id, "_", trans$cs_id)
-  #   
-  #   # trans$order <- 1:nrow(trans)
-  #   
-  #   message("Checking flowlines against waterbodies...")
-  #   
-  #   # create an index between flowlines and waterbodies 
-  #   wb_index <- sf::st_intersects(flowlines, waterbodies)
-  #   
-  #   # remove any flowlines that cross more than 1 waterbody
-  #   to_keep <- flowlines[lengths(wb_index) == 0, ]
-  #   to_check <- flowlines[lengths(wb_index) != 0, ]
-  #   
-  #   # subset transects to the hy_ids in "to_check" set of flowlines
-  #   trans_check <- trans[trans$hy_id %in% unique(to_check$id), ]
-  #   
-  #   message("Checking transects against waterbodies...")
-  #   
-  #   # check where the transects linestrings intersect with the waterbodies
-  #   trans_wb_index <- sf::st_intersects(trans_check, 
-  #                                       waterbodies[unlist(wb_index), ]
-  #   )
-  #   # trans_wb_index <- sf::st_intersects(trans_check, wbs)
-  #   
-  #   # within the transects lines that are on a flowline that crosses a waterbody, 
-  #   # check if any of these transects line DO NOT CROSS A WATERBODY AT ALL
-  #   trans_keep <- trans_check[lengths(trans_wb_index) == 0, ]
-  #   
-  #   # preserve any flowlines that CROSS A WATERBODY BUT ALSO HAVE A TRANSECT LINE that does NOT cross any waterbodies
-  #   to_check <- to_check[to_check$id %in% unique(trans_keep$hy_id), ]
-  #   
-  #   # update flowlines to keep with flowlines that intersect a waterbody BUT STILL,
-  #   # have transects that are NOT in the waterbody
-  #   to_keep <- dplyr::bind_rows(to_keep, to_check)
-  #   
-  #   # 'tmp_ids' of transects that are being checked and also the transects within trans_check 
-  #   # that were determined to be valid (are being kept)
-  #   check_ids <- unique(trans_check$tmp_id)
-  #   keep_ids <- unique(trans_keep$tmp_id)
-  #   
-  #   # logical vectors of which flowlines/transects to keep (KEEP == TRUE)
-  #   # - Remove any transects that are on flowlines that cross a waterbody AND the transect crosses the waterbody too.
-  #   # - Keep original transects that are not on flowlines that intersect waterbodies AND 
-  #   # also the transects that do NOT intersect waterbodies but are on a flowline that DOES intersect a waterbody
-  #   valid_flowlines <- flowlines$id %in% to_keep$id
-  #   valid_transects <- trans$tmp_id %in% dplyr::filter(trans,
-  #                                                      !tmp_id %in% check_ids[!check_ids %in% keep_ids])$tmp_id
-  #   
-  #   # return alist of updated flowlines and transects 
-  #   return(
-  #     list(
-  #       "valid_flowlines" =  valid_flowlines,
-  #       "valid_transects" =  valid_transects
-  #     )
-  #   )
-  #   
-  #   # # - Remove any transects that are on flowlines that cross a waterbody AND the transect crosses the waterbody too.
-  #   # # - Keep original transects that are not on flowlines that intersect waterbodies AND 
-  #   #     # also the transects that do NOT intersect waterbodies but are on a flowline that DOES intersect a waterbody
-  #   # trans <-
-  #   #   trans %>%
-  #   #   dplyr::filter(
-  #   #     !tmp_id %in% check_ids[!check_ids %in% keep_ids]
-  #   #     ) %>%
-  #   #   dplyr::select(-tmp_id)
-  #   # 
-  #   # # return alist of updated flowlines and transects 
-  #   # return(
-  #   #   list(
-  #   #     "flowlines" = to_keep,
-  #   #     "transects" = trans
-  #   #     )
-  #   # )
-  # }
 }

From 67d33bb7bd6b6b026efaf8b42cfc524e04bc4f9c Mon Sep 17 00:00:00 2001
From: anguswg-ucsb <anguswatters@gmail.com>
Date: Mon, 26 Feb 2024 07:59:53 -0800
Subject: [PATCH 05/64] updated transect and cross section runners to allow
 transects to be updated and a new local version saved after cross section
 rectification, added a COLLECT_META flag controlling whether metadata CSVs
 for each cross section run are collected and saved locally

---
 .gitignore                       |   3 +-
 runners/cs_runner/01_transects.R |  71 ++++-
 runners/cs_runner/02_cs_pts.R    | 487 +++++++++++++++++++++++++------
 runners/cs_runner/config.R       | 228 +++++++++++----
 runners/cs_runner/config_vars.R  |   4 +
 5 files changed, 625 insertions(+), 168 deletions(-)

diff --git a/.gitignore b/.gitignore
index d70321e..3d01a85 100644
--- a/.gitignore
+++ b/.gitignore
@@ -9,4 +9,5 @@ dead
 inst/doc
 vignettes/cihro-data
 data
-check
\ No newline at end of file
+check
+.Rapp.history
\ No newline at end of file
diff --git a/runners/cs_runner/01_transects.R b/runners/cs_runner/01_transects.R
index 7cf33ae..6bc0025 100644
--- a/runners/cs_runner/01_transects.R
+++ b/runners/cs_runner/01_transects.R
@@ -26,7 +26,9 @@ path_df <- align_files_by_vpu(
 
 # loop over each VPU and generate cross sections, then save locally and upload to S3 bucket
 for(i in 1:nrow(path_df)) {
-
+  
+  # i = 8
+  
   # nextgen file and full path
   nextgen_file <- path_df$x[i]
   nextgen_path <- paste0(nextgen_dir, nextgen_file)
@@ -78,6 +80,9 @@ for(i in 1:nrow(path_df)) {
   # flines$bf_width <- ifelse(is.na(flines$bf_width),  exp(0.700    + 0.365* log(flines$tot_drainage_areasqkm)), flines$bf_width)
 
   time1 <- Sys.time()
+  
+  # system.time({
+    
     # create transect lines
     transects <- hydrofabric3D::cut_cross_sections(
       net               = flines,                        # flowlines network
@@ -97,6 +102,7 @@ for(i in 1:nrow(path_df)) {
       # precision         = 1,
       add               = TRUE                           # whether to add back the original data
     )
+  # })
     
   time2 <- Sys.time()
   time_diff <- round(as.numeric(time2 - time1 ), 2)
@@ -109,35 +115,70 @@ for(i in 1:nrow(path_df)) {
   
   message("Saving transects to:\n - filepath: '", out_path, "'")
   
-  # add cs_source column and keep just the desired columns to save and upload to S3
-  transects <-
+  # add cs_source column and rename cs_widths to cs_lengthm
+  transects <- 
     transects %>%
     dplyr::mutate(
       cs_source = net_source
     ) %>%
-    dplyr::select(
-      hy_id,
-      cs_source,
-      cs_id,
-      cs_measure,
-      cs_lengthm = cs_widths,
-      geometry
-    )
+    dplyr::rename("cs_lengthm" = cs_widths)
+  
+  # # add cs_source column and keep just the desired columns to save and upload to S3
+  # transects <-
+  #   transects %>%
+  #   dplyr::mutate(
+  #     cs_source = net_source
+  #   ) %>%
+  #   dplyr::select(
+  #     hy_id,
+  #     cs_source,
+  #     cs_id,
+  #     cs_measure,
+  #     cs_lengthm = cs_widths,
+  #     geometry
+  #   )
+  # tmp <- sf::read_sf(out_path)
 
-  # save flowlines to out_path (lynker-spatial/01_transects/transects_<VPU num>.gpkg)
+  # save transects with only columns to be uploaded to S3 (lynker-spatial/01_transects/transects_<VPU num>.gpkg)
   sf::write_sf(
-    transects,
+    # save dataset with only subset of columns to upload to S3
+    dplyr::select(transects, 
+                  hy_id,
+                  cs_source,
+                  cs_id,
+                  cs_measure,
+                  cs_lengthm,
+                  # sinuosity,
+                  geometry
+    ),
     out_path
-  )
+    )
   
   # command to copy transects geopackage to S3
   copy_to_s3 <- paste0("aws s3 cp ", out_path, " ", transects_prefix, out_file, 
                    ifelse(is.null(aws_profile), "", paste0(" --profile ", aws_profile))
                    )
-
+  
   message("Copy VPU ", path_df$vpu[i], " transects to S3:\n - S3 copy command:\n'", 
           copy_to_s3, 
           "'\n==========================")
   
   system(copy_to_s3, intern = TRUE)
+  
+  message("Overwritting local copy of transects to include 'is_extended' column...\n==========================")
+  # Overwrite transects with additional columns for development purposes (is_extended) to have a local copy of dataset with information about extensions
+  sf::write_sf(
+    dplyr::select(
+      dplyr::mutate(transects, is_extended = FALSE),
+      hy_id,
+      cs_source,
+      cs_id,
+      cs_measure,
+      cs_lengthm,
+      # sinuosity,
+      is_extended,
+      geometry
+    ),
+    out_path
+  )
 }
diff --git a/runners/cs_runner/02_cs_pts.R b/runners/cs_runner/02_cs_pts.R
index 02dad68..93c561b 100644
--- a/runners/cs_runner/02_cs_pts.R
+++ b/runners/cs_runner/02_cs_pts.R
@@ -40,11 +40,16 @@ path_df <- align_files_by_vpu(
             by = "vpu"
             )
 
+# Local path to save CSVs of cross section meta data during each iteration
+meta_path <- "/local/path/to/save/cross_section_meta_data/"
+
 # loop over the nextgen and transect datasets (by VPU) and extract point elevations across points on each transect line,
 # then classify the points, and create a parquet file with hy_id, cs_id, pt_id, X, Y, Z data.
 # Save parquet locally and upload to specified S3 bucket
 for (i in 1:nrow(path_df)) {
 
+  start <- Sys.time()
+  
   # nextgen file and full path
   nextgen_file <- path_df$x[i]
   nextgen_path <- paste0(nextgen_dir, nextgen_file)
@@ -57,10 +62,14 @@ for (i in 1:nrow(path_df)) {
   ref_file <- path_df$ref_file[i]
   ref_path <- paste0(ref_features_dir, "gpkg/", ref_file)
   
-  message("Creating VPU ", path_df$vpu[i], 
+  # current VPU being processed
+  VPU <- path_df$vpu[i]
+  
+  message("Creating VPU ", VPU, 
           " cross section points:\n - flowpaths: '", nextgen_file,
           "'\n - transects: '", transect_file, "'", 
-          "\n - waterbodies: '", ref_file, "'"
+          "\n - waterbodies: '", ref_file, "'",
+          "'\n - start time: '", start, "'"
           )
 
   ################### 
@@ -78,17 +87,35 @@ for (i in 1:nrow(path_df)) {
     # flines = dplyr::slice(flines, 1:5) 
     # transects = dplyr::filter(transects, hy_id %in% unique(flines$id))
     ##### #####
-
-    # Update flowlines and transects to remove flowlines and transects that intersect with reference_features waterbodies
-    feature_subsets <- wb_intersects(flines, transects, waterbodies)
-
+    
+    # system.time({
+      # Update flowlines and transects to remove flowlines and transects that intersect with reference_features waterbodies
+      feature_subsets <- wb_intersects(flines, transects, waterbodies)
+    # })
+    
+    # Collect meta data on features and changes
+    if(COLLECT_META) {
+      
+      fline_count       <- nrow(flines)
+      transect_count    <- nrow(transects)
+      wb_count          <- nrow(waterbodies)
+      
+      fline_wb_count    <- sum(feature_subsets$valid_flowlines)
+      transect_wb_count <- sum(feature_subsets$valid_transects)
+    }
+    
     # replace flowlines and transects objects with updated versions in "updated_features"
     flines    <- flines[feature_subsets$valid_flowlines, ]
     transects <- transects[feature_subsets$valid_transects, ]
-
-    # get start time for log messages
-    time1 <- Sys.time()
-
+    
+    rm(waterbodies)
+    gc()
+    
+    start_cs_pts <- Sys.time()
+    message("Extracting cross section points (", start_cs_pts,")")    
+    
+    system.time({
+  
     # get cross section point elevations
     cs_pts <- hydrofabric3D::cross_section_pts(
       cs             = transects,
@@ -96,54 +123,166 @@ for (i in 1:nrow(path_df)) {
       min_pts_per_cs = 10,
       dem            = DEM_URL
       )
-
-    # try to extend any cross sections that returned cross section points with 
-    # identical Z values within a certain threshold ("flat" cross sections)
-    cs_pts <- hydrofabric3D::rectify_flat_cs(
-      net            = flines,
-      cs             = transects,
-      cs_pts         = cs_pts, 
-      points_per_cs  = NULL,
-      min_pts_per_cs = 10,
-      dem            = DEM_URL,
-      scale          = EXTENSION_PCT,
-      threshold      = 0
-    )
-
-    # get end time for log messages
-    time2 <- Sys.time()
-    time_diff <- round(as.numeric(time2 - time1 ), 2)
     
-    message("\n\n ---> Cross section point elevations processed in ", time_diff)
+    })
+    
+    end_cs_pts <- Sys.time()
+    message("\n ---> Completed extraction of cross section points (", end_cs_pts,")")
+    
+    if(COLLECT_META) {
+      start_cs_pts_count <- nrow(cs_pts)
+    }
+    
+    # cs_pts_time <- round(as.numeric(end_cs_pts - start_cs_pts ), 2)
+    # message("\n\n ---> Cross section point elevations processed in ", cs_pts_time)
+    
+    start_rectify <- Sys.time()
+    message("Rectifying cross section points (", start_rectify,")")
+    
+    # collect the hy_ids and number of stream orders in cs_pts
+    if(COLLECT_META) {
+    
+      cs_pts_ids       <- unique(cs_pts$hy_id)
+      start_cs_pts_ids <- length(cs_pts_ids)
+      
+      start_order_count <- 
+        flines %>% 
+        sf::st_drop_geometry() %>% 
+        dplyr::filter(id %in% cs_pts_ids) %>% 
+        dplyr::group_by(order) %>% 
+        dplyr::count() %>% 
+        tidyr::pivot_wider(names_from = order,  
+                         names_glue   = "start_order_{order}", 
+                         values_from  = n
+                         ) %>% 
+        dplyr::ungroup()
+    }
     
     # Remove any cross section that has ANY missing (NA) Z values. 
-    cs_pts <-
-      cs_pts %>%
+    cs_pts <- 
+      cs_pts %>% 
       # dplyr::filter(hy_id %in% c("wb-849054", "wb-845736")) %>% 
       dplyr::group_by(hy_id, cs_id) %>% 
       dplyr::filter(!any(is.na(Z))) %>% 
       dplyr::ungroup()
-
+    # try to extend any cross sections that returned cross section points with 
+    # identical Z values within a certain threshold ("flat" cross sections)
+    
+    system.time({
+    # cs_pts <- hydrofabric3D::rectify_flat_cs(
+      fixed_pts <- hydrofabric3D::rectify_flat_cs(
+                  cs_pts         = cs_pts, 
+                  net            = flines,
+                  cs             = transects,
+                  points_per_cs  = NULL,
+                  min_pts_per_cs = 10,
+                  dem            = DEM_URL,
+                  scale          = EXTENSION_PCT,
+                  threshold      = 1,
+                  pct_threshold  = 0.99,
+                  fix_ids        = FALSE
+                  )
+    })
+    
+    
+    end_rectify <- Sys.time()
+    rectify_time <- round(as.numeric(difftime(end_rectify, start_rectify, units = "secs")), 2)
+    
+    message("\n ---> Completed rectifying cross section points (", end_rectify,")")
+    
+    if(COLLECT_META) {
+      rectify_cs_pts_count <- nrow(fixed_pts)
+      # collect the hy_ids and number of stream orders in the RECTIFIED cs_pts
+      rectify_cs_pts_ids      <- unique(fixed_pts$hy_id)
+      rectify_cs_pts_id_count <- length(rectify_cs_pts_ids)
+
+      rectify_order_count <- 
+        flines %>% 
+        sf::st_drop_geometry() %>% 
+        dplyr::filter(id %in% rectify_cs_pts_ids) %>% 
+        dplyr::group_by(order) %>% 
+        dplyr::count() %>% 
+        tidyr::pivot_wider(names_from = order,  
+                           names_glue   = "rectify_order_{order}", 
+                           values_from  = n
+        ) %>% 
+        dplyr::ungroup()
+    }
+    
+    rm(cs_pts)
+    gc()
+    
+    message("\n\n ---> Cross section points rectified in ", rectify_time, " (seconds?) ")
+    
+    # Remove any cross section that has ANY missing (NA) Z values. 
+    fixed_pts <- 
+      fixed_pts %>% 
+      # dplyr::filter(hy_id %in% c("wb-849054", "wb-845736")) %>% 
+      # dplyr::group_by(hy_id, cs_id) %>% 
+      # dplyr::filter(!any(is.na(Z))) %>% 
+      # dplyr::ungroup() %>% 
+      hydrofabric3D::add_tmp_id()
+    
+    # Number of cross section points after removing any cross sections that contain any NA Z values
+    if(COLLECT_META) {
+      cs_pts_na_removed_count <- nrow(fixed_pts)
+    }
+    
+    # Stash meta data about the points 
+    pts_meta <- 
+      fixed_pts %>% 
+      sf::st_drop_geometry() %>% 
+      dplyr::select(hy_id, cs_id, pt_id, cs_measure, is_extended)
+    
+    message("Classifying cross section points...")
+    
+    # Classify points
+    fixed_pts <- hydrofabric3D::classify_points(fixed_pts)
+    
+    # add meta data back to the points
+    fixed_pts <- 
+      fixed_pts %>% 
+      dplyr::left_join(
+        pts_meta,
+        by = c("hy_id", "cs_id", "pt_id")
+        # dplyr::select(pts_meta, hy_id, cs_id, pt_id, cs_measure, is_extended)
+      )
+    
+    message("Gathering count of point types per cross section...")
+    
+    # get the counts of each point type to add this data to the transects dataset
+    point_type_counts <- hydrofabric3D::get_point_type_counts(fixed_pts, add = FALSE)
+    
     # # check the number of cross sections that were extended
-    # cs_pts$is_extended %>% table()
-
+    # fixed_pts$is_extended %>% table()
+    message("Subsetting cross section points generated after extending transects...")
+    
     # extract cross section points that have an "is_extended" value of TRUE
-    extended_pts = 
-      cs_pts %>% 
+    extended_pts <-
+      fixed_pts %>% 
       dplyr::filter(is_extended) %>% 
-      dplyr::mutate(tmp_id = paste0(hy_id, "_", cs_id)) 
-
+      hydrofabric3D::add_tmp_id()
+      # dplyr::mutate(tmp_id = paste0(hy_id, "_", cs_id)) 
+    
     # extract transects that have a "hy_id" in the "extended_pts" dataset
-    update_transects = 
+    update_transects <-
       transects %>% 
-      dplyr::mutate(tmp_id = paste0(hy_id, "_", cs_id)) %>%
+      hydrofabric3D::add_tmp_id() %>% 
       dplyr::filter(tmp_id %in% unique(extended_pts$tmp_id))
-      
+    
+    # Number of cross section points generated from extending transects, and counts of unique tmp IDs
+    if(COLLECT_META) {
+      extended_pts_count <- nrow(extended_pts)
+      extended_pts_ids   <- length(unique(extended_pts$tmp_id))
+      extended_transects_count <- nrow(update_transects)
+      extended_transects_ids   <- length(unique(update_transects$tmp_id))
+    }
+    
     # if any transects were extended, update the transects dataset, and overwrite local and S3 transects geopackages
     if (nrow(update_transects) > 0) {
       message("Updating ", nrow(update_transects), " transects")
-
-      update_transects =
+    
+      update_transects <-
         update_transects %>%
         # dplyr::filter(hy_id %in% unique(extended_pts$hy_id)) %>% 
         # apply extend_by_percent function to each transect line: 
@@ -151,81 +290,201 @@ for (i in 1:nrow(path_df)) {
           pct        = EXTENSION_PCT,
           length_col = "cs_lengthm"
         )
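+      # e.g. with EXTENSION_PCT = 0.5, a transect with cs_lengthm = 100 would come out
+      # around 150 m (assuming hydrofabric3D:::extend_by_percent() grows each line by
+      # pct of its current length)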
-
-    # remove old transects that have "tmp_id" in "extended_pts", and replace with "update_transects"
-    out_transects = 
-      transects %>% 
-      dplyr::mutate(tmp_id = paste0(hy_id, "_", cs_id)) %>%
-      dplyr::filter(!tmp_id %in% unique(extended_pts$tmp_id)) %>%
-      dplyr::bind_rows(
-        update_transects
-        )  %>% 
-      # dplyr::mutate(is_extended = FALSE) %>%
-      # dplyr::bind_rows(
-      #   dplyr::mutate(update_transects, is_extended = TRUE)
-      #   )  %>% 
-      dplyr::select(-tmp_id) 
-
+      
+      # # Number of transects being updated
+      # if(COLLECT_META) {
+        # extended_transects_count <- nrow(update_transects)
+        # extended_transects_ids   <- length(unique(update_transects$tmp_id))
+      # }
+      
+      # Keep only the transects whose points survived rectify_cs_pts(), drop the
+      # original versions of any transects that were extended ("tmp_id" in "extended_pts"),
+      # and add back the extended versions from "update_transects"
+      out_transects <- 
+        transects %>% 
+        hydrofabric3D::add_tmp_id() %>% 
+        # dplyr::filter(!tmp_id %in% unique(extended_pts$tmp_id)) %>%
+        # dplyr::filter(!tmp_id %in% )
+        dplyr::filter(tmp_id %in% unique(hydrofabric3D::add_tmp_id(fixed_pts)$tmp_id)) %>% 
+        dplyr::filter(!tmp_id %in% unique(extended_pts$tmp_id)) %>%
+        dplyr::bind_rows(
+          dplyr::mutate(
+            update_transects,
+            is_extended = TRUE
+            )
+          )  
+      
+        # dplyr::mutate(is_extended = FALSE) %>%
+        # dplyr::bind_rows(
+        #   dplyr::mutate(update_transects, is_extended = TRUE)
+        #   )  %>% 
+        # dplyr::select(-tmp_id) 
+      
+    } else {
+      
+      out_transects <- 
+        transects %>% 
+        hydrofabric3D::add_tmp_id() %>% 
+        dplyr::filter(tmp_id %in% unique(hydrofabric3D::add_tmp_id(fixed_pts)$tmp_id)) %>% 
+        dplyr::filter(!tmp_id %in% unique(extended_pts$tmp_id))
+    }
+    
+    # Number of final output transects and the number of unique tmp IDs (hy_id/cs_id, i.e. cross sections)
+    if(COLLECT_META) {
+      output_transects_count <- nrow(out_transects)
+      output_transects_ids   <- length(unique(out_transects$tmp_id))
+    }
+    
+    # finalize new transects
+    out_transects <- 
+      out_transects %>% 
+      dplyr::left_join(
+        point_type_counts,
+        by = c("hy_id", "cs_id")
+      ) %>% 
+      dplyr::select(hy_id, cs_source, cs_id, cs_measure, cs_lengthm, 
+                    # sinuosity, 
+                    is_extended, 
+                    left_bank_count, right_bank_count, channel_count, bottom_count,
+                    geom)
+      
+      # -------------------------------------------------------------------
+      # ---- Re-enumerate the transects & cross section points "cs_id" ----
+      # -------------------------------------------------------------------
+      
+      # Build a dataframe with a "new_cs_id" column that renumbers each hy_id's cs_ids
+      # to fill in any gaps, so every hy_id has cs_ids running from 1 to its number of
+      # cross sections. This dataframe is joined below to swap the original "cs_id" for
+      # "new_cs_id" in both the final cross section POINTS and UPDATED TRANSECTS outputs.
+      renumbered_ids <-
+        fixed_pts %>% 
+        sf::st_drop_geometry() %>% 
+        # dplyr::filter(hy_id %in% c("wb-2402800", "wb-2398282", "wb-2400351")) %>%
+        dplyr::select(hy_id, cs_id, pt_id, cs_measure) %>% 
+        dplyr::group_by(hy_id, cs_id) %>% 
+        dplyr::slice(1) %>% 
+        dplyr::ungroup() %>% 
+        hydrofabric3D::add_tmp_id() %>% 
+        dplyr::group_by(hy_id) %>% 
+        dplyr::mutate(
+          new_cs_id = 1:dplyr::n()
+        ) %>% 
+        dplyr::ungroup() %>% 
+        dplyr::select(new_cs_id, tmp_id)
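+      
+      # Toy illustration (hypothetical data): a hy_id whose remaining cross sections
+      # have cs_id = c(1, 3, 7) gets new_cs_id = c(1, 2, 3):
+      #   data.frame(hy_id = "wb-1", cs_id = c(1, 3, 7)) %>%
+      #     dplyr::group_by(hy_id) %>%
+      #     dplyr::mutate(new_cs_id = 1:dplyr::n()) %>%
+      #     dplyr::ungroup()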
+      
+      # Renumber the transects to have correct CS IDs
+      out_transects <- dplyr::left_join(
+                          hydrofabric3D::add_tmp_id(out_transects),
+                          renumbered_ids,
+                          by = "tmp_id"
+                        ) %>% 
+                        dplyr::select(-cs_id, -tmp_id) %>% 
+                        dplyr::select(hy_id, cs_source, cs_id = new_cs_id, 
+                                      cs_measure, cs_lengthm, 
+                                      # sinuosity,
+                                      is_extended, 
+                                      left_bank_count, right_bank_count, channel_count, bottom_count,
+                                      geometry = geom
+                                      )
+      
+      # # fline_lengths <-  sf::st_drop_geometry(flines) %>% 
+      # #   dplyr::filter(id %in% out_transects$hy_id) %>% 
+      # #   dplyr::mutate(lengthm = lengthkm * 1000) %>% 
+      # #   dplyr::select(hy_id = id, lengthm, lengthkm) 
+      # tmp <- dplyr::left_join( out_transects, fline_lengths, by = "hy_id") %>% 
+      #   dplyr::mutate(ds_distance = (cs_measure * lengthm) / 100) %>% 
+      #   dplyr::select(-sinuosity) %>% 
+      #   dplyr::relocate(hy_id, cs_id, cs_measure, lengthm, ds_distance, lengthkm) %>% 
+      #   dplyr::rename("geometry" = geom)
+      
+      # Renumber the cross sections points to have correct CS IDs
+      fixed_pts <- dplyr::left_join(
+                            hydrofabric3D::add_tmp_id(fixed_pts),
+                            renumbered_ids,
+                            by = "tmp_id"
+                          ) %>% 
+                        dplyr::select(-cs_id, -tmp_id) %>% 
+                        dplyr::rename(cs_id = new_cs_id)
+      
     # mapview::mapview(transects, color = "red") +
     #  mapview::mapview(dplyr::filter(out_transects, is_extended), color = "green") +
     #  mapview::mapview(flines, color = "dodgerblue") 
 
     ###################################### 
-    
-    ## Save local and REUPLOAD TRANSECTS to S3 to update for any extended cross sections
-    message("Saving updated transects to:\n - filepath: '", transect_path, "'")
-  
-    # save flowlines to out_path (lynker-spatial/01_transects/transects_<VPU num>.gpkg)
-    sf::write_sf(
-      out_transects,
-      transect_path
-    )
-
-    # command to copy transects geopackage to S3
-    trans_to_s3 <- paste0("aws s3 cp ", transect_path, " ", transects_prefix, transect_file, 
-                            ifelse(is.null(aws_profile), "", paste0(" --profile ", aws_profile)))
 
-    message("Copy VPU ", path_df$vpu[i], " transects to S3:\n - S3 copy command:\n'", 
-            trans_to_s3, 
-            "'\n==========================")
+    # ----------------------------------------------------------
+    # ---- Cross section points parquet to S3 ----
+    # ----------------------------------------------------------
     
-    system(trans_to_s3, intern = TRUE)
-
-    ###################################### 
-
-    }
-
     # classify the cross section points
-    cs_pts <-
-      cs_pts %>%
-      dplyr::rename(cs_widths = cs_lengthm) %>%
-      hydrofabric3D::classify_points() %>%
+    fixed_pts <- 
+      fixed_pts %>% 
       dplyr::mutate(
         X = sf::st_coordinates(.)[,1],
         Y = sf::st_coordinates(.)[,2]
       ) %>%
+      sf::st_drop_geometry() %>% 
       dplyr::select(
         hy_id, cs_id, pt_id,
-        cs_lengthm = cs_widths,
+        cs_lengthm,
         relative_distance,
         X, Y, Z,
-        class
+        class, point_type
         )
 
   # Drop point geometries, leaving just X, Y, Z values
-  cs_pts <- sf::st_drop_geometry(cs_pts)
+  fixed_pts <- sf::st_drop_geometry(fixed_pts)
 
   # add Z_source column for source of elevation data
-  cs_pts <-
-    cs_pts %>%
+  fixed_pts <-
+    fixed_pts %>%
     dplyr::mutate(
       Z_source = cs_source
       ) %>%
     dplyr::relocate(hy_id, cs_id, pt_id, cs_lengthm, relative_distance, X, Y, Z, Z_source, class)
 
+  # Number of final output cross section points, their unique tmp IDs (hy_id/cs_id, i.e. cross sections),
+  # and the number of transects dropped along the way
+  if(COLLECT_META) {
+    output_cs_pts_count      <- nrow(fixed_pts)
+    output_cs_pts_ids        <- length(unique(hydrofabric3D::add_tmp_id(fixed_pts)$tmp_id))
+    dropped_transects_count  <- transect_count - output_transects_count
+  }
+  
   ###################################### 
-
+  
+  # ----------------------------------------------------------
+  # ---- Re upload the updated transects geopackage to S3 ----
+  # ----------------------------------------------------------
+  updated_path <- gsub(transect_file, paste0("updated_", transect_file), transect_path)
+  
+  ## Save local and REUPLOAD TRANSECTS to S3 to update for any extended cross sections
+  message("Saving updated transects to:\n - filepath: '", updated_path, "'")
+  
+  # save flowlines to out_path (lynker-spatial/01_transects/transects_<VPU num>.gpkg)
+  sf::write_sf(
+    out_transects,
+    # transect_path
+    updated_path
+  )
+ 
+  # command to copy transects geopackage to S3
+  trans_to_s3 <- paste0("aws s3 cp ", updated_path, " ", transects_prefix, transect_file, 
+                        ifelse(is.null(aws_profile), "", paste0(" --profile ", aws_profile)))
+  
+  message("Copy VPU ", path_df$vpu[i], " transects to S3:\n - S3 copy command:\n'", 
+          trans_to_s3, 
+          "'\n==========================")
+  
+  system(trans_to_s3, intern = TRUE)
+  
   ###################################### 
+  ###################################### 
+  
+  # ----------------------------------------------------------
+  # ---- Upload the cross section points parquet to S3 ----
+  # ----------------------------------------------------------
+  
   # name of file and path to save transects gpkg too
   out_file <- paste0("nextgen_", path_df$vpu[i], "_cross_sections.parquet")
   out_path <- paste0(cs_pts_dir, out_file)
@@ -233,7 +492,7 @@ for (i in 1:nrow(path_df)) {
   message("Saving cross section points to:\n - filepath: '", out_path, "'")
   
   # save cross section points as a parquet to out_path (lynker-spatial/02_cs_pts/cs_pts_<VPU num>.parquet)
-  arrow::write_parquet(cs_pts, out_path)
+  arrow::write_parquet(fixed_pts, out_path)
   
   # command to copy cross section points parquet to S3
   copy_cs_pts_to_s3 <- paste0("aws s3 cp ", out_path, " ", cs_pts_prefix, out_file,
@@ -245,6 +504,50 @@ for (i in 1:nrow(path_df)) {
           "'\n==========================")
   
   system(copy_cs_pts_to_s3, intern = TRUE)
-
-}
+  
+  end <- Sys.time()
+  
+  message("Finished cross section point generation for VPU ", VPU)
+  message("- Completed at: ", end)
+  message("==========================")
+  
+  if(COLLECT_META) {
+    
+    meta_df <- data.frame(
+      vpu                 = VPU,
+      start               = as.character(start),
+      end                 = as.character(end),
+      start_cs_pts        = as.character(start_cs_pts),
+      end_cs_pts          = as.character(end_cs_pts),
+      start_rectify       = as.character(start_rectify),
+      end_rectify         = as.character(end_rectify),
+      fline_count         = fline_count,
+      transect_count      = transect_count,
+      wb_count            = wb_count,
+      fline_wb_count      = fline_wb_count,
+      transect_wb_count   = transect_wb_count,
+      start_cs_pts_count  = start_cs_pts_count,
+      start_cs_pts_ids    = start_cs_pts_ids,
+      rectify_cs_pts_count       = rectify_cs_pts_count,
+      rectify_cs_pts_ids         = rectify_cs_pts_id_count,
+      extended_transects_count   = extended_transects_count,
+      extended_transects_ids     = extended_transects_ids,
+      dropped_transects          = dropped_transects_count,
+      output_transects_count     = output_transects_count,
+      output_cs_pts_count        = output_cs_pts_count,
+      output_transects_ids       = output_transects_ids,
+      output_cs_pts_ids          = output_cs_pts_ids
+    )
+    
+    order_df <- cbind(data.frame(vpu = VPU), start_order_count, rectify_order_count)
+    
+    readr::write_csv(meta_df, paste0(meta_path, "nextgen_", VPU, "_cross_sections_metadata.csv"))
+    readr::write_csv(order_df, paste0(meta_path, "nextgen_", VPU, "_cross_sections_streamorder.csv"))
+  }
+  
+  rm(fixed_pts)
+  gc()
+  gc()
+  
+  }
 
diff --git a/runners/cs_runner/config.R b/runners/cs_runner/config.R
index 58cb4ed..41f995d 100644
--- a/runners/cs_runner/config.R
+++ b/runners/cs_runner/config.R
@@ -3,9 +3,11 @@ pacman::p_load(
   archive,
   hydrofabric,
   hydrofabric3D
-  # terrainSliceR
 )
 
+# # install.packages("devtools")
+# devtools::install_github("anguswg-ucsb/hydrofabric3D")
+
 # load root directory 
 source("runners/cs_runner/config_vars.R")
 
@@ -139,67 +141,173 @@ align_files_by_vpu <- function(
 # Returns a list of length 2 with logical vectors ("valid_flowlines", "valid_transects")
 # that subset the "flowlines" and "transects" sf objects to remove features that intersect waterbodies
 wb_intersects <- function(flowlines, trans, waterbodies) {
-
+  
   ########  ########  ########  ########  ########  ########
-    
-    flowlines_geos <- geos::as_geos_geometry(flowlines)
-    wbs_geos <- geos::as_geos_geometry(waterbodies)
-    
-    # temporary ID for transects that is the "hy_id", underscore, "cs_id", used for subsetting in future steps
-    trans$tmp_id <- paste0(trans$hy_id, "_", trans$cs_id)
-    
-    message("Checking flowlines against waterbodies...")
-    
-    # create an index between flowlines and waterbodies 
-    wb_index <- geos::geos_intersects_matrix(flowlines_geos, wbs_geos)
-    
-    # remove any flowlines that cross more than 1 waterbody
-    to_keep  <- flowlines[lengths(wb_index) == 0, ]
-    to_check <- flowlines[lengths(wb_index) != 0, ]
-    
-    # subset transects to the hy_ids in "to_check" set of flowlines
-    trans_check <- trans[trans$hy_id %in% unique(to_check$id), ]
-    # trans_check <- trans_geos[trans$hy_id %in% unique(to_check$id)]
-    
-    # check where the transects linestrings intersect with the waterbodies
-    trans_geos_check <- geos::as_geos_geometry(trans_check)
-    
-    message("Checking transects against waterbodies...")
-    trans_wb_index <- geos::geos_intersects_any(
-      trans_geos_check,  
-      wbs_geos[unlist(wb_index)]
+  
+  flowlines_geos <- geos::as_geos_geometry(flowlines)
+  wbs_geos <- geos::as_geos_geometry(waterbodies)
+  
+  # temporary ID for transects that is the "hy_id", underscore, "cs_id", used for subsetting in future steps
+  trans$tmp_id <- paste0(trans$hy_id, "_", trans$cs_id)
+  
+  message("Checking flowlines against waterbodies...")
+  
+  # create an index between flowlines and waterbodies 
+  wb_index <- geos::geos_intersects_matrix(flowlines_geos, wbs_geos)
+  
+  # split out flowlines that intersect at least one waterbody ("to_check") from those that intersect none ("to_keep")
+  to_keep  <- flowlines[lengths(wb_index) == 0, ]
+  to_check <- flowlines[lengths(wb_index) != 0, ]
+  
+  # subset transects to the hy_ids in "to_check" set of flowlines
+  trans_check <- trans[trans$hy_id %in% unique(to_check$id), ]
+  # trans_check <- trans_geos[trans$hy_id %in% unique(to_check$id)]
+  
+  # check where the transects linestrings intersect with the waterbodies
+  trans_geos_check <- geos::as_geos_geometry(trans_check)
+  
+  message("Checking transects against waterbodies (v2) ...")
+  wb_trans_index <- geos::geos_intersects_matrix(trans_geos_check, wbs_geos)                    # (NEW METHOD)
+  # wb_trans_index <- geos::geos_intersects_any(trans_geos_check, wbs_geos[unlist(wb_index)])   # (OLD METHOD)
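+  
+  # geos_intersects_matrix() returns, for each transect, the integer indices of the
+  # waterbodies it intersects (like sf::st_intersects), so lengths(wb_trans_index) == 0
+  # identifies transects that intersect NO waterbodies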
+  
+  # sum(lengths(wb_trans_index) == 0)
+  # length(wb_trans_index)
+  
+  # within the transect lines that are on a flowline that crosses a waterbody, 
+  # check which of these transect lines DO NOT CROSS A WATERBODY AT ALL
+  trans_keep <- trans_check[lengths(wb_trans_index) == 0, ]                       # (NEW METHOD)
+  # trans_keep <- trans_check[!wb_trans_index, ]                                  # (OLD METHOD)
+  
+  # preserve any flowlines that CROSS A WATERBODY BUT ALSO HAVE A TRANSECT LINE that does NOT cross any waterbodies
+  to_check <- to_check[to_check$id %in% unique(trans_keep$hy_id), ]
+  
+  # update the flowlines to keep by adding back flowlines that intersect a waterbody
+  # BUT still have transects that are NOT in the waterbody
+  to_keep <- dplyr::bind_rows(to_keep, to_check)
+  
+  # 'tmp_ids' of transects that are being checked and also the transects within trans_check 
+  # that were determined to be valid (are being kept)
+  check_ids <- unique(trans_check$tmp_id)
+  keep_ids <- unique(trans_keep$tmp_id)
+  
+  # logical vectors of which flowlines/transects to keep (KEEP == TRUE)
+  # - Remove any transects that are on flowlines that cross a waterbody AND the transect crosses the waterbody too.
+  # - Keep original transects that are not on flowlines that intersect waterbodies AND 
+  # also the transects that do NOT intersect waterbodies but are on a flowline that DOES intersect a waterbody
+  valid_flowlines <- flowlines$id %in% to_keep$id
+  valid_transects <- trans$tmp_id %in% dplyr::filter(trans,
+                                                     !tmp_id %in% check_ids[!check_ids %in% keep_ids])$tmp_id
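+  # (i.e. keep every transect whose tmp_id was either never flagged for checking, or
+  #  was checked against the waterbodies and found not to intersect any)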
+  
+  # return a list of logical vectors marking which flowlines and transects to keep
+  return(
+    list(
+      "valid_flowlines" =  valid_flowlines,
+      "valid_transects" =  valid_transects
     )
+  )
+  
+}
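+
+# Example usage (mirrors the call in 02_cs_pts.R):
+# feature_subsets <- wb_intersects(flines, transects, waterbodies)
+# flines          <- flines[feature_subsets$valid_flowlines, ]
+# transects       <- transects[feature_subsets$valid_transects, ]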
+
+# Update flowlines and transects to remove flowlines and transects that intersect with reference_features waterbodies
+# flowlines: flowlines linestring sf object
+# trans: transects linestring sf object
+# waterbodies: waterbodies polygon sf object
+# Returns a list of length 2 with logical vectors ("valid_flowlines", "valid_transects")
+# that subset the "flowlines" and "transects" sf objects to remove features that intersect waterbodies
+wb_intersects_v1 <- function(flowlines, trans, waterbodies) {
+  
+  ########  ########  ########  ########  ########  ########
+  
+  flowlines_geos <- geos::as_geos_geometry(flowlines)
+  wbs_geos <- geos::as_geos_geometry(waterbodies)
+  
+  # temporary ID for transects that is the "hy_id", underscore, "cs_id", used for subsetting in future steps
+  trans$tmp_id <- paste0(trans$hy_id, "_", trans$cs_id)
+  
+  message("Checking flowlines against waterbodies...")
+  
+  # create an index between flowlines and waterbodies 
+  wb_index <- geos::geos_intersects_matrix(flowlines_geos, wbs_geos)
+  
+  # split out flowlines that intersect at least one waterbody ("to_check") from those that intersect none ("to_keep")
+  to_keep  <- flowlines[lengths(wb_index) == 0, ]
+  to_check <- flowlines[lengths(wb_index) != 0, ]
+  
+  # subset transects to the hy_ids in "to_check" set of flowlines
+  trans_check <- trans[trans$hy_id %in% unique(to_check$id), ]
+  # trans_check <- trans_geos[trans$hy_id %in% unique(to_check$id)]
+  
+  # check where the transects linestrings intersect with the waterbodies
+  trans_geos_check <- geos::as_geos_geometry(trans_check)
+  
+  message("Checking transects against waterbodies...")
+  trans_wb_index <- geos::geos_intersects_any(
+    trans_geos_check,  
+    wbs_geos[unlist(wb_index)]
+  )
+  
+  # within the transect lines that are on a flowline that crosses a waterbody, 
+  # check which of these transect lines DO NOT CROSS A WATERBODY AT ALL
+  trans_keep <- trans_check[!trans_wb_index, ]
+  # trans_keep <- trans_check[lengths(trans_wb_index2) == 0, ]
+  
+  # preserve any flowlines that CROSS A WATERBODY BUT ALSO HAVE A TRANSECT LINE that does NOT cross any waterbodies
+  to_check <- to_check[to_check$id %in% unique(trans_keep$hy_id), ]
+  
+  # update the flowlines to keep by adding back flowlines that intersect a waterbody
+  # BUT still have transects that are NOT in the waterbody
+  to_keep <- dplyr::bind_rows(to_keep, to_check)
+  
+  # 'tmp_ids' of transects that are being checked and also the transects within trans_check 
+  # that were determined to be valid (are being kept)
+  check_ids <- unique(trans_check$tmp_id)
+  keep_ids <- unique(trans_keep$tmp_id)
+  
+  # logical vectors of which flowlines/transects to keep (KEEP == TRUE)
+  # - Remove any transects that are on flowlines that cross a waterbody AND the transect crosses the waterbody too.
+  # - Keep original transects that are not on flowlines that intersect waterbodies AND 
+  # also the transects that do NOT intersect waterbodies but are on a flowline that DOES intersect a waterbody
+  valid_flowlines <- flowlines$id %in% to_keep$id
+  valid_transects <- trans$tmp_id %in% dplyr::filter(trans,
+                                                     !tmp_id %in% check_ids[!check_ids %in% keep_ids])$tmp_id
   
-    # within the transects lines that are on a flowline that crosses a waterbody, 
-    # check if any of these transects line DO NOT CROSS A WATERBODY AT ALL
-    trans_keep <- trans_check[!trans_wb_index, ]
-    # trans_keep <- trans_check[lengths(trans_wb_index2) == 0, ]
-  
-    # preserve any flowlines that CROSS A WATERBODY BUT ALSO HAVE A TRANSECT LINE that does NOT cross any waterbodies
-    to_check <- to_check[to_check$id %in% unique(trans_keep$hy_id), ]
-    
-    # update flowlines to keep with flowlines that intersect a waterbody BUT STILL,
-    # have transects that are NOT in the waterbody
-    to_keep <- dplyr::bind_rows(to_keep, to_check)
-    
-    # 'tmp_ids' of transects that are being checked and also the transects within trans_check 
-    # that were determined to be valid (are being kept)
-    check_ids <- unique(trans_check$tmp_id)
-    keep_ids <- unique(trans_keep$tmp_id)
-    
-    # logical vectors of which flowlines/transects to keep (KEEP == TRUE)
-    # - Remove any transects that are on flowlines that cross a waterbody AND the transect crosses the waterbody too.
-    # - Keep original transects that are not on flowlines that intersect waterbodies AND 
-    # also the transects that do NOT intersect waterbodies but are on a flowline that DOES intersect a waterbody
-    valid_flowlines <- flowlines$id %in% to_keep$id
-    valid_transects <- trans$tmp_id %in% dplyr::filter(trans,
-                                                    !tmp_id %in% check_ids[!check_ids %in% keep_ids])$tmp_id
-    
-    # return alist of updated flowlines and transects 
-    return(
-      list(
-        "valid_flowlines" =  valid_flowlines,
-        "valid_transects" =  valid_transects
-      )
+  # return a list of logical vectors marking which flowlines and transects to keep
+  return(
+    list(
+      "valid_flowlines" =  valid_flowlines,
+      "valid_transects" =  valid_transects
     )
+  )
 }
diff --git a/runners/cs_runner/config_vars.R b/runners/cs_runner/config_vars.R
index 72d87b9..c282421 100644
--- a/runners/cs_runner/config_vars.R
+++ b/runners/cs_runner/config_vars.R
@@ -15,6 +15,10 @@ DEM_URL      <- "/vsicurl/https://prd-tnm.s3.amazonaws.com/StagedProducts/Elevat
 # Default setting is 50% of the original transect lines length (0.5)
 EXTENSION_PCT <- 0.5
 
+# Whether to collect meta data from runs to generate an output CSV (currently only being created in 02_cs_pts.R)
+COLLECT_META <- TRUE
+
+
 # # create the directory if it does NOT exist
 # if(!dir.exists(base_dir)) {
 #   message(glue::glue('Base directory does not exist...\nCreating directory: {base_dir}'))

From a5d6e05ea0ec6e6ecff35389809f29d773536139 Mon Sep 17 00:00:00 2001
From: anguswg-ucsb <anguswatters@gmail.com>
Date: Mon, 26 Feb 2024 08:01:03 -0800
Subject: [PATCH 06/64] added COLLECT_META flag before setting the meta_path
 variable in cs runner

---
 runners/cs_runner/02_cs_pts.R | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/runners/cs_runner/02_cs_pts.R b/runners/cs_runner/02_cs_pts.R
index 93c561b..8b2afbf 100644
--- a/runners/cs_runner/02_cs_pts.R
+++ b/runners/cs_runner/02_cs_pts.R
@@ -40,8 +40,12 @@ path_df <- align_files_by_vpu(
             by = "vpu"
             )
 
-# Local path to save CSVs of cross section meta data during each iteration
-meta_path <- "/local/path/to/save/cross_section_meta_data/"
+# Where should meta data CSVs be saved to? 
+if(COLLECT_META) {
+  # Local path to save CSVs of cross section meta data during each iteration
+  meta_path <- "/local/path/to/save/cross_section_meta_data/"
+  
+}
 
 # loop over the nextgen and transect datasets (by VPU) and extract point elevations across points on each transect line,
 # then classify the points, and create a parquet file with hy_id, cs_id, pt_id, X, Y, Z data.

From ad88c273733f21936390ccfac3cfdd5e5eb2a72e Mon Sep 17 00:00:00 2001
From: anguswg-ucsb <anguswatters@gmail.com>
Date: Wed, 28 Feb 2024 08:53:06 -0800
Subject: [PATCH 07/64] updated cross section points generation to better
 rectify flat cross section points; also added COLLECT_META and META_PATH to
 config_vars.R that, if enabled, save out changes made during cs pt
 rectification

---
 runners/cs_runner/01_transects.R |  3 ---
 runners/cs_runner/02_cs_pts.R    | 29 +++++++++++------------------
 runners/cs_runner/config_vars.R  |  5 +++++
 3 files changed, 16 insertions(+), 21 deletions(-)

diff --git a/runners/cs_runner/01_transects.R b/runners/cs_runner/01_transects.R
index 6bc0025..8216658 100644
--- a/runners/cs_runner/01_transects.R
+++ b/runners/cs_runner/01_transects.R
@@ -3,7 +3,6 @@ source("runners/cs_runner/config.R")
 
 # # # # load libraries
 # library(hydrofabric3D)
-# # library(terrainSliceR)
 # library(dplyr)
 # library(sf)
 
@@ -27,8 +26,6 @@ path_df <- align_files_by_vpu(
 # loop over each VPU and generate cross sections, then save locally and upload to S3 bucket
 for(i in 1:nrow(path_df)) {
   
-  # i = 8
-  
   # nextgen file and full path
   nextgen_file <- path_df$x[i]
   nextgen_path <- paste0(nextgen_dir, nextgen_file)
diff --git a/runners/cs_runner/02_cs_pts.R b/runners/cs_runner/02_cs_pts.R
index 8b2afbf..2423cad 100644
--- a/runners/cs_runner/02_cs_pts.R
+++ b/runners/cs_runner/02_cs_pts.R
@@ -40,13 +40,6 @@ path_df <- align_files_by_vpu(
             by = "vpu"
             )
 
-# Where should meta data CSVs be saved to? 
-if(COLLECT_META) {
-  # Local path to save CSVs of cross section meta data during each iteration
-  meta_path <- "/local/path/to/save/cross_section_meta_data/"
-  
-}
-
 # loop over the nextgen and transect datasets (by VPU) and extract point elevations across points on each transect line,
 # then classify the points, and create a parquet file with hy_id, cs_id, pt_id, X, Y, Z data.
 # Save parquet locally and upload to specified S3 bucket
@@ -175,15 +168,15 @@ for (i in 1:nrow(path_df)) {
     system.time({
     # cs_pts <- hydrofabric3D::rectify_flat_cs(
       fixed_pts <- hydrofabric3D::rectify_flat_cs(
-                  cs_pts         = cs_pts, 
-                  net            = flines,
-                  cs             = transects,
-                  points_per_cs  = NULL,
-                  min_pts_per_cs = 10,
-                  dem            = DEM_URL,
-                  scale          = EXTENSION_PCT,
-                  threshold      = 1,
-                  pct_threshold  = 0.99,
+                  cs_pts         = cs_pts,    # cross section points generated from hydrofabric3D::cross_section_pts()
+                  net            = flines,    # original flowline network
+                  cs             = transects, # original transect lines
+                  points_per_cs  = NULL, 
+                  min_pts_per_cs = 10, # minimum number of points per cross section
+                  dem            = DEM_URL, # DEM to extract points from
+                  scale          = EXTENSION_PCT, # how far to extend transects if the points need to be rechecked
+                  threshold      = 1,    # within 1 meter of the bottom
+                  pct_threshold  = 0.99, # rectify if 99% of points are within 1 meter of the bottom
                   fix_ids        = FALSE
                   )
     })
@@ -545,8 +538,8 @@ for (i in 1:nrow(path_df)) {
     
     order_df <- cbind(data.frame(vpu = VPU), start_order_count, rectify_order_count)
     
-    readr::write_csv(meta_df, paste0(meta_path, "nextgen_", VPU, "_cross_sections_metadata.csv"))
-    readr::write_csv(order_df, paste0(meta_path, "nextgen_", VPU, "_cross_sections_streamorder.csv"))
+    readr::write_csv(meta_df, paste0(META_PATH, "nextgen_", VPU, "_cross_sections_metadata.csv"))
+    readr::write_csv(order_df, paste0(META_PATH, "nextgen_", VPU, "_cross_sections_streamorder.csv"))
   }
   
   rm(fixed_pts)
diff --git a/runners/cs_runner/config_vars.R b/runners/cs_runner/config_vars.R
index c282421..8745b7a 100644
--- a/runners/cs_runner/config_vars.R
+++ b/runners/cs_runner/config_vars.R
@@ -18,6 +18,11 @@ EXTENSION_PCT <- 0.5
 # Whether to collect meta data from runs to generate an output CSV (currently only being created in 02_cs_pts.R)
 COLLECT_META <- TRUE
 
+# Where should meta data CSVs be saved to? 
+# Local path to save CSVs of cross section meta data during each iteration
+META_PATH <- '/Users/anguswatters/Desktop/cs_meta/'
+# META_PATH <-  "/local/path/to/save/cross_section_meta_data/"
+
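+# # Optional sketch: create META_PATH if it does not already exist
+# if (COLLECT_META && !dir.exists(META_PATH)) {
+#   dir.create(META_PATH, recursive = TRUE)
+# }
+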
 
 # # create the directory if it does NOT exist
 # if(!dir.exists(base_dir)) {

From 7bf195e7d68f436e56ce264e2f162c956d47dfc3 Mon Sep 17 00:00:00 2001
From: anguswg-ucsb <anguswatters@gmail.com>
Date: Tue, 19 Mar 2024 15:38:06 -0700
Subject: [PATCH 08/64] added download script for FEMA 100 year flood fgb
 files, and related config var variables

---
 runners/cs_runner/01_transects.R     |    5 +-
 runners/cs_runner/02_cs_pts.R        | 1327 ++++++++++++++++++--------
 runners/cs_runner/config_vars.R      |   14 +
 runners/cs_runner/download_fema100.R |   50 +
 4 files changed, 988 insertions(+), 408 deletions(-)
 create mode 100644 runners/cs_runner/download_fema100.R

diff --git a/runners/cs_runner/01_transects.R b/runners/cs_runner/01_transects.R
index 8216658..912d95d 100644
--- a/runners/cs_runner/01_transects.R
+++ b/runners/cs_runner/01_transects.R
@@ -5,6 +5,9 @@ source("runners/cs_runner/config.R")
 # library(hydrofabric3D)
 # library(dplyr)
 # library(sf)
+# install.packages("devtools")
+
+# devtools::install_github("anguswg-ucsb/hydrofabric3D")
 
 # transect bucket prefix
 transects_prefix <- paste0(s3_bucket, version_prefix, "/3D/transects/")
@@ -39,7 +42,7 @@ for(i in 1:nrow(path_df)) {
   
   # read in nextgen data
   flines <- sf::read_sf(nextgen_path, layer = "flowpaths")
-
+  
   # #  model attributes
   # model_attrs <- arrow::read_parquet(model_attr_path)
 
diff --git a/runners/cs_runner/02_cs_pts.R b/runners/cs_runner/02_cs_pts.R
index 2423cad..dff9218 100644
--- a/runners/cs_runner/02_cs_pts.R
+++ b/runners/cs_runner/02_cs_pts.R
@@ -1,11 +1,15 @@
+
+###########################################################################################################################################
+################################################     REDO EVERYTHING     #######################################################
+###########################################################################################################################################
+
 # Generate the flowlines layer for the final cross_sections_<VPU>.gpkg for each VPU
 source("runners/cs_runner/config.R")
 
 # # load libraries
-# library(hydrofabric3D)
-# # library(terrainSliceR)
-# library(dplyr)
-# library(sf)
+library(hydrofabric3D)
+library(dplyr)
+library(sf)
 
 # cross section bucket prefix
 cs_pts_prefix <- paste0(s3_bucket, version_prefix, "/3D/dem-cross-sections/")
@@ -19,6 +23,7 @@ nextgen_files <- list.files(nextgen_dir, full.names = FALSE)
 
 # paths to nextgen datasets
 transect_files <- list.files(transects_dir, full.names = FALSE)
+transect_files <- transect_files[!grepl("updated_", transect_files)]
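+# ("updated_*" geopackages are re-written outputs of this script's transect update step, so they are excluded from the inputs)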
 
 # string to fill in "cs_source" column in output datasets
 cs_source <- "hydrofabric3D"
@@ -31,20 +36,20 @@ ref_df <- data.frame(
 
 # ensure the files are in the same order and matched up by VPU
 path_df <- align_files_by_vpu(
-              x    = nextgen_files,
-              y    = transect_files,
-              base = base_dir
-            ) %>% 
-          dplyr::left_join(
-            ref_df,
-            by = "vpu"
-            )
+  x    = nextgen_files,
+  y    = transect_files,
+  base = base_dir
+) %>% 
+  dplyr::left_join(
+    ref_df,
+    by = "vpu"
+  )
 
 # loop over the nextgen and transect datasets (by VPU) and extract point elevations across points on each transect line,
 # then classify the points, and create a parquet file with hy_id, cs_id, pt_id, X, Y, Z data.
 # Save parquet locally and upload to specified S3 bucket
-for (i in 1:nrow(path_df)) {
-
+for (i in 20:nrow(path_df)) {
+  
   start <- Sys.time()
   
   # nextgen file and full path
@@ -67,386 +72,319 @@ for (i in 1:nrow(path_df)) {
           "'\n - transects: '", transect_file, "'", 
           "\n - waterbodies: '", ref_file, "'",
           "'\n - start time: '", start, "'"
-          )
-
+  )
+  
   ################### 
   
-    # read in transects data
-    transects <- sf::read_sf(transect_path)
+  # read in transects data
+  transects <- sf::read_sf(transect_path)
   
-    # read in nextgen data
-    flines <- sf::read_sf(nextgen_path, layer = "flowpaths")
+  # read in nextgen data
+  flines <- sf::read_sf(nextgen_path, layer = "flowpaths")
   
-    # read in waterbodies reference features layer
-    waterbodies <- sf::read_sf(ref_path, layer = "waterbodies")
-
-    ##### subset flowlines and transects to first 5 features for testing #####
-    # flines = dplyr::slice(flines, 1:5) 
-    # transects = dplyr::filter(transects, hy_id %in% unique(flines$id))
-    ##### #####
-    
-    # system.time({
-      # Update flowlines and transects to remove flowlines and transects that intersect with reference_features waterbodies
-      feature_subsets <- wb_intersects(flines, transects, waterbodies)
-    # })
-    
-    # Collect meta data on features and changes
-    if(COLLECT_META) {
-      
-      fline_count       <- nrow(flines)
-      transect_count    <- nrow(transects)
-      wb_count          <- nrow(waterbodies)
-      
-      fline_wb_count    <- sum(feature_subsets$valid_flowlines)
-      transect_wb_count <- sum(feature_subsets$valid_transects)
-    }
-    
-    # replace flowlines and transects objects with updated versions in "updated_features"
-    flines    <- flines[feature_subsets$valid_flowlines, ]
-    transects <- transects[feature_subsets$valid_transects, ]
-    
-    rm(waterbodies)
-    gc()
-    
-    start_cs_pts <- Sys.time()
-    message("Extracting cross section points (", start_cs_pts,")")    
-    
-    system.time({
+  # read in waterbodies reference features layer
+  waterbodies <- sf::read_sf(ref_path, layer = "waterbodies")
+  
+  ##### subset flowlines and transects to first 5 features for testing #####
+  # flines = dplyr::slice(flines, 1:5) 
+  # transects = dplyr::filter(transects, hy_id %in% unique(flines$id))
+  ##### #####
   
+  # system.time({
+  # Update flowlines and transects to remove flowlines and transects that intersect with reference_features waterbodies
+  feature_subsets <- wb_intersects(flines, transects, waterbodies)
+  # })
+  
+  # tmp <- transects %>% 
+  #   dplyr::slice(1:15000)
+  # tmp_flines <- flines %>% 
+  #   dplyr::filter(id %in% tmp$hy_id)
+  # mapview::mapview(tmp_flines, color = "dodgerblue") + mapview::mapview(tmp, color = "red") 
+  
+  # replace flowlines and transects objects with updated versions in "updated_features"
+  flines    <- flines[feature_subsets$valid_flowlines, ]
+  transects <- transects[feature_subsets$valid_transects, ]
+  
+  rm(waterbodies)
+  gc()
+  
+  #################################################################
+  ##### Temporary subsetting to speed local development up ########
+  #################################################################
+  
+  # flines <- 
+  #   flines %>% 
+  #   dplyr::group_by(order) %>% 
+  #   dplyr::slice(1:100) %>% 
+  #   dplyr::ungroup()
+  # # flines <- 
+  # #   flines %>% 
+  # #   dplyr::slice(1:2500) 
+  # 
+  # transects <- 
+  #   transects %>% 
+  #   # dplyr::filter(hy_id %in% unique(tmp$id)) 
+  #   dplyr::filter(hy_id %in% unique(flines$id)) 
+  
+  #################################################################
+  #################################################################
+  
+  start_cs_pts <- Sys.time()
+  message("Extracting cross section points (", start_cs_pts, ")")
+  # message("Extracting cross section points (", Sys.time(),")")    
+  
+  # STEP 1: Extract cs points from DEM
+  # system.time({
+    
     # get cross section point elevations
     cs_pts <- hydrofabric3D::cross_section_pts(
       cs             = transects,
       points_per_cs  = NULL,
       min_pts_per_cs = 10,
       dem            = DEM_URL
-      )
-    
-    })
-    
-    end_cs_pts <- Sys.time()
-    message("\n ---> Completed extraction of cross section points (", end_cs_pts,")")
-    
-    if(COLLECT_META) {
-      start_cs_pts_count <- nrow(cs_pts)
-    }
-    
-    # cs_pts_time <- round(as.numeric(end_cs_pts - start_cs_pts ), 2)
-    # message("\n\n ---> Cross section point elevations processed in ", cs_pts_time)
-    
-    start_rectify <- Sys.time()
-    message("Rectifying cross section points (", start_rectify,")")
-    
-    # collect the hy_ids and number of stream orders in cs_pts
-    if(COLLECT_META) {
-    
-      cs_pts_ids       <- unique(cs_pts$hy_id)
-      start_cs_pts_ids <- length(cs_pts_ids)
-      
-      start_order_count <- 
-        flines %>% 
-        sf::st_drop_geometry() %>% 
-        dplyr::filter(id %in% cs_pts_ids) %>% 
-        dplyr::group_by(order) %>% 
-        dplyr::count() %>% 
-        tidyr::pivot_wider(names_from = order,  
-                         names_glue   = "start_order_{order}", 
-                         values_from  = n
-                         ) %>% 
-        dplyr::ungroup()
-    }
-    
-    # Remove any cross section that has ANY missing (NA) Z values. 
-    cs_pts <- 
-      cs_pts %>% 
-      # dplyr::filter(hy_id %in% c("wb-849054", "wb-845736")) %>% 
-      dplyr::group_by(hy_id, cs_id) %>% 
-      dplyr::filter(!any(is.na(Z))) %>% 
-      dplyr::ungroup()
-    # try to extend any cross sections that returned cross section points with 
-    # identical Z values within a certain threshold ("flat" cross sections)
+    )
     
-    system.time({
+  # })
+  
+  # system.time({
+  
+  # STEP 2:
+  # Remove any cross section that has ANY missing (NA) Z values, and classify the points 
+  cs_pts <- 
+    cs_pts %>% 
+    # dplyr::filter(hy_id %in% c("wb-849054", "wb-845736")) %>% 
+    dplyr::group_by(hy_id, cs_id) %>% 
+    dplyr::filter(!any(is.na(Z))) %>% 
+    dplyr::ungroup() %>% 
+    hydrofabric3D::classify_points(pct_of_length_for_relief = 0.01)  
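+  
+  # NOTE: with pct_of_length_for_relief = 0.01, a cross section needs a depth range of
+  # at least 1% of its length (e.g. ~1 m for a 100 m cross section) to count as "having relief"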
+  
+  # })
+  
+  # STEP 3: Try to rectify any no relief and invalid banks cross sections
+  system.time({
     # cs_pts <- hydrofabric3D::rectify_flat_cs(
-      fixed_pts <- hydrofabric3D::rectify_flat_cs(
-                  cs_pts         = cs_pts,    # cross section points generated from hydrofabric3D::cross_section_pts()
-                  net            = flines,    # original flowline network
-                  cs             = transects, # original transect lines
-                  points_per_cs  = NULL, 
-                  min_pts_per_cs = 10, # number of points per cross sections
-                  dem            = DEM_URL, # DEM to extract points from
-                  scale          = EXTENSION_PCT, # How far to extend transects if the points need to be rechecked
-                  threshold      = 1,    # 1 meter from bottom
-                  pct_threshold  = 0.99, # rectify if 99% points are within 1 meter from the bottom
-                  fix_ids        = FALSE
-                  )
-    })
-    
-    
-    end_rectify <- Sys.time()
-    rectify_time <- round(as.numeric(end_rectify - start_rectify ), 2)
-    
-    message("\n ---> Completed rectifying cross section points (", end_rectify,")")
-    
-    if(COLLECT_META) {
-      rectify_cs_pts_count <- nrow(fixed_pts)
-      # collect the hy_ids and number of stream orders in the RECTIFIED cs_pts
-      rectify_cs_pts_ids      <- unique(fixed_pts$hy_id)
-      rectify_cs_pts_id_count <- length(rectify_cs_pts_ids)
-
-      rectify_order_count <- 
-        flines %>% 
-        sf::st_drop_geometry() %>% 
-        dplyr::filter(id %in% rectify_cs_pts_ids) %>% 
-        dplyr::group_by(order) %>% 
-        dplyr::count() %>% 
-        tidyr::pivot_wider(names_from = order,  
-                           names_glue   = "rectify_order_{order}", 
-                           values_from  = n
-        ) %>% 
-        dplyr::ungroup()
-    }
-    
-    rm(cs_pts)
-    gc()
-    
-    message("\n\n ---> Cross section points rectified in ", rectify_time, " (seconds?) ")
-    
-    # Remove any cross section that has ANY missing (NA) Z values. 
-    fixed_pts <- 
-      fixed_pts %>% 
-      # dplyr::filter(hy_id %in% c("wb-849054", "wb-845736")) %>% 
-      # dplyr::group_by(hy_id, cs_id) %>% 
-      # dplyr::filter(!any(is.na(Z))) %>% 
-      # dplyr::ungroup() %>% 
-      hydrofabric3D::add_tmp_id()
-    
-    # Number of cross section points after removing any cross sections that contain any NA Z values
-    if(COLLECT_META) {
-      cs_pts_na_removed_count <- nrow(fixed_pts)
-    }
-    
-    # Stash meta data about the points 
-    pts_meta <- 
-      fixed_pts %>% 
-      sf::st_drop_geometry() %>% 
-      dplyr::select(hy_id, cs_id, pt_id, cs_measure, is_extended)
-    
-    message("Classifying cross section points...")
-    
-    # Classify points
-    fixed_pts <- hydrofabric3D::classify_points(fixed_pts)
+    fixed_pts <- hydrofabric3D::rectify_cs(
+      cs_pts         = cs_pts,    # cross section points generated from hydrofabric3D::cross_section_pts()
+      net            = flines,    # original flowline network
+      transects      = transects, # original transect lines
+      points_per_cs  = NULL, 
+      min_pts_per_cs = 10, # minimum number of points per cross section
+      dem            = DEM_URL, # DEM to extract points from
+      scale          = EXTENSION_PCT, # how far to extend transects if the points need to be rechecked
+      pct_of_length_for_relief = PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF, # percent of a cross section's length that its depth range must meet for it to count as "having relief"
+      fix_ids = FALSE,
+      verbose = TRUE
+    )
+  })
+  # })
+  
+  # get a summary dataframe and print out details message
+  rectify_summary <- hydrofabric3D::rectify_summary(cs_pts, fixed_pts, verbose = TRUE)
+  rectify_summary <- 
+    rectify_summary %>% 
+    dplyr::mutate(
+      vpu = VPU
+    ) %>% 
+    dplyr::relocate(vpu, metric, value)
+  
+  readr::write_csv(rectify_summary, paste0(META_PATH, "nextgen_", VPU, "_cross_sections_metadata.csv"))
+  
+  # get the counts of each point type to add this data to the transects dataset
+  point_type_counts <- hydrofabric3D::get_point_type_counts(fixed_pts, add = FALSE)
+  
+  # # check the number of cross sections that were extended
+  # fixed_pts$is_extended %>% table()
+  message("Subsetting cross section points generated after extending transects...")
+  
+  # extract cross section points that have an "is_extended" value of TRUE
+  extended_pts <-
+    fixed_pts %>% 
+    dplyr::filter(is_extended) %>% 
+    hydrofabric3D::add_tmp_id()
+  # dplyr::mutate(tmp_id = paste0(hy_id, "_", cs_id)) 
+  
+  # extract transects that have a "hy_id" in the "extended_pts" dataset
+  update_transects <-
+    transects %>% 
+    hydrofabric3D::add_tmp_id() %>% 
+    dplyr::filter(tmp_id %in% unique(extended_pts$tmp_id))
+  
+  # if any transects were extended, update the transects dataset, and overwrite local and S3 transects geopackages
+  if (nrow(update_transects) > 0) {
+    message("Updating ", nrow(update_transects), " transects")
     
-    # add meta data back to the points
-    fixed_pts <- 
-      fixed_pts %>% 
-      dplyr::left_join(
-        pts_meta,
-        by = c("hy_id", "cs_id", "pt_id")
-        # dplyr::select(pts_meta, hy_id, cs_id, pt_id, cs_measure, is_extended)
+    update_transects <-
+      update_transects %>%
+      # dplyr::filter(hy_id %in% unique(extended_pts$hy_id)) %>% 
+      # apply extend_by_percent function to each transect line: 
+      hydrofabric3D:::extend_by_percent(
+        pct        = EXTENSION_PCT,
+        length_col = "cs_lengthm"
       )
     
-    message("Gathering count of point types per cross section...")
-    
-    # get the counts of each point type to add this data to the transects dataset
-    point_type_counts <- hydrofabric3D::get_point_type_counts(fixed_pts, add = FALSE)
-    
-    # # check the number of cross sections that were extended
-    # fixed_pts$is_extended %>% table()
-    message("Subsetting cross section points generated after extending transects...")
+    # # Number of transects being updated
+    # if(COLLECT_META) {
+    # extended_transects_count <- nrow(update_transects)
+    # extended_transects_ids   <- length(unique(update_transects$tmp_id))
+    # }
     
-    # extract cross section points that have an "is_extended" value of TRUE
-    extended_pts <-
-      fixed_pts %>% 
-      dplyr::filter(is_extended) %>% 
-      hydrofabric3D::add_tmp_id()
-      # dplyr::mutate(tmp_id = paste0(hy_id, "_", cs_id)) 
-    
-    # extract transects that have a "hy_id" in the "extended_pts" dataset
-    update_transects <-
+    # start_uids <- hydrofabric3D:::get_unique_tmp_ids(cs_pts)
+    # end_uids <- hydrofabric3D:::get_unique_tmp_ids(fixed_pts)
+    # removed_tmp_ids <- start_uids[!start_uids %in% end_uids]
+    # transects %>% 
+    #   hydrofabric3D::add_tmp_id() %>% 
+    #   dplyr::filter(!tmp_id %in% removed_tmp_ids)
+  
+    # Keep only the transects whose points survived rectify_cs(), drop the
+    # original versions of any transects that were extended ("tmp_id" in "extended_pts"),
+    # and add back the extended versions from "update_transects"
+    out_transects <- 
       transects %>% 
       hydrofabric3D::add_tmp_id() %>% 
-      dplyr::filter(tmp_id %in% unique(extended_pts$tmp_id))
-    
-    # Number of cross section points generated from extending transects and number of tmpIDs
-    if(COLLECT_META) {
-      extended_pts_count <- nrow(extended_pts)
-      extended_pts_ids   <- length(unique(extended_pts$tmp_id))
-      extended_transects_count <- nrow(update_transects)
-      extended_transects_ids   <- length(unique(update_transects$tmp_id))
-    }
-    
-    # if any transects were extended, update the transects dataset, and overwrite local and S3 transects geopackages
-    if (nrow(update_transects) > 0) {
-      message("Updating ", nrow(update_transects), " transects")
-    
-      update_transects <-
-        update_transects %>%
-        # dplyr::filter(hy_id %in% unique(extended_pts$hy_id)) %>% 
-        # apply extend_by_percent function to each transect line: 
-        hydrofabric3D:::extend_by_percent(
-          pct        = EXTENSION_PCT,
-          length_col = "cs_lengthm"
+      # dplyr::filter(!tmp_id %in% unique(extended_pts$tmp_id)) %>%
+      dplyr::filter(tmp_id %in% unique(hydrofabric3D::add_tmp_id(fixed_pts)$tmp_id)) %>% # Subset down to the remaining tmp_ids in the fixed points
+      dplyr::filter(!tmp_id %in% unique(extended_pts$tmp_id)) %>% # remove the tmp_ids that we are going to add back in with the extended versions of those tmp_ids
+      dplyr::bind_rows( # bring in the new updated extended transects
+        dplyr::mutate(
+          update_transects,
+          is_extended = TRUE
         )
-      
-      # # Number of transects being updated
-      # if(COLLECT_META) {
-        # extended_transects_count <- nrow(update_transects)
-        # extended_transects_ids   <- length(unique(update_transects$tmp_id))
-      # }
-      
-      # Filter down to ONLY points that were finalized and rectified from rectify_cs_pts()
-      # remove old transects that have "tmp_id" in "extended_pts" (transects that were unchanged and are "good_to_go")
-      # and then replace with old transects with the "update_transects" 
-      out_transects <- 
-        transects %>% 
-        hydrofabric3D::add_tmp_id() %>% 
-        # dplyr::filter(!tmp_id %in% unique(extended_pts$tmp_id)) %>%
-        # dplyr::filter(!tmp_id %in% )
-        dplyr::filter(tmp_id %in% unique(hydrofabric3D::add_tmp_id(fixed_pts)$tmp_id)) %>% 
-        dplyr::filter(!tmp_id %in% unique(extended_pts$tmp_id)) %>%
-        dplyr::bind_rows(
-          dplyr::mutate(
-            update_transects,
-            is_extended = TRUE
-            )
-          )  
-      
-        # dplyr::mutate(is_extended = FALSE) %>%
-        # dplyr::bind_rows(
-        #   dplyr::mutate(update_transects, is_extended = TRUE)
-        #   )  %>% 
-        # dplyr::select(-tmp_id) 
-      
-    } else {
-      
-      out_transects <- 
-        transects %>% 
-        hydrofabric3D::add_tmp_id() %>% 
-        dplyr::filter(tmp_id %in% unique(hydrofabric3D::add_tmp_id(fixed_pts)$tmp_id)) %>% 
-        dplyr::filter(!tmp_id %in% unique(extended_pts$tmp_id))
-    }
+      )  
     
-      # Number of final output transects and the number of unique tmpIDs (hy_id/cs_id , i.e. cross sections)
-      if(COLLECT_META) {
-        output_transects_count <- nrow(out_transects)
-        output_transects_ids   <- length(unique(out_transects$tmp_id))
-      }
+  } else {
     
-      # finalize new transects
-      out_transects <- 
-        out_transects %>% 
-        dplyr::left_join(
-          point_type_counts,
-          by = c("hy_id", "cs_id")
-        ) %>% 
-        dplyr::select(hy_id, cs_source, cs_id, cs_measure, cs_lengthm, 
-                      # sinuosity, 
-                      is_extended, 
-                      left_bank_count, right_bank_count, channel_count, bottom_count,
-                      geom)
-      
-      # -------------------------------------------------------------------
-      # ---- Re enumerate the transects & cross section points "cs_id" ----
-      # -------------------------------------------------------------------
-      
-      # make a dataframe that has a new_cs_id column that has 
-      # the cs_id renumbered to fill in any missing IDs,
-      # so each hy_id has cs_ids that go from 1 - number of cross sections on hy_id
-      # The dataframe below will be used to join the "new_cs_id" with 
-      # the original "cs_ids" in the final cross section POINTS and UPDATED TRANSECTS output datasets
-      renumbered_ids <-
-        fixed_pts %>% 
-        sf::st_drop_geometry() %>% 
-        # dplyr::filter(hy_id %in% c("wb-2402800", "wb-2398282", "wb-2400351")) %>%
-        dplyr::select(hy_id, cs_id, pt_id, cs_measure) %>% 
-        dplyr::group_by(hy_id, cs_id) %>% 
-        dplyr::slice(1) %>% 
-        dplyr::ungroup() %>% 
-        hydrofabric3D::add_tmp_id() %>% 
-        dplyr::group_by(hy_id) %>% 
-        dplyr::mutate(
-          new_cs_id = 1:dplyr::n()
-        ) %>% 
-        dplyr::ungroup() %>% 
-        dplyr::select(new_cs_id, tmp_id)
-      
-      # Renumber the transects to have correct CS IDs
-      out_transects <- dplyr::left_join(
-                          hydrofabric3D::add_tmp_id(out_transects),
-                          renumbered_ids,
-                          by = "tmp_id"
-                        ) %>% 
-                        dplyr::select(-cs_id, -tmp_id) %>% 
-                        dplyr::select(hy_id, cs_source, cs_id = new_cs_id, 
-                                      cs_measure, cs_lengthm, 
-                                      # sinuosity,
-                                      is_extended, 
-                                      left_bank_count, right_bank_count, channel_count, bottom_count,
-                                      geometry = geom
-                                      )
-      
-      # # fline_lengths <-  sf::st_drop_geometry(flines) %>% 
-      # #   dplyr::filter(id %in% out_transects$hy_id) %>% 
-      # #   dplyr::mutate(lengthm = lengthkm * 1000) %>% 
-      # #   dplyr::select(hy_id = id, lengthm, lengthkm) 
-      # tmp <- dplyr::left_join( out_transects, fline_lengths, by = "hy_id") %>% 
-      #   dplyr::mutate(ds_distance = (cs_measure * lengthm) / 100) %>% 
-      #   dplyr::select(-sinuosity) %>% 
-      #   dplyr::relocate(hy_id, cs_id, cs_measure, lengthm, ds_distance, lengthkm) %>% 
-      #   dplyr::rename("geometry" = geom)
-      
-      # Renumber the cross sections points to have correct CS IDs
-      fixed_pts <- dplyr::left_join(
-                            hydrofabric3D::add_tmp_id(fixed_pts),
-                            renumbered_ids,
-                            by = "tmp_id"
-                          ) %>% 
-                        dplyr::select(-cs_id, -tmp_id) %>% 
-                        dplyr::rename(cs_id = new_cs_id)
-      
-    # mapview::mapview(transects, color = "red") +
-    #  mapview::mapview(dplyr::filter(out_transects, is_extended), color = "green") +
-    #  mapview::mapview(flines, color = "dodgerblue") 
-
-    ###################################### 
-
-    # ----------------------------------------------------------
-    # ---- Cross section points parquet to S3 ----
-    # ----------------------------------------------------------
-    
-    # classify the cross section points
-    fixed_pts <- 
-      fixed_pts %>% 
-      dplyr::mutate(
-        X = sf::st_coordinates(.)[,1],
-        Y = sf::st_coordinates(.)[,2]
-      ) %>%
-      sf::st_drop_geometry() %>% 
-      dplyr::select(
-        hy_id, cs_id, pt_id,
-        cs_lengthm,
-        relative_distance,
-        X, Y, Z,
-        class, point_type
-        )
-
-  # Drop point geometries, leaving just X, Y, Z values
-  fixed_pts <- sf::st_drop_geometry(fixed_pts)
-
+    out_transects <- 
+      transects %>% 
+      hydrofabric3D::add_tmp_id() %>% 
+      dplyr::filter(tmp_id %in% unique(hydrofabric3D::add_tmp_id(fixed_pts)$tmp_id)) %>% 
+      dplyr::filter(!tmp_id %in% unique(extended_pts$tmp_id))
+  }
+  
+  # finalize new transects
+  out_transects <- 
+    out_transects %>% 
+    dplyr::left_join(
+      point_type_counts,
+      by = c("hy_id", "cs_id")
+    ) %>% 
+    dplyr::left_join(
+      fixed_pts %>% 
+        sf::st_drop_geometry() %>% 
+        dplyr::select(hy_id, cs_id, bottom, left_bank, right_bank, valid_banks, has_relief) %>% 
+        dplyr::group_by(hy_id, cs_id) %>% 
+        dplyr::slice(1) %>% 
+        dplyr::ungroup(),
+      by = c("hy_id", "cs_id")
+    ) %>% 
+    dplyr::select(hy_id, cs_source, cs_id, cs_measure, cs_lengthm, 
+                  # sinuosity, 
+                  is_extended, 
+                  left_bank_count, right_bank_count, channel_count, bottom_count, 
+                  bottom, left_bank, right_bank, valid_banks, has_relief,
+                  geom
+                  )
+  
+  # -------------------------------------------------------------------
+  # ---- Re-enumerate the transects & cross section points "cs_id" ----
+  # -------------------------------------------------------------------
+  
+  # make a dataframe with a "new_cs_id" column that renumbers "cs_id" to fill in any
+  # missing IDs, so each hy_id has cs_ids running from 1 to the number of cross sections
+  # on that hy_id. The dataframe below is joined on "tmp_id" to replace the original
+  # "cs_id" in the final cross section POINTS and UPDATED TRANSECTS output datasets
+  renumbered_ids <-
+    fixed_pts %>% 
+    sf::st_drop_geometry() %>% 
+    # dplyr::filter(hy_id %in% c("wb-2402800", "wb-2398282", "wb-2400351")) %>%
+    dplyr::select(hy_id, cs_id, pt_id, cs_measure) %>% 
+    dplyr::group_by(hy_id, cs_id) %>% 
+    dplyr::slice(1) %>% 
+    dplyr::ungroup() %>% 
+    hydrofabric3D::add_tmp_id() %>% 
+    dplyr::group_by(hy_id) %>% 
+    dplyr::mutate(
+      new_cs_id = 1:dplyr::n()
+    ) %>% 
+    dplyr::ungroup() %>% 
+    dplyr::select(new_cs_id, tmp_id)
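+  # e.g. (hypothetical IDs) if hy_id "wb-1" kept only cross sections cs_id = c(1, 3, 7)
+  # after rectification, renumbered_ids maps them to new_cs_id = c(1, 2, 3) via tmp_id:
+  #   tibble::tibble(hy_id = "wb-1", cs_id = c(1, 3, 7)) %>%
+  #     hydrofabric3D::add_tmp_id() %>%
+  #     dplyr::group_by(hy_id) %>%
+  #     dplyr::mutate(new_cs_id = 1:dplyr::n()) %>%
+  #     dplyr::ungroup()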
+  
+  # Renumber the transects to have correct CS IDs
+  out_transects <- dplyr::left_join(
+                      hydrofabric3D::add_tmp_id(out_transects),
+                      renumbered_ids,
+                      by = "tmp_id"
+                    ) %>% 
+                      dplyr::select(-cs_id, -tmp_id) %>% 
+                      dplyr::select(hy_id, cs_source, cs_id = new_cs_id, 
+                                    cs_measure, cs_lengthm, 
+                                    # sinuosity,
+                                    is_extended, 
+                                    left_bank_count, right_bank_count, channel_count, bottom_count, 
+                                    bottom, left_bank, right_bank, valid_banks, has_relief,
+                                    geometry = geom
+                      )
+  
+  # # fline_lengths <-  sf::st_drop_geometry(flines) %>% 
+  # #   dplyr::filter(id %in% out_transects$hy_id) %>% 
+  # #   dplyr::mutate(lengthm = lengthkm * 1000) %>% 
+  # #   dplyr::select(hy_id = id, lengthm, lengthkm) 
+  # tmp <- dplyr::left_join( out_transects, fline_lengths, by = "hy_id") %>% 
+  #   dplyr::mutate(ds_distance = (cs_measure * lengthm) / 100) %>% 
+  #   dplyr::select(-sinuosity) %>% 
+  #   dplyr::relocate(hy_id, cs_id, cs_measure, lengthm, ds_distance, lengthkm) %>% 
+  #   dplyr::rename("geometry" = geom)
+  
+  # Renumber the cross section points to have correct CS IDs
+  fixed_pts <- dplyr::left_join(
+                    hydrofabric3D::add_tmp_id(fixed_pts),
+                    renumbered_ids,
+                    by = "tmp_id"
+                  ) %>% 
+                    dplyr::select(-cs_id, -tmp_id) %>% 
+                    dplyr::rename(cs_id = new_cs_id)
+  
+  # mapview::mapview(transects, color = "red") +
+  #  mapview::mapview(dplyr::filter(out_transects, is_extended), color = "green") +
+  #  mapview::mapview(flines, color = "dodgerblue") 
+  
+  ###################################### 
+  
+  # ----------------------------------------------------------
+  # ---- Cross section points parquet to S3 ----
+  # ----------------------------------------------------------
+  
+  # add X/Y coordinate columns, drop the point geometries, and keep only the output columns
+  fixed_pts <-
+    fixed_pts %>% 
+    dplyr::mutate(
+      X = sf::st_coordinates(.)[,1],
+      Y = sf::st_coordinates(.)[,2]
+    ) %>%
+    sf::st_drop_geometry() %>% 
+    dplyr::select(
+      hy_id, cs_id, pt_id,
+      cs_lengthm,
+      relative_distance,
+      X, Y, Z,
+      class, point_type,
+      bottom, left_bank, right_bank, valid_banks, has_relief # newly added columns (03/06/2024)
+    )
+  
+  # # Drop point geometries, leaving just X, Y, Z values
+  # fixed_pts <- sf::st_drop_geometry(fixed_pts)
+  
   # add Z_source column for source of elevation data
   fixed_pts <-
     fixed_pts %>%
     dplyr::mutate(
       Z_source = cs_source
-      ) %>%
-    dplyr::relocate(hy_id, cs_id, pt_id, cs_lengthm, relative_distance, X, Y, Z, Z_source, class)
-
-  # Number of final output transects and the number of unique tmpIDs (hy_id/cs_id , i.e. cross sections)
-  if(COLLECT_META) {
-    output_cs_pts_count      <- nrow(fixed_pts)
-    output_cs_pts_ids        <- length(unique(hydrofabric3D::add_tmp_id(fixed_pts)$tmp_id))
-    dropped_transects_count  <- transect_count - output_transects_count
-    }
+    ) %>%
+    dplyr::relocate(hy_id, cs_id, pt_id, cs_lengthm, relative_distance, X, Y, Z, Z_source, 
+                    class, point_type, 
+                    bottom, left_bank, right_bank, valid_banks, has_relief)
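+  
+  # quick sanity check (illustrative, not part of the pipeline): after the renumbering
+  # above, every hy_id should have contiguous cs_ids starting at 1, e.g.
+  #   fixed_pts %>%
+  #     dplyr::group_by(hy_id) %>%
+  #     dplyr::summarise(ok = all(sort(unique(cs_id)) == 1:dplyr::n_distinct(cs_id))) %>%
+  #     dplyr::filter(!ok)   # should return 0 rows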
   
   ###################################### 
   
@@ -464,7 +402,7 @@ for (i in 1:nrow(path_df)) {
     # transect_path
     updated_path
   )
- 
+  
   # command to copy transects geopackage to S3
   trans_to_s3 <- paste0("aws s3 cp ", updated_path, " ", transects_prefix, transect_file, 
                         ifelse(is.null(aws_profile), "", paste0(" --profile ", aws_profile)))
@@ -493,11 +431,11 @@ for (i in 1:nrow(path_df)) {
   
   # command to copy cross section points parquet to S3
   copy_cs_pts_to_s3 <- paste0("aws s3 cp ", out_path, " ", cs_pts_prefix, out_file,
-                          ifelse(is.null(aws_profile), "", paste0(" --profile ", aws_profile)))
-
+                              ifelse(is.null(aws_profile), "", paste0(" --profile ", aws_profile)))
+  
   message("Copy VPU ", path_df$vpu[i], " cross sections to S3:\n - S3 copy command:\n'", 
           paste0("aws s3 cp ", out_path, " ", cs_pts_prefix, out_file,
-                ifelse(is.null(aws_profile), "", paste0(" --profile ", aws_profile))), 
+                 ifelse(is.null(aws_profile), "", paste0(" --profile ", aws_profile))), 
           "'\n==========================")
   
   system(copy_cs_pts_to_s3, intern = TRUE)
@@ -508,43 +446,618 @@ for (i in 1:nrow(path_df)) {
   message("- Completed at: ", end)
   message("==========================")
   
-  if(COLLECT_META) {
-    
-    meta_df <- data.frame(
-      vpu                 = VPU,
-      start               = as.character(start),
-      end                 = as.character(end),
-      start_cs_pts        = as.character(start_cs_pts),
-      end_cs_pts          = as.character(end_cs_pts),
-      start_rectify       = as.character(start_rectify),
-      end_rectify         = as.character(end_rectify),
-      fline_count         = fline_count,
-      transect_count      = transect_count,
-      wb_count            = wb_count,
-      fline_wb_count      = fline_wb_count,
-      transect_wb_count   = transect_wb_count,
-      start_cs_pts_count  = start_cs_pts_count,
-      start_cs_pts_ids    = start_cs_pts_ids,
-      rectify_cs_pts_count       = rectify_cs_pts_count,
-      rectify_cs_pts_ids         = rectify_cs_pts_id_count,
-      extended_transects_count   = extended_transects_count,
-      extended_transects_ids     = extended_transects_ids,
-      dropped_transects          = dropped_transects_count,
-      output_transects_count     = output_transects_count,
-      output_cs_pts_count        = output_cs_pts_count,
-      output_transects_ids       = output_transects_ids,
-      output_cs_pts_ids          = output_cs_pts_ids
-    )
-    
-    order_df <- cbind(data.frame(vpu = VPU), start_order_count, rectify_order_count)
-    
-    readr::write_csv(meta_df, paste0(META_PATH, "nextgen_", VPU, "_cross_sections_metadata.csv"))
-    readr::write_csv(order_df, paste0(META_PATH, "nextgen_", VPU, "_cross_sections_streamorder.csv"))
-  }
-  
   rm(fixed_pts)
   gc()
   gc()
   
-  }
-
+}
+# 
+# transects
+# flines
+# ###########################################################################################################################################
+# ###########################################################################################################################################
+# 
+# # Generate the flowlines layer for the final cross_sections_<VPU>.gpkg for each VPU
+# source("runners/cs_runner/config.R")
+# 
+# # # load libraries
+# library(hydrofabric3D)
+# library(dplyr)
+# library(sf)
+# 
+# # cross section bucket prefix
+# cs_pts_prefix <- paste0(s3_bucket, version_prefix, "/3D/dem-cross-sections/")
+# # cs_pts_prefix <- paste0(s3_bucket, "v20/3D/dem-cross-sections/")
+# 
+# # transect bucket prefix
+# transects_prefix <- paste0(s3_bucket, version_prefix, "/3D/transects/")
+# 
+# # paths to nextgen datasets
+# nextgen_files <- list.files(nextgen_dir, full.names = FALSE)
+# 
+# # paths to nextgen datasets
+# transect_files <- list.files(transects_dir, full.names = FALSE)
+# 
+# # string to fill in "cs_source" column in output datasets
+# cs_source <- "hydrofabric3D"
+# 
+# # reference features dataframe
+# ref_df <- data.frame(
+#   vpu      = sapply(strsplit(ref_features, "_", fixed = TRUE), function(i) { i[1] }), 
+#   ref_file = ref_features
+# )
+# 
+# # ensure the files are in the same order and matched up by VPU
+# path_df <- align_files_by_vpu(
+#               x    = nextgen_files,
+#               y    = transect_files,
+#               base = base_dir
+#             ) %>% 
+#           dplyr::left_join(
+#             ref_df,
+#             by = "vpu"
+#             )
+# 
+# # loop over the nextgen and transect datasets (by VPU) and extract point elevations across points on each transect line,
+# # then classify the points, and create a parquet file with hy_id, cs_id, pt_id, X, Y, Z data.
+# # Save parquet locally and upload to specified S3 bucket
+# for (i in 1:nrow(path_df)) {
+# 
+#   # i = 8
+#   
+#   start <- Sys.time()
+#   
+#   # nextgen file and full path
+#   nextgen_file <- path_df$x[i]
+#   nextgen_path <- paste0(nextgen_dir, nextgen_file)
+#   
+#   # model attributes file and full path
+#   transect_file <- path_df$y[i]
+#   transect_path <- paste0(transects_dir, transect_file)
+#   
+#   # model attributes file and full path
+#   ref_file <- path_df$ref_file[i]
+#   ref_path <- paste0(ref_features_dir, "gpkg/", ref_file)
+#   
+#   # current VPU being processed
+#   VPU = path_df$vpu[i]
+#   
+#   message("Creating VPU ", VPU, 
+#           " cross section points:\n - flowpaths: '", nextgen_file,
+#           "'\n - transects: '", transect_file, "'", 
+#           "\n - waterbodies: '", ref_file, "'",
+#           "'\n - start time: '", start, "'"
+#           )
+# 
+#   ################### 
+#   
+#     # read in transects data
+#     transects <- sf::read_sf(transect_path)
+#   
+#     # read in nextgen data
+#     flines <- sf::read_sf(nextgen_path, layer = "flowpaths")
+#   
+#     # read in waterbodies reference features layer
+#     waterbodies <- sf::read_sf(ref_path, layer = "waterbodies")
+# 
+#     ##### subset flowlines and transects to first 5 features for testing #####
+#     # flines = dplyr::slice(flines, 1:5) 
+#     # transects = dplyr::filter(transects, hy_id %in% unique(flines$id))
+#     ##### #####
+#     
+#     # system.time({
+#       # Update flowlines and transects to remove flowlines and transects that intersect with reference_features waterbodies
+#       feature_subsets <- wb_intersects(flines, transects, waterbodies)
+#     # })
+#     
+#     # Collect meta data on features and changes
+#     if(COLLECT_META) {
+#       
+#       fline_count       <- nrow(flines)
+#       transect_count    <- nrow(transects)
+#       wb_count          <- nrow(waterbodies)
+#       
+#       fline_wb_count    <- sum(feature_subsets$valid_flowlines)
+#       transect_wb_count <- sum(feature_subsets$valid_transects)
+#     }
+#     
+#     # replace flowlines and transects objects with updated versions in "updated_features"
+#     flines    <- flines[feature_subsets$valid_flowlines, ]
+#     transects <- transects[feature_subsets$valid_transects, ]
+#     
+#     # rm(waterbodies)
+#     # gc()
+#     
+#     flines <- 
+#       flines %>% 
+#       dplyr::group_by(order) %>% 
+#       dplyr::slice(1:100) %>% 
+#       dplyr::ungroup()
+#     # flines <- 
+#     #   flines %>% 
+#     #   dplyr::slice(1:2500) 
+#     
+#     transects <- 
+#       transects %>% 
+#       # dplyr::filter(hy_id %in% unique(tmp$id)) 
+#       dplyr::filter(hy_id %in% unique(flines$id)) 
+#     
+#     
+#     start_cs_pts <- Sys.time()
+#     
+#     message("Extracting cross section points (", start_cs_pts,")")    
+#     
+#     system.time({
+#   
+#     # get cross section point elevations
+#     cs_pts <- hydrofabric3D::cross_section_pts(
+#       cs             = transects,
+#       points_per_cs  = NULL,
+#       min_pts_per_cs = 10,
+#       dem            = DEM_URL
+#       )
+#     
+#     })
+#     
+#     # Remove any cross section that has ANY missing (NA) Z values. 
+#     cs_pts2 <-
+#       cs_pts %>% 
+#       # dplyr::filter(hy_id %in% c("wb-849054", "wb-845736")) %>% 
+#       dplyr::group_by(hy_id, cs_id) %>% 
+#       dplyr::filter(!any(is.na(Z))) %>% 
+#       dplyr::ungroup() 
+#     
+#     # Stash meta data about the points 
+#     pts_meta <- 
+#       cs_pts2 %>% 
+#       sf::st_drop_geometry() %>% 
+#       dplyr::select(hy_id, cs_id, pt_id, cs_measure, is_extended)
+#     
+#     # classify the points
+#     cs_pts2 <- hydrofabric3D::classify_points(cs_pts2, pct_of_length_for_relief = 0.01)
+#     
+#     system.time({
+#       # cs_pts <- hydrofabric3D::rectify_flat_cs(
+#       fixed_pts <- hydrofabric3D::rectify_cs(
+#         cs_pts         = cs_pts2,    # cross section points generated from hydrofabric3D::cross_section_pts()
+#         net            = flines,    # original flowline network
+#         transects      = transects, # original transect lines
+#         points_per_cs  = NULL, 
+#         min_pts_per_cs = 10, # number of points per cross sections
+#         dem            = DEM_URL, # DEM to extract points from
+#         scale          = EXTENSION_PCT, # How far to extend transects if the points need to be rechecked
+#         pct_of_length_for_relief = 0.01, # percent of cross sections length to be needed in relief calculation to consider cross section to "have relief"
+#         fix_ids = FALSE,
+#         verbose = TRUE
+#       )
+#     })
+#     
+#     rectify_summary <- hydrofabric3D::rectify_summary(cs_pts2, fixed_pts)
+#     
+#     end_cs_pts <- Sys.time()
+#     message("\n ---> Completed extraction of cross section points (", end_cs_pts,")")
+#     
+#     if(COLLECT_META) {
+#       start_cs_pts_count <- nrow(cs_pts)
+#     }
+#     
+#     # cs_pts_time <- round(as.numeric(end_cs_pts - start_cs_pts ), 2)
+#     # message("\n\n ---> Cross section point elevations processed in ", cs_pts_time)
+#     
+#     start_rectify <- Sys.time()
+#     message("Rectifying cross section points (", start_rectify,")")
+#     
+#     # collect the hy_ids and number of stream orders in cs_pts
+#     if(COLLECT_META) {
+#     
+#       cs_pts_ids       <- unique(cs_pts$hy_id)
+#       start_cs_pts_ids <- length(cs_pts_ids)
+#       
+#       start_order_count <- 
+#         flines %>% 
+#         sf::st_drop_geometry() %>% 
+#         dplyr::filter(id %in% cs_pts_ids) %>% 
+#         dplyr::group_by(order) %>% 
+#         dplyr::count() %>% 
+#         tidyr::pivot_wider(names_from = order,  
+#                          names_glue   = "start_order_{order}", 
+#                          values_from  = n
+#                          ) %>% 
+#         dplyr::ungroup()
+#     }
+#     
+#     # Remove any cross section that has ANY missing (NA) Z values. 
+#     cs_pts <- 
+#       cs_pts %>% 
+#       # dplyr::filter(hy_id %in% c("wb-849054", "wb-845736")) %>% 
+#       dplyr::group_by(hy_id, cs_id) %>% 
+#       dplyr::filter(!any(is.na(Z))) %>% 
+#       dplyr::ungroup()
+#     # try to extend any cross sections that returned cross section points with 
+#     # identical Z values within a certain threshold ("flat" cross sections)
+#     
+#     system.time({
+#     # cs_pts <- hydrofabric3D::rectify_flat_cs(
+#       fixed_pts <- hydrofabric3D::rectify_flat_cs(
+#                   cs_pts         = cs_pts,    # cross section points generated from hydrofabric3D::cross_section_pts()
+#                   net            = flines,    # original flowline network
+#                   cs             = transects, # original transect lines
+#                   points_per_cs  = NULL, 
+#                   min_pts_per_cs = 10, # number of points per cross sections
+#                   dem            = DEM_URL, # DEM to extract points from
+#                   scale          = EXTENSION_PCT, # How far to extend transects if the points need to be rechecked
+#                   threshold      = 1,    # 1 meter from bottom
+#                   pct_threshold  = 0.99, # rectify if 99% points are within 1 meter from the bottom
+#                   fix_ids        = FALSE
+#                   )
+#     })
+#     
+#     
+#     end_rectify <- Sys.time()
+#     rectify_time <- round(as.numeric(end_rectify - start_rectify ), 2)
+#     
+#     message("\n ---> Completed rectifying cross section points (", end_rectify,")")
+#     
+#     if(COLLECT_META) {
+#       rectify_cs_pts_count <- nrow(fixed_pts)
+#       # collect the hy_ids and number of stream orders in the RECTIFIED cs_pts
+#       rectify_cs_pts_ids      <- unique(fixed_pts$hy_id)
+#       rectify_cs_pts_id_count <- length(rectify_cs_pts_ids)
+# 
+#       rectify_order_count <- 
+#         flines %>% 
+#         sf::st_drop_geometry() %>% 
+#         dplyr::filter(id %in% rectify_cs_pts_ids) %>% 
+#         dplyr::group_by(order) %>% 
+#         dplyr::count() %>% 
+#         tidyr::pivot_wider(names_from = order,  
+#                            names_glue   = "rectify_order_{order}", 
+#                            values_from  = n
+#         ) %>% 
+#         dplyr::ungroup()
+#     }
+#     
+#     rm(cs_pts)
+#     gc()
+#     
+#     message("\n\n ---> Cross section points rectified in ", rectify_time, " (seconds?) ")
+#     
+#     # Remove any cross section that has ANY missing (NA) Z values. 
+#     fixed_pts <- 
+#       fixed_pts %>% 
+#       # dplyr::filter(hy_id %in% c("wb-849054", "wb-845736")) %>% 
+#       # dplyr::group_by(hy_id, cs_id) %>% 
+#       # dplyr::filter(!any(is.na(Z))) %>% 
+#       # dplyr::ungroup() %>% 
+#       hydrofabric3D::add_tmp_id()
+#     
+#     # Number of cross section points after removing any cross sections that contain any NA Z values
+#     if(COLLECT_META) {
+#       cs_pts_na_removed_count <- nrow(fixed_pts)
+#     }
+#     
+#     # Stash meta data about the points 
+#     pts_meta <- 
+#       fixed_pts %>% 
+#       sf::st_drop_geometry() %>% 
+#       dplyr::select(hy_id, cs_id, pt_id, cs_measure, is_extended)
+#     
+#     message("Classifying cross section points...")
+#     
+#     # # Classify points
+#     # fixed_pts <- hydrofabric3D::classify_points(fixed_pts)
+#     
+#     # add meta data back to the points
+#     fixed_pts <- 
+#       fixed_pts %>% 
+#       dplyr::select(-is_extended) %>% 
+#       dplyr::left_join(
+#         pts_meta,
+#         # dplyr::select(pts_meta, -is_extended),
+#         by = c("hy_id", "cs_id", "pt_id")
+#         # dplyr::select(pts_meta, hy_id, cs_id, pt_id, cs_measure, is_extended)
+#       )
+#     
+#     message("Gathering count of point types per cross section...")
+#     
+#     # get the counts of each point type to add this data to the transects dataset
+#     point_type_counts <- hydrofabric3D::get_point_type_counts(fixed_pts, add = FALSE)
+#     
+#     # # check the number of cross sections that were extended
+#     # fixed_pts$is_extended %>% table()
+#     message("Subsetting cross section points generated after extending transects...")
+#     
+#     # extract cross section points that have an "is_extended" value of TRUE
+#     extended_pts <-
+#       fixed_pts %>% 
+#       dplyr::filter(is_extended) %>% 
+#       hydrofabric3D::add_tmp_id()
+#       # dplyr::mutate(tmp_id = paste0(hy_id, "_", cs_id)) 
+#     
+#     # extract transects that have a "hy_id" in the "extended_pts" dataset
+#     update_transects <-
+#       transects %>% 
+#       hydrofabric3D::add_tmp_id() %>% 
+#       dplyr::filter(tmp_id %in% unique(extended_pts$tmp_id))
+#     
+#     # Number of cross section points generated from extending transects and number of tmpIDs
+#     if(COLLECT_META) {
+#       extended_pts_count <- nrow(extended_pts)
+#       extended_pts_ids   <- length(unique(extended_pts$tmp_id))
+#       extended_transects_count <- nrow(update_transects)
+#       extended_transects_ids   <- length(unique(update_transects$tmp_id))
+#     }
+#     
+#     # if any transects were extended, update the transects dataset, and overwrite local and S3 transects geopackages
+#     if (nrow(update_transects) > 0) {
+#       message("Updating ", nrow(update_transects), " transects")
+#     
+#       update_transects <-
+#         update_transects %>%
+#         # dplyr::filter(hy_id %in% unique(extended_pts$hy_id)) %>% 
+#         # apply extend_by_percent function to each transect line: 
+#         hydrofabric3D:::extend_by_percent(
+#           pct        = EXTENSION_PCT,
+#           length_col = "cs_lengthm"
+#         )
+#       
+#       # # Number of transects being updated
+#       # if(COLLECT_META) {
+#         # extended_transects_count <- nrow(update_transects)
+#         # extended_transects_ids   <- length(unique(update_transects$tmp_id))
+#       # }
+#       
+#       # Filter down to ONLY points that were finalized and rectified from rectify_cs_pts()
+#       # remove old transects that have "tmp_id" in "extended_pts" (transects that were unchanged and are "good_to_go")
+#       # and then replace with old transects with the "update_transects" 
+#       out_transects <- 
+#         transects %>% 
+#         hydrofabric3D::add_tmp_id() %>% 
+#         # dplyr::filter(!tmp_id %in% unique(extended_pts$tmp_id)) %>%
+#         # dplyr::filter(!tmp_id %in% )
+#         dplyr::filter(tmp_id %in% unique(hydrofabric3D::add_tmp_id(fixed_pts)$tmp_id)) %>% 
+#         dplyr::filter(!tmp_id %in% unique(extended_pts$tmp_id)) %>%
+#         dplyr::bind_rows(
+#           dplyr::mutate(
+#             update_transects,
+#             is_extended = TRUE
+#             )
+#           )  
+#       
+#         # dplyr::mutate(is_extended = FALSE) %>%
+#         # dplyr::bind_rows(
+#         #   dplyr::mutate(update_transects, is_extended = TRUE)
+#         #   )  %>% 
+#         # dplyr::select(-tmp_id) 
+#       
+#     } else {
+#       
+#       out_transects <- 
+#         transects %>% 
+#         hydrofabric3D::add_tmp_id() %>% 
+#         dplyr::filter(tmp_id %in% unique(hydrofabric3D::add_tmp_id(fixed_pts)$tmp_id)) %>% 
+#         dplyr::filter(!tmp_id %in% unique(extended_pts$tmp_id))
+#     }
+#     
+#       # Number of final output transects and the number of unique tmpIDs (hy_id/cs_id , i.e. cross sections)
+#       if(COLLECT_META) {
+#         output_transects_count <- nrow(out_transects)
+#         output_transects_ids   <- length(unique(out_transects$tmp_id))
+#       }
+#     
+#       # finalize new transects
+#       out_transects <- 
+#         out_transects %>% 
+#         dplyr::left_join(
+#           point_type_counts,
+#           by = c("hy_id", "cs_id")
+#         ) %>% 
+#         dplyr::select(hy_id, cs_source, cs_id, cs_measure, cs_lengthm, 
+#                       # sinuosity, 
+#                       is_extended, 
+#                       left_bank_count, right_bank_count, channel_count, bottom_count,
+#                       geom)
+#       
+#       # -------------------------------------------------------------------
+#       # ---- Re enumerate the transects & cross section points "cs_id" ----
+#       # -------------------------------------------------------------------
+#       
+#       # make a dataframe that has a new_cs_id column that has 
+#       # the cs_id renumbered to fill in any missing IDs,
+#       # so each hy_id has cs_ids that go from 1 - number of cross sections on hy_id
+#       # The dataframe below will be used to join the "new_cs_id" with 
+#       # the original "cs_ids" in the final cross section POINTS and UPDATED TRANSECTS output datasets
+#       renumbered_ids <-
+#         fixed_pts %>% 
+#         sf::st_drop_geometry() %>% 
+#         # dplyr::filter(hy_id %in% c("wb-2402800", "wb-2398282", "wb-2400351")) %>%
+#         dplyr::select(hy_id, cs_id, pt_id, cs_measure) %>% 
+#         dplyr::group_by(hy_id, cs_id) %>% 
+#         dplyr::slice(1) %>% 
+#         dplyr::ungroup() %>% 
+#         hydrofabric3D::add_tmp_id() %>% 
+#         dplyr::group_by(hy_id) %>% 
+#         dplyr::mutate(
+#           new_cs_id = 1:dplyr::n()
+#         ) %>% 
+#         dplyr::ungroup() %>% 
+#         dplyr::select(new_cs_id, tmp_id)
+#       
+#       # Renumber the transects to have correct CS IDs
+#       out_transects <- dplyr::left_join(
+#                           hydrofabric3D::add_tmp_id(out_transects),
+#                           renumbered_ids,
+#                           by = "tmp_id"
+#                         ) %>% 
+#                         dplyr::select(-cs_id, -tmp_id) %>% 
+#                         dplyr::select(hy_id, cs_source, cs_id = new_cs_id, 
+#                                       cs_measure, cs_lengthm, 
+#                                       # sinuosity,
+#                                       is_extended, 
+#                                       left_bank_count, right_bank_count, channel_count, bottom_count,
+#                                       geometry = geom
+#                                       )
+#       
+#       # # fline_lengths <-  sf::st_drop_geometry(flines) %>% 
+#       # #   dplyr::filter(id %in% out_transects$hy_id) %>% 
+#       # #   dplyr::mutate(lengthm = lengthkm * 1000) %>% 
+#       # #   dplyr::select(hy_id = id, lengthm, lengthkm) 
+#       # tmp <- dplyr::left_join( out_transects, fline_lengths, by = "hy_id") %>% 
+#       #   dplyr::mutate(ds_distance = (cs_measure * lengthm) / 100) %>% 
+#       #   dplyr::select(-sinuosity) %>% 
+#       #   dplyr::relocate(hy_id, cs_id, cs_measure, lengthm, ds_distance, lengthkm) %>% 
+#       #   dplyr::rename("geometry" = geom)
+#       
+#       # Renumber the cross sections points to have correct CS IDs
+#       fixed_pts <- dplyr::left_join(
+#                             hydrofabric3D::add_tmp_id(fixed_pts),
+#                             renumbered_ids,
+#                             by = "tmp_id"
+#                           ) %>% 
+#                         dplyr::select(-cs_id, -tmp_id) %>% 
+#                         dplyr::rename(cs_id = new_cs_id)
+#       
+#     # mapview::mapview(transects, color = "red") +
+#     #  mapview::mapview(dplyr::filter(out_transects, is_extended), color = "green") +
+#     #  mapview::mapview(flines, color = "dodgerblue") 
+# 
+#     ###################################### 
+# 
+#     # ----------------------------------------------------------
+#     # ---- Cross section points parquet to S3 ----
+#     # ----------------------------------------------------------
+#     
+#     # classify the cross section points
+#     fixed_pts <- 
+#       fixed_pts %>% 
+#       dplyr::mutate(
+#         X = sf::st_coordinates(.)[,1],
+#         Y = sf::st_coordinates(.)[,2]
+#       ) %>%
+#       sf::st_drop_geometry() %>% 
+#       dplyr::select(
+#         hy_id, cs_id, pt_id,
+#         cs_lengthm,
+#         relative_distance,
+#         X, Y, Z,
+#         class, point_type
+#         )
+# 
+#   # Drop point geometries, leaving just X, Y, Z values
+#   fixed_pts <- sf::st_drop_geometry(fixed_pts)
+# 
+#   # add Z_source column for source of elevation data
+#   fixed_pts <-
+#     fixed_pts %>%
+#     dplyr::mutate(
+#       Z_source = cs_source
+#       ) %>%
+#     dplyr::relocate(hy_id, cs_id, pt_id, cs_lengthm, relative_distance, X, Y, Z, Z_source, class)
+# 
+#   # Number of final output transects and the number of unique tmpIDs (hy_id/cs_id , i.e. cross sections)
+#   if(COLLECT_META) {
+#     output_cs_pts_count      <- nrow(fixed_pts)
+#     output_cs_pts_ids        <- length(unique(hydrofabric3D::add_tmp_id(fixed_pts)$tmp_id))
+#     dropped_transects_count  <- transect_count - output_transects_count
+#     }
+#   
+#   ###################################### 
+#   
+#   # ----------------------------------------------------------
+#   # ---- Re upload the updated transects geopackage to S3 ----
+#   # ----------------------------------------------------------
+#   updated_path <- gsub(transect_file, paste0("updated_", transect_file), transect_path)
+#   
+#   ## Save local and REUPLOAD TRANSECTS to S3 to update for any extended cross sections
+#   message("Saving updated transects to:\n - filepath: '", updated_path, "'")
+#   
+#   # save flowlines to out_path (lynker-spatial/01_transects/transects_<VPU num>.gpkg)
+#   sf::write_sf(
+#     out_transects,
+#     # transect_path
+#     updated_path
+#   )
+#  
+#   # command to copy transects geopackage to S3
+#   trans_to_s3 <- paste0("aws s3 cp ", updated_path, " ", transects_prefix, transect_file, 
+#                         ifelse(is.null(aws_profile), "", paste0(" --profile ", aws_profile)))
+#   
+#   message("Copy VPU ", path_df$vpu[i], " transects to S3:\n - S3 copy command:\n'", 
+#           trans_to_s3, 
+#           "'\n==========================")
+#   
+#   system(trans_to_s3, intern = TRUE)
+#   
+#   ###################################### 
+#   ###################################### 
+#   
+#   # ----------------------------------------------------------
+#   # ---- Upload the cross section points parquet to S3 ----
+#   # ----------------------------------------------------------
+#   
+#   # name of file and path to save transects gpkg too
+#   out_file <- paste0("nextgen_", path_df$vpu[i], "_cross_sections.parquet")
+#   out_path <- paste0(cs_pts_dir, out_file)
+#   
+#   message("Saving cross section points to:\n - filepath: '", out_path, "'")
+#   
+#   # save cross section points as a parquet to out_path (lynker-spatial/02_cs_pts/cs_pts_<VPU num>.parquet)
+#   arrow::write_parquet(fixed_pts, out_path)
+#   
+#   # command to copy cross section points parquet to S3
+#   copy_cs_pts_to_s3 <- paste0("aws s3 cp ", out_path, " ", cs_pts_prefix, out_file,
+#                           ifelse(is.null(aws_profile), "", paste0(" --profile ", aws_profile)))
+# 
+#   message("Copy VPU ", path_df$vpu[i], " cross sections to S3:\n - S3 copy command:\n'", 
+#           paste0("aws s3 cp ", out_path, " ", cs_pts_prefix, out_file,
+#                 ifelse(is.null(aws_profile), "", paste0(" --profile ", aws_profile))), 
+#           "'\n==========================")
+#   
+#   system(copy_cs_pts_to_s3, intern = TRUE)
+#   
+#   end <- Sys.time()
+#   
+#   message("Finished cross section point generation for VPU ", VPU)
+#   message("- Completed at: ", end)
+#   message("==========================")
+#   
+#   if(COLLECT_META) {
+#     
+#     meta_df <- data.frame(
+#       vpu                 = VPU,
+#       start               = as.character(start),
+#       end                 = as.character(end),
+#       start_cs_pts        = as.character(start_cs_pts),
+#       end_cs_pts          = as.character(end_cs_pts),
+#       start_rectify       = as.character(start_rectify),
+#       end_rectify         = as.character(end_rectify),
+#       fline_count         = fline_count,
+#       transect_count      = transect_count,
+#       wb_count            = wb_count,
+#       fline_wb_count      = fline_wb_count,
+#       transect_wb_count   = transect_wb_count,
+#       start_cs_pts_count  = start_cs_pts_count,
+#       start_cs_pts_ids    = start_cs_pts_ids,
+#       rectify_cs_pts_count       = rectify_cs_pts_count,
+#       rectify_cs_pts_ids         = rectify_cs_pts_id_count,
+#       extended_transects_count   = extended_transects_count,
+#       extended_transects_ids     = extended_transects_ids,
+#       dropped_transects          = dropped_transects_count,
+#       output_transects_count     = output_transects_count,
+#       output_cs_pts_count        = output_cs_pts_count,
+#       output_transects_ids       = output_transects_ids,
+#       output_cs_pts_ids          = output_cs_pts_ids
+#     )
+#     
+#     order_df <- cbind(data.frame(vpu = VPU), start_order_count, rectify_order_count)
+#     
+#     readr::write_csv(meta_df, paste0(META_PATH, "nextgen_", VPU, "_cross_sections_metadata.csv"))
+#     readr::write_csv(order_df, paste0(META_PATH, "nextgen_", VPU, "_cross_sections_streamorder.csv"))
+#   }
+#   
+#   # rm(fixed_pts)
+#   # gc()
+#   # gc()
+#   
+#   }
+# 
diff --git a/runners/cs_runner/config_vars.R b/runners/cs_runner/config_vars.R
index 8745b7a..8e345d8 100644
--- a/runners/cs_runner/config_vars.R
+++ b/runners/cs_runner/config_vars.R
@@ -7,6 +7,14 @@ aws_profile  <- "angus-lynker"
 # name of S3 bucket
 s3_bucket    <- "s3://lynker-spatial/"
 
+# location of FEMA 100 year flood plain FGB files
+FEMA_S3_BUCKET <- "s3://lynker-hydrofabric/"
+FEMA_S3_BUCKET_PREFIX <- "FEMA100/"
+FEMA_S3_DIR <- paste0(FEMA_S3_BUCKET, FEMA_S3_BUCKET_PREFIX)
+
+# FEMA100 year flood map FGB save location (temporary, will be deleted after processing)
+FEMA_FGB_PATH <- paste0(base_dir, "/FEMA100")
+
 # DEM URL
 DEM_URL      <- "/vsicurl/https://prd-tnm.s3.amazonaws.com/StagedProducts/Elevation/1/TIFF/USGS_Seamless_DEM_1.vrt"
 
@@ -15,6 +23,11 @@ DEM_URL      <- "/vsicurl/https://prd-tnm.s3.amazonaws.com/StagedProducts/Elevat
 # Default setting is 50% of the original transect lines length (0.5)
 EXTENSION_PCT <- 0.5
 
+# percentage of each cross section's length used as the threshold for classifying a cross section as having relief or not
+# 1% of the cross section's length is the default value we are using 
+# (i.e. a 100m long cross section needs a minimum of 1 meter (1%) of relief in its cross section points to be classified as "having relief")
+PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF <- 0.01
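+# e.g. with PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF = 0.01, a 250m long cross section needs
+# at least 250 * 0.01 = 2.5m of relief in its points to be classified as "having relief"
+# (illustrative arithmetic; the check itself is applied by hydrofabric3D::classify_points()
+# via its pct_of_length_for_relief argument)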
+
 # Whether to collect meta data from runs to generate an output CSV (currently only being created in 02_cs_pts.R)
 COLLECT_META <- TRUE
 
@@ -24,6 +37,7 @@ META_PATH <- '/Users/anguswatters/Desktop/cs_meta/'
 # META_PATH <-  "/local/path/to/save/cross_section_meta_data/"
 
 
+
 # # create the directory if it does NOT exist
 # if(!dir.exists(base_dir)) {
 #   message(glue::glue('Base directory does not exist...\nCreating directory: {base_dir}'))
diff --git a/runners/cs_runner/download_fema100.R b/runners/cs_runner/download_fema100.R
new file mode 100644
index 0000000..f55e8d7
--- /dev/null
+++ b/runners/cs_runner/download_fema100.R
@@ -0,0 +1,50 @@
+# This script pulls the desired FEMA100 flood FGB datasets from the lynker-hydrofabric S3 bucket and saves them into a directory within "base_dir"
+# base_dir is defined within runners/workflow/root_dir.R
+
+# NOTE: The lynker-hydrofabric S3 bucket is private at the moment
+
+# load config variables
+source("runners/cs_runner/config_vars.R")
+
+# create FEMA100/ directory if it does NOT exist
+if (!dir.exists(FEMA_FGB_PATH)) {
+  message(glue::glue('FEMA100/ directory does not exist...\nCreating directory: {FEMA_FGB_PATH}'))
+  dir.create(FEMA_FGB_PATH)
+}
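+# NOTE: dir.create() is not recursive by default; if base_dir itself might not exist
+# yet, dir.create(FEMA_FGB_PATH, recursive = TRUE) would be the safer call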
+
+
+# list objects in the FEMA100 S3 bucket and use a regular expression to match keys ending in .fgb
+fema_list_command <- paste0('#!/bin/bash
+            # AWS S3 Bucket and Directory information
+            S3_BUCKET="', FEMA_S3_DIR, '" 
+            
+            # Regular expression pattern to match object keys
+            PATTERN=".fgb$"
+            
+            # AWS CLI command to list objects in the S3 bucket and use grep to filter them
+            S3_OBJECTS=$(aws s3 ls "$S3_BUCKET" --profile ', aws_profile, ' | awk \'{print $4}\' | grep -E "$PATTERN")
+            
+            echo "$S3_OBJECTS"'
+)
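+# the same listing can be done without the bash heredoc; an equivalent sketch in plain R
+# (assumes the same aws CLI and profile are available on the PATH):
+#   ls_out <- system(paste0("aws s3 ls ", FEMA_S3_DIR, " --profile ", aws_profile), intern = TRUE)
+#   keys   <- sapply(strsplit(trimws(ls_out), " +"), function(x) { x[length(x)] })
+#   keys   <- grep("\\.fgb$", keys, value = TRUE)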
+
+# ---- Get FEMA100 FGB files ----
+# Run the command to get a list of the FEMA FGB object keys that matched the regular expression above
+FEMA_BUCKET_KEYS <- system(fema_list_command, intern = TRUE)
+
+# FEMA_BUCKET_OBJECTS <- paste0(FEMA_S3_BUCKET, FEMA_S3_BUCKET_PREFIX, FEMA_BUCKET_KEYS)
+
+# Loop over the selected S3 object keys from the FEMA100 bucket directory and copy each file to the local destination directory if it does NOT exist yet
+for (key in FEMA_BUCKET_KEYS) {
+  local_save_path <- paste0(FEMA_FGB_PATH, "/", key)
+  
+  if(!file.exists(local_save_path)) {
+    copy_cmd <- paste0('aws s3 cp ', FEMA_S3_BUCKET, FEMA_S3_BUCKET_PREFIX, key, " ", local_save_path)
+    
+    message("Copying S3 object:\n", local_save_path)
+    
+    system(copy_cmd)
+    
+    message("Download '", key, "' complete!")
+    message("------------------")
+  }
+}
\ No newline at end of file

From 28be701d17830d0090a069ba82afb78b39e80181 Mon Sep 17 00:00:00 2001
From: anguswg-ucsb <anguswatters@gmail.com>
Date: Thu, 21 Mar 2024 08:04:17 -0700
Subject: [PATCH 09/64] cleaned up 01_transects and 02_cs_pts runners to
 reflect newest methods in hydrofabric3D transect and cross section point
 generation

---
 runners/cs_runner/01_transects.R |  73 ++-
 runners/cs_runner/02_cs_pts.R    | 791 +++----------------------------
 2 files changed, 84 insertions(+), 780 deletions(-)

diff --git a/runners/cs_runner/01_transects.R b/runners/cs_runner/01_transects.R
index 912d95d..9c081e8 100644
--- a/runners/cs_runner/01_transects.R
+++ b/runners/cs_runner/01_transects.R
@@ -43,7 +43,7 @@ for(i in 1:nrow(path_df)) {
   # read in nextgen data
   flines <- sf::read_sf(nextgen_path, layer = "flowpaths")
   
-  # #  model attributes
+  # # model attributes
   # model_attrs <- arrow::read_parquet(model_attr_path)
 
   # # join flowlines with model atttributes
@@ -60,15 +60,12 @@ for(i in 1:nrow(path_df)) {
     dplyr::mutate(
       bf_width = exp(0.700    + 0.365* log(tot_drainage_areasqkm))
     ) %>%
-    # dplyr::mutate(
-    #   bf_width = 11 * eTW
-    # ) %>%
+    # dplyr::mutate( bf_width = 11 * eTW) %>%
     # dplyr::mutate( # if there are any NAs, use exp(0.700    + 0.365* log(tot_drainage_areasqkm)) equation to calculate bf_width
     #   bf_width = dplyr::case_when(
     #     is.na(bf_width) ~ exp(0.700    + 0.365* log(tot_drainage_areasqkm)),
     #     TRUE            ~ bf_width
-    #   )
-    # ) %>%
+    #   )) %>%
     dplyr::select(
       hy_id = id,
       lengthkm,
@@ -80,29 +77,26 @@ for(i in 1:nrow(path_df)) {
   # flines$bf_width <- ifelse(is.na(flines$bf_width),  exp(0.700    + 0.365* log(flines$tot_drainage_areasqkm)), flines$bf_width)
 
   time1 <- Sys.time()
-  
-  # system.time({
-    
-    # create transect lines
-    transects <- hydrofabric3D::cut_cross_sections(
-      net               = flines,                        # flowlines network
-      id                = "hy_id",                       # Unique feature ID
-      cs_widths         = pmax(50, flines$bf_width * 11),     # cross section width of each "id" linestring ("hy_id")
-      # cs_widths         = pmax(50, flines$bf_width),     # cross section width of each "id" linestring ("hy_id")
-      num               = 10,                            # number of cross sections per "id" linestring ("hy_id")
-      smooth            = TRUE,                          # smooth lines
-      densify           = 3,                             # densify linestring points
-      rm_self_intersect = TRUE,                          # remove self intersecting transects
-      fix_braids        = FALSE,                         # whether to fix braided flowlines or not
-      #### Arguments used for when fix_braids = TRUE
-      # terminal_id       = NULL,
-      # braid_threshold   = NULL,
-      # version           = 2,
-      # braid_method      = "comid",
-      # precision         = 1,
-      add               = TRUE                           # whether to add back the original data
-    )
-  # })
+
+  # create transect lines
+  transects <- hydrofabric3D::cut_cross_sections(
+    net               = flines,                        # flowlines network
+    id                = "hy_id",                       # Unique feature ID
+    cs_widths         = pmax(50, flines$bf_width * 11),     # cross section width of each "id" linestring ("hy_id")
+    # cs_widths         = pmax(50, flines$bf_width),     # cross section width of each "id" linestring ("hy_id")
+    num               = 10,                            # number of cross sections per "id" linestring ("hy_id")
+    smooth            = TRUE,                          # smooth lines
+    densify           = 3,                             # densify linestring points
+    rm_self_intersect = TRUE,                          # remove self intersecting transects
+    fix_braids        = FALSE,                         # whether to fix braided flowlines or not
+    #### Arguments used for when fix_braids = TRUE     # TODO: these methods need revision in hydrofabric3D to allow for more flexible processing for data that is NOT COMID based (i.e. hy_id)    
+    # terminal_id       = NULL,
+    # braid_threshold   = NULL,
+    # version           = 2,
+    # braid_method      = "comid",
+    # precision         = 1,
+    add               = TRUE                           # whether to add back the original data
+  )
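+
+  # e.g. for a flowline with tot_drainage_areasqkm = 100, bf_width = exp(0.700 + 0.365 * log(100)) ~ 10.8m,
+  # so the transect width used above is pmax(50, 10.8 * 11) ~ 119m (illustrative arithmetic only)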
     
   time2 <- Sys.time()
   time_diff <- round(as.numeric(time2 - time1 ), 2)
@@ -120,24 +114,13 @@ for(i in 1:nrow(path_df)) {
     transects %>%
     dplyr::mutate(
       cs_source = net_source
-    ) %>%
-    dplyr::rename("cs_lengthm" = cs_widths)
+    )
+    # dplyr::rename("cs_lengthm" = cs_widths)
   
   # # add cs_source column and keep just the desired columns to save and upload to S3
-  # transects <-
-  #   transects %>%
-  #   dplyr::mutate(
-  #     cs_source = net_source
-  #   ) %>%
-  #   dplyr::select(
-  #     hy_id,
-  #     cs_source,
-  #     cs_id,
-  #     cs_measure,
-  #     cs_lengthm = cs_widths,
-  #     geometry
-  #   )
-  # tmp <- sf::read_sf(out_path)
+  # transects <- transects %>%
+  #   dplyr::mutate(cs_source = net_source) %>%
+  #   dplyr::select(hy_id, cs_source, cs_id, cs_measure, cs_lengthm = cs_widths, geometry)
 
   # save transects with only columns to be uploaded to S3 (lynker-spatial/01_transects/transects_<VPU num>.gpkg)
   sf::write_sf(
diff --git a/runners/cs_runner/02_cs_pts.R b/runners/cs_runner/02_cs_pts.R
index dff9218..1f6faa8 100644
--- a/runners/cs_runner/02_cs_pts.R
+++ b/runners/cs_runner/02_cs_pts.R
@@ -1,7 +1,7 @@
 
-###########################################################################################################################################
-################################################.     REDO EVERYTHING   #######################################################
-###########################################################################################################################################
+##############################################################################################################################
+################################################     REDO EVERYTHING   #######################################################
+##############################################################################################################################
 
 # Generate the flowlines layer for the final cross_sections_<VPU>.gpkg for each VPU
 source("runners/cs_runner/config.R")
@@ -48,7 +48,7 @@ path_df <- align_files_by_vpu(
 # loop over the nextgen and transect datasets (by VPU) and extract point elevations across points on each transect line,
 # then classify the points, and create a parquet file with hy_id, cs_id, pt_id, X, Y, Z data.
 # Save parquet locally and upload to specified S3 bucket
-for (i in 20:nrow(path_df)) {
+for (i in 1:nrow(path_df)) {
   
   start <- Sys.time()
   
@@ -84,99 +84,58 @@ for (i in 20:nrow(path_df)) {
   
   # read in waterbodies reference features layer
   waterbodies <- sf::read_sf(ref_path, layer = "waterbodies")
-  
-  ##### subset flowlines and transects to first 5 features for testing #####
-  # flines = dplyr::slice(flines, 1:5) 
-  # transects = dplyr::filter(transects, hy_id %in% unique(flines$id))
-  ##### #####
-  
-  # system.time({
+
   # Update flowlines and transects to remove flowlines and transects that intersect with reference_features waterbodies
   feature_subsets <- wb_intersects(flines, transects, waterbodies)
-  # })
-  
-  # tmp <- transects %>% 
-  #   dplyr::slice(1:15000)
-  # tmp_flines <- flines %>% 
-  #   dplyr::filter(id %in% tmp$hy_id)
-  # mapview::mapview(tmp_flines, color = "dodgerblue") + mapview::mapview(tmp, color = "red") 
-  
+
   # replace flowlines and transects objects with updated versions in "updated_features"
   flines    <- flines[feature_subsets$valid_flowlines, ]
   transects <- transects[feature_subsets$valid_transects, ]
   
-  rm(waterbodies)
-  gc()
-  
-  #################################################################
-  ##### Temporary subsetting to speed local development up ########
-  #################################################################
-  
-  # flines <- 
-  #   flines %>% 
-  #   dplyr::group_by(order) %>% 
-  #   dplyr::slice(1:100) %>% 
-  #   dplyr::ungroup()
-  # # flines <- 
-  # #   flines %>% 
-  # #   dplyr::slice(1:2500) 
-  # 
-  # transects <- 
-  #   transects %>% 
-  #   # dplyr::filter(hy_id %in% unique(tmp$id)) 
-  #   dplyr::filter(hy_id %in% unique(flines$id)) 
-  
-  #################################################################
-  #################################################################
-  
   start_cs_pts <- Sys.time()
   message("Extracting cross section points (", start_cs_pts, ")")
-  # message("Extracting cross section points (", Sys.time(),")")    
-  
-  # STEP 1: Extract cs points from DEM
-  # system.time({
-    
-    # get cross section point elevations
-    cs_pts <- hydrofabric3D::cross_section_pts(
-      cs             = transects,
-      points_per_cs  = NULL,
-      min_pts_per_cs = 10,
-      dem            = DEM_URL
-    )
+  # ----------------------------------------------------------------------------------------------------------------
+  # ---- STEP 1: Extract cs points from DEM ----
+  # ----------------------------------------------------------------------------------------------------------------
+  
+  # get cross section point elevations
+  cs_pts <- hydrofabric3D::cross_section_pts(
+    cs             = transects,
+    points_per_cs  = NULL,
+    min_pts_per_cs = 10,
+    dem            = DEM_URL
+  )
     
-  # })
+  # ----------------------------------------------------------------------------------------------------------------
+  # ---- STEP 2: Remove any cross section that has ANY missing (NA) Z values, and classify the points ----
+  # ----------------------------------------------------------------------------------------------------------------
   
-  # system.time({
-  
-  # STEP 2:
-  # Remove any cross section that has ANY missing (NA) Z values, and classify the points 
+  # STEP 2: Remove any cross section that has ANY missing (NA) Z values, and classify the points 
   cs_pts <- 
     cs_pts %>% 
-    # dplyr::filter(hy_id %in% c("wb-849054", "wb-845736")) %>% 
     dplyr::group_by(hy_id, cs_id) %>% 
     dplyr::filter(!any(is.na(Z))) %>% 
     dplyr::ungroup() %>% 
-    hydrofabric3D::classify_points(pct_of_length_for_relief = 0.01)  
-  
-  # })
-  
-  # STEP 3: Try to rectify any no relief and invalid banks cross sections
-  system.time({
-    # cs_pts <- hydrofabric3D::rectify_flat_cs(
-    fixed_pts <- hydrofabric3D::rectify_cs(
-      cs_pts         = cs_pts,    # cross section points generated from hydrofabric3D::cross_section_pts()
-      net            = flines,    # original flowline network
-      transects      = transects, # original transect lines
-      points_per_cs  = NULL, 
-      min_pts_per_cs = 10, # number of points per cross sections
-      dem            = DEM_URL, # DEM to extract points from
-      scale          = EXTENSION_PCT, # How far to extend transects if the points need to be rechecked
-      pct_of_length_for_relief = PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF, # percent of cross sections length to be needed in relief calculation to consider cross section to "have relief"
-      fix_ids = FALSE,
-      verbose = TRUE
-    )
-  })
-  # })
+    hydrofabric3D::classify_points(pct_of_length_for_relief = PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF)  
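+
+  # NOTE: the grouped filter above drops an ENTIRE cross section if ANY of its points
+  # is missing a Z value (e.g. a cross section with 9 valid points and a single NA Z
+  # is removed entirely, not just the NA point)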
+
+  # ----------------------------------------------------------------------------------------------------------------
+  # ---- STEP 3: Try to rectify any no relief and invalid banks cross sections ----
+  # ----------------------------------------------------------------------------------------------------------------
+  
+  # TODO: This is taking a LOT of time to process as inputs get larger; an improvement should be looked into
+  fixed_pts <- hydrofabric3D::rectify_cs(
+  # cs_pts <- hydrofabric3D::rectify_flat_cs(
+    cs_pts         = cs_pts,    # cross section points generated from hydrofabric3D::cross_section_pts()
+    net            = flines,    # original flowline network
+    transects      = transects, # original transect lines
+    points_per_cs  = NULL, 
+    min_pts_per_cs = 10, # minimum number of points per cross section
+    dem            = DEM_URL, # DEM to extract points from
+    scale          = EXTENSION_PCT, # How far to extend transects if the points need to be rechecked
+    pct_of_length_for_relief = PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF, # percent of a cross section's length that must be met as relief for the cross section to count as "having relief"
+    fix_ids = FALSE,
+    verbose = TRUE
+  )
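+
+  # (one untested option for the TODO above: run rectify_cs() over batches of hy_ids and
+  #  dplyr::bind_rows() the results; whether batching changes the output would need to be
+  #  verified against a full run before adopting it)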
   
   # get a summary dataframe and print out details message
   rectify_summary <- hydrofabric3D::rectify_summary(cs_pts, fixed_pts, verbose = TRUE)
@@ -189,6 +148,10 @@ for (i in 20:nrow(path_df)) {
   
   readr::write_csv(rectify_summary, paste0(META_PATH, "nextgen_", VPU, "_cross_sections_metadata.csv"))
   
+  # ----------------------------------------------------------------------------------------------------------------
+  # ---- STEP 4: Update transects with extended transects (if exists) ----
+  # ----------------------------------------------------------------------------------------------------------------
+  
   # get the counts of each point type to add this data to the transects dataset
   point_type_counts <- hydrofabric3D::get_point_type_counts(fixed_pts, add = FALSE)
   
@@ -201,7 +164,6 @@ for (i in 20:nrow(path_df)) {
     fixed_pts %>% 
     dplyr::filter(is_extended) %>% 
     hydrofabric3D::add_tmp_id()
-  # dplyr::mutate(tmp_id = paste0(hy_id, "_", cs_id)) 
   
   # extract transects that have a "hy_id" in the "extended_pts" dataset
   update_transects <-
@@ -215,25 +177,11 @@ for (i in 20:nrow(path_df)) {
     
     update_transects <-
       update_transects %>%
-      # dplyr::filter(hy_id %in% unique(extended_pts$hy_id)) %>% 
       # apply extend_by_percent function to each transect line: 
       hydrofabric3D:::extend_by_percent(
         pct        = EXTENSION_PCT,
         length_col = "cs_lengthm"
       )
-    
-    # # Number of transects being updated
-    # if(COLLECT_META) {
-    # extended_transects_count <- nrow(update_transects)
-    # extended_transects_ids   <- length(unique(update_transects$tmp_id))
-    # }
-    
-    # start_uids <- hydrofabric3D:::get_unique_tmp_ids(cs_pts)
-    # end_uids <- hydrofabric3D:::get_unique_tmp_ids(fixed_pts)
-    # removed_tmp_ids <- start_uids[!start_uids %in% end_uids]
-    # transects %>% 
-    #   hydrofabric3D::add_tmp_id() %>% 
-    #   dplyr::filter(!tmp_id %in% removed_tmp_ids)
   
     # Filter down to ONLY points that were finalized and rectified from rectify_cs_pts()
     # remove old transects that have "tmp_id" in "extended_pts" (transects that were unchanged and are "good_to_go")
@@ -286,9 +234,9 @@ for (i in 20:nrow(path_df)) {
                   geom
                   )
   
-  # -------------------------------------------------------------------
+  # ----------------------------------------------------------------------------------------------------------------
   # ---- Re-enumerate the transects & cross section points "cs_id" ----
-  # -------------------------------------------------------------------
+  # ----------------------------------------------------------------------------------------------------------------
   
   # make a dataframe that has a new_cs_id column that has 
   # the cs_id renumbered to fill in any missing IDs,
@@ -327,16 +275,6 @@ for (i in 20:nrow(path_df)) {
                                     geometry = geom
                       )
   
-  # # fline_lengths <-  sf::st_drop_geometry(flines) %>% 
-  # #   dplyr::filter(id %in% out_transects$hy_id) %>% 
-  # #   dplyr::mutate(lengthm = lengthkm * 1000) %>% 
-  # #   dplyr::select(hy_id = id, lengthm, lengthkm) 
-  # tmp <- dplyr::left_join( out_transects, fline_lengths, by = "hy_id") %>% 
-  #   dplyr::mutate(ds_distance = (cs_measure * lengthm) / 100) %>% 
-  #   dplyr::select(-sinuosity) %>% 
-  #   dplyr::relocate(hy_id, cs_id, cs_measure, lengthm, ds_distance, lengthkm) %>% 
-  #   dplyr::rename("geometry" = geom)
-  
   # Renumber the cross section points to have correct CS IDs
   fixed_pts <- dplyr::left_join(
                     hydrofabric3D::add_tmp_id(fixed_pts),
@@ -346,15 +284,11 @@ for (i in 20:nrow(path_df)) {
                     dplyr::select(-cs_id, -tmp_id) %>% 
                     dplyr::rename(cs_id = new_cs_id)
   
-  # mapview::mapview(transects, color = "red") +
-  #  mapview::mapview(dplyr::filter(out_transects, is_extended), color = "green") +
-  #  mapview::mapview(flines, color = "dodgerblue") 
-  
   ###################################### 
   
-  # ----------------------------------------------------------
+  # ----------------------------------------------------------------------------------------------------------------
   # ---- Cross section points parquet to S3 ----
-  # ----------------------------------------------------------
+  # ----------------------------------------------------------------------------------------------------------------
   
   # classify the cross section points
   fixed_pts <-
@@ -386,11 +320,11 @@ for (i in 20:nrow(path_df)) {
                     class, point_type, 
                     bottom, left_bank, right_bank, valid_banks, has_relief)
   
-  ###################################### 
+  ############################################################################## 
   
-  # ----------------------------------------------------------
+  # ----------------------------------------------------------------------------------------------------------------
   # ---- Re upload the updated transects geopackage to S3 ----
-  # ----------------------------------------------------------
+  # ----------------------------------------------------------------------------------------------------------------
   updated_path <- gsub(transect_file, paste0("updated_", transect_file), transect_path)
   
   ## Save local and REUPLOAD TRANSECTS to S3 to update for any extended cross sections
@@ -413,12 +347,11 @@ for (i in 20:nrow(path_df)) {
   
   system(trans_to_s3, intern = TRUE)
   
-  ###################################### 
-  ###################################### 
+  ############################################################################## 
   
-  # ----------------------------------------------------------
+  # ----------------------------------------------------------------------------------------------------------------
   # ---- Upload the cross section points parquet to S3 ----
-  # ----------------------------------------------------------
+  # ----------------------------------------------------------------------------------------------------------------
   
   # name of file and path to save the cross section points parquet to
   out_file <- paste0("nextgen_", path_df$vpu[i], "_cross_sections.parquet")
@@ -446,618 +379,6 @@ for (i in 20:nrow(path_df)) {
   message("- Completed at: ", end)
   message("==========================")
   
-  rm(fixed_pts)
-  gc()
-  gc()
-  
 }
-# 
-# transects
-# flines
-# ###########################################################################################################################################
+
 # ###########################################################################################################################################
-# 
-# # Generate the flowlines layer for the final cross_sections_<VPU>.gpkg for each VPU
-# source("runners/cs_runner/config.R")
-# 
-# # # load libraries
-# library(hydrofabric3D)
-# library(dplyr)
-# library(sf)
-# 
-# # cross section bucket prefix
-# cs_pts_prefix <- paste0(s3_bucket, version_prefix, "/3D/dem-cross-sections/")
-# # cs_pts_prefix <- paste0(s3_bucket, "v20/3D/dem-cross-sections/")
-# 
-# # transect bucket prefix
-# transects_prefix <- paste0(s3_bucket, version_prefix, "/3D/transects/")
-# 
-# # paths to nextgen datasets
-# nextgen_files <- list.files(nextgen_dir, full.names = FALSE)
-# 
-# # paths to nextgen datasets
-# transect_files <- list.files(transects_dir, full.names = FALSE)
-# 
-# # string to fill in "cs_source" column in output datasets
-# cs_source <- "hydrofabric3D"
-# 
-# # reference features dataframe
-# ref_df <- data.frame(
-#   vpu      = sapply(strsplit(ref_features, "_", fixed = TRUE), function(i) { i[1] }), 
-#   ref_file = ref_features
-# )
-# 
-# # ensure the files are in the same order and matched up by VPU
-# path_df <- align_files_by_vpu(
-#               x    = nextgen_files,
-#               y    = transect_files,
-#               base = base_dir
-#             ) %>% 
-#           dplyr::left_join(
-#             ref_df,
-#             by = "vpu"
-#             )
-# 
-# # loop over the nextgen and transect datasets (by VPU) and extract point elevations across points on each transect line,
-# # then classify the points, and create a parquet file with hy_id, cs_id, pt_id, X, Y, Z data.
-# # Save parquet locally and upload to specified S3 bucket
-# for (i in 1:nrow(path_df)) {
-# 
-#   # i = 8
-#   
-#   start <- Sys.time()
-#   
-#   # nextgen file and full path
-#   nextgen_file <- path_df$x[i]
-#   nextgen_path <- paste0(nextgen_dir, nextgen_file)
-#   
-#   # model attributes file and full path
-#   transect_file <- path_df$y[i]
-#   transect_path <- paste0(transects_dir, transect_file)
-#   
-#   # model attributes file and full path
-#   ref_file <- path_df$ref_file[i]
-#   ref_path <- paste0(ref_features_dir, "gpkg/", ref_file)
-#   
-#   # current VPU being processed
-#   VPU = path_df$vpu[i]
-#   
-#   message("Creating VPU ", VPU, 
-#           " cross section points:\n - flowpaths: '", nextgen_file,
-#           "'\n - transects: '", transect_file, "'", 
-#           "\n - waterbodies: '", ref_file, "'",
-#           "'\n - start time: '", start, "'"
-#           )
-# 
-#   ################### 
-#   
-#     # read in transects data
-#     transects <- sf::read_sf(transect_path)
-#   
-#     # read in nextgen data
-#     flines <- sf::read_sf(nextgen_path, layer = "flowpaths")
-#   
-#     # read in waterbodies reference features layer
-#     waterbodies <- sf::read_sf(ref_path, layer = "waterbodies")
-# 
-#     ##### subset flowlines and transects to first 5 features for testing #####
-#     # flines = dplyr::slice(flines, 1:5) 
-#     # transects = dplyr::filter(transects, hy_id %in% unique(flines$id))
-#     ##### #####
-#     
-#     # system.time({
-#       # Update flowlines and transects to remove flowlines and transects that intersect with reference_features waterbodies
-#       feature_subsets <- wb_intersects(flines, transects, waterbodies)
-#     # })
-#     
-#     # Collect meta data on features and changes
-#     if(COLLECT_META) {
-#       
-#       fline_count       <- nrow(flines)
-#       transect_count    <- nrow(transects)
-#       wb_count          <- nrow(waterbodies)
-#       
-#       fline_wb_count    <- sum(feature_subsets$valid_flowlines)
-#       transect_wb_count <- sum(feature_subsets$valid_transects)
-#     }
-#     
-#     # replace flowlines and transects objects with updated versions in "updated_features"
-#     flines    <- flines[feature_subsets$valid_flowlines, ]
-#     transects <- transects[feature_subsets$valid_transects, ]
-#     
-    # rm(waterbodies)
-    # gc()
-#     
-#     flines <- 
-#       flines %>% 
-#       dplyr::group_by(order) %>% 
-#       dplyr::slice(1:100) %>% 
-#       dplyr::ungroup()
-#     # flines <- 
-#     #   flines %>% 
-#     #   dplyr::slice(1:2500) 
-#     
-#     transects <- 
-#       transects %>% 
-#       # dplyr::filter(hy_id %in% unique(tmp$id)) 
-#       dplyr::filter(hy_id %in% unique(flines$id)) 
-#     
-#     
-#     start_cs_pts <- Sys.time()
-#     
-#     message("Extracting cross section points (", start_cs_pts,")")    
-#     
-#     system.time({
-#   
-#     # get cross section point elevations
-#     cs_pts <- hydrofabric3D::cross_section_pts(
-#       cs             = transects,
-#       points_per_cs  = NULL,
-#       min_pts_per_cs = 10,
-#       dem            = DEM_URL
-#       )
-#     
-#     })
-#     
-#     # Remove any cross section that has ANY missing (NA) Z values. 
-#     cs_pts2 <-
-#       cs_pts %>% 
-#       # dplyr::filter(hy_id %in% c("wb-849054", "wb-845736")) %>% 
-#       dplyr::group_by(hy_id, cs_id) %>% 
-#       dplyr::filter(!any(is.na(Z))) %>% 
-#       dplyr::ungroup() 
-#     
-#     # Stash meta data about the points 
-#     pts_meta <- 
-#       cs_pts2 %>% 
-#       sf::st_drop_geometry() %>% 
-#       dplyr::select(hy_id, cs_id, pt_id, cs_measure, is_extended)
-#     
-#     # classify the points
-#     cs_pts2 <- hydrofabric3D::classify_points(cs_pts2, pct_of_length_for_relief = 0.01)
-#     
-#     system.time({
-#       # cs_pts <- hydrofabric3D::rectify_flat_cs(
-#       fixed_pts <- hydrofabric3D::rectify_cs(
-#         cs_pts         = cs_pts2,    # cross section points generated from hydrofabric3D::cross_section_pts()
-#         net            = flines,    # original flowline network
-#         transects      = transects, # original transect lines
-#         points_per_cs  = NULL, 
-#         min_pts_per_cs = 10, # number of points per cross sections
-#         dem            = DEM_URL, # DEM to extract points from
-#         scale          = EXTENSION_PCT, # How far to extend transects if the points need to be rechecked
-#         pct_of_length_for_relief = 0.01, # percent of cross sections length to be needed in relief calculation to consider cross section to "have relief"
-#         fix_ids = FALSE,
-#         verbose = TRUE
-#       )
-#     })
-#     
-#     rectify_summary <- hydrofabric3D::rectify_summary(cs_pts2, fixed_pts)
-#     
-#     end_cs_pts <- Sys.time()
-#     message("\n ---> Completed extraction of cross section points (", end_cs_pts,")")
-#     
-#     if(COLLECT_META) {
-#       start_cs_pts_count <- nrow(cs_pts)
-#     }
-#     
-#     # cs_pts_time <- round(as.numeric(end_cs_pts - start_cs_pts ), 2)
-#     # message("\n\n ---> Cross section point elevations processed in ", cs_pts_time)
-#     
-#     start_rectify <- Sys.time()
-#     message("Rectifying cross section points (", start_rectify,")")
-#     
-#     # collect the hy_ids and number of stream orders in cs_pts
-#     if(COLLECT_META) {
-#     
-#       cs_pts_ids       <- unique(cs_pts$hy_id)
-#       start_cs_pts_ids <- length(cs_pts_ids)
-#       
-#       start_order_count <- 
-#         flines %>% 
-#         sf::st_drop_geometry() %>% 
-#         dplyr::filter(id %in% cs_pts_ids) %>% 
-#         dplyr::group_by(order) %>% 
-#         dplyr::count() %>% 
-#         tidyr::pivot_wider(names_from = order,  
-#                          names_glue   = "start_order_{order}", 
-#                          values_from  = n
-#                          ) %>% 
-#         dplyr::ungroup()
-#     }
-#     
-#     # Remove any cross section that has ANY missing (NA) Z values. 
-#     cs_pts <- 
-#       cs_pts %>% 
-#       # dplyr::filter(hy_id %in% c("wb-849054", "wb-845736")) %>% 
-#       dplyr::group_by(hy_id, cs_id) %>% 
-#       dplyr::filter(!any(is.na(Z))) %>% 
-#       dplyr::ungroup()
-#     # try to extend any cross sections that returned cross section points with 
-#     # identical Z values within a certain threshold ("flat" cross sections)
-#     
-#     system.time({
-#     # cs_pts <- hydrofabric3D::rectify_flat_cs(
-#       fixed_pts <- hydrofabric3D::rectify_flat_cs(
-#                   cs_pts         = cs_pts,    # cross section points generated from hydrofabric3D::cross_section_pts()
-#                   net            = flines,    # original flowline network
-#                   cs             = transects, # original transect lines
-#                   points_per_cs  = NULL, 
-#                   min_pts_per_cs = 10, # number of points per cross sections
-#                   dem            = DEM_URL, # DEM to extract points from
-#                   scale          = EXTENSION_PCT, # How far to extend transects if the points need to be rechecked
-#                   threshold      = 1,    # 1 meter from bottom
-#                   pct_threshold  = 0.99, # rectify if 99% points are within 1 meter from the bottom
-#                   fix_ids        = FALSE
-#                   )
-#     })
-#     
-#     
-#     end_rectify <- Sys.time()
-#     rectify_time <- round(as.numeric(end_rectify - start_rectify ), 2)
-#     
-#     message("\n ---> Completed rectifying cross section points (", end_rectify,")")
-#     
-#     if(COLLECT_META) {
-#       rectify_cs_pts_count <- nrow(fixed_pts)
-#       # collect the hy_ids and number of stream orders in the RECTIFIED cs_pts
-#       rectify_cs_pts_ids      <- unique(fixed_pts$hy_id)
-#       rectify_cs_pts_id_count <- length(rectify_cs_pts_ids)
-# 
-#       rectify_order_count <- 
-#         flines %>% 
-#         sf::st_drop_geometry() %>% 
-#         dplyr::filter(id %in% rectify_cs_pts_ids) %>% 
-#         dplyr::group_by(order) %>% 
-#         dplyr::count() %>% 
-#         tidyr::pivot_wider(names_from = order,  
-#                            names_glue   = "rectify_order_{order}", 
-#                            values_from  = n
-#         ) %>% 
-#         dplyr::ungroup()
-#     }
-#     
-#     rm(cs_pts)
-#     gc()
-#     
-#     message("\n\n ---> Cross section points rectified in ", rectify_time, " (seconds?) ")
-#     
-#     # Remove any cross section that has ANY missing (NA) Z values. 
-#     fixed_pts <- 
-#       fixed_pts %>% 
-#       # dplyr::filter(hy_id %in% c("wb-849054", "wb-845736")) %>% 
-#       # dplyr::group_by(hy_id, cs_id) %>% 
-#       # dplyr::filter(!any(is.na(Z))) %>% 
-#       # dplyr::ungroup() %>% 
-#       hydrofabric3D::add_tmp_id()
-#     
-#     # Number of cross section points after removing any cross sections that contain any NA Z values
-#     if(COLLECT_META) {
-#       cs_pts_na_removed_count <- nrow(fixed_pts)
-#     }
-#     
-#     # Stash meta data about the points 
-#     pts_meta <- 
-#       fixed_pts %>% 
-#       sf::st_drop_geometry() %>% 
-#       dplyr::select(hy_id, cs_id, pt_id, cs_measure, is_extended)
-#     
-#     message("Classifying cross section points...")
-#     
-#     # # Classify points
-#     # fixed_pts <- hydrofabric3D::classify_points(fixed_pts)
-#     
-#     # add meta data back to the points
-#     fixed_pts <- 
-#       fixed_pts %>% 
-#       dplyr::select(-is_extended) %>% 
-#       dplyr::left_join(
-#         pts_meta,
-#         # dplyr::select(pts_meta, -is_extended),
-#         by = c("hy_id", "cs_id", "pt_id")
-#         # dplyr::select(pts_meta, hy_id, cs_id, pt_id, cs_measure, is_extended)
-#       )
-#     
-#     message("Gathering count of point types per cross section...")
-#     
-#     # get the counts of each point type to add this data to the transects dataset
-#     point_type_counts <- hydrofabric3D::get_point_type_counts(fixed_pts, add = FALSE)
-#     
-#     # # check the number of cross sections that were extended
-#     # fixed_pts$is_extended %>% table()
-#     message("Subsetting cross section points generated after extending transects...")
-#     
-#     # extract cross section points that have an "is_extended" value of TRUE
-#     extended_pts <-
-#       fixed_pts %>% 
-#       dplyr::filter(is_extended) %>% 
-#       hydrofabric3D::add_tmp_id()
-#       # dplyr::mutate(tmp_id = paste0(hy_id, "_", cs_id)) 
-#     
-#     # extract transects that have a "hy_id" in the "extended_pts" dataset
-#     update_transects <-
-#       transects %>% 
-#       hydrofabric3D::add_tmp_id() %>% 
-#       dplyr::filter(tmp_id %in% unique(extended_pts$tmp_id))
-#     
-#     # Number of cross section points generated from extending transects and number of tmpIDs
-#     if(COLLECT_META) {
-#       extended_pts_count <- nrow(extended_pts)
-#       extended_pts_ids   <- length(unique(extended_pts$tmp_id))
-#       extended_transects_count <- nrow(update_transects)
-#       extended_transects_ids   <- length(unique(update_transects$tmp_id))
-#     }
-#     
-#     # if any transects were extended, update the transects dataset, and overwrite local and S3 transects geopackages
-#     if (nrow(update_transects) > 0) {
-#       message("Updating ", nrow(update_transects), " transects")
-#     
-#       update_transects <-
-#         update_transects %>%
-#         # dplyr::filter(hy_id %in% unique(extended_pts$hy_id)) %>% 
-#         # apply extend_by_percent function to each transect line: 
-#         hydrofabric3D:::extend_by_percent(
-#           pct        = EXTENSION_PCT,
-#           length_col = "cs_lengthm"
-#         )
-#       
-#       # # Number of transects being updated
-#       # if(COLLECT_META) {
-#         # extended_transects_count <- nrow(update_transects)
-#         # extended_transects_ids   <- length(unique(update_transects$tmp_id))
-#       # }
-#       
-#       # Filter down to ONLY points that were finalized and rectified from rectify_cs_pts()
-#       # remove old transects that have "tmp_id" in "extended_pts" (transects that were unchanged and are "good_to_go")
-#       # and then replace with old transects with the "update_transects" 
-#       out_transects <- 
-#         transects %>% 
-#         hydrofabric3D::add_tmp_id() %>% 
-#         # dplyr::filter(!tmp_id %in% unique(extended_pts$tmp_id)) %>%
-#         # dplyr::filter(!tmp_id %in% )
-#         dplyr::filter(tmp_id %in% unique(hydrofabric3D::add_tmp_id(fixed_pts)$tmp_id)) %>% 
-#         dplyr::filter(!tmp_id %in% unique(extended_pts$tmp_id)) %>%
-#         dplyr::bind_rows(
-#           dplyr::mutate(
-#             update_transects,
-#             is_extended = TRUE
-#             )
-#           )  
-#       
-#         # dplyr::mutate(is_extended = FALSE) %>%
-#         # dplyr::bind_rows(
-#         #   dplyr::mutate(update_transects, is_extended = TRUE)
-#         #   )  %>% 
-#         # dplyr::select(-tmp_id) 
-#       
-#     } else {
-#       
-#       out_transects <- 
-#         transects %>% 
-#         hydrofabric3D::add_tmp_id() %>% 
-#         dplyr::filter(tmp_id %in% unique(hydrofabric3D::add_tmp_id(fixed_pts)$tmp_id)) %>% 
-#         dplyr::filter(!tmp_id %in% unique(extended_pts$tmp_id))
-#     }
-#     
-#       # Number of final output transects and the number of unique tmpIDs (hy_id/cs_id , i.e. cross sections)
-#       if(COLLECT_META) {
-#         output_transects_count <- nrow(out_transects)
-#         output_transects_ids   <- length(unique(out_transects$tmp_id))
-#       }
-#     
-#       # finalize new transects
-#       out_transects <- 
-#         out_transects %>% 
-#         dplyr::left_join(
-#           point_type_counts,
-#           by = c("hy_id", "cs_id")
-#         ) %>% 
-#         dplyr::select(hy_id, cs_source, cs_id, cs_measure, cs_lengthm, 
-#                       # sinuosity, 
-#                       is_extended, 
-#                       left_bank_count, right_bank_count, channel_count, bottom_count,
-#                       geom)
-#       
-#       # -------------------------------------------------------------------
-#       # ---- Re enumerate the transects & cross section points "cs_id" ----
-#       # -------------------------------------------------------------------
-#       
-#       # make a dataframe that has a new_cs_id column that has 
-#       # the cs_id renumbered to fill in any missing IDs,
-#       # so each hy_id has cs_ids that go from 1 - number of cross sections on hy_id
-#       # The dataframe below will be used to join the "new_cs_id" with 
-#       # the original "cs_ids" in the final cross section POINTS and UPDATED TRANSECTS output datasets
-#       renumbered_ids <-
-#         fixed_pts %>% 
-#         sf::st_drop_geometry() %>% 
-#         # dplyr::filter(hy_id %in% c("wb-2402800", "wb-2398282", "wb-2400351")) %>%
-#         dplyr::select(hy_id, cs_id, pt_id, cs_measure) %>% 
-#         dplyr::group_by(hy_id, cs_id) %>% 
-#         dplyr::slice(1) %>% 
-#         dplyr::ungroup() %>% 
-#         hydrofabric3D::add_tmp_id() %>% 
-#         dplyr::group_by(hy_id) %>% 
-#         dplyr::mutate(
-#           new_cs_id = 1:dplyr::n()
-#         ) %>% 
-#         dplyr::ungroup() %>% 
-#         dplyr::select(new_cs_id, tmp_id)
-#       
-#       # Renumber the transects to have correct CS IDs
-#       out_transects <- dplyr::left_join(
-#                           hydrofabric3D::add_tmp_id(out_transects),
-#                           renumbered_ids,
-#                           by = "tmp_id"
-#                         ) %>% 
-#                         dplyr::select(-cs_id, -tmp_id) %>% 
-#                         dplyr::select(hy_id, cs_source, cs_id = new_cs_id, 
-#                                       cs_measure, cs_lengthm, 
-#                                       # sinuosity,
-#                                       is_extended, 
-#                                       left_bank_count, right_bank_count, channel_count, bottom_count,
-#                                       geometry = geom
-#                                       )
-#       
-#       # # fline_lengths <-  sf::st_drop_geometry(flines) %>% 
-#       # #   dplyr::filter(id %in% out_transects$hy_id) %>% 
-#       # #   dplyr::mutate(lengthm = lengthkm * 1000) %>% 
-#       # #   dplyr::select(hy_id = id, lengthm, lengthkm) 
-#       # tmp <- dplyr::left_join( out_transects, fline_lengths, by = "hy_id") %>% 
-#       #   dplyr::mutate(ds_distance = (cs_measure * lengthm) / 100) %>% 
-#       #   dplyr::select(-sinuosity) %>% 
-#       #   dplyr::relocate(hy_id, cs_id, cs_measure, lengthm, ds_distance, lengthkm) %>% 
-#       #   dplyr::rename("geometry" = geom)
-#       
-#       # Renumber the cross sections points to have correct CS IDs
-#       fixed_pts <- dplyr::left_join(
-#                             hydrofabric3D::add_tmp_id(fixed_pts),
-#                             renumbered_ids,
-#                             by = "tmp_id"
-#                           ) %>% 
-#                         dplyr::select(-cs_id, -tmp_id) %>% 
-#                         dplyr::rename(cs_id = new_cs_id)
-#       
-#     # mapview::mapview(transects, color = "red") +
-#     #  mapview::mapview(dplyr::filter(out_transects, is_extended), color = "green") +
-#     #  mapview::mapview(flines, color = "dodgerblue") 
-# 
-#     ###################################### 
-# 
-#     # ----------------------------------------------------------
-#     # ---- Cross section points parquet to S3 ----
-#     # ----------------------------------------------------------
-#     
-#     # classify the cross section points
-#     fixed_pts <- 
-#       fixed_pts %>% 
-#       dplyr::mutate(
-#         X = sf::st_coordinates(.)[,1],
-#         Y = sf::st_coordinates(.)[,2]
-#       ) %>%
-#       sf::st_drop_geometry() %>% 
-#       dplyr::select(
-#         hy_id, cs_id, pt_id,
-#         cs_lengthm,
-#         relative_distance,
-#         X, Y, Z,
-#         class, point_type
-#         )
-# 
-#   # Drop point geometries, leaving just X, Y, Z values
-#   fixed_pts <- sf::st_drop_geometry(fixed_pts)
-# 
-#   # add Z_source column for source of elevation data
-#   fixed_pts <-
-#     fixed_pts %>%
-#     dplyr::mutate(
-#       Z_source = cs_source
-#       ) %>%
-#     dplyr::relocate(hy_id, cs_id, pt_id, cs_lengthm, relative_distance, X, Y, Z, Z_source, class)
-# 
-#   # Number of final output transects and the number of unique tmpIDs (hy_id/cs_id , i.e. cross sections)
-#   if(COLLECT_META) {
-#     output_cs_pts_count      <- nrow(fixed_pts)
-#     output_cs_pts_ids        <- length(unique(hydrofabric3D::add_tmp_id(fixed_pts)$tmp_id))
-#     dropped_transects_count  <- transect_count - output_transects_count
-#     }
-#   
-#   ###################################### 
-#   
-#   # ----------------------------------------------------------
-#   # ---- Re upload the updated transects geopackage to S3 ----
-#   # ----------------------------------------------------------
-#   updated_path <- gsub(transect_file, paste0("updated_", transect_file), transect_path)
-#   
-#   ## Save local and REUPLOAD TRANSECTS to S3 to update for any extended cross sections
-#   message("Saving updated transects to:\n - filepath: '", updated_path, "'")
-#   
-#   # save flowlines to out_path (lynker-spatial/01_transects/transects_<VPU num>.gpkg)
-#   sf::write_sf(
-#     out_transects,
-#     # transect_path
-#     updated_path
-#   )
-#  
-#   # command to copy transects geopackage to S3
-#   trans_to_s3 <- paste0("aws s3 cp ", updated_path, " ", transects_prefix, transect_file, 
-#                         ifelse(is.null(aws_profile), "", paste0(" --profile ", aws_profile)))
-#   
-#   message("Copy VPU ", path_df$vpu[i], " transects to S3:\n - S3 copy command:\n'", 
-#           trans_to_s3, 
-#           "'\n==========================")
-#   
-#   system(trans_to_s3, intern = TRUE)
-#   
-#   ###################################### 
-#   ###################################### 
-#   
-#   # ----------------------------------------------------------
-#   # ---- Upload the cross section points parquet to S3 ----
-#   # ----------------------------------------------------------
-#   
-#   # name of file and path to save transects gpkg too
-#   out_file <- paste0("nextgen_", path_df$vpu[i], "_cross_sections.parquet")
-#   out_path <- paste0(cs_pts_dir, out_file)
-#   
-#   message("Saving cross section points to:\n - filepath: '", out_path, "'")
-#   
-#   # save cross section points as a parquet to out_path (lynker-spatial/02_cs_pts/cs_pts_<VPU num>.parquet)
-#   arrow::write_parquet(fixed_pts, out_path)
-#   
-#   # command to copy cross section points parquet to S3
-#   copy_cs_pts_to_s3 <- paste0("aws s3 cp ", out_path, " ", cs_pts_prefix, out_file,
-#                           ifelse(is.null(aws_profile), "", paste0(" --profile ", aws_profile)))
-# 
-#   message("Copy VPU ", path_df$vpu[i], " cross sections to S3:\n - S3 copy command:\n'", 
-#           paste0("aws s3 cp ", out_path, " ", cs_pts_prefix, out_file,
-#                 ifelse(is.null(aws_profile), "", paste0(" --profile ", aws_profile))), 
-#           "'\n==========================")
-#   
-#   system(copy_cs_pts_to_s3, intern = TRUE)
-#   
-#   end <- Sys.time()
-#   
-#   message("Finished cross section point generation for VPU ", VPU)
-#   message("- Completed at: ", end)
-#   message("==========================")
-#   
-#   if(COLLECT_META) {
-#     
-#     meta_df <- data.frame(
-#       vpu                 = VPU,
-#       start               = as.character(start),
-#       end                 = as.character(end),
-#       start_cs_pts        = as.character(start_cs_pts),
-#       end_cs_pts          = as.character(end_cs_pts),
-#       start_rectify       = as.character(start_rectify),
-#       end_rectify         = as.character(end_rectify),
-#       fline_count         = fline_count,
-#       transect_count      = transect_count,
-#       wb_count            = wb_count,
-#       fline_wb_count      = fline_wb_count,
-#       transect_wb_count   = transect_wb_count,
-#       start_cs_pts_count  = start_cs_pts_count,
-#       start_cs_pts_ids    = start_cs_pts_ids,
-#       rectify_cs_pts_count       = rectify_cs_pts_count,
-#       rectify_cs_pts_ids         = rectify_cs_pts_id_count,
-#       extended_transects_count   = extended_transects_count,
-#       extended_transects_ids     = extended_transects_ids,
-#       dropped_transects          = dropped_transects_count,
-#       output_transects_count     = output_transects_count,
-#       output_cs_pts_count        = output_cs_pts_count,
-#       output_transects_ids       = output_transects_ids,
-#       output_cs_pts_ids          = output_cs_pts_ids
-#     )
-#     
-#     order_df <- cbind(data.frame(vpu = VPU), start_order_count, rectify_order_count)
-#     
-#     readr::write_csv(meta_df, paste0(META_PATH, "nextgen_", VPU, "_cross_sections_metadata.csv"))
-#     readr::write_csv(order_df, paste0(META_PATH, "nextgen_", VPU, "_cross_sections_streamorder.csv"))
-#   }
-#   
-  # rm(fixed_pts)
-  # gc()
-  # gc()
-#   
-#   }
-# 

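A note on the re-enumeration step retained above: per hy_id, cross section IDs are renumbered 1..n so that gaps left by dropped cross sections disappear, and the new IDs are joined back onto both the transects and the points. Below is a minimal, self-contained sketch of that pattern on toy data; here `paste0(hy_id, "_", cs_id)` stands in for `hydrofabric3D::add_tmp_id()`.

library(dplyr)

# toy points table: two flowlines whose cross section IDs have gaps
pts <- data.frame(
  hy_id      = c("wb-1", "wb-1", "wb-1", "wb-2", "wb-2"),
  cs_id      = c(1, 3, 7, 2, 5),
  cs_measure = c(10, 50, 90, 25, 75)
)

# one row per hy_id/cs_id, renumbered 1..n along the flowline (by cs_measure)
renumbered_ids <- pts %>%
  dplyr::distinct(hy_id, cs_id, cs_measure) %>%
  dplyr::group_by(hy_id) %>%
  dplyr::arrange(cs_measure, .by_group = TRUE) %>%
  dplyr::mutate(new_cs_id = 1:dplyr::n()) %>%
  dplyr::ungroup() %>%
  dplyr::mutate(tmp_id = paste0(hy_id, "_", cs_id)) %>%  # stand-in for hydrofabric3D::add_tmp_id()
  dplyr::select(new_cs_id, tmp_id)

# join the new IDs back on, then swap them in for the old cs_id
pts <- pts %>%
  dplyr::mutate(tmp_id = paste0(hy_id, "_", cs_id)) %>%
  dplyr::left_join(renumbered_ids, by = "tmp_id") %>%
  dplyr::select(-cs_id, -tmp_id) %>%
  dplyr::rename(cs_id = new_cs_id)

# pts$cs_id is now c(1, 2, 3, 1, 2)

The same `renumbered_ids` table is joined to both outputs, which is what keeps the updated transects and the cross section points in sync.
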
From adeee656277a49965109f3427314d079f8385926 Mon Sep 17 00:00:00 2001
From: anguswg-ucsb <anguswatters@gmail.com>
Date: Thu, 21 Mar 2024 08:09:11 -0700
Subject: [PATCH 10/64] cleaned up download_fema100.R script and replaced
 glue::glue() with paste0() in that script

---
 runners/cs_runner/download_fema100.R | 19 +++++++++++++++----
 1 file changed, 15 insertions(+), 4 deletions(-)

diff --git a/runners/cs_runner/download_fema100.R b/runners/cs_runner/download_fema100.R
index f55e8d7..acb90f9 100644
--- a/runners/cs_runner/download_fema100.R
+++ b/runners/cs_runner/download_fema100.R
@@ -6,13 +6,15 @@
 # load config variables
 source("runners/cs_runner/config_vars.R")
 
-# create FEMA100/ directory if it does NOT exist
+# -------------------------------------------------------------------------------------
+# ---- Create FEMA100/ directory (if it does NOT exist) ----
+# -------------------------------------------------------------------------------------
+
 if (!dir.exists(FEMA_FGB_PATH)) {
-  message(glue::glue('FEMA100/ directory does not exist...\nCreating directory: {FEMA_FGB_PATH}'))
+  message(paste0("FEMA100/ directory does not exist...\nCreating directory:\n > '", FEMA_FGB_PATH, "'"))
   dir.create(FEMA_FGB_PATH)
 }
 
-
 # list objects in S3 bucket, and regular expression match to the FEMA100 .fgb file pattern
 fema_list_command <- paste0('#!/bin/bash
             # AWS S3 Bucket and Directory information
@@ -27,13 +29,22 @@ fema_list_command <- paste0('#!/bin/bash
             echo "$S3_OBJECTS"'
 )
 
-# ---- Get nextgen geopackages ----
+# -------------------------------------------------------------------------------------
+# ---- Get the S3 buckets object keys for FEMA 100 FGB files ----
+# -------------------------------------------------------------------------------------
+
 # Run the script to get the list of FEMA100 FGB files that matched the regular expression above
 FEMA_BUCKET_KEYS <- system(fema_list_command, intern = TRUE)
 
+# create bucket object URIs
 # FEMA_BUCKET_OBJECTS <- paste0(FEMA_S3_BUCKET, FEMA_S3_BUCKET_PREFIX, FEMA_BUCKET_KEYS)
 
+# -------------------------------------------------------------------------------------
+# ---- Download FEMA 100 year FGB files from S3 ----
+# -------------------------------------------------------------------------------------
+
 # Parse the selected S3 object keys from the FEMA100 bucket directory and copy them to the local destination directory if the file does NOT exist yet
+FEMA_BUCKET_KEYS[1:2]
 for (key in FEMA_BUCKET_KEYS) {
   local_save_path <- paste0(FEMA_FGB_PATH, "/", key)
   

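
The listing step above shells out to a small bash script via system(). As a rough equivalent, the same list-then-match pattern can be sketched directly from R with `aws s3 ls` (this assumes the aws CLI is configured; the "\\.fgb$" regex is an illustrative assumption, not necessarily the script's exact pattern):

# Sketch only: list the FEMA100 objects and keep the FlatGeobuf keys.
# FEMA_S3_BUCKET / FEMA_S3_BUCKET_PREFIX come from config_vars.R.
raw_listing <- system(
  paste0("aws s3 ls ", FEMA_S3_BUCKET, FEMA_S3_BUCKET_PREFIX),
  intern = TRUE
)

# the last whitespace-separated field of each "aws s3 ls" row is the object key
keys <- vapply(
  strsplit(trimws(raw_listing), "\\s+"),
  function(fields) fields[length(fields)],
  character(1)
)

FEMA_BUCKET_KEYS <- keys[grepl("\\.fgb$", keys)]
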
From bfa3c9b530eef8b4d2ca550fc0947a7f7f931190 Mon Sep 17 00:00:00 2001
From: anguswg-ucsb <anguswatters@gmail.com>
Date: Thu, 21 Mar 2024 08:34:25 -0700
Subject: [PATCH 11/64] fixed aws s3 copy command to specify the aws_profile to
 use

---
 runners/cs_runner/download_fema100.R | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/runners/cs_runner/download_fema100.R b/runners/cs_runner/download_fema100.R
index acb90f9..fee84dd 100644
--- a/runners/cs_runner/download_fema100.R
+++ b/runners/cs_runner/download_fema100.R
@@ -44,18 +44,19 @@ FEMA_BUCKET_KEYS <- system(fema_list_command, intern = TRUE)
 # -------------------------------------------------------------------------------------
 
 # Parse the selected S3 object keys from the FEMA100 bucket directory and copy them to the local destination directory if the file does NOT exist yet
-FEMA_BUCKET_KEYS[1:2]
-for (key in FEMA_BUCKET_KEYS) {
+for (key in FEMA_BUCKET_KEYS[3:length(FEMA_BUCKET_KEYS)]) {
   local_save_path <- paste0(FEMA_FGB_PATH, "/", key)
   
   if(!file.exists(local_save_path)) {
-    copy_cmd <- paste0('aws s3 cp ', FEMA_S3_BUCKET, FEMA_S3_BUCKET_PREFIX, key, " ", local_save_path)
+    copy_cmd <- paste0('aws s3 cp ', FEMA_S3_BUCKET, FEMA_S3_BUCKET_PREFIX, key, " ", local_save_path, " --profile ", aws_profile)
     
-    message("Copying S3 object:\n", local_save_path)
+    message("S3 object:\n > '", FEMA_S3_BUCKET, FEMA_S3_BUCKET_PREFIX, key, "'")
+    message("Downloading S3 object to:\n > '", local_save_path, "'")
+    # message("Copy command:\n > '", copy_cmd, "'")
     
-    # system(copy_cmd)
+    system(copy_cmd)
     
-    message("Download '", key, "' complete!")
-    message("------------------")
+    message(" > '", key, "' download complete!")
+    message("----------------------------------")
   }
-}
\ No newline at end of file
+}

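
One caveat on this fix: the new copy command appends `--profile` unconditionally, while the other runners guard the flag with `ifelse(is.null(aws_profile), ...)`. A small helper in that guarded style (`make_s3_copy_cmd` is a hypothetical name, not part of these runners) would look like:

# Hypothetical helper: build an "aws s3 cp" command, adding "--profile"
# only when an AWS profile is configured (aws_profile may be NULL).
make_s3_copy_cmd <- function(s3_uri, local_path, aws_profile = NULL) {
  paste0(
    "aws s3 cp ", s3_uri, " ", local_path,
    ifelse(is.null(aws_profile), "", paste0(" --profile ", aws_profile))
  )
}

# usage:
# copy_cmd <- make_s3_copy_cmd("s3://bucket/key.fgb", "/tmp/key.fgb", "angus-lynker")
# system(copy_cmd, intern = TRUE)
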
From 6b8bfe7a09d4608fa88bef61320dd10cb1882788 Mon Sep 17 00:00:00 2001
From: anguswg-ucsb <anguswatters@gmail.com>
Date: Sat, 6 Apr 2024 08:28:28 -0700
Subject: [PATCH 12/64] working on new runner for injecting ML outputs to dem
 cross sections

---
 runners/cs_runner/02_cs_pts.R        |  91 +++++++--
 runners/cs_runner/03_inject_ml.R     | 263 +++++++++++++++++++++++++++
 runners/cs_runner/config.R           |   4 +
 runners/cs_runner/config_vars.R      |  33 +++-
 runners/cs_runner/download_nextgen.R |  27 ++-
 5 files changed, 400 insertions(+), 18 deletions(-)
 create mode 100644 runners/cs_runner/03_inject_ml.R

diff --git a/runners/cs_runner/02_cs_pts.R b/runners/cs_runner/02_cs_pts.R
index 1f6faa8..7d5cc8e 100644
--- a/runners/cs_runner/02_cs_pts.R
+++ b/runners/cs_runner/02_cs_pts.R
@@ -48,7 +48,9 @@ path_df <- align_files_by_vpu(
 # loop over the nextgen and transect datasets (by VPU) and extract point elevations across points on each transect line,
 # then classify the points, and create a parquet file with hy_id, cs_id, pt_id, X, Y, Z data.
 # Save parquet locally and upload to specified S3 bucket
-for (i in 1:nrow(path_df)) {
+for (i in 13:nrow(path_df)) {
+  
+  # i = 1
   
   start <- Sys.time()
   
@@ -92,12 +94,16 @@ for (i in 1:nrow(path_df)) {
   flines    <- flines[feature_subsets$valid_flowlines, ]
   transects <- transects[feature_subsets$valid_transects, ]
   
+  rm(waterbodies)
+  gc()
+  
   start_cs_pts <- Sys.time()
   message("Extracting cross section points (", start_cs_pts, ")")
   # ----------------------------------------------------------------------------------------------------------------
   # ---- STEP 1: Extract cs points from DEM ----
   # ----------------------------------------------------------------------------------------------------------------
-  
+  # system.time({
+    
   # get cross section point elevations
   cs_pts <- hydrofabric3D::cross_section_pts(
     cs             = transects,
@@ -105,11 +111,13 @@ for (i in 1:nrow(path_df)) {
     min_pts_per_cs = 10,
     dem            = DEM_URL
   )
-    
+  # })
   # ----------------------------------------------------------------------------------------------------------------
   # ---- STEP 2: Remove any cross section that has ANY missing (NA) Z values, and classify the points ----
   # ----------------------------------------------------------------------------------------------------------------
-  
+
+  # system.time({
+    
   # STEP 2: Remove any cross section that has ANY missing (NA) Z values, and classify the points 
   cs_pts <- 
     cs_pts %>% 
@@ -117,14 +125,17 @@ for (i in 1:nrow(path_df)) {
     dplyr::filter(!any(is.na(Z))) %>% 
     dplyr::ungroup() %>% 
     hydrofabric3D::classify_points(pct_of_length_for_relief = PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF)  
-
+  
+  # })
+  
+  ids_original_cs_pts <- hydrofabric3D::add_tmp_id(cs_pts)$tmp_id
+  
   # ----------------------------------------------------------------------------------------------------------------
   # ---- STEP 3: Try to rectify any no relief and invalid banks cross sections ----
   # ----------------------------------------------------------------------------------------------------------------
-  
-  # TODO: This is taking A LOT time to process as inputs get larger, an improvement should be looked into more
-  fixed_pts <- hydrofabric3D::rectify_cs(
-  # cs_pts <- hydrofabric3D::rectify_flat_cs(
+  # system.time({
+  # NOTE: new in-place method for improving (rectifying) any invalid cross sections where we don't have valid banks and relief
+  fixed_pts <- hydrofabric3D::improve_invalid_cs(
     cs_pts         = cs_pts,    # cross section points generated from hydrofabric3D::cross_section_pts()
     net            = flines,    # original flowline network
     transects      = transects, # original transect lines
@@ -136,6 +147,24 @@ for (i in 1:nrow(path_df)) {
     fix_ids = FALSE,
     verbose = TRUE
   )
+  # })
+  
+  ids_after_fixed_pts <- hydrofabric3D::add_tmp_id(fixed_pts)$tmp_id
+
+  # # TODO: This is taking a LOT of time to process as inputs get larger; an improvement should be looked into
+  # fixed_pts <- hydrofabric3D::rectify_cs(
+  # # cs_pts <- hydrofabric3D::rectify_flat_cs(
+  #   cs_pts         = cs_pts,    # cross section points generated from hydrofabric3D::cross_section_pts()
+  #   net            = flines,    # original flowline network
+  #   transects      = transects, # original transect lines
+  #   points_per_cs  = NULL, 
+  #   min_pts_per_cs = 10, # number of points per cross sections
+  #   dem            = DEM_URL, # DEM to extract points from
+  #   scale          = EXTENSION_PCT, # How far to extend transects if the points need to be rechecked
+  #   pct_of_length_for_relief = PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF, # percent of cross sections length to be needed in relief calculation to consider cross section to "have relief"
+  #   fix_ids = FALSE,
+  #   verbose = TRUE
+  # )
   
   # get a summary dataframe and print out details message
   rectify_summary <- hydrofabric3D::rectify_summary(cs_pts, fixed_pts, verbose = TRUE)
@@ -153,7 +182,7 @@ for (i in 1:nrow(path_df)) {
   # ----------------------------------------------------------------------------------------------------------------
   
   # get the counts of each point type to add this data to the transects dataset
-  point_type_counts <- hydrofabric3D::get_point_type_counts(fixed_pts, add = FALSE)
+  point_type_counts <- hydrofabric3D::get_point_type_counts(fixed_pts)
   
   # # check the number of cross sections that were extended
   # fixed_pts$is_extended %>% table()
@@ -266,7 +295,8 @@ for (i in 1:nrow(path_df)) {
                       by = "tmp_id"
                     ) %>% 
                       dplyr::select(-cs_id, -tmp_id) %>% 
-                      dplyr::select(hy_id, cs_source, cs_id = new_cs_id, 
+                      dplyr::select(hy_id, cs_source, 
+                                    cs_id = new_cs_id, 
                                     cs_measure, cs_lengthm, 
                                     # sinuosity,
                                     is_extended, 
@@ -320,6 +350,42 @@ for (i in 1:nrow(path_df)) {
                     class, point_type, 
                     bottom, left_bank, right_bank, valid_banks, has_relief)
   
+  ids_before_align <- hydrofabric3D::add_tmp_id(fixed_pts)$tmp_id
+  
+  message("Aligning banks and smoothing bottoms...")
+  fixed_pts <- hydrofabric3D::align_banks_and_bottoms(fixed_pts)
+  
+  ids_after_align <- hydrofabric3D::add_tmp_id(fixed_pts)$tmp_id
+  
+  message("Reclassifying cross section points...")
+  fixed_pts <- hydrofabric3D::classify_points(
+                  cs_pts                   = fixed_pts, 
+                  pct_of_length_for_relief = PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF
+                  )
+  
+  ids_after_reclassify <- hydrofabric3D::add_tmp_id(fixed_pts)$tmp_id
+  
+  if(all(ids_original_cs_pts %in% ids_after_fixed_pts)) {
+    message("All hy_id/cs_ids in ORIGINAL DEM point extraction were found in the FIXED points")
+  } else {
+    message(" >>> Missing hy_id/cs_ids in ORIGINAL DEM point extraction compared to the FIXED points")
+  }
+  
+  if(all(ids_before_align %in% ids_after_align)) {
+    message("All hy_id/cs_ids are kept in tact after bank alignment and bottom smoothing")
+  } else {
+    message(" >>> Missing hy_id/cs_ids after bank alignment and bottom smoothing")
+  }
+  
+  if(all(ids_after_align %in% ids_after_reclassify)) {
+    message("All hy_id/cs_ids are kept in tact after RECLASSIFICATION")
+  } else {
+    message(" >>> Missing hy_id/cs_ids after RECLASSIFICATION")
+  }
+  
+  # all(hydrofabric3D::add_tmp_id(fixed_pts2)$tmp_id %in% hydrofabric3D::add_tmp_id(fixed_pts)$tmp_id)
+  # all(hydrofabric3D::add_tmp_id(fixed_pts4)$tmp_id %in% hydrofabric3D::add_tmp_id(fixed_pts)$tmp_id)
+  
   ############################################################################## 
   
   # ----------------------------------------------------------------------------------------------------------------
@@ -379,6 +445,9 @@ for (i in 1:nrow(path_df)) {
   message("- Completed at: ", end)
   message("==========================")
   
+  rm(fixed_pts)
+  gc()
+  gc()
 }
 
 # ###########################################################################################################################################
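
The three ID checks added in this hunk repeat one pattern: snapshot the hy_id/cs_id `tmp_id`s, run a transformation, then verify nothing was dropped. A sketch of that pattern as a reusable helper (`check_ids_kept` is a hypothetical name, not a hydrofabric3D function):

# Hypothetical helper capturing the repeated check pattern above: verify that
# every hy_id/cs_id present before a processing step is still present after it.
check_ids_kept <- function(before_ids, after_ids, step_label) {
  if (all(before_ids %in% after_ids)) {
    message("All hy_id/cs_ids are kept intact after ", step_label)
  } else {
    lost <- setdiff(before_ids, after_ids)
    message(" >>> Missing ", length(lost), " hy_id/cs_ids after ", step_label)
  }
}

# usage, mirroring the runner:
# ids_before_align <- hydrofabric3D::add_tmp_id(fixed_pts)$tmp_id
# fixed_pts        <- hydrofabric3D::align_banks_and_bottoms(fixed_pts)
# ids_after_align  <- hydrofabric3D::add_tmp_id(fixed_pts)$tmp_id
# check_ids_kept(ids_before_align, ids_after_align, "bank alignment and bottom smoothing")
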
diff --git a/runners/cs_runner/03_inject_ml.R b/runners/cs_runner/03_inject_ml.R
new file mode 100644
index 0000000..113adb7
--- /dev/null
+++ b/runners/cs_runner/03_inject_ml.R
@@ -0,0 +1,263 @@
+# ----------------------------------------------------------------------------------------------------------------
+# ---- data paths -----
+# ----------------------------------------------------------------------------------------------------------------
+library(dplyr)
+library(hydrofabric3D)
+library(sf)
+library(patchwork)
+
+# Generate the flowlines layer for the final cross_sections_<VPU>.gpkg for each VPU
+source("runners/cs_runner/config.R")
+
+# cross section bucket prefix
+CS_ML_PTS_S3_PREFIX <- paste0(s3_bucket, version_prefix, "/3D/cross-sections/")
+# cs_pts_prefix <- paste0(s3_bucket, "v20/3D/dem-cross-sections/")
+
+ML_OUTPUTS_PATH <- list.files(ML_OUTPUTS_DIR, full.names = TRUE)
+
+# paths to nextgen datasets
+nextgen_files <- list.files(nextgen_dir, full.names = FALSE)
+
+# paths to nextgen datasets
+cs_files <- list.files(cs_pts_dir, full.names = FALSE)
+
+# string to fill in "cs_source" column in output datasets
+cs_source <- "hydrofabric3D"
+
+# ensure the files are in the same order and matched up by VPU
+path_df <- align_files_by_vpu(
+  x    = nextgen_files,
+  y    = cs_files,
+  base = base_dir
+) 
+# dplyr::left_join(
+#   ref_df,
+#   by = "vpu"
+# )
+
+# ML Outputs
+ml_output <- arrow::read_parquet(ML_OUTPUTS_PATH)
+
+# cs_ml_data_path <- "/Users/anguswatters/Desktop/cs_pts_for_ml_tests/nextgen_06_cross_sections_for_ml.parquet"
+# ml_output_path <- "/Users/anguswatters/Desktop/lynker-spatial/ml-outputs/channel_ml_outputs.parquet"
+# conus_network_path <- 's3://lynker-spatial/v20.1/conus_net.parquet'
+
+# loop over the nextgen and transect datasets (by VPU) and extract point elevations across points on each transect line,
+# then classify the points, and create a parquet file with hy_id, cs_id, pt_id, X, Y, Z data.
+# Save parquet locally and upload to specified S3 bucket
+for (i in 1:nrow(path_df)) {
+  
+  # i = 15
+  
+  start <- Sys.time()
+  
+  
+  # nextgen file and full path
+  nextgen_file <- path_df$x[i]
+  nextgen_path <- paste0(nextgen_dir, nextgen_file)
+  
+  # model attributes file and full path
+  cs_file      <- path_df$y[i]
+  cs_pts_path  <- paste0(cs_pts_dir, cs_file)
+  
+  # # model attributes file and full path
+  # ref_file <- path_df$ref_file[i]
+  # ref_path <- paste0(ref_features_dir, "gpkg/", ref_file)
+  
+  # current VPU being processed
+  VPU = path_df$vpu[i]
+  
+  message("Creating VPU ", VPU, 
+          " cross section points:\n - flowpaths: '", nextgen_file,
+          "'\n - cross section points: '", cs_file, "'", 
+          # "\n - waterbodies: '", ref_file, "'",
+          "'\n - start time: '", start, "'"
+  )
+  
+  # ----------------------------------------------------------------------------------------------------------------
+  # ---- Read in data -----
+  # ----------------------------------------------------------------------------------------------------------------
+  
+  # CONUS network parquet
+  net <- 
+    CONUS_NETWORK_URI %>% 
+    arrow::open_dataset() %>%
+    dplyr::filter(vpu == VPU) %>%
+    dplyr::collect()
+  
+  # Cross section points parquet
+  cs_pts    <- arrow::read_parquet(cs_pts_path)
+  
+  fline_net <- sf::read_sf(nextgen_path, layer = "flowpaths")
+  
+  # ----------------------------------------------------------------------------------------------------------------
+  # ---- Extract the max hydroseq "hy_id" for each flowline in the CONUS network parquet -----
+  # Use this to join "hy_id" to "hf_id" in ML outputs data
+  # ----------------------------------------------------------------------------------------------------------------
+  
+  net_subset <- 
+    net %>% 
+    dplyr::select(id, hf_id, hf_hydroseq) %>% 
+    # dplyr::filter(id %in% unique(cs$hy_id)) %>%
+    dplyr::filter(stringr::str_detect(id, "^wb-") & !is.na(id)) %>% 
+    dplyr::group_by(id) %>% 
+    dplyr::slice_max(hf_hydroseq, with_ties = FALSE) %>% 
+    dplyr::ungroup() %>% 
+    dplyr::select(hy_id = id, 
+                  hf_id,
+                  hf_hydroseq
+    ) 
+  
+  number_hyids_in_cs_pts    <- length(unique(cs_pts$hy_id ))
+  number_hyids_in_conus_net <- length(unique(net_subset$hy_id))
+  
+  message("VPU ", VPU, ": ", 
+          "\n - Number of cross section points hy_ids: '", number_hyids_in_cs_pts, "'", 
+          "'\n - Number of CONUS network hy_ids: '", number_hyids_in_conus_net, "'"
+  )
+  # ----------------------------------------------------------------------------------------------------------------
+  # ---- Find the stream orders for the flowlines in the network -----
+  # ----------------------------------------------------------------------------------------------------------------
+  
+  # stream_order <- 
+  #   fline_net %>% 
+  #   sf::st_drop_geometry() %>% 
+  #   dplyr::select(hy_id = id, 
+  #                 order, 
+  #                 # hydroseq, 
+  #                 tot_drainage_areasqkm
+  #   ) %>% 
+  #   dplyr::filter(hy_id %in% net_subset$hy_id) 
+  
+  # ----------------------------------------------------------------------------------------------------------------
+  # ---- Subset ML data to specific VPU and add "hy_id" column to ML data -----
+  # ----------------------------------------------------------------------------------------------------------------
+  
+  
+  # # ML Outputs
+  # ml_output <- arrow::read_parquet(ML_OUTPUTS_PATH)
+  
+  # Join hy_id onto the ML outputs and then remove the rows WITHOUT matches in hy_id
+  #  this should give us a (nearly) one-to-one cross walk between "hy_id" in the cross section points 
+  # and "hf_id" in the ML outputs dataset
+  ml_subset <- 
+    ml_output %>% 
+    dplyr::left_join(
+      net_subset,
+      by = "hf_id"
+    ) %>% 
+    dplyr::filter(!is.na(hy_id)) %>%
+    dplyr::relocate(hy_id, hf_id) 
+  
+  # join the ML outputs data to the cross section points
+  cs_pts <- 
+    cs_pts %>% 
+    dplyr::left_join(
+      dplyr::select(ml_subset, 
+                    hy_id, 
+                    hf_id, 
+                    owp_tw_inchan, 
+                    owp_y_inchan, 
+                    owp_tw_bf, 
+                    owp_y_bf, 
+                    owp_dingman_r),
+      by = "hy_id"
+    ) 
+  
+  cs_bottom_lengths <- get_cs_bottom_length(cs_pts)
+  
+  # TODO: for now we replace any negative TW values with the length of the bottom of the cross section
+  # TODO: This method + the negative model output values both need to be looked into (04/05/2024)
+  cs_pts <-
+    cs_pts %>% 
+    dplyr::left_join(
+      cs_bottom_lengths,
+      by = c("hy_id", "cs_id")
+    ) %>% 
+    dplyr::mutate(
+      owp_tw_inchan = dplyr::case_when(
+        owp_tw_inchan <= 0 ~ bottom_length,
+        TRUE               ~ owp_tw_inchan
+      ),
+      owp_tw_bf = dplyr::case_when(
+        owp_tw_bf <= 0     ~ bottom_length,
+        TRUE               ~ owp_tw_bf
+      )
+    ) %>% 
+    dplyr::select(-bottom_length)
+  # dplyr::filter(owp_tw_inchan <= 0 | owp_tw_bf <= 0) 
+  # dplyr::left_join( stream_order, by = "hy_id") 
+  
+  
+  # Split the cross sections into 2 groups:
+  # - "Inchannel cs" group are points with BOTH valid banks AND relief --> These get the INCHANNEL TW and Y values from the ML model
+  # - "Bankful cs" group are points WITHOUT valid banks OR any relief  --> These get the BANKFUL TW and Y values from the ML model
+  inchannel_cs <- dplyr::filter(cs_pts, 
+                                valid_banks & has_relief)
+  
+  bankful_cs   <- dplyr::filter(cs_pts, 
+                                !valid_banks | !has_relief)
+  
+  split_kept_all_rows <- nrow(cs_pts) == nrow(bankful_cs) + nrow(inchannel_cs)
+  
+  if (!split_kept_all_rows) {
+    warning("When splitting cross section points into 'bankful' and 'inchannel' groups, some points were not put in either group")
+  }
+    # Add bathymetry using "inchannel" estimates
+    cs_bathy_inchannel <- add_cs_bathymetry(
+      cross_section_pts = inchannel_cs,
+      # cross_section_pts = dplyr::slice(inchannel_cs, 1:100000),
+      top_width         = "owp_tw_inchan",
+      depth             = "owp_y_inchan", 
+      dingman_r         = "owp_dingman_r"
+    )
+    
+    # Add bathymetry using "bankful" estimates
+    cs_bathy_bankful <- add_cs_bathymetry(
+      cross_section_pts = bankful_cs,
+      top_width         = "owp_tw_bf",
+      depth             = "owp_y_bf", 
+      dingman_r         = "owp_dingman_r"
+    )
+    
+    
+  final_cs <- dplyr::bind_rows(cs_bathy_inchannel, cs_bathy_bankful)
+  
+  # ----------------------------------------------------------------------------------------------------------------
+  # ---- Upload the cross section points parquet to S3 ----
+  # ----------------------------------------------------------------------------------------------------------------
+  
+  # name of file and path to save the cross section points parquet to
+  out_file <- paste0("nextgen_", path_df$vpu[i], "_cross_sections.parquet")
+  out_path <- paste0(final_dir, out_file)
+  
+  message("Saving ML augmented cross section points to:\n - filepath: '", out_path, "'")
+  
+  # save cross section points as a parquet to out_path (lynker-spatial/02_cs_pts/cs_pts_<VPU num>.parquet)
+  arrow::write_parquet(final_cs, out_path)
+  
+  # command to copy cross section points parquet to S3
+  copy_cs_pts_to_s3 <- paste0("aws s3 cp ", out_path, " ", CS_ML_PTS_S3_PREFIX, out_file,
+                              ifelse(is.null(aws_profile), "", paste0(" --profile ", aws_profile)))
+  
+  message("Copy VPU ", path_df$vpu[i], " cross sections to S3:\n - S3 copy command:\n'", 
+          paste0("aws s3 cp ", out_path, " ", CS_ML_PTS_S3_PREFIX, out_file,
+                 ifelse(is.null(aws_profile), "", paste0(" --profile ", aws_profile))), 
+          "'\n==========================")
+  
+  system(copy_cs_pts_to_s3, intern = TRUE)
+  
+  end <- Sys.time()
+  
+  message("Finished augmenting cross section points with ML for VPU ", VPU)
+  message("- Completed at: ", end)
+  message("==========================")
+  
+  rm(final_cs)
+  gc()
+  gc()
+  
+}
+  
+    
+  
\ No newline at end of file
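
The crosswalk step in this new runner hinges on `dplyr::slice_max(hf_hydroseq)`: for each NextGen "wb-" id, keep the single reference flowline with the highest hydroseq, giving a (nearly) one-to-one `hy_id`/`hf_id` mapping to join the ML outputs on. The same step on toy data:

library(dplyr)

# toy CONUS network rows: one NextGen id can map to several hf_ids
net <- data.frame(
  id          = c("wb-10", "wb-10", "wb-20", "cat-5"),
  hf_id       = c(101, 102, 201, 301),
  hf_hydroseq = c(5, 9, 3, 1)
)

# per "wb-" id, keep the row with the max hf_hydroseq
net_subset <- net %>%
  dplyr::select(id, hf_id, hf_hydroseq) %>%
  dplyr::filter(stringr::str_detect(id, "^wb-") & !is.na(id)) %>%
  dplyr::group_by(id) %>%
  dplyr::slice_max(hf_hydroseq, with_ties = FALSE) %>%
  dplyr::ungroup() %>%
  dplyr::select(hy_id = id, hf_id, hf_hydroseq)

# net_subset: wb-10 -> hf_id 102, wb-20 -> hf_id 201 ("cat-5" is dropped)
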
diff --git a/runners/cs_runner/config.R b/runners/cs_runner/config.R
index 41f995d..8d22554 100644
--- a/runners/cs_runner/config.R
+++ b/runners/cs_runner/config.R
@@ -55,12 +55,16 @@ final_dir <- paste0(base_dir, "/cross_sections/")
 # directory to copy nextgen bucket data too
 ref_features_dir <- paste0(base_dir, "/00_reference_features/")
 
+# make a directory for the ML outputs data
+ML_OUTPUTS_DIR <- paste0(base_dir, "/ml-outputs/")
+
 # create directories 
 dir.create(transects_dir, showWarnings = FALSE)
 dir.create(cs_pts_dir,    showWarnings = FALSE)
 dir.create(ref_features_dir,     showWarnings = FALSE)
 dir.create(paste0(ref_features_dir, "gpkg/"),     showWarnings = FALSE)
 dir.create(final_dir,     showWarnings = FALSE)
+dir.create(ML_OUTPUTS_DIR,     showWarnings = FALSE)
 # dir.create(model_attr_dir,  showWarnings = FALSE)
 
 ## Go get a list of the reference features geopackages from S3 and create a save path using the S3 file names to save reference features to local directory
diff --git a/runners/cs_runner/config_vars.R b/runners/cs_runner/config_vars.R
index 8e345d8..3d0d910 100644
--- a/runners/cs_runner/config_vars.R
+++ b/runners/cs_runner/config_vars.R
@@ -1,4 +1,9 @@
 ### EDIT base_dir, aws_profile, and DEM_URL ###
+
+# ----------------------------------------------------------------------------
+# ---- General paths and constants variables ----
+# ----------------------------------------------------------------------------
+
 base_dir     <- '/Users/anguswatters/Desktop/lynker-spatial'
 
 # AWS profile to run CLI commands 
@@ -7,6 +12,9 @@ aws_profile  <- "angus-lynker"
 # name of S3 bucket
 s3_bucket    <- "s3://lynker-spatial/"
 
+# S3 prefix/folder of version run
+version_prefix <- "v20.1"
+
 # location of FEMA 100 year flood plain FGB files
 FEMA_S3_BUCKET <- "s3://lynker-hydrofabric/"
 FEMA_S3_BUCKET_PREFIX <- "FEMA100/"
@@ -15,6 +23,10 @@ FEMA_S3_DIR <- paste0(FEMA_S3_BUCKET, FEMA_S3_BUCKET_PREFIX)
 # FEMA100 year flood map FGB save location (temporary, will be deleted after processing)
 FEMA_FGB_PATH <- paste0(base_dir, "/FEMA100")
 
+# ----------------------------------------------------------------------------
+# ---- Cross section point extraction constant variables ----
+# ----------------------------------------------------------------------------
+
 # DEM URL
 DEM_URL      <- "/vsicurl/https://prd-tnm.s3.amazonaws.com/StagedProducts/Elevation/1/TIFF/USGS_Seamless_DEM_1.vrt"
 
@@ -26,22 +38,31 @@ EXTENSION_PCT <- 0.5
 # percentage of the length each cross section that should be used as a threshold for classifying a cross section as having relief or not
 # 1% of the cross sections length is the default value we are using 
 # (i.e. a 100m long cross section needs a minimum of 1 meter (1%) of relief in its cross section points to be classified as "having relief")
-PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF = 0.01
+PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF <- 0.01
 
 # Whether to collect meta data from runs to generate an output CSV (currently only being created in 02_cs_pts.R)
+# TODO: Probably delete this 
 COLLECT_META <- TRUE
 
 # Where should meta data CSVs be saved to? 
 # Local path to save CSVs of cross section meta data during each iteration
+# TODO: Probably delete this 
 META_PATH <- '/Users/anguswatters/Desktop/cs_meta/'
 # META_PATH <-  "/local/path/to/save/cross_section_meta_data/"
 
+# ----------------------------------------------------------------------------
+# ---- Machine learning data path variables ----
+# ----------------------------------------------------------------------------
+
+ML_OUTPUTS_FILE   = "channel_ml_outputs.parquet"
+ML_OUTPUTS_PREFIX = "v20.1/3D/ml-outputs/"
+ML_OUTPUTS_URI    = paste0(s3_bucket, ML_OUTPUTS_PREFIX, ML_OUTPUTS_FILE)
+# ML_OUTPUTS_URI    = "s3://lynker-spatial/v20.1/3D/ml-outputs/channel_ml_outputs.parquet"
 
+ML_OUTPUTS_PATH <- paste0(base_dir, "/ml-outputs/", ML_OUTPUTS_FILE)
 
-# # create the directory if it does NOT exist
-# if(!dir.exists(base_dir)) {
-#   message(glue::glue('Base directory does not exist...\nCreating directory: {base_dir}'))
-#   dir.create(base_dir)
-# }
+# path to the remote CONUS net parquet file
+CONUS_NETWORK_FILENAME <- "conus_net.parquet"
+CONUS_NETWORK_URI <- paste0(s3_bucket, version_prefix, "/", CONUS_NETWORK_FILENAME)
 
 ### EDIT ###
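
Worth noting: `ML_OUTPUTS_PREFIX` hard-codes "v20.1/" rather than reusing `version_prefix`. A sketch of an equivalent, version_prefix-based definition (it produces the same URI shown in the commented-out line above):

# builds the same URI as the hard-coded prefix above
ML_OUTPUTS_PREFIX <- paste0(version_prefix, "/3D/ml-outputs/")
ML_OUTPUTS_URI    <- paste0(s3_bucket, ML_OUTPUTS_PREFIX, ML_OUTPUTS_FILE)
# -> "s3://lynker-spatial/v20.1/3D/ml-outputs/channel_ml_outputs.parquet"
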
diff --git a/runners/cs_runner/download_nextgen.R b/runners/cs_runner/download_nextgen.R
index c405fa9..dd0b807 100644
--- a/runners/cs_runner/download_nextgen.R
+++ b/runners/cs_runner/download_nextgen.R
@@ -40,6 +40,10 @@ if(!dir.exists(model_attr_dir)) {
   dir.create(model_attr_dir)
 }
 
+# ---------------------------------------------------------------------------
+# ---- List/Get the nextgen gpkgs from S3 bucket  ----
+# ---------------------------------------------------------------------------
+
 # list objects in S3 bucket, and regular expression match to nextgen_.gpkg pattern
 command <- paste0('#!/bin/bash
             # AWS S3 Bucket and Directory information
@@ -71,7 +75,10 @@ for (key in bucket_keys) {
   message("------------------")
 }
 
-# ---- Get nextgen model attributes parquets ----
+# ---------------------------------------------------------------------------
+# ---- List/Get nextgen model attributes parquets from S3 bucket ----
+# ---------------------------------------------------------------------------
+
 # aws s3 ls s3://lynker-spatial/v20/3D/model_attributes/
 
 # list objects in S3 bucket, and regular expression match to the model attributes parquet pattern
@@ -103,6 +110,11 @@ for (key in model_attr_keys) {
   message("Download '", paste0(model_attr_prefix, key), "' complete!")
   message("------------------")
 }
+
+# ---------------------------------------------------------------------------
+# ---- List/Get reference features from S3 bucket ----
+# ---------------------------------------------------------------------------
+
 ## Go get a list of the reference features geopackages from S3 and create a save path using the S3 file names to save reference features to local directory
 
 # list objects in S3 bucket, and regular expression match to the reference features gpkg pattern
@@ -134,3 +146,16 @@ for (key in ref_features) {
   message("Download '", paste0(ref_features_dir, "gpkg/", key), "' complete!")
   message("------------------")
 }
+
+# ---------------------------------------------------------------------------
+# ---- Get ML outputs data from S3 bucket ----
+# ---------------------------------------------------------------------------
+
+ml_copy_cmd <- paste0('aws s3 cp ', ML_OUTPUTS_URI, ' ', paste0(ML_OUTPUTS_DIR, basename(ML_OUTPUTS_URI)))
+
+message("Copying S3 object:\n", ML_OUTPUTS_URI)
+system(ml_copy_cmd)
+
+message("Download '", paste0(ML_OUTPUTS_DIR, basename(ML_OUTPUTS_URI)), "' complete!")
+message("------------------")
+

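Before the next patch reworks the grouping, the negative top-width guard in 03_inject_ml.R is worth seeing in isolation: any non-positive ML top width is replaced by the measured bottom length of that cross section. A toy, self-contained run (all values invented):

library(dplyr)

# invented example rows: each column holds one bad (negative) ML estimate
cs <- data.frame(
  hy_id         = c("wb-1", "wb-2"),
  cs_id         = c(1, 1),
  owp_tw_inchan = c(-4.2, 12.0),
  owp_tw_bf     = c(6.1, -0.5),
  bottom_length = c(3.5, 6.0)
)

cs <- cs %>%
  dplyr::mutate(
    owp_tw_inchan = dplyr::case_when(
      owp_tw_inchan <= 0 ~ bottom_length,
      TRUE               ~ owp_tw_inchan
    ),
    owp_tw_bf = dplyr::case_when(
      owp_tw_bf <= 0     ~ bottom_length,
      TRUE               ~ owp_tw_bf
    )
  ) %>%
  dplyr::select(-bottom_length)

# owp_tw_inchan is now c(3.5, 12.0); owp_tw_bf is c(6.1, 6.0)
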
From ae7ae6d4812b830fcf10d46f329d42bdcb2a90e6 Mon Sep 17 00:00:00 2001
From: anguswg-ucsb <anguswatters@gmail.com>
Date: Mon, 8 Apr 2024 15:16:52 -0700
Subject: [PATCH 13/64] updating inject ml runner to use updated hydrofabric3D
 functionality

---
 runners/cs_runner/03_inject_ml.R | 84 ++++++++++++++++++++++++++++++--
 1 file changed, 79 insertions(+), 5 deletions(-)

diff --git a/runners/cs_runner/03_inject_ml.R b/runners/cs_runner/03_inject_ml.R
index 113adb7..bbaa064 100644
--- a/runners/cs_runner/03_inject_ml.R
+++ b/runners/cs_runner/03_inject_ml.R
@@ -188,21 +188,45 @@ for (i in 1:nrow(path_df)) {
   # dplyr::filter(owp_tw_inchan <= 0 | owp_tw_bf <= 0) 
   # dplyr::left_join( stream_order, by = "hy_id") 
   
+  missing_cs <- 
+    cs_pts %>% 
+    dplyr::filter(is.na(hf_id) | is.na(owp_tw_inchan) | is.na(owp_y_inchan) | is.na(owp_tw_bf) | is.na(owp_y_bf) | is.na(owp_dingman_r)) %>% 
+    hydrofabric3D::add_tmp_id()
   
   # Split the cross sections into 2 groups:
   # - "Inchannel cs" group are points with BOTH valid banks AND relief --> These get the INCHANNEL TW and Y values from the ML model
   # - "Bankful cs" group are points WITHOUT valid banks OR any relief  --> These get the BANKFUL TW and Y values from the ML model
-  inchannel_cs <- dplyr::filter(cs_pts, 
-                                valid_banks & has_relief)
+  inchannel_cs <-
+    cs_pts %>% 
+    hydrofabric3D::add_tmp_id() %>% 
+    dplyr::filter(!tmp_id %in% unique(missing_cs$tmp_id)) %>% 
+    dplyr::select(-tmp_id) %>% 
+    dplyr::filter(valid_banks & has_relief) %>% 
+    dplyr::rename(
+      TW        = owp_tw_inchan, 
+      DEPTH     = owp_y_inchan,
+      DINGMAN_R = owp_dingman_r
+    )
   
-  bankful_cs   <- dplyr::filter(cs_pts, 
-                                !valid_banks | !has_relief)
+  bankful_cs   <- 
+    cs_pts %>% 
+    hydrofabric3D::add_tmp_id() %>% 
+    dplyr::filter(!tmp_id %in% unique(missing_cs$tmp_id)) %>% 
+    dplyr::select(-tmp_id) %>% 
+    dplyr::filter(!valid_banks | !has_relief) %>% 
+    dplyr::rename(
+      TW        = owp_tw_bf, 
+      DEPTH     = owp_y_bf,
+      DINGMAN_R = owp_dingman_r
+    )
   
-  split_kept_all_rows <- nrow(cs_pts) == nrow(bankful_cs) + nrow(inchannel_cs)
+  split_kept_all_rows <- nrow(cs_pts) == nrow(bankful_cs) + nrow(inchannel_cs) + nrow(missing_cs)
+  # split_kept_all_rows <- nrow(cs_pts) == nrow(bankful_cs) + nrow(inchannel_cs)
   
   if (!split_kept_all_rows) {
     warning("When splitting cross section points into 'bankful' and 'inchannel' groups, some points were not put in either group")
   }
+  
     # Add bathymetry using "inchannel" estimates
     cs_bathy_inchannel <- add_cs_bathymetry(
       cross_section_pts = inchannel_cs,
@@ -223,6 +247,56 @@ for (i in 1:nrow(path_df)) {
     
   final_cs <- dplyr::bind_rows(cs_bathy_inchannel, cs_bathy_bankful)
   
+  final_cs <- dplyr::bind_rows(
+                  dplyr::select(cs_bathy_inchannel, 
+                                -owp_tw_bf, -owp_y_bf, -hf_id),
+                  dplyr::select(cs_bathy_bankful, 
+                                -owp_tw_inchan, -owp_y_inchan, -hf_id)
+                ) %>% 
+    dplyr::group_by(hy_id, cs_id) %>% 
+    tidyr::fill(
+      c(cs_lengthm, Z_source, TW, DEPTH, DINGMAN_R)
+    ) %>% 
+    dplyr::ungroup() %>% 
+    dplyr::select(
+      -point_type, 
+      -class,
+      -bottom, -left_bank, -right_bank,
+      -has_relief, -valid_banks
+    )
+  
+  final_cs <- hydrofabric3D::classify_points(final_cs)
+  # final_classified %>% 
+  #   dplyr::filter(!valid_banks | !has_relief) 
+  # system.time({
+  #   final_classified <- 
+  #     final_cs %>% 
+  #     hydrofabric3D::classify_points()
+  # })
+
+  final_cs <- dplyr::bind_rows(
+                dplyr::relocate(
+                  final_cs,
+                    hy_id, cs_id, pt_id, 
+                    Z, relative_distance, cs_lengthm, class, point_type, 
+                    X, Y, Z_source, bottom, left_bank, right_bank, valid_banks, has_relief, 
+                    TW, DEPTH, DINGMAN_R, is_dem_point
+                  ),
+                dplyr::relocate(  
+                  dplyr::mutate(
+                    dplyr::select(missing_cs, 
+                                  -tmp_id, -hf_id, -owp_tw_inchan, -owp_y_inchan, -owp_tw_bf, -owp_y_bf, -owp_dingman_r),
+                    TW           = NA,
+                    DEPTH        = NA, 
+                    DINGMAN_R    = NA,
+                    is_dem_point = TRUE
+                  ),
+                  hy_id, cs_id, pt_id, 
+                  Z, relative_distance, cs_lengthm, class, point_type, 
+                  X, Y, Z_source, bottom, left_bank, right_bank, valid_banks, has_relief, 
+                  TW, DEPTH, DINGMAN_R, is_dem_point
+                )
+              )
   # ----------------------------------------------------------------------------------------------------------------
   # ---- Upload the cross section points parquet to S3 ----
   # ----------------------------------------------------------------------------------------------------------------

From 70eccc3c1dee8db84c1e70cc221c67b579229495 Mon Sep 17 00:00:00 2001
From: anguswg-ucsb <anguswatters@gmail.com>
Date: Thu, 11 Apr 2024 13:20:41 -0700
Subject: [PATCH 14/64] updated runners/cs_runner to include machine learning
 estimated widths/depths calculations

---
 runners/cs_runner/01_transects.R              |   6 -
 runners/cs_runner/03_inject_ml.R              | 235 +++++++++++-------
 .../cs_runner/{03_driver.R => 04_driver.R}    |   3 +
 3 files changed, 148 insertions(+), 96 deletions(-)
 rename runners/cs_runner/{03_driver.R => 04_driver.R} (75%)
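
Note: the uid accounting added to 03_inject_ml.R below warns when hy_id/cs_id
combinations are lost between input and output. A hedged, self-contained sketch
of that check, assuming a "tmp id" is simply hy_id and cs_id pasted together
(which is how the surrounding code appears to use hydrofabric3D's tmp ids):

get_uids <- function(df) unique(paste0(df$hy_id, "_", df$cs_id))

# toy input/output: one cross section (wb-1, 2) is dropped along the way
cs_pts   <- data.frame(hy_id = c("wb-1", "wb-1", "wb-2"), cs_id = c(1, 2, 1))
final_cs <- data.frame(hy_id = c("wb-1", "wb-2"),         cs_id = c(1, 1))

starting_uids <- get_uids(cs_pts)
ending_uids   <- get_uids(final_cs)

if (length(starting_uids) != length(ending_uids)) {
  warning("Different number of unique hy_id/cs_id in the input vs. the output")
}
if (!all(starting_uids %in% ending_uids)) {
  warning(length(setdiff(starting_uids, ending_uids)),
          " hy_id/cs_id from the input are missing from the output")
}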

diff --git a/runners/cs_runner/01_transects.R b/runners/cs_runner/01_transects.R
index 9c081e8..5fe88c9 100644
--- a/runners/cs_runner/01_transects.R
+++ b/runners/cs_runner/01_transects.R
@@ -115,13 +115,7 @@ for(i in 1:nrow(path_df)) {
     dplyr::mutate(
       cs_source = net_source
     )
-    # dplyr::rename("cs_lengthm" = cs_widths)
   
-  # # add cs_source column and keep just the desired columns to save and upload to S3
-  # transects <- transects %>%
-  #   dplyr::mutate(cs_source = net_source) %>%
-  #   dplyr::select(hy_id, cs_source, cs_id, cs_measure, cs_lengthm = cs_widths, geometry)
-
   # save transects with only columns to be uploaded to S3 (lynker-spatial/01_transects/transects_<VPU num>.gpkg)
   sf::write_sf(
     # save dataset with only subset of columns to upload to S3
diff --git a/runners/cs_runner/03_inject_ml.R b/runners/cs_runner/03_inject_ml.R
index bbaa064..cc0ee52 100644
--- a/runners/cs_runner/03_inject_ml.R
+++ b/runners/cs_runner/03_inject_ml.R
@@ -21,9 +21,6 @@ nextgen_files <- list.files(nextgen_dir, full.names = FALSE)
 # paths to nextgen datasets
 cs_files <- list.files(cs_pts_dir, full.names = FALSE)
 
-# string to fill in "cs_source" column in output datasets
-cs_source <- "hydrofabric3D"
-
 # ensure the files are in the same order and matched up by VPU
 path_df <- align_files_by_vpu(
   x    = nextgen_files,
@@ -42,14 +39,19 @@ ml_output <- arrow::read_parquet(ML_OUTPUTS_PATH)
 # ml_output_path <- "/Users/anguswatters/Desktop/lynker-spatial/ml-outputs/channel_ml_outputs.parquet"
 # conus_network_path <- 's3://lynker-spatial/v20.1/conus_net.parquet'
 
 # loop over the nextgen and transect datasets (by VPU) and extract point elevations across points on each transect line,
 # then classify the points, and create a parquet file with hy_id, cs_id, pt_id, X, Y, Z data.
 # Save parquet locally and upload to specified S3 bucket
 for (i in 1:nrow(path_df)) {
   
-  # i = 15
+  # i = 8
   
-  start <- Sys.time()
+  start <- round(Sys.time())
   
   
   # nextgen file and full path
@@ -67,16 +69,21 @@ for (i in 1:nrow(path_df)) {
   # current VPU being processed
   VPU = path_df$vpu[i]
   
-  message("Creating VPU ", VPU, 
-          " cross section points:\n - flowpaths: '", nextgen_file,
+  message("Augmenting DEM cross sections with ML estimated widths/depths: ", VPU, 
+          " cross section points:",
           "'\n - cross section points: '", cs_file, "'", 
+          "'\n - ML estimated widths/depths: '", ML_OUTPUTS_FILE, "'", 
+          # "'\n - ML estimated widths/depths: '", ML_OUTPUTS_URI, "'", 
+          "\n - CONUS network file: '", CONUS_NETWORK_URI, "'",
+          "\n - flowpaths: '", nextgen_file,
           # "\n - waterbodies: '", ref_file, "'",
           "'\n - start time: '", start, "'"
-  )
+          )
   
   # ----------------------------------------------------------------------------------------------------------------
   # ---- Read in data -----
   # ----------------------------------------------------------------------------------------------------------------
+  message("Loading data...")
   
   # CONUS network parquet
   net <- 
@@ -88,7 +95,8 @@ for (i in 1:nrow(path_df)) {
   # Cross section points parquet
   cs_pts    <- arrow::read_parquet(cs_pts_path)
   
-  fline_net <- sf::read_sf(nextgen_path, layer = "flowpaths")
+  # TODO: Not needed 
+  # fline_net <- sf::read_sf(nextgen_path, layer = "flowpaths")
   
   # ----------------------------------------------------------------------------------------------------------------
   # ---- Extract the max hydroseq "hy_id" for each flowline in the CONUS network parquet -----
@@ -112,8 +120,8 @@ for (i in 1:nrow(path_df)) {
   number_hyids_in_conus_net <- length(unique(net_subset$hy_id))
   
   message("VPU ", VPU, ": ", 
-          "\n - Number of cross section points hy_ids: '", number_hyids_in_cs_pts, "'", 
-          "'\n - Number of CONUS network hy_ids: '", number_hyids_in_conus_net, "'"
+          "\n - Number of CONUS network hy_ids: '", number_hyids_in_conus_net, "'",
+          "\n - Number of cross section points hy_ids: '", number_hyids_in_cs_pts, "'"
   )
   # ----------------------------------------------------------------------------------------------------------------
   # ---- Find the stream orders for the flowlines in the network -----
@@ -149,6 +157,8 @@ for (i in 1:nrow(path_df)) {
     dplyr::filter(!is.na(hy_id)) %>%
     dplyr::relocate(hy_id, hf_id) 
   
+  message(round(Sys.time()), " - Joining ML width/depths estimates to cross section points...")
+
   # join the ML outputs data to the cross section points
   cs_pts <- 
     cs_pts %>% 
@@ -164,14 +174,16 @@ for (i in 1:nrow(path_df)) {
       by = "hy_id"
     ) 
   
-  cs_bottom_lengths <- get_cs_bottom_length(cs_pts)
+  message(round(Sys.time()), " - Replacing any negative width/depth estimates with cross section bottom lengths...")
+  
+  cs_bottom_lengths <- hydrofabric3D::get_cs_bottom_length(cs_pts)
   
   # TODO: for now we replace any negative TW values with the length of the bottom of the cross section
   # TODO: This method + the negative model output values both need to be looked into (04/05/2024)
   cs_pts <-
     cs_pts %>% 
     dplyr::left_join(
-      cs_bottom_lengths,
+      cs_bottom_lengths, 
       by = c("hy_id", "cs_id")
     ) %>% 
     dplyr::mutate(
@@ -185,25 +197,40 @@ for (i in 1:nrow(path_df)) {
       )
     ) %>% 
     dplyr::select(-bottom_length)
-  # dplyr::filter(owp_tw_inchan <= 0 | owp_tw_bf <= 0) 
+  
+  # cs_pts %>% dplyr::filter(owp_tw_inchan <= 0 | owp_tw_bf <= 0)
   # dplyr::left_join( stream_order, by = "hy_id") 
   
+  # extract any cross sections that didn't get matched with an "hf_id" and/or have no ML data
+  # TODO: look at this stuff with Arash (04/09/2024)
   missing_cs <- 
     cs_pts %>% 
-    dplyr::filter(is.na(hf_id) | is.na(owp_tw_inchan) | is.na(owp_y_inchan) | is.na(owp_tw_bf) | is.na(owp_y_bf) | is.na(owp_dingman_r)) %>% 
+    dplyr::filter(is.na(hf_id) | 
+                  is.na(owp_tw_inchan) | is.na(owp_y_inchan) | 
+                  is.na(owp_tw_bf) | is.na(owp_y_bf) |
+                  is.na(owp_dingman_r)) %>% 
     hydrofabric3D::add_tmp_id()
   
+  # TODO: Delete this, but for the time being keep it to inspect mismatches between "hy_id" and "hf_id" 
+  readr::write_csv(
+    dplyr::select(missing_cs, -tmp_id), 
+    paste0(META_PATH, "nextgen_", path_df$vpu[i], "_cross_sections_missing_hf_ids.csv")
+    )
+
   # Split the cross sections into 2 groups:
   # - "Inchannel cs" group are points with BOTH valid banks AND relief --> These get the INCHANNEL TW and Y values from the ML model
   # - "Bankful cs" group are points WITHOUT valid banks OR any relief  --> These get the BANKFUL TW and Y values from the ML model
   inchannel_cs <-
     cs_pts %>% 
     hydrofabric3D::add_tmp_id() %>% 
-    dplyr::filter(!tmp_id %in% unique(missing_cs$tmp_id)) %>% 
+    dplyr::filter(!tmp_id %in% unique(missing_cs$tmp_id)) %>%  # NOTE: makes sure to remove any of the "missing" cross sections without data
     dplyr::select(-tmp_id) %>% 
     dplyr::filter(valid_banks & has_relief) %>% 
-    dplyr::rename(
-      TW        = owp_tw_inchan, 
+    # NOTE: temporarily rename the top widths, depths, and dingman's R columns so they 
+    # work nicely with the "hydrofabric3D::add_cs_bathymetry()" function which takes a dataframe of cross section points 
+    # with "TW", "DEPTH", and "DINGMAN_R" columns for each cross section
+    dplyr::rename(        
+      TW        = owp_tw_inchan,    
       DEPTH     = owp_y_inchan,
       DINGMAN_R = owp_dingman_r
     )
@@ -220,114 +247,142 @@ for (i in 1:nrow(path_df)) {
       DINGMAN_R = owp_dingman_r
     )
   
+  # sanity check that all rows are accounted for after splitting up data
   split_kept_all_rows <- nrow(cs_pts) == nrow(bankful_cs) + nrow(inchannel_cs) + nrow(missing_cs)
   # split_kept_all_rows <- nrow(cs_pts) == nrow(bankful_cs) + nrow(inchannel_cs)
   
   if (!split_kept_all_rows) {
-    warning("When splitting cross section points into 'bankful' and 'inchannel' groups, some points were not put in either group")
-  }
-  
-    # Add bathymetry using "inchannel" estimates
-    cs_bathy_inchannel <- add_cs_bathymetry(
-      cross_section_pts = inchannel_cs,
-      # cross_section_pts = dplyr::slice(inchannel_cs, 1:100000),
-      top_width         = "owp_tw_inchan",
-      depth             = "owp_y_inchan", 
-      dingman_r         = "owp_dingman_r"
+    warning(paste0("When splitting cross section points into 'bankful' and 'inchannel' groups,", 
+    "\nsome points were not put in either group.", 
+    "\nLikely due to 'valid_banks' and/or 'has_relief' columns have either missing ", 
+    "values or contain values other than TRUE/FALSE")
     )
-    
-    # Add bathymetry using "bankful" estimates
-    cs_bathy_bankful <- add_cs_bathymetry(
-      cross_section_pts = bankful_cs,
-      top_width         = "owp_tw_bf",
-      depth             = "owp_y_bf", 
-      dingman_r         = "owp_dingman_r"
+  }
+  message(round(Sys.time()), " - Adding cross section bathymetry using inchannel widths/depths estimates...")
+
+  # Add bathymetry using "inchannel" estimates
+  inchannel_cs <- hydrofabric3D::add_cs_bathymetry(
+      cross_section_pts = inchannel_cs
     )
-    
-    
-  final_cs <- dplyr::bind_rows(cs_bathy_inchannel, cs_bathy_bankful)
   
+  message(round(Sys.time()), " - Adding cross section bathymetry using bankful widths/depths estimates...")
+
+  # Add bathymetry using "bankful" estimates
+  bankful_cs <- hydrofabric3D::add_cs_bathymetry(
+    cross_section_pts = bankful_cs
+  )
+    
+  # combine the inchannel and bankful cross section points back together, fill out missing values and reclassify the points
   final_cs <- dplyr::bind_rows(
-                  dplyr::select(cs_bathy_inchannel, 
-                                -owp_tw_bf, -owp_y_bf, -hf_id),
-                  dplyr::select(cs_bathy_bankful, 
-                                -owp_tw_inchan, -owp_y_inchan, -hf_id)
-                ) %>% 
-    dplyr::group_by(hy_id, cs_id) %>% 
+                dplyr::select(
+                  inchannel_cs,
+                  -hf_id, -TW, -DEPTH, -DINGMAN_R, -dplyr::starts_with("owp"), -is_dem_point
+                ),
+                dplyr::select(
+                  bankful_cs,
+                  -hf_id, -TW, -DEPTH, -DINGMAN_R, -dplyr::starts_with("owp"), -is_dem_point
+                ),
+                dplyr::select(
+                  missing_cs,
+                  -hf_id, -dplyr::starts_with("owp"), -tmp_id
+                )
+              ) %>%
+    dplyr::group_by(hy_id, cs_id) %>%
     tidyr::fill(
-      c(cs_lengthm, Z_source, TW, DEPTH, DINGMAN_R)
-    ) %>% 
-    dplyr::ungroup() %>% 
+      c(cs_lengthm, Z_source)
+    ) %>%
+    dplyr::ungroup() %>%
     dplyr::select(
-      -point_type, 
+      -point_type,
       -class,
       -bottom, -left_bank, -right_bank,
       -has_relief, -valid_banks
     )
   
+  message(round(Sys.time()), " - Reclassifying cross section point types...")
+  
+  # reclassify
   final_cs <- hydrofabric3D::classify_points(final_cs)
-  # final_classified %>% 
-  #   dplyr::filter(!valid_banks | !has_relief) 
-  # system.time({
-  #   final_classified <- 
-  #     final_cs %>% 
-  #     hydrofabric3D::classify_points()
-  # })
 
-  final_cs <- dplyr::bind_rows(
-                dplyr::relocate(
-                  final_cs,
-                    hy_id, cs_id, pt_id, 
-                    Z, relative_distance, cs_lengthm, class, point_type, 
-                    X, Y, Z_source, bottom, left_bank, right_bank, valid_banks, has_relief, 
-                    TW, DEPTH, DINGMAN_R, is_dem_point
-                  ),
-                dplyr::relocate(  
-                  dplyr::mutate(
-                    dplyr::select(missing_cs, 
-                                  -tmp_id, -hf_id, -owp_tw_inchan, -owp_y_inchan, -owp_tw_bf, -owp_y_bf, -owp_dingman_r),
-                    TW           = NA,
-                    DEPTH        = NA, 
-                    DINGMAN_R    = NA,
-                    is_dem_point = TRUE
-                  ),
-                  hy_id, cs_id, pt_id, 
-                  Z, relative_distance, cs_lengthm, class, point_type, 
-                  X, Y, Z_source, bottom, left_bank, right_bank, valid_banks, has_relief, 
-                  TW, DEPTH, DINGMAN_R, is_dem_point
-                )
-              )
+  # final_uids <- final_cs %>% hydrofabric3D::get_unique_tmp_ids()
+  # random_uids <- sample(x=final_uids, size=12)
+  # cs_subset <-  dplyr::filter(hydrofabric3D::add_tmp_id(final_cs), 
+                        # tmp_id %in% random_uids) 
+  # hydrofabric3D::classify_points(cs_subset) %>%  hydrofabric3D::plot_cs_pts(color = "point_type")
+  
+  starting_uids <- hydrofabric3D::get_unique_tmp_ids(cs_pts)
+  ending_uids   <- hydrofabric3D::get_unique_tmp_ids(final_cs)
+
+  has_same_number_of_uids          <- length(starting_uids) == length(ending_uids)
+  all_starting_uids_in_ending_uids <- all(starting_uids %in% ending_uids)
+  all_ending_uids_in_starting_uids <- all(ending_uids %in% starting_uids)
+
+  # throw some warnings if:
+  # - the number of uids in the input is different from the output
+  # - there are missing hy_id/cs_id
+  if (!has_same_number_of_uids) {
+    warning(paste0("The number of unique hy_id/cs_id is different in the ",
+                   "starting cross section points from the final cross section points:",
+                   "\n > Starting number of unique hy_id/cs_id: ", length(starting_uids),
+                   "\n > Ending number of unique hy_id/cs_id: ", length(ending_uids)
+                   ))
+  }
+
+  if (!all_starting_uids_in_ending_uids) {
+    number_uids_not_in_ending_uids <- length(starting_uids[!starting_uids %in% ending_uids])
+
+    # starting_uids %in% ending_uids
+    warning(
+      paste0("Missing hy_id/cs_id in output that are in the starting input cross section points: ",
+            "\n > Number of hy_id/cs_id missing: ", number_uids_not_in_ending_uids
+             )
+      )
+
+    # warning(paste0(number_uids_not_in_ending_uids, " hy_id/cs_id from the input cross section points ",
+    #          "is missing from the output cross section points"))
+
+  }
+  
   # ----------------------------------------------------------------------------------------------------------------
   # ---- Upload the cross section points parquet to S3 ----
   # ----------------------------------------------------------------------------------------------------------------
-  
+
 # name of file and path to save the cross section points parquet to
   out_file <- paste0("nextgen_", path_df$vpu[i], "_cross_sections.parquet")
   out_path <- paste0(final_dir, out_file)
   
-  message("Saving ML augmented cross section points to:\n - filepath: '", out_path, "'")
+  message(round(Sys.time()), " - Saving ML augmented cross section points to:\n - filepath: '", out_path, "'")
   
   # save cross section points as a parquet to out_path (lynker-spatial/02_cs_pts/cs_pts_<VPU num>.parquet)
   arrow::write_parquet(final_cs, out_path)
   
-  # command to copy cross section points parquet to S3
-  copy_cs_pts_to_s3 <- paste0("aws s3 cp ", out_path, " ", cs_pts_prefix, out_file,
-                              ifelse(is.null(aws_profile), "", paste0(" --profile ", aws_profile)))
-  
-  message("Copy VPU ", path_df$vpu[i], " cross sections to S3:\n - S3 copy command:\n'", 
-          paste0("aws s3 cp ", out_path, " ", CS_ML_PTS_S3_PREFIX, out_file,
-                 ifelse(is.null(aws_profile), "", paste0(" --profile ", aws_profile))), 
-          "'\n==========================")
+  s3_save_uri <- paste0(CS_ML_PTS_S3_PREFIX, out_file)
   
+  # command to copy cross section points parquet to S3
+  copy_cs_pts_to_s3 <- paste0("aws s3 cp ", 
+                              out_path,
+                              " ", 
+                              s3_save_uri,
+                              ifelse(is.null(aws_profile), "", paste0(" --profile ", aws_profile))
+                              )
+  
+  message(
+    "Copy VPU ", path_df$vpu[i], " ML augmented cross sections to S3:\n - S3 copy command:\n'",
+    copy_cs_pts_to_s3,
+    "'\n=========================="
+    )
+
   system(copy_cs_pts_to_s3, intern = TRUE)
   
-  end <- Sys.time()
+  end <- round(Sys.time())
   
   message("Finished augmenting cross section points with ML for VPU ", VPU)
   message("- Completed at: ", end)
   message("==========================")
   
-  rm(fixed_pts)
+  rm(net, net_subset, 
+     final_cs, cs_pts, 
+     inchannel_cs, bankful_cs)
   gc()
   gc()
   
diff --git a/runners/cs_runner/03_driver.R b/runners/cs_runner/04_driver.R
similarity index 75%
rename from runners/cs_runner/03_driver.R
rename to runners/cs_runner/04_driver.R
index cce576d..af8e1ca 100644
--- a/runners/cs_runner/03_driver.R
+++ b/runners/cs_runner/04_driver.R
@@ -11,3 +11,6 @@ source("runners/cs_runner/01_transects.R")
 
 # generate and upload cross sections points datasets 
 source("runners/cs_runner/02_cs_pts.R")
+
+# Apply machine learning top width and depth estimates to DEM cross section points
+source("runners/cs_runner/03_inject_ml.R")

From 4e40b009f87937371d680b63ebe8cdd4835c452b Mon Sep 17 00:00:00 2001
From: anguswg-ucsb <anguswatters@gmail.com>
Date: Wed, 17 Apr 2024 17:57:39 -0700
Subject: [PATCH 15/64] super minor cleanups

---
 runners/cs_runner/01_transects.R | 2 --
 runners/cs_runner/config_vars.R  | 2 +-
 2 files changed, 1 insertion(+), 3 deletions(-)

diff --git a/runners/cs_runner/01_transects.R b/runners/cs_runner/01_transects.R
index 5fe88c9..b3727ba 100644
--- a/runners/cs_runner/01_transects.R
+++ b/runners/cs_runner/01_transects.R
@@ -7,8 +7,6 @@ source("runners/cs_runner/config.R")
 # library(sf)
 # install.packages("devtools")
 
-# devtools::install_github("anguswg-ucsb/hydrofabric3D")
-
 # transect bucket prefix
 transects_prefix <- paste0(s3_bucket, version_prefix, "/3D/transects/")
 
diff --git a/runners/cs_runner/config_vars.R b/runners/cs_runner/config_vars.R
index 3d0d910..6d9e087 100644
--- a/runners/cs_runner/config_vars.R
+++ b/runners/cs_runner/config_vars.R
@@ -63,6 +63,6 @@ ML_OUTPUTS_PATH <- paste0(base_dir, "/ml-outputs/", ML_OUTPUTS_FILE)
 
 # path to the remote CONUS net parquet file
 CONUS_NETWORK_FILENAME <- "conus_net.parquet"
-CONUS_NETWORK_URI <- paste0(s3_bucket, version_prefix, "/", CONUS_NETWORK_FILENAME)
+CONUS_NETWORK_URI      <- paste0(s3_bucket, version_prefix, "/", CONUS_NETWORK_FILENAME)
 
 ### EDIT ###

From e4c2558fa3436ddd96b490a3991435d1a15846f1 Mon Sep 17 00:00:00 2001
From: anguswg-ucsb <anguswatters@gmail.com>
Date: Tue, 23 Apr 2024 08:23:54 -0700
Subject: [PATCH 16/64] random cleanup

---
 runners/cs_runner/download_fema100.R | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/runners/cs_runner/download_fema100.R b/runners/cs_runner/download_fema100.R
index fee84dd..8732b0e 100644
--- a/runners/cs_runner/download_fema100.R
+++ b/runners/cs_runner/download_fema100.R
@@ -44,7 +44,7 @@ FEMA_BUCKET_KEYS <- system(fema_list_command, intern = TRUE)
 # -------------------------------------------------------------------------------------
 
 # Parse the selected S3 objects keys from the FEMA100 bucket directory copy them to the local destination directory if the file does NOT exist yet
-for (key in FEMA_BUCKET_KEYS[3:length(FEMA_BUCKET_KEYS)]) {
+for (key in FEMA_BUCKET_KEYS[1:length(FEMA_BUCKET_KEYS)]) {
   local_save_path <- paste0(FEMA_FGB_PATH, "/", key)
   
   if(!file.exists(local_save_path)) {
@@ -54,9 +54,11 @@ for (key in FEMA_BUCKET_KEYS[3:length(FEMA_BUCKET_KEYS)]) {
     message("Downloading S3 object to:\n > '", local_save_path, "'")
     # message("Copy command:\n > '", copy_cmd, "'")
     
-    system(copy_cmd)
+    # system(copy_cmd)
     
     message(" > '", key, "' download complete!")
     message("----------------------------------")
+  } else {
+    message("File already exists at:\n > '", local_save_path, "'")
   }
 }

From 0b290ce2060d25c68a70b6d34cdf005a0e7b9cfc Mon Sep 17 00:00:00 2001
From: anguswg-ucsb <anguswatters@gmail.com>
Date: Tue, 23 Apr 2024 13:13:11 -0700
Subject: [PATCH 17/64] basic layout for fema processing

---
 runners/cs_runner/00_fema.R | 100 ++++++++++++++++++++++++++++++++++++
 1 file changed, 100 insertions(+)
 create mode 100644 runners/cs_runner/00_fema.R
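
Note: 00_fema.R below narrows the FEMA FGBs down to the states a VPU touches. A
hedged sketch of that idea with a stand-in flowline (USAboundaries::us_states()
and EPSG:5070 come from the script; sf::st_filter() is used here in place of
the script's sf::st_intersection() + sf::st_drop_geometry(), and the FEMA file
names are illustrative):

library(sf)
library(dplyr)

# stand-in for the nextgen "flowpaths" layer
flines <- sf::st_sf(
  id       = "wb-1",
  geometry = sf::st_sfc(sf::st_linestring(rbind(c(0, 0), c(1, 1))), crs = 5070)
)

# bounding box of the VPU's flowlines, as a polygon
flines_bb <- flines %>%
  sf::st_bbox() %>%
  sf::st_as_sfc() %>%
  sf::st_as_sf()

# states intersecting the bbox, names formatted to match the FGB file names
intersecting_states <- USAboundaries::us_states() %>%
  sf::st_transform(5070) %>%
  sf::st_filter(flines_bb) %>%
  dplyr::pull(name) %>%
  gsub(" ", "-", .)

# grep state names against the FEMA FGB file names
FEMA_files <- c("Tennessee-100yr-flood_valid.fgb", "North-Carolina-100yr-flood_valid.fgb")
matching_fema_files <- unlist(lapply(intersecting_states, function(state_name) {
  FEMA_files[grepl(state_name, FEMA_files)]
}))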

diff --git a/runners/cs_runner/00_fema.R b/runners/cs_runner/00_fema.R
new file mode 100644
index 0000000..6348dc9
--- /dev/null
+++ b/runners/cs_runner/00_fema.R
@@ -0,0 +1,100 @@
+library(dplyr) 
+
+# Generate the flowlines layer for the final cross_sections_<VPU>.gpkg for each VPU
+source("runners/cs_runner/config.R")
+
+# # # # load libraries
+# library(hydrofabric3D)
+# library(dplyr)
+# library(sf)
+# install.packages("devtools")
+
+# transect bucket prefix
+transects_prefix <- paste0(s3_bucket, version_prefix, "/3D/transects/")
+
+# paths to nextgen datasets and model attribute parquet files
+nextgen_files   <- list.files(nextgen_dir, full.names = FALSE)
+FEMA_files      <- list.files(FEMA_FGB_PATH, full.names = FALSE)
+transects_files <- list.files(transects_dir, full.names = FALSE)
+
+transects_files <- transects_files[!grepl("updated", transects_files)]
+
+# string to fill in "cs_source" column in output datasets
+net_source <- "hydrofabric3D"
+
+# ensure the files are in the same order and matched up by VPU
+path_df <- align_files_by_vpu(
+  x    = nextgen_files,
+  y    = transects_files,
+  base = base_dir
+)
+
+path_df
+
+us_states <- 
+  USAboundaries::us_states() %>% 
+  sf::st_transform(5070)
+
+# loop over each VPU and generate cross sections, then save locally and upload to S3 bucket
+for(i in 1:nrow(path_df)) {
+  
+  i = 8
+  
+  # nextgen file and full path
+  nextgen_file <- path_df$x[i]
+  nextgen_path <- paste0(nextgen_dir, nextgen_file)
+  
+  transect_file <- path_df$y[i]
+  transect_path <- paste0(transects_dir, transect_file)
+  
+  transect_path
+  
+  # # model attributes file and full path
+  # model_attr_file <- path_df$y[i]
+  # model_attr_path <- paste0(model_attr_dir, model_attr_file)
+  
+  message("Creating VPU ", path_df$vpu[i], "\n - transects: ", transect_file, "\n - flowpaths: '", nextgen_file, "'")
+  # message("Creating VPU ", path_df$vpu[i], " transects:\n - flowpaths: '", nextgen_file, "'\n - model attributes: '", model_attr_file, "'")
+  
+  # read in nextgen data
+  flines <- sf::read_sf(nextgen_path, layer = "flowpaths")
+  flines_bb <- 
+    flines %>% 
+    sf::st_bbox() %>% 
+    sf::st_as_sfc() %>% 
+    sf::st_as_sf()
+  
+  transects <- sf::read_sf(transect_path)
+  
+  
+  # find the states intersecting with the given VPU flowlines
+  intersecting_states <- 
+    sf::st_intersection(us_states, flines_bb) %>% 
+    sf::st_drop_geometry() %>% 
+    .$name %>% 
+    gsub(" ", "-", .)
+  
+  # get the matching FEMA floodplain FGB file names
+  matching_fema_files <- unlist(lapply(intersecting_states, function(state_name) {
+    FEMA_files[grepl(state_name, FEMA_files)]
+    }))
+  
+  # full paths
+  files_of_interest <-  paste0(FEMA_FGB_PATH, "/", matching_fema_files)
+  
+  # Iterate over each FEMA file and determine optimal widths for cross sections.....
+  # for (file in rev(files_of_interest)) {
+  
+    fema_fgb <- 
+      file %>% 
+      sf::read_sf() %>% 
+      sf::st_transform(5070)
+    
+    fema_bb <- 
+      fema_fgb %>% 
+      sf::st_bbox() %>% 
+      sf::st_as_sfc() %>% 
+      sf::st_as_sf()
+    
+  
+  }
\ No newline at end of file

From f7815262fd86fa8fc323c2c09d9b89a190e776dd Mon Sep 17 00:00:00 2001
From: anguswg-ucsb <anguswatters@gmail.com>
Date: Thu, 25 Apr 2024 13:41:15 -0700
Subject: [PATCH 18/64] added code to simplify, dissolve, and explode fema
 geometries via mapshaper CLI, currently in the download_fema100.R script but
 going to move it to its own script

---
 runners/cs_runner/00_fema.R          | 138 +++++++++++++++++-
 runners/cs_runner/config_vars.R      |  10 ++
 runners/cs_runner/download_fema100.R | 202 ++++++++++++++++++++++++++-
 3 files changed, 341 insertions(+), 9 deletions(-)
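
Note: the mapshaper cleaning step below builds a single-line shell command. A
minimal sketch of that command with illustrative paths (the flags and the
hard-coded /opt/homebrew/bin/mapshaper install location are taken from the
patch itself):

in_file  <- "FEMA100_geojson/Tennessee-100yr-flood_valid.geojson"
out_file <- "FEMA100_clean/Tennessee-100yr-flood_valid_clean.geojson"

mapshaper_command <- paste0(
  "node --max-old-space-size=16000 /opt/homebrew/bin/mapshaper ", in_file,
  " -simplify 0.15 visvalingam",  # keep ~15% of removable vertices (Visvalingam)
  " -dissolve",                   # merge features, removing internal boundaries
  " -explode",                    # split multipart features into single parts
  " -o ", out_file
)
# system(mapshaper_command)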

diff --git a/runners/cs_runner/00_fema.R b/runners/cs_runner/00_fema.R
index 6348dc9..c2dfee9 100644
--- a/runners/cs_runner/00_fema.R
+++ b/runners/cs_runner/00_fema.R
@@ -1,5 +1,58 @@
 library(dplyr) 
 
+source("runners/cs_runner/config.R")
+
+# transect bucket prefix
+transects_prefix <- paste0(s3_bucket, version_prefix, "/3D/transects/")
+
+# paths to nextgen datasets and model attribute parquet files
+nextgen_files   <- list.files(nextgen_dir, full.names = FALSE)
+FEMA_FGB_files  <- list.files(FEMA_FGB_PATH, full.names = TRUE)
+FEMA_files      <- list.files(FEMA_DISSOLVED_PATH, full.names = TRUE)
+# FEMA_BB_files   <- list.files(FEMA_FGB_BB_PATH, full.names = TRUE)
+
+transects_files <- list.files(transects_dir, full.names = FALSE)
+transects_files <- transects_files[!grepl("updated", transects_files)]
+
+# string to fill in "cs_source" column in output datasets
+net_source <- "hydrofabric3D"
+
+# ensure the files are in the same order and matched up by VPU
+path_df <- align_files_by_vpu(
+  x    = nextgen_files,
+  y    = transects_files,
+  base = base_dir
+)
+
+#############################################################
+########################################################################
+# loop over each VPU and generate cross sections, then save locally and upload to S3 bucket
+for(i in 1:nrow(path_df)) {
+  # nextgen file and full path
+  nextgen_file <- path_df$x[i]
+  nextgen_path <- paste0(nextgen_dir, nextgen_file)
+  
+  transect_file <- path_df$y[i]
+  transect_path <- paste0(transects_dir, transect_file)
+  
+  transect_path
+  
+  # # model attributes file and full path
+  # model_attr_file <- path_df$y[i]
+  # model_attr_path <- paste0(model_attr_dir, model_attr_file)
+  # FEMA_BB_files
+  
+  message("Creating VPU ", path_df$vpu[i], "\n - transects: ", transect_file, "\n - flowpaths: '", nextgen_file, "'")
+  # message("Creating VPU ", path_df$vpu[i], " transects:\n - flowpaths: '", nextgen_file, "'\n - model attributes: '", model_attr_file, "'")
+  FEMA_files
+  
+  fema <- sf::read_sf("/Users/anguswatters/Desktop/lynker-spatial/FEMA100_dissolved/Tennessee-100yr-flood_valid_dissolved.geojson")
+  plot(fema$geometry)
+  
+  }
+
+library(dplyr) 
+
 # Generate the flowlines layer for the final cross_sections_<VPU>.gpkg for each VPU
 source("runners/cs_runner/config.R")
 
@@ -15,8 +68,9 @@ transects_prefix <- paste0(s3_bucket, version_prefix, "/3D/transects/")
 # paths to nextgen datasets and model attribute parquet files
 nextgen_files   <- list.files(nextgen_dir, full.names = FALSE)
 FEMA_files      <- list.files(FEMA_FGB_PATH, full.names = FALSE)
-transects_files <- list.files(transects_dir, full.names = FALSE)
+FEMA_BB_files   <- list.files(FEMA_FGB_BB_PATH, full.names = TRUE)
 
+transects_files <- list.files(transects_dir, full.names = FALSE)
 transects_files <- transects_files[!grepl("updated", transects_files)]
 
 # string to fill in "cs_source" column in output datasets
@@ -29,14 +83,35 @@ path_df <- align_files_by_vpu(
   base = base_dir
 )
 
-path_df
-
-us_states <- 
-  USAboundaries::us_states() %>% 
-  sf::st_transform(5070)
 
 # loop over each VPU and generate cross sections, then save locally and upload to S3 bucket
 for(i in 1:nrow(path_df)) {
+
+  i = 8
+  
+  # nextgen file and full path
+  nextgen_file <- path_df$x[i]
+  nextgen_path <- paste0(nextgen_dir, nextgen_file)
+  
+  # transect_file <- path_df$y[i]
+  # transect_path <- paste0(transects_dir, transect_file)
+  # transect_path
+  
+  # # model attributes file and full path
+  # model_attr_file <- path_df$y[i]
+  # model_attr_path <- paste0(model_attr_dir, model_attr_file)
+  FEMA_BB_files
+  
+  message("Creating VPU ", path_df$vpu[i], "\n - transects: ", transect_file, "\n - flowpaths: '", nextgen_file, "'")
+  # message("Creating VPU ", path_df$vpu[i], " transects:\n - flowpaths: '", nextgen_file, "'\n - model attributes: '", model_attr_file, "'")
+  
+  # read in nextgen data
+  flines <- sf::read_sf(nextgen_path, layer = "flowpaths")
+
+}
+
+# loop over each VPU and generate cross sections, then save locally and upload to S3 bucket
+# for(i in 1:nrow(path_df)) {
   
   i = 8
   
@@ -52,6 +127,7 @@ for(i in 1:nrow(path_df)) {
   # # model attributes file and full path
   # model_attr_file <- path_df$y[i]
   # model_attr_path <- paste0(model_attr_dir, model_attr_file)
+  FEMA_BB_files
   
   message("Creating VPU ", path_df$vpu[i], "\n - transects: ", transect_file, "\n - flowpaths: '", nextgen_file, "'")
   # message("Creating VPU ", path_df$vpu[i], " transects:\n - flowpaths: '", nextgen_file, "'\n - model attributes: '", model_attr_file, "'")
@@ -85,6 +161,7 @@ for(i in 1:nrow(path_df)) {
   # Iterate over each FEMA file and determine optimal widths for cross sections.....
   # for (file in rev(files_of_interest)) {
   
+  file = "/Users/anguswatters/Desktop/lynker-spatial/FEMA100/Tennessee-100yr-flood_valid.fgb" 
     fema_fgb <- 
       file %>% 
       sf::read_sf() %>% 
@@ -95,6 +172,53 @@ for(i in 1:nrow(path_df)) {
       sf::st_bbox() %>% 
       sf::st_as_sfc() %>% 
       sf::st_as_sf()
+
+    # fline_subset <- 
+    # flines %>% 
+    # sf::st_intersection(fema_fgb)
+    
+    fline_fema_intersects <- sf::st_intersects(flines, fema_fgb)
+    fema_fline_intersects <- sf::st_intersects(fema_fgb, flines)
+    
+    fema_subset <- 
+      fema_fgb[unlist(fema_fline_intersects), ]  %>% 
+      rmapshaper::ms_simplify()
+    
+    fema_subset$FLD_AR_ID %>% unique() %>% length()
+    
+    fema_subset <- fema_subset %>% 
+      rmapshaper::ms_dissolve(field = "FLD_AR_ID")
+    
+    
+    fema_subset %>% mapview::npts()
+    
+    flines_with_fema      <- flines[lengths(fline_fema_intersects) > 0,]
+    
+    
+    # flines %>% 
+    #   sf::st_filter(
+    #     fema_fgb, 
+    #     .predicate = st_touches
+    #   )
+    fema_subset %>% mapview::npts()
+    
+    transects_subset <- 
+      transects %>% 
+      dplyr::filter(hy_id %in% flines_with_fema$id)
+    
+    flines %>% sf::st_crs()
+    mapview::mapview(flines) + fema_bb
+    
+    # mapview::mapview(fema_fgb, col.regions = "dodgerblue") +
+    mapview::mapview(fema_subset, col.regions = "dodgerblue") +
+      mapview::mapview(transects_subset, color = "red") +
+      mapview::mapview(flines_with_fema, color = "green") 
     
+    message("file: ", file)
   
-  }
\ No newline at end of file
+  # }
\ No newline at end of file
diff --git a/runners/cs_runner/config_vars.R b/runners/cs_runner/config_vars.R
index 6d9e087..3019047 100644
--- a/runners/cs_runner/config_vars.R
+++ b/runners/cs_runner/config_vars.R
@@ -22,6 +22,16 @@ FEMA_S3_DIR <- paste0(FEMA_S3_BUCKET, FEMA_S3_BUCKET_PREFIX)
 
 # FEMA100 year flood map FGB save location (temporary, will be deleted after processing)
 FEMA_FGB_PATH <- paste0(base_dir, "/FEMA100")
+FEMA_GEOJSON_PATH    <- paste0(base_dir, "/FEMA100_geojson")
+FEMA_CLEAN_PATH      <- paste0(base_dir, "/FEMA100_clean")
+FEMA_GPKG_PATH       <- paste0(base_dir, "/FEMA100_gpkg")
+FEMA_GPKG_BB_PATH    <- paste0(base_dir, "/FEMA100_bounding_box")
+
+# TODO: these can be deleted
+FEMA_SIMPLIFIED_PATH <- paste0(base_dir, "/FEMA100_simplified")
+FEMA_DISSOLVED_PATH  <- paste0(base_dir, "/FEMA100_dissolved")
+FEMA_EXPLODED_PATH   <- paste0(base_dir, "/FEMA100_exploded")
+
 
 # ----------------------------------------------------------------------------
 # ---- Cross section point extraction constant variables ----
diff --git a/runners/cs_runner/download_fema100.R b/runners/cs_runner/download_fema100.R
index 8732b0e..b7d745a 100644
--- a/runners/cs_runner/download_fema100.R
+++ b/runners/cs_runner/download_fema100.R
@@ -7,14 +7,61 @@
 source("runners/cs_runner/config_vars.R")
 
 # -------------------------------------------------------------------------------------
-# ---- Create FEMA100/ directory (if it does NOT exist) ----
+# ---- Create FEMA100/ directory and bounding box dir (if it does NOT exist) ----
 # -------------------------------------------------------------------------------------
-
+# create FEMA FGB directory (if not exists) 
 if (!dir.exists(FEMA_FGB_PATH)) {
   message(paste0("FEMA100/ directory does not exist...\nCreating directory:\n > '", FEMA_FGB_PATH, "'"))
   dir.create(FEMA_FGB_PATH)
 }
 
+# create geojsons directory (if not exists) 
+if (!dir.exists(FEMA_GEOJSON_PATH)) {
+  message(paste0(FEMA_GEOJSON_PATH, " directory does not exist...\nCreating directory:\n > '", FEMA_GEOJSON_PATH, "'"))
+  dir.create(FEMA_GEOJSON_PATH)
+}
+
+# create directory for cleaned FEMA geometries (if not exists) 
+if (!dir.exists(FEMA_CLEAN_PATH)) {
+  message(paste0(FEMA_CLEAN_PATH, " directory does not exist...\nCreating directory:\n > '", FEMA_CLEAN_PATH, "'"))
+  dir.create(FEMA_CLEAN_PATH)
+}
+
+# create directory for cleaned FEMA geometries as geopackages (if not exists) 
+if (!dir.exists(FEMA_GPKG_PATH)) {
+  message(paste0(FEMA_GPKG_PATH, " directory does not exist...\nCreating directory:\n > '", FEMA_GPKG_PATH, "'"))
+  dir.create(FEMA_GPKG_PATH)
+}
+
+# create simplified geojsons directory (if not exists)
+if (!dir.exists(FEMA_SIMPLIFIED_PATH)) {
+  message(paste0(FEMA_SIMPLIFIED_PATH, " directory does not exist...\nCreating directory:\n > '", FEMA_SIMPLIFIED_PATH, "'"))
+  dir.create(FEMA_SIMPLIFIED_PATH)
+}
+
+# create dissolved geojsons directory (if not exists)
+if (!dir.exists(FEMA_DISSOLVED_PATH)) {
+  message(paste0(FEMA_DISSOLVED_PATH, " directory does not exist...\nCreating directory:\n > '", FEMA_DISSOLVED_PATH, "'"))
+  dir.create(FEMA_DISSOLVED_PATH)
+}
+
+# create exploded geojsons directory (if not exists)
+if (!dir.exists(FEMA_EXPLODED_PATH)) {
+  message(paste0(FEMA_EXPLODED_PATH, " directory does not exist...\nCreating directory:\n > '", FEMA_EXPLODED_PATH, "'"))
+  dir.create(FEMA_EXPLODED_PATH)
+}
+
+# create FEMA GPKG Bounding Boxes directory (if not exists)
+if (!dir.exists(FEMA_GPKG_BB_PATH)) {
+  message(paste0(FEMA_GPKG_BB_PATH, " directory does not exist...\nCreating directory:\n > '", FEMA_GPKG_BB_PATH, "'"))
+  dir.create(FEMA_GPKG_BB_PATH)
+}
+
+
+# -------------------------------------------------------------------------------------
+# ---- Get list of FEMA FGB files in S3 bucket ----
+# -------------------------------------------------------------------------------------
+
 # list objects in S3 bucket, and regular expression match to nextgen_.gpkg pattern
 fema_list_command <- paste0('#!/bin/bash
             # AWS S3 Bucket and Directory information
@@ -62,3 +109,154 @@ for (key in FEMA_BUCKET_KEYS[1:length(FEMA_BUCKET_KEYS)]) {
     message("File already exists at:\n > '", local_save_path, "'")
   }
 }
+# -------------------------------------------------------------------------------------
+# ---- Run ogr2ogr to get FGB files into geojson ----
+# -------------------------------------------------------------------------------------
+
+for (key in FEMA_BUCKET_KEYS) {
+  
+  local_fema_path <- paste0(FEMA_FGB_PATH, "/", key)
+  
+  geojson_filename     <- gsub(".fgb", ".geojson", key)
+  geojson_save_path    <- paste0(FEMA_GEOJSON_PATH, "/", geojson_filename)
+  
+  message("S3 Key: '", key, "'")
+  message("Converting \n > '", key, "' to geojson '", geojson_filename, "'")
+  
+  ogr2ogr_command = paste0("ogr2ogr ", geojson_save_path, " ", local_fema_path)
+  
+  system(ogr2ogr_command)
+  
+  message("Saved '", geojson_filename, "' saved to: \n > '", geojson_save_path, "'")
+  message()
+}  
+
+# -------------------------------------------------------------------------------------
+# ---- Clean FEMA geometries (Simplify, Dissolve, Explode) ----
+# -------------------------------------------------------------------------------------
+
+# paths to FEMA 100 year flood plain files
+FEMA_geojson_paths      <- list.files(FEMA_GEOJSON_PATH, full.names = TRUE)
+# FEMA_BB_paths   <- list.files(FEMA_GPKG_BB_PATH, full.names = TRUE)
+
+for (fema_file in FEMA_geojson_paths) {
+  message("Fema 100 year flood plain:\n > '", basename(fema_file), "'")
+  # message("Fema 100 year flood plain:\n > '", fema_file, "'")
+  output_clean_filename <- gsub(".geojson", "_clean.geojson", basename(fema_file))
+  output_path <- paste0(FEMA_CLEAN_PATH, "/", output_clean_filename)
+  
+  message("Running mapshaper 'simplify', 'dissolve', and 'explode' via CLI...")
+  
+  # mapshaper_command = paste0('node  --max-old-space-size=16000 /opt/homebrew/bin/mapshaper ', fema_file, ' -simplify 0.15 visvalingam -o ', output_path)
+  # test_file_path <- "/Users/anguswatters/Desktop/lynker-spatial/FEMA100_simplified/Wyoming-100yr-flood_valid_clean.geojson"
+  mapshaper_command = paste0('node --max-old-space-size=16000 /opt/homebrew/bin/mapshaper ', fema_file, 
+                             ' -simplify 0.15 visvalingam', 
+                             ' -dissolve', 
+                             ' -explode', 
+                             ' -o ', output_path
+                             )
+  system(mapshaper_command)
+  message("Mapshaper command: ", mapshaper_command)
+  message()
+}
+
+# -------------------------------------------------------------------------------------
+# ---- Convert cleaned FEMA geometries to geopackages ----
+# -------------------------------------------------------------------------------------
+
+# paths to FEMA 100 year flood plain files
+FEMA_clean_paths      <- list.files(FEMA_CLEAN_PATH, full.names = TRUE)
+
+for (fema_file in FEMA_clean_paths) {
+  message("Fema 100 year flood plain:\n > '", basename(fema_file), "'")
+  # message("Fema 100 year flood plain:\n > '", fema_file, "'")
+  output_gpkg_filename <- gsub("_clean.geojson", "_clean.gpkg", basename(fema_file))
+  output_path <- paste0(FEMA_GPKG_PATH, "/", output_gpkg_filename)
+  
+  message("Converting geojson files to gpkg...")
+  
+  message("Converting \n > '", fema_file, "' to geojson '", output_gpkg_filename, "'")
+  
+  # mapshaper_command = paste0('node  --max-old-space-size=16000 /opt/homebrew/bin/mapshaper ', fema_file, ' -simplify 0.15 visvalingam -o ', output_path)
+  # test_file_path <- "/Users/anguswatters/Desktop/lynker-spatial/FEMA100_simplified/Wyoming-100yr-flood_valid_clean.geojson"
+  ogr2ogr_command = paste0("ogr2ogr -nlt MULTIPOLYGON ", output_path, " ", fema_file)
+  
+  # system(ogr2ogr_command)
+  
+  message("Saved '", output_gpkg_filename, "' saved to: \n > '", output_path, "'")
+  message()
+}
+
+# # -------------------------------------------------------------------------------------
+# # ---- Apply hydrofab::clean_geometries() to cleaned FEMA geometries  ----
+# # -------------------------------------------------------------------------------------
+# 
+# # paths to FEMA 100 year flood plain files
+# FEMA_gpkg_paths      <- list.files(FEMA_GPKG_PATH, full.names = TRUE)
+# 
+# for (fema_file in FEMA_gpkg_paths) {
+#   message("Applying final cleaning process to:\n > '", basename(fema_file), "'")
+#   
+#   fema <- sf::read_sf(fema_file)
+#   
+#   fema
+#   
+#   # message("Fema 100 year flood plain:\n > '", fema_file, "'")
+#   output_gpkg_filename <- gsub("_clean.geojson", "_clean.gpkg", basename(fema_file))
+#   output_path <- paste0(FEMA_GPKG_PATH, "/", output_gpkg_filename)
+#   
+#   message("Converting geojson files to gpkg...")
+#   
+#   message("Converting \n > '", fema_file, "' to geojson '", output_gpkg_filename, "'")
+#   
+#   # mapshaper_command = paste0('node  --max-old-space-size=16000 /opt/homebrew/bin/mapshaper ', fema_file, ' -simplify 0.15 visvalingam -o ', output_path)
+#   # test_file_path <- "/Users/anguswatters/Desktop/lynker-spatial/FEMA100_simplified/Wyoming-100yr-flood_valid_clean.geojson"
+#   ogr2ogr_command = paste0("ogr2ogr -nlt MULTIPOLYGON ", output_path, " ", fema_file)
+#   
+#   system(ogr2ogr_command)
+#   
+#   message("Saved '", output_gpkg_filename, "' saved to: \n > '", output_path, "'")
+#   message()
+# }
+
+# -------------------------------------------------------------------------------------
+# ---- Generate bounding box gpkg for each FEMA FGB ----
+# -------------------------------------------------------------------------------------
+
+for (key in FEMA_BUCKET_KEYS) {
+ 
+  local_fema_path <- paste0(FEMA_FGB_PATH, "/", key)
+  
+  gpkg_filename   <- gsub(".fgb", "_bb.gpkg", key)
+  bb_save_path    <- paste0(FEMA_GPKG_BB_PATH, "/", gpkg_filename)
+  
+  message("S3 Key: '", key, "'")
+  message("Local FEMA file:\n > '", local_fema_path, "'")
+  message("Local output FEMA bounding box file:\n > '", bb_save_path, "'")
+  
+  # fema <- sf::read_sf(local_fema_path)
+  
+  fema_bb <- 
+    local_fema_path %>% 
+    sf::read_sf() %>% 
+    sf::st_bbox() %>% 
+    sf::st_as_sfc() %>% 
+    sf::st_as_sf() %>% 
+    dplyr::mutate(
+      fema_fgb      = key,
+      fema_fgb_path = local_fema_path,
+      state         = gsub("-100yr-flood_valid.fgb", "", key)
+    ) %>% 
+    dplyr::select(fema_fgb, fema_fgb_path, state, geometry = x) %>% 
+    sf::st_transform(5070)
+  
+  message("Saving FEMA bounding box file:\n > '", bb_save_path, "'")
+  
+  sf::write_sf(fema_bb, bb_save_path)
+  message()
+}  
+  
+
+
+
+

From d49133e09c88028b5fa7a738fd6c31a44914e15c Mon Sep 17 00:00:00 2001
From: anguswg-ucsb <anguswatters@gmail.com>
Date: Fri, 26 Apr 2024 14:38:57 -0700
Subject: [PATCH 19/64] added code to apply clean_geometries from hydrofab

---
 runners/cs_runner/download_fema100.R | 63 +++++++++++++++-------------
 1 file changed, 34 insertions(+), 29 deletions(-)
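
Note: a hedged sketch of the hydrofab cleaning step this patch enables below.
The sf::st_cast() to POLYGON and the ID argument mirror the patch; the input
geopackage path is illustrative:

library(sf)
library(dplyr)

fema <- sf::read_sf("FEMA100_gpkg/Tennessee-100yr-flood_valid_clean.gpkg") %>%
  sf::st_transform(5070) %>%
  dplyr::mutate(fema_id = 1:dplyr::n()) %>%   # stable per-feature ID
  dplyr::relocate(fema_id)

# clean_geometry() expects single-part polygons and the name of the ID column
fema_clean <- hydrofab::clean_geometry(
  catchments = sf::st_cast(fema, "POLYGON"),
  ID         = "fema_id"
)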

diff --git a/runners/cs_runner/download_fema100.R b/runners/cs_runner/download_fema100.R
index b7d745a..c8a8dc2 100644
--- a/runners/cs_runner/download_fema100.R
+++ b/runners/cs_runner/download_fema100.R
@@ -166,6 +166,7 @@ for (fema_file in FEMA_geojson_paths) {
 
 # paths to FEMA 100 year flood plain files
 FEMA_clean_paths      <- list.files(FEMA_CLEAN_PATH, full.names = TRUE)
+# rmapshaper::ms_explode()
 
 for (fema_file in FEMA_clean_paths) {
   message("Fema 100 year flood plain:\n > '", basename(fema_file), "'")
@@ -181,8 +182,8 @@ for (fema_file in FEMA_clean_paths) {
   # test_file_path <- "/Users/anguswatters/Desktop/lynker-spatial/FEMA100_simplified/Wyoming-100yr-flood_valid_clean.geojson"
   ogr2ogr_command = paste0("ogr2ogr -nlt MULTIPOLYGON ", output_path, " ", fema_file)
   
-  # system(ogr2ogr_command)
-  
+  system(ogr2ogr_command)
+  # message("ogr2ogr: ", ogr2ogr_command)
   message("Saved '", output_gpkg_filename, "' saved to: \n > '", output_path, "'")
   message()
 }
@@ -191,33 +192,37 @@ for (fema_file in FEMA_clean_paths) {
 # # ---- Apply hydrofab::clean_geometries() to cleaned FEMA geometries  ----
 # # -------------------------------------------------------------------------------------
 # 
-# # paths to FEMA 100 year flood plain files
-# FEMA_gpkg_paths      <- list.files(FEMA_GPKG_PATH, full.names = TRUE)
-# 
-# for (fema_file in FEMA_gpkg_paths) {
-#   message("Applying final cleaning process to:\n > '", basename(fema_file), "'")
-#   
-#   fema <- sf::read_sf(fema_file)
-#   
-#   fema
-#   
-#   # message("Fema 100 year flood plain:\n > '", fema_file, "'")
-#   output_gpkg_filename <- gsub("_clean.geojson", "_clean.gpkg", basename(fema_file))
-#   output_path <- paste0(FEMA_GPKG_PATH, "/", output_gpkg_filename)
-#   
-#   message("Converting geojson files to gpkg...")
-#   
-#   message("Converting \n > '", fema_file, "' to geojson '", output_gpkg_filename, "'")
-#   
-#   # mapshaper_command = paste0('node  --max-old-space-size=16000 /opt/homebrew/bin/mapshaper ', fema_file, ' -simplify 0.15 visvalingam -o ', output_path)
-#   # test_file_path <- "/Users/anguswatters/Desktop/lynker-spatial/FEMA100_simplified/Wyoming-100yr-flood_valid_clean.geojson"
-#   ogr2ogr_command = paste0("ogr2ogr -nlt MULTIPOLYGON ", output_path, " ", fema_file)
-#   
-#   system(ogr2ogr_command)
-#   
-#   message("Saved '", output_gpkg_filename, "' saved to: \n > '", output_path, "'")
-#   message()
-# }
+# paths to FEMA 100 year flood plain files
+FEMA_gpkg_paths      <- list.files(FEMA_GPKG_PATH, full.names = TRUE)
+
+for (fema_file in FEMA_gpkg_paths) {
+  message("Applying final cleaning process to:\n > '", basename(fema_file), "'")
+
+  fema <- 
+    fema_file %>% 
+    sf::read_sf() %>% 
+    sf::st_transform(5070) %>% 
+    dplyr::mutate(
+      fema_id = 1:dplyr::n()
+    ) %>% 
+    dplyr::relocate(fema_id)
+  
+  fema_clean <- hydrofab::clean_geometry(catchments = sf::st_cast(fema, "POLYGON"), ID = "fema_id")
+  # mapview::mapview(fema, col.region = "red") + mapview::mapview(fema_clean, col.region = "green")
+  
+  # message("Fema 100 year flood plain:\n > '", fema_file, "'")
+  # output_gpkg_filename <- gsub("_clean.geojson", "_clean.gpkg", basename(fema_file))
+  # output_path <- paste0(FEMA_GPKG_PATH, "/", output_gpkg_filename)
+
+  message("Applying hydrofab::clean_geometry() \n > '", fema_file)
+  sf::write_sf(
+    fema_clean,
+    fema_file
+  )
+
+  message("Rewritting '", fema_file, "'")
+  message()
+}
 
 # -------------------------------------------------------------------------------------
 # ---- Generate bounding box gpkg for each FEMA FGB ----

From 633109ba9b78d5f16a25eb25fd8b0f7790d75a0a Mon Sep 17 00:00:00 2001
From: anguswg-ucsb <anguswatters@gmail.com>
Date: Tue, 30 Apr 2024 09:14:49 -0700
Subject: [PATCH 20/64] created a preprocess_fema.R script in cs_runner/ that
 takes FEMA FGBs, simplifies and cleans them, and outputs cleaned GPKGs for
 use with nextgen flowlines and transect generation

---
 runners/cs_runner/00_fema.R          |  86 +--------
 runners/cs_runner/config_vars.R      |   8 +-
 runners/cs_runner/download_fema100.R | 156 +----------------
 runners/cs_runner/preprocess_fema.R  | 249 +++++++++++++++++++++++++++
 4 files changed, 264 insertions(+), 235 deletions(-)
 create mode 100644 runners/cs_runner/preprocess_fema.R
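
Note: a condensed sketch of the per-state pipeline that preprocess_fema.R
consolidates, with the stages taken from the code this patch moves out of
download_fema100.R (paths and tool locations are illustrative):

preprocess_one_fema_fgb <- function(fgb_path, out_gpkg_path) {
  geojson_path <- sub("\\.fgb$", ".geojson", fgb_path)
  clean_path   <- sub("\\.geojson$", "_clean.geojson", geojson_path)

  # 1) FGB -> GeoJSON
  system(paste0("ogr2ogr ", geojson_path, " ", fgb_path))

  # 2) simplify / dissolve / explode with mapshaper
  system(paste0("node --max-old-space-size=16000 /opt/homebrew/bin/mapshaper ",
                geojson_path,
                " -simplify 0.15 visvalingam -dissolve -explode -o ", clean_path))

  # 3) cleaned GeoJSON -> MULTIPOLYGON geopackage
  system(paste0("ogr2ogr -nlt MULTIPOLYGON ", out_gpkg_path, " ", clean_path))

  invisible(out_gpkg_path)
}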

diff --git a/runners/cs_runner/00_fema.R b/runners/cs_runner/00_fema.R
index c2dfee9..5835ff3 100644
--- a/runners/cs_runner/00_fema.R
+++ b/runners/cs_runner/00_fema.R
@@ -1,58 +1,5 @@
 library(dplyr) 
 
-source("runners/cs_runner/config.R")
-
-# transect bucket prefix
-transects_prefix <- paste0(s3_bucket, version_prefix, "/3D/transects/")
-
-# paths to nextgen datasets and model attribute parquet files
-nextgen_files   <- list.files(nextgen_dir, full.names = FALSE)
-FEMA_FGB_files  <- list.files(FEMA_FGB_PATH, full.names = TRUE)
-FEMA_files      <- list.files(FEMA_DISSOLVED_PATH, full.names = TRUE)
-# FEMA_BB_files   <- list.files(FEMA_FGB_BB_PATH, full.names = TRUE)
-
-transects_files <- list.files(transects_dir, full.names = FALSE)
-transects_files <- transects_files[!grepl("updated", transects_files)]
-
-# string to fill in "cs_source" column in output datasets
-net_source <- "hydrofabric3D"
-
-# ensure the files are in the same order and matched up by VPU
-path_df <- align_files_by_vpu(
-  x    = nextgen_files,
-  y    = transects_files,
-  base = base_dir
-)
-
-#############################################################
-########################################################################
-# loop over each VPU and generate cross sections, then save locally and upload to S3 bucket
-for(i in 1:nrow(path_df)) {
-  # nextgen file and full path
-  nextgen_file <- path_df$x[i]
-  nextgen_path <- paste0(nextgen_dir, nextgen_file)
-  
-  transect_file <- path_df$y[i]
-  transect_path <- paste0(transects_dir, transect_file)
-  
-  transect_path
-  
-  # # model attributes file and full path
-  # model_attr_file <- path_df$y[i]
-  # model_attr_path <- paste0(model_attr_dir, model_attr_file)
-  # FEMA_BB_files
-  
-  message("Creating VPU ", path_df$vpu[i], "\n - transects: ", transect_file, "\n - flowpaths: '", nextgen_file, "'")
-  # message("Creating VPU ", path_df$vpu[i], " transects:\n - flowpaths: '", nextgen_file, "'\n - model attributes: '", model_attr_file, "'")
-  FEMA_files
-  
-  fema <- sf::read_sf("/Users/anguswatters/Desktop/lynker-spatial/FEMA100_dissolved/Tennessee-100yr-flood_valid_dissolved.geojson")
-  plot(fema$geometry)
-  
-  }
-
-library(dplyr) 
-
 # Generate the flowlines layer for the final cross_sections_<VPU>.gpkg for each VPU
 source("runners/cs_runner/config.R")
 
@@ -68,8 +15,7 @@ transects_prefix <- paste0(s3_bucket, version_prefix, "/3D/transects/")
 # paths to nextgen datasets and model attribute parquet files
 nextgen_files   <- list.files(nextgen_dir, full.names = FALSE)
 FEMA_files      <- list.files(FEMA_FGB_PATH, full.names = FALSE)
-FEMA_BB_files   <- list.files(FEMA_FGB_BB_PATH, full.names = TRUE)
-
+FEMA_BB_files   <- list.files(FEMA_FGB_BB_PATH, full.names = FALSE)
 transects_files <- list.files(transects_dir, full.names = FALSE)
 transects_files <- transects_files[!grepl("updated", transects_files)]
 
@@ -83,32 +29,11 @@ path_df <- align_files_by_vpu(
   base = base_dir
 )
 
+path_df
 
-# loop over each VPU and generate cross sections, then save locally and upload to S3 bucket
-for(i in 1:nrow(path_df)) {
-
-  i = 8
-  
-  # nextgen file and full path
-  nextgen_file <- path_df$x[i]
-  nextgen_path <- paste0(nextgen_dir, nextgen_file)
-  
-  # transect_file <- path_df$y[i]
-  # transect_path <- paste0(transects_dir, transect_file)
-  # transect_path
-  
-  # # model attributes file and full path
-  # model_attr_file <- path_df$y[i]
-  # model_attr_path <- paste0(model_attr_dir, model_attr_file)
-  FEMA_BB_files
-  
-  message("Creating VPU ", path_df$vpu[i], "\n - transects: ", transect_file, "\n - flowpaths: '", nextgen_file, "'")
-  # message("Creating VPU ", path_df$vpu[i], " transects:\n - flowpaths: '", nextgen_file, "'\n - model attributes: '", model_attr_file, "'")
-  
-  # read in nextgen data
-  flines <- sf::read_sf(nextgen_path, layer = "flowpaths")
-
-}
+us_states <- 
+  USAboundaries::us_states() %>% 
+  sf::st_transform(5070)
 
 # loop over each VPU and generate cross sections, then save locally and upload to S3 bucket
 # for(i in 1:nrow(path_df)) {
@@ -127,7 +52,6 @@ for(i in 1:nrow(path_df)) {
   # # model attributes file and full path
   # model_attr_file <- path_df$y[i]
   # model_attr_path <- paste0(model_attr_dir, model_attr_file)
-  FEMA_BB_files
   
   message("Creating VPU ", path_df$vpu[i], "\n - transects: ", transect_file, "\n - flowpaths: '", nextgen_file, "'")
   # message("Creating VPU ", path_df$vpu[i], " transects:\n - flowpaths: '", nextgen_file, "'\n - model attributes: '", model_attr_file, "'")
diff --git a/runners/cs_runner/config_vars.R b/runners/cs_runner/config_vars.R
index 3019047..71233c9 100644
--- a/runners/cs_runner/config_vars.R
+++ b/runners/cs_runner/config_vars.R
@@ -25,12 +25,12 @@ FEMA_FGB_PATH <- paste0(base_dir, "/FEMA100")
 FEMA_GEOJSON_PATH    <- paste0(base_dir, "/FEMA100_geojson")
 FEMA_CLEAN_PATH      <- paste0(base_dir, "/FEMA100_clean")
 FEMA_GPKG_PATH       <- paste0(base_dir, "/FEMA100_gpkg")
-FEMA_GPKG_BB_PATH    <- paste0(base_dir, "/FEMA100_bounding_box")
+FEMA_GPKG_BB_PATH    <- paste0(base_dir, "/FEMA100_bounding_box") # TODO: Probably can be deleted too, not sure yet
 
 # TODO: these can be deleted
-FEMA_SIMPLIFIED_PATH <- paste0(base_dir, "/FEMA100_simplified")
-FEMA_DISSOLVED_PATH  <- paste0(base_dir, "/FEMA100_dissolved")
-FEMA_EXPLODED_PATH   <- paste0(base_dir, "/FEMA100_exploded")
+# FEMA_SIMPLIFIED_PATH <- paste0(base_dir, "/FEMA100_simplified")
+# FEMA_DISSOLVED_PATH  <- paste0(base_dir, "/FEMA100_dissolved")
+# FEMA_EXPLODED_PATH   <- paste0(base_dir, "/FEMA100_exploded")
 
 
 # ----------------------------------------------------------------------------
diff --git a/runners/cs_runner/download_fema100.R b/runners/cs_runner/download_fema100.R
index c8a8dc2..0606142 100644
--- a/runners/cs_runner/download_fema100.R
+++ b/runners/cs_runner/download_fema100.R
@@ -9,7 +9,7 @@ source("runners/cs_runner/config_vars.R")
 # -------------------------------------------------------------------------------------
 # ---- Create FEMA100/ directory and bounding box dir (if it does NOT exist) ----
 # -------------------------------------------------------------------------------------
-# create FEMA FGB directory (if not exists) 
+
 if (!dir.exists(FEMA_FGB_PATH)) {
   message(paste0("FEMA100/ directory does not exist...\nCreating directory:\n > '", FEMA_FGB_PATH, "'"))
   dir.create(FEMA_FGB_PATH)
@@ -57,6 +57,10 @@ if (!dir.exists(FEMA_GPKG_BB_PATH)) {
   dir.create(FEMA_GPKG_BB_PATH)
 }
 
+if (!dir.exists(FEMA_FGB_BB_PATH)) {
+  message(paste0(FEMA_FGB_BB_PATH, " directory does not exist...\nCreating directory:\n > '", FEMA_FGB_BB_PATH, "'"))
+  dir.create(FEMA_FGB_BB_PATH)
+}
 
 # -------------------------------------------------------------------------------------
 # ---- Get list of FEMA FGB files in S3 bucket ----
@@ -91,7 +95,7 @@ FEMA_BUCKET_KEYS <- system(fema_list_command, intern = TRUE)
 # -------------------------------------------------------------------------------------
 
 # Parse the selected S3 object keys from the FEMA100 bucket directory and copy them to the local destination directory if the file does NOT exist yet
-for (key in FEMA_BUCKET_KEYS[1:length(FEMA_BUCKET_KEYS)]) {
+for (key in FEMA_BUCKET_KEYS) {
   local_save_path <- paste0(FEMA_FGB_PATH, "/", key)
   
   if(!file.exists(local_save_path)) {
@@ -109,157 +113,9 @@ for (key in FEMA_BUCKET_KEYS[1:length(FEMA_BUCKET_KEYS)]) {
     message("File already exists at:\n > '", local_save_path, "'")
   }
 }
-# -------------------------------------------------------------------------------------
-# ---- Run ogr2ogr to get FGB files into geojson ----
-# -------------------------------------------------------------------------------------
-
-for (key in FEMA_BUCKET_KEYS) {
-  
-  local_fema_path <- paste0(FEMA_FGB_PATH, "/", key)
-  
-  geojson_filename     <- gsub(".fgb", ".geojson", key)
-  geojson_save_path    <- paste0(FEMA_GEOJSON_PATH, "/", geojson_filename)
-  
-  message("S3 Key: '", key, "'")
-  message("Converting \n > '", key, "' to geojson '", geojson_filename, "'")
-  
-  ogr2ogr_command = paste0("ogr2ogr ", geojson_save_path, " ", local_fema_path)
-  
-  system(ogr2ogr_command)
-  
-  message("Saved '", geojson_filename, "' saved to: \n > '", geojson_save_path, "'")
-  message()
-}  
-
-# -------------------------------------------------------------------------------------
-# ---- Clean FEMA geometries (Simplify, Dissolve, Explode) ----
-# -------------------------------------------------------------------------------------
-
-# paths to FEMA 100 year flood plain files
-FEMA_geojson_paths      <- list.files(FEMA_GEOJSON_PATH, full.names = TRUE)
-# FEMA_BB_paths   <- list.files(FEMA_GPKG_BB_PATH, full.names = TRUE)
-
-for (fema_file in FEMA_geojson_paths) {
-  message("Fema 100 year flood plain:\n > '", basename(fema_file), "'")
-  # message("Fema 100 year flood plain:\n > '", fema_file, "'")
-  output_clean_filename <- gsub(".geojson", "_clean.geojson", basename(fema_file))
-  output_path <- paste0(FEMA_CLEAN_PATH, "/", output_clean_filename)
-  
-  message("Running mapshaper 'simplify', 'dissolve', and 'explode' via CLI...")
-  
-  # mapshaper_command = paste0('node  --max-old-space-size=16000 /opt/homebrew/bin/mapshaper ', fema_file, ' -simplify 0.15 visvalingam -o ', output_path)
-  # test_file_path <- "/Users/anguswatters/Desktop/lynker-spatial/FEMA100_simplified/Wyoming-100yr-flood_valid_clean.geojson"
-  mapshaper_command = paste0('node  --max-old-space-size=16000 /opt/homebrew/bin/mapshaper ', fema_file, 
-                             ' -simplify 0.15 visvalingam \\', 
-                             ' -dissolve \\', 
-                             ' -explode \\', 
-                             ' -o ', output_path
-                             )
-  system(mapshaper_command)
-  message("Mapshaper command: ", mapshaper_command)
-  message()
-}
 
-# -------------------------------------------------------------------------------------
-# ---- Convert cleaned FEMA geometries to geopackages ----
-# -------------------------------------------------------------------------------------
 
-# paths to FEMA 100 year flood plain files
-FEMA_clean_paths      <- list.files(FEMA_CLEAN_PATH, full.names = TRUE)
-# rmapshaper::ms_explode()
 
-for (fema_file in FEMA_clean_paths) {
-  message("Fema 100 year flood plain:\n > '", basename(fema_file), "'")
-  # message("Fema 100 year flood plain:\n > '", fema_file, "'")
-  output_gpkg_filename <- gsub("_clean.geojson", "_clean.gpkg", basename(fema_file))
-  output_path <- paste0(FEMA_GPKG_PATH, "/", output_gpkg_filename)
-  
-  message("Converting geojson files to gpkg...")
-  
-  message("Converting \n > '", fema_file, "' to geojson '", output_gpkg_filename, "'")
-  
-  # mapshaper_command = paste0('node  --max-old-space-size=16000 /opt/homebrew/bin/mapshaper ', fema_file, ' -simplify 0.15 visvalingam -o ', output_path)
-  # test_file_path <- "/Users/anguswatters/Desktop/lynker-spatial/FEMA100_simplified/Wyoming-100yr-flood_valid_clean.geojson"
-  ogr2ogr_command = paste0("ogr2ogr -nlt MULTIPOLYGON ", output_path, " ", fema_file)
-  
-  system(ogr2ogr_command)
-  # message("ogr2ogr: ", ogr2ogr_command)
-  message("Saved '", output_gpkg_filename, "' saved to: \n > '", output_path, "'")
-  message()
-}
-
-# # -------------------------------------------------------------------------------------
-# # ---- Apply hydrofab::clean_geometries() to cleaned FEMA geometries  ----
-# # -------------------------------------------------------------------------------------
-# 
-# paths to FEMA 100 year flood plain files
-FEMA_gpkg_paths      <- list.files(FEMA_GPKG_PATH, full.names = TRUE)
-
-for (fema_file in FEMA_gpkg_paths) {
-  message("Applying final cleaning process to:\n > '", basename(fema_file), "'")
-
-  fema <- 
-    fema_file %>% 
-    sf::read_sf() %>% 
-    sf::st_transform(5070)%>% 
-    dplyr::mutate(
-      fema_id = 1:dplyr::n()
-    ) %>% 
-    dplyr::relocate(fema_id)
-  
-  fema_clean <- hydrofab::clean_geometry(catchments = sf::st_cast(fema, "POLYGON"), ID = "fema_id")
-  # mapview::mapview(fema, col.region = "red") + mapview::mapview(fema_clean, col.region = "green")
-  
-  # message("Fema 100 year flood plain:\n > '", fema_file, "'")
-  # output_gpkg_filename <- gsub("_clean.geojson", "_clean.gpkg", basename(fema_file))
-  # output_path <- paste0(FEMA_GPKG_PATH, "/", output_gpkg_filename)
-
-  message("Applying hydrofab::clean_geometry() \n > '", fema_file)
-  sf::write_sf(
-    fema_clean,
-    fema_file
-  )
-
-  message("Rewritting '", fema_file, "'")
-  message()
-}
-
-# -------------------------------------------------------------------------------------
-# ---- Generate bounding box gpkg for each FEMA FGB ----
-# -------------------------------------------------------------------------------------
-
-for (key in FEMA_BUCKET_KEYS) {
- 
-  local_fema_path <- paste0(FEMA_FGB_PATH, "/", key)
-  
-  gpkg_filename   <- gsub(".fgb", "_bb.gpkg", key)
-  bb_save_path    <- paste0(FEMA_GPKG_BB_PATH, "/", gpkg_filename)
-  
-  message("S3 Key: '", key, "'")
-  message("Local FEMA file:\n > '", local_fema_path, "'")
-  message("Local output FEMA bounding box file:\n > '", bb_save_path, "'")
-  
-  # fema <- sf::read_sf(local_fema_path)
-  
-  fema_bb <- 
-    local_fema_path %>% 
-    sf::read_sf() %>% 
-    sf::st_bbox() %>% 
-    sf::st_as_sfc() %>% 
-    sf::st_as_sf() %>% 
-    dplyr::mutate(
-      fema_fgb      = key,
-      fema_fgb_path = local_fema_path,
-      state         = gsub("-100yr-flood_valid.fgb", "", key)
-    ) %>% 
-    dplyr::select(fema_fgb, fema_fgb_path, state, geometry = x) %>% 
-    sf::st_transform(5070)
-  
-  message("Saving FEMA bounding box file:\n > '", bb_save_path, "'")
-  
-  sf::write_sf(fema_bb, bb_save_path)
-  message()
-}  
   
 
 
diff --git a/runners/cs_runner/preprocess_fema.R b/runners/cs_runner/preprocess_fema.R
new file mode 100644
index 0000000..aa5c9aa
--- /dev/null
+++ b/runners/cs_runner/preprocess_fema.R
@@ -0,0 +1,249 @@
+# Script should be run AFTER download_fema100.R, as the FEMA 100 year flood plain data must first be downloaded from S3
+# This file takes a directory of FEMA 100 year FGB files (FEMA_FGB_PATH) and runs the processes below to generate a cleaned, simple set of geopackages
+
+# Processing steps:
+# - Convert FGBs to GEOJSON (via ogr2ogr)
+# - Simplify
+# - Dissolve
+# - Explode
+# - Convert cleaned GEOJSON to cleaned GPKGs (via ogr2ogr)
+# - Apply hydrofab::clean_geometry()
+# - Partition FEMA 100 geometries by VPU      # TODO still
+# - Get FEMA bounding box geometries (maybe)
+
+# load config variables
+source("runners/cs_runner/config_vars.R")
+
+library(dplyr)
+library(sf)
+
+# -------------------------------------------------------------------------------------
+# ---- OVERWRITE_FEMA_FILES constant logical ----
+# ---- > if TRUE, processing steps will be run again 
+#          and overwrite previously processed files
+# -------------------------------------------------------------------------------------
+
+# Default is TRUE (i.e. a fresh processing run is done from start to finish)
+OVERWRITE_FEMA_FILES <- TRUE
+
+# -------------------------------------------------------------------------------------
+# ---- Create directories (if they do NOT exist) ----
+# -------------------------------------------------------------------------------------
+
+# create geojsons directory (if not exists) 
+if (!dir.exists(FEMA_GEOJSON_PATH)) {
+  message(paste0(FEMA_GEOJSON_PATH, " directory does not exist...\nCreating directory:\n > '", FEMA_GEOJSON_PATH, "'"))
+  dir.create(FEMA_GEOJSON_PATH)
+}
+
+# create directory for cleaned FEMA geometries (if not exists) 
+if (!dir.exists(FEMA_CLEAN_PATH)) {
+  message(paste0(FEMA_CLEAN_PATH, " directory does not exist...\nCreating directory:\n > '", FEMA_CLEAN_PATH, "'"))
+  dir.create(FEMA_CLEAN_PATH)
+}
+
+# create directory for cleaned FEMA geometries as geopackages (if not exists) 
+if (!dir.exists(FEMA_GPKG_PATH)) {
+  message(paste0(FEMA_GPKG_PATH, " directory does not exist...\nCreating directory:\n > '", FEMA_GPKG_PATH, "'"))
+  dir.create(FEMA_GPKG_PATH)
+}
+
+# create FEMA GPKG Bounding Boxes directory (if not exists)
+if (!dir.exists(FEMA_GPKG_BB_PATH)) {
+  message(paste0(FEMA_GPKG_BB_PATH, " directory does not exist...\nCreating directory:\n > '", FEMA_GPKG_BB_PATH, "'"))
+  dir.create(FEMA_GPKG_BB_PATH)
+}
+
+# -------------------------------------------------------------------------------------
+# ---- Get paths to downloaded FEMA 100 FGBs ----
+# -------------------------------------------------------------------------------------
+
+FEMA_FILENAMES  <- list.files(FEMA_FGB_PATH, full.names = FALSE)
+FEMA_FILE_PATHS <- paste0(FEMA_FGB_PATH, "/", FEMA_FILENAMES)
+
+# -------------------------------------------------------------------------------------
+# ---- Run ogr2ogr to get FGB files into geojson ----
+# -------------------------------------------------------------------------------------
+
+for (file in FEMA_FILENAMES) {
+  
+  local_fema_path <- paste0(FEMA_FGB_PATH, "/", file)
+  
+  geojson_filename     <- gsub(".fgb", ".geojson", file)
+  geojson_save_path    <- paste0(FEMA_GEOJSON_PATH, "/", geojson_filename)
+  
+  message("FEMA filename: '", file, "'")
+  message("Converting \n > '", file, "' to geojson '", geojson_filename, "'")
+  
+  geojson_exists <- file.exists(geojson_save_path)
+  
+  message(" >>> '", geojson_filename, "' already exists? ", geojson_exists)
+  message(" >>> Overwrite? ", OVERWRITE_FEMA_FILES)
+  
+  # ogr2ogr command converting FGBs to GEOJSON for mapshaper processing
+  ogr2ogr_command = paste0("ogr2ogr ", geojson_save_path, " ", local_fema_path)
+  
+  if (OVERWRITE_FEMA_FILES) {
+    system(ogr2ogr_command)
+    message("Writting '", geojson_filename, "' to: \n > '", geojson_save_path, "'")
+  }
+  
+  message()
+}  
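+
+# NOTE (sketch): gsub(".fgb", ...) above treats the "." as a regex wildcard,
+# so any character followed by "fgb" would match; anchoring and escaping the
+# extension is a stricter equivalent for these filenames:
+gsub("\\.fgb$", ".geojson", "Wyoming-100yr-flood_valid.fgb")   # "Wyoming-100yr-flood_valid.geojson"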
+
+# -------------------------------------------------------------------------------------
+# ---- Clean FEMA geometries (Simplify, Dissolve, Explode) ----
+# -------------------------------------------------------------------------------------
+
+# paths to FEMA 100 year flood plain files
+FEMA_geojson_paths      <- list.files(FEMA_GEOJSON_PATH, full.names = TRUE)
+
+for (file in FEMA_geojson_paths) {
+  
+  message("Simplify, dissolve, explode > '", basename(file), "'")
+  # message("Fema 100 year flood plain:\n > '", file, "'")
+  output_clean_filename <- gsub(".geojson", "_clean.geojson", basename(file))
+  output_path <- paste0(FEMA_CLEAN_PATH, "/", output_clean_filename)
+  
+  clean_geojson_exists <- file.exists(output_path)
+  message(" >>> '", output_clean_filename, "' already exists? ", clean_geojson_exists)
+  message(" >>> Overwrite? ", OVERWRITE_FEMA_FILES)
+  
+  mapshaper_command = paste0('node  --max-old-space-size=16000 /opt/homebrew/bin/mapshaper ', file, 
+                             ' -simplify 0.15 visvalingam \\', 
+                             ' -dissolve \\', 
+                             ' -explode \\', 
+                             ' -o ', output_path
+  )
+  
+  if (OVERWRITE_FEMA_FILES) {
+    message("Running mapshaper 'simplify', 'dissolve', and 'explode' via CLI...")
+    system(mapshaper_command)
+    message("Writting '", output_clean_filename, "' to: \n > '", output_path, "'")
+  }
+  
+  message()
+}
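+
+# sketch (assumptions: the rmapshaper package, used elsewhere in these runners,
+# is installed and a single GeoJSON fits in memory): the same
+# simplify -> dissolve -> explode chain in R instead of the mapshaper CLI,
+# here applied to the last geojson path left over from the loop above
+fema_in_r <-
+  sf::read_sf(file) %>%
+  rmapshaper::ms_simplify(keep = 0.15) %>%   # ~ '-simplify 0.15'
+  rmapshaper::ms_dissolve() %>%              # ~ '-dissolve'
+  rmapshaper::ms_explode()                   # ~ '-explode'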
+
+# -------------------------------------------------------------------------------------
+# ---- Convert cleaned FEMA geometries to geopackages ----
+# -------------------------------------------------------------------------------------
+
+# paths to FEMA 100 year flood plain files
+FEMA_clean_paths      <- list.files(FEMA_CLEAN_PATH, full.names = TRUE)
+
+for (file in FEMA_clean_paths) {
+  message("Fema 100 year flood plain:\n > '", basename(file), "'")
+  
+  output_gpkg_filename <- gsub("_clean.geojson", "_clean.gpkg", basename(file))
+  output_path          <- paste0(FEMA_GPKG_PATH, "/", output_gpkg_filename)
+  
+  message("Converting GEOJSON file to GPKG:\n > '", basename(file), "' > '", output_gpkg_filename, "'")
+  
+  # system(ogr2ogr_command)
+  
+  clean_gpkg_exists <- file.exists(output_path)
+  
+  message(" >>> '", output_gpkg_filename, "' already exists? ", clean_gpkg_exists)
+  message(" >>> Overwrite? ", OVERWRITE_FEMA_FILES)
+  
+  ogr2ogr_command <- paste0("ogr2ogr -nlt MULTIPOLYGON ", output_path, " ", file)
+  # ogr2ogr_command = paste0("ogr2ogr -nlt MULTIPOLYGON ", output_path, " ", file)
+  
+  if (OVERWRITE_FEMA_FILES) {
+    system(ogr2ogr_command)
+    message("Writting '", output_gpkg_filename, "' to: \n > '", output_path, "'")
+  }
+  message()
+}
+
+# # -------------------------------------------------------------------------------------
+# # ---- Apply hydrofab::clean_geometry() to cleaned FEMA geometries  ----
+# # -------------------------------------------------------------------------------------
+# 
+# paths to FEMA 100 year flood plain files
+FEMA_gpkg_paths      <- list.files(FEMA_GPKG_PATH, full.names = TRUE)
+
+for (file_path in FEMA_gpkg_paths) {
+  message("Applying hydrofab::clean_geometry() to:\n > '", basename(file_path), "'")
+  
+  fema <-
+    file_path %>%
+    sf::read_sf() %>%
+    sf::st_transform(5070) %>%
+    sf::st_cast("POLYGON") %>%
+    dplyr::mutate(
+      fema_id = 1:dplyr::n()
+    ) %>%
+    dplyr::select(fema_id, geometry = geom)
+
+  message(" > ", nrow(fema), " POLYGONs")
+  message("Start time: ", Sys.time())
+
+  fema_clean <- hydrofab::clean_geometry(
+    catchments = fema,
+    ID         = "fema_id"
+    )
+  
+  fema_clean <-
+    fema_clean %>%
+    dplyr::mutate(
+      source = basename(file_path),
+      state  = gsub("-100yr-flood_valid_clean.gpkg", "", source)
+    ) %>%
+    dplyr::select(fema_id, source, state, areasqkm, geometry)
+
+  message("End time: ", Sys.time())
+  
+  # geom_diff <- sf::st_difference(fema[1, ], fema_clean[1, ])
+  # mapview::mapview(fema[1, ], col.regions = "red") +
+  # mapview::mapview(fema_clean[1, ], col.regions = "green") +
+  # mapview::mapview(geom_diff, col.regions = "white")
+  
+  if (OVERWRITE_FEMA_FILES) {
+    message("Writting '", basename(file_path), "' to: \n > '", file_path, "'")
+    sf::write_sf(
+      fema_clean,
+      file_path
+    )
+  }
+  message()
+  
+}
+
+# -------------------------------------------------------------------------------------
+# ---- Generate bounding box gpkg for each FEMA FGB ----
+# -------------------------------------------------------------------------------------
+
+for (key in FEMA_FILENAMES) {
+ 
+  local_fema_path <- paste0(FEMA_FGB_PATH, "/", key)
+  
+  gpkg_filename   <- gsub(".fgb", "_bb.gpkg", key)
+  bb_save_path    <- paste0(FEMA_FGB_BB_PATH, "/", gpkg_filename)
+  
+  message("S3 Key: '", key, "'")
+  message("Local FEMA file:\n > '", local_fema_path, "'")
+  message("Local output FEMA bounding box file:\n > '", bb_save_path, "'")
+  
+  # fema <- sf::read_sf(local_fema_path)
+  
+  fema_bb <- 
+    local_fema_path %>% 
+    sf::read_sf() %>% 
+    sf::st_bbox() %>% 
+    sf::st_as_sfc() %>% 
+    sf::st_as_sf() %>% 
+    dplyr::mutate(
+      fema_fgb      = key,
+      fema_fgb_path = local_fema_path,
+      state         = gsub("-100yr-flood_valid.fgb", "", key)
+    ) %>% 
+    dplyr::select(fema_fgb, fema_fgb_path, state, geometry = x) %>% 
+    sf::st_transform(5070)
+  
+  message("Saving FEMA bounding box file:\n > '", bb_save_path, "'")
+  
+  sf::write_sf(fema_bb, bb_save_path)
+  message()
+}  
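+
+# sketch: the bbox pipeline above as a small reusable helper; note that
+# sf::st_as_sf() on an sfc yields a geometry column named "x" here, which is
+# why the loop renames it via `geometry = x`
+fgb_bbox_as_sf <- function(path) {
+  path %>%
+    sf::read_sf() %>%     # read the full FGB
+    sf::st_bbox() %>%     # xmin/ymin/xmax/ymax
+    sf::st_as_sfc() %>%   # bbox -> single polygon
+    sf::st_as_sf()        # sfc -> sf dataframe (geometry column "x")
+}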

From 703fadfd334dab339a8a32c60e552836a2d18900 Mon Sep 17 00:00:00 2001
From: anguswg-ucsb <anguswatters@gmail.com>
Date: Tue, 30 Apr 2024 18:29:53 -0700
Subject: [PATCH 21/64] partitioning fema floodplains by vpu code

---
 runners/cs_runner/config_vars.R     |  12 +++
 runners/cs_runner/preprocess_fema.R | 151 ++++++++++++++++++++++++++++
 runners/cs_runner/utils.R           |  28 ++++++
 3 files changed, 191 insertions(+)
 create mode 100644 runners/cs_runner/utils.R

diff --git a/runners/cs_runner/config_vars.R b/runners/cs_runner/config_vars.R
index 71233c9..7774d5b 100644
--- a/runners/cs_runner/config_vars.R
+++ b/runners/cs_runner/config_vars.R
@@ -27,6 +27,18 @@ FEMA_CLEAN_PATH      <- paste0(base_dir, "/FEMA100_clean")
 FEMA_GPKG_PATH       <- paste0(base_dir, "/FEMA100_gpkg")
 FEMA_GPKG_BB_PATH    <- paste0(base_dir, "/FEMA100_bounding_box") # TODO: Probably can be deleted too, not sure yet
 
+FEMA_BY_VPU_PATH     <- paste0(base_dir, "/FEMA_BY_VPU")
+VPU_IDS              <- sf::st_drop_geometry(nhdplusTools::get_boundaries())$VPUID
+
+FEMA_VPU_SUBFOLDERS <- paste0(FEMA_BY_VPU_PATH, "/VPU_", VPU_IDS)
+
+# FEMA_VPU_SUBFOLDERS <- paste0(
+#                         FEMA_BY_VPU_PATH, "/VPU_",
+#                         unlist(
+#                           lapply(list.files(nextgen_dir, full.names = FALSE), function(vpu_file_names) {
+#                             unlist(regmatches(vpu_file_names,  gregexpr("\\d+[A-Za-z]*", vpu_file_names)))})
+#                           )
+#                         )
 # TODO: these can be deleted
 # FEMA_SIMPLIFIED_PATH <- paste0(base_dir, "/FEMA100_simplified")
 # FEMA_DISSOLVED_PATH  <- paste0(base_dir, "/FEMA100_dissolved")
diff --git a/runners/cs_runner/preprocess_fema.R b/runners/cs_runner/preprocess_fema.R
index aa5c9aa..00d31e6 100644
--- a/runners/cs_runner/preprocess_fema.R
+++ b/runners/cs_runner/preprocess_fema.R
@@ -13,9 +13,12 @@
 
 # load config variables
 source("runners/cs_runner/config_vars.R")
+source("runners/cs_runner/config.R")
+source("runners/cs_runner/utils.R")
 
 library(dplyr)
 library(sf)
+library(geos)
 
 # -------------------------------------------------------------------------------------
 # ---- OVERWRITE_FEMA_FILES constant logical ----
@@ -48,6 +51,24 @@ if (!dir.exists(FEMA_GPKG_PATH)) {
   dir.create(FEMA_GPKG_PATH)
 }
 
+# create directory for FEMA geometries partitioned by VPU
+if (!dir.exists(FEMA_BY_VPU_PATH)) {
+  message(paste0(FEMA_BY_VPU_PATH, " directory does not exist...\nCreating directory:\n > '", FEMA_BY_VPU_PATH, "'"))
+  dir.create(FEMA_BY_VPU_PATH)
+}
+
+for (VPU_SUBFOLDER in FEMA_VPU_SUBFOLDERS) {
+  # create directory for FEMA geometries by VPU
+  if (!dir.exists(VPU_SUBFOLDER)) {
+    message("Creating FEMA VPU subfolder...")
+    message(paste0("'/", basename(VPU_SUBFOLDER), "' directory does not exist...\nCreating directory:\n > '", VPU_SUBFOLDER, "'"))
+    dir.create(VPU_SUBFOLDER)
+  }
+}
+
+# FEMA_VPU_SUBFOLDERS <- paste0(FEMA_BY_VPU_PATH, "/VPU_", VPU_IDS)
+
+
 # create FEMA GPKG Bounding Boxes directory (if not exists)
 if (!dir.exists(FEMA_GPKG_BB_PATH)) {
   message(paste0(FEMA_GPKG_BB_PATH, " directory does not exist...\nCreating directory:\n > '", FEMA_GPKG_BB_PATH, "'"))
@@ -211,6 +232,136 @@ for (file_path in FEMA_gpkg_paths) {
   
 }
 
+# # -------------------------------------------------------------------------------------
+# # ---- Partition parts of each FEMA GPKG to a Nextgen VPU ---- 
+# # -------------------------------------------------------------------------------------
+
+# Clean FEMA GPKG files
+FEMA_CLEAN_GPKG_PATHS      <- list.files(FEMA_GPKG_PATH, full.names = TRUE)
+
+# paths to nextgen datasets and model attribute parquet files
+NEXTGEN_FILENAMES  <- list.files(nextgen_dir, full.names = FALSE)
+NEXTGEN_FILE_PATHS <- paste0(nextgen_dir, NEXTGEN_FILENAMES)
+
+for (file_path in FEMA_CLEAN_GPKG_PATHS) {
+  fema_file <- basename(file_path)
+  message("Partioning FEMA polygons by VPU: \n > FEMA gpkg: '", fema_file, "'")
+  
+  # read in fema polygons
+  fema <- sf::read_sf(file_path)
+  
+  for (nextgen_path in NEXTGEN_FILE_PATHS) {
+    nextgen_basename <- basename(nextgen_path)
+    vpu              <- unlist(regmatches(nextgen_basename, gregexpr("\\d+[A-Za-z]*", nextgen_basename)))
+    
+    message("VPU: ", vpu)   
+    message("- nextgen gpkg:\n > '", nextgen_path, "'")   
+    message(" > Checking if '", fema_file, "' intersects with '", nextgen_basename, "'")
+    
+    # nextgen_path <- NEXTGEN_FILE_PATHS[13]
+    
+    # read in nextgen flowlines 
+    flines <- sf::read_sf(nextgen_path, layer = "flowpaths")
+
+    # get the FEMA polygons that intersect with the nextgen flowlines
+    fema_intersect <- polygons_with_line_intersects(fema, flines)
+
+    fema_in_nextgen <-  nrow(fema_intersect) != 0
+    
+    message("FEMA intersects with nextgen flowlines? ", fema_in_nextgen)
+
+    if(fema_in_nextgen) {
+      
+      # create filepaths
+      vpu_subfolder      <- paste0("VPU_", vpu)
+      vpu_subfolder_path <- paste0(FEMA_BY_VPU_PATH, "/", vpu_subfolder)
+      # vpu_subfolder_path <- FEMA_VPU_SUBFOLDERS[grepl(vpu_subfolder, FEMA_VPU_SUBFOLDERS)]
+      
+      fema_intersect <-
+        fema_intersect %>%
+        dplyr::mutate(
+          vpu = vpu
+        ) %>%
+        dplyr::select(vpu, fema_id, source, state,
+                      areasqkm, geom)
+      
+      # state <- gsub("-100yr-flood_valid_clean.gpkg", "", fema_file)
+
+      fema_vpu_filename <- gsub(".gpkg", paste0("_", vpu, ".gpkg"), fema_file)
+      fema_vpu_path     <- paste0(vpu_subfolder_path, "/", fema_vpu_filename)
+      
+
+      if (OVERWRITE_FEMA_FILES) {
+        message("Writting '", basename(fema_vpu_filename), "' to: \n > '", fema_vpu_path, "'")
+        
+        sf::write_sf(
+          fema_intersect,
+          fema_vpu_path
+        )
+      }
+
+        
+    }
+    message()
+  }
+  
+  
+  message(
+    "--------------------------------------------------------------\n", 
+    "Completed all VPU intersections for: \n > '", fema_file, "'",
+    "\n--------------------------------------------------------------\n"
+    )
+  
+}
+#  
+#   text = "nextgen_03W.gpkg"
+#   VPU <- unlist(regmatches(text, 
+#                         gregexpr("\\d+[A-Za-z]*", text)
+#                         ))
+# 
+#   fema <-
+#     file_path %>%
+#     sf::read_sf() %>%
+#     sf::st_transform(5070) %>%
+#     sf::st_cast("POLYGON") %>%
+#     dplyr::mutate(
+#       fema_id = 1:dplyr::n()
+#     ) %>%
+#     dplyr::select(fema_id, geometry = geom)
+# 
+#   message(" > ", nrow(fema), " POLYGONs")
+#   message("Start time: ", Sys.time())
+# 
+#   fema_clean <- hydrofab::clean_geometry(
+#     catchments = fema,
+#     ID         = "fema_id"
+#     )
+#   
+#   fema_clean <-
+#     fema_clean %>%
+#     dplyr::mutate(
+#       source = basename(file_path),
+#       state  = gsub("-100yr-flood_valid_clean.gpkg", "", source)
+#     ) %>%
+#     dplyr::select(fema_id, source, state, areasqkm, geometry)
+# 
+#   message("End time: ", Sys.time())
+#   
+#   # geom_diff <- sf::st_difference(fema[1, ], fema_clean[1, ])
+#   # mapview::mapview(fema[1, ], col.regions = "red") +
+#   # mapview::mapview(fema_clean[1, ], col.regions = "green") +
+#   # mapview::mapview(geom_diff, col.regions = "white")
+#   
+#   if (OVERWRITE_FEMA_FILES) {
+#     message("Writting '", basename(file_path), "' to: \n > '", file_path, "'")
+#     sf::write_sf(
+#       fema_clean,
+#       file_path
+#     )
+#   }
+#   message()
+#   
+# }
 # -------------------------------------------------------------------------------------
 # ---- Generate bounding box gpkg for each FEMA FGB ----
 # -------------------------------------------------------------------------------------
diff --git a/runners/cs_runner/utils.R b/runners/cs_runner/utils.R
new file mode 100644
index 0000000..ae40b70
--- /dev/null
+++ b/runners/cs_runner/utils.R
@@ -0,0 +1,28 @@
+#' Get the polygons that intersect with any of the linestring geometries
+#' This is just a wrapper around geos::geos_intersects_matrix. Takes in sf dataframes, uses geos, then outputs sf dataframes
+#' @param polygons polygon sf object. Default is NULL
+#' @param lines linestring sf object. Default is NULL.
+#'
+#' @return sf dataframe of polygons that intersect with the linestrings
+polygons_with_line_intersects <- function(polygons = NULL, lines = NULL) {
+  
+  if (is.null(polygons)) {
+    stop("NULL 'polygons' argument, provide an sf dataframe of POLYGON or MULTIPOLYGON geometries")
+  }
+  
+  if (is.null(lines)) {
+    stop("NULL 'lines' argument, provide an sf dataframe of LINESTRING or MULTILINESTRING geometries")
+  }
+  
+  # Convert the SF geometries to geos geometries
+  polygons_geos   <- geos::as_geos_geometry(polygons)
+  lines_geos      <- geos::as_geos_geometry(lines)
+  
+  # create an index between the polygons and linestrings
+  lines_index <-  geos::geos_intersects_matrix(polygons_geos, lines_geos)
+  
+  # get the polygons that have at least 1 intersection with the 'lines'
+  polygons_with_lines <- polygons[lengths(lines_index) != 0, ]
+  
+  return(polygons_with_lines)
+}
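+
+# usage sketch with toy geometries (assumes only the sf and geos packages):
+# a 2x2 square at the origin crosses the horizontal line while a distant 1x1
+# square does not, so only the first polygon is returned
+square <- function(x0, y0, s) {
+  sf::st_polygon(list(rbind(c(x0, y0), c(x0 + s, y0), c(x0 + s, y0 + s),
+                            c(x0, y0 + s), c(x0, y0))))
+}
+toy_polys <- sf::st_sf(id = 1:2, geometry = sf::st_sfc(square(0, 0, 2), square(5, 5, 1)))
+toy_line  <- sf::st_sf(geometry = sf::st_sfc(sf::st_linestring(rbind(c(-1, 1), c(3, 1)))))
+polygons_with_line_intersects(toy_polys, toy_line)   # -> toy_polys[1, ]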

From 5d36e1ecd35967b5619d5fbdadda0f22f56af51c Mon Sep 17 00:00:00 2001
From: anguswg-ucsb <anguswatters@gmail.com>
Date: Wed, 1 May 2024 13:59:07 -0700
Subject: [PATCH 22/64] minor cleanups

---
 runners/cs_runner/02_cs_pts.R       | 15 +++------
 runners/cs_runner/03_inject_ml.R    | 23 -------------
 runners/cs_runner/preprocess_fema.R | 51 -----------------------------
 3 files changed, 5 insertions(+), 84 deletions(-)

diff --git a/runners/cs_runner/02_cs_pts.R b/runners/cs_runner/02_cs_pts.R
index 7d5cc8e..4b18b93 100644
--- a/runners/cs_runner/02_cs_pts.R
+++ b/runners/cs_runner/02_cs_pts.R
@@ -13,7 +13,6 @@ library(sf)
 
 # cross section bucket prefix
 cs_pts_prefix <- paste0(s3_bucket, version_prefix, "/3D/dem-cross-sections/")
-# cs_pts_prefix <- paste0(s3_bucket, "v20/3D/dem-cross-sections/")
 
 # transect bucket prefix
 transects_prefix <- paste0(s3_bucket, version_prefix, "/3D/transects/")
@@ -48,9 +47,8 @@ path_df <- align_files_by_vpu(
 # loop over the nextgen and transect datasets (by VPU) and extract point elevations across points on each transect line,
 # then classify the points, and create a parquet file with hy_id, cs_id, pt_id, X, Y, Z data.
 # Save parquet locally and upload to specified S3 bucket
-for (i in 13:nrow(path_df)) {
-  
-  # i = 1
+for (i in 1:nrow(path_df)) {
+
   
   start <- Sys.time()
   
@@ -102,8 +100,7 @@ for (i in 13:nrow(path_df)) {
   # ----------------------------------------------------------------------------------------------------------------
   # ---- STEP 1: Extract cs points from DEM ----
   # ----------------------------------------------------------------------------------------------------------------
-  # system.time({
-    
+
   # get cross section point elevations
   cs_pts <- hydrofabric3D::cross_section_pts(
     cs             = transects,
@@ -111,7 +108,7 @@ for (i in 13:nrow(path_df)) {
     min_pts_per_cs = 10,
     dem            = DEM_URL
   )
-  # })
+
   # ----------------------------------------------------------------------------------------------------------------
   # ---- STEP 2: Remove any cross section that has ANY missing (NA) Z values, and classify the points ----
   # ----------------------------------------------------------------------------------------------------------------
@@ -313,9 +310,7 @@ for (i in 13:nrow(path_df)) {
                   ) %>% 
                     dplyr::select(-cs_id, -tmp_id) %>% 
                     dplyr::rename(cs_id = new_cs_id)
-  
-  ###################################### 
-  
+
   # ----------------------------------------------------------------------------------------------------------------
   # ---- Cross section points parquet to S3 ----
   # ----------------------------------------------------------------------------------------------------------------
diff --git a/runners/cs_runner/03_inject_ml.R b/runners/cs_runner/03_inject_ml.R
index cc0ee52..c607e9f 100644
--- a/runners/cs_runner/03_inject_ml.R
+++ b/runners/cs_runner/03_inject_ml.R
@@ -35,25 +35,13 @@ path_df <- align_files_by_vpu(
 # ML Outputs
 ml_output <- arrow::read_parquet(ML_OUTPUTS_PATH)
 
-# cs_ml_data_path <- "/Users/anguswatters/Desktop/cs_pts_for_ml_tests/nextgen_06_cross_sections_for_ml.parquet"
-# ml_output_path <- "/Users/anguswatters/Desktop/lynker-spatial/ml-outputs/channel_ml_outputs.parquet"
-# conus_network_path <- 's3://lynker-spatial/v20.1/conus_net.parquet'
-
-# for (i in 1:5000) {
-#   start <- Sys.time()
-#   message(i, " - time: '", start, "'")
-# }
-
 # loop over the nextgen and transect datasets (by VPU) and extract point elevations across points on each transect line,
 # then classify the points, and create a parquet file with hy_id, cs_id, pt_id, X, Y, Z data.
 # Save parquet locally and upload to specified S3 bucket
 for (i in 1:nrow(path_df)) {
   
-  # i = 8
-  
   start <- round(Sys.time())
   
-  
   # nextgen file and full path
   nextgen_file <- path_df$x[i]
   nextgen_path <- paste0(nextgen_dir, nextgen_file)
@@ -62,10 +50,6 @@ for (i in 1:nrow(path_df)) {
   cs_file      <- path_df$y[i]
   cs_pts_path  <- paste0(cs_pts_dir, cs_file)
   
-  # # model attributes file and full path
-  # ref_file <- path_df$ref_file[i]
-  # ref_path <- paste0(ref_features_dir, "gpkg/", ref_file)
-  
   # current VPU being processed
   VPU = path_df$vpu[i]
   
@@ -141,10 +125,6 @@ for (i in 1:nrow(path_df)) {
   # ---- Subset ML data to specific VPU and add "hy_id" column to ML data -----
   # ----------------------------------------------------------------------------------------------------------------
   
-  
-  # # ML Outputs
-  # ml_output <- arrow::read_parquet(ML_OUTPUTS_PATH)
-  
   # Join hy_id onto the ML outputs and then remove the rows WITHOUT matches in hy_id
   #  this should give us a (nearly) one-to-one cross walk between "hy_id" in the cross section points 
   # and "hf_id" in the ML outputs dataset
@@ -198,9 +178,6 @@ for (i in 1:nrow(path_df)) {
     ) %>% 
     dplyr::select(-bottom_length)
   
-  # cs_pts %>% dplyr::filter(owp_tw_inchan <= 0 | owp_tw_bf <= 0)
-  # dplyr::left_join( stream_order, by = "hy_id") 
-  
   # extract any cross sections that didn't get matched with a "hf_id" and (or?) no ML data
   # TODO: look at this stuff with Arash (04/09/2024)
   missing_cs <- 
diff --git a/runners/cs_runner/preprocess_fema.R b/runners/cs_runner/preprocess_fema.R
index 00d31e6..9e38680 100644
--- a/runners/cs_runner/preprocess_fema.R
+++ b/runners/cs_runner/preprocess_fema.R
@@ -258,8 +258,6 @@ for (file_path in FEMA_CLEAN_GPKG_PATHS) {
     message("- nextgen gpkg:\n > '", nextgen_path, "'")   
     message(" > Checking if '", fema_file, "' intersects with '", nextgen_basename, "'")
     
-    # nextgen_path <- NEXTGEN_FILE_PATHS[13]
-    
     # read in nextgen flowlines 
     flines <- sf::read_sf(nextgen_path, layer = "flowpaths")
 
@@ -313,55 +311,6 @@ for (file_path in FEMA_CLEAN_GPKG_PATHS) {
     )
   
 }
-#  
-#   text = "nextgen_03W.gpkg"
-#   VPU <- unlist(regmatches(text, 
-#                         gregexpr("\\d+[A-Za-z]*", text)
-#                         ))
-# 
-#   fema <-
-#     file_path %>%
-#     sf::read_sf() %>%
-#     sf::st_transform(5070) %>%
-#     sf::st_cast("POLYGON") %>%
-#     dplyr::mutate(
-#       fema_id = 1:dplyr::n()
-#     ) %>%
-#     dplyr::select(fema_id, geometry = geom)
-# 
-#   message(" > ", nrow(fema), " POLYGONs")
-#   message("Start time: ", Sys.time())
-# 
-#   fema_clean <- hydrofab::clean_geometry(
-#     catchments = fema,
-#     ID         = "fema_id"
-#     )
-#   
-#   fema_clean <-
-#     fema_clean %>%
-#     dplyr::mutate(
-#       source = basename(file_path),
-#       state  = gsub("-100yr-flood_valid_clean.gpkg", "", source)
-#     ) %>%
-#     dplyr::select(fema_id, source, state, areasqkm, geometry)
-# 
-#   message("End time: ", Sys.time())
-#   
-#   # geom_diff <- sf::st_difference(fema[1, ], fema_clean[1, ])
-#   # mapview::mapview(fema[1, ], col.regions = "red") +
-#   # mapview::mapview(fema_clean[1, ], col.regions = "green") +
-#   # mapview::mapview(geom_diff, col.regions = "white")
-#   
-#   if (OVERWRITE_FEMA_FILES) {
-#     message("Writting '", basename(file_path), "' to: \n > '", file_path, "'")
-#     sf::write_sf(
-#       fema_clean,
-#       file_path
-#     )
-#   }
-#   message()
-#   
-# }
 # -------------------------------------------------------------------------------------
 # ---- Generate bounding box gpkg for each FEMA FGB ----
 # -------------------------------------------------------------------------------------

From 59a8d0af49628ccf9ecf6966fc197535e3506f9e Mon Sep 17 00:00:00 2001
From: anguswg-ucsb <anguswatters@gmail.com>
Date: Thu, 2 May 2024 14:42:01 -0700
Subject: [PATCH 23/64] random cleanups

---
 runners/cs_runner/00_fema.R     | 6 ------
 runners/cs_runner/config_vars.R | 7 -------
 2 files changed, 13 deletions(-)

diff --git a/runners/cs_runner/00_fema.R b/runners/cs_runner/00_fema.R
index 5835ff3..304268f 100644
--- a/runners/cs_runner/00_fema.R
+++ b/runners/cs_runner/00_fema.R
@@ -3,12 +3,6 @@ library(dplyr)
 # Generate the flowlines layer for the final cross_sections_<VPU>.gpkg for each VPU
 source("runners/cs_runner/config.R")
 
-# # # # load libraries
-# library(hydrofabric3D)
-# library(dplyr)
-# library(sf)
-# install.packages("devtools")
-
 # transect bucket prefix
 transects_prefix <- paste0(s3_bucket, version_prefix, "/3D/transects/")
 
diff --git a/runners/cs_runner/config_vars.R b/runners/cs_runner/config_vars.R
index 7774d5b..47483c9 100644
--- a/runners/cs_runner/config_vars.R
+++ b/runners/cs_runner/config_vars.R
@@ -3,7 +3,6 @@
 # ----------------------------------------------------------------------------
 # ---- General paths and constants variables ----
 # ----------------------------------------------------------------------------
-
 base_dir     <- '/Users/anguswatters/Desktop/lynker-spatial'
 
 # AWS profile to run CLI commands 
@@ -31,7 +30,6 @@ FEMA_BY_VPU_PATH     <- paste0(base_dir, "/FEMA_BY_VPU")
 VPU_IDS              <- sf::st_drop_geometry(nhdplusTools::get_boundaries())$VPUID
 
 FEMA_VPU_SUBFOLDERS <- paste0(FEMA_BY_VPU_PATH, "/VPU_", VPU_IDS)
-
 # FEMA_VPU_SUBFOLDERS <- paste0(
 #                         FEMA_BY_VPU_PATH, "/VPU_",
 #                         unlist(
@@ -39,11 +37,6 @@ FEMA_VPU_SUBFOLDERS <- paste0(FEMA_BY_VPU_PATH, "/VPU_", VPU_IDS)
 #                             unlist(regmatches(vpu_file_names,  gregexpr("\\d+[A-Za-z]*", vpu_file_names)))})
 #                           )
 #                         )
-# TODO: these can be deleted
-# FEMA_SIMPLIFIED_PATH <- paste0(base_dir, "/FEMA100_simplified")
-# FEMA_DISSOLVED_PATH  <- paste0(base_dir, "/FEMA100_dissolved")
-# FEMA_EXPLODED_PATH   <- paste0(base_dir, "/FEMA100_exploded")
-
 
 # ----------------------------------------------------------------------------
 # ---- Cross section point extraction constant variables ----

From d8742c152cf601b69177555a555e563b8d38cabf Mon Sep 17 00:00:00 2001
From: anguswg-ucsb <anguswatters@gmail.com>
Date: Mon, 6 May 2024 11:38:19 -0700
Subject: [PATCH 24/64] added ogr2ogr commands to merge together all femas
 within a VPU into single gpkgs

---
 runners/cs_runner/00_fema.R               |  1 +
 runners/cs_runner/add_fema_to_transects.R | 70 +++++++++++++++++++++++
 runners/cs_runner/preprocess_fema.R       | 44 ++++++++++++++
 3 files changed, 115 insertions(+)
 create mode 100644 runners/cs_runner/add_fema_to_transects.R

diff --git a/runners/cs_runner/00_fema.R b/runners/cs_runner/00_fema.R
index 304268f..11f55be 100644
--- a/runners/cs_runner/00_fema.R
+++ b/runners/cs_runner/00_fema.R
@@ -52,6 +52,7 @@ us_states <-
   
   # read in nextgen data
   flines <- sf::read_sf(nextgen_path, layer = "flowpaths")
+  
   flines_bb <- 
     flines %>% 
     sf::st_bbox() %>% 
diff --git a/runners/cs_runner/add_fema_to_transects.R b/runners/cs_runner/add_fema_to_transects.R
new file mode 100644
index 0000000..5964c1f
--- /dev/null
+++ b/runners/cs_runner/add_fema_to_transects.R
@@ -0,0 +1,70 @@
+library(dplyr) 
+
+# Generate the flowlines layer for the final cross_sections_<VPU>.gpkg for each VPU
+source("runners/cs_runner/config.R")
+
+# transect bucket prefix
+transects_prefix <- paste0(s3_bucket, version_prefix, "/3D/transects/")
+
+# paths to nextgen datasets and model attribute parquet files
+nextgen_files   <- list.files(nextgen_dir, full.names = FALSE)
+# FEMA_files      <- list.files(FEMA_FGB_PATH, full.names = FALSE)
+# FEMA_BB_files   <- list.files(FEMA_FGB_BB_PATH, full.names = FALSE)
+transects_files <- list.files(transects_dir, full.names = FALSE)
+transects_files <- transects_files[!grepl("updated", transects_files)]
+
+FEMA_VPU_SUBFOLDERS
+# string to fill in "cs_source" column in output datasets
+FEMA_files
+net_source <- "hydrofabric3D"
+
+# ensure the files are in the same order and matched up by VPU
+path_df <- align_files_by_vpu(
+  x    = nextgen_files,
+  y    = transects_files,
+  base = base_dir
+)
+
+path_df
+
+us_states <- 
+  USAboundaries::us_states() %>% 
+  sf::st_transform(5070)
+
+# loop over each VPU and generate cross sections, then save locally and upload to S3 bucket
+# for(i in 1:nrow(path_df)) {
+  
+  i = 8
+  
+  # nextgen file and full path
+  nextgen_file <- path_df$x[i]
+  nextgen_path <- paste0(nextgen_dir, nextgen_file)
+  
+  transect_file <- path_df$y[i]
+  transect_path <- paste0(transects_dir, transect_file)
+  
+  VPU <- path_df$vpu[i]
+  transect_path
+  
+  # # model attributes file and full path
+  # model_attr_file <- path_df$y[i]
+  # model_attr_path <- paste0(model_attr_dir, model_attr_file)
+  
+  message("Creating VPU ", path_df$vpu[i], "\n - transects: ", transect_file, "\n - flowpaths: '", nextgen_file, "'")
+  # message("Creating VPU ", path_df$vpu[i], " transects:\n - flowpaths: '", nextgen_file, "'\n - model attributes: '", model_attr_file, "'")
+  
+  # read in nextgen data
+  flines <- sf::read_sf(nextgen_path, layer = "flowpaths")
+
+  
+  FEMA_vpu_dir <- FEMA_VPU_SUBFOLDERS[grepl(paste0("VPU_", VPU), basename(FEMA_VPU_SUBFOLDERS))]
+  
+  list.files(FEMA_vpu_dir)
+  
+  VPU
+  
+  fema <- sf::read_sf()
+  
+  
+  
+    
\ No newline at end of file
diff --git a/runners/cs_runner/preprocess_fema.R b/runners/cs_runner/preprocess_fema.R
index 9e38680..c1d9772 100644
--- a/runners/cs_runner/preprocess_fema.R
+++ b/runners/cs_runner/preprocess_fema.R
@@ -311,6 +311,50 @@ for (file_path in FEMA_CLEAN_GPKG_PATHS) {
     )
   
 }
+
+# -------------------------------------------------------------------------------------
+# ---- Loop through each VPU subfolder and merge all of the Geopackages into one---- 
+# -------------------------------------------------------------------------------------
+
+DELETE_STAGING_GPKGS <- TRUE
+
+for (vpu_dir in FEMA_VPU_SUBFOLDERS) {
+  
+  message("Merging files in VPU dir '", vpu_dir, "'")
+  fema_vpu_gpkgs <- list.files(vpu_dir, full.names = TRUE)
+  
+  master_name <- paste0("fema_", tolower(basename(vpu_dir)))
+  master_gpkg_name <- paste0(master_name, ".gpkg")
+  
+  for(gpkg_file in fema_vpu_gpkgs) {
+    message("- Appending '", basename(gpkg_file), "' to master FEMA VPU gpkg:\n  >  '", 
+            paste0(vpu_dir, "/", master_gpkg_name), "'")
+    
+    ogr2ogr_merge_command <- paste0("ogr2ogr -f 'gpkg' -append -nln ", master_name, " ",   
+                                    paste0(vpu_dir, "/", master_gpkg_name), 
+                                    " ", gpkg_file
+                                    )
+    
+    if (OVERWRITE_FEMA_FILES) {
+      system(ogr2ogr_merge_command)
+    }
+    
+    if(DELETE_STAGING_GPKGS) {
+      message("Deleting individual gpkgs from\n > '", vpu_dir, "'")
+      
+      files_to_delete <- fema_vpu_gpkgs[!grepl(master_gpkg_name, fema_vpu_gpkgs)]
+      remove_gpkg_cmds <- paste0("rm ", files_to_delete)
+      
+      for (remove_cmd in remove_gpkg_cmds) {
+        message("  >  '", remove_cmd, "'")
+        system(remove_cmd)
+      }
+    }
+    message()
+  }
+   
+}
+
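+# sketch: the merge command assembled above, evaluated for a hypothetical
+# VPU_06 staging gpkg (the Wyoming filename is illustrative; paths follow
+# config_vars.R)
+vpu_dir_example <- paste0(FEMA_BY_VPU_PATH, "/VPU_06")
+master_example  <- paste0("fema_", tolower(basename(vpu_dir_example)))   # "fema_vpu_06"
+gpkg_example    <- paste0(vpu_dir_example, "/Wyoming-100yr-flood_valid_clean_06.gpkg")
+paste0("ogr2ogr -f 'gpkg' -append -nln ", master_example, " ",
+       paste0(vpu_dir_example, "/", master_example, ".gpkg"),
+       " ", gpkg_example)
+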
 # -------------------------------------------------------------------------------------
 # ---- Generate bounding box gpkg for each FEMA FGB ----
 # -------------------------------------------------------------------------------------

From 4d4d0570217c44e8cb52c9def50aaca4688d3ba9 Mon Sep 17 00:00:00 2001
From: anguswg-ucsb <anguswatters@gmail.com>
Date: Mon, 6 May 2024 16:10:19 -0700
Subject: [PATCH 25/64] work in progress on extending transects according to
 fema polygons

---
 runners/cs_runner/add_fema_to_transects.R | 79 ++++++++++++++++++++++-
 1 file changed, 77 insertions(+), 2 deletions(-)

diff --git a/runners/cs_runner/add_fema_to_transects.R b/runners/cs_runner/add_fema_to_transects.R
index 5964c1f..5cceadb 100644
--- a/runners/cs_runner/add_fema_to_transects.R
+++ b/runners/cs_runner/add_fema_to_transects.R
@@ -59,12 +59,87 @@ us_states <-
   
   FEMA_vpu_dir <- FEMA_VPU_SUBFOLDERS[grepl(paste0("VPU_", VPU), basename(FEMA_VPU_SUBFOLDERS))]
   
-  list.files(FEMA_vpu_dir)
+  vpu_fema_file <- list.files(FEMA_vpu_dir, full.names = TRUE)
   
+  FEMA_vpu_dir
   VPU
   
-  fema <- sf::read_sf()
+  fema <- sf::read_sf(vpu_fema_file)
+    
+   
+  transects <- sf::read_sf(transect_path)
   
+  transects_geos <- geos::as_geos_geometry(transects)
   
+  fema_geos <- geos::as_geos_geometry(fema)     
+
+  
+  fema_geos  
+  fema_transects_matrix <- geos::geos_intersects_matrix(transects_geos, fema_geos) 
+  transects_fema_matrix <- geos::geos_intersects_matrix(fema_geos, transects_geos) 
+  
+  fema_transects_matrix
+  # get the polygons that have at least 1 intersection with the 'lines'
+  transects_with_fema <- transects[lengths(fema_transects_matrix) != 0, ]
+  fema_with_transects <- fema[lengths(transects_fema_matrix) != 0, ]
+  
+  lengths(transects_fema_matrix)
+  mapview::mapview(transects_with_fema, color = "green") + fema_with_transects
+  unique(hydrofabric3D::add_tmp_id(transects)$tmp_id)[1:30]
+  transects  %>% 
+    hydrofabric3D::add_tmp_id(transects) %>% 
+    .$tmp_id %>% 
+    unique() %>% .[1:30]
+  trans_subset <- 
+    transects %>% 
+    hydrofabric3D::add_tmp_id() %>% 
+    dplyr::filter(tmp_id %in%  unique(hydrofabric3D::add_tmp_id(transects)$tmp_id)[1:30])
+  
+  fema_subset <- 
+    fema %>% 
+    dplyr::filter(fema_id == "1268")
+
+  extended <- hydrofabric3D:::extend_by_length(trans_subset,    rep(500, nrow(trans_subset)))
+  extended
+  clipped_trans <- rmapshaper::ms_clip(extended, fema)
+  
+  rmapshaper::ms_clip(extended, fema_subset)
+  mapview::mapview(trans_subset, color = "red") + 
+    mapview::mapview(extended, color = "yellow") +
+    # mapview::mapview(  sf::st_difference(extended, fema_subset), color = "green") +
+    mapview::mapview(clipped_trans,  color = "green") +
+    fema 
+  
+   rep(50, nrow(trans_subset))
+  extended <- hydrofabric3D:::extend_by_length(trans_subset,    rep(50, nrow(trans_subset)))
+  
+  
+  hydrofabric3D::geos_extend_line(trans_subset, 50) %>% 
+    sf::st_as_sf() %>% mapview::mapview()
+  
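+  # sketch, condensing the scratch work above: extend every transect by a
+  # fixed distance, then clip the extended lines back to the FEMA polygons
+  # (assumes hydrofabric3D's internal extend_by_length() behaves as in the
+  # calls above)
+  extended_all  <- hydrofabric3D:::extend_by_length(transects, rep(500, nrow(transects)))
+  clipped_trans <- rmapshaper::ms_clip(extended_all, fema)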
\ No newline at end of file

From 89242b264cc2e1457818e346be42d103fe10ac8b Mon Sep 17 00:00:00 2001
From: anguswg-ucsb <anguswatters@gmail.com>
Date: Tue, 7 May 2024 16:01:22 -0700
Subject: [PATCH 26/64] small changes to dir structure for final merged fema
 polygons

---
 runners/cs_runner/add_fema_to_transects.R | 17 +++++
 runners/cs_runner/preprocess_fema.R       | 85 +++++++++++++++++------
 2 files changed, 80 insertions(+), 22 deletions(-)

diff --git a/runners/cs_runner/add_fema_to_transects.R b/runners/cs_runner/add_fema_to_transects.R
index 5cceadb..1a33558 100644
--- a/runners/cs_runner/add_fema_to_transects.R
+++ b/runners/cs_runner/add_fema_to_transects.R
@@ -80,6 +80,23 @@ us_states <-
   
   fema_transects_matrix
   # get the polygons that have at least 1 intersection with the 'lines'
+  fema_transects_matrix[[557]]
+  fema_tmp <- fema[fema_transects_matrix[[557]], ]
+  trans_tmp <- transects[557, ]
+  
+ fema_dissolve <-  rmapshaper::ms_dissolve(fema_tmp)
+ fema_simple <- rmapshaper::ms_simplify(fema_tmp, keep = 1) %>% 
+   # rmapshaper::ms_dissolve(field = "state")
+ sf::st_union()
+  mapview::mapview(trans_tmp, color = "green") + 
+    mapview::mapview(fema_tmp[1, ], col.region = "red") + 
+    mapview::mapview(fema_tmp[2, ],  col.region = "dodgerblue") + 
+    mapview::mapview(fema_simple,  col.region = "yellow")
+  transects
+  
+  
+  fema_transects_matrix
+  
   transects_with_fema <- transects[lengths(fema_transects_matrix) != 0, ]
   fema_with_transects <- fema[lengths(transects_fema_matrix) != 0, ]
   
diff --git a/runners/cs_runner/preprocess_fema.R b/runners/cs_runner/preprocess_fema.R
index c1d9772..6b6c2f8 100644
--- a/runners/cs_runner/preprocess_fema.R
+++ b/runners/cs_runner/preprocess_fema.R
@@ -59,11 +59,32 @@ if (!dir.exists(FEMA_BY_VPU_PATH)) {
 
 for (VPU_SUBFOLDER in FEMA_VPU_SUBFOLDERS) {
   # create directory for FEMA geometries by VPU
+  # message(VPU_SUBFOLDER)
+  
+  state_dir  = paste0(VPU_SUBFOLDER, "/states/")
+  merged_dir = paste0(VPU_SUBFOLDER, "/merged/")
+  
   if (!dir.exists(VPU_SUBFOLDER)) {
     message("Creating FEMA VPU subfolder...")
-    message(paste0("'/", basename(VPU_SUBFOLDER), "' directory does not exist...\nCreating directory:\n > '", VPU_SUBFOLDER, "'"))
+    message(paste0("'/", basename(VPU_SUBFOLDER), "' directory does not exist...\n  Creating directory:\n > '", VPU_SUBFOLDER, "'"))
     dir.create(VPU_SUBFOLDER)
   }
+  
+  if (!dir.exists(state_dir)) { 
+      message("Creating FEMA VPU states subfolder...")
+      message(paste0("'/", basename(state_dir), "' directory does not exist...\n  Creating directory:\n > '", state_dir, "'"))
+      
+      dir.create(state_dir)
+     
+     }
+  
+  if (!dir.exists(merged_dir)) { 
+    message("Creating FEMA VPU merged subfolder...")
+    message(paste0("'/", basename(merged_dir), "' directory does not exist...\n  Creating directory:\n > '", merged_dir, "'"))
+    
+    dir.create(merged_dir)
+    
+    }
 }
 
 # FEMA_VPU_SUBFOLDERS <- paste0(FEMA_BY_VPU_PATH, "/VPU_", VPU_IDS)
@@ -242,6 +263,7 @@ FEMA_CLEAN_GPKG_PATHS      <- list.files(FEMA_GPKG_PATH, full.names = TRUE)
 # paths to nextgen datasets and model attribute parquet files
 NEXTGEN_FILENAMES  <- list.files(nextgen_dir, full.names = FALSE)
 NEXTGEN_FILE_PATHS <- paste0(nextgen_dir, NEXTGEN_FILENAMES)
+# OVERWRITE_FEMA_FILES
 
 for (file_path in FEMA_CLEAN_GPKG_PATHS) {
   fema_file <- basename(file_path)
@@ -272,7 +294,7 @@ for (file_path in FEMA_CLEAN_GPKG_PATHS) {
       
       # create filepaths
       vpu_subfolder      <- paste0("VPU_", vpu)
-      vpu_subfolder_path <- paste0(FEMA_BY_VPU_PATH, "/", vpu_subfolder)
+      vpu_subfolder_path <- paste0(FEMA_BY_VPU_PATH, "/", vpu_subfolder, "/states")
       # vpu_subfolder_path <- FEMA_VPU_SUBFOLDERS[grepl(vpu_subfolder, FEMA_VPU_SUBFOLDERS)]
       
       fema_intersect <-
@@ -316,43 +338,62 @@ for (file_path in FEMA_CLEAN_GPKG_PATHS) {
 # ---- Loop through each VPU subfolder and merge all of the Geopackages into one---- 
 # -------------------------------------------------------------------------------------
 
-DELETE_STAGING_GPKGS <- TRUE
+DELETE_STAGING_GPKGS <- FALSE 
+
+# FEMA_VPU_SUBFOLDERS
 
 for (vpu_dir in FEMA_VPU_SUBFOLDERS) {
+  message("Merging files in '", basename(vpu_dir), "' directory...")
+  
+  # vpu_dir <- '/Users/anguswatters/Desktop/lynker-spatial/FEMA_BY_VPU/VPU_06'
+  vpu_subdirs    <- list.files(vpu_dir, full.names = TRUE)
+  
+  states_dir <- vpu_subdirs[grepl(paste0(vpu_dir, "/states"), vpu_subdirs)]
+  merged_dir <- vpu_subdirs[grepl(paste0(vpu_dir, "/merged"), vpu_subdirs)]
   
-  message("Merging files in VPU dir '", vpu_dir, "'")
-  fema_vpu_gpkgs <- list.files(vpu_dir, full.names = TRUE)
+  # fema state geopackages partitioned for the specific VPU
+  fema_state_gpkgs <- list.files(states_dir, full.names = TRUE)
   
-  master_name <- paste0("fema_", tolower(basename(vpu_dir)))
+  master_name      <- paste0("fema_", gsub("VPU", "vpu", basename(vpu_dir)))
   master_gpkg_name <- paste0(master_name, ".gpkg")
   
-  for(gpkg_file in fema_vpu_gpkgs) {
-    message("- Appending '", basename(gpkg_file), "' to master FEMA VPU gpkg:\n  >  '", 
-            paste0(vpu_dir, "/", master_gpkg_name), "'")
+  # path to the merged directory where the final merged geopackage will end up
+  master_filepath  <- paste0(merged_dir, "/", master_gpkg_name)
+  
+  for(gpkg_file in fema_state_gpkgs) {
+    # message(" - Appending '", basename(gpkg_file), "' to master FEMA VPU gpkg:\n  >  '", 
+    #         basename(gpkg_file), " > ", basename(master_filepath), 
+    #         "'")
+    message(" > '", 
+            basename(gpkg_file), " > ", basename(master_filepath), 
+            "'")
     
     ogr2ogr_merge_command <- paste0("ogr2ogr -f 'gpkg' -append -nln ", master_name, " ",   
-                                    paste0(vpu_dir, "/", master_gpkg_name), 
+                                    master_filepath, 
                                     " ", gpkg_file
                                     )
     
     if (OVERWRITE_FEMA_FILES) {
       system(ogr2ogr_merge_command)
     }
+  }
+  
+  if(DELETE_STAGING_GPKGS) {
+    message(" - Deleting individual gpkgs from '/states' directory...")
+    # message("- Deleting individual gpkgs from 'states' directory:\n > '", states_dir, "'")
     
-    if(DELETE_STAGING_GPKGS) {
-      message("Deleting individual gpkgs from\n > '", vpu_dir, "'")
-      
-      files_to_delete <- fema_vpu_gpkgs[!grepl(master_gpkg_name, fema_vpu_gpkgs)]
-      remove_gpkg_cmds <- paste0("rm ", files_to_delete)
-      
-      for (remove_cmd in remove_gpkg_cmds) {
-        message("  >  '", remove_cmd, "'")
-        system(remove_cmd)
-      }
+    remove_gpkg_cmds <- paste0("rm ", fema_state_gpkgs)
+    
+    for (remove_cmd in remove_gpkg_cmds) {
+      message("  >  '", remove_cmd, "'")
+      system(remove_cmd)
     }
-    message()
   }
-   
+  
+  # message()
+  message("Merge complete!")
+  message("Merged '", basename(vpu_dir), "' FEMA output geopackage:\n --> '", master_filepath, "'")
+  message()
 }
 
 # -------------------------------------------------------------------------------------

From c8a1ff174e28c692103dc9a5c86fa13df19e0e0f Mon Sep 17 00:00:00 2001
From: anguswg-ucsb <anguswatters@gmail.com>
Date: Wed, 8 May 2024 16:30:51 -0700
Subject: [PATCH 27/64] one more round of dissolving and exploding

---
 runners/cs_runner/preprocess_fema.R | 76 ++++++++++++++++++++++++++++-
 1 file changed, 75 insertions(+), 1 deletion(-)

diff --git a/runners/cs_runner/preprocess_fema.R b/runners/cs_runner/preprocess_fema.R
index 6b6c2f8..ba74369 100644
--- a/runners/cs_runner/preprocess_fema.R
+++ b/runners/cs_runner/preprocess_fema.R
@@ -342,9 +342,13 @@ DELETE_STAGING_GPKGS <- FALSE
 
 # FEMA_VPU_SUBFOLDERS
 
-for (vpu_dir in FEMA_VPU_SUBFOLDERS) {
+# for (vpu_dir in FEMA_VPU_SUBFOLDERS) {
+for (i in 1:4) {
+  vpu_dir = FEMA_VPU_SUBFOLDERS[i]
   message("Merging files in '", basename(vpu_dir), "' directory...")
   
+# }
+  
   # vpu_dir <- '/Users/anguswatters/Desktop/lynker-spatial/FEMA_BY_VPU/VPU_06'
   vpu_subdirs    <- list.files(vpu_dir, full.names = TRUE)
   
@@ -396,6 +400,76 @@ for (vpu_dir in FEMA_VPU_SUBFOLDERS) {
   message()
 }
 
+# -------------------------------------------------------------------------------------
+# ---- Union each VPU geopackage (either by state or by a touching predicate) ---- 
+# -------------------------------------------------------------------------------------
+
+MERGED_DIRS <- paste0(FEMA_VPU_SUBFOLDERS, "/merged")
+
+for (i in 1:length(FEMA_VPU_SUBFOLDERS)) {
+  vpu_dir = FEMA_VPU_SUBFOLDERS[i]
+  
+  VPU        <- basename(vpu_dir)
+  
+  message("Attempting to union FEMA polygons for '", VPU, "'...")
+  merged_dir <- paste0(vpu_dir, "/merged")
+  fema_vpu_file <- list.files(merged_dir, full.names = TRUE)
+  
+  has_fema_vpu_file <- length(fema_vpu_file) > 0
+  # message()
+  # fema_vpu_file
+# }
+  if(!has_fema_vpu_file) { 
+    message("No FEMA geometries in '", VPU, "'")
+    message()
+    next
+  }
+
+  message("> Re-unioning and re-exploding geometries in '", basename(fema_vpu_file), "'")
+  
+  fema_vpu_file <- fema_vpu_file[!grepl("_union.gpkg", fema_vpu_file)]
+
+  fema_vpu <- sf::read_sf(fema_vpu_file)
+
+  # fema_ids <- c(695)
+  # fema_vpu <-
+  #   fema_vpu %>%
+  #   dplyr::group_by(source) %>%
+  #   dplyr::summarise()  %>%
+  #   dplyr::ungroup()
+  
+  # 2633 = old number of polygons
+  fema_vpu  <- rmapshaper::ms_dissolve(fema_vpu,
+                                       field = "source",
+                                       sys = TRUE,
+                                       sys_mem = 16
+                                       )
+  fema_vpu  <- rmapshaper::ms_explode(fema_vpu,     
+                                      sys = TRUE,
+                                      sys_mem = 16)
+
+  fema_vpu <-
+    fema_vpu %>%
+    dplyr::group_by(source) %>%
+    dplyr::mutate(
+      state    = tolower(gsub("-100yr-flood_valid_clean.gpkg", "", source)),
+      vpu      = gsub("VPU_", "", VPU),
+      fema_id  = paste0(state, "_", 1:dplyr::n())
+      ) %>%
+    dplyr::ungroup() %>%
+    dplyr::relocate(vpu, fema_id, source, state, geom)
+  
+  if (OVERWRITE_FEMA_FILES) {
+    union_file_path <- gsub(".gpkg", "_union.gpkg", fema_vpu_file)
+    message("> Writting '", basename(union_file_path), "' (unioned and exploded version)")
+    sf::write_sf(
+      fema_vpu,
+      union_file_path
+      )
+  }
+  message()
+}
+
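+# toy illustration (sketch) of what the dissolve -> explode round trip does:
+# overlapping pieces that share a "source" collapse into one polygon before
+# being re-split into singlepart POLYGONs
+toy_sq      <- function(x0) sf::st_polygon(list(rbind(c(x0, 0), c(x0 + 2, 0),
+                                                      c(x0 + 2, 2), c(x0, 2), c(x0, 0))))
+toy_overlap <- sf::st_sf(source = "demo.gpkg", geometry = sf::st_sfc(toy_sq(0), toy_sq(1)))
+nrow(rmapshaper::ms_explode(rmapshaper::ms_dissolve(toy_overlap, field = "source")))   # 1
+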
 # -------------------------------------------------------------------------------------
 # ---- Generate bounding box gpkg for each FEMA FGB ----
 # -------------------------------------------------------------------------------------

From b34d3b3a28f17a659a635c85866c7edaa13afbc0 Mon Sep 17 00:00:00 2001
From: anguswg-ucsb <anguswatters@gmail.com>
Date: Fri, 10 May 2024 11:02:10 -0700
Subject: [PATCH 28/64] added code for splitting up transects to check
 relations within FEMA polygons and began checking for min extension
 distances, WIP, needs cleanups

---
 runners/cs_runner/add_fema_to_transects.R | 557 ++++++++++++++++++++--
 1 file changed, 510 insertions(+), 47 deletions(-)

diff --git a/runners/cs_runner/add_fema_to_transects.R b/runners/cs_runner/add_fema_to_transects.R
index 1a33558..3ca2eb7 100644
--- a/runners/cs_runner/add_fema_to_transects.R
+++ b/runners/cs_runner/add_fema_to_transects.R
@@ -1,24 +1,23 @@
 library(dplyr) 
+library(lwgeom)
 
-# Generate the flowlines layer for the final cross_sections_<VPU>.gpkg for each VPU
-source("runners/cs_runner/config.R")
+# generate the flowlines layer for the final cross_sections_<vpu>.gpkg for each vpu
+source("runners/cs_runner/config.r")
 
 # transect bucket prefix
-transects_prefix <- paste0(s3_bucket, version_prefix, "/3D/transects/")
+transects_prefix <- paste0(s3_bucket, version_prefix, "/3d/transects/")
 
 # paths to nextgen datasets and model attribute parquet files
 nextgen_files   <- list.files(nextgen_dir, full.names = FALSE)
-# FEMA_files      <- list.files(FEMA_FGB_PATH, full.names = FALSE)
-# FEMA_BB_files   <- list.files(FEMA_FGB_BB_PATH, full.names = FALSE)
+# fema_files      <- list.files(fema_fgb_path, full.names = FALSE)
+# fema_bb_files   <- list.files(fema_fgb_bb_path, full.names = FALSE)
 transects_files <- list.files(transects_dir, full.names = FALSE)
 transects_files <- transects_files[!grepl("updated", transects_files)]
 
-FEMA_VPU_SUBFOLDERS
-# string to fill in "cs_source" column in output datasets
-FEMA_files
-net_source <- "hydrofabric3D"
 
-# ensure the files are in the same order and matched up by VPU
+net_source <- "hydrofabric3d"
+
+# ensure the files are in the same order and matched up by vpu
 path_df <- align_files_by_vpu(
   x    = nextgen_files,
   y    = transects_files,
@@ -27,11 +26,7 @@ path_df <- align_files_by_vpu(
 
 path_df
 
-us_states <- 
-  USAboundaries::us_states() %>% 
-  sf::st_transform(5070)
-
-# loop over each VPU and generate cross sections, then save locally and upload to S3 bucket
+# loop over each vpu and generate cross sections, then save locally and upload to s3 bucket
 # for(i in 1:nrow(path_df)) {
   
   i = 8
@@ -43,62 +38,530 @@ us_states <-
   transect_file <- path_df$y[i]
   transect_path <- paste0(transects_dir, transect_file)
   
-  VPU <- path_df$vpu[i]
+  vpu <- path_df$vpu[i]
   transect_path
   
   # # model attributes file and full path
   # model_attr_file <- path_df$y[i]
   # model_attr_path <- paste0(model_attr_dir, model_attr_file)
   
-  message("Creating VPU ", path_df$vpu[i], "\n - transects: ", transect_file, "\n - flowpaths: '", nextgen_file, "'")
-  # message("Creating VPU ", path_df$vpu[i], " transects:\n - flowpaths: '", nextgen_file, "'\n - model attributes: '", model_attr_file, "'")
+  message("creating vpu ", path_df$vpu[i], "\n - transects: ", transect_file, "\n - flowpaths: '", nextgen_file, "'")
+  # message("creating vpu ", path_df$vpu[i], " transects:\n - flowpaths: '", nextgen_file, "'\n - model attributes: '", model_attr_file, "'")
   
   # read in nextgen data
   flines <- sf::read_sf(nextgen_path, layer = "flowpaths")
 
-  
-  FEMA_vpu_dir <- FEMA_VPU_SUBFOLDERS[grepl(paste0("VPU_", VPU), basename(FEMA_VPU_SUBFOLDERS))]
-  
-  vpu_fema_file <- list.files(FEMA_vpu_dir, full.names = TRUE)
-  
-  FEMA_vpu_dir
-  VPU
-  
+  fema_vpu_dir <- paste0(FEMA_VPU_SUBFOLDERS[grepl(paste0("VPU_", vpu), basename(FEMA_VPU_SUBFOLDERS))], "/merged")
+  fema_vpu_dir
+  vpu_fema_files <- list.files(fema_vpu_dir, full.names = TRUE)
+  # vpu_fema_file1 <- vpu_fema_files[grepl("_union.gpkg", vpu_fema_files)]
+  vpu_fema_file <- vpu_fema_files[grepl(paste0(vpu, ".gpkg"), vpu_fema_files)]
+  vpu_fema_file
+  # fema1 <- sf::read_sf(vpu_fema_file1)
+  # fema2 <- sf::read_sf(vpu_fema_filev2)  
   fema <- sf::read_sf(vpu_fema_file)
-    
+  # fema %>%  mapview::npts() 
+  # fema2 %>%  mapview::npts() 
+  
+  fema <- 
+    fema %>%
+    rmapshaper::ms_simplify(keep = 0.10)
    
+  fema %>%  mapview::npts()
+  
   transects <- sf::read_sf(transect_path)
   
   transects_geos <- geos::as_geos_geometry(transects)
-  
-  fema_geos <- geos::as_geos_geometry(fema)     
+  fema_geos      <- geos::as_geos_geometry(fema)     
 
+  length(fema_geos)
   
-  fema_geos  
-  fema_transects_matrix <- geos::geos_intersects_matrix(transects_geos, fema_geos) 
-  transects_fema_matrix <- geos::geos_intersects_matrix(fema_geos, transects_geos) 
+  # fema_geos  
+  transects_fema_matrix <- geos::geos_intersects_matrix(transects_geos, fema_geos) 
+  fema_transects_matrix <- geos::geos_intersects_matrix(fema_geos, transects_geos) 
   
-  fema_transects_matrix
+  # transects_fema_matrix
   # get the polygons that have atleast 1 intersection with the 'lines'
-  fema_transects_matrix[[557]]
-  fema_tmp <- fema[fema_transects_matrix[[557]], ]
-  trans_tmp <- transects[557, ]
+  # transects_fema_matrix[[557]]
+  # fema_tmp <- fema[transects_fema_matrix[[557]], ]
+  # trans_tmp <- transects[557, ]
   
- fema_dissolve <-  rmapshaper::ms_dissolve(fema_tmp)
- fema_simple <- rmapshaper::ms_simplify(fema_tmp, keep = 1) %>% 
+ # fema_dissolve <-  rmapshaper::ms_dissolve(fema_tmp)
+ # fema_simple <- rmapshaper::ms_simplify(fema_tmp, keep = 1) %>% 
    # rmapshaper::ms_dissolve(field = "state")
- sf::st_union()
-  mapview::mapview(trans_tmp, color = "green") + 
-    mapview::mapview(fema_tmp[1, ], col.region = "red") + 
-    mapview::mapview(fema_tmp[2, ],  col.region = "dodgerblue") + 
-    mapview::mapview(fema_simple,  col.region = "yellow")
-  transects
+
+  # mapview::mapview(trans_tmp, color = "green") + 
+  #   mapview::mapview(fema_tmp[1, ], col.region = "red") + 
+  #   mapview::mapview(fema_tmp[2, ],  col.region = "dodgerblue") + 
+  #   mapview::mapview(fema_simple,  col.region = "yellow")
+
+  fema_transects_matrix
   
+  trans_fema    <- transects[lengths(transects_fema_matrix) != 0, ]
+  # fema_polygons <- fema[lengths(fema_transects_matrix) != 0, ]
+  fema_polygons <- fema_geos[lengths(fema_transects_matrix) != 0]
   
-  fema_transects_matrix
   
-  transects_with_fema <- transects[lengths(fema_transects_matrix) != 0, ]
-  fema_with_transects <- fema[lengths(transects_fema_matrix) != 0, ]
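+  # convert the intersecting FEMA polygons to (simplified) boundary linestrings,
+  # since only the distance to a polygon's boundary is needed to size the extensions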
+  fema_lines <- 
+    fema_polygons %>% 
+    sf::st_as_sf() %>% 
+    sf::st_cast("MULTILINESTRING") %>% 
+    geos::as_geos_geometry() %>% 
+    geos::geos_simplify_preserve_topology(25)
+  
+  # length(fema_geos)
+  # nrow(fema)
+  # fema_geos     <- geos::as_geos_geometry(fema_polygons)
+  
+  tmp_trans <- trans_fema[1:5000, ]
+  
+  # tmp_centroid <- sf::st_centroid(tmp_trans)
+  # sf::st_segmentize()
+  # split_trans <- lwgeom::st_split(tmp_trans, tmp_centroid)
+  # # tmp_trans
+  max_extension_distance <- 2500
+  
+  # unlist(tmp_trans$geom)
+  # unlist(sf::st_segmentize(tmp_trans, 4)$geom)
+  min_segmentation <- min(tmp_trans$cs_lengthm %/% 2)
+  
+  # which.min(tmp_trans$cs_lengthm %/% 2)
+  
+  segmented_trans <- sf::st_segmentize(tmp_trans, min_segmentation)
+  
+  # unlist(segmented_trans$geom)
+  unique(lengths(segmented_trans$geom))
+  length(lengths(segmented_trans$geom))
+  lengths(segmented_trans$geom)  
+  
+  # left_trans   <- lwgeom::st_linesubstring(segmented_trans, 0, 0.50)
+  # right_trans  <- lwgeom::st_linesubstring(segmented_trans, 0.50, 1)
+  
+  # mapview::mapview(left_trans, col.regions = "dodgerblue") +
+    # mapview::mapview(tmp_trans, color = "red") +
+    #   mapview::mapview(tmp_trans[42, ], color = "yellow") +
+    # mapview::mapview(right_trans, color = "dodgerblue") +
+    # mapview::mapview(left_trans, color = "green")
+  
+  
+  left_trans <- 
+    segmented_trans %>% 
+    lwgeom::st_linesubstring(0, 0.50) %>% 
+    dplyr::mutate(
+      partition         = "left",
+      partition_lengthm = as.numeric(sf::st_length(geom))
+    ) %>% 
+    hydrofabric3D::add_tmp_id() %>% 
+    dplyr::select(tmp_id, hy_id, cs_source, cs_id, cs_measure,  
+                  cs_lengthm, is_extended, partition, partition_lengthm, geom)
+  
+  # Find the distances from the right side of transect lines 
+  right_trans <- 
+    segmented_trans %>% 
+    lwgeom::st_linesubstring(0.50, 1) %>% 
+    dplyr::mutate(
+      partition         = "right",
+      partition_lengthm = as.numeric(sf::st_length(geom))
+    ) %>% 
+    hydrofabric3D::add_tmp_id() %>%
+    dplyr::select(tmp_id, hy_id, cs_source, cs_id, cs_measure,  
+                  cs_lengthm, is_extended, partition, partition_lengthm, geom)
+ 
+  # convert the transect geometries to geos types
+  # get the fema polygon indices for the transect halves that are completely within a fema polygon
+  # add the fema polygons index as a column to the transect dataframes
+  left_trans_geos   <- geos::as_geos_geometry(left_trans)
+  right_trans_geos  <- geos::as_geos_geometry(right_trans)
+  
+  left_within_matrix  <- geos::geos_within_matrix(left_trans_geos, fema_polygons)
+  right_within_matrix <- geos::geos_within_matrix(right_trans_geos, fema_polygons)
+  
+  left_within_vect    <- lapply(left_within_matrix, function(i) { if(length(i) > 0) { c(i) } else { c(NA) } })
+  right_within_vect   <- lapply(right_within_matrix, function(i) { if(length(i) > 0) { c(i) } else { c(NA) } })
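+  # (an NA entry means that transect half is NOT fully inside any FEMA polygon)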
+  
+  # add the fema polygon indexes as columns
+  left_trans$left_fema_index    <- left_within_vect
+  right_trans$right_fema_index  <- right_within_vect
+  
+  # add boolean columns whether the transect is fully within the FEMA polygons
+  left_trans <- 
+    left_trans %>% 
+    dplyr::mutate(
+      left_is_within_fema = dplyr::case_when(
+        !is.na(left_fema_index) ~ TRUE,
+        TRUE                    ~ FALSE
+      )
+    ) %>% 
+    dplyr::select(tmp_id, hy_id, cs_source, cs_id, cs_measure,  
+                  cs_lengthm, is_extended, partition, partition_lengthm,
+                  left_fema_index, left_is_within_fema, 
+                  geom
+    )
+   
+  right_trans <- 
+    right_trans %>% 
+    dplyr::mutate(
+      right_is_within_fema = dplyr::case_when(
+        !is.na(right_fema_index) ~ TRUE,
+        TRUE               ~ FALSE
+      )
+    ) %>% 
+    dplyr::select(tmp_id, hy_id, cs_source, cs_id, cs_measure,  
+                  cs_lengthm, is_extended, partition, partition_lengthm,
+                  right_fema_index, right_is_within_fema, 
+                  geom
+    ) 
+ # left_trans
+ #  
+ # sf::st_within(left_trans, fema_polygons)
+ # geos::as_geos_geometry(fema_polygons)
+ # geos::as_geos_geometry(left_trans)
+ # geos::geos_within(geos::as_geos_geometry(left_trans), geos::as_geos_geometry(fema_polygons))
+  
+  left_trans_geos     <- geos::as_geos_geometry(left_trans)
+  left_within_matrix  <- geos::geos_within_matrix(left_trans_geos, fema_polygons)
+
+  left_within_vect    <- lapply(left_within_matrix, function(i) {
+                          if(length(i) > 0) { c(i) } else { c(NA) } }
+                          )
+  left_trans
+  
+  
+  left_trans <- 
+    left_trans %>% 
+    dplyr::mutate(
+      left_is_within_fema = dplyr::case_when(
+        !is.na(left_fema_index) ~ TRUE,
+        TRUE               ~ FALSE
+      )
+    ) %>% 
+    dplyr::select(tmp_id, hy_id, cs_source, cs_id, cs_measure,  
+                  cs_lengthm, is_extended, partition, partition_lengthm,
+                  left_fema_index, left_is_within_fema, 
+                  geom
+                  )
+  left_trans
+  # left_trans$geom2 <- left_trans_geos 
+  
+  # left_trans <- 
+  #   left_trans %>% 
+  #   sf::st_drop_geometry() %>%
+  #   dplyr::mutate(
+  #     geom = geos::as_geos_geometry(left_trans_geos)
+  #     # geom2 = left_trans_geos
+  #   )
+  
+  left_trans_geos
+  
+  # ----------------------------------------------------------------------------------------------------------------
+  # Loop over every left and right halfs of transects and 
+  # if they are fully within FEMA polygons, get the minimum extension distance required for the transect to meet the FEMA polygon boundary
+  # ----------------------------------------------------------------------------------------------------------------
+  
+  left_ids          <- left_trans$tmp_id
+  
+  left_fema_indexes <- left_trans$left_fema_index
+  left_fema_bool    <- left_trans$left_is_within_fema
+  
+  # preallocate vector that stores the extension distances
+  left_extension_dists <- vctrs::vec_c(rep(0, length(left_ids)))
+  
+  # all_equal_length_vects <- all(length(left_ids) == length(left_fema_indexes) && length(left_ids) == length(left_fema_bool))
+  # 1:length(left_ids)
+  extension_count = 0
+  
+  for(i in 1:length(left_ids)) {
+    # i = 1
+    tmp_id                 <- left_ids[i]
+    is_within_fema_polygon <- left_fema_bool[i]
+    fema_index             <- left_fema_indexes[i]
+
+    message("Transect: '", tmp_id, "' - (", i, ")")
+    # if(is_within_fema_polygon) {
+    #   break
+    # }
+    # fema_index <- left_trans$left_fema_index[i]
+    # is_within_fema_polygon = ifelse(!is.na(left_fema_index), TRUE, FALSE)
+    
+    if(is_within_fema_polygon) {
+      
+      message("- Left side of transect intersects with FEMA")
+      message("\t > FEMA index: ", fema_index)
+      extension_count = extension_count + 1
+      message("\t > extension_count: ", extension_count)
+      
+      trans_geom <- left_trans_geos[i]
+      index_vect <- sort(unlist(fema_index))
+      
+      # mapview::mapview(sf::st_as_sf(left_trans_geos[i]), color = "red") + sf::st_as_sf(fema_lines[index_vect])
+      
+      # fema_lines[index_vect]
+      
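+      # (assumption) geos_bs_distance() searches the candidate distances (1:5000 here)
+      # for the minimum extension needed for 'line' to reach 'geoms_to_cut'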
+      dist_to_fema <- hydrofabric3D:::geos_bs_distance(
+                          distances    = 1:5000,
+                          line         = trans_geom,
+                          geoms_to_cut = fema_lines[index_vect],
+                          direction    = "both"
+                        )
+      
+      left_extension_dists[i] <- dist_to_fema
+      
+    }
+    message()
+  }
+  
+  left_trans_geos
+  vctrs::vec_c(
+    vctrs::vec_c(
+      left_trans$hy_id
+  ),
+  vctrs::vec_cast(left_trans$cs_id)
+  )
+  
+ #  
+ #  # mapview::mapview(left_trans[477, ]) + fema_polygons[1888, ]
+ # left_trans <- 
+ #    left_trans %>%
+ #    dplyr::mutate(
+ #      fema_index = unlist(sf::st_within(., fema_polygons))
+ #    ) %>% 
+ #    dplyr::relocate(fema_index)
+ #  
+  # left_trans_geos <- geos::as_geos_geometry(left_trans)
+  
+  sort(na.omit(unlist(unique(left_trans$left_fema_index))))
+  fema_polygons[na.omit(unlist(unique(left_trans$left_fema_index)))]
+  
+  fema_polygons %>% na.omit()
+  
+  # NOTE: sorting the fema polygon indices (not sure if necessary)
+  left_fema <- fema_polygons[sort(na.omit(unlist(unique(left_trans$left_fema_index))))]
+  
+  left_fema
+  
+  left_fema %>% plot()
+  
+  # geos::geos_make_linestring(geom = left_fema) # (geos_make_linestring() takes x/y coordinates, not geometries)
+  left_trans$left_fema_index
+  sort(na.omit(unlist(unique(left_trans$left_fema_index))))
+  
+  # left_fema <- fema_polygons[na.omit(unlist(unique(left_trans$left_fema_index)))]
+  left_fema_lines <- 
+    left_fema %>% 
+    sf::st_as_sf() %>% 
+    sf::st_cast("MULTILINESTRING") %>% 
+    geos::as_geos_geometry() %>% 
+    geos::geos_simplify_preserve_topology(25)
+  
+  left_fema %>% 
+    sf::st_as_sf() %>% 
+    mapview::npts()
+  
+  geos::geos_simplify_preserve_topology(left_fema_lines, 50) %>% 
+    geos::geos_num_coordinates() %>%
+    sum()
+  
+  geos::geos_num_coordinates(left_fema_lines) %>% sum()
+  
+  geos::geos_simplify_preserve_topology(left_fema_lines, 1) %>% 
+    .[3] %>% 
+    plot()
+  left_fema_lines[3] %>% plot()
+  
+  
+  left_extension_dists <- lapply(seq_along(left_trans_geos), function(i) {
+    
+    hydrofabric3D:::geos_bs_distance(
+      distances    = 1:2000,
+      line         = left_trans_geos[i],
+      geoms_to_cut = left_fema_lines,
+      direction    = "head"
+    )
+  }) %>% 
+    unlist()
+  
+  left_trans$left_head_extension_dist <- left_extension_dists
+  
+  # Find the distances from the right side of transect lines 
+  right_trans <- 
+    segmented_trans %>% 
+    lwgeom::st_linesubstring(0.50, 1) %>% 
+    dplyr::mutate(
+      partition         = "right",
+      partition_lengthm = as.numeric(sf::st_length(geom))
+    ) %>% 
+    dplyr::select(hy_id, cs_source, cs_id, cs_measure,  
+                  cs_lengthm, is_extended, partition, partition_lengthm, geom)
+  right_trans_geos <- geos::as_geos_geometry(right_trans)
+  right_within_matrix <- geos::geos_within_matrix(right_trans_geos, geos::as_geos_geometry(fema_polygons))
+ 
+  right_within_vect <- lapply(right_within_matrix, function(i) {
+                      if(length(i) > 0) { c(i) } else { c(NA_real_) } }
+                      )
+ right_trans$fema_index <- right_within_vect
+# 
+#  right_trans <- 
+#     right_trans %>%
+#     dplyr::mutate(
+#       fema_index = unlist(sf::st_within(., fema_polygons))
+#     ) %>% 
+#     dplyr::relocate(fema_index)
+#   
+  
+  right_fema <- fema_polygons[unique(right_trans$fema_index),] 
+  right_fema_lines <- 
+    right_fema %>% 
+    sf::st_cast("LINESTRING")
+   
+  right_extension_dists <- lapply(seq_along(right_trans_geos), function(i) {
+    
+    hydrofabric3D:::geos_bs_distance(
+      distances    = 1:2000,
+      line         = right_trans_geos[i],
+      geoms_to_cut = right_fema_lines,
+      direction    = "tail"
+    )
+  }) %>% 
+    unlist()
+  
+  right_trans$right_tail_extension_dists <- right_extension_dists
+  right_trans$right_tail_extension_dists
+  unlist(sf::st_within(left_trans, fema_polygons))
+  
+  left_trans <- 
+    left_trans %>%
+    dplyr::mutate(
+      fema_index = unlist(sf::st_within(., fema_polygons))
+    ) %>% 
+  dplyr::relocate(fema_index)
+  
+  left_fema <- fema_polygons[unique(left_trans$fema_index),]
+  left_fema_lines <- 
+    left_fema %>% 
+    sf::st_cast("LINESTRING")
+  
+  geos::as_geos_geometry(left_fema)
+  mapview::mapview(left_fema, col.regions = "dodgerblue") +
+    mapview::mapview(left_ls, color = "yellow") +
+    mapview::mapview(left_trans, color = "red") +
+    mapview::mapview(right_trans, color = "green")
+  
+  left_trans_geos <- geos::as_geos_geometry(left_trans)
+  left_trans_geos %>% plot()
+  left_trans_geos[2]
+  
+  length(left_trans_geos)
+  
+  left_extension_dists <- lapply(seq_along(left_trans_geos), function(i) {
+    
+   hydrofabric3D:::geos_bs_distance(
+      distances    = 1:2000,
+      line         = left_trans_geos[i],
+      geoms_to_cut = left_fema_lines,
+      direction    = "head"
+    )
+  }) %>% 
+    unlist()
+  
+  left_extensions  <- geos::geos_empty()
+  
+  for (i in 1:length(left_extension_dists)) {
+    dist = left_extension_dists[i]
+    geos_line <- left_trans_geos[i]
+    message(glue::glue("i: {i}\ndist: {dist}"))
+    
+    extended <- hydrofabric3D::geos_extend_line(
+      geos_line,
+      dist,
+      "head"
+    )
+    left_extensions <- vctrs::vec_c(left_extensions, extended)
+    
+  }
+  # index for only valid transects
+  # is_valid <- !geos::geos_is_empty(left_extensions)
+  left_extensions <- left_extensions[!geos::geos_is_empty(left_extensions)]
+  # !geos::geos_is_empty(left_extensions)
+  
+  new_left_trans <- 
+    left_trans %>% 
+    sf::st_drop_geometry() %>% 
+    dplyr::mutate(
+      geom = sf::st_as_sfc(left_extensions)
+    ) %>% 
+    sf::st_as_sf()
+  # geos::sf
+  mapview::mapview(left_fema, col.regions = "dodgerblue") +
+    mapview::mapview(left_ls, color = "yellow") +
+    mapview::mapview(left_trans, color = "red") +
+    mapview::mapview(new_left_trans, color = "green")
+  mapply(function(geom, dist) {
+      hydrofabric3D::geos_extend_line(geom, dist, "head") 
+    },
+  left_trans_geos,
+  left_extension_dists
+  )
+  
+  left_extensions <- lapply(seq_along(left_trans_geos), function(i) {
+      
+    extend_dist <- hydrofabric3D:::geos_bs_distance(
+        distances    = 1:2000,
+        line         = left_trans_geos[i],
+        geoms_to_cut = left_fema_lines,
+        direction    = "head"
+      )
+      
+    hydrofabric3D::geos_extend_line(left_trans_geos[i], extend_dist, "head") 
+    
+    })
+  # unlist(left_extensions)
+  
+  left_extension_dists <- lapply(seq_along(left_trans_geos), function(i) {
+    hydrofabric3D:::geos_bs_distance(
+      distances    = 1:2000,
+      line         = left_trans_geos[i],
+      geoms_to_cut = left_fema_lines,
+      direction    = "head"
+    )
+  }) 
+  
+  distance_to_extend <- 
+    hydrofabric3D:::geos_bs_distance(
+      distances    = 1:1500,
+      line         = left_trans_geos[1],
+      geoms_to_cut = left_fema_lines,
+      direction    = "head"
+    )
+  
+  extended <- hydrofabric3D::geos_extend_line(left_trans_geos[1], distance_to_extend, "head") %>% 
+    sf::st_as_sf()
+  mapview::mapview(left_fema, col.regions = "dodgerblue") +
+    mapview::mapview(left_ls, color = "yellow") +
+    mapview::mapview(left_trans, color = "red") +
+    mapview::mapview(extended, color = "green")
+    # mapview::mapview(right_trans, color = "green")
+  left_trans$geom %>% sf::st_length()
+  plot(segmented_trans$geom, col = "red", lwd =5)
+  plot(left_trans$geom, col = "green", lwd=3, add = TRUE)
+  plot(right_trans$geom, col = "blue", lwd=3, add = TRUE)
+  unlist(left_trans$geom)
+   unlist(right_trans$geom)
+  
+  unlist(tmp_trans$geom )
+  unlist(split_trans$geom )
+  split_trans %>% 
+    sf::st_collection_extract("LINESTRING")
+  split_trans$geom
+  
+  tmp_trans$geom
+  nngeo::st_segments(tmp_trans) %>% 
+    .$result %>% 
+    plot()
+  
+  mapview::mapview(tmp_trans) + tmp_centroid
+  geos::geos_clip_by_rect()
+  
+  transects_with_
   
   lengths(transects_fema_matrix)
   mapview::mapview(transects_with_fema, color = "green") + fema_with_transects

From 75e93bd03259449f3147a15f6350a029273b2740 Mon Sep 17 00:00:00 2001
From: anguswg-ucsb <anguswatters@gmail.com>
Date: Mon, 13 May 2024 15:50:00 -0700
Subject: [PATCH 29/64] continued work on generating extended transect lines
 from fema files

---
 runners/cs_runner/add_fema_to_transects.R | 702 ++++++++++++++--------
 1 file changed, 445 insertions(+), 257 deletions(-)

diff --git a/runners/cs_runner/add_fema_to_transects.R b/runners/cs_runner/add_fema_to_transects.R
index 3ca2eb7..d328e89 100644
--- a/runners/cs_runner/add_fema_to_transects.R
+++ b/runners/cs_runner/add_fema_to_transects.R
@@ -60,255 +60,443 @@ path_df
   # fema1 <- sf::read_sf(vpu_fema_file1)
   # fema2 <- sf::read_sf(vpu_fema_filev2)  
   fema <- sf::read_sf(vpu_fema_file)
-  # fema %>%  mapview::npts() 
-  # fema2 %>%  mapview::npts() 
-  
-  fema <- 
-    fema %>%
-    rmapshaper::ms_simplify(keep = 0.10)
-   
-  fema %>%  mapview::npts()
-  
   transects <- sf::read_sf(transect_path)
   
-  transects_geos <- geos::as_geos_geometry(transects)
-  fema_geos      <- geos::as_geos_geometry(fema)     
-
-  length(fema_geos)
-  
-  # fema_geos  
-  transects_fema_matrix <- geos::geos_intersects_matrix(transects_geos, fema_geos) 
-  fema_transects_matrix <- geos::geos_intersects_matrix(fema_geos, transects_geos) 
-  
-  # transects_fema_matrix
-  # get the polygons that have atleast 1 intersection with the 'lines'
-  # transects_fema_matrix[[557]]
-  # fema_tmp <- fema[transects_fema_matrix[[557]], ]
-  # trans_tmp <- transects[557, ]
-  
- # fema_dissolve <-  rmapshaper::ms_dissolve(fema_tmp)
- # fema_simple <- rmapshaper::ms_simplify(fema_tmp, keep = 1) %>% 
-   # rmapshaper::ms_dissolve(field = "state")
-
-  # mapview::mapview(trans_tmp, color = "green") + 
-  #   mapview::mapview(fema_tmp[1, ], col.region = "red") + 
-  #   mapview::mapview(fema_tmp[2, ],  col.region = "dodgerblue") + 
-  #   mapview::mapview(fema_simple,  col.region = "yellow")
-
-  fema_transects_matrix
-  
-  trans_fema    <- transects[lengths(transects_fema_matrix) != 0, ]
-  # fema_polygons <- fema[lengths(fema_transects_matrix) != 0, ]
-  fema_polygons <- fema_geos[lengths(fema_transects_matrix) != 0]
-  
-  
-  fema_lines <- 
-    fema_polygons %>% 
-    sf::st_as_sf() %>% 
-    sf::st_cast("MULTILINESTRING") %>% 
-    geos::as_geos_geometry() %>% 
-    geos::geos_simplify_preserve_topology(25)
-  
-  # length(fema_geos)
-  # nrow(fema)
-  # fema_geos     <- geos::as_geos_geometry(fema_polygons)
-  
-  tmp_trans <- trans_fema[1:5000, ]
-  
-  # tmp_centroid <- sf::st_centroid(tmp_trans)
-  # sf::st_segmentize()
-  # split_trans <- lwgeom::st_split(tmp_trans, tmp_centroid)
-  # # tmp_trans
-  max_extension_distance <- 2500
-  
-  # unlist(tmp_trans$geom)
-  # unlist(sf::st_segmentize(tmp_trans, 4)$geom)
-  min_segmentation <- min(tmp_trans$cs_lengthm %/% 2)
-  
-  # which.min(tmp_trans$cs_lengthm %/% 2)
-  
-  segmented_trans <- sf::st_segmentize(tmp_trans, min_segmentation)
-  
-  # unlist(segmented_trans$geom)
-  unique(lengths(segmented_trans$geom))
-  length(lengths(segmented_trans$geom))
-  lengths(segmented_trans$geom)  
-  
-  # left_trans   <- lwgeom::st_linesubstring(segmented_trans, 0, 0.50)
-  # right_trans  <- lwgeom::st_linesubstring(segmented_trans, 0.50, 1)
-  
-  # mapview::mapview(left_trans, col.regions = "dodgerblue") +
-    # mapview::mapview(tmp_trans, color = "red") +
-    #   mapview::mapview(tmp_trans[42, ], color = "yellow") +
-    # mapview::mapview(right_trans, color = "dodgerblue") +
-    # mapview::mapview(left_trans, color = "green")
-  
-  
-  left_trans <- 
-    segmented_trans %>% 
-    lwgeom::st_linesubstring(0, 0.50) %>% 
-    dplyr::mutate(
-      partition         = "left",
-      partition_lengthm = as.numeric(sf::st_length(geom))
-    ) %>% 
-    hydrofabric3D::add_tmp_id() %>% 
-    dplyr::select(tmp_id, hy_id, cs_source, cs_id, cs_measure,  
-                  cs_lengthm, is_extended, partition, partition_lengthm, geom)
-  
-  # Find the distances from the right side of transect lines 
-  right_trans <- 
-    segmented_trans %>% 
-    lwgeom::st_linesubstring(0.50, 1) %>% 
-    dplyr::mutate(
-      partition         = "right",
-      partition_lengthm = as.numeric(sf::st_length(geom))
-    ) %>% 
-    hydrofabric3D::add_tmp_id() %>%
-    dplyr::select(tmp_id, hy_id, cs_source, cs_id, cs_measure,  
-                  cs_lengthm, is_extended, partition, partition_lengthm, geom)
- 
-  # convert the transect geometries to geos types
-  # get the fema polygon indices for the transect halves that are completely within a fema polygon
-  # add the fema polygons index as a column to the transect dataframes
-  left_trans_geos   <- geos::as_geos_geometry(left_trans)
-  right_trans_geos  <- geos::as_geos_geometry(right_trans)
+  # fema %>%  mapview::npts() 
+  # fema2 %>%  mapview::npts() 
   
-  left_within_matrix  <- geos::geos_within_matrix(left_trans_geos, fema_polygons)
-  right_within_matrix <- geos::geos_within_matrix(right_trans_geos, fema_polygons)
+  # Given a set of transect linestrings and polygons, get the minimum distance to extend each transect line (in both directions) so it reaches the edge of the "polygons"
+  # internal function for extending transect lines out to FEMA 100 year flood plain polygons
+  # transect_lines, set of sf linestrings to extend (only if the transect lines are ENTIRELY within a polygon)
+  # polygons, set of sf polygons that transect lines should be extended out to
+  # max_extension_distance numeric, maximum distance (meters) to extend a transect line in either direction to try and intersect one of the "polygons"
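+  # intended usage (sketch; the function is still WIP and relies on several locals defined below):
+  # extension_dists <- get_transect_extension_distances_to_polygons(
+  #   transect_lines         = transects,
+  #   polygons               = fema,
+  #   max_extension_distance = 2500
+  # )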
+  get_transect_extension_distances_to_polygons <- function(transect_lines, polygons, max_extension_distance) {
+    
+    # transect_lines <- transects
+    # polygons <- fema
+    # max_extension_distance <- 2500
+    
+    # keep 10% of the original points for speed
+    polygons <- rmapshaper::ms_simplify(polygons, keep = 0.10)
+     
+    # fema %>%  mapview::npts()
+    
+    # transects <- sf::read_sf(transect_path)
+    
+    transects_geos  <- geos::as_geos_geometry(transect_lines)
+    polygons_geos   <- geos::as_geos_geometry(polygons)     
   
-  left_within_vect    <- lapply(left_within_matrix, function(i) { if(length(i) > 0) { c(i) } else { c(NA) } })
-  right_within_vect   <- lapply(right_within_matrix, function(i) { if(length(i) > 0) { c(i) } else { c(NA) } })
+    # length(polygons_geos)
+    
+    # polygons_geos  
+    transects_polygons_matrix <- geos::geos_intersects_matrix(transects_geos, polygons_geos) 
+    polygons_transects_matrix <- geos::geos_intersects_matrix(polygons_geos, transects_geos) 
+    
+    # transects_polygons_matrix
+    # get the polygons that have at least 1 intersection with the 'lines'
+    # transects_polygons_matrix[[557]]
+    # fema_tmp <- fema[transects_polygons_matrix[[557]], ]
+    # trans_tmp <- transects[557, ]
+    
+   # fema_dissolve <-  rmapshaper::ms_dissolve(fema_tmp)
+   # fema_simple <- rmapshaper::ms_simplify(fema_tmp, keep = 1) %>% 
+     # rmapshaper::ms_dissolve(field = "state")
   
-  # add the fema polygon indexes as columns
-  left_trans$left_fema_index    <- left_within_vect
-  right_trans$right_fema_index  <- right_within_vect
+    # mapview::mapview(trans_tmp, color = "green") + 
+    #   mapview::mapview(fema_tmp[1, ], col.region = "red") + 
+    #   mapview::mapview(fema_tmp[2, ],  col.region = "dodgerblue") + 
+    #   mapview::mapview(fema_simple,  col.region = "yellow")
   
-  # add boolean columns whether the transect is fully within the FEMA polygons
-  left_trans <- 
-    left_trans %>% 
-    dplyr::mutate(
-      left_is_within_fema = dplyr::case_when(
-        !is.na(left_fema_index) ~ TRUE,
-        TRUE                    ~ FALSE
-      )
-    ) %>% 
-    dplyr::select(tmp_id, hy_id, cs_source, cs_id, cs_measure,  
-                  cs_lengthm, is_extended, partition, partition_lengthm,
-                  left_fema_index, left_is_within_fema, 
-                  geom
-    )
+    # polygons_transects_matrix
+    
+    # subset the transects and polygons to only those with intersections
+    intersect_transects  <- transect_lines[lengths(transects_polygons_matrix) != 0, ]
+    intersect_polygons   <- polygons_geos[lengths(polygons_transects_matrix) != 0]
+    
+    # Convert our intersecting polygons to LINESTRINGS b/c we DON'T NEED polygons to calculate extension distances from our transect lines
+    # This can be done with just linestrings (not sure if this is actually more performant but I'm pretty sure it is....)
+    intersect_lines <- 
+      intersect_polygons %>% 
+      sf::st_as_sf() %>% 
+      sf::st_cast("MULTILINESTRING") %>% 
+      geos::as_geos_geometry() %>% 
+      geos::geos_simplify_preserve_topology(25)
+    
+    # length(polygons_geos)
+    # nrow(fema)
+    # polygons_geos     <- geos::as_geos_geometry(intersect_polygons)
+    
+    # tmp_trans <- intersect_transects[1:5000, ]
+    
+    # tmp_centroid <- sf::st_centroid(tmp_trans)
+    # sf::st_segmentize()
+    # split_trans <- lwgeom::st_split(tmp_trans, tmp_centroid)
+    # # tmp_trans
+    # max_extension_distance <- 2500
+    
+    # unlist(tmp_trans$geom)
+    # unlist(sf::st_segmentize(tmp_trans, 4)$geom)
+    
+    # use half of the shortest transect line as the segmentation length for all transects (ensures all transects will have a midpoint...?)
+    # TODO: Double check this logic.
+    min_segmentation <- min(intersect_transects$cs_lengthm %/% 2)
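+    # e.g. if the shortest transect is ~100m, min_segmentation is 50, so st_segmentize()
+    # below adds vertices at most 50m apart to every transect before it is split in half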
+    
+    # which.min(intersect_transects$cs_lengthm %/% 2)
+    
+    # make each transect line have way more segments so we can take a left and right half of each transect line
+    segmented_trans <- sf::st_segmentize(intersect_transects, min_segmentation)
+    
+    # unlist(segmented_trans$geom)
+    unique(lengths(segmented_trans$geom))
+    length(lengths(segmented_trans$geom))
+    lengths(segmented_trans$geom)  
+    
+    # left_trans   <- lwgeom::st_linesubstring(segmented_trans, 0, 0.50)
+    # right_trans  <- lwgeom::st_linesubstring(segmented_trans, 0.50, 1)
+    
+    # mapview::mapview(left_trans, col.regions = "dodgerblue") +
+      # mapview::mapview(intersect_transects, color = "red") +
+      #   mapview::mapview(intersect_transects[42, ], color = "yellow") +
+      # mapview::mapview(right_trans, color = "dodgerblue") +
+      # mapview::mapview(left_trans, color = "green")
+    
+    # Separate the transect lines into LEFT and RIGHT halves
+    # We do this so we can check if a side of a transect is ENTIRELY WITHIN a polygon. 
+    # If the half is entirely within a polygon, that side needs to be extended out to the polygon's boundary. 
+    left_trans <- 
+      segmented_trans %>% 
+      lwgeom::st_linesubstring(0, 0.50) %>% 
+      dplyr::mutate(
+        partition         = "left",
+        partition_lengthm = as.numeric(sf::st_length(geom))
+      ) %>% 
+      hydrofabric3D::add_tmp_id() %>% 
+      dplyr::select(tmp_id, hy_id, cs_source, cs_id, cs_measure,  
+                    cs_lengthm, is_extended, partition, partition_lengthm, geom)
+    
+    # Find the distances from the right side of transect lines 
+    right_trans <- 
+      segmented_trans %>% 
+      lwgeom::st_linesubstring(0.50, 1) %>% 
+      dplyr::mutate(
+        partition         = "right",
+        partition_lengthm = as.numeric(sf::st_length(geom))
+      ) %>% 
+      hydrofabric3D::add_tmp_id() %>%
+      dplyr::select(tmp_id, hy_id, cs_source, cs_id, cs_measure,  
+                    cs_lengthm, is_extended, partition, partition_lengthm, geom)
    
-  right_trans <- 
-    right_trans %>% 
-    dplyr::mutate(
-      right_is_within_fema = dplyr::case_when(
-        !is.na(right_fema_index) ~ TRUE,
-        TRUE               ~ FALSE
+    # convert the transect geometries to geos types
+    # get the fema polygon indices for the transect halves that are completely within a fema polygon
+    # add the fema polygons index as a column to the transect dataframes
+    left_trans_geos     <- geos::as_geos_geometry(left_trans)
+    right_trans_geos    <- geos::as_geos_geometry(right_trans)
+    
+    left_within_matrix  <- geos::geos_within_matrix(left_trans_geos, intersect_polygons)
+    right_within_matrix <- geos::geos_within_matrix(right_trans_geos, intersect_polygons)
+    
+    left_within_vect    <- lapply(left_within_matrix, function(i) { if(length(i) > 0) { c(i) } else { c(NA) } })
+    right_within_vect   <- lapply(right_within_matrix, function(i) { if(length(i) > 0) { c(i) } else { c(NA) } })
+    
+    # add the fema polygon indexes as columns
+    left_trans$left_fema_index    <- left_within_vect
+    right_trans$right_fema_index  <- right_within_vect
+    
+    # add boolean columns whether the transect is fully within the FEMA polygons
+    left_trans <- 
+      left_trans %>% 
+      dplyr::mutate(
+        left_is_within_fema = dplyr::case_when(
+          !is.na(left_fema_index) ~ TRUE,
+          TRUE                    ~ FALSE
+        )
+      ) %>% 
+      dplyr::select(tmp_id, hy_id, cs_source, cs_id, cs_measure,  
+                    cs_lengthm, is_extended, partition, partition_lengthm,
+                    left_fema_index, left_is_within_fema, 
+                    geom
       )
-    ) %>% 
-    dplyr::select(tmp_id, hy_id, cs_source, cs_id, cs_measure,  
-                  cs_lengthm, is_extended, partition, partition_lengthm,
-                  right_fema_index, right_is_within_fema, 
-                  geom
-    ) 
- # left_trans
- #  
- # sf::st_within(left_trans, fema_polygons)
- # geos::as_geos_geometry(fema_polygons)
- # geos::as_geos_geometry(left_trans)
- # geos::geos_within(geos::as_geos_geometry(left_trans), geos::as_geos_geometry(fema_polygons))
-  
-  left_trans_geos     <- geos::as_geos_geometry(left_trans)
-  left_within_matrix  <- geos::geos_within_matrix(left_trans_geos, fema_polygons)
-
-  left_within_vect    <- lapply(left_within_matrix, function(i) {
-                          if(length(i) > 0) { c(i) } else { c(NA) } }
+     
+    right_trans <- 
+      right_trans %>% 
+      dplyr::mutate(
+        right_is_within_fema = dplyr::case_when(
+          !is.na(right_fema_index) ~ TRUE,
+          TRUE               ~ FALSE
+        )
+      ) %>% 
+      dplyr::select(tmp_id, hy_id, cs_source, cs_id, cs_measure,  
+                    cs_lengthm, is_extended, partition, partition_lengthm,
+                    right_fema_index, right_is_within_fema, 
+                    geom
+      ) 
+    
+    left_distances <- calc_extension_distances(
+      geos_geoms             = left_trans_geos,
+      ids                    = left_trans$tmp_id,
+      lines_to_cut           = intersect_lines,
+      lines_to_cut_indices   = left_trans$left_fema_index,
+      direction              = "head",
+      max_extension_distance = max_extension_distance
+    )
+    
+    right_distances <- calc_extension_distances(
+      geos_geoms             = right_trans_geos,
+      ids                    = right_trans$tmp_id,
+      lines_to_cut           = intersect_lines,
+      lines_to_cut_indices   = right_trans$right_fema_index,
+      direction              = "tail",
+      max_extension_distance = max_extension_distance
+    )  
+    
+    left_trans$left_distance   <- left_distances
+    right_trans$right_distance <- right_distances
+    
+    extensions_by_id <- dplyr::left_join(
+                          sf::st_drop_geometry(
+                            dplyr::select(left_trans, 
+                                          hy_id, cs_id, left_distance)
+                            ),
+                          sf::st_drop_geometry(
+                            dplyr::select(right_trans, 
+                                        hy_id, cs_id, 
+                                        right_distance)
+                            ),
+                          by = c("hy_id", "cs_id")
                           )
-  left_trans
-  
-  
-  left_trans <- 
-    left_trans %>% 
-    dplyr::mutate(
-      left_is_within_fema = dplyr::case_when(
-        !is.na(left_fema_index) ~ TRUE,
-        TRUE               ~ FALSE
+    transects_with_distances <- 
+      transects %>% 
+      dplyr::left_join(
+                extensions_by_id,
+                by = c("hy_id", "cs_id")
+            ) %>% 
+      hydrofabric3D::add_tmp_id()
+    
+    fline_id_array <- flines$id
+    
+    # Convert the net object into a geos_geometry
+    flines_geos <- geos::as_geos_geometry(flines)
+    
+    transect_hy_id_array <- transects_with_distances$hy_id
+    transect_cs_id_array <- transects_with_distances$cs_id
+    
+    transect_geoms  <- geos::as_geos_geometry(transects_with_distances$geom)
+   
+    left_distances  <- transects_with_distances$left_distance
+    right_distances <- transects_with_distances$right_distance
+    
+    # ids (from hydrofabric3D::add_tmp_id()) used to iterate over the transects
+    transect_ids  <- transects_with_distances$tmp_id
+    
+    # preallocate vector that stores the extended transect geometries
+    new_transects <- vctrs::vec_c(rep(geos::geos_empty(), length(transect_ids)))
+    # new_transects <- geos::geos_empty()
+    # measures  <- vctrs::vec_c()
+    
+    for (i in seq_along(transect_ids)) {
+      message("i: ", i)
+      # i = 1
+      
+      current_trans <- transect_geoms[i]
+      
+      current_hy_id <- transect_hy_id_array[i]
+      current_cs_id <- transect_cs_id_array[i]
+      curr_tmp_id   <- transect_ids[i]
+      
+      current_fline <- flines_geos[fline_id_array == current_hy_id]
+    
+      # current_fline <- 
+      
+       
+      left_distance_to_extend <- left_distances[i]
+      left_distance_to_extend
+      right_distance_to_extend <- right_distances[i]
+      right_distance_to_extend
+      
+      no_extension_required <- (left_distance_to_extend == 0 && right_distance_to_extend == 0)
+      message("Transect tmp_id: ", curr_tmp_id, " - (", i, ")")
+      
+      if(no_extension_required) {
+        message("Skipping -left/right extension are both 0")
+        next
+      }
+      
+      # trans
+      left_trans
+      curr_tmp_id
+      
+      curr_fema_index <- 
+        left_trans %>% 
+        dplyr::filter(hy_id == current_hy_id, cs_id == current_cs_id) %>% 
+        .$left_fema_index %>% .[[1]]
+      
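+      # extend the line from its start ("head") by the left distance and from its end ("tail") by the right distance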
+      left_extended_trans  <- hydrofabric3D::geos_extend_line(current_trans, left_distance_to_extend, "head")
+      right_extended_trans <- hydrofabric3D::geos_extend_line(current_trans, right_distance_to_extend, "tail")
+      
+      # neighbor_transects <- 
+      left_intersects_fline <- geos::geos_intersection(
+        left_extended_trans,
+        current_fline
       )
-    ) %>% 
-    dplyr::select(tmp_id, hy_id, cs_source, cs_id, cs_measure,  
-                  cs_lengthm, is_extended, partition, partition_lengthm,
-                  left_fema_index, left_is_within_fema, 
-                  geom
-                  )
-  left_trans
-  # left_trans$geom2 <- left_trans_geos 
-  
-  # left_trans <- 
-  #   left_trans %>% 
-  #   sf::st_drop_geometry() %>%
-  #   dplyr::mutate(
-  #     geom = geos::as_geos_geometry(left_trans_geos)
-  #     # geom2 = left_trans_geos
-  #   )
-  
-  left_trans_geos
-  
-  # ----------------------------------------------------------------------------------------------------------------
-  # Loop over every left and right halfs of transects and 
-  # if they are fully within FEMA polygons, get the minimum extension distance required for the transect to meet the FEMA polygon boundary
-  # ----------------------------------------------------------------------------------------------------------------
-  
-  left_ids          <- left_trans$tmp_id
-  
-  left_fema_indexes <- left_trans$left_fema_index
-  left_fema_bool    <- left_trans$left_is_within_fema
-  
-  # preallocate vector that stores the extension. distances
-  left_extension_dists <- vctrs::vec_c(rep(0, length(left_ids)))
-  
-  # all_equal_length_vects <- all(length(left_ids) == length(left_fema_indexes) && length(left_ids) == length(left_fema_bool))
-  # 1:length(left_ids)
-  extension_count = 0
-  
-  for(i in 1:length(left_ids)) {
-    # i = 1
-    tmp_id                 <- left_ids[i]
-    is_within_fema_polygon <- left_fema_bool[i]
-    fema_index             <- left_fema_indexes[i]
-
-    message("Transect: '", tmp_id, "' - (", i, ")")
-    # if(is_within_fema_polygon) {
-    #   break
-    # }
-    # fema_index <- left_trans$left_fema_index[i]
-    # is_within_fema_polygon = ifelse(!is.na(left_fema_index), TRUE, FALSE)
-    
-    if(is_within_fema_polygon) {
       
-      message("- Left side of transect intersects with FEMA")
-      message("\t > FEMA index: ", fema_index)
-      extension_count = extension_count + 1
-      message("\t > extension_count: ", extension_count)
+      # TRUE when the extended transect crosses the flowline exactly once (a single point intersection)
+      left_intersects_fline_once = geos::geos_type(left_intersects_fline) == "point"
+      left_intersects_fline_once
+      
+      
+      # left_intersects_fline
+      # geos::geos_type(left_intersects_fline) == "point"
+      
+      # geos::geos_intersects(current_trans, current_fline)
+      
+      # if(
+      #   geos::geos_type(fline_intersects)
+      #   
+      #   
+      # )
+      mapview::mapview(sf::st_as_sf(current_trans), color = "green") +
+        mapview::mapview(sf::st_as_sf(left_extended_trans), color = "red") +
+        mapview::mapview(sf::st_as_sf(right_extended_trans), color = "red") +
+        mapview::mapview(sf::st_as_sf(intersect_polygons[[curr_fema_index]]), col.regions = "dodgerblue")   
+      
+      
+      
+        
+      
+    }
+    
+    
+    left_trans$left_distance
+    
+    length(right_distances)
+    
+    
+    # Calculate the minimum distance a line would need to extend to reach the boundary of the polygon/line that the input geometries are entirely within 
+    calc_extension_distances <- function(geos_geoms, ids, lines_to_cut, lines_to_cut_indices, direction = "head", max_extension_distance = 2500) {
+      #####   #####   #####   #####   #####
+      # geos_geoms   <- left_trans_geos
+      # ids          <- left_trans$tmp_id
+      # lines_to_cut <- intersect_lines
+      # lines_to_cut_indices <- left_trans$left_fema_index
+      # direction = "head"
+      # max_extension_distance = 2500
+      # geos_geoms             = left_trans_geos
+      # ids                    = left_trans$tmp_id
+      # lines_to_cut           = intersect_lines
+      # lines_to_cut_indices   = left_trans$left_fema_index
+      # direction              = "head"
+      # max_extension_distance = max_extension_distance
       
-      trans_geom <- left_trans_geos[i]
-      index_vect <- sort(unlist(fema_index))
       
-      # mapview::mapview(sf::st_as_sf(left_trans_geos[i]), color = "red") + sf::st_as_sf(fema_lines[index_vect])
       
-      # fema_lines[index_vect]
+      #####   #####   #####   #####   #####
       
-      dist_to_fema <- hydrofabric3D:::geos_bs_distance(
-                          distances    = 1:5000,
-                          line         = trans_geom,
-                          geoms_to_cut = fema_lines[index_vect],
-                          direction    = "both"
-                        )
+      if (!direction %in% c("head", "tail")) {
+          stop("Invalid 'direction' value, must be one of 'head' or 'tail'")
+      }
       
-      left_extension_dists[i] <- dist_to_fema
+      # preallocate vector that stores the extension distances
+      extension_dists <- vctrs::vec_c(rep(0, length(ids)))
+
+      # extension_dists <- vector(mode = "numeric", length = nrow(trans_data))
+      for (i in seq_along(ids)) {
+        # i = 118
+        curr_id           <- ids[i]
+        is_within_polygon <- any(!is.na(lines_to_cut_indices[[i]]))
+        polygon_index     <- lines_to_cut_indices[[i]]
+        # any(is_within_polygon)
+        message("Transect: '", curr_id, "' - (", i, ")")
+        
+        if (is_within_polygon) {
+          message("- Side of transect intersects with FEMA")
+          message("\t > FEMA index: ", polygon_index)
+          
+          curr_geom  <- geos_geoms[[i]]
+          index_vect <- sort(unlist(polygon_index))
+          
+          distance_to_extend <- hydrofabric3D:::geos_bs_distance(
+            distances    = 1:max_extension_distance,
+            line         = curr_geom,
+            geoms_to_cut = lines_to_cut[index_vect],
+            direction    = direction
+          )
+          
+          extension_dists[i] <- distance_to_extend
+        }
+        
+      }
+           
+      return(extension_dists)
+    }
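+    
+    # NOTE: calc_extension_distances() is called by the left_distances / right_distances
+    # code further up, so in a clean top-to-bottom run it needs to be defined (or sourced) first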
+    
+    
+    # ----------------------------------------------------------------------------------------------------------------
+    # Loop over every left and right halfs of transects and 
+    # if they are fully within FEMA polygons, get the minimum extension distance required for the transect to meet the FEMA polygon boundary
+    # ----------------------------------------------------------------------------------------------------------------
+    
+    left_ids          <- left_trans$tmp_id
+    
+    left_fema_indexes <- left_trans$left_fema_index
+    left_fema_bool    <- left_trans$left_is_within_fema
+    
+    # preallocate vector that stores the extension distances
+    left_extension_dists <- vctrs::vec_c(rep(0, length(left_ids)))
+    
+    # all_equal_length_vects <- all(length(left_ids) == length(left_fema_indexes) && length(left_ids) == length(left_fema_bool))
+    # 1:length(left_ids)
+    extension_count = 0
+    
+    for(i in 1:length(left_ids)) {
+      # i = 1
+      tmp_id                 <- left_ids[i]
+      is_within_fema_polygon <- left_fema_bool[i]
+      fema_index             <- left_fema_indexes[i]
+  
+      message("Transect: '", tmp_id, "' - (", i, ")")
+      # if(is_within_fema_polygon) {
+      #   break
+      # }
+      # fema_index <- left_trans$left_fema_index[i]
+      # is_within_fema_polygon = ifelse(!is.na(left_fema_index), TRUE, FALSE)
       
+      if(is_within_fema_polygon) {
+        
+        message("- Left side of transect intersects with FEMA")
+        message("\t > FEMA index: ", fema_index)
+        extension_count = extension_count + 1
+        message("\t > extension_count: ", extension_count)
+        
+        trans_geom <- left_trans_geos[i]
+        index_vect <- sort(unlist(fema_index))
+        
+        # mapview::mapview(sf::st_as_sf(left_trans_geos[i]), color = "red") + sf::st_as_sf(intersect_lines[index_vect])
+        
+        # intersect_lines[index_vect]
+        
+        dist_to_fema <- hydrofabric3D:::geos_bs_distance(
+                            distances    = 1:max_extension_distance,
+                            line         = trans_geom,
+                            geoms_to_cut = intersect_lines[index_vect],
+                            direction    = "head"
+                          )
+        
+        left_extension_dists[i] <- dist_to_fema
+        
+      }
+      message()
     }
-    message()
+    
+    left_trans$left_extension_dist <- left_extension_dists
   }
+  tmp <- left_trans[1:5, ]
+  tmp
+  tmp_extended <- hydrofabric3D:::extend_by_length(tmp, tmp$left_extension_dist)
+  
+  tmp_extended
+  
+  fema_idx <- unique(unlist(dplyr::select(tmp, left_fema_index)$left_fema_index))
+  
+  mapview::mapview(dplyr::select(tmp, -left_fema_index), color = "red") +  
+    mapview::mapview(dplyr::select(tmp_extended, -left_fema_index), color = "green")  +
+    sf::st_as_sf(intersect_polygons[fema_idx])
+  
+  length(left_extension_dists)
   
   left_trans_geos
   vctrs::vec_c(
@@ -319,11 +507,11 @@ path_df
   )
   
  #  
- #  # mapview::mapview(left_trans[477, ]) + fema_polygons[1888, ]
+ #  # mapview::mapview(left_trans[477, ]) + intersect_polygons[1888, ]
  # left_trans <- 
  #    left_trans %>%
  #    dplyr::mutate(
- #      fema_index = unlist(sf::st_within(., fema_polygons))
+ #      fema_index = unlist(sf::st_within(., intersect_polygons))
  #    ) %>% 
  #    dplyr::relocate(fema_index)
  #  
@@ -334,7 +522,7 @@ path_df
   
   fema_polygons %>% na.omit()
   
-  # NOTE: sorting the fema polygon indices (not sure if necessary)
+  # NOTE: sorting the fema polygon indices (not sure if necessary)
  left_fema <- fema_polygons[sort(na.omit(unlist(unique(left_trans$left_fema_index))))]
   
   left_fema
@@ -349,7 +537,7 @@ path_df
   left_fema_lines <- 
     left_fema %>% 
     sf::st_as_sf() %>% 
-    sf::st_cast("MULTILINESTRING") %>% 
+    sf::st_cast("multilinestring") %>% 
     geos::as_geos_geometry() %>% 
     geos::geos_simplify_preserve_topology(25)
   
@@ -371,7 +559,7 @@ path_df
   
   left_extension_dists <- lapply(seq_along(left_trans_geos), function(i) {
     
-    hydrofabric3D:::geos_bs_distance(
+    hydrofabric3D:::geos_bs_distance(
       distances    = 1:2000,
       line         = left_trans_geos[i],
       geoms_to_cut = left_fema_lines,
@@ -382,7 +570,7 @@ path_df
   
   left_trans$left_head_extension_dist <- left_extension_dists
   
-  # Find the distances from the right side of transect lines 
+  # Find the distances from the right side of transect lines 
   right_trans <- 
     segmented_trans %>% 
     lwgeom::st_linesubstring(0.50, 1) %>% 
@@ -396,7 +584,7 @@ path_df
   right_within_matrix <- geos::geos_within_matrix(right_trans_geos, geos::as_geos_geometry(fema_polygons))
  
   right_within_vect <- lapply(right_within_matrix, function(i) {
-                      if(length(i) > 0) { c(i) } else { c(NA_real_) } }
+                      if(length(i) > 0) { c(i) } else { c(NA_real_) } }
                       )
  right_trans$fema_index <- right_within_vect
 # 
@@ -411,11 +599,11 @@ path_df
   right_fema <- fema_polygons[unique(right_trans$fema_index),] 
   right_fema_lines <- 
     right_fema %>% 
-    sf::st_cast("LINESTRING")
+    sf::st_cast("linestring")
    
   right_extension_dists <- lapply(seq_along(right_trans_geos), function(i) {
     
-    hydrofabric3D:::geos_bs_distance(
+    hydrofabric3D:::geos_bs_distance(
       distances    = 1:2000,
       line         = right_trans_geos[i],
       geoms_to_cut = right_fema_lines,
@@ -438,7 +626,7 @@ path_df
   left_fema <- fema_polygons[unique(left_trans$fema_index),]
   left_fema_lines <- 
     left_fema %>% 
-    sf::st_cast("LINESTRING")
+    sf::st_cast("linestring")
   
   geos::as_geos_geometry(left_fema)
   mapview::mapview(left_fema, col.regions = "dodgerblue") +
@@ -454,7 +642,7 @@ path_df
   
   left_extension_dists <- lapply(seq_along(left_trans_geos), function(i) {
     
-   hydrofabric3D:::geos_bs_distance(
+   hydrofabric3D:::geos_bs_distance(
       distances    = 1:2000,
       line         = left_trans_geos[i],
       geoms_to_cut = left_fema_lines,
@@ -470,7 +658,7 @@ path_df
     geos_line <- left_trans_geos[i]
     message(glue::glue("i: {i}\ndist: {dist}"))
     
-    extended <- hydrofabric3D::geos_extend_line(
+    extended <- hydrofabric3D::geos_extend_line(
       geos_line,
       dist,
       "head"
@@ -496,7 +684,7 @@ path_df
     mapview::mapview(left_trans, color = "red") +
     mapview::mapview(new_left_trans, color = "green")
   mapply(function(geom, dist) {
-      hydrofabric3D::geos_extend_line(geom, dist, "head") 
+      hydrofabric3d::geos_extend_line(geom, dist, "head") 
     },
   left_trans_geos,
   left_extension_dists
@@ -504,20 +692,20 @@ path_df
   
   left_extensions <- lapply(seq_along(left_trans_geos), function(i) {
       
-    extend_dist <- hydrofabric3D:::geos_bs_distance(
+    extend_dist <- hydrofabric3D:::geos_bs_distance(
         distances    = 1:2000,
         line         = left_trans_geos[i],
         geoms_to_cut = left_fema_lines,
         direction    = "head"
       )
       
-    hydrofabric3D::geos_extend_line(left_trans_geos[i], extend_dist, "head") 
+    hydrofabric3D::geos_extend_line(left_trans_geos[i], extend_dist, "head") 
     
     })
   # unlist(left_extensions)
   
   left_extension_dists <- lapply(seq_along(left_trans_geos), function(i) {
-    hydrofabric3D:::geos_bs_distance(
+    hydrofabric3D:::geos_bs_distance(
       distances    = 1:2000,
       line         = left_trans_geos[i],
       geoms_to_cut = left_fema_lines,
@@ -526,14 +714,14 @@ path_df
   }) 
   
   distance_to_extend <- 
-    hydrofabric3D:::geos_bs_distance(
+    hydrofabric3D:::geos_bs_distance(
       distances    = 1:1500,
       line         = left_trans_geos[1],
       geoms_to_cut = left_fema_lines,
       direction    = "head"
     )
   
-  extended <- hydrofabric3D::geos_extend_line(left_trans_geos[1], distance_to_extend, "head") %>% 
+  extended <- hydrofabric3D::geos_extend_line(left_trans_geos[1], distance_to_extend, "head") %>% 
     sf::st_as_sf()
   mapview::mapview(left_fema, col.regions = "dodgerblue") +
     mapview::mapview(left_ls, color = "yellow") +
@@ -542,15 +730,15 @@ path_df
     # mapview::mapview(right_trans, color = "green")
   left_trans$geom %>% sf::st_length()
   plot(segmented_trans$geom, col = "red", lwd =5)
-  plot(left_trans$geom, col = "green", lwd=3, add = TRUE)
-  plot(right_trans$geom, col = "blue", lwd=3, add = TRUE)
+  plot(left_trans$geom, col = "green", lwd=3, add = TRUE)
+  plot(right_trans$geom, col = "blue", lwd=3, add = TRUE)
   unlist(left_trans$geom)
    unlist(right_trans$geom)
   
-  unlist(tmp_trans$geom )
+  unlist(trans_fema$geom )
   unlist(split_trans$geom )
   split_trans %>% 
-    sf::st_collection_extract("LINESTRING")
+    sf::st_collection_extract("linestring")
   split_trans$geom
   
   tmp_trans$geom
@@ -563,17 +751,17 @@ path_df
   
   transects_with_
   
-  lengths(transects_fema_matrix)
+  lengths(transects_polygons_matrix)
   mapview::mapview(transects_with_fema, color = "green") + fema_with_transects
-  unique(hydrofabric3D::add_tmp_id(transects)$tmp_id)[1:30]
+  unique(hydrofabric3D::add_tmp_id(transects)$tmp_id)[1:30]
   transects  %>% 
-    hydrofabric3D::add_tmp_id(transects) %>% 
+    hydrofabric3D::add_tmp_id(transects) %>% 
     .$tmp_id %>% 
     unique() %>% .[1:30]
   trans_subset <- 
     transects %>% 
-    hydrofabric3D::add_tmp_id() %>% 
-    dplyr::filter(tmp_id %in%  unique(hydrofabric3D::add_tmp_id(transects)$tmp_id)[1:30])
+    hydrofabric3D::add_tmp_id() %>% 
+    dplyr::filter(tmp_id %in% unique(hydrofabric3D::add_tmp_id(transects)$tmp_id)[1:30])
   
   fema_subset <- 
     fema %>% 

From 6056c857d3276804af715b594543460e77caccc3 Mon Sep 17 00:00:00 2001
From: anguswg-ucsb <anguswatters@gmail.com>
Date: Tue, 14 May 2024 10:32:35 -0700
Subject: [PATCH 30/64] first cut of extensions across an entire vpu, added
 code to determine whether left, right, or both ends need to be extended from

---
 runners/cs_runner/add_fema_to_transects.R | 398 +++++++++++++++++-----
 1 file changed, 315 insertions(+), 83 deletions(-)

diff --git a/runners/cs_runner/add_fema_to_transects.R b/runners/cs_runner/add_fema_to_transects.R
index d328e89..32763a6 100644
--- a/runners/cs_runner/add_fema_to_transects.R
+++ b/runners/cs_runner/add_fema_to_transects.R
@@ -48,22 +48,43 @@ path_df
   message("creating vpu ", path_df$vpu[i], "\n - transects: ", transect_file, "\n - flowpaths: '", nextgen_file, "'")
   # message("creating vpu ", path_df$vpu[i], " transects:\n - flowpaths: '", nextgen_file, "'\n - model attributes: '", model_attr_file, "'")
   
-  # read in nextgen data
-  flines <- sf::read_sf(nextgen_path, layer = "flowpaths")
-
+ 
   fema_vpu_dir <- paste0(FEMA_VPU_SUBFOLDERS[grepl(paste0("VPU_", vpu), basename(FEMA_VPU_SUBFOLDERS))], "/merged")
-  fema_vpu_dir
   vpu_fema_files <- list.files(fema_vpu_dir, full.names = TRUE)
   # vpu_fema_file1 <- vpu_fema_files[grepl("_union.gpkg", vpu_fema_files)]
-  vpu_fema_file <- vpu_fema_files[grepl(paste0(vpu, ".gpkg"), vpu_fema_files)]
+  
+  # vpu_fema_file <- vpu_fema_files[grepl(paste0(vpu, ".gpkg"), vpu_fema_files)]
+  vpu_fema_file <- vpu_fema_files[grepl(paste0(vpu, "_union.gpkg"), vpu_fema_files)]
   vpu_fema_file
-  # fema1 <- sf::read_sf(vpu_fema_file1)
-  # fema2 <- sf::read_sf(vpu_fema_filev2)  
+  
+  # fema polygons and transect lines
   fema <- sf::read_sf(vpu_fema_file)
+  # mapview::npts(fema)
+  
   transects <- sf::read_sf(transect_path)
   
-  # fema %>%  mapview::npts() 
-  # fema2 %>%  mapview::npts() 
+  # read in nextgen flowlines data
+  flines <- sf::read_sf(nextgen_path, layer = "flowpaths")
+
+  # Given 2 geos_geometry point geometries, create a line between the 2 points
+  # start: geos_geoemtry, point
+  # end: geos_geoemtry, point
+  # Returns geos_geometry linestring
+  make_line_from_start_and_end_pts <- function(start, end, line_crs) {
+        Y_start <- geos::geos_y(start)
+        X_start <- geos::geos_x(start)
+        Y_end   <- geos::geos_y(end)
+        X_end   <- geos::geos_x(end)
+        
+        # make the new transect line from the start and end points 
+        geos_ls <- geos::geos_make_linestring(x = c(X_start, X_end),
+                                                 y = c(Y_start, Y_end), 
+                                                 crs = line_crs)
+                                              
+        return(geos_ls)                                              
+        
+        
+      }
   
  # Given a set of transect linestrings and polygons, get the minimum distance to extend each transect line (in both directions) to try and reach the edge of the polygons
   # internal function for extending transect lines out to FEMA 100 year flood plain polygons
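A minimal usage sketch for the make_line_from_start_and_end_pts() helper added above; the point coordinates and CRS are illustrative only:

# two points in an assumed projected CRS (EPSG:5070)
pt_start <- geos::geos_make_point(0, 0, crs = 5070)
pt_end   <- geos::geos_make_point(100, 50, crs = 5070)

seg <- make_line_from_start_and_end_pts(pt_start, pt_end, line_crs = wk::wk_crs(pt_start))

geos::geos_type(seg)    # "linestring"
geos::geos_length(seg)  # sqrt(100^2 + 50^2) ~= 111.8
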
@@ -72,43 +93,26 @@ path_df
   # max_extension_distance numeric, maximum distance (meters) to extend a transect line in either direction to try and intersect one of the "polygons"
   get_transect_extension_distances_to_polygons <- function(transect_lines, polygons, max_extension_distance) {
     
+    ###    ###    ###    ###    ###    ###    ###
+    ###    ###    ###    ###    ###    ###    ###
     # transect_lines <- transects
     # polygons <- fema
     # max_extension_distance <- 2500
+    ###    ###    ###    ###    ###    ###    ###
+    ###    ###    ###    ###    ###    ###    ###
     
     # keep 10% of the original points for speed
     polygons <- rmapshaper::ms_simplify(polygons, keep = 0.10)
-     
-    # fema %>%  mapview::npts()
     
     # transects <- sf::read_sf(transect_path)
     
     transects_geos  <- geos::as_geos_geometry(transects)
     polygons_geos   <- geos::as_geos_geometry(polygons)     
-  
-    # length(polygons_geos)
-    
+
     # polygons_geos  
     transects_polygons_matrix <- geos::geos_intersects_matrix(transects_geos, polygons_geos) 
     polygons_transects_matrix <- geos::geos_intersects_matrix(polygons_geos, transects_geos) 
-    
-    # transects_polygons_matrix
-    # get the polygons that have atleast 1 intersection with the 'lines'
-    # transects_polygons_matrix[[557]]
-    # fema_tmp <- fema[transects_polygons_matrix[[557]], ]
-    # trans_tmp <- transects[557, ]
-    
-   # fema_dissolve <-  rmapshaper::ms_dissolve(fema_tmp)
-   # fema_simple <- rmapshaper::ms_simplify(fema_tmp, keep = 1) %>% 
-     # rmapshaper::ms_dissolve(field = "state")
-  
-    # mapview::mapview(trans_tmp, color = "green") + 
-    #   mapview::mapview(fema_tmp[1, ], col.region = "red") + 
-    #   mapview::mapview(fema_tmp[2, ],  col.region = "dodgerblue") + 
-    #   mapview::mapview(fema_simple,  col.region = "yellow")
-  
-    # polygons_transects_matrix
-    
+
     # subset the transects and polygons to only those with intersections
     intersect_transects  <- transects[lengths(transects_polygons_matrix) != 0, ]
     intersect_polygons   <- polygons_geos[lengths(polygons_transects_matrix) != 0]
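For reference, a self-contained toy showing the geos_intersects_matrix() subsetting idiom used above: each element of the result holds the indices of the geometries intersected on the other side, so lengths(...) != 0 is the keep-mask.

# toy example: one line crosses the polygon, one does not
lines <- geos::as_geos_geometry(c("LINESTRING (0 0, 2 2)", "LINESTRING (5 5, 6 6)"))
polys <- geos::as_geos_geometry("POLYGON ((0 1, 2 1, 2 3, 0 3, 0 1))")

m <- geos::geos_intersects_matrix(lines, polys)
lengths(m)               # 1 0 -> only the first line hits any polygon
lines[lengths(m) != 0]   # the subset kept, as in the code above
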
@@ -121,22 +125,7 @@ path_df
       sf::st_cast("MULTILINESTRING") %>% 
       geos::as_geos_geometry() %>% 
       geos::geos_simplify_preserve_topology(25)
-    
-    # length(polygons_geos)
-    # nrow(fema)
-    # polygons_geos     <- geos::as_geos_geometry(intersect_polygons)
-    
-    # tmp_trans <- intersect_transects[1:5000, ]
-    
-    # tmp_centroid <- sf::st_centroid(tmp_trans)
-    # sf::st_segmentize()
-    # split_trans <- lwgeom::st_split(tmp_trans, tmp_centroid)
-    # # tmp_trans
-    # max_extension_distance <- 2500
-    
-    # unlist(tmp_trans$geom)
-    # unlist(sf::st_segmentize(tmp_trans, 4)$geom)
-    
+
     # use half of the shortest transect line as the segmentation length for all transects (ensures all transects will have a midpoint...?)
     # TODO: Double check this logic.
     min_segmentation <- min(intersect_transects$cs_lengthm %/% 2)
@@ -147,12 +136,9 @@ path_df
     segmented_trans <- sf::st_segmentize(intersect_transects, min_segmentation)
     
     # unlist(segmented_trans$geom)
-    unique(lengths(segmented_trans$geom))
-    length(lengths(segmented_trans$geom))
-    lengths(segmented_trans$geom)  
-    
-    # left_trans   <- lwgeom::st_linesubstring(segmented_trans, 0, 0.50)
-    # right_trans  <- lwgeom::st_linesubstring(segmented_trans, 0.50, 1)
+    # unique(lengths(segmented_trans$geom))
+    # length(lengths(segmented_trans$geom))
+    # lengths(segmented_trans$geom)  
     
     # mapview::mapview(left_trans, col.regions = "dodgerblue") +
       # mapview::mapview(intersect_transects, color = "red") +
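A toy illustration of the segmentation step above, assuming a metric CRS such as EPSG:5070: capping the segment length at half the shortest cross-section length guarantees even the shortest transect gains a vertex at or near its midpoint.

library(sf)

# one 100 m transect
tr <- sf::st_sfc(sf::st_linestring(rbind(c(0, 0), c(100, 0))), crs = 5070)

# half of the (shortest) cross-section length, as in the code above
min_segmentation <- min(as.numeric(sf::st_length(tr)) %/% 2)   # 50

dense <- sf::st_segmentize(tr, min_segmentation)
sf::st_coordinates(dense)   # vertices now spaced <= 50 m apart, incl. a midpoint
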
@@ -249,9 +235,10 @@ path_df
       max_extension_distance = max_extension_distance
     )  
     
-    left_trans$left_distance   <- left_distances
-    right_trans$right_distance <- right_distances
+    left_trans$left_distance    <- left_distances
+    right_trans$right_distance  <- right_distances
     
+    # distance to extend LEFT and/or RIGHT for each hy_id/cs_id
     extensions_by_id <- dplyr::left_join(
                           sf::st_drop_geometry(
                             dplyr::select(left_trans, 
@@ -264,80 +251,323 @@ path_df
                             ),
                           by = c("hy_id", "cs_id")
                           )
+    
+    ######### ######## ######## ######## ####### 
+    # TODO: This is temporary !!!!!!
+    fema_index_df <- dplyr::left_join(
+      sf::st_drop_geometry(
+        dplyr::select(left_trans, 
+                      hy_id, cs_id, left_distance, left_fema_index)
+      ),
+      sf::st_drop_geometry(
+        dplyr::select(right_trans, 
+                      hy_id, cs_id, 
+                      right_distance, right_fema_index)
+      ),
+      by = c("hy_id", "cs_id")
+    )
+    ######## ######## ######## ######## ########
+    
+    foi <- sf::st_as_sf(intersect_polygons[fema_uids]) %>% dplyr::mutate(
+      fema_id = fema_uids
+    )
+    
+    polygons_to_merge <- sf::st_as_sf(intersect_polygons[fema_uids]) %>% dplyr::mutate(
+      fema_id = fema_uids
+    ) %>% 
+      dplyr::filter(fema_id %in% c(1563, 1566, 1567, 590))
+    merged_polygon <- 
+      polygons_to_merge %>% 
+      sf::st_union()
+    merged_polygon %>% 
+      rmapshaper::ms_explode()
+    mapview::mapview(foi, col.regions = "dodgerblue") +
+      mapview::mapview(polygons_to_merge, col.regions = "yellow") +
+      mapview::mapview(merged_polygon, col.regions = "green") +
+      mapview::mapview(toi, color = "red") +
+      mapview::mapview(og_trans, color = "green")
+    # polygons %>% 
+      # dplyr::filter(fema_id )
+    ######## ######## ######## ######## ########
+    
+    # TODO: Add left/right extension distances to transect data
+    # TODO: this can ultimately just be the "transects" variable, don't need to make a new "transects_with_distances" variable
     transects_with_distances <- 
       transects %>% 
       dplyr::left_join(
                 extensions_by_id,
                 by = c("hy_id", "cs_id")
             ) %>% 
+      dplyr::mutate(
+        left_distance = dplyr::case_when(
+          is.na(left_distance) ~ 0,
+          TRUE                 ~ left_distance
+        ),
+        right_distance = dplyr::case_when(
+          is.na(right_distance) ~ 0,
+          TRUE                  ~ right_distance
+        )
+        # left_distance = dplyr::case_when(
+        #   left_distance == 0 ~ NA,
+        #   TRUE               ~ left_distance
+        # ),
+        # right_distance = dplyr::case_when(
+        #   right_distance == 0 ~ NA,
+        #   TRUE                ~ right_distance
+        # )
+      ) %>% 
       hydrofabric3D::add_tmp_id()
     
-    fline_id_array <- flines$id
+    # transects
+    # flines %>% 
+    #   dplyr::arrange(-tot_drainage_areasqkm) 
+    # flines %>% 
+    #   dplyr::arrange(-hydroseq) %>% 
+    #   dplyr::filter(hydroseq == min(hydroseq) | hydroseq == max(hydroseq)) %>%
+    #   # dplyr::filter(hydroseq == max(hydroseq)) %>% 
+    #   mapview::mapview()
+    
+    # extensions_by_id
+    transects_with_distances %>% 
+      dplyr::filter(!tmp_id %in% hydrofabric3D::add_tmp_id(extensions_by_id)$tmp_id)
+    
+    ######## ######## ######## ######## ######## ######## ######## ######## 
+    left_extended_flag[1:20]
+    right_extended_flag[1:20]
+    both_extended_flag[1:20]
+    
+    range_vect <- 1:500
+    
+    fema_uids <- unique(c(unlist(fema_index_df[range_vect, ]$left_fema_index),     unlist(fema_index_df[range_vect, ]$right_fema_index)))
+    fema_uids <- fema_uids[!is.na(fema_uids)]
+    fema_uids
+    foi <- sf::st_as_sf(intersect_polygons[fema_uids]) %>% dplyr::mutate(
+      fema_id = fema_uids
+    )
+    # ooi <- sf::st_as_sf()
+    # toi <- sf::st_as_sf(new_transects[1:20])
+     toi <- sf::st_as_sf(transect_geoms[range_vect])
+    toi
+    og_trans <- transects_with_distances[range_vect, ]
+    mapview::mapview(foi, col.regions = "dodgerblue") +
+      mapview::mapview(toi, color = "red") +
+      mapview::mapview(og_trans, color = "green")
+    ######## ######## ### ##### ######## ######## ######## ######## ######## 
+    
+    left_extended_flag   <- rep(FALSE, length(transect_ids))   
+    right_extended_flag  <- rep(FALSE, length(transect_ids))
+    both_extended_flag   <- rep(FALSE, length(transect_ids))
+    
+    fline_id_array       <- flines$id
     
     # Convert the net object into a geos_geometry
-    flines_geos <- geos::as_geos_geometry(flines)
+    flines_geos          <- geos::as_geos_geometry(flines)
     
     transect_hy_id_array <- transects_with_distances$hy_id
     transect_cs_id_array <- transects_with_distances$cs_id
     
-    transect_geoms  <- geos::as_geos_geometry(transects_with_distances$geom)
+    transect_geoms       <- geos::as_geos_geometry(transects_with_distances$geom)
    
-    left_distances  <- transects_with_distances$left_distance
-    right_distances <- transects_with_distances$right_distance
+    left_distances       <- transects_with_distances$left_distance
+    right_distances      <- transects_with_distances$right_distance
     
     # preallocate vector that stores the extension distances
     new_transects <- vctrs::vec_c(rep(geos::geos_empty(), length(transect_ids)))
     # new_transects <- geos::geos_empty()
-    # measures  <- vctrs::vec_c()
+    # # measures  <- vctrs::vec_c()
+    # transects_with_distances[1:20, ]
+    # transects[1:20, ]
+    
+    # number of geometries that will be iterated over, keeping this variable to reference in message block  
+    total <- length(transect_ids)
+
+    # output a message every ~10% intervals
+    message_interval <- total %/% 10
+    number_of_skips = 0
     
     for (i in seq_along(transect_ids)) {
-      message("i: ", i)
-      i = 1
-      
+      # message("i: ", i)
+      # if(i > 2000) {
+      #   break
+      # }
+      # Check if the iteration is a multiple of the message interval
+      if (i %% message_interval == 0) {
+        
+        # get the percent complete
+        percent_done <- round(i/total, 2) * 100
+        
+        # Print the message every "message_interval"
+        # if(verbose) { 
+          message(i, " > ", percent_done, "% ") 
+          message("Number of skips: ", number_of_skips)
+          # }
+        # message("Iteration ", i, " / ", length(extended_trans), 
+        #         " - (", percent_done, "%) ")
+        
+      }
+      # i = 29
+      # get the current transect, hy_id, cs_id, flowline, and extension distances
       current_trans <- transect_geoms[i]
       
       current_hy_id <- transect_hy_id_array[i]
       current_cs_id <- transect_cs_id_array[i]
       
       current_fline <- flines_geos[fline_id_array == current_hy_id]
-    
-      # current_fline <- 
-      
        
       left_distance_to_extend <- left_distances[i]
-      left_distance_to_extend
       right_distance_to_extend <- right_distances[i]
-      right_distance_to_extend
       
       no_extension_required <- (left_distance_to_extend == 0 && right_distance_to_extend == 0)
-      message("Transect tmp_id: ", curr_tmp_id, " - (", i, ")")
+      # no_extension_required <- is.na(left_distance_to_extend) && is.na(right_distance_to_extend)
+      # message("Transect tmp_id: ", curr_tmp_id, " - (", i, ")")
       
       if(no_extension_required) {
-        message("Skipping -left/right extension are both 0")
+        # message("Skipping -left/right extension are both 0")
+        number_of_skips = number_of_skips + 1
+        
         next
       }
       
-      trans
-      left_trans
-      curr_tmp_id
-      
-      curr_fema_index <- 
-        left_trans %>% 
-        dplyr::filter(hy_id == current_hy_id, cs_id == current_cs_id) %>% 
-        .$left_fema_index %>% .[[1]]
+     
+#      curr_fema_index <- 
+#        left_trans %>% 
+#        dplyr::filter(hy_id == current_hy_id, cs_id == current_cs_id) %>% 
+#        .$left_fema_index %>% .[[1]]
       
+      # message("Extending transect line left and right")
+      # extend the lines
       left_extended_trans  <- hydrofabric3D::geos_extend_line(current_trans, left_distance_to_extend, "head")
       right_extended_trans <- hydrofabric3D::geos_extend_line(current_trans, right_distance_to_extend, "tail")
       
       # neighbor_transects <- 
+      # message("Checking left and right intersections with flowline...")
+      
+      # Check that the extended transect lines only intersect the current flowline once
       left_intersects_fline <- geos::geos_intersection(
         left_extended_trans,
         current_fline
       )
       
-      left_intersects_fline_once = geos::geos_type(fline_intersects) == "point"
-      left_intersects_fline_once
+      right_intersects_fline <- geos::geos_intersection(
+        right_extended_trans,
+        current_fline
+      )
+      
+      # Define conditions to decide which version of the transect to use
+      # 1. Use transect with extension in BOTH directions
+      # 2. Use transect with LEFT extension only
+      # 3. Use transect with RIGHT extension only
+      left_intersects_fline_once  <- geos::geos_type(left_intersects_fline) == "point"
+      right_intersects_fline_once <- geos::geos_type(right_intersects_fline) == "point"
+      
+      # # TODO: Consider doing the opposite of these conditions (i.e. "left_intersects_other_transects" = TRUE) 
+      # left_does_not_intersect_other_transects  <- !any(geos::geos_intersects(left_extended_trans, transect_geoms[-i]))
+      # right_does_not_intersect_other_transects <- !any(geos::geos_intersects(right_extended_trans, transect_geoms[-i]))  
+      # 
+      # use_left_extension  <- left_intersects_fline_once && left_does_not_intersect_other_transects
+      # use_right_extension <- right_intersects_fline_once && right_does_not_intersect_other_transects
+      # use_both_extensions <- use_left_extension && use_right_extension
+      
+      
+      # TODO: This is the opposite phrasing of these conditions, I think this is clearer to read
+      left_intersects_other_transects  <- any(geos::geos_intersects(left_extended_trans, transect_geoms[-i]))
+      right_intersects_other_transects <- any(geos::geos_intersects(right_extended_trans, transect_geoms[-i]))  
+      
+      # # make sure the extended transects don't hit any of the newly extended transects
+      # # NOTE: I think this could be just done with a single transect list that starts with the original transects and if an update happens then we replace that transect
+      # left_intersects_new_transects  <- any(geos::geos_intersects(left_extended_trans, new_transects))
+      # right_intersects_new_transects <- any(geos::geos_intersects(right_extended_trans, new_transects))
+      
+      # make TRUE/FALSE flags stating which transect should we use
+      # - BOTH extensions
+      # - LEFT ONLY extensions
+      # - RIGHT only extensions
+      use_left_extension  <- left_intersects_fline_once && !left_intersects_other_transects
+      use_right_extension <- right_intersects_fline_once && !right_intersects_other_transects
+      use_both_extensions <- use_left_extension && use_right_extension
+      
+      # merged_trans <- geos::geos_union(left_extended_trans, right_extended_trans)
+      # sf::st_union(sf::st_cast(sf::st_as_sf(merged_trans), "LINESTRING"))
+      # mapview::mapview(sf::st_as_sf(merged_trans), color = "green") +
+      #   mapview::mapview(sf::st_as_sf(left_start), col.region = "red") +
+      #   mapview::mapview(sf::st_as_sf(left_end), col.region = "red") + 
+      #   mapview::mapview(sf::st_as_sf(right_start), col.region = "dodgerblue") +
+      #   mapview::mapview(sf::st_as_sf(right_end), col.region = "dodgerblue")
+      
+      # if(use_both_extensions) {
+      
+      # Get the start and end of both extended transects
+      left_start  <- geos::geos_point_start(left_extended_trans)
+      left_end    <- geos::geos_point_end(left_extended_trans)
+      right_start <- geos::geos_point_start(right_extended_trans)
+      right_end   <- geos::geos_point_end(right_extended_trans)
       
+      # }
+      # Extend in BOTH directions
+      if(use_both_extensions) {
+        # message("Extend direction: BOTH")
+        start  <- left_start
+        end    <- right_end
+        
+      # extend ONLY the left side
+      } else if(use_left_extension && !use_right_extension) {
+         # message("Extend direction: LEFT")       
+        start  <- left_start
+        end    <- left_end
+        
+      # Extend ONLY the right side
+      } else if(!use_left_extension && use_right_extension) {
+         # message("Extend direction: RIGHT")       
+        start  <- right_start
+        end    <- right_end
+        
+      # DO NOT extend either direction
+      } else {
+        # message("No extension")   
+        # TODO: Really dont need to do anything 
+        # TODO: in this scenario because we just use the original transect line
+        start  <- left_end
+        end    <- right_start
+      }
+      
+      # trans_needs_extension <- use_left_extension || use_right_extension
+      
+      # if(trans_needs_extension) {
+        
+        line_crs      <- wk::wk_crs(current_trans)
+        updated_trans <- make_line_from_start_and_end_pts(start, end, line_crs)
+        
+      # }
+        
+        if(use_left_extension) {
+          left_extended_flag[i]  <- TRUE
+        }
+        
+        if(use_right_extension) {
+          right_extended_flag[i] <- TRUE
+        }
+        
+        if(use_both_extensions) {
+          both_extended_flag[i] <- TRUE
+        }
+        
+        # new_transects[i] <- updated_trans
+        transect_geoms[i] <- updated_trans
+        
+        # start %>% class()
+      
+     
+    }
+      # make the new transect line from the start and points 
+      final_line <- geos::geos_make_linestring(x = c(X_start, X_end),
+                                               y = c(Y_start, Y_end), 
+                                               crs = wk::wk_crs(current_trans)
+                                               )
+      geos::geos_make_collection(start, type_id = "LINESTRING")
+      
+      
+      left_start <- geos::geos_point_start(left_extended_trans)
+      right_end  <- geos::geos_point_end(right_extended_trans)
       
       fline_intersects
       geos::geos_type(fline_intersects) == "point"
@@ -352,7 +582,9 @@ path_df
       mapview::mapview(sf::st_as_sf(trans), color = "green") +
        mapview::mapview(sf::st_as_sf(left_extended_trans), color = "red")     +
         mapview::mapview(sf::st_as_sf(right_extended_trans), color = "red")     +
-        mapview::mapview(      sf::st_as_sf(intersect_polygons[[curr_fema_index]]), col.regions = "dodgerblue")   
+        mapview::mapview(      sf::st_as_sf(intersect_polygons[[curr_fema_index]]), col.regions = "dodgerblue")  +  
+      mapview::mapview(      sf::st_as_sf(final_line), color = "yellow")   
+      
       
       
       

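A condensed restatement of the decision rule the loop in this patch implements, assuming the loop-local objects above (current_trans, current_fline, transect_geoms, i, and the left/right extension distances); a side's extension is kept only when it still crosses the flowline at a single point and touches no other transect:

left_ext  <- hydrofabric3D::geos_extend_line(current_trans, left_distance_to_extend,  "head")
right_ext <- hydrofabric3D::geos_extend_line(current_trans, right_distance_to_extend, "tail")

ok_left  <- geos::geos_type(geos::geos_intersection(left_ext, current_fline)) == "point" &&
  !any(geos::geos_intersects(left_ext, transect_geoms[-i]))
ok_right <- geos::geos_type(geos::geos_intersection(right_ext, current_fline)) == "point" &&
  !any(geos::geos_intersects(right_ext, transect_geoms[-i]))

# pick endpoints: extended head if the left side passed, original head otherwise
# (likewise for the tail); note this keeps the original orientation when neither
# side passes, unlike the else-branch above, which flips the line
start <- if (ok_left)  geos::geos_point_start(left_ext)  else geos::geos_point_start(right_ext)
end   <- if (ok_right) geos::geos_point_end(right_ext)   else geos::geos_point_end(left_ext)

updated_trans <- make_line_from_start_and_end_pts(start, end, wk::wk_crs(current_trans))
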
From 1e511232d32339cc1178d9addb4e2e4b14d65331 Mon Sep 17 00:00:00 2001
From: anguswg-ucsb <anguswatters@gmail.com>
Date: Wed, 15 May 2024 16:26:09 -0700
Subject: [PATCH 31/64] changed extensions to check that they intersect a
 flowline only a single time, makes sure flowlines other than the current
 flowline are NOT intersected as well

---
 runners/cs_runner/add_fema_to_transects.R | 210 +++++++++++++++++-----
 1 file changed, 165 insertions(+), 45 deletions(-)

diff --git a/runners/cs_runner/add_fema_to_transects.R b/runners/cs_runner/add_fema_to_transects.R
index 32763a6..7f81f51 100644
--- a/runners/cs_runner/add_fema_to_transects.R
+++ b/runners/cs_runner/add_fema_to_transects.R
@@ -53,8 +53,8 @@ path_df
   vpu_fema_files <- list.files(fema_vpu_dir, full.names = TRUE)
   # vpu_fema_file1 <- vpu_fema_files[grepl("_union.gpkg", vpu_fema_files)]
   
-  # vpu_fema_file <- vpu_fema_files[grepl(paste0(vpu, ".gpkg"), vpu_fema_files)]
-  vpu_fema_file <- vpu_fema_files[grepl(paste0(vpu, "_union.gpkg"), vpu_fema_files)]
+  vpu_fema_file <- vpu_fema_files[grepl(paste0(vpu, ".gpkg"), vpu_fema_files)]
+  # vpu_fema_file <- vpu_fema_files[grepl(paste0(vpu, "_union.gpkg"), vpu_fema_files)]
   vpu_fema_file
   
   # fema polygons and transect lines
@@ -66,6 +66,28 @@ path_df
   # read in nextgen flowlines data
   flines <- sf::read_sf(nextgen_path, layer = "flowpaths")
 
+  # fema$fema_id %>% unique() %>% length()
+  
+  # # union then explode FEMA polygons 
+  # fema <- 
+  #   fema %>%
+  #   sf::st_union()
+  # 
+  # fema <- rmapshaper::ms_explode(fema)
+  # # fema %>% mapview::npts()
+  # 
+  # # reassign IDs and change geometry column name
+  # fema <- 
+  #   fema %>% 
+  #   sf::st_as_sf() %>% 
+  #   dplyr::mutate(fema_id = 1:dplyr::n()) %>% 
+  #   dplyr::select(fema_id, geom = x)
+  
+  # sf::st_union() %>% 
+  #   rmapshaper::ms_explode() %>% 
+  #   sf::st_as_sf() %>% 
+  #   dplyr::mutate(fema_id = 1:dplyr::n()) %>% 
+  #   dplyr::select(fema_id, geom = x)
   # Given 2 geos_geometry point geometries, create a line between the 2 points
   # start: geos_geoemtry, point
   # end: geos_geoemtry, point
@@ -91,24 +113,34 @@ path_df
  # transect_lines, set of sf linestrings to extend (only if the transect lines are ENTIRELY within the polygons)
  # polygons, set of sf polygons that transect lines should be extended to 
   # max_extension_distance numeric, maximum distance (meters) to extend a transect line in either direction to try and intersect one of the "polygons"
-  get_transect_extension_distances_to_polygons <- function(transect_lines, polygons, max_extension_distance) {
+  get_transect_extension_distances_to_polygons <- function(transect_lines, 
+                                                           polygons, 
+                                                           flines,
+                                                           max_extension_distance) {
     
     ###    ###    ###    ###    ###    ###    ###
     ###    ###    ###    ###    ###    ###    ###
-    # transect_lines <- transects
-    # polygons <- fema
-    # max_extension_distance <- 2500
-    ###    ###    ###    ###    ###    ###    ###
+    transect_lines <- transects
+    polygons <- fema
+    # # flines <- flines
+    max_extension_distance <- 3000
+    # ###    ###    ###    ###    ###    ###    ###
     ###    ###    ###    ###    ###    ###    ###
     
     # keep 10% of the original points for speed
-    polygons <- rmapshaper::ms_simplify(polygons, keep = 0.10)
+    polygons <- rmapshaper::ms_simplify(polygons, keep_shapes = F, keep = 0.10)
     
+    mapview::npts(fema)
+     mapview::npts(polygons)
     # transects <- sf::read_sf(transect_path)
     
+     # polygons
     transects_geos  <- geos::as_geos_geometry(transects)
     polygons_geos   <- geos::as_geos_geometry(polygons)     
-
+    
+    # geos::geos_union( polygons_geos, polygons_geos) %>% length()
+    # polygons_geos %>% length()
+    # polygons
     # polygons_geos  
     transects_polygons_matrix <- geos::geos_intersects_matrix(transects_geos, polygons_geos) 
     polygons_transects_matrix <- geos::geos_intersects_matrix(polygons_geos, transects_geos) 
@@ -121,11 +153,21 @@ path_df
    # This can be done with just linestrings (not sure if this is actually more performant but I'm pretty sure it is....)
     intersect_lines <- 
       intersect_polygons %>% 
+      # geos::geos_make_valid() %>% 
       sf::st_as_sf() %>% 
+      # sf::st_union() %>% 
+      # rmapshaper::ms_explode() %>% 
+      # sf::st_as_sf() %>% 
+      # dplyr::mutate(fema_id = 1:dplyr::n()) %>% 
+      # dplyr::select(fema_id, geom = x) %>% 
       sf::st_cast("MULTILINESTRING") %>% 
       geos::as_geos_geometry() %>% 
       geos::geos_simplify_preserve_topology(25)
-
+    
+ #    intersect_polygons %>% 
+ #      geos::geos_make_valid() %>% 
+ #      geos::geos_is_valid() %>% all()
+ # is.null(intersect_lines$geometry )
     # use half of the shortest transect line as the segmentation length for all transects (ensures all transects will have a midpoint...?)
     # TODO: Double check this logic.
     min_segmentation <- min(intersect_transects$cs_lengthm %/% 2)
@@ -217,6 +259,13 @@ path_df
                     geom
       ) 
     
+    max_extension_distance = 3000
+    which(transects_with_distances$hy_id == "wb-1003839")
+    
+    left_trans[which(left_trans$hy_id == "wb-1003839"), ]$cs_lengthm
+    right_trans[which(left_trans$hy_id == "wb-1003839"), ]$cs_lengthm
+    which(right_trans$hy_id == "wb-1003839")
+    
     left_distances <- calc_extension_distances(
       geos_geoms             = left_trans_geos,
       ids                    = left_trans$tmp_id,
@@ -238,6 +287,8 @@ path_df
     left_trans$left_distance    <- left_distances
     right_trans$right_distance  <- right_distances
     
+    extensions_by_id %>% 
+      dplyr::filter(hy_id == "wb-1003839")
     # distance to extend LEFT and/or RIGHT for each hy_id/cs_id
     extensions_by_id <- dplyr::left_join(
                           sf::st_drop_geometry(
@@ -252,7 +303,7 @@ path_df
                           by = c("hy_id", "cs_id")
                           )
     
-    ######### ######## ######## ######## ####### 
+     ######### ######## ######## ######## ####### 
     # TODO: This is temporary !!!!!!
     fema_index_df <- dplyr::left_join(
       sf::st_drop_geometry(
@@ -268,24 +319,24 @@ path_df
     )
     ######## ######## ######## ######## ########
     
-    foi <- sf::st_as_sf(intersect_polygons[fema_uids]) %>% dplyr::mutate(
-      fema_id = fema_uids
-    )
-    
-    polygons_to_merge <- sf::st_as_sf(intersect_polygons[fema_uids]) %>% dplyr::mutate(
-      fema_id = fema_uids
-    ) %>% 
-      dplyr::filter(fema_id %in% c(1563, 1566, 1567, 590))
-    merged_polygon <- 
-      polygons_to_merge %>% 
-      sf::st_union()
-    merged_polygon %>% 
-      rmapshaper::ms_explode()
-    mapview::mapview(foi, col.regions = "dodgerblue") +
-      mapview::mapview(polygons_to_merge, col.regions = "yellow") +
-      mapview::mapview(merged_polygon, col.regions = "green") +
-      mapview::mapview(toi, color = "red") +
-      mapview::mapview(og_trans, color = "green")
+    # foi <- sf::st_as_sf(intersect_polygons[fema_uids]) %>% dplyr::mutate(
+    #   fema_id = fema_uids
+    # )
+    # 
+    # polygons_to_merge <- sf::st_as_sf(intersect_polygons[fema_uids]) %>% dplyr::mutate(
+    #   fema_id = fema_uids
+    # ) %>% 
+    #   dplyr::filter(fema_id %in% c(1563, 1566, 1567, 590))
+    # merged_polygon <- 
+    #   polygons_to_merge %>% 
+    #   sf::st_union()
+    # merged_polygon %>% 
+    #   rmapshaper::ms_explode()
+    # mapview::mapview(foi, col.regions = "dodgerblue") +
+    #   mapview::mapview(polygons_to_merge, col.regions = "yellow") +
+    #   mapview::mapview(merged_polygon, col.regions = "green") +
+    #   mapview::mapview(toi, color = "red") +
+    #   mapview::mapview(og_trans, color = "green")
     # polygons %>% 
       # dplyr::filter(fema_id )
     ######## ######## ######## ######## ########
@@ -318,6 +369,8 @@ path_df
       ) %>% 
       hydrofabric3D::add_tmp_id()
     
+    transects_with_distances %>% 
+      dplyr::filter(hy_id == "wb-1003839")
     # transects
     # flines %>% 
     #   dplyr::arrange(-tot_drainage_areasqkm) 
@@ -354,10 +407,6 @@ fema_uids
       mapview::mapview(og_trans, color = "green")
     ######## ######## ### ##### ######## ######## ######## ######## ######## 
     
-    left_extended_flag   <- rep(FALSE, length(transect_ids))   
-    right_extended_flag  <- rep(FALSE, length(transect_ids))
-    both_extended_flag   <- rep(FALSE, length(transect_ids))
-    
     fline_id_array       <- flines$id
     
     # Convert the net object into a geos_geometry
@@ -372,24 +421,30 @@ fema_uids
     right_distances      <- transects_with_distances$right_distance
     
     # preallocate vector that stores the extension. distances
-    new_transects <- vctrs::vec_c(rep(geos::geos_empty(), length(transect_ids)))
+    new_transects <- vctrs::vec_c(rep(geos::geos_empty(), length(transect_hy_id_array)))
+    
+    left_extended_flag   <- rep(FALSE, length(transect_hy_id_array))   
+    right_extended_flag  <- rep(FALSE, length(transect_hy_id_array))
+    both_extended_flag   <- rep(FALSE, length(transect_hy_id_array))
+    
     # new_transects <- geos::geos_empty()
     # # measures  <- vctrs::vec_c()
     # transects_with_distances[1:20, ]
     # transects[1:20, ]
     
     # number of geometries that will be iterated over, keeping this variable to reference in message block  
-    total <- length(transect_ids)
+    total <- length(transect_hy_id_array)
 
     # output a message every ~10% intervals
-    message_interval <- total %/% 10
+    message_interval <- total %/% 5
     number_of_skips = 0
     
-    for (i in seq_along(transect_ids)) {
+    for (i in seq_along(transect_hy_id_array)) {
       # message("i: ", i)
       # if(i > 2000) {
       #   break
       # }
+      # i = 1
       # Check if the iteration is a multiple of 100
       if (i %% message_interval == 0) {
         
@@ -405,7 +460,8 @@ fema_uids
         #         " - (", percent_done, "%) ")
         
       }
-      # i = 29
+      # which(transects_with_distances$hy_id == "wb-1003839")
+      # i = 9587
       # get the current transect, hy_id, cs_id, flowline, and extension distances
       current_trans <- transect_geoms[i]
       
@@ -428,7 +484,9 @@ fema_uids
         next
       }
       
-     
+      # transects_with_distances %>% 
+      #   dplyr::filter(hy_id == "wb-1003839")
+      # transects
 #      curr_fema_index <- 
 #        left_trans %>% 
 #        dplyr::filter(hy_id == current_hy_id, cs_id == current_cs_id) %>% 
@@ -445,20 +503,54 @@ fema_uids
       # Check that the extended transect lines only intersect the current flowline once
       left_intersects_fline <- geos::geos_intersection(
         left_extended_trans,
-        current_fline
+        # current_fline
+        flines_geos
       )
       
       right_intersects_fline <- geos::geos_intersection(
         right_extended_trans,
-        current_fline
+        # current_fline
+        flines_geos
       )
       
+      # sum(geos::geos_type(left_intersects_fline) == "point")
+      # sum(geos::geos_type(right_intersects_fline) == "point")
+      
+      
+      # which(geos::geos_type(left_intersects_fline) == "point")
+      
+      # transects %>% 
+      #   dplyr::filter(hy_id == current_hy_id) %>% 
+      #   sf::st_length()
+      # current_hy_id
+      # 
+      # sf::st_as_sf(current_trans) %>% 
+      #   sf::st_length()
+      # tmp <- sf::read_sf(transect_path)
+      # 
+      # tmp %>% 
+      #   dplyr::filter(hy_id == current_hy_id) %>% 
+      #   sf::st_length()
+      
+      # mapview::mapview(sf::st_as_sf(flines_geos[which(geos::geos_type(left_intersects_fline) == "point")])) +
+      #   mapview::mapview(sf::st_as_sf(current_trans), color = "red") +
+      # mapview::mapview(sf::st_as_sf(left_extended_trans), color = "green") + 
+      # mapview::mapview(sf::st_as_sf(right_extended_trans), color = "green")
       # Define conditions to decide which version of the transect to use
+      
       # 1. Use transect with extension in BOTH directions
       # 2. Use transect with LEFT extension only
       # 3. Use transect with RIGHT extension only
-      left_intersects_fline_once  <- geos::geos_type(left_intersects_fline) == "point"
-      right_intersects_fline_once <- geos::geos_type(right_intersects_fline) == "point"
+      
+      # left_intersects_fline_once  <- geos::geos_type(left_intersects_fline) == "point"
+      # right_intersects_fline_once <- geos::geos_type(right_intersects_fline) == "point"
+      left_intersects_fline_once  <- sum(geos::geos_type(left_intersects_fline) == "point") == 1 && sum(geos::geos_type(left_intersects_fline) == "multipoint") == 0 
+      right_intersects_fline_once <- sum(geos::geos_type(right_intersects_fline) == "point") == 1 && sum(geos::geos_type(right_intersects_fline) == "multipoint") == 0 
+      
+      # sum(geos::geos_type(left_intersects_fline) == "point") == 1
+      # sum(geos::geos_type(right_intersects_fline) == "point") == 1
+      # sum(geos::geos_type(left_intersects_fline) == "multipoint") == 0 
+      
       
       # # TODO: Consider doing the opppsite of these conditions (i.e. "left_intersects_other_transects" = TRUE) 
       # left_does_not_intersect_other_transects  <- !any(geos::geos_intersects(left_extended_trans, transect_geoms[-i]))
@@ -558,6 +650,29 @@ fema_uids
       
      
     }
+    
+    # Update the "transects_to_extend" with new geos geometries ("geos_list")
+    sf::st_geometry(transects) <- sf::st_geometry(sf::st_as_sf(transect_geoms))
+    
+    transects$left_is_extended  <- left_extended_flag
+    transects$right_is_extended <- right_extended_flag
+    transects %>% 
+      dplyr::filter(left_is_extended | right_is_extended)
+    left_only_extended <- 
+      transects %>% 
+      dplyr::filter(!left_is_extended, right_is_extended)
+    left_only_flines <- 
+      flines %>% 
+      dplyr::filter(id %in% left_only_extended$hy_id)
+    # %>% 
+      mapview::mapview(left_only_flines, color = "dodgerblue") + 
+    mapview::mapview(left_only_extended, color = "red")
+    # transects$cs_lengthm  <- length_list
+    
+    transects
+  length(transect_geoms)
+    
+  
       # make the new transect line from the start and points 
       final_line <- geos::geos_make_linestring(x = c(X_start, X_end),
                                                y = c(Y_start, Y_end), 
@@ -613,8 +728,13 @@ fema_uids
       # lines_to_cut_indices   = left_trans$left_fema_index
       # direction              = "head"
       # max_extension_distance = max_extension_distance
-      
-      
+      # geos_geoms             = left_trans_geos
+      # ids                    = left_trans$tmp_id
+      # lines_to_cut           = intersect_lines
+      # lines_to_cut_indices   = left_trans$left_fema_index
+      # direction              = "head"
+      # max_extension_distance = max_extension_distance
+      # 
       
       #####   #####   #####   #####   #####
       

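A sketch of the write-back step the loop above ends with: the updated geos vector replaces the sf geometries and the per-transect flags become columns (names as in the patch):

# replace the sf geometries with the (possibly extended) geos geometries
sf::st_geometry(transects) <- sf::st_geometry(sf::st_as_sf(transect_geoms))

# record which side(s) were extended
transects$left_is_extended  <- left_extended_flag
transects$right_is_extended <- right_extended_flag

# inspect only the rows whose geometry actually changed
changed <- dplyr::filter(transects, left_is_extended | right_is_extended)
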
From f9bf310a398a9a9201860103fc14fae1b68e9776 Mon Sep 17 00:00:00 2001
From: anguswg-ucsb <anguswatters@gmail.com>
Date: Thu, 16 May 2024 15:31:13 -0700
Subject: [PATCH 32/64] continuing improvement of transect extensions using
 FEMA

---
 runners/cs_runner/add_fema_to_transects.R | 110 ++++++++++++++++++++--
 1 file changed, 101 insertions(+), 9 deletions(-)

diff --git a/runners/cs_runner/add_fema_to_transects.R b/runners/cs_runner/add_fema_to_transects.R
index 7f81f51..27f5163 100644
--- a/runners/cs_runner/add_fema_to_transects.R
+++ b/runners/cs_runner/add_fema_to_transects.R
@@ -651,22 +651,114 @@ fema_uids
      
     }
     
+    transects2 <-
+      transects %>% 
+      dplyr::mutate(
+        new_cs_lengthm = as.numeric(sf::st_length(geom))
+      ) %>% 
+      dplyr::relocate(hy_id, cs_id, cs_lengthm, new_cs_lengthm)
+    
+    
     # Update the "transects_to_extend" with new geos geometries ("geos_list")
-    sf::st_geometry(transects) <- sf::st_geometry(sf::st_as_sf(transect_geoms))
+    sf::st_geometry(transects2) <- sf::st_geometry(sf::st_as_sf(transect_geoms))
+    
+    transects2 <- 
+      transects2 %>% 
+      dplyr::mutate(
+        new_cs_lengthm = as.numeric(sf::st_length(geom))
+      ) %>% 
+      dplyr::relocate(hy_id, cs_id, cs_lengthm, new_cs_lengthm)
+    
+    # transects2 %>% 
+    #   dplyr::filter(
+    #     new_cs_lengthm > cs_lengthm
+    #   )
+    # 
+    
+    transects2$left_is_extended  <- left_extended_flag
+    transects2$right_is_extended <- right_extended_flag
+    
+    transects2 %>% 
+      dplyr::filter(left_is_extended, right_is_extended)
+    
+    both_extended <- 
+      transects2 %>% 
+      dplyr::filter(left_is_extended, right_is_extended)
+    
+    both_flines <- 
+      flines %>% 
+      dplyr::filter(id %in% both_extended$hy_id)
     
-    transects$left_is_extended  <- left_extended_flag
-    transects$right_is_extended <- right_extended_flag
-    transects %>% 
-      dplyr::filter(left_is_extended | right_is_extended)
     left_only_extended <- 
-      transects %>% 
-      dplyr::filter(!left_is_extended, right_is_extended)
+      transects2 %>% 
+      dplyr::filter(left_is_extended, !right_is_extended)
+    
     left_only_flines <- 
       flines %>% 
       dplyr::filter(id %in% left_only_extended$hy_id)
+    
+    right_only_extended <- 
+      transects2 %>% 
+      dplyr::filter(!left_is_extended, right_is_extended)
+    
+    right_only_flines <- 
+      flines %>% 
+      dplyr::filter(id %in% right_only_extended$hy_id)
+  
+    # left_fema_polygons <- 
+      # left_trans %>% 
+      
+    fema_indexes_in_aoi <-
+      dplyr::bind_rows(
+        sf::st_drop_geometry(
+          dplyr::rename(left_trans, fema_index = left_fema_index)
+        ),
+        sf::st_drop_geometry(     
+          dplyr::rename(right_trans, 
+                        fema_index = right_fema_index)
+        )
+      ) %>% 
+      hydrofabric3D::add_tmp_id() %>% 
+      dplyr::filter(tmp_id %in% hydrofabric3D::add_tmp_id(extended_for_map)$tmp_id)
+      # dplyr::filter(
+      #   # tmp_id %in% hydrofabric3D::add_tmp_id(left_only_extended)$tmp_id |
+      #   # tmp_id %in% hydrofabric3D::add_tmp_id(right_only_extended)$tmp_id
+      #  
+      #   tmp_id %in%  unique(hydrofabric3D::add_tmp_id(dplyr::filter(transects2, left_is_extended, right_is_extended))$tmp_id)
+      #   
+      #   ) %>% 
+      # dplyr::filter(left_is_within_fema | right_is_within_fema) %>% 
+      dplyr::slice(1:200) %>%
+      .$fema_index %>% 
+      unlist() %>% 
+      na.omit() %>% 
+      unique() %>% 
+      sort()  
+      # length()
+    
+    polygons[fema_indexes_in_aoi, ]
+    
+    extended_for_map <- 
+      both_extended %>% 
+      dplyr::slice(1:5000)
+    
+    og_transects_for_map <- 
+      transects %>% 
+      hydrofabric3D::add_tmp_id() %>% 
+      dplyr::filter(tmp_id %in% hydrofabric3D::add_tmp_id(extended_for_map)$tmp_id)
+
+    # hydrofabric3D::add_tmp_id(left_only_extended)$tmp_id
+    # transects_with_distances
     # %>% 
-      mapview::mapview(left_only_flines, color = "dodgerblue") + 
-    mapview::mapview(left_only_extended, color = "red")
+    mapview::mapview(polygons[fema_indexes_in_aoi, ], col.regions = "yellow") + 
+      mapview::mapview(both_flines, color = "dodgerblue") + 
+      mapview::mapview(og_transects_for_map, color = "red") + 
+      mapview::mapview(extended_for_map, color = "green") 
+      
+      # mapview::mapview(left_only_flines, color = "dodgerblue") + 
+      #     mapview::mapview(right_only_flines, color = "dodgerblue") + 
+    # mapview::mapview(left_only_extended, color = "red") + 
+    #      mapview::mapview(right_only_extended, color = "green")
     # transects$cs_lengthm  <- length_list
     
     transects

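The new_cs_lengthm column added in this patch makes the extensions measurable; a small sanity check that could follow, assuming the geometries have already been replaced:

transects2 <-
  transects %>%
  dplyr::mutate(new_cs_lengthm = as.numeric(sf::st_length(geom))) %>%
  dplyr::relocate(hy_id, cs_id, cs_lengthm, new_cs_lengthm)

# extensions should never shorten a transect
stopifnot(all(transects2$new_cs_lengthm >= transects2$cs_lengthm - 1e-6))
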
From 3e5876798aa77b20103f61871c3cce06f2c8d677 Mon Sep 17 00:00:00 2001
From: anguswg-ucsb <anguswatters@gmail.com>
Date: Fri, 17 May 2024 13:59:05 -0700
Subject: [PATCH 33/64] starting to clean up scripts and make it so transects
 will try to be extended halfway in directions that wouldn't normally be
 extended due to intersections with flowlines or other transects

---
 runners/cs_runner/add_fema_to_transects.R | 114 ++++++++++++----------
 1 file changed, 60 insertions(+), 54 deletions(-)

diff --git a/runners/cs_runner/add_fema_to_transects.R b/runners/cs_runner/add_fema_to_transects.R
index 27f5163..b649aef 100644
--- a/runners/cs_runner/add_fema_to_transects.R
+++ b/runners/cs_runner/add_fema_to_transects.R
@@ -123,7 +123,8 @@ path_df
     transect_lines <- transects
     polygons <- fema
     # # flines <- flines
-    max_extension_distance <- 3000
+    # max_extension_distance <- 3000
+    max_extension_distance = 3500
     # ###    ###    ###    ###    ###    ###    ###
     ###    ###    ###    ###    ###    ###    ###
     
@@ -131,7 +132,7 @@ path_df
     polygons <- rmapshaper::ms_simplify(polygons, keep_shapes = F, keep = 0.10)
     
     mapview::npts(fema)
-     mapview::npts(polygons)
+    mapview::npts(polygons)
     # transects <- sf::read_sf(transect_path)
     
      # polygons
@@ -162,7 +163,10 @@ path_df
       # dplyr::select(fema_id, geom = x) %>% 
       sf::st_cast("MULTILINESTRING") %>% 
       geos::as_geos_geometry() %>% 
-      geos::geos_simplify_preserve_topology(25)
+      geos::geos_simplify_preserve_topology(20)
+    
+    # mapview::npts(sf::st_as_sf(intersect_lines))
+
     
  #    intersect_polygons %>% 
  #      geos::geos_make_valid() %>% 
@@ -259,12 +263,13 @@ path_df
                     geom
       ) 
     
-    max_extension_distance = 3000
-    which(transects_with_distances$hy_id == "wb-1003839")
+    # max_extension_distance = 3000
+    # which(transects_with_distances$hy_id == "wb-1003839")
     
     left_trans[which(left_trans$hy_id == "wb-1003839"), ]$cs_lengthm
     right_trans[which(left_trans$hy_id == "wb-1003839"), ]$cs_lengthm
-    which(right_trans$hy_id == "wb-1003839")
+    
+    # which(right_trans$hy_id == "wb-1003839")
     
     left_distances <- calc_extension_distances(
       geos_geoms             = left_trans_geos,
@@ -289,6 +294,7 @@ path_df
     
     extensions_by_id %>% 
       dplyr::filter(hy_id == "wb-1003839")
+    
     # distance to extend LEFT and/or RIGHT for each hy_id/cs_id
     extensions_by_id <- dplyr::left_join(
                           sf::st_drop_geometry(
@@ -651,12 +657,11 @@ fema_uids
      
     }
     
-    transects2 <-
-      transects %>% 
-      dplyr::mutate(
-        new_cs_lengthm = as.numeric(sf::st_length(geom))
-      ) %>% 
-      dplyr::relocate(hy_id, cs_id, cs_lengthm, new_cs_lengthm)
+    transects2 <- transects 
+      # dplyr::mutate(
+      #   new_cs_lengthm = as.numeric(sf::st_length(geom))
+      # ) %>% 
+      # dplyr::relocate(hy_id, cs_id, cs_lengthm, new_cs_lengthm)
     
     
     # Update the "transects_to_extend" with new geos geometries ("geos_list")
@@ -681,33 +686,44 @@ fema_uids
     transects2 %>% 
       dplyr::filter(left_is_extended, right_is_extended)
     
-    both_extended <- 
+    any_extended <- 
       transects2 %>% 
-      dplyr::filter(left_is_extended, right_is_extended)
+      dplyr::filter(left_is_extended | right_is_extended)
     
-    both_flines <- 
+    any_flines <- 
       flines %>% 
-      dplyr::filter(id %in% both_extended$hy_id)
+      dplyr::filter(id %in% any_extended$hy_id)
     
-    left_only_extended <- 
-      transects2 %>% 
-      dplyr::filter(left_is_extended, !right_is_extended)
-    
-    left_only_flines <- 
-      flines %>% 
-      dplyr::filter(id %in% left_only_extended$hy_id)
-    
-    right_only_extended <- 
-      transects2 %>% 
-      dplyr::filter(!left_is_extended, right_is_extended)
-    
-    right_only_flines <- 
-      flines %>% 
-      dplyr::filter(id %in% right_only_extended$hy_id)
+    # left_only_extended <- 
+    #   transects2 %>% 
+    #   dplyr::filter(left_is_extended, !right_is_extended)
+    # 
+    # left_only_flines <- 
+    #   flines %>% 
+    #   dplyr::filter(id %in% left_only_extended$hy_id)
+    # 
+    # right_only_extended <- 
+    #   transects2 %>% 
+    #   dplyr::filter(!left_is_extended, right_is_extended)
+    # 
+    # right_only_flines <- 
+    #   flines %>% 
+    #   dplyr::filter(id %in% right_only_extended$hy_id)
   
     # left_fema_polygons <- 
       # left_trans %>% 
-      
+    # ----------------------------------------------------------------
+    # ------- Subset data for mapping ----------- 
+    # ----------------------------------------------------------------
+    extended_for_map <- 
+      any_extended %>% 
+      dplyr::slice(1:5000)
+    
+    og_transects_for_map <- 
+      transects %>% 
+      hydrofabric3D::add_tmp_id() %>% 
+      dplyr::filter(tmp_id %in% hydrofabric3D::add_tmp_id(extended_for_map)$tmp_id)
+    
     fema_indexes_in_aoi <-
       dplyr::bind_rows(
         sf::st_drop_geometry(
@@ -719,7 +735,7 @@ fema_uids
         )
       ) %>% 
       hydrofabric3D::add_tmp_id() %>% 
-      dplyr::filter(tmp_id %in% hydrofabric3D::add_tmp_id(extended_for_map)$tmp_id)
+      dplyr::filter(tmp_id %in% hydrofabric3D::add_tmp_id(extended_for_map)$tmp_id) %>% 
       # dplyr::filter(
       #   # tmp_id %in% hydrofabric3D::add_tmp_id(left_only_extended)$tmp_id |
       #   # tmp_id %in% hydrofabric3D::add_tmp_id(right_only_extended)$tmp_id
@@ -728,42 +744,32 @@ fema_uids
       #   
       #   ) %>% 
       # dplyr::filter(left_is_within_fema | right_is_within_fema) %>% 
-      dplyr::slice(1:200) %>%
+      # dplyr::slice(1:200) %>%
       .$fema_index %>% 
       unlist() %>% 
       na.omit() %>% 
       unique() %>% 
-      sort()  
+      sort()
       # length()
     
-    polygons[fema_indexes_in_aoi, ]
-    
-    extended_for_map <- 
-      both_extended %>% 
-      dplyr::slice(1:5000)
-    
-    og_transects_for_map <- 
-      transects %>% 
-      hydrofabric3D::add_tmp_id() %>% 
-      dplyr::filter(tmp_id %in% hydrofabric3D::add_tmp_id(extended_for_map)$tmp_id)
+    sf::st_as_sf(intersect_polygons[fema_indexes_in_aoi])
 
     # hydrofabric3D::add_tmp_id(left_only_extended)$tmp_id
     # transects_with_distances
     # %>% 
-    mapview::mapview(polygons[fema_indexes_in_aoi, ], col.regions = "yellow") + 
-      mapview::mapview(both_flines, color = "dodgerblue") + 
-      mapview::mapview(og_transects_for_map, color = "red") + 
-      mapview::mapview(extended_for_map, color = "green") 
-      
+    mapview::mapview( sf::st_as_sf(intersect_polygons[fema_indexes_in_aoi]),  col.regions = "lightblue") + 
+      mapview::mapview(any_flines, color = "dodgerblue") + 
+      mapview::mapview(og_transects_for_map, color = "green") + 
+      mapview::mapview(extended_for_map, color = "red") 
+
       # mapview::mapview(left_only_flines, color = "dodgerblue") + 
       #     mapview::mapview(right_only_flines, color = "dodgerblue") + 
     # mapview::mapview(left_only_extended, color = "red") + 
     #      mapview::mapview(right_only_extended, color = "green")
     # transects$cs_lengthm  <- length_list
-    
-    transects
-  length(transect_geoms)
-    
+    # ----------------------------------------------------------------
+    # ----------------------------------------------------------------
+    # ----------------------------------------------------------------  
   
       # make the new transect line from the start and points 
       final_line <- geos::geos_make_linestring(x = c(X_start, X_end),

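A sketch of the index-gathering idiom this patch settles on: the left/right FEMA index columns hold list entries (possibly NA), so they are flattened, de-duplicated, and sorted before subsetting the polygons for the map:

# flatten the list-columns of FEMA indices from both sides
fema_idx <- c(unlist(left_trans$left_fema_index),
              unlist(right_trans$right_fema_index))
fema_idx <- sort(unique(na.omit(fema_idx)))

fema_for_map <- sf::st_as_sf(intersect_polygons[fema_idx])
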
From 9e4405b62da4de1bfeb5abafa4125bc6ad521de2 Mon Sep 17 00:00:00 2001
From: anguswg-ucsb <anguswatters@gmail.com>
Date: Tue, 21 May 2024 12:44:51 -0700
Subject: [PATCH 34/64] added a generic transect line checker that makes sure
 that a transect line doesn't intersect more than one flowline or another
 transect, working on adding in something to try a shorter extension if one of
 the directions turns out to be unextendable

---
 runners/cs_runner/add_fema_to_transects.R | 352 +++++++++++++++-------
 runners/cs_runner/preprocess_fema.R       |  74 ++++-
 2 files changed, 311 insertions(+), 115 deletions(-)

diff --git a/runners/cs_runner/add_fema_to_transects.R b/runners/cs_runner/add_fema_to_transects.R
index b649aef..61a599e 100644
--- a/runners/cs_runner/add_fema_to_transects.R
+++ b/runners/cs_runner/add_fema_to_transects.R
@@ -50,11 +50,13 @@ path_df
   
  
   fema_vpu_dir <- paste0(FEMA_VPU_SUBFOLDERS[grepl(paste0("VPU_", vpu), basename(FEMA_VPU_SUBFOLDERS))], "/merged")
+  # fema_vpu_dir <- paste0(FEMA_VPU_SUBFOLDERS[grepl(paste0("VPU_", vpu), basename(FEMA_VPU_SUBFOLDERS))], "/merged")
+
   vpu_fema_files <- list.files(fema_vpu_dir, full.names = TRUE)
   # vpu_fema_file1 <- vpu_fema_files[grepl("_union.gpkg", vpu_fema_files)]
   
-  vpu_fema_file <- vpu_fema_files[grepl(paste0(vpu, ".gpkg"), vpu_fema_files)]
-  # vpu_fema_file <- vpu_fema_files[grepl(paste0(vpu, "_union.gpkg"), vpu_fema_files)]
+  # vpu_fema_file <- vpu_fema_files[grepl(paste0(vpu, ".gpkg"), vpu_fema_files)]
+  vpu_fema_file <- vpu_fema_files[grepl(paste0(vpu, "_union.gpkg"), vpu_fema_files)]
   vpu_fema_file
   
   # fema polygons and transect lines
@@ -65,7 +67,36 @@ path_df
   
   # read in nextgen flowlines data
   flines <- sf::read_sf(nextgen_path, layer = "flowpaths")
-
+  # library(nngeo)
+  #   
+  # fema
+  # 
+  # fema %>% 
+  #   dplyr::group_by(fema_id) %>% 
+  #   dplyr::mutate(
+  #     n = dplyr::n()
+  #   ) %>% 
+  #   dplyr::arrange(-n) %>% 
+  #   dplyr::relocate(n)
+  # fema_sub <-   
+  #   fema %>% 
+  #   dplyr::filter(fema_id %in% c(1726))
+  # 
+  # # fema %>% 
+  #   # dplyr::filter(fema_id %in% c(1726)) %>%
+  #   mapview::mapview(fema_sub[1, ], col.regions = "red") +
+  #       mapview::mapview(sf::st_buffer(fema_sub[2, ], 500), col.regions = "green")
+  # fema_no_holes <- nngeo::st_remove_holes(fema)
+  # fema_no_holes_union <- sf::st_union(fema_no_holes)
+  # 
+  # touching_list = sf::st_touches(fema_no_holes)
+  # mapview::npts(fema)
+  # mapview::npts(fema_no_holes)
+  # mapview::npts(fema_no_holes_union)
+  # fema_no_holes %>% 
+  #   dplyr::mutate(new_fema_id = 1:dplyr::n()) %>% 
+  #   dplyr::group_by(new_fema_id)
+  # fema_no_holes_union
   # fema$fema_id %>% unique() %>% length()
   
   # # union then explode FEMA polygons 
@@ -106,7 +137,49 @@ path_df
         return(geos_ls)                                              
         
         
-      }
+  }
+  
+  #' Check if an updated transect line is valid relative to the other transects and flowlines in the network
+  #' The 'transect_to_check' should be 'used' (i.e. function returns TRUE) if 
+  #' the 'transect_to_check' does NOT interesect any other transects ('transect_lines') AND it only intersects a single flowline ONCE.
+  #' If the 'transect_to_check' intersects ANY other transects OR intersects a flowline more
+  #' than once (OR more than one flowline in the network) then the function returns FALSE.
+  #' @param transect_to_check geos_geometry, linestring
+  #' @param transect_lines geos_geometry, linestring
+  #' @param flowlines geos_geometry, linestring
+  #'
+  #' @return TRUE if the extension should be used, FALSE if it shouldn't be used
+  #' @importFrom geos geos_intersection geos_type
+  is_valid_transect_line <- function(transect_to_check, transect_lines, flowlines) {
+    
+    # ###   ##   ##   ##   ##   ##   ##   ##   ##   ##  
+    # extension_line <- left_extended_trans
+    # transect_lines <- transect_geoms
+    # flowlines <- flines_geos
+    # ###   ##   ##   ##   ##   ##   ##   ##   ##   ##  
+    
+    # Define conditions to decide which version of the transect to use
+    
+    # 1. Use transect with extension in BOTH directions
+    # 2. Use transect with LEFT extension only
+    # 3. Use transect with RIGHT extension only
+    
+    # Check that the extended transect lines only intersect a single flowline in the network only ONCE
+    intersects_with_flowlines <- geos::geos_intersection(
+      transect_to_check,
+      flowlines
+    )
+    intersects_flowline_only_once <- sum(geos::geos_type(intersects_with_flowlines) == "point") == 1 && 
+      sum(geos::geos_type(intersects_with_flowlines) == "multipoint") == 0 
+    
+    # check that the extended transect line does NOT intersect other transect lines (other than SELF)
+    intersects_other_transects <- sum(geos::geos_intersects(transect_to_check, transect_lines)) > 1
+    
+    # TRUE == Only one flowline is intersected a single time AND no other transect lines are intersected
+    use_transect <- intersects_flowline_only_once  && !intersects_other_transects
+    
+    return(use_transect)
+  }
   
  # Given a set of transect linestrings and polygons, get the minimum distance to extend each transect line (in both directions) to try and reach the edge of the polygons
   # internal function for extending transect lines out to FEMA 100 year flood plain polygons
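A hypothetical call to the is_valid_transect_line() checker added above, assuming the loop context of this patch (current_trans, transect_geoms, flines_geos, i):

candidate <- hydrofabric3D::geos_extend_line(current_trans, left_distance_to_extend, "head")

# keep the extension only if it is still valid against the whole network
if (is_valid_transect_line(candidate, transect_geoms, flines_geos)) {
  transect_geoms[i] <- candidate
}
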
@@ -163,7 +236,7 @@ path_df
       # dplyr::select(fema_id, geom = x) %>% 
       sf::st_cast("MULTILINESTRING") %>% 
       geos::as_geos_geometry() %>% 
-      geos::geos_simplify_preserve_topology(20)
+      geos::geos_simplify_preserve_topology(25)
     
     # mapview::npts(sf::st_as_sf(intersect_lines))
 
@@ -433,6 +506,10 @@ fema_uids
     right_extended_flag  <- rep(FALSE, length(transect_hy_id_array))
     both_extended_flag   <- rep(FALSE, length(transect_hy_id_array))
     
+    
+    updated_left_distances    <- rep(0, length(transect_hy_id_array))   
+    updated_right_distances   <- rep(0, length(transect_hy_id_array))   
+    
     # new_transects <- geos::geos_empty()
     # # measures  <- vctrs::vec_c()
     # transects_with_distances[1:20, ]
@@ -442,12 +519,14 @@ fema_uids
     total <- length(transect_hy_id_array)
 
     # output a message every ~10% intervals
-    message_interval <- total %/% 5
+    message_interval <- total %/% 20
     number_of_skips = 0
     
     for (i in seq_along(transect_hy_id_array)) {
       # message("i: ", i)
+      # i = 13
       # if(i > 2000) {
+      #   message("-----> STOP BECAUSE at i", i)
       #   break
       # }
       # i = 1
@@ -490,108 +569,155 @@ fema_uids
         next
       }
       
-      # transects_with_distances %>% 
-      #   dplyr::filter(hy_id == "wb-1003839")
-      # transects
-#      curr_fema_index <- 
-#        left_trans %>% 
-#        dplyr::filter(hy_id == current_hy_id, cs_id == current_cs_id) %>% 
-#        .$left_fema_index %>% .[[1]]
-      
       # message("Extending transect line left and right")
       # extend the lines
-      left_extended_trans  <- hydrofabric3D::geos_extend_line(current_trans, left_distance_to_extend, "head")
-      right_extended_trans <- hydrofabric3D::geos_extend_line(current_trans, right_distance_to_extend, "tail")
-      
-      # neighbor_transects <- 
-      # message("Checking left and right intersections with flowline...")
+      left_extended_trans  <- hydrofabric3D::geos_extend_line(current_trans, 
+                                                              left_distance_to_extend, "head")
+      right_extended_trans <- hydrofabric3D::geos_extend_line(current_trans, 
+                                                              right_distance_to_extend, "tail")
       
-      # Check that the extended transect lines only intersect the current flowline once
-      left_intersects_fline <- geos::geos_intersection(
-        left_extended_trans,
-        # current_fline
-        flines_geos
-      )
-      
-      right_intersects_fline <- geos::geos_intersection(
-        right_extended_trans,
-        # current_fline
-        flines_geos
-      )
+      # initial check to make sure the extended versions of the transects are valid
+      use_left_extension  <- is_valid_transect_line(left_extended_trans, transect_geoms, flines_geos)
+      use_right_extension <- is_valid_transect_line(right_extended_trans, transect_geoms, flines_geos)
+      # use_both_extensions <- use_left_extension && use_right_extension
       
-      # sum(geos::geos_type(left_intersects_fline) == "point")
-      # sum(geos::geos_type(right_intersects_fline) == "point")
+      used_half_of_left  <- FALSE
+      used_half_of_right <- FALSE
       
+      # TODO: Probably should precompute this division BEFORE the loop...
+      half_left_distance   <- ifelse(left_distance_to_extend > 0, left_distance_to_extend %/% 2, 0)
+      half_right_distance  <- ifelse(right_distance_to_extend > 0, right_distance_to_extend %/% 2, 0)
       
-      # which(geos::geos_type(left_intersects_fline) == "point")
+      # if we CAN'T use the original LEFT extension distance, 
+      # we try HALF the distance (or some distance less than we extended by in the first place)
+      if (!use_left_extension) {
+        
+        # half_left_distance   <- ifelse(left_distance_to_extend > 0, left_distance_to_extend %/% 2, 0)
+        left_extended_trans  <- hydrofabric3D::geos_extend_line(current_trans, 
+                                                                half_left_distance, "head")
+        use_left_extension  <- is_valid_transect_line(left_extended_trans, transect_geoms, flines_geos)
+        
+        used_half_of_left  <- use_left_extension   # TRUE only if the halved extension validated
+      }
       
-      # transects %>% 
-      #   dplyr::filter(hy_id == current_hy_id) %>% 
-      #   sf::st_length()
-      # current_hy_id
+      # if we CAN'T use the original RIGHT extension distance,
+      # we try HALF that distance (i.e. some distance shorter than the first attempt)
+      if (!use_right_extension) {
+        
+        # half_right_distance  <- ifelse(right_distance_to_extend > 0, right_distance_to_extend %/% 2, 0)
+        right_extended_trans <- hydrofabric3D::geos_extend_line(current_trans, 
+                                                                half_right_distance, "tail")
+        use_right_extension <- is_valid_transect_line(right_extended_trans, transect_geoms, flines_geos)
+        
+        used_half_of_right <- use_right_extension  # TRUE only if the halved extension validated
+        
+        # mapview::mapview(sf::st_as_sf(current_trans), color = "red") + 
+        #   mapview::mapview(sf::st_as_sf(left_extended_trans), color = "green") + 
+        #   mapview::mapview(sf::st_as_sf(right_extended_trans), color = "green") +
+        #   mapview::mapview(sf::st_as_sf(left_extended_trans2), color = "dodgerblue") + 
+        #   mapview::mapview(sf::st_as_sf(right_extended_trans2), color = "dodgerblue") 
+        
+      }
+
+      use_both_extensions <- use_left_extension && use_right_extension
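+
+      # the halve-and-retry step above factored into a reusable helper (a minimal
+      # sketch; "extend_and_validate" is a hypothetical name, and it assumes
+      # is_valid_transect_line() / hydrofabric3D::geos_extend_line() behave exactly
+      # as they are used above):
+      # extend_and_validate <- function(trans, dist, side, transect_geoms, flines_geos) {
+      #   extended <- hydrofabric3D::geos_extend_line(trans, dist, side)
+      #   ok       <- is_valid_transect_line(extended, transect_geoms, flines_geos)
+      #   if (!ok && dist > 0) {
+      #     # fall back to half the requested distance (integer division, as above)
+      #     extended <- hydrofabric3D::geos_extend_line(trans, dist %/% 2, side)
+      #     ok       <- is_valid_transect_line(extended, transect_geoms, flines_geos)
+      #   }
+      #   list(geom = extended, is_valid = ok)
+      # }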
+
+      # # message("Checking left and right intersections with flowline...")
+      # # ---------------------------------------------------------------------------------
+      # # TODO: UNCOMMENT BELOW ---> this was my original method 
+      # # ---------------------------------------------------------------------------------
+      # # Check that the extended transect lines only intersect the current flowline once
+      # left_intersects_fline <- geos::geos_intersection(
+      #   left_extended_trans,
+      #   # current_fline
+      #   flines_geos
+      # )
       # 
-      # sf::st_as_sf(current_trans) %>% 
-      #   sf::st_length()
-      # tmp <- sf::read_sf(transect_path)
+      # right_intersects_fline <- geos::geos_intersection(
+      #   right_extended_trans,
+      #   # current_fline
+      #   flines_geos
+      # )
       # 
-      # tmp %>% 
-      #   dplyr::filter(hy_id == current_hy_id) %>% 
-      #   sf::st_length()
-      
-      # mapview::mapview(sf::st_as_sf(flines_geos[which(geos::geos_type(left_intersects_fline) == "point")])) +
-      #   mapview::mapview(sf::st_as_sf(current_trans), color = "red") +
-      # mapview::mapview(sf::st_as_sf(left_extended_trans), color = "green") + 
-      # mapview::mapview(sf::st_as_sf(right_extended_trans), color = "green")
-      # Define conditions to decide which version of the transect to use
-      
-      # 1. Use transect with extension in BOTH directions
-      # 2. Use transect with LEFT extension only
-      # 3. Use transect with RIGHT extension only
-      
-      # left_intersects_fline_once  <- geos::geos_type(left_intersects_fline) == "point"
-      # right_intersects_fline_once <- geos::geos_type(right_intersects_fline) == "point"
-      left_intersects_fline_once  <- sum(geos::geos_type(left_intersects_fline) == "point") == 1 && sum(geos::geos_type(left_intersects_fline) == "multipoint") == 0 
-      right_intersects_fline_once <- sum(geos::geos_type(right_intersects_fline) == "point") == 1 && sum(geos::geos_type(right_intersects_fline) == "multipoint") == 0 
-      
-      # sum(geos::geos_type(left_intersects_fline) == "point") == 1
-      # sum(geos::geos_type(right_intersects_fline) == "point") == 1
-      # sum(geos::geos_type(left_intersects_fline) == "multipoint") == 0 
-      
-      
-      # # TODO: Consider doing the opppsite of these conditions (i.e. "left_intersects_other_transects" = TRUE) 
-      # left_does_not_intersect_other_transects  <- !any(geos::geos_intersects(left_extended_trans, transect_geoms[-i]))
-      # right_does_not_intersect_other_transects <- !any(geos::geos_intersects(right_extended_trans, transect_geoms[-i]))  
       # 
-      # use_left_extension  <- left_intersects_fline_once && left_does_not_intersect_other_transects
-      # use_right_extension <- right_intersects_fline_once && right_does_not_intersect_other_transects
+      # # mapview::mapview(sf::st_as_sf(flines_geos[which(geos::geos_type(left_intersects_fline) == "point")])) +
+      # #   mapview::mapview(sf::st_as_sf(current_trans), color = "red") +
+      # # mapview::mapview(sf::st_as_sf(left_extended_trans), color = "green") + 
+      # # mapview::mapview(sf::st_as_sf(right_extended_trans), color = "green")
+      # 
+      # # Define conditions to decide which version of the transect to use
+      # 
+      # # 1. Use transect with extension in BOTH directions
+      # # 2. Use transect with LEFT extension only
+      # # 3. Use transect with RIGHT extension only
+      # 
+      # # left_intersects_fline_once  <- geos::geos_type(left_intersects_fline) == "point"
+      # # right_intersects_fline_once <- geos::geos_type(right_intersects_fline) == "point"
+      # left_intersects_fline_once  <- sum(geos::geos_type(left_intersects_fline) == "point") == 1 && 
+      #                                sum(geos::geos_type(left_intersects_fline) == "multipoint") == 0 
+      # 
+      # right_intersects_fline_once <- sum(geos::geos_type(right_intersects_fline) == "point") == 1 &&
+      #                                sum(geos::geos_type(right_intersects_fline) == "multipoint") == 0 
+      # 
+      # # sum(geos::geos_type(left_intersects_fline) == "point") == 1
+      # # sum(geos::geos_type(right_intersects_fline) == "point") == 1
+      # # sum(geos::geos_type(left_intersects_fline) == "multipoint") == 0 
+      # 
+      # 
+      # 
+      # # TODO: Consider doing the opposite of these conditions (i.e. "left_intersects_other_transects" = TRUE) 
+      # # left_does_not_intersect_other_transects  <- !any(geos::geos_intersects(left_extended_trans, transect_geoms[-i]))
+      # # right_does_not_intersect_other_transects <- !any(geos::geos_intersects(right_extended_trans, transect_geoms[-i]))  
+      # # 
+      # # use_left_extension  <- left_intersects_fline_once && left_does_not_intersect_other_transects
+      # # use_right_extension <- right_intersects_fline_once && right_does_not_intersect_other_transects
+      # # use_both_extensions <- use_left_extension && use_right_extension
+      # 
+      # 
+      # # TODO: This is the opposite phrasing of these conditions, i think this is clearer to read
+      # left_intersects_other_transects  <- any(geos::geos_intersects(left_extended_trans, transect_geoms[-i]))
+      # right_intersects_other_transects <- any(geos::geos_intersects(right_extended_trans, transect_geoms[-i]))  
+      # 
+      # # # make sure the extended transects don't hit any of the newly extended transects
+      # # # NOTE: I think this could be just done with a single transect list that starts with the original transects and if an update happens then we replace that transect
+      # # left_intersects_new_transects  <- any(geos::geos_intersects(left_extended_trans, new_transects))
+      # # right_intersects_new_transects <- any(geos::geos_intersects(right_extended_trans, new_transects))
+      # 
+      # # make TRUE/FALSE flags stating which transect should we use
+      # # - BOTH extensions
+      # # - LEFT ONLY extensions
+      # # - RIGHT only extensions
+      # use_left_extension  <- left_intersects_fline_once && !left_intersects_other_transects
+      # use_right_extension <- right_intersects_fline_once && !right_intersects_other_transects
       # use_both_extensions <- use_left_extension && use_right_extension
-      
-      
-      # TODO: This is the opposite phrasing of these conditions, i think this is clearer to read
-      left_intersects_other_transects  <- any(geos::geos_intersects(left_extended_trans, transect_geoms[-i]))
-      right_intersects_other_transects <- any(geos::geos_intersects(right_extended_trans, transect_geoms[-i]))  
-      
-      # # make sure the extended transects don't hit any of the newly extended transects
-      # # NOTE: I think this could be just done with a single transect list that starts with the original transects and if an update happens then we replace that transect
-      # left_intersects_new_transects  <- any(geos::geos_intersects(left_extended_trans, new_transects))
-      # right_intersects_new_transects <- any(geos::geos_intersects(right_extended_trans, new_transects))
-      
-      # make TRUE/FALSE flags stating which transect should we use
-      # - BOTH extensions
-      # - LEFT ONLY extensions
-      # - RIGHT only extensions
-      use_left_extension  <- left_intersects_fline_once && !left_intersects_other_transects
-      use_right_extension <- right_intersects_fline_once && !right_intersects_other_transects
-      use_both_extensions <- use_left_extension && use_right_extension
-      
-      # merged_trans <- geos::geos_union(left_extended_trans, right_extended_trans)
-      # sf::st_union(sf::st_cast(sf::st_as_sf(merged_trans), "LINESTRING"))
-      # mapview::mapview(sf::st_as_sf(merged_trans), color = "green") +
-      #   mapview::mapview(sf::st_as_sf(left_start), col.region = "red") +
-      #   mapview::mapview(sf::st_as_sf(left_end), col.region = "red") + 
-      #   mapview::mapview(sf::st_as_sf(right_start), col.region = "dodgerblue") +
-      #   mapview::mapview(sf::st_as_sf(right_end), col.region = "dodgerblue")
-      
+      # 
+      # new_use_left_extension  <- is_valid_transect_line(left_extended_trans, transect_geoms, flines_geos)
+      # new_use_right_extension <- is_valid_transect_line(right_extended_trans, transect_geoms, flines_geos)
+      # new_use_both_extensions <- new_use_left_extension && new_use_right_extension
+      # 
+      # message("--------------------------------------------")
+      # message("Left intersects FLINE ONCE: ", left_intersects_fline_once)
+      # message("Right intersects FLINE ONCE: ", right_intersects_fline_once)
+      # message()
+      # message("Left intersects OTHER TRANSECTS: ", left_intersects_other_transects)
+      # message("Right intersects OTHER TRANSECTS: ", right_intersects_other_transects)
+      # message()
+      # message("Use LEFT extension intersects: ", use_left_extension)
+      # message("Use RIGHT extension intersects: ", use_right_extension)
+      # message("Use BOTH extension intersects: ", use_both_extensions)
+      # message()
+      # message("--------------------------------------------")
+      # message()
+      # # merged_trans <- geos::geos_union(left_extended_trans, right_extended_trans)
+      # # sf::st_union(sf::st_cast(sf::st_as_sf(merged_trans), "LINESTRING"))
+      # # mapview::mapview(sf::st_as_sf(merged_trans), color = "green") +
+      # #   mapview::mapview(sf::st_as_sf(left_start), col.region = "red") +
+      # #   mapview::mapview(sf::st_as_sf(left_end), col.region = "red") + 
+      # #   mapview::mapview(sf::st_as_sf(right_start), col.region = "dodgerblue") +
+      # #   mapview::mapview(sf::st_as_sf(right_end), col.region = "dodgerblue")
+      # # ---------------------------------------------------------------------------------
+      # # TODO: UNCOMMENT ABOVE ---> this was my original method 
+      # # ---------------------------------------------------------------------------------
+      # 
       # if(use_both_extensions) {
       
-      # Get the start and end of both extended tranects
+      # Get the start and end of both extended transects
@@ -635,22 +761,46 @@ fema_uids
         line_crs      <- wk::wk_crs(current_trans)
         updated_trans <- make_line_from_start_and_end_pts(start, end, line_crs)
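+
+        # a minimal sketch of what make_line_from_start_and_end_pts() is assumed to
+        # do here, inferred from the geos_make_linestring() usage further down this
+        # file (not necessarily the actual implementation):
+        # make_line_from_start_and_end_pts <- function(start, end, line_crs) {
+        #   geos::geos_make_linestring(
+        #     x   = c(geos::geos_x(start), geos::geos_x(end)),
+        #     y   = c(geos::geos_y(start), geos::geos_y(end)),
+        #     crs = line_crs
+        #   )
+        # }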
         
-      # }
+      #   touched_flines <- flines[geos::geos_type(right_intersects_fline) != "linestring", ]
+      #   mapview::mapview(touched_flines, color = "dodgerblue") + 
+      #   mapview::mapview(sf::st_as_sf(current_trans), color = "red") +
+      #     mapview::mapview(sf::st_as_sf(left_extended_trans), color = "green") +
+      #     mapview::mapview(sf::st_as_sf(right_extended_trans), color = "green") +
+      #     mapview::mapview(sf::st_as_sf(updated_trans), color = "yellow")
+      # nrow(flines)
+      # touched_flines <- flines[geos::geos_type(right_intersects_fline) != "linestring", ]
+      # flines[lengths(right_intersects_fline) == 0, ]
+      # length(right_intersects_fline)
+        # }
         
+        # ---------------------------------------------------
+        # TODO: UNCOMMENT BELOW
+        # ---------------------------------------------------
         if(use_left_extension) {
           left_extended_flag[i]  <- TRUE
         }
-        
+
         if(use_right_extension) {
           right_extended_flag[i] <- TRUE
         }
-        
+
         if(use_both_extensions) {
           both_extended_flag[i] <- TRUE
         }
         
+        if(used_half_of_left) {
+          updated_left_distances[i]  <- half_left_distance 
+        }
+        if(used_half_of_right) {
+          updated_right_distances[i] <- half_right_distance 
+        }
+        
         # new_transects[i] <- updated_trans
         transect_geoms[i] <- updated_trans
+
+        # ---------------------------------------------------
+        # TODO: UNCOMMENT ABOVE
+        # ---------------------------------------------------
         
         # start %>% class()
       
@@ -717,7 +867,7 @@ fema_uids
     # ----------------------------------------------------------------
     extended_for_map <- 
       any_extended %>% 
-      dplyr::slice(1:5000)
+      dplyr::slice(1:1000)
     
     og_transects_for_map <- 
       transects %>% 
diff --git a/runners/cs_runner/preprocess_fema.R b/runners/cs_runner/preprocess_fema.R
index ba74369..52653d3 100644
--- a/runners/cs_runner/preprocess_fema.R
+++ b/runners/cs_runner/preprocess_fema.R
@@ -342,8 +342,7 @@ DELETE_STAGING_GPKGS <- FALSE
 
 # FEMA_VPU_SUBFOLDERS
 
-# for (vpu_dir in FEMA_VPU_SUBFOLDERS) {
-for (i in 1:4) {
+for (vpu_dir in FEMA_VPU_SUBFOLDERS) {
+# for (i in 1:4) {
-  vpu_dir = FEMA_VPU_SUBFOLDERS[i]
   message("Merging files in '", basename(vpu_dir), "' directory...")
   
@@ -407,15 +407,19 @@ for (i in 1:4) {
 MERGED_DIRS <- paste0(FEMA_VPU_SUBFOLDERS, "/merged")
 
 for (i in 1:length(FEMA_VPU_SUBFOLDERS)) {
+  # i = 8
+  # i
   vpu_dir = FEMA_VPU_SUBFOLDERS[i]
   
   VPU        <- basename(vpu_dir)
   
   message("Attempting to union FEMA polygons for '", VPU, "'...")
+  
   merged_dir <- paste0(vpu_dir, "/merged")
   fema_vpu_file <- list.files(merged_dir, full.names = TRUE)
   
-  has_fema_vpu_file <- ifelse(length(fema_vpu_file) > 0, TRUE, FALSE)
+  has_fema_vpu_file <- length(fema_vpu_file) > 0
+  # has_fema_vpu_file
   # message()
   # fema_vpu_file
 # }
@@ -430,34 +434,76 @@ for (i in 1:length(FEMA_VPU_SUBFOLDERS)) {
   fema_vpu_file <- fema_vpu_file[!grepl("_union.gpkg", fema_vpu_file)]
 
   fema_vpu <- sf::read_sf(fema_vpu_file)
-
+  
+  # fema_vpu
+  
   # fema_ids <- c(695)
   # fema_vpu <-
   #   fema_vpu %>%
   #   dplyr::group_by(source) %>%
   #   dplyr::summarise()  %>%
   #   dplyr::ungroup()
+  # fema_vpu
+  
+  # fema_snapped <- sf::st_snap(fema_vpu, fema_vpu, tolerance = 10)
+  # sf::st_
+  # message("Removing holes before dissolve...")
+  fema_vpu <- nngeo::st_remove_holes(fema_vpu)
+  # 
+  # message("Making valid geometries...")
+  # fema_vpu <- sf::st_make_valid(fema_vpu) 
+  
+  # fema_vpu <- 
+  #   fema_vpu %>% 
+  #   sf::st_cast("MULTIPOLYGON")
+  
+  message("Dissolving...")
   
   # 2633 = old number of polygons
-  fema_vpu  <- rmapshaper::ms_dissolve(fema_vpu,
-                                       field = "source",
-                                       sys = TRUE,
-                                       sys_mem = 16
-                                       )
-  fema_vpu  <- rmapshaper::ms_explode(fema_vpu,     
-                                      sys = TRUE,
-                                      sys_mem = 16)
+  fema_vpu <- rmapshaper::ms_dissolve(
+                            input   = fema_vpu,
+                            field   = "source",
+                            sys     = TRUE,
+                            sys_mem = 16
+                            )
+  
+  message("Exploding...")
+  # mapview::npts(fema_vpu)
+  # mapview::npts(fema_vpu_dissolve)
+  fema_vpu <- rmapshaper::ms_explode(
+                            input   = fema_vpu,     
+                            sys     = TRUE,
+                            sys_mem = 16
+                            )
+  # mapview::npts(fema_exp)
+  message("Removing holes after explosion...")
+  fema_vpu <- nngeo::st_remove_holes(fema_vpu)
+  # mapview::npts(fema_exp_noholes)
+  
+  # slice_subset = 1:50
+  # fema_exp_noholes[slice_subset, ]
+  # mapview::mapview(  fema_vpu[1:100, ], col.regions = "dodgerblue")+ 
+  # mapview::mapview(  fema_exp[slice_subset, ], col.regions = "red") +
+  #   mapview::mapview(  fema_exp_noholes[slice_subset, ], col.regions = "green")
+  # fema_vpu <- rmapshaper::ms_dissolve(fema_vpu,
+  #                                    field = "source",
+  #                                    sys = TRUE,
+  #                                    sys_mem = 16
+  # # )
+  # fema_vpu  <- rmapshaper::ms_explode(fema_vpu,     
+  #                                     sys = TRUE,
+  #                                     sys_mem = 16)
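+  
+  # the cleanup sequence above, collected into one helper (a minimal sketch with a
+  # hypothetical name; uses the same nngeo / rmapshaper calls as the live code):
+  # dissolve_and_explode <- function(polys, field = "source", sys_mem = 16) {
+  #   polys <- nngeo::st_remove_holes(polys)                  # drop interior rings first
+  #   polys <- rmapshaper::ms_dissolve(input = polys, field = field,
+  #                                    sys = TRUE, sys_mem = sys_mem)
+  #   polys <- rmapshaper::ms_explode(input = polys, sys = TRUE, sys_mem = sys_mem)
+  #   nngeo::st_remove_holes(polys)                           # drop holes created by the dissolve
+  # }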
 
   fema_vpu <-
     fema_vpu %>%
-    dplyr::group_by(source) %>%
+    # dplyr::group_by(source) %>%
     dplyr::mutate(
       state    = tolower(gsub("-100yr-flood_valid_clean.gpkg", "", source)),
       vpu      = gsub("VPU_", "", VPU),
-      fema_id  = paste0(state, "_", 1:dplyr::n())
+      fema_id  = 1:dplyr::n()
       ) %>%
     dplyr::ungroup() %>%
-    dplyr::relocate(vpu, fema_id, source, state, geom)
+    dplyr::select(vpu, fema_id, state, geom = geometry)
   
   if (OVERWRITE_FEMA_FILES) {
     union_file_path <- gsub(".gpkg", "_union.gpkg", fema_vpu_file)

From 568925b85558ff349ff821283b4ac02da01d56d2 Mon Sep 17 00:00:00 2001
From: anguswg-ucsb <anguswatters@gmail.com>
Date: Tue, 21 May 2024 21:18:20 -0700
Subject: [PATCH 35/64] slowly putting all the code into individual functions
 for extending transects via fema floodplain polygons

---
 runners/cs_runner/add_fema_to_transects.R | 1164 +++++++++++----------
 1 file changed, 585 insertions(+), 579 deletions(-)

diff --git a/runners/cs_runner/add_fema_to_transects.R b/runners/cs_runner/add_fema_to_transects.R
index 61a599e..d23b67e 100644
--- a/runners/cs_runner/add_fema_to_transects.R
+++ b/runners/cs_runner/add_fema_to_transects.R
@@ -67,10 +67,14 @@ path_df
   
   # read in nextgen flowlines data
   flines <- sf::read_sf(nextgen_path, layer = "flowpaths")
-  # library(nngeo)
-  #   
-  # fema
+  system.time({
+    extended_transects <- get_transect_extension_distances_to_polygons(transects,
+                                                                       fema,
+                                                                       flines,
+                                                                       3000)
   # 
+  })
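+  # assumed signature, pieced together from this call and the parameter comments on
+  # the function definition below:
+  # get_transect_extension_distances_to_polygons(transect_lines, polygons, flines, max_extension_distance)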
+   # library(nngeo)
   # fema %>% 
   #   dplyr::group_by(fema_id) %>% 
   #   dplyr::mutate(
@@ -81,7 +85,6 @@ path_df
   # fema_sub <-   
   #   fema %>% 
   #   dplyr::filter(fema_id %in% c(1726))
-  # 
   # # fema %>% 
   #   # dplyr::filter(fema_id %in% c(1726)) %>%
   #   mapview::mapview(fema_sub[1, ], col.regions = "red") +
@@ -180,7 +183,7 @@ path_df
     
     return(use_transect)
   }
-  
+
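+  # the validity rule is_valid_transect_line() applies above, reconstructed as a
+  # minimal sketch from the commented-out "original method" in the extension loop
+  # (hypothetical name; a caller would pass transect_geoms[-i] as other_transects
+  # so a transect is never tested against itself):
+  # is_valid_candidate <- function(candidate, other_transects, flines_geos) {
+  #   fline_hits   <- geos::geos_intersection(candidate, flines_geos)
+  #   crosses_once <- sum(geos::geos_type(fline_hits) == "point") == 1 &&
+  #                   sum(geos::geos_type(fline_hits) == "multipoint") == 0
+  #   crosses_once && !any(geos::geos_intersects(candidate, other_transects))
+  # }
+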
-  # Give a set of transecct linestrings and poylgons and get the minimum distance to extend each transect line (from both directions, to try and reach the edge of a "polygons")
-  # internal function for extending transect lines out to FEMA 100 year flood plain polygons
-  # transect_lines, set of Sf linestrigns to extend (only if the transect lines are ENTIRELLY within a polygons)
+  # Given a set of transect linestrings and polygons, get the minimum distance to extend each transect line (in both directions, to try to reach the edge of a polygon)
+  # internal function for extending transect lines out to FEMA 100-year floodplain polygons
+  # transect_lines - set of sf linestrings to extend (only if the transect lines are ENTIRELY within a polygon)
@@ -193,19 +196,19 @@ path_df
     
     ###    ###    ###    ###    ###    ###    ###
     ###    ###    ###    ###    ###    ###    ###
-    transect_lines <- transects
-    polygons <- fema
-    # # flines <- flines
-    # max_extension_distance <- 3000
-    max_extension_distance = 3500
+    # transect_lines <- transects
+    # polygons <- fema
+    # # # flines <- flines
+    # # max_extension_distance <- 3000
+    # max_extension_distance = 3500
     # ###    ###    ###    ###    ###    ###    ###
     ###    ###    ###    ###    ###    ###    ###
     
     # keep 10% of the original points for speed
-    polygons <- rmapshaper::ms_simplify(polygons, keep_shapes = F, keep = 0.10)
+    polygons <- rmapshaper::ms_simplify(polygons, keep_shapes = FALSE, keep = 0.10)
     
-    mapview::npts(fema)
-    mapview::npts(polygons)
+    # mapview::npts(fema)
+    # mapview::npts(polygons)
     # transects <- sf::read_sf(transect_path)
     
      # polygons
@@ -338,10 +341,10 @@ path_df
     
     # max_extension_distance = 3000
     # which(transects_with_distances$hy_id == "wb-1003839")
-    
-    left_trans[which(left_trans$hy_id == "wb-1003839"), ]$cs_lengthm
-    right_trans[which(left_trans$hy_id == "wb-1003839"), ]$cs_lengthm
-    
+    # 
+    # left_trans[which(left_trans$hy_id == "wb-1003839"), ]$cs_lengthm
+    # right_trans[which(left_trans$hy_id == "wb-1003839"), ]$cs_lengthm
+    # 
     # which(right_trans$hy_id == "wb-1003839")
     
     left_distances <- calc_extension_distances(
@@ -365,9 +368,9 @@ path_df
     left_trans$left_distance    <- left_distances
     right_trans$right_distance  <- right_distances
     
-    extensions_by_id %>% 
-      dplyr::filter(hy_id == "wb-1003839")
-    
+    # extensions_by_id %>% 
+    #   dplyr::filter(hy_id == "wb-1003839")
+    # 
     # distance to extend LEFT and/or RIGHT for each hy_id/cs_id
     extensions_by_id <- dplyr::left_join(
                           sf::st_drop_geometry(
@@ -382,20 +385,20 @@ path_df
                           by = c("hy_id", "cs_id")
                           )
     
-     ######### ######## ######## ######## ####### 
-    # TODO: This is temporary !!!!!!
-    fema_index_df <- dplyr::left_join(
-      sf::st_drop_geometry(
-        dplyr::select(left_trans, 
-                      hy_id, cs_id, left_distance, left_fema_index)
-      ),
-      sf::st_drop_geometry(
-        dplyr::select(right_trans, 
-                      hy_id, cs_id, 
-                      right_distance, right_fema_index)
-      ),
-      by = c("hy_id", "cs_id")
-    )
+    #  ######### ######## ######## ######## ####### 
+    # # TODO: This is temporary !!!!!!
+    # fema_index_df <- dplyr::left_join(
+    #   sf::st_drop_geometry(
+    #     dplyr::select(left_trans, 
+    #                   hy_id, cs_id, left_distance, left_fema_index)
+    #   ),
+    #   sf::st_drop_geometry(
+    #     dplyr::select(right_trans, 
+    #                   hy_id, cs_id, 
+    #                   right_distance, right_fema_index)
+    #   ),
+    #   by = c("hy_id", "cs_id")
+    # )
     ######## ######## ######## ######## ########
     
     # foi <- sf::st_as_sf(intersect_polygons[fema_uids]) %>% dplyr::mutate(
@@ -422,7 +425,7 @@ path_df
     
     # TODO: Add left/right extension distancces to transect data
     # TODO: this can ultimately just be the "transects" variable, dont need to make new "transects_with_distances" variable
-    transects_with_distances <- 
+    transects <- 
       transects %>% 
       dplyr::left_join(
                 extensions_by_id,
@@ -448,9 +451,9 @@ path_df
       ) %>% 
       hydrofabric3D::add_tmp_id()
     
-    transects_with_distances %>% 
-      dplyr::filter(hy_id == "wb-1003839")
-    # transects
+    # transects_with_distances %>% 
+    #   dplyr::filter(hy_id == "wb-1003839")
+    # # transects
     # flines %>% 
     #   dplyr::arrange(-tot_drainage_areasqkm) 
     # flines %>% 
@@ -459,31 +462,31 @@ path_df
     #   # dplyr::filter(hydroseq == max(hydroseq)) %>% 
     #   mapview::mapview()
     
-    # extensions_by_id
-    transects_with_distances %>% 
-      dplyr::filter(!tmp_id %in% hydrofabric3D::add_tmp_id(extensions_by_id)$tmp_id)
-    
-    ######## ######## ######## ######## ######## ######## ######## ######## 
-    left_extended_flag[1:20]
-    right_extended_flag[1:20]
-    both_extended_flag[1:20]
-    
-    range_vect <- 1:500
-    
-    fema_uids <- unique(c(unlist(fema_index_df[range_vect, ]$left_fema_index),     unlist(fema_index_df[range_vect, ]$right_fema_index)))
-    fema_uids <- fema_uids[!is.na(fema_uids)]
-fema_uids
-    foi <- sf::st_as_sf(intersect_polygons[fema_uids]) %>% dplyr::mutate(
-      fema_id = fema_uids
-    )
-    # ooi <- sf::st_as_sf()
-    # toi <- sf::st_as_sf(new_transects[1:20])
-     toi <- sf::st_as_sf(transect_geoms[range_vect])
-    toi
-    og_trans <- transects_with_distances[range_vect, ]
-    mapview::mapview(foi, col.regions = "dodgerblue") +
-      mapview::mapview(toi, color = "red") +
-      mapview::mapview(og_trans, color = "green")
+    # # extensions_by_id
+    # transects_with_distances %>% 
+    #   dplyr::filter(!tmp_id %in% hydrofabric3D::add_tmp_id(extensions_by_id)$tmp_id)
+    
+#     ######## ######## ######## ######## ######## ######## ######## ######## 
+#     left_extended_flag[1:20]
+#     right_extended_flag[1:20]
+#     both_extended_flag[1:20]
+#     
+#     range_vect <- 1:500
+#     
+#     fema_uids <- unique(c(unlist(fema_index_df[range_vect, ]$left_fema_index),     unlist(fema_index_df[range_vect, ]$right_fema_index)))
+#     fema_uids <- fema_uids[!is.na(fema_uids)]
+# fema_uids
+#     foi <- sf::st_as_sf(intersect_polygons[fema_uids]) %>% dplyr::mutate(
+#       fema_id = fema_uids
+#     )
+#     # ooi <- sf::st_as_sf()
+#     # toi <- sf::st_as_sf(new_transects[1:20])
+#      toi <- sf::st_as_sf(transect_geoms[range_vect])
+#     toi
+#     og_trans <- transects_with_distances[range_vect, ]
+#     mapview::mapview(foi, col.regions = "dodgerblue") +
+#       mapview::mapview(toi, color = "red") +
+#       mapview::mapview(og_trans, color = "green")
     ######## ######## ### ##### ######## ######## ######## ######## ######## 
     
     fline_id_array       <- flines$id
@@ -491,13 +494,13 @@ fema_uids
     # Convert the net object into a geos_geometry
     flines_geos          <- geos::as_geos_geometry(flines)
     
-    transect_hy_id_array <- transects_with_distances$hy_id
-    transect_cs_id_array <- transects_with_distances$cs_id
+    transect_hy_id_array <- transects$hy_id
+    transect_cs_id_array <- transects$cs_id
     
-    transect_geoms       <- geos::as_geos_geometry(transects_with_distances$geom)
+    transect_geoms       <- geos::as_geos_geometry(transects$geom)
    
-    left_distances       <- transects_with_distances$left_distance
-    right_distances      <- transects_with_distances$right_distance
+    left_distances       <- transects$left_distance
+    right_distances      <- transects$right_distance
     
     # preallocate vector that stores the extension. distances
     new_transects <- vctrs::vec_c(rep(geos::geos_empty(), length(transect_hy_id_array)))
@@ -807,7 +810,7 @@ fema_uids
      
     }
     
-    transects2 <- transects 
+    # transects2 <- transects 
       # dplyr::mutate(
       #   new_cs_lengthm = as.numeric(sf::st_length(geom))
       # ) %>% 
@@ -815,10 +818,10 @@ fema_uids
     
     
     # Update the "transects_to_extend" with new geos geometries ("geos_list")
-    sf::st_geometry(transects2) <- sf::st_geometry(sf::st_as_sf(transect_geoms))
+    sf::st_geometry(transects) <- sf::st_geometry(sf::st_as_sf(transect_geoms))
     
-    transects2 <- 
-      transects2 %>% 
+    transects <- 
+      transects %>% 
       dplyr::mutate(
         new_cs_lengthm = as.numeric(sf::st_length(geom))
       ) %>% 
@@ -830,138 +833,141 @@ fema_uids
     #   )
     # 
     
-    transects2$left_is_extended  <- left_extended_flag
-    transects2$right_is_extended <- right_extended_flag
-    
-    transects2 %>% 
-      dplyr::filter(left_is_extended, right_is_extended)
-    
-    any_extended <- 
-      transects2 %>% 
-      dplyr::filter(left_is_extended | right_is_extended)
-    
-    any_flines <- 
-      flines %>% 
-      dplyr::filter(id %in% any_extended$hy_id)
-    
-    # left_only_extended <- 
-    #   transects2 %>% 
-    #   dplyr::filter(left_is_extended, !right_is_extended)
-    # 
-    # left_only_flines <- 
-    #   flines %>% 
-    #   dplyr::filter(id %in% left_only_extended$hy_id)
-    # 
-    # right_only_extended <- 
-    #   transects2 %>% 
-    #   dplyr::filter(!left_is_extended, right_is_extended)
-    # 
-    # right_only_flines <- 
-    #   flines %>% 
-    #   dplyr::filter(id %in% right_only_extended$hy_id)
-  
-    # left_fema_polygons <- 
-      # left_trans %>% 
-    # ----------------------------------------------------------------
-    # ------- Subset data for mapping ----------- 
-    # ----------------------------------------------------------------
-    extended_for_map <- 
-      any_extended %>% 
-      dplyr::slice(1:1000)
-    
-    og_transects_for_map <- 
-      transects %>% 
-      hydrofabric3D::add_tmp_id() %>% 
-      dplyr::filter(tmp_id %in% hydrofabric3D::add_tmp_id(extended_for_map)$tmp_id)
-    
-    fema_indexes_in_aoi <-
-      dplyr::bind_rows(
-        sf::st_drop_geometry(
-          dplyr::rename(left_trans, fema_index = left_fema_index)
-        ),
-        sf::st_drop_geometry(     
-          dplyr::rename(right_trans, 
-                        fema_index = right_fema_index)
-        )
-      ) %>% 
-      hydrofabric3D::add_tmp_id() %>% 
-      dplyr::filter(tmp_id %in% hydrofabric3D::add_tmp_id(extended_for_map)$tmp_id) %>% 
-      # dplyr::filter(
-      #   # tmp_id %in% hydrofabric3D::add_tmp_id(left_only_extended)$tmp_id |
-      #   # tmp_id %in% hydrofabric3D::add_tmp_id(right_only_extended)$tmp_id
-      #  
-      #   tmp_id %in%  unique(hydrofabric3D::add_tmp_id(dplyr::filter(transects2, left_is_extended, right_is_extended))$tmp_id)
-      #   
-      #   ) %>% 
-      # dplyr::filter(left_is_within_fema | right_is_within_fema) %>% 
-      # dplyr::slice(1:200) %>%
-      .$fema_index %>% 
-      unlist() %>% 
-      na.omit() %>% 
-      unique() %>% 
-      sort()
-      # length()
-    
-    sf::st_as_sf(intersect_polygons[fema_indexes_in_aoi])
-
-    # hydrofabric3D::add_tmp_id(left_only_extended)$tmp_id
-    # transects_with_distances
-    # %>% 
-    mapview::mapview( sf::st_as_sf(intersect_polygons[fema_indexes_in_aoi]),  col.regions = "lightblue") + 
-      mapview::mapview(any_flines, color = "dodgerblue") + 
-      mapview::mapview(og_transects_for_map, color = "green") + 
-      mapview::mapview(extended_for_map, color = "red") 
-
-      # mapview::mapview(left_only_flines, color = "dodgerblue") + 
-      #     mapview::mapview(right_only_flines, color = "dodgerblue") + 
-    # mapview::mapview(left_only_extended, color = "red") + 
-    #      mapview::mapview(right_only_extended, color = "green")
-    # transects$cs_lengthm  <- length_list
-    # ----------------------------------------------------------------
-    # ----------------------------------------------------------------
-    # ----------------------------------------------------------------  
-  
-      # make the new transect line from the start and points 
-      final_line <- geos::geos_make_linestring(x = c(X_start, X_end),
-                                               y = c(Y_start, Y_end), 
-                                               crs = wk::wk_crs(current_trans)
-                                               )
-      geos::geos_make_collection(start, type_id = "LINESTRING")
-      
-      
-      left_start <- geos::geos_point_start(left_extended_trans)
-      right_end  <- geos::geos_point_end(right_extended_trans)
-      
-      fline_intersects
-      geos::geos_type(fline_intersects) == "point"
-      
-      geos::geos_intersects(current_trans, )
-      
-      # if(
-      #   geos::geos_type(fline_intersects)
-      #   
-      #   
-      # )
-      mapview::mapview(sf::st_as_sf(trans), color = "green") +
-       mapview::mapview(sf::st_as_sf(left_extended_trans), color = "red")     +
-        mapview::mapview(sf::st_as_sf(right_extended_trans), color = "red")     +
-        mapview::mapview(      sf::st_as_sf(intersect_polygons[[curr_fema_index]]), col.regions = "dodgerblue")  +  
-      mapview::mapview(      sf::st_as_sf(final_line), color = "yellow")   
-      
-      
-      
-      
-        
-      
-    }
-    
-    
-    left_trans$left_extension_distance
+    transects$left_is_extended  <- left_extended_flag
+    transects$right_is_extended <- right_extended_flag
     
-    length(right_distances)
+    return(transects)
     
-    
-    # Calculate the minimum distance a line would need to extend to reach the boundary of the polygon/line that the input geometries are entirely within 
+  }    
+    # transects2 %>% 
+      # dplyr::filter(left_is_extended, right_is_extended)
+#     
+#     any_extended <- 
+#       transects2 %>% 
+#       dplyr::filter(left_is_extended | right_is_extended)
+#     
+#     any_flines <- 
+#       flines %>% 
+#       dplyr::filter(id %in% any_extended$hy_id)
+#     
+#     # left_only_extended <- 
+#     #   transects2 %>% 
+#     #   dplyr::filter(left_is_extended, !right_is_extended)
+#     # 
+#     # left_only_flines <- 
+#     #   flines %>% 
+#     #   dplyr::filter(id %in% left_only_extended$hy_id)
+#     # 
+#     # right_only_extended <- 
+#     #   transects2 %>% 
+#     #   dplyr::filter(!left_is_extended, right_is_extended)
+#     # 
+#     # right_only_flines <- 
+#     #   flines %>% 
+#     #   dplyr::filter(id %in% right_only_extended$hy_id)
+#   
+#     # left_fema_polygons <- 
+#       # left_trans %>% 
+#     # ----------------------------------------------------------------
+#     # ------- Subset data for mapping ----------- 
+#     # ----------------------------------------------------------------
+#     extended_for_map <- 
+#       any_extended %>% 
+#       dplyr::slice(1:1000)
+#     
+#     og_transects_for_map <- 
+#       transects %>% 
+#       hydrofabric3D::add_tmp_id() %>% 
+#       dplyr::filter(tmp_id %in% hydrofabric3D::add_tmp_id(extended_for_map)$tmp_id)
+#     
+#     fema_indexes_in_aoi <-
+#       dplyr::bind_rows(
+#         sf::st_drop_geometry(
+#           dplyr::rename(left_trans, fema_index = left_fema_index)
+#         ),
+#         sf::st_drop_geometry(     
+#           dplyr::rename(right_trans, 
+#                         fema_index = right_fema_index)
+#         )
+#       ) %>% 
+#       hydrofabric3D::add_tmp_id() %>% 
+#       dplyr::filter(tmp_id %in% hydrofabric3D::add_tmp_id(extended_for_map)$tmp_id) %>% 
+#       # dplyr::filter(
+#       #   # tmp_id %in% hydrofabric3D::add_tmp_id(left_only_extended)$tmp_id |
+#       #   # tmp_id %in% hydrofabric3D::add_tmp_id(right_only_extended)$tmp_id
+#       #  
+#       #   tmp_id %in%  unique(hydrofabric3D::add_tmp_id(dplyr::filter(transects2, left_is_extended, right_is_extended))$tmp_id)
+#       #   
+#       #   ) %>% 
+#       # dplyr::filter(left_is_within_fema | right_is_within_fema) %>% 
+#       # dplyr::slice(1:200) %>%
+#       .$fema_index %>% 
+#       unlist() %>% 
+#       na.omit() %>% 
+#       unique() %>% 
+#       sort()
+#       # length()
+#     
+#     sf::st_as_sf(intersect_polygons[fema_indexes_in_aoi])
+# 
+#     # hydrofabric3D::add_tmp_id(left_only_extended)$tmp_id
+#     # transects_with_distances
+#     # %>% 
+#     mapview::mapview( sf::st_as_sf(intersect_polygons[fema_indexes_in_aoi]),  col.regions = "lightblue") + 
+#       mapview::mapview(any_flines, color = "dodgerblue") + 
+#       mapview::mapview(og_transects_for_map, color = "green") + 
+#       mapview::mapview(extended_for_map, color = "red") 
+# 
+#       # mapview::mapview(left_only_flines, color = "dodgerblue") + 
+#       #     mapview::mapview(right_only_flines, color = "dodgerblue") + 
+#     # mapview::mapview(left_only_extended, color = "red") + 
+#     #      mapview::mapview(right_only_extended, color = "green")
+#     # transects$cs_lengthm  <- length_list
+#     # ----------------------------------------------------------------
+#     # ----------------------------------------------------------------
+#     # ----------------------------------------------------------------  
+#   
+#       # make the new transect line from the start and points 
+#       final_line <- geos::geos_make_linestring(x = c(X_start, X_end),
+#                                                y = c(Y_start, Y_end), 
+#                                                crs = wk::wk_crs(current_trans)
+#                                                )
+#       geos::geos_make_collection(start, type_id = "LINESTRING")
+#       
+#       
+#       left_start <- geos::geos_point_start(left_extended_trans)
+#       right_end  <- geos::geos_point_end(right_extended_trans)
+#       
+#       fline_intersects
+#       geos::geos_type(fline_intersects) == "point"
+#       
+#       geos::geos_intersects(current_trans, )
+#       
+#       # if(
+#       #   geos::geos_type(fline_intersects)
+#       #   
+#       #   
+#       # )
+#       mapview::mapview(sf::st_as_sf(trans), color = "green") +
+#        mapview::mapview(sf::st_as_sf(left_extended_trans), color = "red")     +
+#         mapview::mapview(sf::st_as_sf(right_extended_trans), color = "red")     +
+#         mapview::mapview(      sf::st_as_sf(intersect_polygons[[curr_fema_index]]), col.regions = "dodgerblue")  +  
+#       mapview::mapview(      sf::st_as_sf(final_line), color = "yellow")   
+#       
+#       
+#       
+#       
+#         
+#       
+#     }
+#     
+#     
+#     left_trans$left_extension_distance
+#     
+#     length(right_distances)
+#     
+#     
+#     # Calculate the minimum distance a line would need to extend to reach the boundary of the polygon/line that the input geometries are entirely within 
     calc_extension_distances <- function(geos_geoms, ids, lines_to_cut, lines_to_cut_indices, direction = "head", max_extension_distance = 2500) {
       #####   #####   #####   #####   #####
       # geos_geoms   <- left_trans_geos
@@ -982,14 +988,14 @@ fema_uids
       # lines_to_cut_indices   = left_trans$left_fema_index
       # direction              = "head"
       # max_extension_distance = max_extension_distance
-      # 
-      
+      #
+
       #####   #####   #####   #####   #####
-      
+
       if (!direction %in% c("head", "tail")) {
           stop("Invalid 'direction' value, must be one of 'head' or 'tail'")
       }
-      
+
       # preallocate vector that stores the extension. distances
       extension_dists <- vctrs::vec_c(rep(0, length(ids)))
 
@@ -1001,400 +1007,400 @@ fema_uids
         polygon_index     <- lines_to_cut_indices[[i]]
         # any(is_within_polygon)
         message("Transect: '", curr_id, "' - (", i, ")")
-        
+
         if (is_within_polygon) {
           message("- Side of transect intersects with FEMA")
           message("\t > FEMA index: ", polygon_index)
-          
+
           curr_geom  <- geos_geoms[[i]]
           index_vect <- sort(unlist(polygon_index))
-          
+
            distance_to_extend <- hydrofabric3D:::geos_bs_distance(
             distances    = 1:max_extension_distance,
             line         = curr_geom,
             geoms_to_cut = lines_to_cut[index_vect],
             direction    = direction
           )
-          
+
           extension_dists[i] <- distance_to_extend
         }
-        
+
       }
-           
+
       return(extension_dists)
     }
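+    
+    # example call (mirrors the commented defaults at the top of the function; the
+    # real call site is truncated by an earlier hunk, so ids / lines_to_cut are
+    # assumptions):
+    # left_distances <- calc_extension_distances(
+    #   geos_geoms             = left_trans_geos,
+    #   ids                    = left_trans$tmp_id,
+    #   lines_to_cut           = intersect_lines,
+    #   lines_to_cut_indices   = left_trans$left_fema_index,
+    #   direction              = "head",
+    #   max_extension_distance = max_extension_distance
+    # )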
-    
-    
-    # ----------------------------------------------------------------------------------------------------------------
-    # Loop over every left and right halfs of transects and 
-    # if they are fully within FEMA polygons, get the minimum extension distance required for the transect to meet the FEMA polygon boundary
-    # ----------------------------------------------------------------------------------------------------------------
-    
-    left_ids          <- left_trans$tmp_id
-    
-    left_fema_indexes <- left_trans$left_fema_index
-    left_fema_bool    <- left_trans$left_is_within_fema
-    
-    # preallocate vector that stores the extension. distances
-    left_extension_dists <- vctrs::vec_c(rep(0, length(left_ids)))
-    
-    # all_equal_length_vects <- all(length(left_ids) == length(left_fema_indexes) && length(left_ids) == length(left_fema_bool))
-    # 1:length(left_ids)
-    extension_count = 0
-    
-    for(i in 1:length(left_ids)) {
-      # i = 1
-      tmp_id                 <- left_ids[i]
-      is_within_fema_polygon <- left_fema_bool[i]
-      fema_index             <- left_fema_indexes[i]
-  
-      message("Transect: '", tmp_id, "' - (", i, ")")
-      # if(is_within_fema_polygon) {
-      #   break
-      # }
-      # fema_index <- left_trans$left_fema_index[i]
-      # is_within_fema_polygon = ifelse(!is.na(left_fema_index), TRUE, FALSE)
-      
-      if(is_within_fema_polygon) {
-        
-        message("- Left side of transect intersects with FEMA")
-        message("\t > FEMA index: ", fema_index)
-        extension_count = extension_count + 1
-        message("\t > extension_count: ", extension_count)
-        
-        trans_geom <- left_trans_geos[i]
-        index_vect <- sort(unlist(fema_index))
-        
-        # mapview::mapview(sf::st_as_sf(left_trans_geos[i]), color = "red") + sf::st_as_sf(intersect_lines[index_vect])
-        
-        # intersect_lines[index_vect]
-        
-        dist_to_fema <- hydrofabric3D:::geos_bs_distance(
-                            distances    = 1:max_extension_distance,
-                            line         = trans_geom,
-                            geoms_to_cut = intersect_lines[index_vect],
-                            direction    = "head"
-                          )
-        
-        left_extension_dists[i] <- dist_to_fema
-        
-      }
-      message()
-    }
-    
-    left_trans$left_extension_dist <- left_extension_dists
-  }
-  tmp <- left_trans[1:5, ]
-  tmp
-  tmp_extended <- hydrofabric3D:::extend_by_length(tmp, tmp$left_extension_dist)
-  
-  tmp_extended
-  
-  fema_idx <- unique(unlist(dplyr::select(tmp, left_fema_index)$left_fema_index))
-  
-  mapview::mapview(dplyr::select(tmp, -left_fema_index), color = "red") +  
-    mapview::mapview(dplyr::select(tmp_extended, -left_fema_index), color = "green")  +
-    sf::st_as_sf(intersect_polygons[fema_idx])
-  
-  length(left_extension_dists)
-  
-  left_trans_geos
-  vctrs::vec_c(
-    vctrs::vec_c(
-      left_trans$hy_id
-  ),
-  vctrs::vec_cast(left_trans$cs_id)
-  )
-  
- #  
- #  # mapview::mapview(left_trans[477, ]) + intersect_polygons[1888, ]
- # left_trans <- 
- #    left_trans %>%
- #    dplyr::mutate(
- #      fema_index = unlist(sf::st_within(., intersect_polygons))
- #    ) %>% 
- #    dplyr::relocate(fema_index)
- #  
-  # left_trans_geos <- geos::as_geos_geometry(left_trans)
-  
-  sort(na.omit(unlist(unique(left_trans$fema_index))))
-  fema_polygons[na.omit(unlist(unique(left_trans$fema_index)))]
-  
-  fema_polygons %>% na.omit()
-  
-  # note: sorting the fema polygon indices (not sure if necessary)
-  left_fema <- fema_polygons[sort(na.omit(unlist(unique(left_trans$fema_index))))]
-  
-  left_fema
-  
-  left_fema %>% plot()
-  
-  geos::geos_make_linestring(geom = left_fema)
-  left_trans$fema_index
-  sort(na.omit(unlist(unique(left_trans$fema_index))))
-  
-  # left_fema <- fema_polygons[na.omit(unlist(unique(left_trans$fema_index)))]
-  left_fema_lines <- 
-    left_fema %>% 
-    sf::st_as_sf() %>% 
-    sf::st_cast("multilinestring") %>% 
-    geos::as_geos_geometry() %>% 
-    geos::geos_simplify_preserve_topology(25)
-  
-  left_fema %>% 
-    sf::st_as_sf() %>% 
-    mapview::npts()
-  
-  geos::geos_simplify_preserve_topology(left_fema_lines, 50) %>% 
-    geos::geos_num_coordinates() %>%
-    sum()
-  
-  geos::geos_num_coordinates(left_fema_lines) %>% sum()
-  
-  geos::geos_simplify_preserve_topology(left_fema_lines, 1) %>% 
-    .[3] %>% 
-    plot()
-  left_fema_lines[3] %>% plot()
-  
-  
-  left_extension_dists <- lapply(seq_along(left_trans_geos), function(i) {
-    
-    hydrofabric3d:::geos_bs_distance(
-      distances    = 1:2000,
-      line         = left_trans_geos[i],
-      geoms_to_cut = left_fema_lines,
-      direction    = "head"
-    )
-  }) %>% 
-    unlist()
-  
-  left_trans$left_head_extension_dist <- left_extension_dists
-  
-  # find the distances from the right side of transect lines 
-  right_trans <- 
-    segmented_trans %>% 
-    lwgeom::st_linesubstring(0.50, 1) %>% 
-    dplyr::mutate(
-      partition         = "right",
-      partition_lengthm = as.numeric(sf::st_length(geom))
-    ) %>% 
-    dplyr::select(hy_id, cs_source, cs_id, cs_measure,  
-                  cs_lengthm, is_extended, partition, partition_lengthm, geom)
-  right_trans_geos <- geos::as_geos_geometry(right_trans)
-  right_within_matrix <- geos::geos_within_matrix(right_trans_geos, geos::as_geos_geometry(fema_polygons))
- 
-  right_within_vect <- lapply(right_within_matrix, function(i) {
-                      if(length(i) > 0) { c(i) } else { c(na_real_) } }
-                      )
- right_trans$fema_index <- right_within_vect
-# 
-#  right_trans <- 
-#     right_trans %>%
+
+#     
+#     # ----------------------------------------------------------------------------------------------------------------
+#     # Loop over the left and right halves of transects and 
+#     # if they are fully within FEMA polygons, get the minimum extension distance required for the transect to meet the FEMA polygon boundary
+#     # ----------------------------------------------------------------------------------------------------------------
+#     
+#     left_ids          <- left_trans$tmp_id
+#     
+#     left_fema_indexes <- left_trans$left_fema_index
+#     left_fema_bool    <- left_trans$left_is_within_fema
+#     
+#     # preallocate vector that stores the extension. distances
+#     left_extension_dists <- vctrs::vec_c(rep(0, length(left_ids)))
+#     
+#     # all_equal_length_vects <- all(length(left_ids) == length(left_fema_indexes) && length(left_ids) == length(left_fema_bool))
+#     # 1:length(left_ids)
+#     extension_count = 0
+#     
+#     for(i in 1:length(left_ids)) {
+#       # i = 1
+#       tmp_id                 <- left_ids[i]
+#       is_within_fema_polygon <- left_fema_bool[i]
+#       fema_index             <- left_fema_indexes[i]
+#   
+#       message("Transect: '", tmp_id, "' - (", i, ")")
+#       # if(is_within_fema_polygon) {
+#       #   break
+#       # }
+#       # fema_index <- left_trans$left_fema_index[i]
+#       # is_within_fema_polygon = ifelse(!is.na(left_fema_index), TRUE, FALSE)
+#       
+#       if(is_within_fema_polygon) {
+#         
+#         message("- Left side of transect intersects with FEMA")
+#         message("\t > FEMA index: ", fema_index)
+#         extension_count = extension_count + 1
+#         message("\t > extension_count: ", extension_count)
+#         
+#         trans_geom <- left_trans_geos[i]
+#         index_vect <- sort(unlist(fema_index))
+#         
+#         # mapview::mapview(sf::st_as_sf(left_trans_geos[i]), color = "red") + sf::st_as_sf(intersect_lines[index_vect])
+#         
+#         # intersect_lines[index_vect]
+#         
+#         dist_to_fema <- hydrofabric3D:::geos_bs_distance(
+#                             distances    = 1:max_extension_distance,
+#                             line         = trans_geom,
+#                             geoms_to_cut = intersect_lines[index_vect],
+#                             direction    = "head"
+#                           )
+#         
+#         left_extension_dists[i] <- dist_to_fema
+#         
+#       }
+#       message()
+#     }
+#     
+#     left_trans$left_extension_dist <- left_extension_dists
+#   }
+#   tmp <- left_trans[1:5, ]
+#   tmp
+#   tmp_extended <- hydrofabric3D:::extend_by_length(tmp, tmp$left_extension_dist)
+#   
+#   tmp_extended
+#   
+#   fema_idx <- unique(unlist(dplyr::select(tmp, left_fema_index)$left_fema_index))
+#   
+#   mapview::mapview(dplyr::select(tmp, -left_fema_index), color = "red") +  
+#     mapview::mapview(dplyr::select(tmp_extended, -left_fema_index), color = "green")  +
+#     sf::st_as_sf(intersect_polygons[fema_idx])
+#   
+#   length(left_extension_dists)
+#   
+#   left_trans_geos
+#   vctrs::vec_c(
+#     vctrs::vec_c(
+#       left_trans$hy_id
+#   ),
+#   vctrs::vec_cast(left_trans$cs_id)
+#   )
+#   
+#  #  
+#  #  # mapview::mapview(left_trans[477, ]) + intersect_polygons[1888, ]
+#  # left_trans <- 
+#  #    left_trans %>%
+#  #    dplyr::mutate(
+#  #      fema_index = unlist(sf::st_within(., intersect_polygons))
+#  #    ) %>% 
+#  #    dplyr::relocate(fema_index)
+#  #  
+#   # left_trans_geos <- geos::as_geos_geometry(left_trans)
+#   
+#   sort(na.omit(unlist(unique(left_trans$fema_index))))
+#   fema_polygons[na.omit(unlist(unique(left_trans$fema_index)))]
+#   
+#   fema_polygons %>% na.omit()
+#   
+#   # note: sorting the fema polygon indices (not sure if necessary)
+#   left_fema <- fema_polygons[sort(na.omit(unlist(unique(left_trans$fema_index))))]
+#   
+#   left_fema
+#   
+#   left_fema %>% plot()
+#   
+#   geos::geos_make_linestring(geom = left_fema)
+#   left_trans$fema_index
+#   sort(na.omit(unlist(unique(left_trans$fema_index))))
+#   
+#   # left_fema <- fema_polygons[na.omit(unlist(unique(left_trans$fema_index)))]
+#   left_fema_lines <- 
+#     left_fema %>% 
+#     sf::st_as_sf() %>% 
+#     sf::st_cast("multilinestring") %>% 
+#     geos::as_geos_geometry() %>% 
+#     geos::geos_simplify_preserve_topology(25)
+#   
+#   left_fema %>% 
+#     sf::st_as_sf() %>% 
+#     mapview::npts()
+#   
+#   geos::geos_simplify_preserve_topology(left_fema_lines, 50) %>% 
+#     geos::geos_num_coordinates() %>%
+#     sum()
+#   
+#   geos::geos_num_coordinates(left_fema_lines) %>% sum()
+#   
+#   geos::geos_simplify_preserve_topology(left_fema_lines, 1) %>% 
+#     .[3] %>% 
+#     plot()
+#   left_fema_lines[3] %>% plot()
+#   
+#   
+#   left_extension_dists <- lapply(seq_along(left_trans_geos), function(i) {
+#     
+#     hydrofabric3d:::geos_bs_distance(
+#       distances    = 1:2000,
+#       line         = left_trans_geos[i],
+#       geoms_to_cut = left_fema_lines,
+#       direction    = "head"
+#     )
+#   }) %>% 
+#     unlist()
+#   
+#   left_trans$left_head_extension_dist <- left_extension_dists
+#   
+#   # find the distances from the right side of transect lines 
+#   right_trans <- 
+#     segmented_trans %>% 
+#     lwgeom::st_linesubstring(0.50, 1) %>% 
+#     dplyr::mutate(
+#       partition         = "right",
+#       partition_lengthm = as.numeric(sf::st_length(geom))
+#     ) %>% 
+#     dplyr::select(hy_id, cs_source, cs_id, cs_measure,  
+#                   cs_lengthm, is_extended, partition, partition_lengthm, geom)
+#   right_trans_geos <- geos::as_geos_geometry(right_trans)
+#   right_within_matrix <- geos::geos_within_matrix(right_trans_geos, geos::as_geos_geometry(fema_polygons))
+#  
+#   right_within_vect <- lapply(right_within_matrix, function(i) {
+#                       if(length(i) > 0) { c(i) } else { c(na_real_) } }
+#                       )
+#  right_trans$fema_index <- right_within_vect
+# # 
+# #  right_trans <- 
+# #     right_trans %>%
+# #     dplyr::mutate(
+# #       fema_index = unlist(sf::st_within(., fema_polygons))
+# #     ) %>% 
+# #     dplyr::relocate(fema_index)
+# #   
+#   
+#   right_fema <- fema_polygons[unique(right_trans$fema_index),] 
+#   right_fema_lines <- 
+#     right_fema %>% 
+#     sf::st_cast("linestring")
+#    
+#   right_extension_dists <- lapply(seq_along(right_trans_geos), function(i) {
+#     
+#     hydrofabric3d:::geos_bs_distance(
+#       distances    = 1:2000,
+#       line         = right_trans_geos[i],
+#       geoms_to_cut = right_fema_lines,
+#       direction    = "tail"
+#     )
+#   }) %>% 
+#     unlist()
+#   
+#   right_trans$right_tail_extension_dists <- right_extension_dists
+#   right_trans$right_tail_extension_dists
+#   unlist(sf::st_within(left_trans, fema_polygons))
+#   
+#   left_trans <- 
+#     left_trans %>%
 #     dplyr::mutate(
 #       fema_index = unlist(sf::st_within(., fema_polygons))
 #     ) %>% 
-#     dplyr::relocate(fema_index)
+#   dplyr::relocate(fema_index)
+#   
+#   left_fema <- fema_polygons[unique(left_trans$fema_index),]
+#   left_fema_lines <- 
+#     left_fema %>% 
+#     sf::st_cast("linestring")
+#   
+#   geos::as_geos_geometry(left_fema)
+#   mapview::mapview(left_fema, col.regions = "dodgerblue") +
+#     mapview::mapview(left_ls, color = "yellow") +
+#     mapview::mapview(left_trans, color = "red") +
+#     mapview::mapview(right_trans, color = "green")
+#   
+#   left_trans_geos <- geos::as_geos_geometry(left_trans)
+#   left_trans_geos %>% plot()
+#   left_trans_geos[2]
+#   
+#   length(left_trans_geos)
+#   
+#   left_extension_dists <- lapply(seq_along(left_trans_geos), function(i) {
+#     
+#    hydrofabric3d:::geos_bs_distance(
+#       distances    = 1:2000,
+#       line         = left_trans_geos[i],
+#       geoms_to_cut = left_fema_lines,
+#       direction    = "head"
+#     )
+#   }) %>% 
+#     unlist()
+#   
+#   left_extensions  <- geos::geos_empty()
+#   
+#   for (i in 1:length(left_extension_dists)) {
+#     dist = left_extension_dists[i]
+#     geos_line <- left_trans_geos[i]
+#     message(glue::glue("i: {i}\ndist: {dist}"))
+#     
+#     extended <- hydrofabric3d::geos_extend_line(
+#       geos_line,
+#       dist,
+#       "head"
+#     )
+#     left_extensions <- vctrs::vec_c(left_extensions, extended)
+#     
+#   }
+#   # index for only valid transects
+#   # is_valid <- !geos::geos_is_empty(left_extensions)
+#   left_extensions <- left_extensions[!geos::geos_is_empty(left_extensions)]
+#   # !geos::geos_is_empty(left_extensions)
+#   
+#   new_left_trans <- 
+#     left_trans %>% 
+#     sf::st_drop_geometry() %>% 
+#     dplyr::mutate(
+#       geom = sf::st_as_sfc(left_extensions)
+#     ) %>% 
+#     sf::st_as_sf()
+#   # geos::sf
+#   mapview::mapview(left_fema, col.regions = "dodgerblue") +
+#     mapview::mapview(left_ls, color = "yellow") +
+#     mapview::mapview(left_trans, color = "red") +
+#     mapview::mapview(new_left_trans, color = "green")
+#   mapply(function(geom, dist) {
+#       hydrofabric3d::geos_extend_line(geom, dist, "head") 
+#     },
+#   left_trans_geos,
+#   left_extension_dists
+#   )
+#   
+#   left_extensions <- lapply(seq_along(left_trans_geos), function(i) {
+#       
+#     extend_dist <- hydrofabric3d:::geos_bs_distance(
+#         distances    = 1:2000,
+#         line         = left_trans_geos[i],
+#         geoms_to_cut = left_fema_lines,
+#         direction    = "head"
+#       )
+#       
+#     hydrofabric3d::geos_extend_line(left_trans_geos[i], extend_dist, "head") 
+#     
+#     })
+#   # unlist(left_extensions)
+#   
+#   left_extension_dists <- lapply(seq_along(left_trans_geos), function(i) {
+#     hydrofabric3d:::geos_bs_distance(
+#       distances    = 1:2000,
+#       line         = left_trans_geos[i],
+#       geoms_to_cut = left_fema_lines,
+#       direction    = "head"
+#     )
+#   }) 
+#   
+#   distance_to_extend <- 
+#     hydrofabric3d:::geos_bs_distance(
+#       distances    = 1:1500,
+#       line         = left_trans_geos[1],
+#       geoms_to_cut = left_fema_lines,
+#       direction    = "head"
+#     )
+#   
+#   extended <- hydrofabric3d::geos_extend_line(left_trans_geos[1], distance_to_extend, "head") %>% 
+#     sf::st_as_sf()
+#   mapview::mapview(left_fema, col.regions = "dodgerblue") +
+#     mapview::mapview(left_ls, color = "yellow") +
+#     mapview::mapview(left_trans, color = "red") +
+#     mapview::mapview(extended, color = "green")
+#     # mapview::mapview(right_trans, color = "green")
+#   left_trans$geom %>% sf::st_length()
+#   plot(segmented_trans$geom, col = "red", lwd =5)
+#   plot(left_trans$geom, col = "green", lwd=3, add = true)
+#   plot(right_trans$geom, col = "blue", lwd=3, add = true)
+#   unlist(left_trans$geom)
+#    unlist(right_trans$geom)
+#   
+#   unlist(trans_fema$geom )
+#   unlist(split_trans$geom )
+#   split_trans %>% 
+#     sf::st_collection_extract("linestring")
+#   split_trans$geom
+#   
+#   tmp_trans$geom
+#   nngeo::st_segments(tmp_trans) %>% 
+#     .$result %>% 
+#     plot()
+#   
+#   mapview::mapview(tmp_trans) + tmp_centroid
+#   geos::geos_clip_by_rect()
+#   
+#   transects_with_
+#   
+#   lengths(transects_polygons_matrix)
+#   mapview::mapview(transects_with_fema, color = "green") + fema_with_transects
+#   unique(hydrofabric3D::add_tmp_id(transects)$tmp_id)[1:30]
+#   transects %>% 
+#     hydrofabric3D::add_tmp_id() %>% 
+#     .$tmp_id %>% 
+#     unique() %>% .[1:30]
+#   trans_subset <- 
+#     transects %>% 
+#     hydrofabric3D::add_tmp_id() %>% 
+#     dplyr::filter(tmp_id %in% unique(hydrofabric3D::add_tmp_id(transects)$tmp_id)[1:30])
+#   
+#   fema_subset <- 
+#     fema %>% 
+#     dplyr::filter(fema_id == "1268")
+# 
+#   extended <- hydrofabric3D:::extend_by_length(trans_subset,    rep(500, nrow(trans_subset)))
+#   extended
+#   clipped_trans <- rmapshaper::ms_clip(extended, fema)
+#   
+#   rmapshaper::ms_clip(extended, fema_subset)
+#   mapview::mapview(trans_subset, color = "red") + 
+#     mapview::mapview(extended, color = "yellow") +
+#     # mapview::mapview(  sf::st_difference(extended, fema_subset), color = "green") +
+#     mapview::mapview(clipped_trans,  color = "green") +
+#     fema 
+#   
+#    rep(50, nrow(trans_subset))
+#   extended <- hydrofabric3D:::extend_by_length(trans_subset,    rep(50, nrow(trans_subset)))
+#   
+#   
+#   hydrofabric3D::geos_extend_line(trans_subset, 50) %>% 
+#     sf::st_as_sf() %>% mapview::mapview()
+#   
+#   
+#   
+#   
+#   
+#   
+#   
+#   
+#   
+#   
 #   
-  
-  right_fema <- fema_polygons[unique(right_trans$fema_index),] 
-  right_fema_lines <- 
-    right_fema %>% 
-    sf::st_cast("linestring")
-   
-  right_extension_dists <- lapply(seq_along(right_trans_geos), function(i) {
-    
-    hydrofabric3d:::geos_bs_distance(
-      distances    = 1:2000,
-      line         = right_trans_geos[i],
-      geoms_to_cut = right_fema_lines,
-      direction    = "tail"
-    )
-  }) %>% 
-    unlist()
-  
-  right_trans$right_tail_extension_dists <- right_extension_dists
-  right_trans$right_tail_extension_dists
-  unlist(sf::st_within(left_trans, fema_polygons))
-  
-  left_trans <- 
-    left_trans %>%
-    dplyr::mutate(
-      fema_index = unlist(sf::st_within(., fema_polygons))
-    ) %>% 
-  dplyr::relocate(fema_index)
-  
-  left_fema <- fema_polygons[unique(left_trans$fema_index),]
-  left_fema_lines <- 
-    left_fema %>% 
-    sf::st_cast("linestring")
-  
-  geos::as_geos_geometry(left_fema)
-  mapview::mapview(left_fema, col.regions = "dodgerblue") +
-    mapview::mapview(left_ls, color = "yellow") +
-    mapview::mapview(left_trans, color = "red") +
-    mapview::mapview(right_trans, color = "green")
-  
-  left_trans_geos <- geos::as_geos_geometry(left_trans)
-  left_trans_geos %>% plot()
-  left_trans_geos[2]
-  
-  length(left_trans_geos)
-  
-  left_extension_dists <- lapply(seq_along(left_trans_geos), function(i) {
-    
-   hydrofabric3d:::geos_bs_distance(
-      distances    = 1:2000,
-      line         = left_trans_geos[i],
-      geoms_to_cut = left_fema_lines,
-      direction    = "head"
-    )
-  }) %>% 
-    unlist()
-  
-  left_extensions  <- geos::geos_empty()
-  
-  for (i in 1:length(left_extension_dists)) {
-    dist = left_extension_dists[i]
-    geos_line <- left_trans_geos[i]
-    message(glue::glue("i: {i}\ndist: {dist}"))
-    
-    extended <- hydrofabric3d::geos_extend_line(
-      geos_line,
-      dist,
-      "head"
-    )
-    left_extensions <- vctrs::vec_c(left_extensions, extended)
-    
-  }
-  # index for only valid transects
-  # is_valid <- !geos::geos_is_empty(left_extensions)
-  left_extensions <- left_extensions[!geos::geos_is_empty(left_extensions)]
-  # !geos::geos_is_empty(left_extensions)
-  
-  new_left_trans <- 
-    left_trans %>% 
-    sf::st_drop_geometry() %>% 
-    dplyr::mutate(
-      geom = sf::st_as_sfc(left_extensions)
-    ) %>% 
-    sf::st_as_sf()
-  # geos::sf
-  mapview::mapview(left_fema, col.regions = "dodgerblue") +
-    mapview::mapview(left_ls, color = "yellow") +
-    mapview::mapview(left_trans, color = "red") +
-    mapview::mapview(new_left_trans, color = "green")
-  mapply(function(geom, dist) {
-      hydrofabric3d::geos_extend_line(geom, dist, "head") 
-    },
-  left_trans_geos,
-  left_extension_dists
-  )
-  
-  left_extensions <- lapply(seq_along(left_trans_geos), function(i) {
-      
-    extend_dist <- hydrofabric3d:::geos_bs_distance(
-        distances    = 1:2000,
-        line         = left_trans_geos[i],
-        geoms_to_cut = left_fema_lines,
-        direction    = "head"
-      )
-      
-    hydrofabric3d::geos_extend_line(left_trans_geos[i], extend_dist, "head") 
-    
-    })
-  # unlist(left_extensions)
-  
-  left_extension_dists <- lapply(seq_along(left_trans_geos), function(i) {
-    hydrofabric3d:::geos_bs_distance(
-      distances    = 1:2000,
-      line         = left_trans_geos[i],
-      geoms_to_cut = left_fema_lines,
-      direction    = "head"
-    )
-  }) 
-  
-  distance_to_extend <- 
-    hydrofabric3d:::geos_bs_distance(
-      distances    = 1:1500,
-      line         = left_trans_geos[1],
-      geoms_to_cut = left_fema_lines,
-      direction    = "head"
-    )
-  
-  extended <- hydrofabric3d::geos_extend_line(left_trans_geos[1], distance_to_extend, "head") %>% 
-    sf::st_as_sf()
-  mapview::mapview(left_fema, col.regions = "dodgerblue") +
-    mapview::mapview(left_ls, color = "yellow") +
-    mapview::mapview(left_trans, color = "red") +
-    mapview::mapview(extended, color = "green")
-    # mapview::mapview(right_trans, color = "green")
-  left_trans$geom %>% sf::st_length()
-  plot(segmented_trans$geom, col = "red", lwd =5)
-  plot(left_trans$geom, col = "green", lwd=3, add = true)
-  plot(right_trans$geom, col = "blue", lwd=3, add = true)
-  unlist(left_trans$geom)
-   unlist(right_trans$geom)
-  
-  unlist(trans_fema$geom )
-  unlist(split_trans$geom )
-  split_trans %>% 
-    sf::st_collection_extract("linestring")
-  split_trans$geom
-  
-  tmp_trans$geom
-  nngeo::st_segments(tmp_trans) %>% 
-    .$result %>% 
-    plot()
-  
-  mapview::mapview(tmp_trans) + tmp_centroid
-  geos::geos_clip_by_rect()
-  
-  transects_with_
-  
-  lengths(transects_polygons_matrix)
-  mapview::mapview(transects_with_fema, color = "green") + fema_with_transects
-  unique(hydrofabric3d::add_tmp_id(transects)$tmp_id)[1:30]
-  transects  %>% 
-    hydrofabric3d::add_tmp_id(transects) %>% 
-    .$tmp_id %>% 
-    unique() %>% .[1:30]
-  trans_subset <- 
-    transects %>% 
-    hydrofabric3d::add_tmp_id() %>% 
-    dplyr::filter(tmp_id %in%  unique(hydrofabric3d::add_tmp_id(transects)$tmp_id)[1:30])
-  
-  fema_subset <- 
-    fema %>% 
-    dplyr::filter(fema_id == "1268")
-
-  extended <- hydrofabric3D:::extend_by_length(trans_subset,    rep(500, nrow(trans_subset)))
-  extended
-  clipped_trans <- rmapshaper::ms_clip(extended, fema)
-  
-  rmapshaper::ms_clip(extended, fema_subset)
-  mapview::mapview(trans_subset, color = "red") + 
-    mapview::mapview(extended, color = "yellow") +
-    # mapview::mapview(  sf::st_difference(extended, fema_subset), color = "green") +
-    mapview::mapview(clipped_trans,  color = "green") +
-    fema 
-  
-   rep(50, nrow(trans_subset))
-  extended <- hydrofabric3D:::extend_by_length(trans_subset,    rep(50, nrow(trans_subset)))
-  
-  
-  hydrofabric3D::geos_extend_line(trans_subset, 50) %>% 
-    sf::st_as_sf() %>% mapview::mapview()
-  
-  
-  
-  
-  
-  
-  
-  
-  
-  
-  
   
   
  

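The exploratory block above repeats one pattern several times: binary-search the minimum distance each transect must grow to reach the FEMA boundary with hydrofabric3D:::geos_bs_distance(), then extend it with hydrofabric3D::geos_extend_line(). A minimal sketch of that pattern, assuming those two functions behave as the commented calls suggest (one geos line in, one distance out), and combining results once at the end instead of growing a vector with vctrs::vec_c() inside a for loop:

# Sketch only: relies on hydrofabric3D's geos_bs_distance()/geos_extend_line()
# behaving as the commented calls above suggest.
extend_to_boundary <- function(lines_geos, boundary_lines, direction = "head", max_dist = 2000) {
  # minimum distance each line must grow to reach the boundary lines
  dists <- vapply(seq_along(lines_geos), function(i) {
    hydrofabric3D:::geos_bs_distance(
      distances    = 1:max_dist,
      line         = lines_geos[i],
      geoms_to_cut = boundary_lines,
      direction    = direction
    )
  }, numeric(1))

  # extend each line by its own distance, then combine once
  extended <- lapply(seq_along(lines_geos), function(i) {
    hydrofabric3D::geos_extend_line(lines_geos[i], dists[i], direction)
  })
  do.call(vctrs::vec_c, extended)
}

Note that dropping empty geometries after the fact, as the exploration does before rebuilding left_trans, can misalign geometries with their attribute rows; filtering the attribute table with the same mask is safer.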
From 248d5401b5b38fb83fabb33eaaa622cf033dfa0c Mon Sep 17 00:00:00 2001
From: anguswg-ucsb <anguswatters@gmail.com>
Date: Thu, 23 May 2024 13:35:52 -0700
Subject: [PATCH 36/64] small stuff

---
 runners/cs_runner/add_fema_to_transects.R | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/runners/cs_runner/add_fema_to_transects.R b/runners/cs_runner/add_fema_to_transects.R
index d23b67e..dbbe236 100644
--- a/runners/cs_runner/add_fema_to_transects.R
+++ b/runners/cs_runner/add_fema_to_transects.R
@@ -200,7 +200,7 @@ path_df
     # polygons <- fema
     # # # flines <- flines
     # # max_extension_distance <- 3000
-    # max_extension_distance = 3500
+  # max_extension_distance = 3500
     # ###    ###    ###    ###    ###    ###    ###
     ###    ###    ###    ###    ###    ###    ###
     
@@ -243,7 +243,6 @@ path_df
     
     # mapview::npts(sf::st_as_sf(intersect_lines))
 
-    
  #    intersect_polygons %>% 
  #      geos::geos_make_valid() %>% 
  #      geos::geos_is_valid() %>% all()
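The commented check above is worth keeping as a guard: repair the polygons once, then assert validity before any within/intersects tests. A short sketch using the same geos bindings:

# Sketch: repair FEMA polygons up front and verify before intersection work.
library(geos)

fema_geos <- geos::geos_make_valid(geos::as_geos_geometry(intersect_polygons))
stopifnot(all(geos::geos_is_valid(fema_geos)))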

From f4f7832e50d6a8977de5b4278142211fb32e4c74 Mon Sep 17 00:00:00 2001
From: anguswg-ucsb <anguswatters@gmail.com>
Date: Thu, 23 May 2024 13:36:57 -0700
Subject: [PATCH 37/64] random cleanups

---
 runners/cs_runner/add_fema_to_transects.R | 18 +-----------------
 1 file changed, 1 insertion(+), 17 deletions(-)

diff --git a/runners/cs_runner/add_fema_to_transects.R b/runners/cs_runner/add_fema_to_transects.R
index dbbe236..a36b369 100644
--- a/runners/cs_runner/add_fema_to_transects.R
+++ b/runners/cs_runner/add_fema_to_transects.R
@@ -9,8 +9,6 @@ transects_prefix <- paste0(s3_bucket, version_prefix, "/3d/transects/")
 
 # paths to nextgen datasets and model attribute parquet files
 nextgen_files   <- list.files(nextgen_dir, full.names = FALSE)
-# fema_files      <- list.files(fema_fgb_path, full.names = FALSE)
-# fema_bb_files   <- list.files(fema_fgb_bb_path, full.names = FALSE)
 transects_files <- list.files(transects_dir, full.names = FALSE)
 transects_files <- transects_files[!grepl("updated", transects_files)]
 
@@ -29,7 +27,7 @@ path_df
 # loop over each vpu and generate cross sections, then save locally and upload to s3 bucket
 # for(i in 1:nrow(path_df)) {
   
-  i = 8
+  # i = 8
   
   # nextgen file and full path
   nextgen_file <- path_df$x[i]
@@ -72,7 +70,6 @@ path_df
                                                fema,
                                                flines,
                                                3000)
-  # 
   })
    # library(nngeo)
   # fema %>% 
@@ -91,7 +88,6 @@ path_df
   #       mapview::mapview(sf::st_buffer(fema_sub[2, ], 500), col.regions = "green")
   # fema_no_holes <- nngeo::st_remove_holes(fema)
   # fema_no_holes_union <- sf::st_union(fema_no_holes)
-  # 
   # touching_list = sf::st_touches(fema_no_holes)
   # mapview::npts(fema)
   # mapview::npts(fema_no_holes)
@@ -101,15 +97,10 @@ path_df
   #   dplyr::group_by(new_fema_id)
   # fema_no_holes_union
   # fema$fema_id %>% unique() %>% length()
-  
   # # union then explode FEMA polygons 
   # fema <- 
   #   fema %>%
   #   sf::st_union()
-  # 
-  # fema <- rmapshaper::ms_explode(fema)
-  # # fema %>% mapview::npts()
-  # 
   # # reassign IDs and change geometry column name
   # fema <- 
   #   fema %>% 
@@ -194,7 +185,6 @@ path_df
                                                            flines,
                                                            max_extension_distance) {
     
-    ###    ###    ###    ###    ###    ###    ###
     ###    ###    ###    ###    ###    ###    ###
     # transect_lines <- transects
     # polygons <- fema
@@ -202,7 +192,6 @@ path_df
     # # max_extension_distance <- 3000
   # max_extension_distance = 3500
     # ###    ###    ###    ###    ###    ###    ###
-    ###    ###    ###    ###    ###    ###    ###
     
     # keep 10% of the original points for speed
     polygons <- rmapshaper::ms_simplify(polygons, keep_shapes = F, keep = 0.10)
@@ -256,11 +245,6 @@ path_df
     # make each transect line have way more segments so we can take a left and right half of each transect line
     segmented_trans <- sf::st_segmentize(intersect_transects, min_segmentation)
     
-    # unlist(segmented_trans$geom)
-    # unique(lengths(segmented_trans$geom))
-    # length(lengths(segmented_trans$geom))
-    # lengths(segmented_trans$geom)  
-    
     # mapview::mapview(left_trans, col.regions = "dodgerblue") +
       # mapview::mapview(intersect_transects, color = "red") +
       #   mapview::mapview(intersect_transects[42, ], color = "yellow") +
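The kept st_segmentize() step densifies each transect so it can be split into a left and a right half. A sketch of that split using lwgeom::st_linesubstring(), the same call the exploration code in this series uses; column names follow the runner's transect schema:

# Sketch: densify each transect, then take each half with st_linesubstring().
library(dplyr)
library(sf)

split_transect_halves <- function(transects, min_segmentation = 10) {
  segmented <- sf::st_segmentize(transects, min_segmentation)

  left <- segmented %>%
    lwgeom::st_linesubstring(0, 0.5) %>%
    dplyr::mutate(
      partition         = "left",
      partition_lengthm = as.numeric(sf::st_length(geom))
    )

  right <- segmented %>%
    lwgeom::st_linesubstring(0.5, 1) %>%
    dplyr::mutate(
      partition         = "right",
      partition_lengthm = as.numeric(sf::st_length(geom))
    )

  dplyr::bind_rows(left, right)
}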

From b81e4778a9bc34fc979e8c1ec5a6ecb67dd17897 Mon Sep 17 00:00:00 2001
From: anguswg-ucsb <anguswatters@gmail.com>
Date: Wed, 29 May 2024 15:35:29 -0700
Subject: [PATCH 38/64] small cleanups as I'm migrating FEMA functions to
 hydrofabric3D

---
 runners/cs_runner/add_fema_to_transects.R | 413 +---------------------
 1 file changed, 4 insertions(+), 409 deletions(-)

diff --git a/runners/cs_runner/add_fema_to_transects.R b/runners/cs_runner/add_fema_to_transects.R
index a36b369..c96efb3 100644
--- a/runners/cs_runner/add_fema_to_transects.R
+++ b/runners/cs_runner/add_fema_to_transects.R
@@ -936,35 +936,18 @@ path_df
 #         mapview::mapview(sf::st_as_sf(right_extended_trans), color = "red")     +
 #         mapview::mapview(      sf::st_as_sf(intersect_polygons[[curr_fema_index]]), col.regions = "dodgerblue")  +  
 #       mapview::mapview(      sf::st_as_sf(final_line), color = "yellow")   
-#       
-#       
-#       
-#       
-#         
+
 #       
 #     }
-#     
-#     
+
 #     left_trans$left_extension_distance
-#     
+
 #     length(right_distances)
-#     
-#     
 #     # Calculate the minimum distance a line would need to extend to reach the boundary of the polygon/line that the input geometries are entirely within 
     calc_extension_distances <- function(geos_geoms, ids, lines_to_cut, lines_to_cut_indices, direction = "head", max_extension_distance = 2500) {
       #####   #####   #####   #####   #####
       # geos_geoms   <- left_trans_geos
-      # ids          <- left_trans$tmp_id
-      # lines_to_cut <- intersect_lines
-      # lines_to_cut_indices <- left_trans$left_fema_index
-      # direction = "head"
-      # max_extension_distance = 2500
-      # geos_geoms             = left_trans_geos
-      # ids                    = left_trans$tmp_id
-      # lines_to_cut           = intersect_lines
-      # lines_to_cut_indices   = left_trans$left_fema_index
-      # direction              = "head"
-      # max_extension_distance = max_extension_distance
+
       # geos_geoms             = left_trans_geos
       # ids                    = left_trans$tmp_id
       # lines_to_cut           = intersect_lines
@@ -1012,391 +995,3 @@ path_df
 
       return(extension_dists)
     }
-
-#     
-#     # ----------------------------------------------------------------------------------------------------------------
-#     # Loop over every left and right halfs of transects and 
-#     # if they are fully within FEMA polygons, get the minimum extension distance required for the transect to meet the FEMA polygon boundary
-#     # ----------------------------------------------------------------------------------------------------------------
-#     
-#     left_ids          <- left_trans$tmp_id
-#     
-#     left_fema_indexes <- left_trans$left_fema_index
-#     left_fema_bool    <- left_trans$left_is_within_fema
-#     
-#     # preallocate vector that stores the extension. distances
-#     left_extension_dists <- vctrs::vec_c(rep(0, length(left_ids)))
-#     
-#     # all_equal_length_vects <- all(length(left_ids) == length(left_fema_indexes) && length(left_ids) == length(left_fema_bool))
-#     # 1:length(left_ids)
-#     extension_count = 0
-#     
-#     for(i in 1:length(left_ids)) {
-#       # i = 1
-#       tmp_id                 <- left_ids[i]
-#       is_within_fema_polygon <- left_fema_bool[i]
-#       fema_index             <- left_fema_indexes[i]
-#   
-#       message("Transect: '", tmp_id, "' - (", i, ")")
-#       # if(is_within_fema_polygon) {
-#       #   break
-#       # }
-#       # fema_index <- left_trans$left_fema_index[i]
-#       # is_within_fema_polygon = ifelse(!is.na(left_fema_index), TRUE, FALSE)
-#       
-#       if(is_within_fema_polygon) {
-#         
-#         message("- Left side of transect intersects with FEMA")
-#         message("\t > FEMA index: ", fema_index)
-#         extension_count = extension_count + 1
-#         message("\t > extension_count: ", extension_count)
-#         
-#         trans_geom <- left_trans_geos[i]
-#         index_vect <- sort(unlist(fema_index))
-#         
-#         # mapview::mapview(sf::st_as_sf(left_trans_geos[i]), color = "red") + sf::st_as_sf(intersect_lines[index_vect])
-#         
-#         # intersect_lines[index_vect]
-#         
-#         dist_to_fema <- hydrofabric3D:::geos_bs_distance(
-#                             distances    = 1:max_extension_distance,
-#                             line         = trans_geom,
-#                             geoms_to_cut = intersect_lines[index_vect],
-#                             direction    = "head"
-#                           )
-#         
-#         left_extension_dists[i] <- dist_to_fema
-#         
-#       }
-#       message()
-#     }
-#     
-#     left_trans$left_extension_dist <- left_extension_dists
-#   }
-#   tmp <- left_trans[1:5, ]
-#   tmp
-#   tmp_extended <- hydrofabric3D:::extend_by_length(tmp, tmp$left_extension_dist)
-#   
-#   tmp_extended
-#   
-#   fema_idx <- unique(unlist(dplyr::select(tmp, left_fema_index)$left_fema_index))
-#   
-#   mapview::mapview(dplyr::select(tmp, -left_fema_index), color = "red") +  
-#     mapview::mapview(dplyr::select(tmp_extended, -left_fema_index), color = "green")  +
-#     sf::st_as_sf(intersect_polygons[fema_idx])
-#   
-#   length(left_extension_dists)
-#   
-#   left_trans_geos
-#   vctrs::vec_c(
-#     vctrs::vec_c(
-#       left_trans$hy_id
-#   ),
-#   vctrs::vec_cast(left_trans$cs_id)
-#   )
-#   
-#  #  
-#  #  # mapview::mapview(left_trans[477, ]) + intersect_polygons[1888, ]
-#  # left_trans <- 
-#  #    left_trans %>%
-#  #    dplyr::mutate(
-#  #      fema_index = unlist(sf::st_within(., intersect_polygons))
-#  #    ) %>% 
-#  #    dplyr::relocate(fema_index)
-#  #  
-#   # left_trans_geos <- geos::as_geos_geometry(left_trans)
-#   
-#   sort(na.omit(unlist(unique(left_trans$fema_index))))
-#   fema_polygons[na.omit(unlist(unique(left_trans$fema_index)))]
-#   
-#   fema_polygons %>% na.omit()
-#   
-#   # note: sorting the fema polygon indices (not sure if necessary)
-#   left_fema <- fema_polygons[sort(na.omit(unlist(unique(left_trans$fema_index))))]
-#   
-#   left_fema
-#   
-#   left_fema %>% plot()
-#   
-#   geos::geos_make_linestring(geom = left_fema)
-#   left_trans$fema_index
-#   sort(na.omit(unlist(unique(left_trans$fema_index))))
-#   
-#   # left_fema <- fema_polygons[na.omit(unlist(unique(left_trans$fema_index)))]
-#   left_fema_lines <- 
-#     left_fema %>% 
-#     sf::st_as_sf() %>% 
-#     sf::st_cast("multilinestring") %>% 
-#     geos::as_geos_geometry() %>% 
-#     geos::geos_simplify_preserve_topology(25)
-#   
-#   left_fema %>% 
-#     sf::st_as_sf() %>% 
-#     mapview::npts()
-#   
-#   geos::geos_simplify_preserve_topology(left_fema_lines, 50) %>% 
-#     geos::geos_num_coordinates() %>%
-#     sum()
-#   
-#   geos::geos_num_coordinates(left_fema_lines) %>% sum()
-#   
-#   geos::geos_simplify_preserve_topology(left_fema_lines, 1) %>% 
-#     .[3] %>% 
-#     plot()
-#   left_fema_lines[3] %>% plot()
-#   
-#   
-#   left_extension_dists <- lapply(seq_along(left_trans_geos), function(i) {
-#     
-#     hydrofabric3d:::geos_bs_distance(
-#       distances    = 1:2000,
-#       line         = left_trans_geos[i],
-#       geoms_to_cut = left_fema_lines,
-#       direction    = "head"
-#     )
-#   }) %>% 
-#     unlist()
-#   
-#   left_trans$left_head_extension_dist <- left_extension_dists
-#   
-#   # find the distances from the right side of transect lines 
-#   right_trans <- 
-#     segmented_trans %>% 
-#     lwgeom::st_linesubstring(0.50, 1) %>% 
-#     dplyr::mutate(
-#       partition         = "right",
-#       partition_lengthm = as.numeric(sf::st_length(geom))
-#     ) %>% 
-#     dplyr::select(hy_id, cs_source, cs_id, cs_measure,  
-#                   cs_lengthm, is_extended, partition, partition_lengthm, geom)
-#   right_trans_geos <- geos::as_geos_geometry(right_trans)
-#   right_within_matrix <- geos::geos_within_matrix(right_trans_geos, geos::as_geos_geometry(fema_polygons))
-#  
-#   right_within_vect <- lapply(right_within_matrix, function(i) {
-#                       if(length(i) > 0) { c(i) } else { c(na_real_) } }
-#                       )
-#  right_trans$fema_index <- right_within_vect
-# # 
-# #  right_trans <- 
-# #     right_trans %>%
-# #     dplyr::mutate(
-# #       fema_index = unlist(sf::st_within(., fema_polygons))
-# #     ) %>% 
-# #     dplyr::relocate(fema_index)
-# #   
-#   
-#   right_fema <- fema_polygons[unique(right_trans$fema_index),] 
-#   right_fema_lines <- 
-#     right_fema %>% 
-#     sf::st_cast("linestring")
-#    
-#   right_extension_dists <- lapply(seq_along(right_trans_geos), function(i) {
-#     
-#     hydrofabric3d:::geos_bs_distance(
-#       distances    = 1:2000,
-#       line         = right_trans_geos[i],
-#       geoms_to_cut = right_fema_lines,
-#       direction    = "tail"
-#     )
-#   }) %>% 
-#     unlist()
-#   
-#   right_trans$right_tail_extension_dists <- right_extension_dists
-#   right_trans$right_tail_extension_dists
-#   unlist(sf::st_within(left_trans, fema_polygons))
-#   
-#   left_trans <- 
-#     left_trans %>%
-#     dplyr::mutate(
-#       fema_index = unlist(sf::st_within(., fema_polygons))
-#     ) %>% 
-#   dplyr::relocate(fema_index)
-#   
-#   left_fema <- fema_polygons[unique(left_trans$fema_index),]
-#   left_fema_lines <- 
-#     left_fema %>% 
-#     sf::st_cast("linestring")
-#   
-#   geos::as_geos_geometry(left_fema)
-#   mapview::mapview(left_fema, col.regions = "dodgerblue") +
-#     mapview::mapview(left_ls, color = "yellow") +
-#     mapview::mapview(left_trans, color = "red") +
-#     mapview::mapview(right_trans, color = "green")
-#   
-#   left_trans_geos <- geos::as_geos_geometry(left_trans)
-#   left_trans_geos %>% plot()
-#   left_trans_geos[2]
-#   
-#   length(left_trans_geos)
-#   
-#   left_extension_dists <- lapply(seq_along(left_trans_geos), function(i) {
-#     
-#    hydrofabric3d:::geos_bs_distance(
-#       distances    = 1:2000,
-#       line         = left_trans_geos[i],
-#       geoms_to_cut = left_fema_lines,
-#       direction    = "head"
-#     )
-#   }) %>% 
-#     unlist()
-#   
-#   left_extensions  <- geos::geos_empty()
-#   
-#   for (i in 1:length(left_extension_dists)) {
-#     dist = left_extension_dists[i]
-#     geos_line <- left_trans_geos[i]
-#     message(glue::glue("i: {i}\ndist: {dist}"))
-#     
-#     extended <- hydrofabric3d::geos_extend_line(
-#       geos_line,
-#       dist,
-#       "head"
-#     )
-#     left_extensions <- vctrs::vec_c(left_extensions, extended)
-#     
-#   }
-#   # index for only valid transects
-#   # is_valid <- !geos::geos_is_empty(left_extensions)
-#   left_extensions <- left_extensions[!geos::geos_is_empty(left_extensions)]
-#   # !geos::geos_is_empty(left_extensions)
-#   
-#   new_left_trans <- 
-#     left_trans %>% 
-#     sf::st_drop_geometry() %>% 
-#     dplyr::mutate(
-#       geom = sf::st_as_sfc(left_extensions)
-#     ) %>% 
-#     sf::st_as_sf()
-#   # geos::sf
-#   mapview::mapview(left_fema, col.regions = "dodgerblue") +
-#     mapview::mapview(left_ls, color = "yellow") +
-#     mapview::mapview(left_trans, color = "red") +
-#     mapview::mapview(new_left_trans, color = "green")
-#   mapply(function(geom, dist) {
-#       hydrofabric3d::geos_extend_line(geom, dist, "head") 
-#     },
-#   left_trans_geos,
-#   left_extension_dists
-#   )
-#   
-#   left_extensions <- lapply(seq_along(left_trans_geos), function(i) {
-#       
-#     extend_dist <- hydrofabric3d:::geos_bs_distance(
-#         distances    = 1:2000,
-#         line         = left_trans_geos[i],
-#         geoms_to_cut = left_fema_lines,
-#         direction    = "head"
-#       )
-#       
-#     hydrofabric3d::geos_extend_line(left_trans_geos[i], extend_dist, "head") 
-#     
-#     })
-#   # unlist(left_extensions)
-#   
-#   left_extension_dists <- lapply(seq_along(left_trans_geos), function(i) {
-#     hydrofabric3d:::geos_bs_distance(
-#       distances    = 1:2000,
-#       line         = left_trans_geos[i],
-#       geoms_to_cut = left_fema_lines,
-#       direction    = "head"
-#     )
-#   }) 
-#   
-#   distance_to_extend <- 
-#     hydrofabric3d:::geos_bs_distance(
-#       distances    = 1:1500,
-#       line         = left_trans_geos[1],
-#       geoms_to_cut = left_fema_lines,
-#       direction    = "head"
-#     )
-#   
-#   extended <- hydrofabric3d::geos_extend_line(left_trans_geos[1], distance_to_extend, "head") %>% 
-#     sf::st_as_sf()
-#   mapview::mapview(left_fema, col.regions = "dodgerblue") +
-#     mapview::mapview(left_ls, color = "yellow") +
-#     mapview::mapview(left_trans, color = "red") +
-#     mapview::mapview(extended, color = "green")
-#     # mapview::mapview(right_trans, color = "green")
-#   left_trans$geom %>% sf::st_length()
-#   plot(segmented_trans$geom, col = "red", lwd =5)
-#   plot(left_trans$geom, col = "green", lwd=3, add = true)
-#   plot(right_trans$geom, col = "blue", lwd=3, add = true)
-#   unlist(left_trans$geom)
-#    unlist(right_trans$geom)
-#   
-#   unlist(trans_fema$geom )
-#   unlist(split_trans$geom )
-#   split_trans %>% 
-#     sf::st_collection_extract("linestring")
-#   split_trans$geom
-#   
-#   tmp_trans$geom
-#   nngeo::st_segments(tmp_trans) %>% 
-#     .$result %>% 
-#     plot()
-#   
-#   mapview::mapview(tmp_trans) + tmp_centroid
-#   geos::geos_clip_by_rect()
-#   
-#   transects_with_
-#   
-#   lengths(transects_polygons_matrix)
-#   mapview::mapview(transects_with_fema, color = "green") + fema_with_transects
-#   unique(hydrofabric3d::add_tmp_id(transects)$tmp_id)[1:30]
-#   transects  %>% 
-#     hydrofabric3d::add_tmp_id(transects) %>% 
-#     .$tmp_id %>% 
-#     unique() %>% .[1:30]
-#   trans_subset <- 
-#     transects %>% 
-#     hydrofabric3d::add_tmp_id() %>% 
-#     dplyr::filter(tmp_id %in%  unique(hydrofabric3d::add_tmp_id(transects)$tmp_id)[1:30])
-#   
-#   fema_subset <- 
-#     fema %>% 
-#     dplyr::filter(fema_id == "1268")
-# 
-#   extended <- hydrofabric3D:::extend_by_length(trans_subset,    rep(500, nrow(trans_subset)))
-#   extended
-#   clipped_trans <- rmapshaper::ms_clip(extended, fema)
-#   
-#   rmapshaper::ms_clip(extended, fema_subset)
-#   mapview::mapview(trans_subset, color = "red") + 
-#     mapview::mapview(extended, color = "yellow") +
-#     # mapview::mapview(  sf::st_difference(extended, fema_subset), color = "green") +
-#     mapview::mapview(clipped_trans,  color = "green") +
-#     fema 
-#   
-#    rep(50, nrow(trans_subset))
-#   extended <- hydrofabric3D:::extend_by_length(trans_subset,    rep(50, nrow(trans_subset)))
-#   
-#   
-#   hydrofabric3D::geos_extend_line(trans_subset, 50) %>% 
-#     sf::st_as_sf() %>% mapview::mapview()
-#   
-#   
-#   
-#   
-#   
-#   
-#   
-#   
-#   
-#   
-#   
-  
-  
- 
-  
-  
-       
-  
-     
-  
-     
-  
-     
-  
-    
-  
-    
\ No newline at end of file
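This patch keeps calc_extension_distances() and deletes the inline exploration around it. One piece of that exploration worth restating cleanly is the lookup of which polygon fully contains each half-transect; the removed version also used na_real_, which R would not find (the literal is NA_real_). A sketch with the object names used above:

# Sketch: index of the FEMA polygon fully containing each half-transect,
# NA when the half falls outside all polygons.
library(geos)

within_matrix <- geos::geos_within_matrix(
  right_trans_geos,
  geos::as_geos_geometry(fema_polygons)
)

right_trans$fema_index <- vapply(
  within_matrix,
  function(idx) if (length(idx) > 0) idx[[1]] else NA_real_,
  numeric(1)
)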

From 7914a4526db5ece3d238487934fcfaee50674df6 Mon Sep 17 00:00:00 2001
From: anguswg-ucsb <anguswatters@gmail.com>
Date: Thu, 13 Jun 2024 10:16:29 +0300
Subject: [PATCH 39/64] updating cs_runner/01_transects.R to use FEMA VPU
 polygons to extend transects out to the FEMA floodplain when possible

---
 runners/cs_runner/01_transects.R          |  34 ++-
 runners/cs_runner/add_fema_to_transects.R | 278 ++++++++++++++--------
 2 files changed, 210 insertions(+), 102 deletions(-)

diff --git a/runners/cs_runner/01_transects.R b/runners/cs_runner/01_transects.R
index b3727ba..4d6c746 100644
--- a/runners/cs_runner/01_transects.R
+++ b/runners/cs_runner/01_transects.R
@@ -11,7 +11,7 @@ source("runners/cs_runner/config.R")
 transects_prefix <- paste0(s3_bucket, version_prefix, "/3D/transects/")
 
 # paths to nextgen datasets and model attribute parquet files
-nextgen_files <- list.files(nextgen_dir, full.names = FALSE)
+nextgen_files    <- list.files(nextgen_dir, full.names = FALSE)
 model_attr_files <- list.files(model_attr_dir, full.names = FALSE)
 
 # string to fill in "cs_source" column in output datasets
@@ -26,16 +26,32 @@ path_df <- align_files_by_vpu(
 
 # loop over each VPU and generate cross sections, then save locally and upload to S3 bucket
 for(i in 1:nrow(path_df)) {
-  
+  # i = 8
   # nextgen file and full path
   nextgen_file <- path_df$x[i]
   nextgen_path <- paste0(nextgen_dir, nextgen_file)
   
+  vpu <- path_df$vpu[i]
+
+  # Get FEMA by VPU directory and files for current VPU 
+  fema_vpu_dir <- paste0(FEMA_VPU_SUBFOLDERS[grepl(paste0("VPU_", vpu), basename(FEMA_VPU_SUBFOLDERS))], "/merged")
+  # fema_vpu_dir <- paste0(FEMA_VPU_SUBFOLDERS[grepl(paste0("VPU_", vpu), basename(FEMA_VPU_SUBFOLDERS))], "/merged")
+
+  vpu_fema_files <- list.files(fema_vpu_dir, full.names = TRUE)
+  vpu_fema_file <- vpu_fema_files[grepl(paste0(vpu, "_union.gpkg"), vpu_fema_files)]
+  
+  # fema polygons and transect lines
+  fema <- sf::read_sf(vpu_fema_file)
+  
   # # model attributes file and full path
   # model_attr_file <- path_df$y[i]
   # model_attr_path <- paste0(model_attr_dir, model_attr_file)
 
-  message("Creating VPU ", path_df$vpu[i], " transects:\n - flowpaths: '", nextgen_file, "'")
+  message("Creating VPU ", vpu, " transects:", 
+          "\n - flowpaths: '",
+          nextgen_file, "'",
+           "\n - FEMA polygons: ", basename(vpu_fema_file)
+          )
   # message("Creating VPU ", path_df$vpu[i], " transects:\n - flowpaths: '", nextgen_file, "'\n - model attributes: '", model_attr_file, "'")
   
   # read in nextgen data
@@ -114,6 +130,15 @@ for(i in 1:nrow(path_df)) {
       cs_source = net_source
     )
   
+  # TODO: make sure this 3000m extension distance is appropriate across VPUs 
+  # TODO: also need to make sure that this will be feasible memory-wise on the larger VPUs...
+  transects <- hydrofabric3D::get_transect_extension_distances_to_polygons(
+                                                  transect_lines         = transects, 
+                                                  polygons               = fema, 
+                                                  flines                 = flines, 
+                                                  max_extension_distance = 3000 
+                                                  )
+                                                
   # save transects with only columns to be uploaded to S3 (lynker-spatial/01_transects/transects_<VPU num>.gpkg)
   sf::write_sf(
     # save dataset with only subset of columns to upload to S3
@@ -129,6 +154,9 @@ for(i in 1:nrow(path_df)) {
     out_path
     )
   
+  transects <- sf::read_sf(out_path)
+
+  
   # command to copy transects geopackage to S3
   copy_to_s3 <- paste0("aws s3 cp ", out_path, " ", transects_prefix, out_file, 
                    ifelse(is.null(aws_profile), "", paste0(" --profile ", aws_profile))
diff --git a/runners/cs_runner/add_fema_to_transects.R b/runners/cs_runner/add_fema_to_transects.R
index c96efb3..1cae9ca 100644
--- a/runners/cs_runner/add_fema_to_transects.R
+++ b/runners/cs_runner/add_fema_to_transects.R
@@ -27,7 +27,7 @@ path_df
 # loop over each vpu and generate cross sections, then save locally and upload to s3 bucket
 # for(i in 1:nrow(path_df)) {
   
-  # i = 8
+  i = 8
   
   # nextgen file and full path
   nextgen_file <- path_df$x[i]
@@ -65,13 +65,181 @@ path_df
   
   # read in nextgen flowlines data
   flines <- sf::read_sf(nextgen_path, layer = "flowpaths")
-  system.time({
-   extended_transects <- get_transect_extension_distances_to_polygons(transects,
-                                               fema,
-                                               flines,
-                                               3000)
-  })
-   # library(nngeo)
+  # 
+  # system.time({
+  #  extended_transects <- get_transect_extension_distances_to_polygons(transects, fema, flines, 3000)
+  # })
+  # 
+  # sf::write_sf(extended_transects, '/Users/anguswatters/Desktop/test_fema_extended_trans.gpkg')
+  # ------- ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- --- 
+  # ----- Generate plots of extensions ----
+  # ------- ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- --- 
+  extended_transects <- sf::read_sf('/Users/anguswatters/Desktop/test_fema_extended_trans.gpkg') %>% 
+    hydrofabric3D::add_tmp_id()
+  # ids_of_interest = c("wb-1002167", "wb-1002166", "wb-1002165", "wb-1002164")
+  # ids_of_interest = c( "wb-1002166", "wb-1002165")
+  # ids_of_interest = c("wb-1014540", "wb-1014541", "wb-1014542", "wb-1014570", "wb-1014574",
+  #   "wb-1014575", "wb-1014572" ,"wb-1014573" ,"wb-1014571", 
+  #   "wb-1014568", "wb-1014569", "wb-1014567", "wb-1014543")
+
+  # ids_of_interest = c( "wb-1014542")
+  
+  any_extended <- 
+    extended_transects %>% 
+    dplyr::filter(left_is_extended | right_is_extended) %>%
+    dplyr::mutate(
+      extend_status = dplyr::case_when(
+        (left_is_extended & right_is_extended)  ~ "both",
+        (left_is_extended & !right_is_extended) ~ "left_only",
+        (!left_is_extended & right_is_extended) ~ "right_only",
+        TRUE ~ "no_extension"
+      )
+    ) %>% 
+    dplyr::mutate(
+      extend_distance = left_distance + right_distance
+    ) %>% 
+    dplyr::relocate(extend_status, extend_distance) %>% 
+    dplyr::filter(hy_id == "wb-1002550") 
+    # dplyr::slice(1:1500)
+  
+  fema_subset_intersects <- sf::st_intersects(fema, any_extended)
+  fema_polygons <- fema[lengths(fema_subset_intersects) > 0, ]
+  fema_polygons
+  
+  og_trans <- 
+    transects %>% 
+    hydrofabric3D::add_tmp_id() %>% 
+    dplyr::filter(tmp_id %in% any_extended$tmp_id | hy_id %in% any_extended$hy_id) %>% 
+    dplyr::mutate(
+      extend_status = "original"
+    ) 
+  
+  Flowlines <-
+    flines %>% 
+    dplyr::filter(id %in% unique(any_extended$hy_id))
+  
+  FEMA = fema_polygons
+  Extended = sf::st_buffer(any_extended, 10)
+  Original = sf::st_buffer(og_trans, 10)
+  
+  mapview::mapview(FEMA, col.regions = "dodgerblue") + 
+    mapview::mapview(Flowlines, color = "darkblue") + 
+    mapview::mapview(Extended, col.regions = "green") +
+    mapview::mapview(Original, col.regions = "red") 
+    # mapview::mapview(Extended, color = "green") +
+    # mapview::mapview(Original, color = "red") 
+  extend_subset <- 
+    any_extended %>% 
+    dplyr::filter(hy_id %in% ids_of_interest)
+    # dplyr::group_by(extend_status) %>%
+    # dplyr::arrange(-extend_distance, .by_group = TRUE) %>% 
+    # dplyr::slice(
+    #   which.min(extend_distance),
+    #   which.max(extend_distance)
+    # )
+    # dplyr::slice(1)
+  extend_subset <-
+    any_extended %>%
+    dplyr::group_by(extend_status) %>%
+    dplyr::arrange(-extend_distance, .by_group = TRUE) %>%
+    # dplyr::slice(
+    #   which.min(extend_distance),
+    #   which.max(extend_distance)
+    # )
+    dplyr::slice(2000:2020)
+  extend_subset
+  
+  fline_subset <-
+    flines %>% 
+    dplyr::filter(id %in% unique(extend_subset$hy_id))
+  
+  plot_data <- 
+    dplyr::bind_rows(
+    transects %>% 
+      hydrofabric3D::add_tmp_id() %>% 
+      dplyr::filter(tmp_id %in% extend_subset$tmp_id) %>% 
+      dplyr::mutate(
+        extend_status = "original"
+      ),
+    dplyr::filter(extend_subset, 
+                  extend_status != "no_extension")
+  ) %>% 
+  dplyr::relocate(extend_status, extend_distance) %>% 
+    dplyr::mutate(
+      extend_status = dplyr::case_when(
+        extend_status != "original" ~ "extended",
+        TRUE ~ extend_status
+      )
+    ) 
+  
+  fema_subset_intersects <- sf::st_intersects(fema, fline_subset)
+  fema_polygons <- fema[lengths(fema_subset_intersects) > 0, ]
+  fema_polygons
+  
+  flines[lengths(sf::st_intersects(flines, fema_polygons)) > 0, ] %>% 
+    dplyr::pull(id) %>% 
+    unique()
+  fema_polygons
+  sf::st_crop(fema_polygons, fline_subset)
+  ggplot2::ggplot() +
+    ggplot2::geom_sf(data =   sf::st_crop(fema_polygons, fline_subset), fill = "grey") +
+    ggplot2::geom_sf(data = fline_subset, color = "black", lwd = 1) +
+    # ggplot2::geom_sf(data = plot_data, ggplot2::aes(color = extend_status))
+   ggplot2::geom_sf(data = dplyr::filter(plot_data, extend_status == "extended"),
+                    color = "green") + 
+   ggplot2::geom_sf(data = dplyr::filter(plot_data, extend_status == "original"), 
+                    color = "red")
+
+  
+  mapview::mapview(fema_polygons, col.regions = "dodgerblue") + 
+  mapview::mapview(fline_subset, color = "dodgerblue") + 
+    mapview::mapview(dplyr::filter(plot_data, extend_status == "extended"), color = "green") +
+    mapview::mapview(dplyr::filter(plot_data, extend_status == "original"), color = "red") 
+  transects %>% 
+    hydrofabric3D::add_tmp_id() %>% 
+    dplyr::filter(tmp_id %in% extend_subset$tmp_id) %>% 
+    dplyr::mutate(
+      extend_status = "original"
+    )
+  
+  extend_subset
+  
+  ggplot2::ggplot() +
+    ggplot2::geom_sf(data = fline_subset, color = "black", lwd = 5) +
+    ggplot2::geom_sf(data = extend_subset, ggplot2::aes(color = extend_status))
+  
+  mapview::mapview(fline_subset, color = "dodgerblue") + 
+   mapview::mapview(extend_subset, color = "green") 
+  
+  
+  both_extended <-
+    extended_transects %>% 
+    dplyr::filter(left_is_extended, right_is_extended)
+  
+  left_only_extended <- 
+    extended_transects %>% 
+    dplyr::filter(left_is_extended, !right_is_extended) 
+  
+  right_only_extended <- 
+    extended_transects %>% 
+    dplyr::filter(!left_is_extended, right_is_extended)
+  
+  unique(both_extended$hy_id)
+  
+  extended_transects %>% 
+    dplyr::filter(
+      # tmp_id %in% unique(left_only_extended$tmp_id),
+      tmp_id %in% unique(right_only_extended$tmp_id) | 
+      tmp_id %in% unique(both_extended$tmp_id)
+      )
+  
+  unique(extended_transects$hy_id)
+  # ------- ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- --- 
+  # ------- ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- --- 
+  # ------- ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- ---  
+  # library(nngeo)
   # fema %>% 
   #   dplyr::group_by(fema_id) %>% 
   #   dplyr::mutate(
@@ -180,10 +348,7 @@ path_df
  # transect_lines, set of sf linestrings to extend (only if the transect lines are ENTIRELY within a polygon)
  # polygons, set of sf polygons that transect lines should be extended to 
   # max_extension_distance numeric, maximum distance (meters) to extend a transect line in either direction to try and intersect one of the "polygons"
-  get_transect_extension_distances_to_polygons <- function(transect_lines, 
-                                                           polygons, 
-                                                           flines,
-                                                           max_extension_distance) {
+  get_transect_extension_distances_to_polygons <- function(transect_lines, polygons, flines, max_extension_distance) {
     
     ###    ###    ###    ###    ###    ###    ###
     # transect_lines <- transects
@@ -789,9 +954,7 @@ path_df
         # ---------------------------------------------------
         
         # start %>% class()
-      
-     
-    }
+    }      
     
     # transects2 <- transects 
       # dplyr::mutate(
@@ -822,90 +985,7 @@ path_df
     return(transects)
     
   }    
-    # transects2 %>% 
-      # dplyr::filter(left_is_extended, right_is_extended)
-#     
-#     any_extended <- 
-#       transects2 %>% 
-#       dplyr::filter(left_is_extended | right_is_extended)
-#     
-#     any_flines <- 
-#       flines %>% 
-#       dplyr::filter(id %in% any_extended$hy_id)
-#     
-#     # left_only_extended <- 
-#     #   transects2 %>% 
-#     #   dplyr::filter(left_is_extended, !right_is_extended)
-#     # 
-#     # left_only_flines <- 
-#     #   flines %>% 
-#     #   dplyr::filter(id %in% left_only_extended$hy_id)
-#     # 
-#     # right_only_extended <- 
-#     #   transects2 %>% 
-#     #   dplyr::filter(!left_is_extended, right_is_extended)
-#     # 
-#     # right_only_flines <- 
-#     #   flines %>% 
-#     #   dplyr::filter(id %in% right_only_extended$hy_id)
-#   
-#     # left_fema_polygons <- 
-#       # left_trans %>% 
-#     # ----------------------------------------------------------------
-#     # ------- Subset data for mapping ----------- 
-#     # ----------------------------------------------------------------
-#     extended_for_map <- 
-#       any_extended %>% 
-#       dplyr::slice(1:1000)
-#     
-#     og_transects_for_map <- 
-#       transects %>% 
-#       hydrofabric3D::add_tmp_id() %>% 
-#       dplyr::filter(tmp_id %in% hydrofabric3D::add_tmp_id(extended_for_map)$tmp_id)
-#     
-#     fema_indexes_in_aoi <-
-#       dplyr::bind_rows(
-#         sf::st_drop_geometry(
-#           dplyr::rename(left_trans, fema_index = left_fema_index)
-#         ),
-#         sf::st_drop_geometry(     
-#           dplyr::rename(right_trans, 
-#                         fema_index = right_fema_index)
-#         )
-#       ) %>% 
-#       hydrofabric3D::add_tmp_id() %>% 
-#       dplyr::filter(tmp_id %in% hydrofabric3D::add_tmp_id(extended_for_map)$tmp_id) %>% 
-#       # dplyr::filter(
-#       #   # tmp_id %in% hydrofabric3D::add_tmp_id(left_only_extended)$tmp_id |
-#       #   # tmp_id %in% hydrofabric3D::add_tmp_id(right_only_extended)$tmp_id
-#       #  
-#       #   tmp_id %in%  unique(hydrofabric3D::add_tmp_id(dplyr::filter(transects2, left_is_extended, right_is_extended))$tmp_id)
-#       #   
-#       #   ) %>% 
-#       # dplyr::filter(left_is_within_fema | right_is_within_fema) %>% 
-#       # dplyr::slice(1:200) %>%
-#       .$fema_index %>% 
-#       unlist() %>% 
-#       na.omit() %>% 
-#       unique() %>% 
-#       sort()
-#       # length()
-#     
-#     sf::st_as_sf(intersect_polygons[fema_indexes_in_aoi])
-# 
-#     # hydrofabric3D::add_tmp_id(left_only_extended)$tmp_id
-#     # transects_with_distances
-#     # %>% 
-#     mapview::mapview( sf::st_as_sf(intersect_polygons[fema_indexes_in_aoi]),  col.regions = "lightblue") + 
-#       mapview::mapview(any_flines, color = "dodgerblue") + 
-#       mapview::mapview(og_transects_for_map, color = "green") + 
-#       mapview::mapview(extended_for_map, color = "red") 
-# 
-#       # mapview::mapview(left_only_flines, color = "dodgerblue") + 
-#       #     mapview::mapview(right_only_flines, color = "dodgerblue") + 
-#     # mapview::mapview(left_only_extended, color = "red") + 
-#     #      mapview::mapview(right_only_extended, color = "green")
-#     # transects$cs_lengthm  <- length_list
+   
 #     # ----------------------------------------------------------------
 #     # ----------------------------------------------------------------
 #     # ----------------------------------------------------------------  
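The plotting exploration above derives an extend_status label in a few places; a compact helper, assuming the left_is_extended/right_is_extended flags and left_distance/right_distance columns that the extension step produces:

# Sketch: label which side(s) of each transect were extended.
library(dplyr)

classify_extensions <- function(extended_transects) {
  extended_transects %>%
    dplyr::mutate(
      extend_status = dplyr::case_when(
        left_is_extended & right_is_extended ~ "both",
        left_is_extended                     ~ "left_only",
        right_is_extended                    ~ "right_only",
        TRUE                                 ~ "no_extension"
      ),
      extend_distance = left_distance + right_distance
    ) %>%
    dplyr::relocate(extend_status, extend_distance)
}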

From 1ba56d4a5f52bac07f33f00aa73c1935df3933ee Mon Sep 17 00:00:00 2001
From: anguswg-ucsb <anguswatters@gmail.com>
Date: Thu, 11 Jul 2024 15:54:28 -0700
Subject: [PATCH 40/64] reworked final step in producing FEMA geometries to
 better resolve internal boundaries

---
 .gitignore                                |   3 +-
 runners/cs_runner/01_transects.R          | 198 ++++++++++++++++++++--
 runners/cs_runner/add_fema_to_transects.R |   3 +
 runners/cs_runner/preprocess_fema.R       | 135 +++++++++++----
 runners/cs_runner/utils.R                 | 117 +++++++++++++
 5 files changed, 405 insertions(+), 51 deletions(-)

diff --git a/.gitignore b/.gitignore
index 3d01a85..182b0e0 100644
--- a/.gitignore
+++ b/.gitignore
@@ -10,4 +10,5 @@ inst/doc
 vignettes/cihro-data
 data
 check
-.Rapp.history
\ No newline at end of file
+.Rapp.history
+runners/cs_runner/fema_extension_plotting.R
\ No newline at end of file
diff --git a/runners/cs_runner/01_transects.R b/runners/cs_runner/01_transects.R
index 4d6c746..e8d0476 100644
--- a/runners/cs_runner/01_transects.R
+++ b/runners/cs_runner/01_transects.R
@@ -26,7 +26,7 @@ path_df <- align_files_by_vpu(
 
 # loop over each VPU and generate cross sections, then save locally and upload to S3 bucket
 for(i in 1:nrow(path_df)) {
-  # i = 8
+   # i = 8
   # nextgen file and full path
   nextgen_file <- path_df$x[i]
   nextgen_path <- paste0(nextgen_dir, nextgen_file)
@@ -40,9 +40,7 @@ for(i in 1:nrow(path_df)) {
   vpu_fema_files <- list.files(fema_vpu_dir, full.names = TRUE)
   vpu_fema_file <- vpu_fema_files[grepl(paste0(vpu, "_union.gpkg"), vpu_fema_files)]
   
-  # fema polygons and transect lines
-  fema <- sf::read_sf(vpu_fema_file)
-  
+
   # # model attributes file and full path
   # model_attr_file <- path_df$y[i]
   # model_attr_path <- paste0(model_attr_dir, model_attr_file)
@@ -50,8 +48,10 @@ for(i in 1:nrow(path_df)) {
   message("Creating VPU ", vpu, " transects:", 
           "\n - flowpaths: '",
           nextgen_file, "'",
-           "\n - FEMA polygons: ", basename(vpu_fema_file)
+           "\n - FEMA polygons: '", 
+          basename(vpu_fema_file), "'"
           )
+  
   # message("Creating VPU ", path_df$vpu[i], " transects:\n - flowpaths: '", nextgen_file, "'\n - model attributes: '", model_attr_file, "'")
   
   # read in nextgen data
@@ -85,6 +85,7 @@ for(i in 1:nrow(path_df)) {
       lengthkm,
       tot_drainage_areasqkm,
       bf_width,
+      mainstem,
       geometry = geom
     )
 
@@ -111,7 +112,9 @@ for(i in 1:nrow(path_df)) {
     # precision         = 1,
     add               = TRUE                           # whether to add back the original data
   )
-    
+  
+  gc()
+  
   time2 <- Sys.time()
   time_diff <- round(as.numeric(time2 - time1 ), 2)
   
@@ -121,7 +124,6 @@ for(i in 1:nrow(path_df)) {
   out_file <- paste0("nextgen_", path_df$vpu[i], "_transects.gpkg")
   out_path <- paste0(transects_dir, out_file)
   
-  message("Saving transects to:\n - filepath: '", out_path, "'")
   
   # add cs_source column and rename cs_widths to cs_lengthm
   transects <- 
@@ -129,16 +131,177 @@ for(i in 1:nrow(path_df)) {
     dplyr::mutate(
       cs_source = net_source
     )
+  # ---------------------------------------------------------------------
+  # --- Extend transects out to FEMA 100yr floodplains
+  # ---------------------------------------------------------------------
+  message("Reading in FEMA polygons...") 
+  # fema polygons and transect lines
+  fema <- sf::read_sf(vpu_fema_file)
+  
+  # mapview::npts(fema)
+  message("Simplifying FEMA polygons...")
+  # TODO: this should be a function argument OR removed; we probably shouldn't forcibly and silently simplify the input polygons without the user knowing.
+  # keep 10% of the original points for speed
+  # fema <- rmapshaper::ms_simplify(fema, keep_shapes = F, keep = 0.01)
+  fema <- rmapshaper::ms_simplify(fema, keep_shapes = F, keep = 0.01, sys = TRUE, sys_mem = 16)
+
+  # mapview::npts(fema)
+  
+  # # TODO: the flines argument needs the "hy_id" column to be named "id" 
+  # # TODO: probably should fix this in hydrofabric3D::get_transect_extension_distances_to_polygons()
+  # flines <- 
+  #   flines %>% 
+  #   dplyr::rename(id = hy_id)
+  
+  message("Extending transects out to FEMA 100yr floodplain polygon boundaries - (", Sys.time(), ")")
+  
+  # # TODO: hacky, need to fix the extend-transects-to-polygons function to not need these columns
+  # transects <- 
+  #   transects %>% 
+  #   dplyr::rename(geom = geometry) %>%
+  #   dplyr::mutate(
+  #     is_extended = FALSE
+  #   )
+  
+  transects <- 
+    transects  %>%
+    dplyr::left_join(
+      dplyr::select(sf::st_drop_geometry(flines),
+                    hy_id,
+                    mainstem
+      ),
+      by = "hy_id" 
+    )
   
+  # system.time({
+    
   # TODO: make sure this 3000m extension distance is appropriate across VPUs 
   # TODO: also need to make sure that this will be feasible memory-wise on the larger VPUs...
-  transects <- hydrofabric3D::get_transect_extension_distances_to_polygons(
-                                                  transect_lines         = transects, 
-                                                  polygons               = fema, 
-                                                  flines                 = flines, 
-                                                  max_extension_distance = 3000 
-                                                  )
-                                                
+  transects <- hydrofabric3D::extend_transects_to_polygons(
+    transect_lines         = transects, 
+    polygons               = fema, 
+    flowlines              = flines, 
+    crosswalk_id           = "hy_id",
+    intersect_group_id     = "mainstem", 
+    max_extension_distance = 3000 
+  )
+  
+  # })
+  
+  message("FEMA extensions complete! - ( ", Sys.time(), " )")
+  
+  transects  <- dplyr::select(transects, -tmp_id)
+  transects  <- hydrofabric3D::add_tmp_id(transects)
+  
+  transects <- 
+    transects %>%  
+    # dplyr::select(-cs_lengthm) %>% 
+    # dplyr::mutate(is_fema_extended = left_is_extended | right_is_extended) %>% 
+    dplyr::select(
+      hy_id, 
+      cs_id, 
+      cs_lengthm,
+      # cs_lengthm = new_cs_lengthm, 
+      cs_source,
+      cs_measure,
+      geometry
+      # is_extended,
+      # is_fema_extended,
+      # geometry = geom
+    )
+  
+  gc()
+   
+  # # ---------------------------------------------------------------------
+  # # ---------------------------------------------------------------------
+  # 
+  # transects <- sf::read_sf(out_path)
+  # 
+  # # flines <-
+  # #   flines %>%
+  # #   dplyr::filter(hy_id %in% transects$hy_id)
+  # #   dplyr::slice(1:1000)
+  # 
+  # flines <-
+  #   flines %>%
+  #   dplyr::slice(seq(1, nrow(flines), 10))
+  # # 
+  # transects <-
+  #   transects %>%
+  #   dplyr::filter(hy_id %in% flines$hy_id)
+  # # 
+  # 
+  # # TODO: the flines argument needs the "hy_id" column to be named "id" 
+  # # TODO: probably should fix this in hydrofabric3D::get_transect_extension_distances_to_polygons()
+  # flines <- 
+  #   flines %>% 
+  #   dplyr::rename(id = hy_id)
+  # 
+  # 
+  #  # fema_keep <- rmapshaper::ms_simplify(fema, keep_shapes = T, keep = 0.01)
+  # 
+  # mapview::mapview(dplyr::filter(fema, fema_id %in% 1:10), col.regions = "red") + 
+  #   mapview::mapview(dplyr::filter(fema_keep, fema_id %in% 1:10), col.regions = "green") 
+  #   # mapview::mapview(dplyr::filter(fema_nokeep, fema_id %in% 1:10), col.regions = "dodgerblue")
+  # transects
+  # 
+  # # system.time({
+  # # profvis::profvis({
+  #   
+  # # TODO: make sure this 3000m extension distance is appropriate across VPUs 
+  # # TODO: also need to make sure that this will be feasible memory-wise on the larger VPUs...
+  # # transects2 <- hydrofabric3D::get_transect_extension_distances_to_polygons(
+  # #                                                 transect_lines         = transects, 
+  # #                                                 polygons               = fema, 
+  # #                                                 flines                 = flines, 
+  # #                                                 max_extension_distance = 3000 
+  # #                                                 )
+  # transects2 <- hydrofabric3D::extend_transects_to_polygons(
+  #   transect_lines         = transects, 
+  #   polygons               = fema, 
+  #   flowlines              = flines, 
+  #   max_extension_distance = 3000 
+  # )
+  # # })
+  # # })
+  # 
+  # # mapview::mapview(transects2, color = "green") + 
+  # # mapview::mapview(transects, color = "red") + 
+  # #    mapview::mapview(fema_keep, col.regions = "dodgerblue")
+  # # ---------------------------------------------------------------------
+  # # ----------------------------------------------------------------------------------------------------------------
+  # 
+  # transects2 <- dplyr::select(transects2, -tmp_id)
+  # transects2 <- hydrofabric3D::add_tmp_id(transects2)
+  # 
+  # extended_ids <- 
+  #   transects2 %>% 
+  #   dplyr::filter(left_is_extended | right_is_extended) %>% 
+  #   dplyr::pull(hy_id) %>% 
+  #   unique()
+  # 
+  # start_trans <- dplyr::filter(transects, hy_id %in% extended_ids[1:150])
+  #   end_trans <- dplyr::filter(transects2, hy_id %in% extended_ids[1:150])
+  # mapview::mapview(start_trans, color = "red")  + 
+  #   mapview::mapview(end_trans, color = "green")
+  # transects <- 
+  #   transects %>%  
+  #   dplyr::select(-cs_lengthm) %>% 
+  #   # dplyr::mutate(is_fema_extended = left_is_extended | right_is_extended) %>% 
+  #   dplyr::select(
+  #     hy_id, 
+  #     cs_id, 
+  #     # cs_lengthm,
+  #     cs_lengthm = new_cs_lengthm, 
+  #     cs_source,
+  #     cs_measure,
+  #     # is_extended,
+  #     # is_fema_extended,
+  #     geometry = geom
+  #   )
+  
+  message("Saving transects to:\n - filepath: '", out_path, "'")
+  
   # save transects with only columns to be uploaded to S3 (lynker-spatial/01_transects/transects_<VPU num>.gpkg)
   sf::write_sf(
     # save dataset with only subset of columns to upload to S3
@@ -154,9 +317,6 @@ for(i in 1:nrow(path_df)) {
     out_path
     )
   
-  transects <- sf::read_sf(out_path)
-
-  
   # command to copy transects geopackage to S3
   copy_to_s3 <- paste0("aws s3 cp ", out_path, " ", transects_prefix, out_file, 
                    ifelse(is.null(aws_profile), "", paste0(" --profile ", aws_profile))
@@ -169,6 +329,7 @@ for(i in 1:nrow(path_df)) {
   system(copy_to_s3, intern = TRUE)
   
   message("Overwritting local copy of transects to include 'is_extended' column...\n==========================")
+  
   # Overwrite transects with additional columns for development purposes (is_extended) to have a local copy of dataset with information about extensions
   sf::write_sf(
     dplyr::select(
@@ -184,4 +345,7 @@ for(i in 1:nrow(path_df)) {
     ),
     out_path
   )
+  
+  rm(fema, transects, flines)
+  gc()
 }
diff --git a/runners/cs_runner/add_fema_to_transects.R b/runners/cs_runner/add_fema_to_transects.R
index 1cae9ca..a5ac6f3 100644
--- a/runners/cs_runner/add_fema_to_transects.R
+++ b/runners/cs_runner/add_fema_to_transects.R
@@ -24,6 +24,9 @@ path_df <- align_files_by_vpu(
 
 path_df
 
+# # install.packages("devtools")
+devtools::install_github("anguswg-ucsb/hydrofabric3D")
+
 # loop over each vpu and generate cross sections, then save locally and upload to s3 bucket
 # for(i in 1:nrow(path_df)) {
   
diff --git a/runners/cs_runner/preprocess_fema.R b/runners/cs_runner/preprocess_fema.R
index 52653d3..160593e 100644
--- a/runners/cs_runner/preprocess_fema.R
+++ b/runners/cs_runner/preprocess_fema.R
@@ -19,6 +19,7 @@ source("runners/cs_runner/utils.R")
 library(dplyr)
 library(sf)
 library(geos)
+library(fastmap)
 
 # -------------------------------------------------------------------------------------
 # ---- OVERWRITE_FEMA_FILES constant logical ----
@@ -108,7 +109,8 @@ FEMA_FILE_PATHS <- paste0(FEMA_FGB_PATH, "/", FEMA_FILENAMES)
 # -------------------------------------------------------------------------------------
 
 for (file in FEMA_FILENAMES) {
-  
+  # message(file)
+
   local_fema_path <- paste0(FEMA_FGB_PATH, "/", file)
   
   geojson_filename     <- gsub(".fgb", ".geojson", file)
@@ -146,15 +148,25 @@ for (file in FEMA_geojson_paths) {
   # message("Fema 100 year flood plain:\n > '", file, "'")
   output_clean_filename <- gsub(".geojson", "_clean.geojson", basename(file))
   output_path <- paste0(FEMA_CLEAN_PATH, "/", output_clean_filename)
-  
+
   clean_geojson_exists <- file.exists(output_path)
   message(" >>> '", output_clean_filename, "' already exists? ", clean_geojson_exists)
   message(" >>> Overwrite? ", OVERWRITE_FEMA_FILES)
   
+  # start_fema <- sf::read_sf(file)
+  
+  # mapshaper_command = paste0('node  --max-old-space-size=16000 /opt/homebrew/bin/mapshaper ', file, 
+  #                            ' -simplify 0.15 visvalingam \\', 
+  #                            ' -dissolve \\', 
+  #                            ' -explode \\', 
+  #                            ' -o ', output_path
+  # )
+  
   mapshaper_command = paste0('node  --max-old-space-size=16000 /opt/homebrew/bin/mapshaper ', file, 
-                             ' -simplify 0.15 visvalingam \\', 
-                             ' -dissolve \\', 
-                             ' -explode \\', 
+                             ' -dissolve2 FLD_AR_ID \\', 
+                             ' -simplify 0.1 visvalingam \\', 
+                             # ' -explode \\',
+                             ' -snap \\',
                              ' -o ', output_path
   )
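The command above hard-codes the node and mapshaper locations. A slightly more defensive sketch of the same pipeline (dissolve2 on FLD_AR_ID, Visvalingam simplify, snap), treating the binary paths as machine-specific assumptions and failing loudly on a non-zero exit:

# Sketch: same mapshaper pipeline with an exit-status check. The node and
# mapshaper paths are machine-specific assumptions (Homebrew install here).
mapshaper_bin <- "/opt/homebrew/bin/mapshaper"

mapshaper_command <- paste0(
  "node --max-old-space-size=16000 ", mapshaper_bin, " ", file,
  " -dissolve2 FLD_AR_ID",
  " -simplify 0.1 visvalingam",
  " -snap",
  " -o ", output_path
)

status <- system(mapshaper_command)
if (status != 0) {
  stop("mapshaper failed (exit ", status, ") for: ", basename(file))
}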
   
@@ -163,12 +175,14 @@ for (file in FEMA_geojson_paths) {
     system(mapshaper_command)
     message("Writting '", output_clean_filename, "' to: \n > '", output_path, "'")
   }
+  # end_fema <- sf::read_sf(output_path)
   
   message()
-}
+
+  }
 
 # -------------------------------------------------------------------------------------
-# ---- Convert cleaned FEMA geometries to geopackages ----
+# ---- Convert cleaned FEMA geojson geometries to geopackages ----
 # -------------------------------------------------------------------------------------
 
 # paths to FEMA 100 year flood plain files
@@ -182,8 +196,6 @@ for (file in FEMA_clean_paths) {
   
   message("Converting GEOJSON file to GPKG:\n > '", basename(file), "' > '", output_gpkg_filename, "'")
   
-  # system(ogr2ogr_command)
-  
   clean_gpkg_exists <- file.exists(output_path)
   
   message(" >>> '", output_gpkg_filename, "' already exists? ", clean_gpkg_exists)
@@ -199,41 +211,97 @@ for (file in FEMA_clean_paths) {
   message()
 }
 
-# # -------------------------------------------------------------------------------------
-# # ---- Apply hydrofab::clean_geometries() to cleaned FEMA geometries  ----
-# # -------------------------------------------------------------------------------------
-# 
+# # -------------------------------------------------------------------------------------------------------------------
+# # ---- Apply final dissolve/snap and removal of internal boundaries in FEMA geometries  ----
+# # -------------------------------------------------------------------------------------------------------------------
+
 # paths to FEMA 100 year flood plain files
 FEMA_gpkg_paths      <- list.files(FEMA_GPKG_PATH, full.names = TRUE)
 
 for (file_path in FEMA_gpkg_paths) {
   message("Applying hydrofab::clean_geometry() to:\n > '", basename(file_path), "'")
+   
+  fema <- sf::read_sf(file_path)
+
+  fema <-
+    fema[!sf::st_is_empty(fema), ] %>% 
+    sf::st_transform(5070)
+ 
+  #  TODO: Snap using geos::geos_snap()
+  # fema <-
+  #   geos::geos_snap(
+  #     geos::as_geos_geometry(fema),
+  #     geos::as_geos_geometry(fema),
+  #     tolerance = 1
+  #     ) %>%
+  #   geos::geos_make_valid()  %>%
+  #   sf::st_as_sf()
+  
+  # TODO: we get this error when trying to use the geometry column after geos snapping
+  # TODO: Error = "Error: Not compatible with STRSXP: [type=NULL]."
+  # fema %>%
+    # sf::st_cast("POLYGON")
+
+  # TODO: Snap using sf::st_snap()
+  # fema <- sf::st_snap(
+  #             fema,
+  #             fema,
+  #             tolerance = 2
+  #             )
   
   fema <-
-    file_path %>%
-    sf::read_sf() %>%
-    sf::st_transform(5070) %>%
-    sf::st_cast("POLYGON") %>%
+    fema %>% 
+    # fema[!sf::st_is_empty(fema), ] %>% 
+    dplyr::select(geometry = geom) %>%
+    add_predicate_group_id(sf::st_intersects) %>% 
+    sf::st_make_valid() %>% 
+    dplyr::group_by(group_id) %>% 
+    dplyr::summarise(
+      geometry = sf::st_combine(sf::st_union(geometry))
+    ) %>% 
+    dplyr::ungroup() %>% 
+    dplyr::select(-group_id) %>% 
+    add_predicate_group_id(sf::st_intersects) %>% 
+    rmapshaper::ms_dissolve(sys = TRUE, sys_mem = 16) %>% 
+    rmapshaper::ms_explode(sys = TRUE, sys_mem = 16) %>% 
     dplyr::mutate(
-      fema_id = 1:dplyr::n()
-    ) %>%
-    dplyr::select(fema_id, geometry = geom)
-
-  message(" > ", nrow(fema), " POLYGONs")
-  message("Start time: ", Sys.time())
-
-  fema_clean <- hydrofab::clean_geometry(
-    catchments = fema,
-    ID         = "fema_id"
-    )
-  
-  fema_clean <-
-    fema_clean %>%
+      fema_id = as.character(1:dplyr::n())
+    ) %>% 
+    dplyr::select(fema_id, geometry)
+  
+  # mapview::mapview(fema, color = 'cyan', col.regions = "cyan") + 
+  # mapview::mapview(end_fema, color = 'red', col.regions = "white") 
+  # mapview::mapview(start_fema$geom, color = "red", col.regions = "red") + 
+  #   mapview::mapview(end_fema$geom, color = 'limegreen', col.regions = "limegreen") + 
+  #   mapview::mapview(snap_union_sf, color = 'gold', col.regions = "gold") + 
+  # mapview::mapview(final_fema, color = 'white', col.regions = "white") + 
+  # mapview::mapview(fin, color = 'white', col.regions = "white") 
+
+  # message(" > ", nrow(fema), " POLYGONs")
+  # message("Start time: ", Sys.time())
+  # 
+  # fema_clean <- hydrofab::clean_geometry(
+  #   catchments = fema,
+  #   ID         = "fema_id"
+  #   )
+  # 
+  # fema_clean <-
+  #   fema_clean %>%
+  #   dplyr::mutate(
+  #     source = basename(file_path),
+  #     state  = gsub("-100yr-flood_valid_clean.gpkg", "", source)
+  #   ) %>%
+  #   dplyr::select(fema_id, source, state, areasqkm, geometry)
+  
+  fema <- 
+    fema %>% 
     dplyr::mutate(
       source = basename(file_path),
       state  = gsub("-100yr-flood_valid_clean.gpkg", "", source)
     ) %>%
-    dplyr::select(fema_id, source, state, areasqkm, geometry)
+    dplyr::select(fema_id, source, state, 
+                  # areasqkm, 
+                  geometry)
 
   message("End time: ", Sys.time())
   
@@ -245,7 +313,8 @@ for (file_path in FEMA_gpkg_paths) {
   if (OVERWRITE_FEMA_FILES) {
     message("Writting '", basename(file_path), "' to: \n > '", file_path, "'")
     sf::write_sf(
-      fema_clean,
+      # fema_clean,
+      fema,
       file_path
     )
   }
diff --git a/runners/cs_runner/utils.R b/runners/cs_runner/utils.R
index ae40b70..8bfd145 100644
--- a/runners/cs_runner/utils.R
+++ b/runners/cs_runner/utils.R
@@ -26,3 +26,120 @@ polygons_with_line_intersects <- function(polygons = NULL, lines = NULL) {
   
   return(polygons_with_lines)
 }
+
+add_predicate_group_id <- function(polys, predicate) {
+  # GROUP BY SPATIAL PREDICATES
+  # ----------------------------------------- 
+  # predicate = sf::st_touches
+  # polys <- sf_df
+  # ----------------------------------------- 
+  
+  
+  relations <- predicate(polys)
+  
+  relations <- lapply(seq_along(relations), function(i) { as.character(sort(unique(c(relations[i][[1]], i)))) })
+  
+  group_ids_map <- fastmap::fastmap()
+  ids_to_groups <- fastmap::fastmap()
+  
+  group_id <- 0
+  
+  for (i in seq_along(relations)) {
+    
+    predicate_ids <- relations[i][[1]]
+    
+    # message("(", i, ") - ", predicate_ids)
+    # message("Start Group ID: ", group_id)
+    
+    id_group_check <- ids_to_groups$has(predicate_ids)
+    
+    if(any(id_group_check)) {
+      
+      known_groups  <- ids_to_groups$mget(predicate_ids)
+      known_group   <- known_groups[unname(sapply(known_groups , function(kg) {
+        !is.null(kg)
+      }))][[1]]
+      
+      # message("IDs part of past group ID > '", known_group, "'")
+      
+      past_group_ids     <- group_ids_map$get(known_group)[[1]]
+      updated_group_ids  <- as.character(
+        sort(as.numeric(unique(c(past_group_ids, predicate_ids))))
+      )
+      
+      group_ids_map$set(known_group, list(updated_group_ids))
+      
+      new_ids <- predicate_ids[!predicate_ids %in% past_group_ids]
+      
+      # message("Adding ", new_ids, " to seen set...")
+      
+      # add any newly added IDs to the seen map
+      for (seen_id in new_ids) {
+        # message(seen_id)
+        ids_to_groups$set(as.character(seen_id), as.character(known_group)) # map new IDs to the existing group, not the counter
+      }
+      
+    } else {
+      # get a new group ID number
+      group_id <- group_id + 1    
+      # message("IDs form NEW group > '", group_id, "'")
+      
+      # create a new key in the map with the predicate IDs list as the value
+      group_ids_map$set(as.character(group_id), list(predicate_ids))
+      
+      # message("Adding ", predicate_ids, " to seen set...")
+      
+      # add each predicate ID to the map storing the seen indices and their respective group IDs 
+      for (seen_id in predicate_ids) {
+        # message(seen_id)
+        ids_to_groups$set(as.character(seen_id), as.character(group_id))
+      }
+    }
+    # message("End group ID: ", group_id, "\n") 
+  }
+  
+  group_ids   <- group_ids_map$as_list() 
+  
+  grouping_df <- lapply(seq_along(group_ids), function(i) {
+    # i = 2
+    grouping  <- group_ids[i] 
+    group_id  <- names(grouping)
+    indices   <- grouping[[1]][[1]]
+    
+    data.frame(
+      index      = as.numeric(indices),
+      group_id   = rep(group_id, length(indices))   
+    )
+    
+  }) %>% 
+    dplyr::bind_rows() %>% 
+    dplyr::arrange(index) # order by polygon index, not the loop variable
+  
+  # count up the number of IDs in each group; we'll use this to decide which group
+  # to assign any index that belongs to MULTIPLE groups (use the group with the most members)
+  group_id_counts <- 
+    grouping_df %>% 
+    dplyr::group_by(group_id) %>% 
+    dplyr::count() %>% 
+    # dplyr::arrange(-n) %>% 
+    dplyr::ungroup()
+  
+  # select the IDs with the most other members
+  grouping_df <- 
+    grouping_df %>% 
+    dplyr::left_join(
+      group_id_counts, 
+      by = 'group_id'
+    ) %>% 
+    dplyr::group_by(index) %>% 
+    dplyr::slice_max(n, with_ties = FALSE) %>% 
+    dplyr::ungroup() %>% 
+    dplyr::select(-n) %>% 
+    dplyr::arrange(index) # ascending, so rows align positionally with 'polys'
+  
+  polys$group_id <- grouping_df$group_id
+  
+  return(polys)
+  
+}
+
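+# Minimal usage sketch for add_predicate_group_id() (toy data; the unit squares are
+# hypothetical): the two overlapping squares share a group_id, the distant one does not.
+#   sq    <- function(x) sf::st_polygon(list(rbind(c(x, 0), c(x + 1, 0), c(x + 1, 1), c(x, 1), c(x, 0))))
+#   polys <- sf::st_sf(geometry = sf::st_sfc(sq(0), sq(0.5), sq(5)))
+#   add_predicate_group_id(polys, sf::st_intersects)$group_id
+#   #> "1" "1" "2"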

From fcedf0759b75ef61b8253935d405383b64c9fd5f Mon Sep 17 00:00:00 2001
From: anguswg-ucsb <anguswatters@gmail.com>
Date: Fri, 12 Jul 2024 08:42:05 -0700
Subject: [PATCH 41/64] reworking processing of fema FGBs to improve transect
 extensions

---
 runners/cs_runner/preprocess_fema.R | 21 ++++++++++-----------
 1 file changed, 10 insertions(+), 11 deletions(-)

diff --git a/runners/cs_runner/preprocess_fema.R b/runners/cs_runner/preprocess_fema.R
index 160593e..1498151 100644
--- a/runners/cs_runner/preprocess_fema.R
+++ b/runners/cs_runner/preprocess_fema.R
@@ -20,6 +20,10 @@ library(dplyr)
 library(sf)
 library(geos)
 library(fastmap)
+library(nngeo)
+
+# TODO: Steps that convert FGB to geojson and then geojson to gpkg can be put into a single loop
+# TODO: Delete old files as needed
 
 # -------------------------------------------------------------------------------------
 # ---- OVERWRITE_FEMA_FILES constant logical ----
@@ -330,9 +334,8 @@ for (file_path in FEMA_gpkg_paths) {
 FEMA_CLEAN_GPKG_PATHS      <- list.files(FEMA_GPKG_PATH, full.names = TRUE)
 
 # paths to nextgen datasets and model attribute parquet files
-NEXTGEN_FILENAMES  <- list.files(nextgen_dir, full.names = FALSE)
-NEXTGEN_FILE_PATHS <- paste0(nextgen_dir, NEXTGEN_FILENAMES)
-# OVERWRITE_FEMA_FILES
+NEXTGEN_FILENAMES    <- list.files(nextgen_dir, full.names = FALSE)
+NEXTGEN_FILE_PATHS   <- paste0(nextgen_dir, NEXTGEN_FILENAMES)
 
 for (file_path in FEMA_CLEAN_GPKG_PATHS) {
   fema_file <- basename(file_path)
@@ -371,8 +374,7 @@ for (file_path in FEMA_CLEAN_GPKG_PATHS) {
         dplyr::mutate(
           vpu = vpu
         ) %>%
-        dplyr::select(vpu, fema_id, source, state,
-                      areasqkm, geom)
+        dplyr::select(vpu, fema_id, source, state, geom)
       
       # state <- gsub("-100yr-flood_valid_clean.gpkg", "", fema_file)
 
@@ -409,14 +411,11 @@ for (file_path in FEMA_CLEAN_GPKG_PATHS) {
 
 DELETE_STAGING_GPKGS <- FALSE 
 
-# FEMA_VPU_SUBFOLDERS
-
 for (vpu_dir in FEMA_VPU_SUBFOLDERS) {
-# for (i in 1:4) {
-  vpu_dir = FEMA_VPU_SUBFOLDERS[i]
+ # for (i in 1:4) {
+  # vpu_dir = FEMA_VPU_SUBFOLDERS[i]
   message("Merging files in '", basename(vpu_dir), "' directory...")
-  
-# }
+ # }
   
   # vpu_dir <- '/Users/anguswatters/Desktop/lynker-spatial/FEMA_BY_VPU/VPU_06'
   vpu_subdirs    <- list.files(vpu_dir, full.names = TRUE)

From e1c66212cf01f54efe3c750f5d4350fa546126a2 Mon Sep 17 00:00:00 2001
From: anguswg-ucsb <anguswatters@gmail.com>
Date: Fri, 12 Jul 2024 14:17:48 -0700
Subject: [PATCH 42/64] wip on removing holes but not losing many polygons

---
 runners/cs_runner/preprocess_fema.R | 83 ++++++++++++++++++++++-------
 1 file changed, 65 insertions(+), 18 deletions(-)

diff --git a/runners/cs_runner/preprocess_fema.R b/runners/cs_runner/preprocess_fema.R
index 1498151..b45948b 100644
--- a/runners/cs_runner/preprocess_fema.R
+++ b/runners/cs_runner/preprocess_fema.R
@@ -473,10 +473,9 @@ for (vpu_dir in FEMA_VPU_SUBFOLDERS) {
 # -------------------------------------------------------------------------------------
 
 MERGED_DIRS <- paste0(FEMA_VPU_SUBFOLDERS, "/merged")
-
+MERGED_DIRS
 for (i in 1:length(FEMA_VPU_SUBFOLDERS)) {
   # i = 8
-  # i
   vpu_dir = FEMA_VPU_SUBFOLDERS[i]
   
   VPU        <- basename(vpu_dir)
@@ -502,22 +501,50 @@ for (i in 1:length(FEMA_VPU_SUBFOLDERS)) {
   fema_vpu_file <- fema_vpu_file[!grepl("_union.gpkg", fema_vpu_file)]
 
   fema_vpu <- sf::read_sf(fema_vpu_file)
-  
-  # fema_vpu
-  
-  # fema_ids <- c(695)
   # fema_vpu <-
   #   fema_vpu %>%
   #   dplyr::group_by(source) %>%
   #   dplyr::summarise()  %>%
   #   dplyr::ungroup()
-  # fema_vpu
+ 
+   mapview::npts(fema_vpu)
+  fema_vpu2 <- 
+    fema_vpu %>% 
+    nngeo::st_remove_holes(max_area = 20) %>%
+    # dplyr::select(geometry = geom) %>%
+    add_predicate_group_id(sf::st_intersects) %>% 
+    sf::st_make_valid() %>% 
+    dplyr::group_by(group_id) %>% 
+    dplyr::summarise(
+      geometry = sf::st_combine(sf::st_union(geometry))
+    ) %>% 
+    dplyr::ungroup() %>% 
+    dplyr::select(-group_id) %>%
+    add_predicate_group_id(sf::st_intersects) %>%
+    rmapshaper::ms_dissolve(sys = TRUE, sys_mem = 16) %>%
+    rmapshaper::ms_explode(sys = TRUE, sys_mem = 16) %>%
+    dplyr::mutate(
+      fema_id = as.character(1:dplyr::n())
+    ) %>% 
+    dplyr::select(fema_id, geometry)
+  
+  fema_vpu2 %>% mapview::npts()
+  
+  fema_vpu2_subset <- fema_vpu2[lengths(sf::st_intersects(fema_vpu2, fema_vpu[1:100, ]))  > 1, ]
   
-  # fema_snapped <- sf::st_snap(fema_vpu, fema_vpu, tolerance = 10)
-  # sf::st_
+  mapview::mapview(fema_vpu[1:100, ], color = 'red', col.regions = 'white') +
+    mapview::mapview(fema_vpu2_subset, color = 'green', col.regions = 'white')
+      # mapview::mapview(fema_vpu2[1:100, ], color = 'green', col.regions = 'white')
+    
   # message("Removing holes before dissolve...")
   fema_vpu <- nngeo::st_remove_holes(fema_vpu)
-  # 
+  mapview::mapview(fema_vpu[1:100, ], color = 'red', col.regions = 'white') +
+  mapview::mapview(fema_vpu2[1:100, ], color = 'green', col.regions = 'white') 
+  # mapview::npts(fema_vpu)
+  all(sf::st_is_valid(fema_vpu))
+  rmapshaper::ms_innerlines(fema_vpu)
+  
+  
   # message("Making valid geometries...")
   # fema_vpu <- sf::st_make_valid(fema_vpu) 
   
@@ -527,8 +554,8 @@ for (i in 1:length(FEMA_VPU_SUBFOLDERS)) {
   
   message("Dissolving...")
   
-  # 2633 = old number of polygons
-  fema_vpu <- rmapshaper::ms_dissolve(
+  # 1421 = old number of polygons
+  fema_vpu2 <- rmapshaper::ms_dissolve(
                             input   = fema_vpu,
                             field   = "source",
                             sys     = TRUE,
@@ -536,18 +563,38 @@ for (i in 1:length(FEMA_VPU_SUBFOLDERS)) {
                             )
   
   message("Exploding...")
+  
   # mapview::npts(fema_vpu)
-  # mapview::npts(fema_vpu_dissolve)
-  fema_vpu <- rmapshaper::ms_explode(
-                            input   = fema_vpu,     
+  # mapview::npts(fema_vpu2)
+  
+  fema_vpu2 <- rmapshaper::ms_explode(
+                            input   = fema_vpu2,     
                             sys     = TRUE,
                             sys_mem = 16
                             )
-  # mapview::npts(fema_exp)
+  # mapview::npts(fema_vpu2)
   message("Removing holes after explosion...")
-  fema_vpu <- nngeo::st_remove_holes(fema_vpu)
-  # mapview::npts(fema_exp_noholes)
+  fema_vpu2 <- nngeo::st_remove_holes(fema_vpu2)
   
+  fema_vpu2 <- 
+    fema_vpu2 %>% 
+    add_predicate_group_id(sf::st_intersects) %>% 
+    dplyr::group_by(group_id) %>% 
+    dplyr::summarise(
+      geometry = sf::st_combine(sf::st_union(geometry))
+    )
+  
+  mapview::mapview(fema_vpu[1:100, ], color = 'red', col.regions = 'white') +
+    mapview::mapview(fema_vpu2[1:100, ], color = 'green', col.regions = 'white') 
+  
+  # mapview::npts(fema_vpu2)
+  sf::st_is_valid(fema_vpu2) %>% all()
+  
+  fema_vpu2 %>% 
+    sf::st_make_valid() %>% 
+    sf::st_geometry_type() %>% 
+    unique()
+  sf::st_geometry_type(fema_vpu2) %>% unique()
   # slice_subset = 1:50
   # fema_exp_noholes[slice_subset, ]
   # mapview::mapview(  fema_vpu[1:100, ], col.regions = "dodgerblue")+ 
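+  # NOTE (sketch): with EPSG:5070 the units are meters, so st_remove_holes(max_area = 20)
+  # above fills only interior rings smaller than ~20 m^2, while the bare
+  # nngeo::st_remove_holes() call fills every hole regardless of size, e.g.:
+  #   outer <- sf::st_polygon(list(rbind(c(0, 0), c(10, 0), c(10, 10), c(0, 10), c(0, 0))))
+  #   inner <- sf::st_polygon(list(rbind(c(4, 4), c(6, 4), c(6, 6), c(4, 6), c(4, 4))))
+  #   donut <- sf::st_sfc(sf::st_difference(outer, inner))
+  #   nngeo::st_remove_holes(donut)   # solid 10x10 square again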

From 42ad438ca6a2a24dfebea732ae286b1f7c96ae2d Mon Sep 17 00:00:00 2001
From: anguswg-ucsb <anguswatters@gmail.com>
Date: Tue, 16 Jul 2024 13:45:52 -0700
Subject: [PATCH 43/64] replaced preprocess fema with new version which better
 resolves internal boundaries and leaves fewer islands/holes

---
 runners/cs_runner/preprocess_fema.R  | 489 +++++++++-----------------
 runners/cs_runner/preprocess_fema2.R | 504 +++++++++++++++++++++++++++
 2 files changed, 666 insertions(+), 327 deletions(-)
 create mode 100644 runners/cs_runner/preprocess_fema2.R

diff --git a/runners/cs_runner/preprocess_fema.R b/runners/cs_runner/preprocess_fema.R
index b45948b..1e1cba5 100644
--- a/runners/cs_runner/preprocess_fema.R
+++ b/runners/cs_runner/preprocess_fema.R
@@ -32,24 +32,13 @@ library(nngeo)
 # -------------------------------------------------------------------------------------
 
 # Default is TRUE (i.e. a fresh processing run is done from start to finish)
-OVERWRITE_FEMA_FILES <- TRUE
+OVERWRITE_FEMA_FILES  <- TRUE
+DELETE_STAGING_GPKGS  <- TRUE
 
 # -------------------------------------------------------------------------------------
 # ---- Create directories (if they do NOT exist) ----
 # -------------------------------------------------------------------------------------
 
-# create geojsons directory (if not exists) 
-if (!dir.exists(FEMA_GEOJSON_PATH)) {
-  message(paste0(FEMA_GEOJSON_PATH, " directory does not exist...\nCreating directory:\n > '", FEMA_GEOJSON_PATH, "'"))
-  dir.create(FEMA_GEOJSON_PATH)
-}
-
-# create directory for cleaned FEMA geometries (if not exists) 
-if (!dir.exists(FEMA_CLEAN_PATH)) {
-  message(paste0(FEMA_CLEAN_PATH, " directory does not exist...\nCreating directory:\n > '", FEMA_CLEAN_PATH, "'"))
-  dir.create(FEMA_CLEAN_PATH)
-}
-
 # create directory for cleaned FEMA geometries as geopackages (if not exists) 
 if (!dir.exists(FEMA_GPKG_PATH)) {
   message(paste0(FEMA_GPKG_PATH, " directory does not exist...\nCreating directory:\n > '", FEMA_GPKG_PATH, "'"))
@@ -66,171 +55,132 @@ for (VPU_SUBFOLDER in FEMA_VPU_SUBFOLDERS) {
   # create directory for FEMA geometries by VPU
   # message(VPU_SUBFOLDER)
   
-  state_dir  = paste0(VPU_SUBFOLDER, "/states/")
-  merged_dir = paste0(VPU_SUBFOLDER, "/merged/")
+  # state_dir  = paste0(VPU_SUBFOLDER, "/states/")
+  # merged_dir = paste0(VPU_SUBFOLDER, "/merged/")
   
   if (!dir.exists(VPU_SUBFOLDER)) {
     message("Creating FEMA VPU subfolder...")
     message(paste0("'/", basename(VPU_SUBFOLDER), "' directory does not exist...\n  Creating directory:\n > '", VPU_SUBFOLDER, "'"))
     dir.create(VPU_SUBFOLDER)
   }
-  
-  if (!dir.exists(state_dir)) { 
-      message("Creating FEMA VPU states subfolder...")
-      message(paste0("'/", basename(state_dir), "' directory does not exist...\n  Creating directory:\n > '", state_dir, "'"))
-      
-      dir.create(state_dir)
-     
-     }
-  
-  if (!dir.exists(merged_dir)) { 
-    message("Creating FEMA VPU merged subfolder...")
-    message(paste0("'/", basename(merged_dir), "' directory does not exist...\n  Creating directory:\n > '", merged_dir, "'"))
-    
-    dir.create(merged_dir)
-    
-    }
-}
-
-# FEMA_VPU_SUBFOLDERS <- paste0(FEMA_BY_VPU_PATH, "/VPU_", VPU_IDS)
-
-
-# create FEMA GPKG Bounding Boxes directory (if not exists)
-if (!dir.exists(FEMA_GPKG_BB_PATH)) {
-  message(paste0(FEMA_GPKG_BB_PATH, " directory does not exist...\nCreating directory:\n > '", FEMA_GPKG_BB_PATH, "'"))
-  dir.create(FEMA_GPKG_BB_PATH)
+  # if (!dir.exists(state_dir)) { 
+  #   message("Creating FEMA VPU states subfolder...")
+  #   message(paste0("'/", basename(state_dir), "' directory does not exist...\n  Creating directory:\n > '", state_dir, "'"))
+  #   dir.create(state_dir)
+  # }
+  # if (!dir.exists(merged_dir)) { 
+  #   message("Creating FEMA VPU merged subfolder...")
+  #   message(paste0("'/", basename(merged_dir), "' directory does not exist...\n  Creating directory:\n > '", merged_dir, "'"))
+  #   dir.create(merged_dir)
+  # }
 }
 
 # -------------------------------------------------------------------------------------
 # ---- Get paths to downloaded FEMA 100 FGBs ----
 # -------------------------------------------------------------------------------------
 
-FEMA_FILENAMES  <- list.files(FEMA_FGB_PATH, full.names = FALSE)
-FEMA_FILE_PATHS <- paste0(FEMA_FGB_PATH, "/", FEMA_FILENAMES)
-
-# -------------------------------------------------------------------------------------
-# ---- Run ogr2ogr to get FGB files into geojson ----
-# -------------------------------------------------------------------------------------
+FEMA_FILENAMES        <- list.files(FEMA_FGB_PATH, full.names = FALSE)
+FEMA_FILE_PATHS       <- paste0(FEMA_FGB_PATH, "/", FEMA_FILENAMES)
 
 for (file in FEMA_FILENAMES) {
-  # message(file)
-
-  local_fema_path <- paste0(FEMA_FGB_PATH, "/", file)
   
-  geojson_filename     <- gsub(".fgb", ".geojson", file)
-  geojson_save_path    <- paste0(FEMA_GEOJSON_PATH, "/", geojson_filename)
+  STAGING_FILES_TO_DELETE <- c()
+  
+  # Convert FGB to GeoJSON
+  local_fema_path   <- paste0(FEMA_FGB_PATH, "/", file)
+  geojson_filename  <- gsub(".fgb", ".geojson", file)
+  geojson_save_path <- paste0(FEMA_GPKG_PATH, "/", geojson_filename)
   
   message("FEMA filename: '", file, "'")
   message("Converting \n > '", file, "' to geojson '", geojson_filename, "'")
   
-  geojson_exists <- file.exists(geojson_save_path)
+  geojson_exists  <- file.exists(geojson_save_path)
   
   message(" >>> '", geojson_filename, "' already exists? ", geojson_exists)
   message(" >>> Overwrite? ", OVERWRITE_FEMA_FILES)
   
-  # ogr2ogr command converting FGBs to GEOJSON for mapshaper processing
-  ogr2ogr_command = paste0("ogr2ogr ", geojson_save_path, " ", local_fema_path)
+  ogr2ogr_command <- paste0("ogr2ogr ", geojson_save_path, " ", local_fema_path)
   
-  if (OVERWRITE_FEMA_FILES) {
+  if (OVERWRITE_FEMA_FILES || !geojson_exists) {
     system(ogr2ogr_command)
-    message("Writting '", geojson_filename, "' to: \n > '", geojson_save_path, "'")
+    message("Writing '", geojson_filename, "' to: \n > '", geojson_save_path, "'")
+    
+    STAGING_FILES_TO_DELETE <- c(STAGING_FILES_TO_DELETE, geojson_save_path)
   }
   
-  message()
-}  
-
-# -------------------------------------------------------------------------------------
-# ---- Clean FEMA geometries (Simplify, Dissolve, Explode) ----
-# -------------------------------------------------------------------------------------
-
-# paths to FEMA 100 year flood plain files
-FEMA_geojson_paths      <- list.files(FEMA_GEOJSON_PATH, full.names = TRUE)
-
-for (file in FEMA_geojson_paths) {
+  # Clean GeoJSON
+  message("Simplify, dissolve, explode > '", geojson_filename, "'")
+  output_clean_filename <- gsub(".geojson", "_clean.geojson", geojson_filename)
+  output_clean_geojson_path     <- paste0(FEMA_GPKG_PATH, "/", output_clean_filename)
   
-  message("Simplify, dissolve, explode > '", basename(file), "'")
-  # message("Fema 100 year flood plain:\n > '", file, "'")
-  output_clean_filename <- gsub(".geojson", "_clean.geojson", basename(file))
-  output_path <- paste0(FEMA_CLEAN_PATH, "/", output_clean_filename)
-
-  clean_geojson_exists <- file.exists(output_path)
+  clean_geojson_exists  <- file.exists(output_clean_geojson_path)
   message(" >>> '", output_clean_filename, "' already exists? ", clean_geojson_exists)
   message(" >>> Overwrite? ", OVERWRITE_FEMA_FILES)
   
-  # start_fema <- sf::read_sf(file)
-  
-  # mapshaper_command = paste0('node  --max-old-space-size=16000 /opt/homebrew/bin/mapshaper ', file, 
-  #                            ' -simplify 0.15 visvalingam \\', 
-  #                            ' -dissolve \\', 
-  #                            ' -explode \\', 
-  #                            ' -o ', output_path
-  # )
-  
-  mapshaper_command = paste0('node  --max-old-space-size=16000 /opt/homebrew/bin/mapshaper ', file, 
+  mapshaper_command = paste0('node  --max-old-space-size=16000 /opt/homebrew/bin/mapshaper ', geojson_save_path, 
                              ' -dissolve2 FLD_AR_ID \\', 
                              ' -simplify 0.1 visvalingam \\', 
-                             # ' -explode \\',
                              ' -snap \\',
-                             ' -o ', output_path
+                             ' -o ', output_clean_geojson_path
   )
   
-  if (OVERWRITE_FEMA_FILES) {
+  
+  if (OVERWRITE_FEMA_FILES || !clean_geojson_exists) {
     message("Running mapshaper 'simplify', 'dissolve', and 'explode' via CLI...")
     system(mapshaper_command)
-    message("Writting '", output_clean_filename, "' to: \n > '", output_path, "'")
+    message("Writing '", output_clean_filename, "' to: \n > '", output_clean_geojson_path, "'")
+    
+    STAGING_FILES_TO_DELETE <- c(STAGING_FILES_TO_DELETE, output_clean_geojson_path)
   }
-  # end_fema <- sf::read_sf(output_path)
   
-  message()
-
-  }
-
-# -------------------------------------------------------------------------------------
-# ---- Convert cleaned FEMA geojson geometries to geopackages ----
-# -------------------------------------------------------------------------------------
-
-# paths to FEMA 100 year flood plain files
-FEMA_clean_paths      <- list.files(FEMA_CLEAN_PATH, full.names = TRUE)
-
-for (file in FEMA_clean_paths) {
-  message("Fema 100 year flood plain:\n > '", basename(file), "'")
-  
-  output_gpkg_filename <- gsub("_clean.geojson", "_clean.gpkg", basename(file))
-  output_path          <- paste0(FEMA_GPKG_PATH, "/", output_gpkg_filename)
+  # Convert cleaned GeoJSON to GeoPackage
+  message("Fema 100 year flood plain:\n > '", output_clean_filename, "'")
   
-  message("Converting GEOJSON file to GPKG:\n > '", basename(file), "' > '", output_gpkg_filename, "'")
+  output_gpkg_filename  <- gsub("_clean.geojson", "_clean.gpkg", output_clean_filename)
+  output_gpkg_path      <- paste0(FEMA_GPKG_PATH, "/", output_gpkg_filename)
   
-  clean_gpkg_exists <- file.exists(output_path)
+  message("Converting GEOJSON file to GPKG:\n > '", output_clean_filename, "' > '", output_gpkg_filename, "'")
   
+  clean_gpkg_exists <- file.exists(output_gpkg_path)
   message(" >>> '", output_gpkg_filename, "' already exists? ", clean_gpkg_exists)
   message(" >>> Overwrite? ", OVERWRITE_FEMA_FILES)
   
-  ogr2ogr_command <- paste0("ogr2ogr -nlt MULTIPOLYGON ", output_path, " ", file)
-  # ogr2ogr_command = paste0("ogr2ogr -nlt MULTIPOLYGON ", output_path, " ", file)
+  ogr2ogr_command <- paste0("ogr2ogr -nlt MULTIPOLYGON ", output_gpkg_path, " ", output_clean_geojson_path)
   
-  if (OVERWRITE_FEMA_FILES) {
+  if (OVERWRITE_FEMA_FILES || !clean_gpkg_exists) {
     system(ogr2ogr_command)
-    message("Writting '", output_gpkg_filename, "' to: \n > '", output_path, "'")
+    message("Writing '", output_gpkg_filename, "' to: \n > '", output_gpkg_path, "'")
+  }
+  
+  message("Deleting intermediary files\n")
+  for (delete_file in STAGING_FILES_TO_DELETE) {
+    if (file.exists(delete_file)) {
+      message("Deleting >>> '", delete_file, "'")
+      file.remove(delete_file)
+    }
+    
   }
+  
   message()
+  
 }
 
-# # -------------------------------------------------------------------------------------------------------------------
-# # ---- Apply final dissolve/snap and removal of internal boundaries in FEMA geometries  ----
-# # -------------------------------------------------------------------------------------------------------------------
+# -------------------------------------------------------------------------------------------------------------------
+# ---- Apply final dissolve/snap and removal of internal boundaries in FEMA geometries  ----
+# -------------------------------------------------------------------------------------------------------------------
 
 # paths to FEMA 100 year flood plain files
 FEMA_gpkg_paths      <- list.files(FEMA_GPKG_PATH, full.names = TRUE)
 
 for (file_path in FEMA_gpkg_paths) {
-  message("Applying hydrofab::clean_geometry() to:\n > '", basename(file_path), "'")
-   
+  message("Resolving internal boundaries, islands, and topology issues:\n > '", basename(file_path), "'")
+  
   fema <- sf::read_sf(file_path)
-
+  
   fema <-
     fema[!sf::st_is_empty(fema), ] %>% 
     sf::st_transform(5070)
- 
+  
   #  TODO: Snap using geos::geos_snap()
   # fema <-
   #   geos::geos_snap(
@@ -244,8 +194,8 @@ for (file_path in FEMA_gpkg_paths) {
   # TODO: we get this error when trying to use the geometry column after geos snapping
   # TODO: Error = "Error: Not compatible with STRSXP: [type=NULL]."
   # fema %>%
-    # sf::st_cast("POLYGON")
-
+  # sf::st_cast("POLYGON")
+  
   # TODO: Snap using sf::st_snap()
   # fema <- sf::st_snap(
   #             fema,
@@ -275,27 +225,6 @@ for (file_path in FEMA_gpkg_paths) {
   
   # mapview::mapview(fema, color = 'cyan', col.regions = "cyan") + 
   # mapview::mapview(end_fema, color = 'red', col.regions = "white") 
-  # mapview::mapview(start_fema$geom, color = "red", col.regions = "red") + 
-  #   mapview::mapview(end_fema$geom, color = 'limegreen', col.regions = "limegreen") + 
-  #   mapview::mapview(snap_union_sf, color = 'gold', col.regions = "gold") + 
-  # mapview::mapview(final_fema, color = 'white', col.regions = "white") + 
-  # mapview::mapview(fin, color = 'white', col.regions = "white") 
-
-  # message(" > ", nrow(fema), " POLYGONs")
-  # message("Start time: ", Sys.time())
-  # 
-  # fema_clean <- hydrofab::clean_geometry(
-  #   catchments = fema,
-  #   ID         = "fema_id"
-  #   )
-  # 
-  # fema_clean <-
-  #   fema_clean %>%
-  #   dplyr::mutate(
-  #     source = basename(file_path),
-  #     state  = gsub("-100yr-flood_valid_clean.gpkg", "", source)
-  #   ) %>%
-  #   dplyr::select(fema_id, source, state, areasqkm, geometry)
   
   fema <- 
     fema %>% 
@@ -306,13 +235,8 @@ for (file_path in FEMA_gpkg_paths) {
     dplyr::select(fema_id, source, state, 
                   # areasqkm, 
                   geometry)
-
-  message("End time: ", Sys.time())
   
-  # geom_diff <- sf::st_difference(fema[1, ], fema_clean[1, ])
-  # mapview::mapview(fema[1, ], col.regions = "red") +
-  # mapview::mapview(fema_clean[1, ], col.regions = "green") +
-  # mapview::mapview(geom_diff, col.regions = "white")
+  message("End time: ", Sys.time())
   
   if (OVERWRITE_FEMA_FILES) {
     message("Writting '", basename(file_path), "' to: \n > '", file_path, "'")
@@ -326,9 +250,9 @@ for (file_path in FEMA_gpkg_paths) {
   
 }
 
-# # -------------------------------------------------------------------------------------
-# # ---- Partion parts of each FEMA GPKGs to the a Nextgen VPU ---- 
-# # -------------------------------------------------------------------------------------
+# -------------------------------------------------------------------------------------
+# ---- Partition parts of each FEMA GPKG to a Nextgen VPU ---- 
+# -------------------------------------------------------------------------------------
 
 # Clean FEMA GPKG files
 FEMA_CLEAN_GPKG_PATHS      <- list.files(FEMA_GPKG_PATH, full.names = TRUE)
@@ -338,7 +262,9 @@ NEXTGEN_FILENAMES    <- list.files(nextgen_dir, full.names = FALSE)
 NEXTGEN_FILE_PATHS   <- paste0(nextgen_dir, NEXTGEN_FILENAMES)
 
 for (file_path in FEMA_CLEAN_GPKG_PATHS) {
+  
   fema_file <- basename(file_path)
+  
   message("Partioning FEMA polygons by VPU: \n > FEMA gpkg: '", fema_file, "'")
   
   # read in fema polygons
@@ -354,19 +280,21 @@ for (file_path in FEMA_CLEAN_GPKG_PATHS) {
     
     # read in nextgen flowlines 
     flines <- sf::read_sf(nextgen_path, layer = "flowpaths")
-
+    
     # get the FEMA polygons that intersect with the nextgen flowlines
     fema_intersect <- polygons_with_line_intersects(fema, flines)
-
+    
     fema_in_nextgen <-  nrow(fema_intersect) != 0
     
     message("FEMA intersects with nextgen flowlines? ", fema_in_nextgen)
-
+    
     if(fema_in_nextgen) {
       
       # create filepaths
       vpu_subfolder      <- paste0("VPU_", vpu)
-      vpu_subfolder_path <- paste0(FEMA_BY_VPU_PATH, "/", vpu_subfolder, "/states")
+      # vpu_subfolder_path <- paste0(FEMA_BY_VPU_PATH, "/", vpu_subfolder, "/states")
+      vpu_subfolder_path <- paste0(FEMA_BY_VPU_PATH, "/", vpu_subfolder)
+      
       # vpu_subfolder_path <- FEMA_VPU_SUBFOLDERS[grepl(vpu_subfolder, FEMA_VPU_SUBFOLDERS)]
       
       fema_intersect <-
@@ -377,11 +305,11 @@ for (file_path in FEMA_CLEAN_GPKG_PATHS) {
         dplyr::select(vpu, fema_id, source, state, geom)
       
       # state <- gsub("-100yr-flood_valid_clean.gpkg", "", fema_file)
-
+      
       fema_vpu_filename <- gsub(".gpkg", paste0("_", vpu, ".gpkg"), fema_file)
       fema_vpu_path     <- paste0(vpu_subfolder_path, "/", fema_vpu_filename)
       
-
+      
       if (OVERWRITE_FEMA_FILES) {
         message("Writting '", basename(fema_vpu_filename), "' to: \n > '", fema_vpu_path, "'")
         
@@ -390,8 +318,8 @@ for (file_path in FEMA_CLEAN_GPKG_PATHS) {
           fema_vpu_path
         )
       }
-
-        
+      
+      
     }
     message()
   }
@@ -401,7 +329,7 @@ for (file_path in FEMA_CLEAN_GPKG_PATHS) {
     "--------------------------------------------------------------\n", 
     "Completed all VPU intersections for: \n > '", fema_file, "'",
     "\n--------------------------------------------------------------\n"
-    )
+  )
   
 }
 
@@ -409,28 +337,26 @@ for (file_path in FEMA_CLEAN_GPKG_PATHS) {
 # ---- Loop through each VPU subfolder and merge all of the Geopackages into one---- 
 # -------------------------------------------------------------------------------------
 
-DELETE_STAGING_GPKGS <- FALSE 
+DELETE_STAGING_GPKGS <- F
 
 for (vpu_dir in FEMA_VPU_SUBFOLDERS) {
- # for (i in 1:4) {
-  # vpu_dir = FEMA_VPU_SUBFOLDERS[i]
+  # for (i in 1:4) {
+  # vpu_dir = FEMA_VPU_SUBFOLDERS2[12]
   message("Merging files in '", basename(vpu_dir), "' directory...")
- # }
+  # }
   
-  # vpu_dir <- '/Users/anguswatters/Desktop/lynker-spatial/FEMA_BY_VPU/VPU_06'
   vpu_subdirs    <- list.files(vpu_dir, full.names = TRUE)
   
-  states_dir <- vpu_subdirs[grepl(paste0(vpu_dir, "/states"), vpu_subdirs)]
-  merged_dir <- vpu_subdirs[grepl(paste0(vpu_dir, "/merged"), vpu_subdirs)]
+  # path to the merged directory where the final merged geopackage will end up
+  master_name       <- paste0("fema_", gsub("VPU", "vpu", basename(vpu_dir)))
+  master_gpkg_name  <- paste0(master_name, ".gpkg")
+  master_filepath   <- paste0(vpu_dir, "/", master_gpkg_name)
   
   # fema state geopackages partitioned for the specific VPU
-  fema_state_gpkgs <- list.files(states_dir, full.names = TRUE)
+  fema_state_gpkgs <- list.files(vpu_dir, full.names = TRUE)
   
-  master_name      <- paste0("fema_", gsub("VPU", "vpu", basename(vpu_dir)))
-  master_gpkg_name <- paste0(master_name, ".gpkg")
-  
-  # path to the merged directory where the final merged geopackge will end up
-  master_filepath  <- paste0(merged_dir, "/", master_gpkg_name)
+  # make sure to ignore the master file if it already exists
+  fema_state_gpkgs <- fema_state_gpkgs[fema_state_gpkgs != master_filepath]
   
   for(gpkg_file in fema_state_gpkgs) {
     # message(" - Appending '", basename(gpkg_file), "' to master FEMA VPU gpkg:\n  >  '", 
@@ -443,15 +369,17 @@ for (vpu_dir in FEMA_VPU_SUBFOLDERS) {
     ogr2ogr_merge_command <- paste0("ogr2ogr -f 'gpkg' -append -nln ", master_name, " ",   
                                     master_filepath, 
                                     " ", gpkg_file
-                                    )
+    )
     
     if (OVERWRITE_FEMA_FILES) {
       system(ogr2ogr_merge_command)
     }
   }
   
-  if(DELETE_STAGING_GPKGS) {
-    message(" - Deleting individual gpkgs from '/states' directory...")
+  has_fema_state_gpkgs <- length(fema_state_gpkgs) > 0
+  
+  if(DELETE_STAGING_GPKGS && has_fema_state_gpkgs) {
+    message(" - Deleting individual gpkgs from '", vpu_dir, "' directory...")
     # message("- Deleting individual gpkgs from 'states' directory:\n > '", states_dir, "'")
     
     remove_gpkg_cmds <- paste0("rm ", fema_state_gpkgs)
@@ -472,198 +400,105 @@ for (vpu_dir in FEMA_VPU_SUBFOLDERS) {
 # ---- Union each VPU geopackage (either on state or just touching predicate) ---- 
 # -------------------------------------------------------------------------------------
 
-MERGED_DIRS <- paste0(FEMA_VPU_SUBFOLDERS, "/merged")
-MERGED_DIRS
 for (i in 1:length(FEMA_VPU_SUBFOLDERS)) {
-  # i = 8
-  vpu_dir = FEMA_VPU_SUBFOLDERS[i]
-  
+  vpu_dir    <- FEMA_VPU_SUBFOLDERS[i]
   VPU        <- basename(vpu_dir)
   
-  message("Attempting to union FEMA polygons for '", VPU, "'...")
+  message(i, " - Attempting to union FEMA polygons for '", VPU, "'...")
   
-  merged_dir <- paste0(vpu_dir, "/merged")
-  fema_vpu_file <- list.files(merged_dir, full.names = TRUE)
+  # path to the merged directory where the final merged geopackage will end up
+  master_name       <- paste0("fema_", gsub("VPU", "vpu", basename(vpu_dir)))
+  master_gpkg_name  <- paste0(master_name, ".gpkg")
+  master_filepath   <- paste0(vpu_dir, "/", master_gpkg_name)
   
-  has_fema_vpu_file <- ifelse(length(fema_vpu_file) > 0, TRUE, FALSE)
-  # has_fema_vpu_file
-  # message()
-  # fema_vpu_file
-# }
-  if(!has_fema_vpu_file) { 
+  message("> Re-unioning and re-exploding geometries in '", basename(master_filepath), "'")
+  
+  if(!file.exists(master_filepath)) { 
     message("No FEMA geometries in '", VPU, "'")
     message()
     next
   }
-
-  message("> Re-unioning and re-exploding geometries in '", basename(fema_vpu_file), "'")
   
-  fema_vpu_file <- fema_vpu_file[!grepl("_union.gpkg", fema_vpu_file)]
-
-  fema_vpu <- sf::read_sf(fema_vpu_file)
-  # fema_vpu <-
-  #   fema_vpu %>%
-  #   dplyr::group_by(source) %>%
-  #   dplyr::summarise()  %>%
-  #   dplyr::ungroup()
- 
-   mapview::npts(fema_vpu)
-  fema_vpu2 <- 
+  
+  fema_vpu <- sf::read_sf(master_filepath)
+  
+  # fema_vpu %>% sf::st_geometry_type() %>% unique()
+  
+  fema_vpu <- 
     fema_vpu %>% 
-    nngeo::st_remove_holes(max_area = 20) %>%
+    nngeo::st_remove_holes(max_area = 200) %>%
     # dplyr::select(geometry = geom) %>%
     add_predicate_group_id(sf::st_intersects) %>% 
-    sf::st_make_valid() %>% 
+    sf::st_make_valid() %>%
     dplyr::group_by(group_id) %>% 
     dplyr::summarise(
       geometry = sf::st_combine(sf::st_union(geometry))
     ) %>% 
     dplyr::ungroup() %>% 
     dplyr::select(-group_id) %>%
-    add_predicate_group_id(sf::st_intersects) %>%
+    add_predicate_group_id(sf::st_intersects)
+  
+  geom_type_counts <- table(sf::st_geometry_type(fema_vpu))
+  
+  message("Geometry counts before casting all geometries to MULTIPOLYGON:")
+  for (g in seq_along(geom_type_counts)) {
+    message(" > ", names(geom_type_counts[g]), ": ", geom_type_counts[g])
+  }
+  
+  message("Keeping only POLYGON and MULTIPOLYGON geometries...")
+  fema_vpu <- 
+    fema_vpu %>%
+    dplyr::filter(sf::st_geometry_type(geometry) %in% c("POLYGON", "MULTIPOLYGON")) %>%
+    sf::st_cast("MULTIPOLYGON") %>%
+    sf::st_make_valid() %>%
+    # dplyr::group_by(group_id) %>% 
     rmapshaper::ms_dissolve(sys = TRUE, sys_mem = 16) %>%
     rmapshaper::ms_explode(sys = TRUE, sys_mem = 16) %>%
+    nngeo::st_remove_holes(max_area = 200) %>% 
     dplyr::mutate(
       fema_id = as.character(1:dplyr::n())
     ) %>% 
     dplyr::select(fema_id, geometry)
   
-  fema_vpu2 %>% mapview::npts()
-  
-  fema_vpu2_subset <- fema_vpu2[lengths(sf::st_intersects(fema_vpu2, fema_vpu[1:100, ]))  > 1, ]
-  
-  mapview::mapview(fema_vpu[1:100, ], color = 'red', col.regions = 'white') +
-    mapview::mapview(fema_vpu2_subset, color = 'green', col.regions = 'white')
-      # mapview::mapview(fema_vpu2[1:100, ], color = 'green', col.regions = 'white')
-    
-  # message("Removing holes before dissolve...")
-  fema_vpu <- nngeo::st_remove_holes(fema_vpu)
-  mapview::mapview(fema_vpu[1:100, ], color = 'red', col.regions = 'white') +
-  mapview::mapview(fema_vpu2[1:100, ], color = 'green', col.regions = 'white') 
-  # mapview::npts(fema_vpu)
-  all(sf::st_is_valid(fema_vpu))
-  rmapshaper::ms_innerlines(fema_vpu)
-  
-  
-  # message("Making valid geometries...")
-  # fema_vpu <- sf::st_make_valid(fema_vpu) 
-  
-  # fema_vpu <- 
-  #   fema_vpu %>% 
-  #   sf::st_cast("MULTIPOLYGON")
-  
-  message("Dissolving...")
-  
-  # 1421 = old number of polygons
-  fema_vpu2 <- rmapshaper::ms_dissolve(
-                            input   = fema_vpu,
-                            field   = "source",
-                            sys     = TRUE,
-                            sys_mem = 16
-                            )
-  
-  message("Exploding...")
-  
-  # mapview::npts(fema_vpu)
-  # mapview::npts(fema_vpu2)
-  
-  fema_vpu2 <- rmapshaper::ms_explode(
-                            input   = fema_vpu2,     
-                            sys     = TRUE,
-                            sys_mem = 16
-                            )
-  # mapview::npts(fema_vpu2)
-  message("Removing holes after explosion...")
-  fema_vpu2 <- nngeo::st_remove_holes(fema_vpu2)
-  
-  fema_vpu2 <- 
-    fema_vpu2 %>% 
-    add_predicate_group_id(sf::st_intersects) %>% 
-    dplyr::group_by(group_id) %>% 
-    dplyr::summarise(
-      geometry = sf::st_combine(sf::st_union(geometry))
-    )
+  # end_geom_type_counts <- table(sf::st_geometry_type(fema_vpu))
+  # message("Geometry counts after all processing steps: ")
+  # for (g in seq_along(end_geom_type_counts)) {
+  #   message(" > ", names(end_geom_type_counts[g]), ": ", end_geom_type_counts[g])
+  # }
   
-  mapview::mapview(fema_vpu[1:100, ], color = 'red', col.regions = 'white') +
-    mapview::mapview(fema_vpu2[1:100, ], color = 'green', col.regions = 'white') 
+  # fema_vpu2 %>% mapview::npts()
+  # fema_vpu2_subset <- fema_vpu2[lengths(sf::st_intersects(fema_vpu2, fema_vpu[1:100, ]))  > 1, ]
   
-  # mapview::npts(fema_vpu2)
-  sf::st_is_valid(fema_vpu2) %>% all()
+  # mapview::mapview(fema_vpu, color = 'red', col.regions = 'white') +
+  #       mapview::mapview(fema_vpu2, color = 'green', col.regions = 'white')
+  #   # mapview::mapview(fema_vpu2_subset, color = 'green', col.regions = 'white')
+  # # mapview::mapview(fema_vpu2[1:100, ], color = 'green', col.regions = 'white')
   
-  fema_vpu2 %>% 
-    sf::st_make_valid() %>% 
-    sf::st_geometry_type() %>% 
-    unique()
-  sf::st_geometry_type(fema_vpu2) %>% unique()
-  # slice_subset = 1:50
-  # fema_exp_noholes[slice_subset, ]
-  # mapview::mapview(  fema_vpu[1:100, ], col.regions = "dodgerblue")+ 
-  # mapview::mapview(  fema_exp[slice_subset, ], col.regions = "red") +
-  #   mapview::mapview(  fema_exp_noholes[slice_subset, ], col.regions = "green")
-  # fema_vpu <- rmapshaper::ms_dissolve(fema_vpu,
-  #                                    field = "source",
-  #                                    sys = TRUE,
-  #                                    sys_mem = 16
-  # # )
-  # fema_vpu  <- rmapshaper::ms_explode(fema_vpu,     
-  #                                     sys = TRUE,
-  #                                     sys_mem = 16)
-
   fema_vpu <-
     fema_vpu %>%
     # dplyr::group_by(source) %>%
     dplyr::mutate(
-      state    = tolower(gsub("-100yr-flood_valid_clean.gpkg", "", source)),
+      # state    = tolower(gsub("-100yr-flood_valid_clean.gpkg", "", source)),
       vpu      = gsub("VPU_", "", VPU),
       fema_id  = 1:dplyr::n()
-      ) %>%
+    ) %>%
     dplyr::ungroup() %>%
-    dplyr::select(vpu, fema_id, state, geom = geometry)
+    dplyr::select(vpu, fema_id, 
+                  # state, 
+                  geom = geometry)
   
   if (OVERWRITE_FEMA_FILES) {
-    union_file_path <- gsub(".gpkg", "_union.gpkg", fema_vpu_file)
-    message("> Writting '", basename(union_file_path), "' (unioned and exploded version)")
+    
+    message("> Overwritting '", basename(master_filepath), "' with final clean version...")
+    
+    # union_file_path <- gsub(".gpkg", "_union.gpkg", fema_vpu_file)
+    # message("> writting '", basename(union_file_path), "' (unioned and exploded version)")
+    
     sf::write_sf(
       fema_vpu,
-      union_file_path
-      )
+      master_filepath
+      # union_file_path
+    )
   }
   message()
-}
-
-# -------------------------------------------------------------------------------------
-# ---- Generate bounding box gpkg for each FEMA FGB ----
-# -------------------------------------------------------------------------------------
-
-for (key in FEMA_FILENAMES) {
- 
-  local_fema_path <- paste0(FEMA_FGB_PATH, "/", key)
-  
-  gpkg_filename   <- gsub(".fgb", "_bb.gpkg", key)
-  bb_save_path    <- paste0(FEMA_FGB_BB_PATH, "/", gpkg_filename)
-  
-  message("S3 Key: '", key, "'")
-  message("Local FEMA file:\n > '", local_fema_path, "'")
-  message("Local output FEMA bounding box file:\n > '", bb_save_path, "'")
-  
-  # fema <- sf::read_sf(local_fema_path)
-  
-  fema_bb <- 
-    local_fema_path %>% 
-    sf::read_sf() %>% 
-    sf::st_bbox() %>% 
-    sf::st_as_sfc() %>% 
-    sf::st_as_sf() %>% 
-    dplyr::mutate(
-      fema_fgb      = key,
-      fema_fgb_path = local_fema_path,
-      state         = gsub("-100yr-flood_valid.fgb", "", key)
-    ) %>% 
-    dplyr::select(fema_fgb, fema_fgb_path, state, geometry = x) %>% 
-    sf::st_transform(5070)
-  
-  message("Saving FEMA bounding box file:\n > '", bb_save_path, "'")
-  
-  sf::write_sf(fema_bb, bb_save_path)
-  message()
-}  
+}
\ No newline at end of file
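
Unioning grouped FEMA polygons can leave GEOMETRYCOLLECTION rows or stray
LINESTRING/POINT slivers, which is why the final-union loop above filters to POLYGON
and MULTIPOLYGON before sf::st_cast("MULTIPOLYGON"). A sketch of an equivalent guard,
assuming a plain sf data frame (sf::st_collection_extract keeps the polygonal parts
of collections instead of dropping those rows outright):

    polys <- sf::st_make_valid(fema_vpu)
    polys <- sf::st_collection_extract(polys, type = "POLYGON")
    polys <- sf::st_cast(polys, "MULTIPOLYGON")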
diff --git a/runners/cs_runner/preprocess_fema2.R b/runners/cs_runner/preprocess_fema2.R
new file mode 100644
index 0000000..2141946
--- /dev/null
+++ b/runners/cs_runner/preprocess_fema2.R
@@ -0,0 +1,504 @@
+# Script should be run AFTER download_fema100.R as the FEMA 100 year flood plain data needs to first be downloaded from S3
+# This file takes a directory of FEMA 100-year FGB files (FEMA_FGB_PATH) and runs the processes below to generate a cleaned, simplified set of geopackages 
+
+# Processing steps:
+# - Convert FGBs to GEOJSON (via ogr2ogr)
+# - Dissolve (mapshaper -dissolve2 on FLD_AR_ID)
+# - Simplify (mapshaper -simplify, visvalingam)
+# - Snap vertices (mapshaper -snap)
+# - Convert cleaned GEOJSON to cleaned GPKGs (via ogr2ogr)
+# - Apply final dissolve/snap and removal of internal boundaries
+# - Partition FEMA 100 geometries by VPU      # TODO still
+# - Get FEMA bounding box geometries (maybe)
+
+# load config variables
+source("runners/cs_runner/config_vars.R")
+source("runners/cs_runner/config.R")
+source("runners/cs_runner/utils.R")
+
+library(dplyr)
+library(sf)
+library(geos)
+library(fastmap)
+library(nngeo)
+
+# TODO: Steps that convert FGB to geojson and then geojson to gpkg can be put into a single loop
+# TODO: Delete old files as needed
+
+# -------------------------------------------------------------------------------------
+# ---- OVERWRITE_FEMA_FILES constant logical ----
+# ---- > if TRUE, processing steps will be run again 
+#          and overwrite existing previously processed files
+# -------------------------------------------------------------------------------------
+
+# Default is TRUE (i.e. a fresh processing run is done from start to finish)
+OVERWRITE_FEMA_FILES  <- TRUE
+DELETE_STAGING_GPKGS  <- TRUE
+
+# -------------------------------------------------------------------------------------
+# ---- Create directories (if they do NOT exist) ----
+# -------------------------------------------------------------------------------------
+
+# create directory for cleaned FEMA geometries as geopackages (if not exists) 
+if (!dir.exists(FEMA_GPKG_PATH)) {
+  message(paste0(FEMA_GPKG_PATH, " directory does not exist...\nCreating directory:\n > '", FEMA_GPKG_PATH, "'"))
+  dir.create(FEMA_GPKG_PATH)
+}
+
+# create directory for FEMA geometries partitioned by VPU
+if (!dir.exists(FEMA_BY_VPU_PATH)) {
+  message(paste0(FEMA_BY_VPU_PATH, " directory does not exist...\nCreating directory:\n > '", FEMA_BY_VPU_PATH, "'"))
+  dir.create(FEMA_BY_VPU_PATH)
+}
+
+for (VPU_SUBFOLDER in FEMA_VPU_SUBFOLDERS) {
+  # create directory for FEMA geometries by VPU
+  # message(VPU_SUBFOLDER)
+  
+  # state_dir  = paste0(VPU_SUBFOLDER, "/states/")
+  # merged_dir = paste0(VPU_SUBFOLDER, "/merged/")
+  
+  if (!dir.exists(VPU_SUBFOLDER)) {
+    message("Creating FEMA VPU subfolder...")
+    message(paste0("'/", basename(VPU_SUBFOLDER), "' directory does not exist...\n  Creating directory:\n > '", VPU_SUBFOLDER, "'"))
+    dir.create(VPU_SUBFOLDER)
+  }
+  # if (!dir.exists(state_dir)) { 
+  #   message("Creating FEMA VPU states subfolder...")
+  #   message(paste0("'/", basename(state_dir), "' directory does not exist...\n  Creating directory:\n > '", state_dir, "'"))
+  #   dir.create(state_dir)
+  # }
+  # if (!dir.exists(merged_dir)) { 
+  #   message("Creating FEMA VPU merged subfolder...")
+  #   message(paste0("'/", basename(merged_dir), "' directory does not exist...\n  Creating directory:\n > '", merged_dir, "'"))
+  #   dir.create(merged_dir)
+  # }
+}
+
+# -------------------------------------------------------------------------------------
+# ---- Get paths to downloaded FEMA 100 FGBs ----
+# -------------------------------------------------------------------------------------
+
+FEMA_FILENAMES        <- list.files(FEMA_FGB_PATH, full.names = FALSE)
+FEMA_FILE_PATHS       <- paste0(FEMA_FGB_PATH, "/", FEMA_FILENAMES)
+
+for (file in FEMA_FILENAMES) {
+  
+  STAGING_FILES_TO_DELETE <- c()
+  
+  # Convert FGB to GeoJSON
+  local_fema_path   <- paste0(FEMA_FGB_PATH, "/", file)
+  geojson_filename  <- gsub(".fgb", ".geojson", file)
+  geojson_save_path <- paste0(FEMA_GPKG_PATH, "/", geojson_filename)
+  
+  message("FEMA filename: '", file, "'")
+  message("Converting \n > '", file, "' to geojson '", geojson_filename, "'")
+  
+  geojson_exists  <- file.exists(geojson_save_path)
+  
+  message(" >>> '", geojson_filename, "' already exists? ", geojson_exists)
+  message(" >>> Overwrite? ", OVERWRITE_FEMA_FILES)
+  
+  ogr2ogr_command <- paste0("ogr2ogr ", geojson_save_path, " ", local_fema_path)
+  
+  if (OVERWRITE_FEMA_FILES || !geojson_exists) {
+    system(ogr2ogr_command)
+    message("Writing '", geojson_filename, "' to: \n > '", geojson_save_path, "'")
+    
+    STAGING_FILES_TO_DELETE <- c(STAGING_FILES_TO_DELETE, geojson_save_path)
+  }
+  
+  # Clean GeoJSON
+  message("Simplify, dissolve, explode > '", geojson_filename, "'")
+  output_clean_filename <- gsub(".geojson", "_clean.geojson", geojson_filename)
+  output_clean_geojson_path     <- paste0(FEMA_GPKG_PATH, "/", output_clean_filename)
+  
+  clean_geojson_exists  <- file.exists(output_clean_geojson_path)
+  message(" >>> '", output_clean_filename, "' already exists? ", clean_geojson_exists)
+  message(" >>> Overwrite? ", OVERWRITE_FEMA_FILES)
+  
+  mapshaper_command = paste0('node  --max-old-space-size=16000 /opt/homebrew/bin/mapshaper ', geojson_save_path, 
+                             ' -dissolve2 FLD_AR_ID \\', 
+                             ' -simplify 0.1 visvalingam \\', 
+                             ' -snap \\',
+                             ' -o ', output_clean_geojson_path
+                             )
+  
+  
+  if (OVERWRITE_FEMA_FILES || !clean_geojson_exists) {
+    message("Running mapshaper 'simplify', 'dissolve', and 'explode' via CLI...")
+    system(mapshaper_command)
+    message("Writing '", output_clean_filename, "' to: \n > '", output_clean_geojson_path, "'")
+    
+    STAGING_FILES_TO_DELETE <- c(STAGING_FILES_TO_DELETE, output_clean_geojson_path)
+  }
+  
+  # Convert cleaned GeoJSON to GeoPackage
+  message("Fema 100 year flood plain:\n > '", output_clean_filename, "'")
+  
+  output_gpkg_filename  <- gsub("_clean.geojson", "_clean.gpkg", output_clean_filename)
+  output_gpkg_path      <- paste0(FEMA_GPKG_PATH, "/", output_gpkg_filename)
+  
+  message("Converting GEOJSON file to GPKG:\n > '", output_clean_filename, "' > '", output_gpkg_filename, "'")
+  
+  clean_gpkg_exists <- file.exists(output_gpkg_path)
+  message(" >>> '", output_gpkg_filename, "' already exists? ", clean_gpkg_exists)
+  message(" >>> Overwrite? ", OVERWRITE_FEMA_FILES)
+  
+  ogr2ogr_command <- paste0("ogr2ogr -nlt MULTIPOLYGON ", output_gpkg_path, " ", output_clean_geojson_path)
+  
+  if (OVERWRITE_FEMA_FILES || !clean_gpkg_exists) {
+    system(ogr2ogr_command)
+    message("Writing '", output_gpkg_filename, "' to: \n > '", output_gpkg_path, "'")
+  }
+  
+  message("Deleting intermediary files\n")
+  for (delete_file in STAGING_FILES_TO_DELETE) {
+    if (file.exists(delete_file)) {
+      message("Deleting >>> '", delete_file, "'")
+      file.remove(delete_file)
+    }
+    
+  }
+  
+  message()
+  
+}
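+
+# NOTE: the loop above folds the old FGB -> GeoJSON -> cleaned GeoJSON -> GPKG chain
+# into one pass, accumulating every intermediate path in STAGING_FILES_TO_DELETE so
+# only the final '*_clean.gpkg' survives. The cleanup pattern in miniature
+# (hypothetical paths):
+#   staging <- c("fema/tmp.geojson", "fema/tmp_clean.geojson")
+#   for (f in staging) if (file.exists(f)) file.remove(f)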
+
+# -------------------------------------------------------------------------------------------------------------------
+# ---- Apply final dissolve/snap and removal of internal boundaries in FEMA geometries  ----
+# -------------------------------------------------------------------------------------------------------------------
+
+# paths to FEMA 100 year flood plain files
+FEMA_gpkg_paths      <- list.files(FEMA_GPKG_PATH, full.names = TRUE)
+
+for (file_path in FEMA_gpkg_paths) {
+  message("Resolving internal boundaries, islands, and topology issues:\n > '", basename(file_path), "'")
+  
+  fema <- sf::read_sf(file_path)
+  
+  fema <-
+    fema[!sf::st_is_empty(fema), ] %>% 
+    sf::st_transform(5070)
+  
+  #  TODO: Snap using geos::geos_snap()
+  # fema <-
+  #   geos::geos_snap(
+  #     geos::as_geos_geometry(fema),
+  #     geos::as_geos_geometry(fema),
+  #     tolerance = 1
+  #     ) %>%
+  #   geos::geos_make_valid()  %>%
+  #   sf::st_as_sf()
+  
+  # TODO: we get this error when trying to use the geometry column after geos snapping
+  # TODO: Error = "Error: Not compatible with STRSXP: [type=NULL]."
+  # fema %>%
+  # sf::st_cast("POLYGON")
+  
+  # TODO: Snap using sf::st_snap()
+  # fema <- sf::st_snap(
+  #             fema,
+  #             fema,
+  #             tolerance = 2
+  #             )
+  
+  fema <-
+    fema %>% 
+    # fema[!sf::st_is_empty(fema), ] %>% 
+    dplyr::select(geometry = geom) %>%
+    add_predicate_group_id(sf::st_intersects) %>% 
+    sf::st_make_valid() %>% 
+    dplyr::group_by(group_id) %>% 
+    dplyr::summarise(
+      geometry = sf::st_combine(sf::st_union(geometry))
+    ) %>% 
+    dplyr::ungroup() %>% 
+    dplyr::select(-group_id) %>% 
+    add_predicate_group_id(sf::st_intersects) %>% 
+    rmapshaper::ms_dissolve(sys = TRUE, sys_mem = 16) %>% 
+    rmapshaper::ms_explode(sys = TRUE, sys_mem = 16) %>% 
+    dplyr::mutate(
+      fema_id = as.character(1:dplyr::n())
+    ) %>% 
+    dplyr::select(fema_id, geometry)
+  
+  # mapview::mapview(fema, color = 'cyan', col.regions = "cyan") + 
+  # mapview::mapview(end_fema, color = 'red', col.regions = "white") 
+  
+  fema <- 
+    fema %>% 
+    dplyr::mutate(
+      source = basename(file_path),
+      state  = gsub("-100yr-flood_valid_clean.gpkg", "", source)
+    ) %>%
+    dplyr::select(fema_id, source, state, 
+                  # areasqkm, 
+                  geometry)
+  
+  message("End time: ", Sys.time())
+  
+  if (OVERWRITE_FEMA_FILES) {
+    message("Writting '", basename(file_path), "' to: \n > '", file_path, "'")
+    sf::write_sf(
+      # fema_clean,
+      fema,
+      file_path
+    )
+  }
+  message()
+  
+}
+
+# -------------------------------------------------------------------------------------
+# ---- Partition parts of each FEMA GPKG to a Nextgen VPU ---- 
+# -------------------------------------------------------------------------------------
+
+# Clean FEMA GPKG files
+FEMA_CLEAN_GPKG_PATHS      <- list.files(FEMA_GPKG_PATH, full.names = TRUE)
+
+# paths to nextgen datasets and model attribute parquet files
+NEXTGEN_FILENAMES    <- list.files(nextgen_dir, full.names = FALSE)
+NEXTGEN_FILE_PATHS   <- paste0(nextgen_dir, NEXTGEN_FILENAMES)
+
+for (file_path in FEMA_CLEAN_GPKG_PATHS) {
+  
+  fema_file <- basename(file_path)
+  
+  message("Partioning FEMA polygons by VPU: \n > FEMA gpkg: '", fema_file, "'")
+  
+  # read in fema polygons
+  fema <- sf::read_sf(file_path)
+  
+  for (nextgen_path in NEXTGEN_FILE_PATHS) {
+    nextgen_basename <- basename(nextgen_path)
+    vpu              <- unlist(regmatches(nextgen_basename, gregexpr("\\d+[A-Za-z]*", nextgen_basename)))
+    
+    message("VPU: ", vpu)   
+    message("- nextgen gpkg:\n > '", nextgen_path, "'")   
+    message(" > Checking if '", fema_file, "' intersects with '", nextgen_basename, "'")
+    
+    # read in nextgen flowlines 
+    flines <- sf::read_sf(nextgen_path, layer = "flowpaths")
+    
+    # get the FEMA polygons that intersect with the nextgen flowlines
+    fema_intersect <- polygons_with_line_intersects(fema, flines)
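+    
+    # NOTE: `polygons_with_line_intersects()` is a helper defined elsewhere in
+    # these runners. A minimal sketch of the assumed behavior (not necessarily
+    # the exact implementation): keep only polygons touched by >= 1 flowline.
+    # polygons_with_line_intersects <- function(polygons, lines) {
+    #   polygons[lengths(sf::st_intersects(polygons, lines)) > 0, ]
+    # }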
+    
+    fema_in_nextgen <-  nrow(fema_intersect) != 0
+    
+    message("FEMA intersects with nextgen flowlines? ", fema_in_nextgen)
+    
+    if(fema_in_nextgen) {
+      
+      # create filepaths
+      vpu_subfolder      <- paste0("VPU_", vpu)
+      # vpu_subfolder_path <- paste0(FEMA_BY_VPU_PATH, "/", vpu_subfolder, "/states")
+      vpu_subfolder_path <- paste0(FEMA_BY_VPU_PATH, "/", vpu_subfolder)
+      
+      # vpu_subfolder_path <- FEMA_VPU_SUBFOLDERS[grepl(vpu_subfolder, FEMA_VPU_SUBFOLDERS)]
+      
+      fema_intersect <-
+        fema_intersect %>%
+        dplyr::mutate(
+          vpu = vpu
+        ) %>%
+        dplyr::select(vpu, fema_id, source, state, geom)
+      
+      # state <- gsub("-100yr-flood_valid_clean.gpkg", "", fema_file)
+      
+      fema_vpu_filename <- gsub(".gpkg", paste0("_", vpu, ".gpkg"), fema_file)
+      fema_vpu_path     <- paste0(vpu_subfolder_path, "/", fema_vpu_filename)
+      
+      
+      if (OVERWRITE_FEMA_FILES) {
+        message("Writting '", basename(fema_vpu_filename), "' to: \n > '", fema_vpu_path, "'")
+        
+        sf::write_sf(
+          fema_intersect,
+          fema_vpu_path
+        )
+      }
+      
+      
+    }
+    message()
+  }
+  
+  
+  message(
+    "--------------------------------------------------------------\n", 
+    "Completed all VPU intersections for: \n > '", fema_file, "'",
+    "\n--------------------------------------------------------------\n"
+  )
+  
+}
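+
+# With the paths above, the partitioned files land in a layout like this
+# (hypothetical state/VPU codes):
+#   <FEMA_BY_VPU_PATH>/VPU_01/CA-100yr-flood_valid_clean_01.gpkg
+#   <FEMA_BY_VPU_PATH>/VPU_02/CA-100yr-flood_valid_clean_02.gpkg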
+
+# -------------------------------------------------------------------------------------
+# ---- Loop through each VPU subfolder and merge all of the Geopackages into one---- 
+# -------------------------------------------------------------------------------------
+
+DELETE_STAGING_GPKGS <- FALSE
+
+for (vpu_dir in FEMA_VPU_SUBFOLDERS) {
+  # for (i in 1:4) {
+  # vpu_dir = FEMA_VPU_SUBFOLDERS2[12]
+  message("Merging files in '", basename(vpu_dir), "' directory...")
+  # }
+  
+  vpu_subdirs    <- list.files(vpu_dir, full.names = TRUE)
+  
+  # path to the merged directory where the final merged geopackage will end up
+  master_name       <- paste0("fema_", gsub("VPU", "vpu", basename(vpu_dir)))
+  master_gpkg_name  <- paste0(master_name, ".gpkg")
+  master_filepath   <- paste0(vpu_dir, "/", master_gpkg_name)
+  
+  # fema state geopackages partitioned for the specific VPU
+  fema_state_gpkgs <- list.files(vpu_dir, full.names = TRUE)
+  
+  # make sure to ignore the master file if it already exists
+  fema_state_gpkgs <- fema_state_gpkgs[fema_state_gpkgs != master_filepath]
+  
+  for(gpkg_file in fema_state_gpkgs) {
+    # message(" - Appending '", basename(gpkg_file), "' to master FEMA VPU gpkg:\n  >  '", 
+    #         basename(gpkg_file), " > ", basename(master_filepath), 
+    #         "'")
+    message(" > '", 
+            basename(gpkg_file), " > ", basename(master_filepath), 
+            "'")
+    
+    ogr2ogr_merge_command <- paste0("ogr2ogr -f 'gpkg' -append -nln ", master_name, " ",   
+                                    master_filepath, 
+                                    " ", gpkg_file
+    )
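+    
+    # The string above expands to a shell command like (hypothetical paths):
+    # ogr2ogr -f 'gpkg' -append -nln fema_vpu_01
+    #   <FEMA_BY_VPU_PATH>/VPU_01/fema_vpu_01.gpkg
+    #   <FEMA_BY_VPU_PATH>/VPU_01/CA-100yr-flood_valid_clean_01.gpkg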
+    
+    if (OVERWRITE_FEMA_FILES) {
+      system(ogr2ogr_merge_command)
+    }
+  }
+  
+  has_fema_state_gpkgs <- length(fema_state_gpkgs) > 0
+  
+  if(DELETE_STAGING_GPKGS && has_fema_state_gpkgs) {
+    message(" - Deleting individual gpkgs from '", vpu_dir, "' directory...")
+    # message("- Deleting individual gpkgs from 'states' directory:\n > '", states_dir, "'")
+    
+    remove_gpkg_cmds <- paste0("rm ", fema_state_gpkgs)
+    
+    for (remove_cmd in remove_gpkg_cmds) {
+      message("  >  '", remove_cmd, "'")
+      system(remove_cmd)
+    }
+  }
+  
+  # message()
+  message("Merge complete!")
+  message("Merged '", basename(vpu_dir), "' FEMA output geopackage:\n --> '", master_filepath, "'")
+  message()
+}
+
+# -------------------------------------------------------------------------------------
+# ---- Union each VPU geopackage (either on state or just touching predicate) ---- 
+# -------------------------------------------------------------------------------------
+
+for (i in 1:length(FEMA_VPU_SUBFOLDERS)) {
+  vpu_dir    <- FEMA_VPU_SUBFOLDERS[i]
+  VPU        <- basename(vpu_dir)
+  
+  message(i, " - Attempting to union FEMA polygons for '", VPU, "'...")
+
+  # path to the merged directory where the final merged geopackage will end up
+  master_name       <- paste0("fema_", gsub("VPU", "vpu", basename(vpu_dir)))
+  master_gpkg_name  <- paste0(master_name, ".gpkg")
+  master_filepath   <- paste0(vpu_dir, "/", master_gpkg_name)
+  
+  message("> Re-unioning and re-exploding geometries in '", basename(master_filepath), "'")
+  
+  if(!file.exists(master_filepath)) { 
+    message("No FEMA geometries in '", VPU, "'")
+    message()
+    next
+  }
+  
+  
+  fema_vpu <- sf::read_sf(master_filepath)
+  
+  # fema_vpu %>% sf::st_geometry_type() %>% unique()
+
+  fema_vpu <- 
+    fema_vpu %>% 
+    nngeo::st_remove_holes(max_area = 200) %>%
+    # dplyr::select(geometry = geom) %>%
+    add_predicate_group_id(sf::st_intersects) %>% 
+    sf::st_make_valid() %>%
+    dplyr::group_by(group_id) %>% 
+    dplyr::summarise(
+      geometry = sf::st_combine(sf::st_union(geometry))
+    ) %>% 
+    dplyr::ungroup() %>% 
+    dplyr::select(-group_id) %>%
+    add_predicate_group_id(sf::st_intersects)
+  
+  geom_type_counts <- table(sf::st_geometry_type(fema_vpu))
+  
+  message("Geometry counts before casting all geometries to MULTIPOLYGON:")
+  for (g in seq_along(geom_type_counts)) {
+    message(" > ", names(geom_type_counts[g]), ": ", geom_type_counts[g])
+  }
+  
+  message("Keeping only POLYGON and MULTIPOLYGON geometries...")
+  fema_vpu <- 
+    fema_vpu %>%
+    dplyr::filter(sf::st_geometry_type(geometry) %in% c("POLYGON", "MULTIPOLYGON")) %>%
+    sf::st_cast("MULTIPOLYGON") %>%
+    sf::st_make_valid() %>%
+    # dplyr::group_by(group_id) %>% 
+    rmapshaper::ms_dissolve(sys = TRUE, sys_mem = 16) %>%
+    rmapshaper::ms_explode(sys = TRUE, sys_mem = 16) %>%
+    nngeo::st_remove_holes(max_area = 200) %>% 
+    dplyr::mutate(
+      fema_id = as.character(1:dplyr::n())
+    ) %>% 
+    dplyr::select(fema_id, geometry)
+  
+  # end_geom_type_counts <- table(sf::st_geometry_type(fema_vpu))
+  # message("Geometry counts after all processing steps: ")
+  # for (g in seq_along(end_geom_type_counts)) {
+  #   message(" > ", names(end_geom_type_counts[g]), ": ", end_geom_type_counts[g])
+  # }
+  
+  # fema_vpu2 %>% mapview::npts()
+  # fema_vpu2_subset <- fema_vpu2[lengths(sf::st_intersects(fema_vpu2, fema_vpu[1:100, ]))  > 1, ]
+  
+  # mapview::mapview(fema_vpu, color = 'red', col.regions = 'white') +
+  #       mapview::mapview(fema_vpu2, color = 'green', col.regions = 'white')
+  #   # mapview::mapview(fema_vpu2_subset, color = 'green', col.regions = 'white')
+  # # mapview::mapview(fema_vpu2[1:100, ], color = 'green', col.regions = 'white')
+  
+  fema_vpu <-
+    fema_vpu %>%
+    # dplyr::group_by(source) %>%
+    dplyr::mutate(
+      # state    = tolower(gsub("-100yr-flood_valid_clean.gpkg", "", source)),
+      vpu      = gsub("VPU_", "", VPU),
+      fema_id  = 1:dplyr::n()
+    ) %>%
+    dplyr::ungroup() %>%
+    dplyr::select(vpu, fema_id, 
+                  # state, 
+                  geom = geometry)
+  
+  if (OVERWRITE_FEMA_FILES) {
+    
+    message("> Overwritting '", basename(master_filepath), "' with final clean version...")
+    
+    # union_file_path <- gsub(".gpkg", "_union.gpkg", fema_vpu_file)
+    # message("> writting '", basename(union_file_path), "' (unioned and exploded version)")
+    
+    sf::write_sf(
+      fema_vpu,
+      master_filepath
+      # union_file_path
+    )
+  }
+  message()
+}
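+
+# Quick sanity check of one final per-VPU output (hypothetical path):
+# sf::read_sf(paste0(FEMA_BY_VPU_PATH, "/VPU_01/fema_vpu_01.gpkg")) %>%
+#   sf::st_geometry_type() %>%
+#   table()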

From e7e8023b7dca56665bb17a17285bd978e9fdf31b Mon Sep 17 00:00:00 2001
From: anguswg-ucsb <anguswatters@gmail.com>
Date: Wed, 17 Jul 2024 13:55:06 -0700
Subject: [PATCH 44/64] cleaned up and deleted old scripts, finalized FEMA
 polygon simplification, dissolve, explode process

---
 runners/cs_runner/add_fema_to_transects.R | 1080 ---------------------
 runners/cs_runner/preprocess_fema.R       |  201 +++-
 runners/cs_runner/preprocess_fema2.R      |  504 ----------
 3 files changed, 153 insertions(+), 1632 deletions(-)
 delete mode 100644 runners/cs_runner/add_fema_to_transects.R
 delete mode 100644 runners/cs_runner/preprocess_fema2.R

diff --git a/runners/cs_runner/add_fema_to_transects.R b/runners/cs_runner/add_fema_to_transects.R
deleted file mode 100644
index a5ac6f3..0000000
--- a/runners/cs_runner/add_fema_to_transects.R
+++ /dev/null
@@ -1,1080 +0,0 @@
-library(dplyr) 
-library(lwgeom)
-
-# Generate the flowlines layer for the final cross_sections_<VPU>.gpkg for each VPU
-source("runners/cs_runner/config.R")
-
-# transect bucket prefix
-transects_prefix <- paste0(s3_bucket, version_prefix, "/3d/transects/")
-
-# paths to nextgen datasets and model attribute parquet files
-nextgen_files   <- list.files(nextgen_dir, full.names = FALSE)
-transects_files <- list.files(transects_dir, full.names = FALSE)
-transects_files <- transects_files[!grepl("updated", transects_files)]
-
-
-net_source <- "hydrofabric3d"
-
-# ensure the files are in the same order and matched up by VPU
-path_df <- align_files_by_vpu(
-  x    = nextgen_files,
-  y    = transects_files,
-  base = base_dir
-)
-
-path_df
-
-# # install.packages("devtools")
-devtools::install_github("anguswg-ucsb/hydrofabric3D")
-
-# loop over each VPU and generate cross sections, then save locally and upload to S3 bucket
-# for(i in 1:nrow(path_df)) {
-  
-  i = 8
-  
-  # nextgen file and full path
-  nextgen_file <- path_df$x[i]
-  nextgen_path <- paste0(nextgen_dir, nextgen_file)
-  
-  transect_file <- path_df$y[i]
-  transect_path <- paste0(transects_dir, transect_file)
-  
-  vpu <- path_df$vpu[i]
-  transect_path
-  
-  # # model attributes file and full path
-  # model_attr_file <- path_df$y[i]
-  # model_attr_path <- paste0(model_attr_dir, model_attr_file)
-  
-  message("creating vpu ", path_df$vpu[i], "\n - transects: ", transect_file, "\n - flowpaths: '", nextgen_file, "'")
-  # message("creating vpu ", path_df$vpu[i], " transects:\n - flowpaths: '", nextgen_file, "'\n - model attributes: '", model_attr_file, "'")
-  
- 
-  fema_vpu_dir <- paste0(FEMA_VPU_SUBFOLDERS[grepl(paste0("VPU_", vpu), basename(FEMA_VPU_SUBFOLDERS))], "/merged")
-  # fema_vpu_dir <- paste0(FEMA_VPU_SUBFOLDERS[grepl(paste0("VPU_", vpu), basename(FEMA_VPU_SUBFOLDERS))], "/merged")
-
-  vpu_fema_files <- list.files(fema_vpu_dir, full.names = TRUE)
-  # vpu_fema_file1 <- vpu_fema_files[grepl("_union.gpkg", vpu_fema_files)]
-  
-  # vpu_fema_file <- vpu_fema_files[grepl(paste0(vpu, ".gpkg"), vpu_fema_files)]
-  vpu_fema_file <- vpu_fema_files[grepl(paste0(vpu, "_union.gpkg"), vpu_fema_files)]
-  vpu_fema_file
-  
-  # fema polygons and transect lines
-  fema <- sf::read_sf(vpu_fema_file)
-  # mapview::npts(fema)
-  
-  transects <- sf::read_sf(transect_path)
-  
-  # read in nextgen flowlines data
-  flines <- sf::read_sf(nextgen_path, layer = "flowpaths")
-  # 
-  # system.time({
-  #  extended_transects <- get_transect_extension_distances_to_polygons(transects, fema, flines, 3000)
-  # })
-  # 
-  # sf::write_sf(extended_transects, '/Users/anguswatters/Desktop/test_fema_extended_trans.gpkg')
-  # ------- ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- --- 
-  # ----- Generate plots of extensions ----
-  # ------- ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- --- 
-  extended_transects <- sf::read_sf('/Users/anguswatters/Desktop/test_fema_extended_trans.gpkg') %>% 
-    hydrofabric3D::add_tmp_id()
-  # ids_of_interest = c("wb-1002167", "wb-1002166", "wb-1002165", "wb-1002164")
-  # ids_of_interest = c( "wb-1002166", "wb-1002165")
-  # ids_of_interest = c("wb-1014540", "wb-1014541", "wb-1014542", "wb-1014570", "wb-1014574",
-  #   "wb-1014575", "wb-1014572" ,"wb-1014573" ,"wb-1014571", 
-  #   "wb-1014568", "wb-1014569", "wb-1014567", "wb-1014543")
-
-  # ids_of_interest = c( "wb-1014542")
-  
-  any_extended <- 
-    extended_transects %>% 
-    dplyr::filter(
-      (left_is_extended & right_is_extended) | (left_is_extended & !right_is_extended) | (!left_is_extended & right_is_extended)
-    ) %>%
-    dplyr::mutate(
-      extend_status = dplyr::case_when(
-        (left_is_extended & right_is_extended)  ~ "both",
-        (left_is_extended & !right_is_extended) ~ "left_only",
-        (!left_is_extended & right_is_extended) ~ "right_only",
-        TRUE ~ "no_extension"
-      )
-    ) %>% 
-    dplyr::mutate(
-      extend_distance = left_distance + right_distance
-    ) %>% 
-    dplyr::relocate(extend_status, extend_distance) %>% 
-    dplyr::filter(hy_id == "wb-1002550") 
-    # dplyr::slice(1:1500)
-  
-  fema_subset_intersects <- sf::st_intersects(fema, any_extended)
-  fema_polygons <- fema[lengths(fema_subset_intersects) > 0, ]
-  fema_polygons
-  
-  og_trans <- 
-    transects %>% 
-    hydrofabric3D::add_tmp_id() %>% 
-    dplyr::filter(tmp_id %in% any_extended$tmp_id | hy_id %in% any_extended$hy_id) %>% 
-    dplyr::mutate(
-      extend_status = "original"
-    ) 
-  
-  Flowlines <-
-    flines %>% 
-    dplyr::filter(id %in% unique(any_extended$hy_id))
-  
-  FEMA = fema_polygons
-  Extended = sf::st_buffer(any_extended, 10)
-  Original = sf::st_buffer(og_trans, 10)
-  
-  mapview::mapview(FEMA, col.regions = "dodgerblue") + 
-    mapview::mapview(Flowlines, color = "darkblue") + 
-    mapview::mapview(Extended, col.regions = "green") +
-    mapview::mapview(Original, col.regions = "red") 
-    # mapview::mapview(Extended, color = "green") +
-    # mapview::mapview(Original, color = "red") 
-  extend_subset <- 
-    any_extended %>% 
-    dplyr::filter(hy_id %in% ids_of_interest)
-    # dplyr::group_by(extend_status) %>%
-    # dplyr::arrange(-extend_distance, .by_group = TRUE) %>% 
-    # dplyr::slice(
-    #   which.min(extend_distance),
-    #   which.max(extend_distance)
-    # )
-    # dplyr::slice(1)
-  extend_subset <-
-    any_extended %>%
-    dplyr::group_by(extend_status) %>%
-    dplyr::arrange(-extend_distance, .by_group = TRUE) %>%
-    # dplyr::slice(
-    #   which.min(extend_distance),
-    #   which.max(extend_distance)
-    # )
-    dplyr::slice(2000:2020)
-  extend_subset
-  
-  fline_subset <-
-    flines %>% 
-    dplyr::filter(id %in% unique(extend_subset$hy_id))
-  
-  plot_data <- 
-    dplyr::bind_rows(
-    transects %>% 
-      hydrofabric3D::add_tmp_id() %>% 
-      dplyr::filter(tmp_id %in% extend_subset$tmp_id) %>% 
-      dplyr::mutate(
-        extend_status = "original"
-      ),
-    dplyr::filter(extend_subset, 
-                  extend_status != "no_extension")
-  ) %>% 
-  dplyr::relocate(extend_status, extend_distance) %>% 
-    dplyr::mutate(
-      extend_status = dplyr::case_when(
-        extend_status != "original" ~ "extended",
-        TRUE ~ extend_status
-      )
-    ) 
-  
-  fema_subset_intersects <- sf::st_intersects(fema, fline_subset)
-  fema_polygons <- fema[lengths(fema_subset_intersects) > 0, ]
-  fema_polygons
-  
-  flines[lengths(sf::st_intersects(flines, fema_polygons)) > 0, ] %>% 
-    dplyr::pull(id) %>% 
-    unique()
-  fema_polygons
-  sf::st_crop(fema_polygons, fline_subset)
-  ggplot2::ggplot() +
-    ggplot2::geom_sf(data =   sf::st_crop(fema_polygons, fline_subset), fill = "grey") +
-    ggplot2::geom_sf(data = fline_subset, color = "black", lwd = 1) +
-    # ggplot2::geom_sf(data = plot_data, ggplot2::aes(color = extend_status))
-   ggplot2::geom_sf(data = dplyr::filter(plot_data, extend_status == "extended"),
-                    color = "green") + 
-   ggplot2::geom_sf(data = dplyr::filter(plot_data, extend_status == "original"), 
-                    color = "red")
-
-  
-  mapview::mapview(fema_polygons, col.regions = "dodgerblue") + 
-  mapview::mapview(fline_subset, color = "dodgerblue") + 
-    mapview::mapview(dplyr::filter(plot_data, extend_status == "extended"), color = "green") +
-    mapview::mapview(dplyr::filter(plot_data, extend_status == "original"), color = "red") 
-  transects %>% 
-    hydrofabric3D::add_tmp_id() %>% 
-    dplyr::filter(tmp_id %in% extend_subset$tmp_id) %>% 
-    dplyr::mutate(
-      extend_status = "original"
-    )
-  
-  extend_subset
-  
-  ggplot2::ggplot() +
-    ggplot2::geom_sf(data = fline_subset, color = "black", lwd = 5) +
-    ggplot2::geom_sf(data = extend_subset, ggplot2::aes(color = extend_status))
-  
-  mapview::mapview(fline_subset, color = "dodgerblue") + 
-   mapview::mapview(extend_subset, color = "green") 
-  
-  
-  both_extended <-
-    extended_transects %>% 
-    dplyr::filter(left_is_extended, right_is_extended)
-  
-  left_only_extended <- 
-    extended_transects %>% 
-    dplyr::filter(left_is_extended, !right_is_extended) 
-  
-  right_only_extended <- 
-    extended_transects %>% 
-    dplyr::filter(!left_is_extended, right_is_extended)
-  
-  unique(both_extended$hy_id)
-  
-  extended_transects %>% 
-    dplyr::filter(
-      # tmp_id %in% unique(left_only_extended$tmp_id),
-      # NOTE: use the vectorized `|` (not scalar `||`) inside dplyr::filter()
-      tmp_id %in% unique(right_only_extended$tmp_id) | 
-      tmp_id %in% unique(both_extended$tmp_id)
-      )
-  
-  unique(extended_transects$hy_id)
-  # ------- ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- --- 
-  # ------- ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- --- 
-  # ------- ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- ---  
-  # library(nngeo)
-  # fema %>% 
-  #   dplyr::group_by(fema_id) %>% 
-  #   dplyr::mutate(
-  #     n = dplyr::n()
-  #   ) %>% 
-  #   dplyr::arrange(-n) %>% 
-  #   dplyr::relocate(n)
-  # fema_sub <-   
-  #   fema %>% 
-  #   dplyr::filter(fema_id %in% c(1726))
-  # # fema %>% 
-  #   # dplyr::filter(fema_id %in% c(1726)) %>%
-  #   mapview::mapview(fema_sub[1, ], col.regions = "red") +
-  #       mapview::mapview(sf::st_buffer(fema_sub[2, ], 500), col.regions = "green")
-  # fema_no_holes <- nngeo::st_remove_holes(fema)
-  # fema_no_holes_union <- sf::st_union(fema_no_holes)
-  # touching_list = sf::st_touches(fema_no_holes)
-  # mapview::npts(fema)
-  # mapview::npts(fema_no_holes)
-  # mapview::npts(fema_no_holes_union)
-  # fema_no_holes %>% 
-  #   dplyr::mutate(new_fema_id = 1:dplyr::n()) %>% 
-  #   dplyr::group_by(new_fema_id)
-  # fema_no_holes_union
-  # fema$fema_id %>% unique() %>% length()
-  # # union then explode FEMA polygons 
-  # fema <- 
-  #   fema %>%
-  #   sf::st_union()
-  # # reassign IDs and change geometry column name
-  # fema <- 
-  #   fema %>% 
-  #   sf::st_as_sf() %>% 
-  #   dplyr::mutate(fema_id = 1:dplyr::n()) %>% 
-  #   dplyr::select(fema_id, geom = x)
-  
-  # sf::st_union() %>% 
-  #   rmapshaper::ms_explode() %>% 
-  #   sf::st_as_sf() %>% 
-  #   dplyr::mutate(fema_id = 1:dplyr::n()) %>% 
-  #   dplyr::select(fema_id, geom = x)
-  # Given 2 geos_geometry point geometries, create a line between the 2 points
-  # start: geos_geometry, point
-  # end: geos_geometry, point
-  # Returns geos_geometry linestring
-  make_line_from_start_and_end_pts <- function(start, end, line_crs) {
-        Y_start <- geos::geos_y(start)
-        X_start <- geos::geos_x(start)
-        Y_end   <- geos::geos_y(end)
-        X_end   <- geos::geos_x(end)
-        
-        # make the new transect line from the start and end points
-        geos_ls <- geos::geos_make_linestring(x = c(X_start, X_end),
-                                                 y = c(Y_start, Y_end), 
-                                                 crs = line_crs)
-                                              
-        return(geos_ls)                                              
-        
-        
-  }
-  
-  #' Check if an updated transect line is valid relative to the other transects and flowlines in the network
-  #' The 'transect_to_check' should be 'used' (i.e. function returns TRUE) if 
-  #' the 'transect_to_check' does NOT intersect any other transects ('transect_lines') AND it only intersects a single flowline ONCE.
-  #' If the 'transect_to_check' intersects ANY other transects OR intersects a flowline more
-  #' than once (OR more than one flowline in the network) then the function returns FALSE.
-  #' @param transect_to_check geos_geometry, linestring
-  #' @param transect_lines geos_geometry, linestring
-  #' @param flowlines geos_geometry, linestring
-  #'
-  #' @return TRUE if the extension should be used, FALSE if it shouldn't be used
-  #' @importFrom geos geos_intersection geos_type
-  is_valid_transect_line <- function(transect_to_check, transect_lines, flowlines) {
-    
-    # ###   ##   ##   ##   ##   ##   ##   ##   ##   ##  
-    # extension_line <- left_extended_trans
-    # transect_lines <- transect_geoms
-    # flowlines <- flines_geos
-    # ###   ##   ##   ##   ##   ##   ##   ##   ##   ##  
-    
-    # Define conditions to decide which version of the transect to use
-    
-    # 1. Use transect with extension in BOTH directions
-    # 2. Use transect with LEFT extension only
-    # 3. Use transect with RIGHT extension only
-    
-    # Check that the extended transect lines only intersect a single flowline in the network only ONCE
-    intersects_with_flowlines <- geos::geos_intersection(
-      transect_to_check,
-      flowlines
-    )
-    intersects_flowline_only_once <- sum(geos::geos_type(intersects_with_flowlines) == "point") == 1 && 
-      sum(geos::geos_type(intersects_with_flowlines) == "multipoint") == 0 
-    
-    # check that the extended transect line does NOT intersect other transect lines (other than SELF)
-    intersects_other_transects <- sum(geos::geos_intersects(transect_to_check, transect_lines)) > 1
-    
-    # TRUE == Only one flowline is intersected a single time AND no other transect lines are intersected
-    use_transect <- intersects_flowline_only_once  && !intersects_other_transects
-    
-    return(use_transect)
-  }
-
-  # Given a set of transect linestrings and polygons, get the minimum distance to extend each transect line (in both directions) to try to reach the edge of the "polygons"
-  # internal function for extending transect lines out to FEMA 100 year flood plain polygons
-  # transect_lines, set of sf linestrings to extend (only if the transect lines are ENTIRELY within a polygon)
-  # polygons, set of sf polygons that transect lines should be extended out to
-  # max_extension_distance numeric, maximum distance (meters) to extend a transect line in either direction to try to intersect one of the "polygons"
-  get_transect_extension_distances_to_polygons <- function(transect_lines,   polygons,  flines, max_extension_distance) {
-    
-    ###    ###    ###    ###    ###    ###    ###
-    # transect_lines <- transects
-    # polygons <- fema
-    # # # flines <- flines
-    # # max_extension_distance <- 3000
-  # max_extension_distance = 3500
-    # ###    ###    ###    ###    ###    ###    ###
-    
-    # keep 10% of the original points for speed
-    polygons <- rmapshaper::ms_simplify(polygons, keep_shapes = F, keep = 0.10)
-    
-    # mapview::npts(fema)
-    # mapview::npts(polygons)
-    # transects <- sf::read_sf(transect_path)
-    
-     # polygons
-    transects_geos  <- geos::as_geos_geometry(transects)
-    polygons_geos   <- geos::as_geos_geometry(polygons)     
-    
-    # geos::geos_union( polygons_geos, polygons_geos) %>% length()
-    # polygons_geos %>% length()
-    # polygons
-    # polygons_geos  
-    transects_polygons_matrix <- geos::geos_intersects_matrix(transects_geos, polygons_geos) 
-    polygons_transects_matrix <- geos::geos_intersects_matrix(polygons_geos, transects_geos) 
-
-    # subset the transects and polygons to only those with intersections
-    intersect_transects  <- transects[lengths(transects_polygons_matrix) != 0, ]
-    intersect_polygons   <- polygons_geos[lengths(polygons_transects_matrix) != 0]
-    
-    # Convert our intersecting polygons to LINESTRINGS b/c we DON'T NEED polygons to calculate extension distances from our transect lines
-    # This can be done with just linestrings (not sure if this is actually more performant, but I'm pretty sure it is....)
-    intersect_lines <- 
-      intersect_polygons %>% 
-      # geos::geos_make_valid() %>% 
-      sf::st_as_sf() %>% 
-      # sf::st_union() %>% 
-      # rmapshaper::ms_explode() %>% 
-      # sf::st_as_sf() %>% 
-      # dplyr::mutate(fema_id = 1:dplyr::n()) %>% 
-      # dplyr::select(fema_id, geom = x) %>% 
-      sf::st_cast("MULTILINESTRING") %>% 
-      geos::as_geos_geometry() %>% 
-      geos::geos_simplify_preserve_topology(25)
-    
-    # mapview::npts(sf::st_as_sf(intersect_lines))
-
- #    intersect_polygons %>% 
- #      geos::geos_make_valid() %>% 
- #      geos::geos_is_valid() %>% all()
- # is.null(intersect_lines$geometry )
-    # use half of the shortest transect line as the segmentation length for all transects (ensures all transects will have a midpoint...?)
-    # TODO: Double check this logic.
-    min_segmentation <- min(intersect_transects$cs_lengthm %/% 2)
-    
-    # which.min(intersect_transects$cs_lengthm %/% 2)
-    
-    # make each transect line have way more segments so we can take a left and right half of each transect line
-    segmented_trans <- sf::st_segmentize(intersect_transects, min_segmentation)
-    
-    # mapview::mapview(left_trans, col.regions = "dodgerblue") +
-      # mapview::mapview(intersect_transects, color = "red") +
-      #   mapview::mapview(intersect_transects[42, ], color = "yellow") +
-      # mapview::mapview(right_trans, color = "dodgerblue") +
-      # mapview::mapview(left_trans, color = "green")
-    
-    # Separate the transect lines into LEFT and RIGHT halves.
-    # We do this so we can check if a side of a transect is ENTIRELY WITHIN a polygon.
-    # If the half is entirely within a polygon, we calculate the distance needed to extend that side out to the polygon boundary.
-    left_trans <- 
-      segmented_trans %>% 
-      lwgeom::st_linesubstring(0, 0.50) %>% 
-      dplyr::mutate(
-        partition         = "left",
-        partition_lengthm = as.numeric(sf::st_length(geom))
-      ) %>% 
-      hydrofabric3D::add_tmp_id() %>% 
-      dplyr::select(tmp_id, hy_id, cs_source, cs_id, cs_measure,  
-                    cs_lengthm, is_extended, partition, partition_lengthm, geom)
-    
-    # Find the distances from the right side of transect lines 
-    right_trans <- 
-      segmented_trans %>% 
-      lwgeom::st_linesubstring(0.50, 1) %>% 
-      dplyr::mutate(
-        partition         = "right",
-        partition_lengthm = as.numeric(sf::st_length(geom))
-      ) %>% 
-      hydrofabric3D::add_tmp_id() %>%
-      dplyr::select(tmp_id, hy_id, cs_source, cs_id, cs_measure,  
-                    cs_lengthm, is_extended, partition, partition_lengthm, geom)
-   
-    # convert the transect geometries to geos types
-    # get the fema polygon indices for the transect halves that are completely within a fema polygon
-    # add the fema polygons index as a column to the transect dataframes
-    left_trans_geos     <- geos::as_geos_geometry(left_trans)
-    right_trans_geos    <- geos::as_geos_geometry(right_trans)
-    
-    left_within_matrix  <- geos::geos_within_matrix(left_trans_geos, intersect_polygons)
-    right_within_matrix <- geos::geos_within_matrix(right_trans_geos, intersect_polygons)
-    
-    left_within_vect    <- lapply(left_within_matrix, function(i) { if(length(i) > 0) { c(i) } else { c(NA) } })
-    right_within_vect   <- lapply(right_within_matrix, function(i) { if(length(i) > 0) { c(i) } else { c(NA) } })
-    
-    # add the fema polygon indexes as columns
-    left_trans$left_fema_index    <- left_within_vect
-    right_trans$right_fema_index  <- right_within_vect
-    
-    # add boolean columns whether the transect is fully within the FEMA polygons
-    left_trans <- 
-      left_trans %>% 
-      dplyr::mutate(
-        left_is_within_fema = dplyr::case_when(
-          !is.na(left_fema_index) ~ TRUE,
-          TRUE                    ~ FALSE
-        )
-      ) %>% 
-      dplyr::select(tmp_id, hy_id, cs_source, cs_id, cs_measure,  
-                    cs_lengthm, is_extended, partition, partition_lengthm,
-                    left_fema_index, left_is_within_fema, 
-                    geom
-      )
-     
-    right_trans <- 
-      right_trans %>% 
-      dplyr::mutate(
-        right_is_within_fema = dplyr::case_when(
-          !is.na(right_fema_index) ~ TRUE,
-          TRUE               ~ FALSE
-        )
-      ) %>% 
-      dplyr::select(tmp_id, hy_id, cs_source, cs_id, cs_measure,  
-                    cs_lengthm, is_extended, partition, partition_lengthm,
-                    right_fema_index, right_is_within_fema, 
-                    geom
-      ) 
-    
-    # max_extension_distance = 3000
-    # which(transects_with_distances$hy_id == "wb-1003839")
-    # 
-    # left_trans[which(left_trans$hy_id == "wb-1003839"), ]$cs_lengthm
-    # right_trans[which(left_trans$hy_id == "wb-1003839"), ]$cs_lengthm
-    # 
-    # which(right_trans$hy_id == "wb-1003839")
-    
-    left_distances <- calc_extension_distances(
-      geos_geoms             = left_trans_geos,
-      ids                    = left_trans$tmp_id,
-      lines_to_cut           = intersect_lines,
-      lines_to_cut_indices   = left_trans$left_fema_index,
-      direction              = "head",
-      max_extension_distance = max_extension_distance
-    )
-    
-    right_distances <- calc_extension_distances(
-      geos_geoms             = right_trans_geos,
-      ids                    = right_trans$tmp_id,
-      lines_to_cut           = intersect_lines,
-      lines_to_cut_indices   = right_trans$right_fema_index,
-      direction              = "tail",
-      max_extension_distance = max_extension_distance
-    )  
-    
-    left_trans$left_distance    <- left_distances
-    right_trans$right_distance  <- right_distances
-    
-    # extensions_by_id %>% 
-    #   dplyr::filter(hy_id == "wb-1003839")
-    # 
-    # distance to extend LEFT and/or RIGHT for each hy_id/cs_id
-    extensions_by_id <- dplyr::left_join(
-                          sf::st_drop_geometry(
-                            dplyr::select(left_trans, 
-                                          hy_id, cs_id, left_distance)
-                            ),
-                          sf::st_drop_geometry(
-                            dplyr::select(right_trans, 
-                                        hy_id, cs_id, 
-                                        right_distance)
-                            ),
-                          by = c("hy_id", "cs_id")
-                          )
-    
-    #  ######### ######## ######## ######## ####### 
-    # # TODO: This is temporary !!!!!!
-    # fema_index_df <- dplyr::left_join(
-    #   sf::st_drop_geometry(
-    #     dplyr::select(left_trans, 
-    #                   hy_id, cs_id, left_distance, left_fema_index)
-    #   ),
-    #   sf::st_drop_geometry(
-    #     dplyr::select(right_trans, 
-    #                   hy_id, cs_id, 
-    #                   right_distance, right_fema_index)
-    #   ),
-    #   by = c("hy_id", "cs_id")
-    # )
-    ######## ######## ######## ######## ########
-    
-    # foi <- sf::st_as_sf(intersect_polygons[fema_uids]) %>% dplyr::mutate(
-    #   fema_id = fema_uids
-    # )
-    # 
-    # polygons_to_merge <- sf::st_as_sf(intersect_polygons[fema_uids]) %>% dplyr::mutate(
-    #   fema_id = fema_uids
-    # ) %>% 
-    #   dplyr::filter(fema_id %in% c(1563, 1566, 1567, 590))
-    # merged_polygon <- 
-    #   polygons_to_merge %>% 
-    #   sf::st_union()
-    # merged_polygon %>% 
-    #   rmapshaper::ms_explode()
-    # mapview::mapview(foi, col.regions = "dodgerblue") +
-    #   mapview::mapview(polygons_to_merge, col.regions = "yellow") +
-    #   mapview::mapview(merged_polygon, col.regions = "green") +
-    #   mapview::mapview(toi, color = "red") +
-    #   mapview::mapview(og_trans, color = "green")
-    # polygons %>% 
-      # dplyr::filter(fema_id )
-    ######## ######## ######## ######## ########
-    
-    # TODO: Add left/right extension distancces to transect data
-    # TODO: this can ultimately just be the "transects" variable, dont need to make new "transects_with_distances" variable
-    transects <- 
-      transects %>% 
-      dplyr::left_join(
-                extensions_by_id,
-                by = c("hy_id", "cs_id")
-            ) %>% 
-      dplyr::mutate(
-        left_distance = dplyr::case_when(
-          is.na(left_distance) ~ 0,
-          TRUE                 ~ left_distance
-        ),
-        right_distance = dplyr::case_when(
-          is.na(right_distance) ~ 0,
-          TRUE                  ~ right_distance
-        )
-        # left_distance = dplyr::case_when(
-        #   left_distance == 0 ~ NA,
-        #   TRUE               ~ left_distance
-        # ),
-        # right_distance = dplyr::case_when(
-        #   right_distance == 0 ~ NA,
-        #   TRUE                ~ right_distance
-        # )
-      ) %>% 
-      hydrofabric3D::add_tmp_id()
-    
-    # transects_with_distances %>% 
-    #   dplyr::filter(hy_id == "wb-1003839")
-    # # transects
-    # flines %>% 
-    #   dplyr::arrange(-tot_drainage_areasqkm) 
-    # flines %>% 
-    #   dplyr::arrange(-hydroseq) %>% 
-    #   dplyr::filter(hydroseq == min(hydroseq) | hydroseq == max(hydroseq)) %>%
-    #   # dplyr::filter(hydroseq == max(hydroseq)) %>% 
-    #   mapview::mapview()
-    
-    # # extensions_by_id
-    # transects_with_distances %>% 
-    #   dplyr::filter(!tmp_id %in% hydrofabric3D::add_tmp_id(extensions_by_id)$tmp_id)
-    
-#     ######## ######## ######## ######## ######## ######## ######## ######## 
-#     left_extended_flag[1:20]
-#     right_extended_flag[1:20]
-#     both_extended_flag[1:20]
-#     
-#     range_vect <- 1:500
-#     
-#     fema_uids <- unique(c(unlist(fema_index_df[range_vect, ]$left_fema_index),     unlist(fema_index_df[range_vect, ]$right_fema_index)))
-#     fema_uids <- fema_uids[!is.na(fema_uids)]
-# fema_uids
-#     foi <- sf::st_as_sf(intersect_polygons[fema_uids]) %>% dplyr::mutate(
-#       fema_id = fema_uids
-#     )
-#     # ooi <- sf::st_as_sf()
-#     # toi <- sf::st_as_sf(new_transects[1:20])
-#      toi <- sf::st_as_sf(transect_geoms[range_vect])
-#     toi
-#     og_trans <- transects_with_distances[range_vect, ]
-#     mapview::mapview(foi, col.regions = "dodgerblue") +
-#       mapview::mapview(toi, color = "red") +
-#       mapview::mapview(og_trans, color = "green")
-    ######## ######## ### ##### ######## ######## ######## ######## ######## 
-    
-    fline_id_array       <- flines$id
-    
-    # Convert the net object into a geos_geometry
-    flines_geos          <- geos::as_geos_geometry(flines)
-    
-    transect_hy_id_array <- transects$hy_id
-    transect_cs_id_array <- transects$cs_id
-    
-    transect_geoms       <- geos::as_geos_geometry(transects$geom)
-   
-    left_distances       <- transects$left_distance
-    right_distances      <- transects$right_distance
-    
-    # preallocate vector that stores the updated transect geometries
-    new_transects <- vctrs::vec_c(rep(geos::geos_empty(), length(transect_hy_id_array)))
-    
-    left_extended_flag   <- rep(FALSE, length(transect_hy_id_array))   
-    right_extended_flag  <- rep(FALSE, length(transect_hy_id_array))
-    both_extended_flag   <- rep(FALSE, length(transect_hy_id_array))
-    
-    
-    updated_left_distances    <- rep(0, length(transect_hy_id_array))   
-    updated_right_distances   <- rep(0, length(transect_hy_id_array))   
-    
-    # new_transects <- geos::geos_empty()
-    # # measures  <- vctrs::vec_c()
-    # transects_with_distances[1:20, ]
-    # transects[1:20, ]
-    
-    # number of geometries that will be iterated over, keeping this variable to reference in message block  
-    total <- length(transect_hy_id_array)
-
-    # output a progress message at roughly 5% intervals (total %/% 20)
-    message_interval <- total %/% 20
-    number_of_skips = 0
-    
-    for (i in seq_along(transect_hy_id_array)) {
-      # message("i: ", i)
-      # i = 13
-      # if(i > 2000) {
-      #   message("-----> STOP BECAUSE at i", i)
-      #   break
-      # }
-      # i = 1
-      # Check if the iteration is a multiple of the message interval
-      if (i %% message_interval == 0) {
-        
-        # get the percent complete
-        percent_done <- round(i/total, 2) * 100
-        
-        # Print the message every "message_interval"
-        # if(verbose) { 
-          message(i, " > ", percent_done, "% ") 
-          message("Number of skips: ", number_of_skips)
-          # }
-        # message("Iteration ", i, " / ", length(extended_trans), 
-        #         " - (", percent_done, "%) ")
-        
-      }
-      # which(transects_with_distances$hy_id == "wb-1003839")
-      # i = 9587
-      # get the current transect, hy_id, cs_id, flowline, and extension distances
-      current_trans <- transect_geoms[i]
-      
-      current_hy_id <- transect_hy_id_array[i]
-      current_cs_id <- transect_cs_id_array[i]
-      
-      current_fline <- flines_geos[fline_id_array == current_hy_id]
-       
-      left_distance_to_extend <- left_distances[i]
-      right_distance_to_extend <- right_distances[i]
-      
-      no_extension_required <- (left_distance_to_extend == 0 && right_distance_to_extend == 0)
-      # no_extension_required <- is.na(left_distance_to_extend) && is.na(right_distance_to_extend)
-      # message("Transect tmp_id: ", curr_tmp_id, " - (", i, ")")
-      
-      if(no_extension_required) {
-        # message("Skipping -left/right extension are both 0")
-        number_of_skips = number_of_skips + 1
-        
-        next
-      }
-      
-      # message("Extending transect line left and right")
-      # extend the lines
-      left_extended_trans  <- hydrofabric3D::geos_extend_line(current_trans, 
-                                                              left_distance_to_extend, "head")
-      right_extended_trans <- hydrofabric3D::geos_extend_line(current_trans, 
-                                                              right_distance_to_extend, "tail")
-      
-      # initial check to make sure the extended versions of the transects are valid
-      use_left_extension  <- is_valid_transect_line(left_extended_trans, transect_geoms, flines_geos)
-      use_right_extension <- is_valid_transect_line(right_extended_trans, transect_geoms, flines_geos)
-      # use_both_extensions <- use_left_extension && use_right_extension
-      
-      used_half_of_left  <- FALSE
-      used_half_of_right <- FALSE
-      
-      # TODO: Probably should precompute this division BEFORE the loop...
-      half_left_distance   <- ifelse(left_distance_to_extend > 0, left_distance_to_extend %/% 2, 0)
-      half_right_distance  <- ifelse(right_distance_to_extend > 0, right_distance_to_extend %/% 2, 0)
-      
-      # if we CAN'T use the original LEFT extension distance, 
-      # we try HALF the distance (or some distance less than we extended by in the first place)
-      if (!use_left_extension) {
-        
-        # half_left_distance   <- ifelse(left_distance_to_extend > 0, left_distance_to_extend %/% 2, 0)
-        left_extended_trans  <- hydrofabric3D::geos_extend_line(current_trans, 
-                                                                half_left_distance, "head")
-        use_left_extension  <- is_valid_transect_line(left_extended_trans, transect_geoms, flines_geos)
-        
-        used_half_of_left <- ifelse(use_left_extension, TRUE,  FALSE)
-      }
-      
-      # if we CAN'T use the original RIGHT extension distance, 
-      # we try HALF the distance (or some distance less than we extended by in the first place)
-      if (!use_right_extension) {
-        
-        # half_right_distance  <- ifelse(right_distance_to_extend > 0, right_distance_to_extend %/% 2, 0)
-        right_extended_trans <- hydrofabric3D::geos_extend_line(current_trans, 
-                                                                half_right_distance, "tail")
-        use_right_extension <- is_valid_transect_line(right_extended_trans, transect_geoms, flines_geos)
-        
-        used_half_of_right  <- ifelse(use_right_extension, TRUE,  FALSE)
-        
-        # mapview::mapview(sf::st_as_sf(current_trans), color = "red") + 
-        #   mapview::mapview(sf::st_as_sf(left_extended_trans), color = "green") + 
-        #   mapview::mapview(sf::st_as_sf(right_extended_trans), color = "green") +
-        #   mapview::mapview(sf::st_as_sf(left_extended_trans2), color = "dodgerblue") + 
-        #   mapview::mapview(sf::st_as_sf(right_extended_trans2), color = "dodgerblue") 
-        
-      }
-
-      use_both_extensions <- use_left_extension && use_right_extension
-
-      # # message("Checking left and right intersections with flowline...")
-      # # ---------------------------------------------------------------------------------
-      # # TODO: UNCOMMENT BELOW ---> this was my original method 
-      # # ---------------------------------------------------------------------------------
-      # # Check that the extended transect lines only intersect the current flowline once
-      # left_intersects_fline <- geos::geos_intersection(
-      #   left_extended_trans,
-      #   # current_fline
-      #   flines_geos
-      # )
-      # 
-      # right_intersects_fline <- geos::geos_intersection(
-      #   right_extended_trans,
-      #   # current_fline
-      #   flines_geos
-      # )
-      # 
-      # 
-      # # mapview::mapview(sf::st_as_sf(flines_geos[which(geos::geos_type(left_intersects_fline) == "point")])) +
-      # #   mapview::mapview(sf::st_as_sf(current_trans), color = "red") +
-      # # mapview::mapview(sf::st_as_sf(left_extended_trans), color = "green") + 
-      # # mapview::mapview(sf::st_as_sf(right_extended_trans), color = "green")
-      # 
-      # # Define conditions to decide which version of the transect to use
-      # 
-      # # 1. Use transect with extension in BOTH directions
-      # # 2. Use transect with LEFT extension only
-      # # 3. Use transect with RIGHT extension only
-      # 
-      # # left_intersects_fline_once  <- geos::geos_type(left_intersects_fline) == "point"
-      # # right_intersects_fline_once <- geos::geos_type(right_intersects_fline) == "point"
-      # left_intersects_fline_once  <- sum(geos::geos_type(left_intersects_fline) == "point") == 1 && 
-      #                                sum(geos::geos_type(left_intersects_fline) == "multipoint") == 0 
-      # 
-      # right_intersects_fline_once <- sum(geos::geos_type(right_intersects_fline) == "point") == 1 &&
-      #                                sum(geos::geos_type(right_intersects_fline) == "multipoint") == 0 
-      # 
-      # # sum(geos::geos_type(left_intersects_fline) == "point") == 1
-      # # sum(geos::geos_type(right_intersects_fline) == "point") == 1
-      # # sum(geos::geos_type(left_intersects_fline) == "multipoint") == 0 
-      # 
-      # 
-      # 
-      # # # TODO: Consider doing the opposite of these conditions (i.e. "left_intersects_other_transects" = TRUE) 
-      # # left_does_not_intersect_other_transects  <- !any(geos::geos_intersects(left_extended_trans, transect_geoms[-i]))
-      # # right_does_not_intersect_other_transects <- !any(geos::geos_intersects(right_extended_trans, transect_geoms[-i]))  
-      # # 
-      # # use_left_extension  <- left_intersects_fline_once && left_does_not_intersect_other_transects
-      # # use_right_extension <- right_intersects_fline_once && right_does_not_intersect_other_transects
-      # # use_both_extensions <- use_left_extension && use_right_extension
-      # 
-      # 
-      # # TODO: This is the opposite phrasing of these conditions, i think this is clearer to read
-      # left_intersects_other_transects  <- any(geos::geos_intersects(left_extended_trans, transect_geoms[-i]))
-      # right_intersects_other_transects <- any(geos::geos_intersects(right_extended_trans, transect_geoms[-i]))  
-      # 
-      # # # make sure the extended transects don't hit any of the newly extended transects
-      # # # NOTE: I think this could be just done with a single transect list that starts with the original transects and if an update happens then we replace that transect
-      # # left_intersects_new_transects  <- any(geos::geos_intersects(left_extended_trans, new_transects))
-      # # right_intersects_new_transects <- any(geos::geos_intersects(right_extended_trans, new_transects))
-      # 
-      # # make TRUE/FALSE flags stating which transect should we use
-      # # - BOTH extensions
-      # # - LEFT ONLY extensions
-      # # - RIGHT only extensions
-      # use_left_extension  <- left_intersects_fline_once && !left_intersects_other_transects
-      # use_right_extension <- right_intersects_fline_once && !right_intersects_other_transects
-      # use_both_extensions <- use_left_extension && use_right_extension
-      # 
-      # new_use_left_extension  <- is_valid_transect_line(left_extended_trans, transect_geoms, flines_geos)
-      # new_use_right_extension <- is_valid_transect_line(right_extended_trans, transect_geoms, flines_geos)
-      # new_use_both_extensions <- new_use_left_extension && new_use_right_extension
-      # 
-      # message("--------------------------------------------")
-      # message("Left intersects FLINE ONCE: ", left_intersects_fline_once)
-      # message("Right intersects FLINE ONCE: ", right_intersects_fline_once)
-      # message()
-      # message("Left intersects OTHER TRANSECTS: ", left_intersects_other_transects)
-      # message("Right intersects OTHER TRANSECTS: ", right_intersects_other_transects)
-      # message()
-      # message("Use LEFT extension intersects: ", use_left_extension)
-      # message("Use RIGHT extension intersects: ", use_right_extension)
-      # message("Use BOTH extension intersects: ", use_both_extensions)
-      # message()
-      # message("--------------------------------------------")
-      # message()
-      # # merged_trans <- geos::geos_union(left_extended_trans, right_extended_trans)
-      # # sf::st_union(sf::st_cast(sf::st_as_sf(merged_trans), "LINESTRING"))
-      # # mapview::mapview(sf::st_as_sf(merged_trans), color = "green") +
-      # #   mapview::mapview(sf::st_as_sf(left_start), col.region = "red") +
-      # #   mapview::mapview(sf::st_as_sf(left_end), col.region = "red") + 
-      # #   mapview::mapview(sf::st_as_sf(right_start), col.region = "dodgerblue") +
-      # #   mapview::mapview(sf::st_as_sf(right_end), col.region = "dodgerblue")
-      # # ---------------------------------------------------------------------------------
-      # # TODO: UNCOMMENT ABOVE ---> this was my original method 
-      # # ---------------------------------------------------------------------------------
-      # 
-      # if(use_both_extensions) {
-      
-      # Get the start and end of both extended transects
-      left_start  <- geos::geos_point_start(left_extended_trans)
-      left_end    <- geos::geos_point_end(left_extended_trans)
-      right_start <- geos::geos_point_start(right_extended_trans)
-      right_end   <- geos::geos_point_end(right_extended_trans)
-      
-      # }
-      # Extend in BOTH directions
-      if(use_both_extensions) {
-        # message("Extend direction: BOTH")
-        start  <- left_start
-        end    <- right_end
-        
-      # extend ONLY the left side
-      } else if(use_left_extension && !use_right_extension) {
-         # message("Extend direction: LEFT")       
-        start  <- left_start
-        end    <- left_end
-        
-      # Extend ONLY the right side
-      } else if(!use_left_extension && use_right_extension) {
-         # message("Extend direction: RIGHT")       
-        start  <- right_start
-        end    <- right_end
-        
-      # DO NOT extend either direction
-      } else {
-        # message("No extension")   
-        # TODO: Really dont need to do anything 
-        # TODO: in this scenario because we just use the original transect line
-        start  <- left_end
-        end    <- right_start
-      }
-      
-      # trans_needs_extension <- use_left_extension || use_right_extension
-      
-      # if(trans_needs_extension) {
-        
-        line_crs      <- wk::wk_crs(current_trans)
-        updated_trans <- make_line_from_start_and_end_pts(start, end, line_crs)
-        
-      #   touched_flines <- flines[geos::geos_type(right_intersects_fline) != "linestring", ]
-      #   mapview::mapview(touched_flines, color = "dodgerblue") + 
-      #   mapview::mapview(sf::st_as_sf(current_trans), color = "red") +
-      #     mapview::mapview(sf::st_as_sf(left_extended_trans), color = "green") +
-      #     mapview::mapview(sf::st_as_sf(right_extended_trans), color = "green") +
-      #     mapview::mapview(sf::st_as_sf(updated_trans), color = "yellow")
-      # nrow(flines)
-      # touched_flines <- flines[geos::geos_type(right_intersects_fline) != "linestring", ]
-      # flines[lengths(right_intersects_fline) == 0, ]
-      # length(right_intersects_fline)
-        # }
-        
-        # ---------------------------------------------------
-        # TODO: UNCOMMENT BELOW
-        # ---------------------------------------------------
-        if(use_left_extension) {
-          left_extended_flag[i]  <- TRUE
-        }
-
-        if(use_right_extension) {
-          right_extended_flag[i] <- TRUE
-        }
-
-        if(use_both_extensions) {
-          both_extended_flag[i] <- TRUE
-        }
-        
-        if(used_half_of_left) {
-          updated_left_distances[i]  <- half_left_distance 
-        }
-        if(used_half_of_right) {
-          updated_right_distances[i] <- half_right_distance 
-        }
-        
-        # new_transects[i] <- updated_trans
-        transect_geoms[i] <- updated_trans
-
-        # ---------------------------------------------------
-        # TODO: UNCOMMENT ABOVE
-        # ---------------------------------------------------
-        
-        # start %>% class()
-    }      
-    
-    # transects2 <- transects 
-      # dplyr::mutate(
-      #   new_cs_lengthm = as.numeric(sf::st_length(geom))
-      # ) %>% 
-      # dplyr::relocate(hy_id, cs_id, cs_lengthm, new_cs_lengthm)
-    
-    
-    # Update the "transects_to_extend" with new geos geometries ("geos_list")
-    sf::st_geometry(transects) <- sf::st_geometry(sf::st_as_sf(transect_geoms))
-    
-    transects <- 
-      transects %>% 
-      dplyr::mutate(
-        new_cs_lengthm = as.numeric(sf::st_length(geom))
-      ) %>% 
-      dplyr::relocate(hy_id, cs_id, cs_lengthm, new_cs_lengthm)
-    
-    # transects2 %>% 
-    #   dplyr::filter(
-    #     new_cs_lengthm > cs_lengthm
-    #   )
-    # 
-    
-    transects$left_is_extended  <- left_extended_flag
-    transects$right_is_extended <- right_extended_flag
-    
-    return(transects)
-    
-  }    
-   
-#     # ----------------------------------------------------------------
-#     # ----------------------------------------------------------------
-#     # ----------------------------------------------------------------  
-#   
-#       # make the new transect line from the start and end points 
-#       final_line <- geos::geos_make_linestring(x = c(X_start, X_end),
-#                                                y = c(Y_start, Y_end), 
-#                                                crs = wk::wk_crs(current_trans)
-#                                                )
-#       geos::geos_make_collection(start, type_id = "LINESTRING")
-#       
-#       
-#       left_start <- geos::geos_point_start(left_extended_trans)
-#       right_end  <- geos::geos_point_end(right_extended_trans)
-#       
-#       fline_intersects
-#       geos::geos_type(fline_intersects) == "point"
-#       
-#       geos::geos_intersects(current_trans, )
-#       
-#       # if(
-#       #   geos::geos_type(fline_intersects)
-#       #   
-#       #   
-#       # )
-#       mapview::mapview(sf::st_as_sf(trans), color = "green") +
-#        mapview::mapview(sf::st_as_sf(left_extended_trans), color = "red")     +
-#         mapview::mapview(sf::st_as_sf(right_extended_trans), color = "red")     +
-#         mapview::mapview(      sf::st_as_sf(intersect_polygons[[curr_fema_index]]), col.regions = "dodgerblue")  +  
-#       mapview::mapview(      sf::st_as_sf(final_line), color = "yellow")   
-
-#       
-#     }
-
-#     left_trans$left_extension_distance
-
-#     length(right_distances)
-#     # Calculate the minimum distance a line would need to extend to reach the boundary of the polygon/line that the input geometries are entirely within 
-    calc_extension_distances <- function(geos_geoms, ids, lines_to_cut, lines_to_cut_indices, direction = "head", max_extension_distance = 2500) {
-      #####   #####   #####   #####   #####
-      # geos_geoms   <- left_trans_geos
-
-      # geos_geoms             = left_trans_geos
-      # ids                    = left_trans$tmp_id
-      # lines_to_cut           = intersect_lines
-      # lines_to_cut_indices   = left_trans$left_fema_index
-      # direction              = "head"
-      # max_extension_distance = max_extension_distance
-      #
-
-      #####   #####   #####   #####   #####
-
-      if (!direction %in% c("head", "tail")) {
-          stop("Invalid 'direction' value, must be one of 'head' or 'tail'")
-      }
-
-      # preallocate vector that stores the extension distances
-      extension_dists <- vctrs::vec_c(rep(0, length(ids)))
-
-      # extension_dists <- vector(mode = "numeric", length = nrow(trans_data))
-      for (i in seq_along(ids)) {
-        # i = 118
-        curr_id           <- ids[i]
-        is_within_polygon <- any(!is.na(lines_to_cut_indices[[i]]))
-        polygon_index     <- lines_to_cut_indices[[i]]
-        # any(is_within_polygon)
-        message("Transect: '", curr_id, "' - (", i, ")")
-
-        if (is_within_polygon) {
-          message("- Side of transect intersects with FEMA")
-          message("\t > FEMA index: ", polygon_index)
-
-          curr_geom  <- geos_geoms[[i]]
-          index_vect <- sort(unlist(polygon_index))
-
-           distance_to_extend <- hydrofabric3D:::geos_bs_distance(
-            distances    = 1:max_extension_distance,
-            line         = curr_geom,
-            geoms_to_cut = lines_to_cut[index_vect],
-            direction    = direction
-          )
-
-          extension_dists[i] <- distance_to_extend
-        }
-
-      }
-
-      return(extension_dists)
-    }
diff --git a/runners/cs_runner/preprocess_fema.R b/runners/cs_runner/preprocess_fema.R
index 1e1cba5..ee08e25 100644
--- a/runners/cs_runner/preprocess_fema.R
+++ b/runners/cs_runner/preprocess_fema.R
@@ -22,6 +22,8 @@ library(geos)
 library(fastmap)
 library(nngeo)
 
+# devtools::install_github("anguswg-ucsb/hydrofabric3D")
+
 # TODO: Steps that converts FGB to geojson and then geojson to gpkg can be put into a single loop
 # TODO: Delete old files as needed
 
@@ -33,7 +35,8 @@ library(nngeo)
 
 # Default is TRUE (i.e. a fresh processing run is done from start to finish)
 OVERWRITE_FEMA_FILES  <- TRUE
-DELETE_STAGING_GPKGS  <- TRUE
+DELETE_STAGING_GPKGS  <- FALSE
+# DELETE_STAGING_GPKGS  <- TRUE
 
 # -------------------------------------------------------------------------------------
 # ---- Create directories (if they do NOT exist) ----
@@ -337,8 +340,6 @@ for (file_path in FEMA_CLEAN_GPKG_PATHS) {
 # ---- Loop through each VPU subfolder and merge all of the Geopackages into one---- 
 # -------------------------------------------------------------------------------------
 
-DELETE_STAGING_GPKGS <- FALSE
-
 for (vpu_dir in FEMA_VPU_SUBFOLDERS) {
   # for (i in 1:4) {
   # vpu_dir = FEMA_VPU_SUBFOLDERS2[12]
@@ -399,8 +400,16 @@ for (vpu_dir in FEMA_VPU_SUBFOLDERS) {
 # -------------------------------------------------------------------------------------
 # ---- Union each VPU geopackage (either on state or just touching predicate) ---- 
 # -------------------------------------------------------------------------------------
+# for (i in 5:length(FEMA_VPU_SUBFOLDERS)) {
+#   
+#   vpu_dir    <- FEMA_VPU_SUBFOLDERS[i]
+#   VPU        <- basename(vpu_dir)
+#   
+#   message(i, " - Attempting to union FEMA polygons for '", VPU, "'...")
+# }
 
-for (i in 1:length(FEMA_VPU_SUBFOLDERS)) {
+for (i in 5:length(FEMA_VPU_SUBFOLDERS)) {
+  
   vpu_dir    <- FEMA_VPU_SUBFOLDERS[i]
   VPU        <- basename(vpu_dir)
   
@@ -411,6 +420,9 @@ for (i in 1:length(FEMA_VPU_SUBFOLDERS)) {
   master_gpkg_name  <- paste0(master_name, ".gpkg")
   master_filepath   <- paste0(vpu_dir, "/", master_gpkg_name)
   
+  updated_gpkg_name  <- gsub(".gpkg", "_output.gpkg", master_gpkg_name)
+  updated_filepath   <- paste0(vpu_dir, "/", updated_gpkg_name)
+  
   message("> Re-unioning and re-exploding geometries in '", basename(master_filepath), "'")
   
   if(!file.exists(master_filepath)) { 
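
One small caveat on the filename substitution added above: the "." in the gsub() pattern is a regex wildcard, not a literal dot. A hedged tightening that makes the intent explicit:

    updated_gpkg_name <- gsub(".gpkg", "_output.gpkg", master_gpkg_name, fixed = TRUE)
    updated_filepath  <- paste0(vpu_dir, "/", updated_gpkg_name)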
@@ -423,21 +435,12 @@ for (i in 1:length(FEMA_VPU_SUBFOLDERS)) {
   fema_vpu <- sf::read_sf(master_filepath)
   
   # fema_vpu %>% sf::st_geometry_type() %>% unique()
-  
-  fema_vpu <- 
-    fema_vpu %>% 
-    nngeo::st_remove_holes(max_area = 200) %>%
-    # dplyr::select(geometry = geom) %>%
-    add_predicate_group_id(sf::st_intersects) %>% 
-    sf::st_make_valid() %>%
-    dplyr::group_by(group_id) %>% 
-    dplyr::summarise(
-      geometry = sf::st_combine(sf::st_union(geometry))
-    ) %>% 
-    dplyr::ungroup() %>% 
-    dplyr::select(-group_id) %>%
-    add_predicate_group_id(sf::st_intersects)
-  
+  # fema_vpu %>% mapview::npts()
+  # fema_vpu %>% sf::st_is_valid() %>% all()
+  # fema_vpu %>% 
+  #   sf::st_make_valid() %>% 
+  #   sf::st_geometry_type() %>% 
+  #   unique()
   geom_type_counts <- table(sf::st_geometry_type(fema_vpu))
   
   message("Geometry counts before casting all geometries to MULTIPOLYGON:")
@@ -445,20 +448,122 @@ for (i in 1:length(FEMA_VPU_SUBFOLDERS)) {
     message(" > ", names(geom_type_counts[g]), ": ", geom_type_counts[g])
   }
   
-  message("Keeping only POLYGON and MULTIPOLYGON geometries...")
-  fema_vpu <- 
-    fema_vpu %>%
-    dplyr::filter(sf::st_geometry_type(geometry) %in% c("POLYGON", "MULTIPOLYGON")) %>%
-    sf::st_cast("MULTIPOLYGON") %>%
-    sf::st_make_valid() %>%
-    # dplyr::group_by(group_id) %>% 
-    rmapshaper::ms_dissolve(sys = TRUE, sys_mem = 16) %>%
-    rmapshaper::ms_explode(sys = TRUE, sys_mem = 16) %>%
-    nngeo::st_remove_holes(max_area = 200) %>% 
-    dplyr::mutate(
-      fema_id = as.character(1:dplyr::n())
-    ) %>% 
-    dplyr::select(fema_id, geometry)
+  # mapview::mapview(fema_vpu, color = 'red', col.regions = 'white') +
+  #       mapview::mapview(fema_union, color = 'green', col.regions = 'white')
+  
+  # fema_vpu %>% 
+  #   sf::st_make_valid() %>% 
+  #   dplyr::filter(sf::st_geometry_type(geom) %in% c("POLYGON", "MULTIPOLYGON")) %>% 
+  #   sf::st_is_valid() %>% 
+  #   all()
+  
+  tryCatch({
+    
+    fema_vpu <- 
+      fema_vpu %>% 
+      nngeo::st_remove_holes(max_area = 200) %>% 
+      sf::st_make_valid() %>%
+      # dplyr::filter(sf::st_geometry_type(geom) %in% c("POLYGON", "MULTIPOLYGON")) %>% 
+      add_predicate_group_id(sf::st_intersects) %>% 
+      dplyr::group_by(group_id) %>% 
+      rmapshaper::ms_dissolve(sys = TRUE, sys_mem = 16) %>% 
+      rmapshaper::ms_explode(sys = TRUE, sys_mem = 16) %>% 
+      dplyr::ungroup() %>% 
+      nngeo::st_remove_holes(max_area = 200) %>% 
+      dplyr::mutate(      
+        vpu      = gsub("VPU_", "", VPU),
+        fema_id  = as.character(1:dplyr::n())
+      ) %>% 
+      dplyr::select(
+        vpu, fema_id,
+        # state, 
+        geom = geometry
+      )
+    
+  }, error = function(e) {
+    message(VPU, " threw the following error: \n ", e)
+    message(" > Cleaning ", VPU, " using a backup cleaning strategy...")
+    
+    fema_vpu <- 
+      fema_vpu %>% 
+      sf::st_make_valid() %>% 
+      dplyr::mutate(      
+        vpu      = gsub("VPU_", "", VPU),
+        fema_id  = as.character(1:dplyr::n())
+      ) %>% 
+      dplyr::select(
+        vpu, fema_id,
+        # state, 
+        geom
+      )
+  
+    })
+  
+    # fema_vpu2 <- 
+    #   fema_vpu %>% 
+    #   nngeo::st_remove_holes(max_area = 200) %>% 
+    #   # sf::st_make_valid() %>% 
+    #   # dplyr::filter(sf::st_geometry_type(geom) %in% c("POLYGON", "MULTIPOLYGON")) %>% 
+    #   add_predicate_group_id(sf::st_intersects) %>% 
+    #   dplyr::group_by(group_id) %>% 
+    #   dplyr::summarise(
+    #     geometry = sf::st_combine(sf::st_union(geometry))
+    #   ) %>%
+    #   rmapshaper::ms_explode(sys = TRUE, sys_mem = 16) %>% 
+    #   dplyr::ungroup() %>% 
+    #   nngeo::st_remove_holes(max_area = 200)
+      
+  # fema_vpu <- 
+  #   fema_vpu %>% 
+  #   dplyr::mutate(      
+  #     vpu      = gsub("VPU_", "", VPU),
+  #     fema_id  = as.character(1:dplyr::n())
+  #   ) %>% 
+  #   dplyr::select(
+  #       vpu, fema_id,
+  #       # state, 
+  #       geom = geometry
+  #       )
+  
+  
+  # fema_vpu <- 
+  #   fema_vpu %>% 
+  #   nngeo::st_remove_holes(max_area = 200) %>%
+  #   # dplyr::select(geometry = geom) %>%
+  #   add_predicate_group_id(sf::st_intersects) %>% 
+  #   sf::st_make_valid() %>%
+  #   dplyr::group_by(group_id) %>% 
+  #   dplyr::summarise(
+  #     geometry = sf::st_combine(sf::st_union(geometry))
+  #   ) %>% 
+  #   dplyr::ungroup() %>% 
+  #   dplyr::select(-group_id) %>%
+  #   add_predicate_group_id(sf::st_intersects)
+  # 
+  # fema_vpu %>% sf::st_geometry_type() %>% unique()
+  # fema_vpu %>% mapview::npts()
+  # 
+  # geom_type_counts <- table(sf::st_geometry_type(fema_vpu))
+  # 
+  # message("Geometry counts before casting all geometries to MULTIPOLYGON:")
+  # for (g in seq_along(geom_type_counts)) {
+  #   message(" > ", names(geom_type_counts[g]), ": ", geom_type_counts[g])
+  # }
+  # 
+  # message("Keeping only POLYGON and MULTIPOLYGON geometries...")
+  # fema_vpu <- 
+  #   fema_vpu %>%
+  #   dplyr::filter(sf::st_geometry_type(geometry) %in% c("POLYGON", "MULTIPOLYGON")) %>%
+  #   sf::st_cast("MULTIPOLYGON") %>%
+  #   sf::st_make_valid() %>%
+  #   # dplyr::group_by(group_id) %>% 
+  #   rmapshaper::ms_dissolve(sys = TRUE, sys_mem = 16) %>%
+  #   rmapshaper::ms_explode(sys = TRUE, sys_mem = 16) %>%
+  #   nngeo::st_remove_holes(max_area = 200) %>% 
+  #   dplyr::mutate(
+  #     fema_id = as.character(1:dplyr::n())
+  #   ) %>% 
+  #   dplyr::select(fema_id, geometry)
   
   # end_geom_type_counts <- table(sf::st_geometry_type(fema_vpu))
   # message("Geometry counts after all processing steps: ")
@@ -468,24 +573,23 @@ for (i in 1:length(FEMA_VPU_SUBFOLDERS)) {
   
   # fema_vpu2 %>% mapview::npts()
   # fema_vpu2_subset <- fema_vpu2[lengths(sf::st_intersects(fema_vpu2, fema_vpu[1:100, ]))  > 1, ]
-  
   # mapview::mapview(fema_vpu, color = 'red', col.regions = 'white') +
   #       mapview::mapview(fema_vpu2, color = 'green', col.regions = 'white')
   #   # mapview::mapview(fema_vpu2_subset, color = 'green', col.regions = 'white')
   # # mapview::mapview(fema_vpu2[1:100, ], color = 'green', col.regions = 'white')
   
-  fema_vpu <-
-    fema_vpu %>%
-    # dplyr::group_by(source) %>%
-    dplyr::mutate(
-      # state    = tolower(gsub("-100yr-flood_valid_clean.gpkg", "", source)),
-      vpu      = gsub("VPU_", "", VPU),
-      fema_id  = 1:dplyr::n()
-    ) %>%
-    dplyr::ungroup() %>%
-    dplyr::select(vpu, fema_id, 
-                  # state, 
-                  geom = geometry)
+  # fema_vpu <-
+  #   fema_vpu %>%
+  #   # dplyr::group_by(source) %>%
+  #   dplyr::mutate(
+  #     # state    = tolower(gsub("-100yr-flood_valid_clean.gpkg", "", source)),
+  #     vpu      = gsub("VPU_", "", VPU),
+  #     fema_id  = 1:dplyr::n()
+  #   ) %>%
+  #   dplyr::ungroup() %>%
+  #   dplyr::select(vpu, fema_id, 
+  #                 # state, 
+  #                 geom = geometry)
   
   if (OVERWRITE_FEMA_FILES) {
     
@@ -496,9 +600,10 @@ for (i in 1:length(FEMA_VPU_SUBFOLDERS)) {
     
     sf::write_sf(
       fema_vpu,
-      master_filepath
+      updated_filepath
+      # master_filepath
       # union_file_path
     )
   }
   message()
-}
\ No newline at end of file
+}
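
add_predicate_group_id() is defined elsewhere in the runner's utils; a plausible reading of its behavior, sketched here as connected components of the pairwise-intersects graph (igraph is an assumption, not a dependency of the original script):

    add_predicate_group_id_sketch <- function(polys, predicate = sf::st_intersects) {
      # sparse list: row i lists features touching feature i (including itself)
      hits  <- predicate(polys)
      edges <- do.call(rbind, Map(function(i, js) cbind(i, js), seq_along(hits), hits))
      g     <- igraph::graph_from_edgelist(edges, directed = FALSE)
      # polygons in the same connected component share a group_id
      polys$group_id <- igraph::components(g)$membership
      polys
    }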
diff --git a/runners/cs_runner/preprocess_fema2.R b/runners/cs_runner/preprocess_fema2.R
deleted file mode 100644
index 2141946..0000000
--- a/runners/cs_runner/preprocess_fema2.R
+++ /dev/null
@@ -1,504 +0,0 @@
-# Script should be run AFTER download_fema100.R as the FEMA 100 year flood plain data needs to first be downloaded from S3
-# This file takes a directory of FEMA 100 year FGB files (FEMA_FGB_PATH) and runs the processes below to generate a cleaned, simple set of geopackages
-
-# Processing steps:
-# - Convert FGBs to GEOJSON (via ogr2ogr)
-# - Simplifies
-# - Dissolves
-# - Explodes
-# - Convert cleaned GEOJSON to cleaned GPKGs (via ogr2ogr)
-# - Apply hydrofab::clean_geometry()
-# - Partition FEMA 100 geometries by VPU      # TODO still
-# - Get FEMA bounding box geometries (maybe)
-
-# load config variables
-source("runners/cs_runner/config_vars.R")
-source("runners/cs_runner/config.R")
-source("runners/cs_runner/utils.R")
-
-library(dplyr)
-library(sf)
-library(geos)
-library(fastmap)
-library(nngeo)
-
-# TODO: Steps that convert FGB to geojson and then geojson to gpkg can be put into a single loop
-# TODO: Delete old files as needed
-
-# -------------------------------------------------------------------------------------
-# ---- OVERWRITE_FEMA_FILES constant logical ----
-# ---- > if TRUE, processing steps will be run again 
-#          and overwrite existing previously processed files
-# -------------------------------------------------------------------------------------
-
-# Default is TRUE (i.e. a fresh processing run is done from start to finish)
-OVERWRITE_FEMA_FILES  <- TRUE
-DELETE_STAGING_GPKGS  <- TRUE
-
-# -------------------------------------------------------------------------------------
-# ---- Create directories (if they do NOT exist) ----
-# -------------------------------------------------------------------------------------
-
-# create directory for cleaned FEMA geometries as geopackages (if not exists) 
-if (!dir.exists(FEMA_GPKG_PATH)) {
-  message(paste0(FEMA_GPKG_PATH, " directory does not exist...\nCreating directory:\n > '", FEMA_GPKG_PATH, "'"))
-  dir.create(FEMA_GPKG_PATH)
-}
-
-# create directory for FEMA geometries partitioned by VPU
-if (!dir.exists(FEMA_BY_VPU_PATH)) {
-  message(paste0(FEMA_BY_VPU_PATH, " directory does not exist...\nCreating directory:\n > '", FEMA_BY_VPU_PATH, "'"))
-  dir.create(FEMA_BY_VPU_PATH)
-}
-
-for (VPU_SUBFOLDER in FEMA_VPU_SUBFOLDERS) {
-  # create directory for FEMA geometries by VPU
-  # message(VPU_SUBFOLDER)
-  
-  # state_dir  = paste0(VPU_SUBFOLDER, "/states/")
-  # merged_dir = paste0(VPU_SUBFOLDER, "/merged/")
-  
-  if (!dir.exists(VPU_SUBFOLDER)) {
-    message("Creating FEMA VPU subfolder...")
-    message(paste0("'/", basename(VPU_SUBFOLDER), "' directory does not exist...\n  Creating directory:\n > '", VPU_SUBFOLDER, "'"))
-    dir.create(VPU_SUBFOLDER)
-  }
-  # if (!dir.exists(state_dir)) { 
-  #   message("Creating FEMA VPU states subfolder...")
-  #   message(paste0("'/", basename(state_dir), "' directory does not exist...\n  Creating directory:\n > '", state_dir, "'"))
-  #   dir.create(state_dir)
-  # }
-  # if (!dir.exists(merged_dir)) { 
-  #   message("Creating FEMA VPU merged subfolder...")
-  #   message(paste0("'/", basename(merged_dir), "' directory does not exist...\n  Creating directory:\n > '", merged_dir, "'"))
-  #   dir.create(merged_dir)
-  # }
-}
-
-# -------------------------------------------------------------------------------------
-# ---- Get paths to downloaded FEMA 100 FGBs ----
-# -------------------------------------------------------------------------------------
-
-FEMA_FILENAMES        <- list.files(FEMA_FGB_PATH, full.names = FALSE)
-FEMA_FILE_PATHS       <- paste0(FEMA_FGB_PATH, "/", FEMA_FILENAMES)
-
-for (file in FEMA_FILENAMES) {
-  
-  STAGING_FILES_TO_DELETE <- c()
-  
-  # Convert FGB to GeoJSON
-  local_fema_path   <- paste0(FEMA_FGB_PATH, "/", file)
-  geojson_filename  <- gsub(".fgb", ".geojson", file)
-  geojson_save_path <- paste0(FEMA_GPKG_PATH, "/", geojson_filename)
-  
-  message("FEMA filename: '", file, "'")
-  message("Converting \n > '", file, "' to geojson '", geojson_filename, "'")
-  
-  geojson_exists  <- file.exists(geojson_save_path)
-  
-  message(" >>> '", geojson_filename, "' already exists? ", geojson_exists)
-  message(" >>> Overwrite? ", OVERWRITE_FEMA_FILES)
-  
-  ogr2ogr_command <- paste0("ogr2ogr ", geojson_save_path, " ", local_fema_path)
-  
-  if (OVERWRITE_FEMA_FILES || !geojson_exists) {
-    system(ogr2ogr_command)
-    message("Writing '", geojson_filename, "' to: \n > '", geojson_save_path, "'")
-    
-    STAGING_FILES_TO_DELETE <- c(STAGING_FILES_TO_DELETE, geojson_save_path)
-  }
-  
-  # Clean GeoJSON
-  message("Simplify, dissolve, explode > '", geojson_filename, "'")
-  output_clean_filename <- gsub(".geojson", "_clean.geojson", geojson_filename)
-  output_clean_geojson_path     <- paste0(FEMA_GPKG_PATH, "/", output_clean_filename)
-  
-  clean_geojson_exists  <- file.exists(output_clean_geojson_path)
-  message(" >>> '", output_clean_filename, "' already exists? ", clean_geojson_exists)
-  message(" >>> Overwrite? ", OVERWRITE_FEMA_FILES)
-  
-  mapshaper_command = paste0('node  --max-old-space-size=16000 /opt/homebrew/bin/mapshaper ', geojson_save_path, 
-                             ' -dissolve2 FLD_AR_ID \\', 
-                             ' -simplify 0.1 visvalingam \\', 
-                             ' -snap \\',
-                             ' -o ', output_clean_geojson_path
-                             )
-  
-  
-  if (OVERWRITE_FEMA_FILES || !clean_geojson_exists) {
-    message("Running mapshaper 'simplify', 'dissolve', and 'explode' via CLI...")
-    system(mapshaper_command)
-    message("Writing '", output_clean_filename, "' to: \n > '", output_clean_geojson_path, "'")
-    
-    STAGING_FILES_TO_DELETE <- c(STAGING_FILES_TO_DELETE, output_clean_geojson_path)
-  }
-  
-  # Convert cleaned GeoJSON to GeoPackage
-  message("Fema 100 year flood plain:\n > '", output_clean_filename, "'")
-  
-  output_gpkg_filename  <- gsub("_clean.geojson", "_clean.gpkg", output_clean_filename)
-  output_gpkg_path      <- paste0(FEMA_GPKG_PATH, "/", output_gpkg_filename)
-  
-  message("Converting GEOJSON file to GPKG:\n > '", output_clean_filename, "' > '", output_gpkg_filename, "'")
-  
-  clean_gpkg_exists <- file.exists(output_gpkg_path)
-  message(" >>> '", output_gpkg_filename, "' already exists? ", clean_gpkg_exists)
-  message(" >>> Overwrite? ", OVERWRITE_FEMA_FILES)
-  
-  ogr2ogr_command <- paste0("ogr2ogr -nlt MULTIPOLYGON ", output_gpkg_path, " ", output_clean_geojson_path)
-  
-  if (OVERWRITE_FEMA_FILES || !clean_gpkg_exists) {
-    system(ogr2ogr_command)
-    message("Writing '", output_gpkg_filename, "' to: \n > '", output_gpkg_path, "'")
-  }
-  
-  message("Deleting intermediary files\n")
-  for (delete_file in STAGING_FILES_TO_DELETE) {
-    if (file.exists(delete_file)) {
-      message("Deleting >>> '", delete_file, "'")
-      file.remove(delete_file)
-    }
-    
-  }
-  
-  message()
-  
-}
-
-# -------------------------------------------------------------------------------------------------------------------
-# ---- Apply final dissolve/snap and removal of internal boundaries in FEMA geometries  ----
-# -------------------------------------------------------------------------------------------------------------------
-
-# paths to FEMA 100 year flood plain files
-FEMA_gpkg_paths      <- list.files(FEMA_GPKG_PATH, full.names = TRUE)
-
-for (file_path in FEMA_gpkg_paths) {
-  message("Resolving internal boundaries, islands, and topology issues:\n > '", basename(file_path), "'")
-  
-  fema <- sf::read_sf(file_path)
-  
-  fema <-
-    fema[!sf::st_is_empty(fema), ] %>% 
-    sf::st_transform(5070)
-  
-  #  TODO: Snap using geos::geos_snap()
-  # fema <-
-  #   geos::geos_snap(
-  #     geos::as_geos_geometry(fema),
-  #     geos::as_geos_geometry(fema),
-  #     tolerance = 1
-  #     ) %>%
-  #   geos::geos_make_valid()  %>%
-  #   sf::st_as_sf()
-  
-  # TODO: we get this error when trying to use the geometry column after geos snapping
-  # TODO: Error = "Error: Not compatible with STRSXP: [type=NULL]."
-  # fema %>%
-  # sf::st_cast("POLYGON")
-  
-  # TODO: Snap using sf::st_snap()
-  # fema <- sf::st_snap(
-  #             fema,
-  #             fema,
-  #             tolerance = 2
-  #             )
-  
-  fema <-
-    fema %>% 
-    # fema[!sf::st_is_empty(fema), ] %>% 
-    dplyr::select(geometry = geom) %>%
-    add_predicate_group_id(sf::st_intersects) %>% 
-    sf::st_make_valid() %>% 
-    dplyr::group_by(group_id) %>% 
-    dplyr::summarise(
-      geometry = sf::st_combine(sf::st_union(geometry))
-    ) %>% 
-    dplyr::ungroup() %>% 
-    dplyr::select(-group_id) %>% 
-    add_predicate_group_id(sf::st_intersects) %>% 
-    rmapshaper::ms_dissolve(sys = TRUE, sys_mem = 16) %>% 
-    rmapshaper::ms_explode(sys = TRUE, sys_mem = 16) %>% 
-    dplyr::mutate(
-      fema_id = as.character(1:dplyr::n())
-    ) %>% 
-    dplyr::select(fema_id, geometry)
-  
-  # mapview::mapview(fema, color = 'cyan', col.regions = "cyan") + 
-  # mapview::mapview(end_fema, color = 'red', col.regions = "white") 
-  
-  fema <- 
-    fema %>% 
-    dplyr::mutate(
-      source = basename(file_path),
-      state  = gsub("-100yr-flood_valid_clean.gpkg", "", source)
-    ) %>%
-    dplyr::select(fema_id, source, state, 
-                  # areasqkm, 
-                  geometry)
-  
-  message("End time: ", Sys.time())
-  
-  if (OVERWRITE_FEMA_FILES) {
-    message("Writting '", basename(file_path), "' to: \n > '", file_path, "'")
-    sf::write_sf(
-      # fema_clean,
-      fema,
-      file_path
-    )
-  }
-  message()
-  
-}
-
-# -------------------------------------------------------------------------------------
-# ---- Partition parts of each FEMA GPKG to a Nextgen VPU ---- 
-# -------------------------------------------------------------------------------------
-
-# Clean FEMA GPKG files
-FEMA_CLEAN_GPKG_PATHS      <- list.files(FEMA_GPKG_PATH, full.names = TRUE)
-
-# paths to nextgen datasets and model attribute parquet files
-NEXTGEN_FILENAMES    <- list.files(nextgen_dir, full.names = FALSE)
-NEXTGEN_FILE_PATHS   <- paste0(nextgen_dir, NEXTGEN_FILENAMES)
-
-for (file_path in FEMA_CLEAN_GPKG_PATHS) {
-  
-  fema_file <- basename(file_path)
-  
-  message("Partioning FEMA polygons by VPU: \n > FEMA gpkg: '", fema_file, "'")
-  
-  # read in fema polygons
-  fema <- sf::read_sf(file_path)
-  
-  for (nextgen_path in NEXTGEN_FILE_PATHS) {
-    nextgen_basename <- basename(nextgen_path)
-    vpu              <- unlist(regmatches(nextgen_basename, gregexpr("\\d+[A-Za-z]*", nextgen_basename)))
-    
-    message("VPU: ", vpu)   
-    message("- nextgen gpkg:\n > '", nextgen_path, "'")   
-    message(" > Checking if '", fema_file, "' intersects with '", nextgen_basename, "'")
-    
-    # read in nextgen flowlines 
-    flines <- sf::read_sf(nextgen_path, layer = "flowpaths")
-    
-    # get the FEMA polygons that intersect with the nextgen flowlines
-    fema_intersect <- polygons_with_line_intersects(fema, flines)
-    
-    fema_in_nextgen <-  nrow(fema_intersect) != 0
-    
-    message("FEMA intersects with nextgen flowlines? ", fema_in_nextgen)
-    
-    if(fema_in_nextgen) {
-      
-      # create filepaths
-      vpu_subfolder      <- paste0("VPU_", vpu)
-      # vpu_subfolder_path <- paste0(FEMA_BY_VPU_PATH, "/", vpu_subfolder, "/states")
-      vpu_subfolder_path <- paste0(FEMA_BY_VPU_PATH, "/", vpu_subfolder)
-      
-      # vpu_subfolder_path <- FEMA_VPU_SUBFOLDERS[grepl(vpu_subfolder, FEMA_VPU_SUBFOLDERS)]
-      
-      fema_intersect <-
-        fema_intersect %>%
-        dplyr::mutate(
-          vpu = vpu
-        ) %>%
-        dplyr::select(vpu, fema_id, source, state, geom)
-      
-      # state <- gsub("-100yr-flood_valid_clean.gpkg", "", fema_file)
-      
-      fema_vpu_filename <- gsub(".gpkg", paste0("_", vpu, ".gpkg"), fema_file)
-      fema_vpu_path     <- paste0(vpu_subfolder_path, "/", fema_vpu_filename)
-      
-      
-      if (OVERWRITE_FEMA_FILES) {
-        message("Writting '", basename(fema_vpu_filename), "' to: \n > '", fema_vpu_path, "'")
-        
-        sf::write_sf(
-          fema_intersect,
-          fema_vpu_path
-        )
-      }
-      
-      
-    }
-    message()
-  }
-  
-  
-  message(
-    "--------------------------------------------------------------\n", 
-    "Completed all VPU intersections for: \n > '", fema_file, "'",
-    "\n--------------------------------------------------------------\n"
-  )
-  
-}
-
-# -------------------------------------------------------------------------------------
-# ---- Loop through each VPU subfolder and merge all of the Geopackages into one---- 
-# -------------------------------------------------------------------------------------
-
-DELETE_STAGING_GPKGS <- F
-
-for (vpu_dir in FEMA_VPU_SUBFOLDERS) {
-  # for (i in 1:4) {
-  # vpu_dir = FEMA_VPU_SUBFOLDERS2[12]
-  message("Merging files in '", basename(vpu_dir), "' directory...")
-  # }
-  
-  vpu_subdirs    <- list.files(vpu_dir, full.names = TRUE)
-  
-  # path to the merged directory where the final merged geopackage will end up
-  master_name       <- paste0("fema_", gsub("VPU", "vpu", basename(vpu_dir)))
-  master_gpkg_name  <- paste0(master_name, ".gpkg")
-  master_filepath   <- paste0(vpu_dir, "/", master_gpkg_name)
-  
-  # fema state geopackages partitioned for the specific VPU
-  fema_state_gpkgs <- list.files(vpu_dir, full.names = TRUE)
-  
-  # make sure to ignore the master file if it already exists
-  fema_state_gpkgs <- fema_state_gpkgs[fema_state_gpkgs != master_filepath]
-  
-  for(gpkg_file in fema_state_gpkgs) {
-    # message(" - Appending '", basename(gpkg_file), "' to master FEMA VPU gpkg:\n  >  '", 
-    #         basename(gpkg_file), " > ", basename(master_filepath), 
-    #         "'")
-    message(" > '", 
-            basename(gpkg_file), " > ", basename(master_filepath), 
-            "'")
-    
-    ogr2ogr_merge_command <- paste0("ogr2ogr -f 'gpkg' -append -nln ", master_name, " ",   
-                                    master_filepath, 
-                                    " ", gpkg_file
-    )
-    
-    if (OVERWRITE_FEMA_FILES) {
-      system(ogr2ogr_merge_command)
-    }
-  }
-  
-  has_fema_state_gpkgs <- length(fema_state_gpkgs) > 0
-  
-  if(DELETE_STAGING_GPKGS && has_fema_state_gpkgs) {
-    message(" - Deleting individual gpkgs from '", vpu_dir, "' directory...")
-    # message("- Deleting individual gpkgs from 'states' directory:\n > '", states_dir, "'")
-    
-    remove_gpkg_cmds <- paste0("rm ", fema_state_gpkgs)
-    
-    for (remove_cmd in remove_gpkg_cmds) {
-      message("  >  '", remove_cmd, "'")
-      system(remove_cmd)
-    }
-  }
-  
-  # message()
-  message("Merge complete!")
-  message("Merged '", basename(vpu_dir), "' FEMA output geopackage:\n --> '", master_filepath, "'")
-  message()
-}
-
-# -------------------------------------------------------------------------------------
-# ---- Union each VPU geopackage (either on state or just touching predicate) ---- 
-# -------------------------------------------------------------------------------------
-
-for (i in 1:length(FEMA_VPU_SUBFOLDERS)) {
-  vpu_dir    <- FEMA_VPU_SUBFOLDERS[i]
-  VPU        <- basename(vpu_dir)
-  
-  message(i, " - Attempting to union FEMA polygons for '", VPU, "'...")
-
-  # path to the merged directory where the final merged geopackage will end up
-  master_name       <- paste0("fema_", gsub("VPU", "vpu", basename(vpu_dir)))
-  master_gpkg_name  <- paste0(master_name, ".gpkg")
-  master_filepath   <- paste0(vpu_dir, "/", master_gpkg_name)
-  
-  message("> Re-unioning and re-exploding geometries in '", basename(master_filepath), "'")
-  
-  if(!file.exists(master_filepath)) { 
-    message("No FEMA geometries in '", VPU, "'")
-    message()
-    next
-  }
-  
-  
-  fema_vpu <- sf::read_sf(master_filepath)
-  
-  # fema_vpu %>% sf::st_geometry_type() %>% unique()
-
-  fema_vpu <- 
-    fema_vpu %>% 
-    nngeo::st_remove_holes(max_area = 200) %>%
-    # dplyr::select(geometry = geom) %>%
-    add_predicate_group_id(sf::st_intersects) %>% 
-    sf::st_make_valid() %>%
-    dplyr::group_by(group_id) %>% 
-    dplyr::summarise(
-      geometry = sf::st_combine(sf::st_union(geometry))
-    ) %>% 
-    dplyr::ungroup() %>% 
-    dplyr::select(-group_id) %>%
-    add_predicate_group_id(sf::st_intersects)
-  
-  geom_type_counts <- table(sf::st_geometry_type(fema_vpu))
-  
-  message("Geometry counts before casting all geometries to MULTIPOLYGON:")
-  for (g in seq_along(geom_type_counts)) {
-    message(" > ", names(geom_type_counts[g]), ": ", geom_type_counts[g])
-  }
-  
-  message("Keeping only POLYGON and MULTIPOLYGON geometries...")
-  fema_vpu <- 
-    fema_vpu %>%
-    dplyr::filter(sf::st_geometry_type(geometry) %in% c("POLYGON", "MULTIPOLYGON")) %>%
-    sf::st_cast("MULTIPOLYGON") %>%
-    sf::st_make_valid() %>%
-    # dplyr::group_by(group_id) %>% 
-    rmapshaper::ms_dissolve(sys = TRUE, sys_mem = 16) %>%
-    rmapshaper::ms_explode(sys = TRUE, sys_mem = 16) %>%
-    nngeo::st_remove_holes(max_area = 200) %>% 
-    dplyr::mutate(
-      fema_id = as.character(1:dplyr::n())
-    ) %>% 
-    dplyr::select(fema_id, geometry)
-  
-  # end_geom_type_counts <- table(sf::st_geometry_type(fema_vpu))
-  # message("Geometry counts after all processing steps: ")
-  # for (g in seq_along(end_geom_type_counts)) {
-  #   message(" > ", names(end_geom_type_counts[g]), ": ", end_geom_type_counts[g])
-  # }
-  
-  # fema_vpu2 %>% mapview::npts()
-  # fema_vpu2_subset <- fema_vpu2[lengths(sf::st_intersects(fema_vpu2, fema_vpu[1:100, ]))  > 1, ]
-  
-  # mapview::mapview(fema_vpu, color = 'red', col.regions = 'white') +
-  #       mapview::mapview(fema_vpu2, color = 'green', col.regions = 'white')
-  #   # mapview::mapview(fema_vpu2_subset, color = 'green', col.regions = 'white')
-  # # mapview::mapview(fema_vpu2[1:100, ], color = 'green', col.regions = 'white')
-  
-  fema_vpu <-
-    fema_vpu %>%
-    # dplyr::group_by(source) %>%
-    dplyr::mutate(
-      # state    = tolower(gsub("-100yr-flood_valid_clean.gpkg", "", source)),
-      vpu      = gsub("VPU_", "", VPU),
-      fema_id  = 1:dplyr::n()
-    ) %>%
-    dplyr::ungroup() %>%
-    dplyr::select(vpu, fema_id, 
-                  # state, 
-                  geom = geometry)
-  
-  if (OVERWRITE_FEMA_FILES) {
-    
-    message("> Overwritting '", basename(master_filepath), "' with final clean version...")
-    
-    # union_file_path <- gsub(".gpkg", "_union.gpkg", fema_vpu_file)
-    # message("> writting '", basename(union_file_path), "' (unioned and exploded version)")
-    
-    sf::write_sf(
-      fema_vpu,
-      master_filepath
-      # union_file_path
-    )
-  }
-  message()
-}

From 5b449f782b726e27accc1d4fbd1e7c183fb8f504 Mon Sep 17 00:00:00 2001
From: anguswg-ucsb <anguswatters@gmail.com>
Date: Wed, 17 Jul 2024 13:59:48 -0700
Subject: [PATCH 45/64] moved variables for running partition_fema_by_vpu.R and
 renamed preprocess_fema.R to partition_fema_by_vpu.R, added fema download and
 processing to 04_driver script

---
 runners/cs_runner/01_transects.R              |  2 +-
 runners/cs_runner/04_driver.R                 |  6 +++
 runners/cs_runner/config_vars.R               | 10 +++++
 ...process_fema.R => partition_fema_by_vpu.R} | 43 ++-----------------
 4 files changed, 20 insertions(+), 41 deletions(-)
 rename runners/cs_runner/{preprocess_fema.R => partition_fema_by_vpu.R} (94%)

diff --git a/runners/cs_runner/01_transects.R b/runners/cs_runner/01_transects.R
index e8d0476..f88d216 100644
--- a/runners/cs_runner/01_transects.R
+++ b/runners/cs_runner/01_transects.R
@@ -38,7 +38,7 @@ for(i in 1:nrow(path_df)) {
   # fema_vpu_dir <- paste0(FEMA_VPU_SUBFOLDERS[grepl(paste0("VPU_", vpu), basename(FEMA_VPU_SUBFOLDERS))], "/merged")
 
   vpu_fema_files <- list.files(fema_vpu_dir, full.names = TRUE)
-  vpu_fema_file <- vpu_fema_files[grepl(paste0(vpu, "_union.gpkg"), vpu_fema_files)]
+  vpu_fema_file <- vpu_fema_files[grepl(paste0(vpu, "_output.gpkg"), vpu_fema_files)]
   
 
   # # model attributes file and full path
diff --git a/runners/cs_runner/04_driver.R b/runners/cs_runner/04_driver.R
index af8e1ca..a19a7c4 100644
--- a/runners/cs_runner/04_driver.R
+++ b/runners/cs_runner/04_driver.R
@@ -6,6 +6,12 @@ source("runners/cs_runner/config.R")
 # downloads nextgen datasets 
 source("runners/cs_runner/download_nextgen.R")
 
+# download FEMA 100-year FGBs
+source("runners/cs_runner/download_fema100.R")
+
+# simplify and dissolve FEMA polygons, then partition them by VPU 
+source("runners/cs_runner/partition_fema_by_vpu.R")
+
 # generate and upload transects datasets 
 source("runners/cs_runner/01_transects.R")
 
diff --git a/runners/cs_runner/config_vars.R b/runners/cs_runner/config_vars.R
index 47483c9..492611f 100644
--- a/runners/cs_runner/config_vars.R
+++ b/runners/cs_runner/config_vars.R
@@ -38,6 +38,16 @@ FEMA_VPU_SUBFOLDERS <- paste0(FEMA_BY_VPU_PATH, "/VPU_", VPU_IDS)
 #                           )
 #                         )
 
+# -------------------------------------------------------------------------------------
+# ---- OVERWRITE_FEMA_FILES constant logical ----
+# ---- > if TRUE, processing steps will be run again 
+#          and overwrite existing previously processed files
+# -------------------------------------------------------------------------------------
+
+# Default is TRUE (i.e. a fresh processing run is done from start to finish)
+OVERWRITE_FEMA_FILES  <- TRUE
+DELETE_STAGING_GPKGS  <- TRUE # remove intermediary files from the main output folder
+
 # ----------------------------------------------------------------------------
 # ---- Cross section point extraction constant variables ----
 # ----------------------------------------------------------------------------
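
Note that partition_fema_by_vpu.R still assigns the same OVERWRITE_FEMA_FILES / DELETE_STAGING_GPKGS flags at its top, so whichever file is sourced last wins. A hedged pattern that keeps config_vars.R authoritative while letting the script run standalone:

    # in partition_fema_by_vpu.R: fall back to defaults only when the
    # config was not sourced first (assumes the flag names above)
    if (!exists("OVERWRITE_FEMA_FILES")) OVERWRITE_FEMA_FILES <- TRUE
    if (!exists("DELETE_STAGING_GPKGS")) DELETE_STAGING_GPKGS <- TRUE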
diff --git a/runners/cs_runner/preprocess_fema.R b/runners/cs_runner/partition_fema_by_vpu.R
similarity index 94%
rename from runners/cs_runner/preprocess_fema.R
rename to runners/cs_runner/partition_fema_by_vpu.R
index ee08e25..48447d8 100644
--- a/runners/cs_runner/preprocess_fema.R
+++ b/runners/cs_runner/partition_fema_by_vpu.R
@@ -35,8 +35,7 @@ library(nngeo)
 
 # Default is TRUE (i.e. a fresh processing run is done from start to finish)
 OVERWRITE_FEMA_FILES  <- TRUE
-DELETE_STAGING_GPKGS  <- FALSE
-# DELETE_STAGING_GPKGS  <- TRUE
+DELETE_STAGING_GPKGS  <- TRUE
 
 # -------------------------------------------------------------------------------------
 # ---- Create directories (if they do NOT exist) ----
@@ -498,32 +497,7 @@ for (i in 5:length(FEMA_VPU_SUBFOLDERS)) {
       )
   
     })
-  
-    # fema_vpu2 <- 
-    #   fema_vpu %>% 
-    #   nngeo::st_remove_holes(max_area = 200) %>% 
-    #   # sf::st_make_valid() %>% 
-    #   # dplyr::filter(sf::st_geometry_type(geom) %in% c("POLYGON", "MULTIPOLYGON")) %>% 
-    #   add_predicate_group_id(sf::st_intersects) %>% 
-    #   dplyr::group_by(group_id) %>% 
-    #   dplyr::summarise(
-    #     geometry = sf::st_combine(sf::st_union(geometry))
-    #   ) %>%
-    #   rmapshaper::ms_explode(sys = TRUE, sys_mem = 16) %>% 
-    #   dplyr::ungroup() %>% 
-    #   nngeo::st_remove_holes(max_area = 200)
-      
-  # fema_vpu <- 
-  #   fema_vpu %>% 
-  #   dplyr::mutate(      
-  #     vpu      = gsub("VPU_", "", VPU),
-  #     fema_id  = as.character(1:dplyr::n())
-  #   ) %>% 
-  #   dplyr::select(
-  #       vpu, fema_id,
-  #       # state, 
-  #       geom = geometry
-  #       )
+
   
   
   # fema_vpu <- 
@@ -539,17 +513,11 @@ for (i in 5:length(FEMA_VPU_SUBFOLDERS)) {
   #   dplyr::ungroup() %>% 
   #   dplyr::select(-group_id) %>%
   #   add_predicate_group_id(sf::st_intersects)
-  # 
-  # fema_vpu %>% sf::st_geometry_type() %>% unique()
-  # fema_vpu %>% mapview::npts()
-  # 
   # geom_type_counts <- table(sf::st_geometry_type(fema_vpu))
-  # 
   # message("Geometry counts before casting all geometries to MULTIPOLYGON:")
   # for (g in seq_along(geom_type_counts)) {
   #   message(" > ", names(geom_type_counts[g]), ": ", geom_type_counts[g])
   # }
-  # 
   # message("Keeping only POLYGON and MULTIPOLYGON geometries...")
   # fema_vpu <- 
   #   fema_vpu %>%
@@ -575,9 +543,6 @@ for (i in 5:length(FEMA_VPU_SUBFOLDERS)) {
   # fema_vpu2_subset <- fema_vpu2[lengths(sf::st_intersects(fema_vpu2, fema_vpu[1:100, ]))  > 1, ]
   # mapview::mapview(fema_vpu, color = 'red', col.regions = 'white') +
   #       mapview::mapview(fema_vpu2, color = 'green', col.regions = 'white')
-  #   # mapview::mapview(fema_vpu2_subset, color = 'green', col.regions = 'white')
-  # # mapview::mapview(fema_vpu2[1:100, ], color = 'green', col.regions = 'white')
-  
   # fema_vpu <-
   #   fema_vpu %>%
   #   # dplyr::group_by(source) %>%
@@ -587,9 +552,7 @@ for (i in 5:length(FEMA_VPU_SUBFOLDERS)) {
   #     fema_id  = 1:dplyr::n()
   #   ) %>%
   #   dplyr::ungroup() %>%
-  #   dplyr::select(vpu, fema_id, 
-  #                 # state, 
-  #                 geom = geometry)
+  #   dplyr::select(vpu, fema_id, geom = geometry)
   
   if (OVERWRITE_FEMA_FILES) {
     

From c81675c73a2225c7ca9e63848a176ff0a1dfebe6 Mon Sep 17 00:00:00 2001
From: anguswg-ucsb <anguswatters@gmail.com>
Date: Thu, 18 Jul 2024 04:56:40 -0700
Subject: [PATCH 46/64] small cleanups

---
 runners/cs_runner/partition_fema_by_vpu.R | 99 +++++------------------
 1 file changed, 22 insertions(+), 77 deletions(-)

diff --git a/runners/cs_runner/partition_fema_by_vpu.R b/runners/cs_runner/partition_fema_by_vpu.R
index 48447d8..03f224f 100644
--- a/runners/cs_runner/partition_fema_by_vpu.R
+++ b/runners/cs_runner/partition_fema_by_vpu.R
@@ -341,7 +341,8 @@ for (file_path in FEMA_CLEAN_GPKG_PATHS) {
 
 for (vpu_dir in FEMA_VPU_SUBFOLDERS) {
   # for (i in 1:4) {
-  # vpu_dir = FEMA_VPU_SUBFOLDERS2[12]
+  # i = 1
+  # vpu_dir = FEMA_VPU_SUBFOLDERS2[i]
   message("Merging files in '", basename(vpu_dir), "' directory...")
   # }
   
@@ -352,25 +353,31 @@ for (vpu_dir in FEMA_VPU_SUBFOLDERS) {
   master_gpkg_name  <- paste0(master_name, ".gpkg")
   master_filepath   <- paste0(vpu_dir, "/", master_gpkg_name)
   
+  # if the file already exists, remove it so we don't append duplicate data to the "master file"
+  if (file.exists(master_filepath)) {
+    file.remove(master_filepath)
+  }
+  
   # fema state geopackages partitioned for the specific VPU
-  fema_state_gpkgs <- list.files(vpu_dir, full.names = TRUE)
+  fema_state_gpkgs        <- list.files(vpu_dir, full.names = TRUE)
+  master_output_filepath  <- paste0(vpu_dir, "/", gsub(".gpkg", "_output.gpkg", master_gpkg_name))
   
   # make sure to ignore the master file if it already exists
-  fema_state_gpkgs <- fema_state_gpkgs[fema_state_gpkgs != master_filepath]
+  fema_state_gpkgs <- fema_state_gpkgs[fema_state_gpkgs != master_filepath & fema_state_gpkgs != master_output_filepath]
   
   for(gpkg_file in fema_state_gpkgs) {
-    # message(" - Appending '", basename(gpkg_file), "' to master FEMA VPU gpkg:\n  >  '", 
-    #         basename(gpkg_file), " > ", basename(master_filepath), 
+    # message(" - Appending '", basename(gpkg_file), "' to master FEMA VPU gpkg:\n  >  '",
+    #         basename(gpkg_file), " > ", basename(master_filepath),
     #         "'")
-    message(" > '", 
-            basename(gpkg_file), " > ", basename(master_filepath), 
+    message(" > '",
+            basename(gpkg_file), " > ", basename(master_filepath),
             "'")
-    
-    ogr2ogr_merge_command <- paste0("ogr2ogr -f 'gpkg' -append -nln ", master_name, " ",   
-                                    master_filepath, 
+
+    ogr2ogr_merge_command <- paste0("ogr2ogr -f 'gpkg' -append -nln ", master_name, " ",
+                                    master_filepath,
                                     " ", gpkg_file
     )
-    
+
     if (OVERWRITE_FEMA_FILES) {
       system(ogr2ogr_merge_command)
     }
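
Since these merge commands go through system(), a path containing a space would break them. A hedged hardening using shQuote() (GDAL's canonical driver name is GPKG):

    ogr2ogr_merge_command <- paste(
      "ogr2ogr -f GPKG -append -nln", master_name,
      shQuote(master_filepath),   # destination master geopackage
      shQuote(gpkg_file)          # state-level geopackage being appended
    )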
@@ -399,15 +406,8 @@ for (vpu_dir in FEMA_VPU_SUBFOLDERS) {
 # -------------------------------------------------------------------------------------
 # ---- Union each VPU geopackage (either on state or just touching predicate) ---- 
 # -------------------------------------------------------------------------------------
-# for (i in 5:length(FEMA_VPU_SUBFOLDERS)) {
-#   
-#   vpu_dir    <- FEMA_VPU_SUBFOLDERS[i]
-#   VPU        <- basename(vpu_dir)
-#   
-#   message(i, " - Attempting to union FEMA polygons for '", VPU, "'...")
-# }
 
-for (i in 5:length(FEMA_VPU_SUBFOLDERS)) {
+for (i in 1:length(FEMA_VPU_SUBFOLDERS)) {
   
   vpu_dir    <- FEMA_VPU_SUBFOLDERS[i]
   VPU        <- basename(vpu_dir)
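
A small hardening worth considering for the loop restored above: seq_along() avoids iterating over c(1, 0) when the subfolder list is empty, which 1:length(x) would do.

    for (i in seq_along(FEMA_VPU_SUBFOLDERS)) {   # safe when the list is empty
      vpu_dir <- FEMA_VPU_SUBFOLDERS[i]
      VPU     <- basename(vpu_dir)
    }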
@@ -440,6 +440,7 @@ for (i in 5:length(FEMA_VPU_SUBFOLDERS)) {
   #   sf::st_make_valid() %>% 
   #   sf::st_geometry_type() %>% 
   #   unique()
+  
   geom_type_counts <- table(sf::st_geometry_type(fema_vpu))
   
   message("Geometry counts before casting all geometries to MULTIPOLYGON:")
@@ -461,10 +462,10 @@ for (i in 5:length(FEMA_VPU_SUBFOLDERS)) {
     fema_vpu <- 
       fema_vpu %>% 
       nngeo::st_remove_holes(max_area = 200) %>% 
-      sf::st_make_valid() %>%
+      # sf::st_make_valid() %>%
       # dplyr::filter(sf::st_geometry_type(geom) %in% c("POLYGON", "MULTIPOLYGON")) %>% 
       add_predicate_group_id(sf::st_intersects) %>% 
-      dplyr::group_by(group_id) %>% 
+      dplyr::group_by(group_id) %>%
       rmapshaper::ms_dissolve(sys = TRUE, sys_mem = 16) %>% 
       rmapshaper::ms_explode(sys = TRUE, sys_mem = 16) %>% 
       dplyr::ungroup() %>% 
@@ -497,62 +498,6 @@ for (i in 5:length(FEMA_VPU_SUBFOLDERS)) {
       )
   
     })
-
-  
-  
-  # fema_vpu <- 
-  #   fema_vpu %>% 
-  #   nngeo::st_remove_holes(max_area = 200) %>%
-  #   # dplyr::select(geometry = geom) %>%
-  #   add_predicate_group_id(sf::st_intersects) %>% 
-  #   sf::st_make_valid() %>%
-  #   dplyr::group_by(group_id) %>% 
-  #   dplyr::summarise(
-  #     geometry = sf::st_combine(sf::st_union(geometry))
-  #   ) %>% 
-  #   dplyr::ungroup() %>% 
-  #   dplyr::select(-group_id) %>%
-  #   add_predicate_group_id(sf::st_intersects)
-  # geom_type_counts <- table(sf::st_geometry_type(fema_vpu))
-  # message("Geometry counts before casting all geometries to MULTIPOLYGON:")
-  # for (g in seq_along(geom_type_counts)) {
-  #   message(" > ", names(geom_type_counts[g]), ": ", geom_type_counts[g])
-  # }
-  # message("Keeping only POLYGON and MULTIPOLYGON geometries...")
-  # fema_vpu <- 
-  #   fema_vpu %>%
-  #   dplyr::filter(sf::st_geometry_type(geometry) %in% c("POLYGON", "MULTIPOLYGON")) %>%
-  #   sf::st_cast("MULTIPOLYGON") %>%
-  #   sf::st_make_valid() %>%
-  #   # dplyr::group_by(group_id) %>% 
-  #   rmapshaper::ms_dissolve(sys = TRUE, sys_mem = 16) %>%
-  #   rmapshaper::ms_explode(sys = TRUE, sys_mem = 16) %>%
-  #   nngeo::st_remove_holes(max_area = 200) %>% 
-  #   dplyr::mutate(
-  #     fema_id = as.character(1:dplyr::n())
-  #   ) %>% 
-  #   dplyr::select(fema_id, geometry)
-  
-  # end_geom_type_counts <- table(sf::st_geometry_type(fema_vpu))
-  # message("Geometry counts after all processing steps: ")
-  # for (g in seq_along(end_geom_type_counts)) {
-  #   message(" > ", names(end_geom_type_counts[g]), ": ", end_geom_type_counts[g])
-  # }
-  
-  # fema_vpu2 %>% mapview::npts()
-  # fema_vpu2_subset <- fema_vpu2[lengths(sf::st_intersects(fema_vpu2, fema_vpu[1:100, ]))  > 1, ]
-  # mapview::mapview(fema_vpu, color = 'red', col.regions = 'white') +
-  #       mapview::mapview(fema_vpu2, color = 'green', col.regions = 'white')
-  # fema_vpu <-
-  #   fema_vpu %>%
-  #   # dplyr::group_by(source) %>%
-  #   dplyr::mutate(
-  #     # state    = tolower(gsub("-100yr-flood_valid_clean.gpkg", "", source)),
-  #     vpu      = gsub("VPU_", "", VPU),
-  #     fema_id  = 1:dplyr::n()
-  #   ) %>%
-  #   dplyr::ungroup() %>%
-  #   dplyr::select(vpu, fema_id, geom = geometry)
   
   if (OVERWRITE_FEMA_FILES) {
     

From 64f5b156bbd4b11d55a3629554ae514d81692297 Mon Sep 17 00:00:00 2001
From: anguswg-ucsb <anguswatters@gmail.com>
Date: Thu, 18 Jul 2024 12:15:06 -0700
Subject: [PATCH 47/64] small cleanups and set fema simplification to keep 1% of
 vertices while keeping all shapes

---
 runners/cs_runner/01_transects.R | 144 +++----------------------------
 1 file changed, 11 insertions(+), 133 deletions(-)

diff --git a/runners/cs_runner/01_transects.R b/runners/cs_runner/01_transects.R
index f88d216..de1bd90 100644
--- a/runners/cs_runner/01_transects.R
+++ b/runners/cs_runner/01_transects.R
@@ -26,7 +26,7 @@ path_df <- align_files_by_vpu(
 
 # loop over each VPU and generate cross sections, then save locally and upload to S3 bucket
 for(i in 1:nrow(path_df)) {
-   # i = 8
+  
   # nextgen file and full path
   nextgen_file <- path_df$x[i]
   nextgen_path <- paste0(nextgen_dir, nextgen_file)
@@ -34,17 +34,12 @@ for(i in 1:nrow(path_df)) {
   vpu <- path_df$vpu[i]
 
   # Get FEMA by VPU directory and files for current VPU 
-  fema_vpu_dir <- paste0(FEMA_VPU_SUBFOLDERS[grepl(paste0("VPU_", vpu), basename(FEMA_VPU_SUBFOLDERS))], "/merged")
+  fema_vpu_dir <- paste0(FEMA_VPU_SUBFOLDERS[grepl(paste0("VPU_", vpu), basename(FEMA_VPU_SUBFOLDERS))])
   # fema_vpu_dir <- paste0(FEMA_VPU_SUBFOLDERS[grepl(paste0("VPU_", vpu), basename(FEMA_VPU_SUBFOLDERS))], "/merged")
 
   vpu_fema_files <- list.files(fema_vpu_dir, full.names = TRUE)
-  vpu_fema_file <- vpu_fema_files[grepl(paste0(vpu, "_output.gpkg"), vpu_fema_files)]
+  vpu_fema_file  <- vpu_fema_files[grepl(paste0(vpu, "_output.gpkg"), vpu_fema_files)]
   
-
-  # # model attributes file and full path
-  # model_attr_file <- path_df$y[i]
-  # model_attr_path <- paste0(model_attr_dir, model_attr_file)
-
   message("Creating VPU ", vpu, " transects:", 
           "\n - flowpaths: '",
           nextgen_file, "'",
@@ -57,29 +52,12 @@ for(i in 1:nrow(path_df)) {
   # read in nextgen data
   flines <- sf::read_sf(nextgen_path, layer = "flowpaths")
   
-  # # model attributes
-  # model_attrs <- arrow::read_parquet(model_attr_path)
-
-  # # join flowlines with model atttributes
-  # flines <- dplyr::left_join(
-  #   flines,
-  #   dplyr::select(
-  #     model_attrs,
-  #     id, eTW
-  #   ), by = "id")
-
   # calculate bankfull width
   flines <-
     flines %>%
     dplyr::mutate(
       bf_width = exp(0.700    + 0.365* log(tot_drainage_areasqkm))
     ) %>%
-    # dplyr::mutate( bf_width = 11 * eTW) %>%
-    # dplyr::mutate( # if there are any NAs, use exp(0.700    + 0.365* log(tot_drainage_areasqkm)) equation to calculate bf_width
-    #   bf_width = dplyr::case_when(
-    #     is.na(bf_width) ~ exp(0.700    + 0.365* log(tot_drainage_areasqkm)),
-    #     TRUE            ~ bf_width
-    #   )) %>%
     dplyr::select(
       hy_id = id,
       lengthkm,
@@ -131,6 +109,7 @@ for(i in 1:nrow(path_df)) {
     dplyr::mutate(
       cs_source = net_source
     )
+  
   # ---------------------------------------------------------------------
   # --- Extend transects out to FEMA 100yr floodplains
   # ---------------------------------------------------------------------
@@ -140,29 +119,15 @@ for(i in 1:nrow(path_df)) {
   
   # mapview::npts(fema)
   message("Simplifying FEMA polygons...")
+  
   # TODO: this should be a function argument OR removed; we probably shouldn't forcibly and silently simplify the input polygons without the user knowing.
-  # keep 10% of the original points for speed
-  # fema <- rmapshaper::ms_simplify(fema, keep_shapes = F, keep = 0.01)
-  fema <- rmapshaper::ms_simplify(fema, keep_shapes = F, keep = 0.01, sys = TRUE, sys_mem = 16)
-
+  # keep 1% of the original points for speed
+  fema <- rmapshaper::ms_simplify(fema, keep_shapes = T, keep = 0.01, sys = TRUE, sys_mem = 16)
+  # fema <- rmapshaper::ms_simplify(fema, keep_shapes = F, keep = 0.1, sys = TRUE, sys_mem = 16)
   # mapview::npts(fema)
-  
-  # # TODO: the flines argument needs the "hy_id" column to be named "id" 
-  # # TODO: probably should fix this in hydrofabric3D::get_transect_extension_distances_to_polygons()
-  # flines <- 
-  #   flines %>% 
-  #   dplyr::rename(id = hy_id)
-  
+
   message("Extending transects out to FEMA 100yr floodplain polygon boundaries - (", Sys.time(), ")")
   
-  # # TODO: hacky, need to fix the extend-trancterts to polygons function to not need these columns
-  # transects <- 
-  #   transects %>% 
-  #   dplyr::rename(geom = geometry) %>%
-  #   dplyr::mutate(
-  #     is_extended = FALSE
-  #   )
-  
   transects <- 
     transects  %>%
     dplyr::left_join(
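
On the keep_shapes change above: with keep = 0.01, mapshaper retains roughly 1% of vertices, and keep_shapes = TRUE prevents small FEMA polygons from being dropped outright at that aggressive setting. A hedged sanity check of that trade-off:

    fema_simplified <- rmapshaper::ms_simplify(fema, keep = 0.01, keep_shapes = TRUE,
                                               sys = TRUE, sys_mem = 16)
    # with keep_shapes = TRUE every input feature should survive simplification
    stopifnot(nrow(fema_simplified) == nrow(fema))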
@@ -193,8 +158,8 @@ for(i in 1:nrow(path_df)) {
   transects  <- dplyr::select(transects, -tmp_id)
   transects  <- hydrofabric3D::add_tmp_id(transects)
   
-  # transects <- 
-    transects3 %>%  
+  transects <-
+    transects %>%  
     # dplyr::select(-cs_lengthm) %>% 
     # dplyr::mutate(is_fema_extended = left_is_extended | right_is_extended) %>% 
     dplyr::select(
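
The hunk above also fixes a stray reference to an undefined `transects3` and restores the assignment. For readers without the package handy, a hedged sketch of what hydrofabric3D::add_tmp_id() is used for here (the real signature may differ): building a composite transect key from hy_id and cs_id.

    add_tmp_id_sketch <- function(x) {
      dplyr::mutate(x, tmp_id = paste0(hy_id, "_", cs_id))  # composite transect key
    }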
@@ -213,93 +178,6 @@ for(i in 1:nrow(path_df)) {
   gc()
    
   # # ---------------------------------------------------------------------
-  # # ---------------------------------------------------------------------
-  # 
-  # transects <- sf::read_sf(out_path)
-  # 
-  # # flines <-
-  # #   flines %>%
-  # #   dplyr::filter(hy_id %in% transects$hy_id)
-  # #   dplyr::slice(1:1000)
-  # 
-  # flines <-
-  #   flines %>%
-  #   dplyr::slice(seq(1, nrow(flines), 10))
-  # # 
-  # transects <-
-  #   transects %>%
-  #   dplyr::filter(hy_id %in% flines$hy_id)
-  # # 
-  # 
-  # # TODO: the flines argument needs the "hy_id" column to be named "id" 
-  # # TODO: probably should fix this in hydrofabric3D::get_transect_extension_distances_to_polygons()
-  # flines <- 
-  #   flines %>% 
-  #   dplyr::rename(id = hy_id)
-  # 
-  # 
-  #  # fema_keep <- rmapshaper::ms_simplify(fema, keep_shapes = T, keep = 0.01)
-  # 
-  # mapview::mapview(dplyr::filter(fema, fema_id %in% 1:10), col.regions = "red") + 
-  #   mapview::mapview(dplyr::filter(fema_keep, fema_id %in% 1:10), col.regions = "green") 
-  #   # mapview::mapview(dplyr::filter(fema_nokeep, fema_id %in% 1:10), col.regions = "dodgerblue")
-  # transects
-  # 
-  # # system.time({
-  # # profvis::profvis({
-  #   
-  # # TODO: make sure this 3000m extension distance is appropriate across VPUs 
-  # # TODO: also got to make sure that this will be feasible on memory on the larger VPUs...
-  # # transects2 <- hydrofabric3D::get_transect_extension_distances_to_polygons(
-  # #                                                 transect_lines         = transects, 
-  # #                                                 polygons               = fema, 
-  # #                                                 flines                 = flines, 
-  # #                                                 max_extension_distance = 3000 
-  # #                                                 )
-  # transects2 <- hydrofabric3D::extend_transects_to_polygons(
-  #   transect_lines         = transects, 
-  #   polygons               = fema, 
-  #   flowlines              = flines, 
-  #   max_extension_distance = 3000 
-  # )
-  # # })
-  # # })
-  # 
-  # # mapview::mapview(transects2, color = "green") + 
-  # # mapview::mapview(transects, color = "red") + 
-  # #    mapview::mapview(fema_keep, col.regions = "dodgerblue")
-  # # ---------------------------------------------------------------------
-  # # ----------------------------------------------------------------------------------------------------------------
-  # 
-  # transects2 <- dplyr::select(transects2, -tmp_id)
-  # transects2 <- hydrofabric3D::add_tmp_id(transects2)
-  # 
-  # extended_ids <- 
-  #   transects2 %>% 
-  #   dplyr::filter(left_is_extended | right_is_extended) %>% 
-  #   dplyr::pull(hy_id) %>% 
-  #   unique()
-  # 
-  # start_trans <- dplyr::filter(transects, hy_id %in% extended_ids[1:150])
-  #   end_trans <- dplyr::filter(transects2, hy_id %in% extended_ids[1:150])
-  # mapview::mapview(start_trans, color = "red")  + 
-  #   mapview::mapview(end_trans, color = "green")
-  # transects <- 
-  #   transects %>%  
-  #   dplyr::select(-cs_lengthm) %>% 
-  #   # dplyr::mutate(is_fema_extended = left_is_extended | right_is_extended) %>% 
-  #   dplyr::select(
-  #     hy_id, 
-  #     cs_id, 
-  #     # cs_lengthm,
-  #     cs_lengthm = new_cs_lengthm, 
-  #     cs_source,
-  #     cs_measure,
-  #     # is_extended,
-  #     # is_fema_extended,
-  #     geometry = geom
-  #   )
-  
   message("Saving transects to:\n - filepath: '", out_path, "'")
   
   # save transects with only columns to be uploaded to S3 (lynker-spatial/01_transects/transects_<VPU num>.gpkg)

From 16cd482c8dc7260e0c0d837d6ccaad42a6467e65 Mon Sep 17 00:00:00 2001
From: anguswg-ucsb <anguswatters@gmail.com>
Date: Thu, 18 Jul 2024 12:16:57 -0700
Subject: [PATCH 48/64] removed extra 00_fema.R file

---
 runners/cs_runner/00_fema.R | 143 ------------------------------------
 1 file changed, 143 deletions(-)
 delete mode 100644 runners/cs_runner/00_fema.R

diff --git a/runners/cs_runner/00_fema.R b/runners/cs_runner/00_fema.R
deleted file mode 100644
index 11f55be..0000000
--- a/runners/cs_runner/00_fema.R
+++ /dev/null
@@ -1,143 +0,0 @@
-library(dplyr) 
-
-# Generate the flowlines layer for the final cross_sections_<VPU>.gpkg for each VPU
-source("runners/cs_runner/config.R")
-
-# transect bucket prefix
-transects_prefix <- paste0(s3_bucket, version_prefix, "/3D/transects/")
-
-# paths to nextgen datasets and model attribute parquet files
-nextgen_files   <- list.files(nextgen_dir, full.names = FALSE)
-FEMA_files      <- list.files(FEMA_FGB_PATH, full.names = FALSE)
-FEMA_BB_files   <- list.files(FEMA_FGB_BB_PATH, full.names = FALSE)
-transects_files <- list.files(transects_dir, full.names = FALSE)
-transects_files <- transects_files[!grepl("updated", transects_files)]
-
-# string to fill in "cs_source" column in output datasets
-net_source <- "hydrofabric3D"
-
-# ensure the files are in the same order and matched up by VPU
-path_df <- align_files_by_vpu(
-  x    = nextgen_files,
-  y    = transects_files,
-  base = base_dir
-)
-
-path_df
-
-us_states <- 
-  USAboundaries::us_states() %>% 
-  sf::st_transform(5070)
-
-# loop over each VPU and generate cross sections, then save locally and upload to S3 bucket
-# for(i in 1:nrow(path_df)) {
-  
-  i = 8
-  
-  # nextgen file and full path
-  nextgen_file <- path_df$x[i]
-  nextgen_path <- paste0(nextgen_dir, nextgen_file)
-  
-  transect_file <- path_df$y[i]
-  transect_path <- paste0(transects_dir, transect_file)
-  
-  transect_path
-  
-  # # model attributes file and full path
-  # model_attr_file <- path_df$y[i]
-  # model_attr_path <- paste0(model_attr_dir, model_attr_file)
-  
-  message("Creating VPU ", path_df$vpu[i], "\n - transects: ", transect_file, "\n - flowpaths: '", nextgen_file, "'")
-  # message("Creating VPU ", path_df$vpu[i], " transects:\n - flowpaths: '", nextgen_file, "'\n - model attributes: '", model_attr_file, "'")
-  
-  # read in nextgen data
-  flines <- sf::read_sf(nextgen_path, layer = "flowpaths")
-  
-  flines_bb <- 
-    flines %>% 
-    sf::st_bbox() %>% 
-    sf::st_as_sfc() %>% 
-    sf::st_as_sf()
-  
-  transects <- sf::read_sf(transect_path)
-  
-  
-  # find the states intersecting with the given VPU flowlines
-  intersecting_states <- 
-    sf::st_intersection(us_states, flines_bb) %>% 
-    sf::st_drop_geometry() %>% 
-    .$name %>% 
-    gsub(" ", "-", .)
-  
-  # get the matching FEMA floodplain FGB file names
-  matching_fema_files <- unlist(lapply(intersecting_states, function(state_name) {
-    FEMA_files[grepl(state_name, FEMA_files)]
-    }))
-  
-  # full paths
-  files_of_interest <-  paste0(FEMA_FGB_PATH, "/", matching_fema_files)
-  
-  # Iterate over each FEMA file and determine optimal widths for cross sections.....
-  # for (file in rev(files_of_interest)) {
-  
-  file = "/Users/anguswatters/Desktop/lynker-spatial/FEMA100/Tennessee-100yr-flood_valid.fgb" 
-    fema_fgb <- 
-      file %>% 
-      sf::read_sf() %>% 
-      sf::st_transform(5070)
-    
-    fema_bb <- 
-      fema_fgb %>% 
-      sf::st_bbox() %>% 
-      sf::st_as_sfc() %>% 
-      sf::st_as_sf()
-
-    # fline_subset <- 
-    # flines %>% 
-    # sf::st_intersection(fema_fgb)
-    
-    fline_fema_intersects <- sf::st_intersects(flines, fema_fgb)
-    fema_fline_intersects <- sf::st_intersects(fema_fgb, flines)
-    fema_subset$FLD_AR_ID %>% unique() %>% length()
-    
-    fema_subset <- 
-      fema_fgb[unlist(fema_fline_intersects), ]  %>% 
-      rmapshaper::ms_simplify()
-    
-    fema_subset$FLD_AR_ID %>% unique() %>% length()
-    
-    fema_subset <- fema_subset %>% 
-      rmapshaper::ms_dissolve(field = "FLD_AR_ID")
-    
-    
-    fema_subset %>% mapview::npts()
-    unlist(flines_with_fema)[1]
-    fema_fgb 
-    
-    flines_with_fema      <- flines[lengths(fline_fema_intersects) > 0,]
-    
-    
-    # flines %>% 
-    #   sf::st_filter(
-    #     fema_fgb, 
-    #     .predicate = st_touches
-    #   )
-    fema_subset %>% 
-      dplyr::filter()
-    fema_subset %>% mapview::npts()
-    
-    transects_subset <- 
-      transects %>% 
-      dplyr::filter(hy_id %in% flines_with_fema$id)
-    
-    flines %>% sf::st_crs()
-    mapview::mapview(flines) + fema_bb
-    
-    # mapview::mapview(fema_fgb, col.regions = "dodgerblue") +
-    mapview::mapview(fema_subset, col.regions = "dodgerblue") +
-      mapview::mapview(transects_subset, color = "red") +
-      mapview::mapview(flines_with_fema, color = "green") 
-    
-    message("file: ", file)
-  
-  # }
\ No newline at end of file

From 72b3e888a9a33a4e3217fe1de506fe636a321e98 Mon Sep 17 00:00:00 2001
From: anguswg-ucsb <anguswatters@gmail.com>
Date: Fri, 19 Jul 2024 10:06:58 -0700
Subject: [PATCH 49/64] updated final step for processing fema polygons to use
 mapshaper instead of sf dissolve methods

---
 runners/cs_runner/partition_fema_by_vpu.R | 259 +++++++++++++++-------
 1 file changed, 184 insertions(+), 75 deletions(-)

diff --git a/runners/cs_runner/partition_fema_by_vpu.R b/runners/cs_runner/partition_fema_by_vpu.R
index 03f224f..5a2d35b 100644
--- a/runners/cs_runner/partition_fema_by_vpu.R
+++ b/runners/cs_runner/partition_fema_by_vpu.R
@@ -402,12 +402,14 @@ for (vpu_dir in FEMA_VPU_SUBFOLDERS) {
   message("Merged '", basename(vpu_dir), "' FEMA output geopackage:\n --> '", master_filepath, "'")
   message()
 }
-
 # -------------------------------------------------------------------------------------
-# ---- Union each VPU geopackage (either on state or just touching predicate) ---- 
+# ----Apply simplify, dissolve, explode on the MERGED polygons  ---- 
 # -------------------------------------------------------------------------------------
 
+# list.files(FEMA_VPU_SUBFOLDERS, full.names = T)[grepl("_output.gpkg", list.files(FEMA_VPU_SUBFOLDERS, full.names = T))]
 for (i in 1:length(FEMA_VPU_SUBFOLDERS)) {
+  # i = 8
+  STAGING_FILES_TO_DELETE <- c()
   
   vpu_dir    <- FEMA_VPU_SUBFOLDERS[i]
   VPU        <- basename(vpu_dir)
@@ -419,10 +421,14 @@ for (i in 1:length(FEMA_VPU_SUBFOLDERS)) {
   master_gpkg_name  <- paste0(master_name, ".gpkg")
   master_filepath   <- paste0(vpu_dir, "/", master_gpkg_name)
   
+  master_geojson_name       <- paste0(master_name, ".geojson")
+  master_geojson_filepath   <- paste0(vpu_dir, "/", master_geojson_name)
+  
   updated_gpkg_name  <- gsub(".gpkg", "_output.gpkg", master_gpkg_name)
   updated_filepath   <- paste0(vpu_dir, "/", updated_gpkg_name)
   
-  message("> Re-unioning and re-exploding geometries in '", basename(master_filepath), "'")
+  message("VPU Merged FEMA filename: '", master_gpkg_name, "'")
+  message("> Simplifying, dissolve, exploding VPU aggregated FEMA polygons... '", basename(master_filepath), "'")
   
   if(!file.exists(master_filepath)) { 
     message("No FEMA geometries in '", VPU, "'")
@@ -430,88 +436,191 @@ for (i in 1:length(FEMA_VPU_SUBFOLDERS)) {
     next
   }
   
+  message("Converting \n > '", basename(master_filepath), "' to geojson '", master_geojson_name, "'")
   
-  fema_vpu <- sf::read_sf(master_filepath)
+  geojson_exists  <- file.exists(master_geojson_filepath)
   
-  # fema_vpu %>% sf::st_geometry_type() %>% unique()
-  # fema_vpu %>% mapview::npts()
-  # fema_vpu %>% sf::st_is_valid() %>% all()
-  # fema_vpu %>% 
-  #   sf::st_make_valid() %>% 
-  #   sf::st_geometry_type() %>% 
-  #   unique()
+  # message(" >>> '", geojson_filename, "' already exists? ", geojson_exists)
+  # message(" >>> Overwrite? ", OVERWRITE_FEMA_FILES)
   
-  geom_type_counts <- table(sf::st_geometry_type(fema_vpu))
+  gpkg_to_geojson_cmd <- paste0("ogr2ogr ", master_geojson_filepath, " ", master_filepath)
   
-  message("Geometry counts before casting all geometries to MULTIPOLYGON:")
-  for (g in seq_along(geom_type_counts)) {
-    message(" > ", names(geom_type_counts[g]), ": ", geom_type_counts[g])
+  if (OVERWRITE_FEMA_FILES || !geojson_exists) {
+    system(gpkg_to_geojson_cmd)
+    message("Writing '", master_geojson_name, "' to: \n > '", master_geojson_filepath, "'")
+    
+    STAGING_FILES_TO_DELETE <- c(STAGING_FILES_TO_DELETE, master_geojson_filepath)
   }
   
-  # mapview::mapview(fema_vpu, color = 'red', col.regions = 'white') +
-  #       mapview::mapview(fema_union, color = 'green', col.regions = 'white')
+  # Clean GeoJSON
+  message("Simplify, dissolve, explode > '", master_geojson_name, "'")
+  output_clean_filename      <- gsub(".geojson", "_clean.geojson", master_geojson_name)
+  output_clean_geojson_path  <- paste0(vpu_dir, "/", output_clean_filename)
   
-  # fema_vpu %>% 
-  #   sf::st_make_valid() %>% 
-  #   dplyr::filter(sf::st_geometry_type(geom) %in% c("POLYGON", "MULTIPOLYGON")) %>% 
-  #   sf::st_is_valid() %>% 
-  #   all()
+  clean_geojson_exists  <- file.exists(output_clean_geojson_path)
+  message(" >>> '", output_clean_filename, "' already exists? ", clean_geojson_exists)
+  message(" >>> Overwrite? ", OVERWRITE_FEMA_FILES)
   
-  tryCatch({
-    
-    fema_vpu <- 
-      fema_vpu %>% 
-      nngeo::st_remove_holes(max_area = 200) %>% 
-      # sf::st_make_valid() %>%
-      # dplyr::filter(sf::st_geometry_type(geom) %in% c("POLYGON", "MULTIPOLYGON")) %>% 
-      add_predicate_group_id(sf::st_intersects) %>% 
-      dplyr::group_by(group_id) %>%
-      rmapshaper::ms_dissolve(sys = TRUE, sys_mem = 16) %>% 
-      rmapshaper::ms_explode(sys = TRUE, sys_mem = 16) %>% 
-      dplyr::ungroup() %>% 
-      nngeo::st_remove_holes(max_area = 200) %>% 
-      dplyr::mutate(      
-        vpu      = gsub("VPU_", "", VPU),
-        fema_id  = as.character(1:dplyr::n())
-      ) %>% 
-      dplyr::select(
-        vpu, fema_id,
-        # state, 
-        geom = geometry
-      )
-    
-  }, error = function(e) {
-    message(VPU, " threw into the following error \n ", e)
-    message(" > Cleaning ", VPU, " using a backup cleaning strategy...")
-    
-    fema_vpu <- 
-      fema_vpu %>% 
-      sf::st_make_valid() %>% 
-      dplyr::mutate(      
-        vpu      = gsub("VPU_", "", VPU),
-        fema_id  = as.character(1:dplyr::n())
-      ) %>% 
-      dplyr::select(
-        vpu, fema_id,
-        # state, 
-        geom
-      )
-  
-    })
+  mapshaper_command = paste0('node  --max-old-space-size=16000 /opt/homebrew/bin/mapshaper ', master_geojson_filepath, 
+                             ' -dissolve2 \\', 
+                             ' -simplify 0.5 visvalingam \\', 
+                             ' -snap \\',
+                             ' -explode \\',
+                             ' -o ', output_clean_geojson_path
+  )
   
-  if (OVERWRITE_FEMA_FILES) {
-    
-    message("> Overwritting '", basename(master_filepath), "' with final clean version...")
-    
-    # union_file_path <- gsub(".gpkg", "_union.gpkg", fema_vpu_file)
-    # message("> writting '", basename(union_file_path), "' (unioned and exploded version)")
+  system(mapshaper_command)
+  # message("Writing '", master_geojson_name, "' to: \n > '", master_geojson_filepath, "'")
+  
+  STAGING_FILES_TO_DELETE <- c(STAGING_FILES_TO_DELETE, output_clean_geojson_path)
+  output_clean_gpkg_filename   <- gsub(".geojson", ".gpkg", master_geojson_name)
+  output_clean_gpkg_path       <- paste0(vpu_dir, "/", output_clean_gpkg_filename)
+  
+  # fema_vpu <- sf::read_sf(master_filepath)
+  geojson_to_gpkg_cmd <- paste0("ogr2ogr -nlt MULTIPOLYGON ", updated_filepath, " ", output_clean_geojson_path)
+  updated_gpkg_exists  <- file.exists(updated_filepath)
+  
+  if (OVERWRITE_FEMA_FILES || !updated_gpkg_exists) {
+    system(geojson_to_gpkg_cmd)
+    message("Writing '", updated_gpkg_name, "' to: \n > '", updated_filepath, "'")
     
-    sf::write_sf(
-      fema_vpu,
-      updated_filepath
-      # master_filepath
-      # union_file_path
+  }
+  
+  fema <- 
+    sf::read_sf(updated_filepath) %>% 
+    rmapshaper::ms_explode(sys=TRUE, sys_mem = 16) %>% 
+    dplyr::mutate(
+      vpu      = gsub("VPU_", "", VPU),
+      fema_id = as.character(1:dplyr::n())
+    ) %>% 
+    dplyr::select(
+      fema_id, 
+      geom
     )
+  
+  sf::write_sf(
+    fema, 
+    updated_filepath
+  )
+  
+  message("Deleting intermediary files\n")
+  for (delete_file in STAGING_FILES_TO_DELETE) {
+    if (file.exists(delete_file)) {
+      message("Deleting >>> '", delete_file, "'")
+      file.remove(delete_file)
+    }
+    
   }
-  message()
 }
+# # -------------------------------------------------------------------------------------
+# # ---- Union each VPU geopackage (either on state or just touching predicate) ---- 
+# # -------------------------------------------------------------------------------------
+# 
+# for (i in 1:length(FEMA_VPU_SUBFOLDERS)) {
+#   
+#   vpu_dir    <- FEMA_VPU_SUBFOLDERS[i]
+#   VPU        <- basename(vpu_dir)
+#   
+#   message(i, " - Attempting to union FEMA polygons for '", VPU, "'...")
+#   
+#   # path to the merged directory where the final merged geopackage will end up
+#   master_name       <- paste0("fema_", gsub("VPU", "vpu", basename(vpu_dir)))
+#   master_gpkg_name  <- paste0(master_name, ".gpkg")
+#   master_filepath   <- paste0(vpu_dir, "/", master_gpkg_name)
+#   
+#   updated_gpkg_name  <- gsub(".gpkg", "_output.gpkg", master_gpkg_name)
+#   updated_filepath   <- paste0(vpu_dir, "/", updated_gpkg_name)
+#   
+#   message("> Re-unioning and re-exploding geometries in '", basename(master_filepath), "'")
+#   
+#   if(!file.exists(master_filepath)) { 
+#     message("No FEMA geometries in '", VPU, "'")
+#     message()
+#     next
+#   }
+#   
+#   
+#   fema_vpu <- sf::read_sf(master_filepath)
+#   
+#   # fema_vpu %>% sf::st_geometry_type() %>% unique()
+#   # fema_vpu %>% mapview::npts()
+#   # fema_vpu %>% sf::st_is_valid() %>% all()
+#   # fema_vpu %>% 
+#   #   sf::st_make_valid() %>% 
+#   #   sf::st_geometry_type() %>% 
+#   #   unique()
+#   
+#   geom_type_counts <- table(sf::st_geometry_type(fema_vpu))
+#   
+#   message("Geometry counts before casting all geometries to MULTIPOLYGON:")
+#   for (g in seq_along(geom_type_counts)) {
+#     message(" > ", names(geom_type_counts[g]), ": ", geom_type_counts[g])
+#   }
+#   
+#   # mapview::mapview(fema_vpu, color = 'red', col.regions = 'white') +
+#   #       mapview::mapview(fema_union, color = 'green', col.regions = 'white')
+#   
+#   # fema_vpu %>% 
+#   #   sf::st_make_valid() %>% 
+#   #   dplyr::filter(sf::st_geometry_type(geom) %in% c("POLYGON", "MULTIPOLYGON")) %>% 
+#   #   sf::st_is_valid() %>% 
+#   #   all()
+#   
+#   tryCatch({
+#     
+#     fema_vpu <- 
+#       fema_vpu %>% 
+#       nngeo::st_remove_holes(max_area = 200) %>% 
+#       # sf::st_make_valid() %>%
+#       # dplyr::filter(sf::st_geometry_type(geom) %in% c("POLYGON", "MULTIPOLYGON")) %>% 
+#       add_predicate_group_id(sf::st_intersects) %>% 
+#       dplyr::group_by(group_id) %>%
+#       rmapshaper::ms_dissolve(sys = TRUE, sys_mem = 16) %>% 
+#       rmapshaper::ms_explode(sys = TRUE, sys_mem = 16) %>% 
+#       dplyr::ungroup() %>% 
+#       nngeo::st_remove_holes(max_area = 200) %>% 
+#       dplyr::mutate(      
+#         vpu      = gsub("VPU_", "", VPU),
+#         fema_id  = as.character(1:dplyr::n())
+#       ) %>% 
+#       dplyr::select(
+#         vpu, fema_id,
+#         # state, 
+#         geom = geometry
+#       )
+#     
+#   }, error = function(e) {
+#     message(VPU, " threw into the following error \n ", e)
+#     message(" > Cleaning ", VPU, " using a backup cleaning strategy...")
+#     
+#     fema_vpu <- 
+#       fema_vpu %>% 
+#       sf::st_make_valid() %>% 
+#       dplyr::mutate(      
+#         vpu      = gsub("VPU_", "", VPU),
+#         fema_id  = as.character(1:dplyr::n())
+#       ) %>% 
+#       dplyr::select(
+#         vpu, fema_id,
+#         # state, 
+#         geom
+#       )
+#   
+#     })
+#   
+#   if (OVERWRITE_FEMA_FILES) {
+#     
+#     message("> Overwritting '", basename(master_filepath), "' with final clean version...")
+#     
+#     # union_file_path <- gsub(".gpkg", "_union.gpkg", fema_vpu_file)
+#     # message("> writting '", basename(union_file_path), "' (unioned and exploded version)")
+#     
+#     sf::write_sf(
+#       fema_vpu,
+#       updated_filepath
+#       # master_filepath
+#       # union_file_path
+#     )
+#   }
+#   message()
+# }

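A minimal standalone sketch of the gpkg -> GeoJSON -> mapshaper -> gpkg round trip this patch adopts (hypothetical in.gpkg/out.gpkg paths; it assumes GDAL's ogr2ogr and the mapshaper node CLI are installed and on the PATH, rather than the runner's hard-coded /opt/homebrew/bin/mapshaper):

    # hypothetical paths; the runner derives these from FEMA_VPU_SUBFOLDERS
    in_gpkg      <- "in.gpkg"
    staging_json <- "staging.geojson"
    clean_json   <- "staging_clean.geojson"
    out_gpkg     <- "out.gpkg"

    # GeoPackage -> GeoJSON, since the mapshaper CLI works most reliably on GeoJSON
    system(paste0("ogr2ogr ", staging_json, " ", in_gpkg))

    # dissolve overlapping polygons, simplify, snap near-identical vertices,
    # and explode multipart geometries back into single parts
    system(paste0("mapshaper ", staging_json,
                  " -dissolve2 -simplify 0.5 visvalingam -snap -explode",
                  " -o ", clean_json))

    # GeoJSON -> GeoPackage, forcing MULTIPOLYGON as the runner does
    system(paste0("ogr2ogr -nlt MULTIPOLYGON ", out_gpkg, " ", clean_json))

    # drop the staging files, mirroring STAGING_FILES_TO_DELETE above
    file.remove(staging_json, clean_json)

Shelling out keeps the dissolve outside the R heap, which is presumably why the in-memory nngeo/rmapshaper chain above was retired for the larger VPUs (note the sys_mem = 16 flags it needed).
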
From 8477da02f8524138f0a839c2abcfc89ca0c3055b Mon Sep 17 00:00:00 2001
From: anguswg-ucsb <anguswatters@gmail.com>
Date: Mon, 22 Jul 2024 10:58:59 -0700
Subject: [PATCH 50/64] changes to fema processing steps: use mapshaper -clean
 arg and remove excess dissolve

---
 runners/cs_runner/01_transects.R          |  6 ++-
 runners/cs_runner/partition_fema_by_vpu.R | 62 ++++++++++++++++++-----
 2 files changed, 53 insertions(+), 15 deletions(-)

diff --git a/runners/cs_runner/01_transects.R b/runners/cs_runner/01_transects.R
index 7b38b23..31467ae 100644
--- a/runners/cs_runner/01_transects.R
+++ b/runners/cs_runner/01_transects.R
@@ -25,7 +25,8 @@ path_df <- align_files_by_vpu(
                 )
 
 # loop over each VPU and generate cross sections, then save locally and upload to S3 bucket
-for(i in 2:nrow(path_df)) {
+for(i in 1:nrow(path_df)) {
+  # i = 8
   
   # nextgen file and full path
   nextgen_file <- path_df$x[i]
@@ -114,10 +115,11 @@ for(i in 2:nrow(path_df)) {
   # --- Extend transects out to FEMA 100yr floodplains
   # ---------------------------------------------------------------------
   message("Reading in FEMA polygons...") 
+  
   # fema polygons and transect lines
   fema <- sf::read_sf(vpu_fema_file)
   
-  # mapview::npts(fema)
+  mapview::npts(fema)
   message("Simplifying FEMA polygons...")
   
   # TODO: this should be a function argument OR removed; we probably shouldn't forcibly and silently simplify the input polygons without the user knowing.
diff --git a/runners/cs_runner/partition_fema_by_vpu.R b/runners/cs_runner/partition_fema_by_vpu.R
index 5a2d35b..13ea3ba 100644
--- a/runners/cs_runner/partition_fema_by_vpu.R
+++ b/runners/cs_runner/partition_fema_by_vpu.R
@@ -36,6 +36,7 @@ library(nngeo)
 # Default is TRUE (i.e. a fresh processing run is done from start to finish)
 OVERWRITE_FEMA_FILES  <- TRUE
 DELETE_STAGING_GPKGS  <- TRUE
+Sys.setenv(OGR_GEOJSON_MAX_OBJ_SIZE=0)
 
 # -------------------------------------------------------------------------------------
 # ---- Create directories (if they do NOT exist) ----
@@ -406,7 +407,11 @@ for (vpu_dir in FEMA_VPU_SUBFOLDERS) {
 # ---- Apply simplify, dissolve, explode on the MERGED polygons ----
 # -------------------------------------------------------------------------------------
 
-# list.files(FEMA_VPU_SUBFOLDERS, full.names = T)[grepl("_output.gpkg", list.files(FEMA_VPU_SUBFOLDERS, full.names = T))]
+# # NOTE: remove past runs for testing...
+# for (i in list.files(FEMA_VPU_SUBFOLDERS, full.names = T)[grepl("_output.gpkg", list.files(FEMA_VPU_SUBFOLDERS, full.names = T))]) {
+#   file.remove(i)
+# }
+
 for (i in 1:length(FEMA_VPU_SUBFOLDERS)) {
   # i = 8
   STAGING_FILES_TO_DELETE <- c()
@@ -442,8 +447,10 @@ for (i in 1:length(FEMA_VPU_SUBFOLDERS)) {
   
   # message(" >>> '", geojson_filename, "' already exists? ", geojson_exists)
   # message(" >>> Overwrite? ", OVERWRITE_FEMA_FILES)
-  
-  gpkg_to_geojson_cmd <- paste0("ogr2ogr ", master_geojson_filepath, " ", master_filepath)
+  gpkg_to_geojson_cmd <- paste0("ogr2ogr -s_srs EPSG:5070 -t_srs EPSG:5070 ", master_geojson_filepath, " ", master_filepath)
+  # gpkg_to_geojson_cmd <- paste0("ogr2ogr -f GEOJSON -s_srs EPSG:5070 -t_srs EPSG:5070 ", master_geojson_filepath, " ", master_filepath)
+  # gpkg_to_geojson_cmd <- paste0("ogr2ogr ", master_geojson_filepath, " ", master_filepath)
+  # file.remove(master_geojson_filepath)
   
   if (OVERWRITE_FEMA_FILES || !geojson_exists) {
     system(gpkg_to_geojson_cmd)
@@ -452,6 +459,9 @@ for (i in 1:length(FEMA_VPU_SUBFOLDERS)) {
     STAGING_FILES_TO_DELETE <- c(STAGING_FILES_TO_DELETE, master_geojson_filepath)
   }
   
+  # master_gj <- sf::read_sf(master_geojson_filepath)
+  # master_gpkg <- sf::read_sf(master_filepath)
+
   # Clean GeoJSON
   message("Simplify, dissolve, explode > '", master_geojson_name, "'")
   output_clean_filename      <- gsub(".geojson", "_clean.geojson", master_geojson_name)
@@ -460,12 +470,19 @@ for (i in 1:length(FEMA_VPU_SUBFOLDERS)) {
   clean_geojson_exists  <- file.exists(output_clean_geojson_path)
   message(" >>> '", output_clean_filename, "' already exists? ", clean_geojson_exists)
   message(" >>> Overwrite? ", OVERWRITE_FEMA_FILES)
-  
-  mapshaper_command = paste0('node  --max-old-space-size=16000 /opt/homebrew/bin/mapshaper ', master_geojson_filepath, 
-                             ' -dissolve2 \\', 
-                             ' -simplify 0.5 visvalingam \\', 
+
+  # file.remove(output_clean_geojson_path)
+  
+  mapshaper_command = paste0('node  --max-old-space-size=16000 /opt/homebrew/bin/mapshaper ',
+                             master_geojson_filepath, 
+                             # ' -clean \\', 
+                             # ' -explode \\',
+                             # ' -dissolve2 \\',
+                             ' -simplify 0.3 visvalingam \\', 
                              ' -snap \\',
                              ' -explode \\',
+                             ' -clean \\',
+                             # ' -proj EPSG:5070 \\',
                              ' -o ', output_clean_geojson_path
   )
   
@@ -477,30 +494,49 @@ for (i in 1:length(FEMA_VPU_SUBFOLDERS)) {
   output_clean_gpkg_path       <- paste0(vpu_dir, "/", output_clean_gpkg_filename)
   
   # fema_vpu <- sf::read_sf(master_filepath)
-  geojson_to_gpkg_cmd <- paste0("ogr2ogr -nlt MULTIPOLYGON ", updated_filepath, " ", output_clean_geojson_path)
+  # geojson_to_gpkg_cmd <- paste0("ogr2ogr -f GPKG ", updated_filepath, " ", output_clean_geojson_path)
+  geojson_to_gpkg_cmd <- paste0("ogr2ogr -nlt MULTIPOLYGON -s_srs EPSG:5070 -t_srs EPSG:5070  ", updated_filepath, " ", output_clean_geojson_path)
+  # geojson_to_gpkg_cmd <- paste0("ogr2ogr ", updated_filepath, " ", output_clean_geojson_path)
+
   updated_gpkg_exists  <- file.exists(updated_filepath)
+  # updated_gpkg_exists
+  # file.remove(updated_filepath)
   
   if (OVERWRITE_FEMA_FILES || !updated_gpkg_exists) {
     system(geojson_to_gpkg_cmd)
     message("Writing '", updated_gpkg_name, "' to: \n > '", updated_filepath, "'")
-    
   }
+
+  # sf::st_layers(updated_filepath)
   
-  fema <- 
-    sf::read_sf(updated_filepath) %>% 
-    rmapshaper::ms_explode(sys=TRUE, sys_mem = 16) %>% 
+  # mapview::npts(fema)
+  
+  fema  <- 
+    sf::read_sf(updated_filepath) %>%
+    # sf::read_sf(output_clean_geojson_path) %>%
+    # rmapshaper::ms_explode(sys=TRUE, sys_mem = 16) %>% 
     dplyr::mutate(
       vpu      = gsub("VPU_", "", VPU),
       fema_id = as.character(1:dplyr::n())
     ) %>% 
     dplyr::select(
+      vpu,
       fema_id, 
       geom
     )
   
+  # fema %>% 
+  #   rmapshaper::ms_simplify(keep = 0.5, keep_shapes = T) %>% 
+  #   dplyr::group_by(fema_id) %>% 
+  #   dplyr::mutate(pts = mapview::npts(geom)) %>% 
+  #   dplyr::arrange(-pts) 
+  
+  file.remove(updated_filepath)
+  
   sf::write_sf(
     fema, 
-    updated_filepath
+    updated_filepath, 
+    append = FALSE
   )
   
   message("Deleting intermediary files\n")

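Two small changes in this patch do much of the heavy lifting: disabling GDAL's GeoJSON feature-size cap, and the delete-then-write overwrite of the output GeoPackage. A hedged sketch of both (hypothetical paths):

    library(sf)

    # GDAL rejects GeoJSON features over ~200 MB by default; 0 disables the
    # cap so the merged FEMA multipolygons survive the ogr2ogr round trip
    Sys.setenv(OGR_GEOJSON_MAX_OBJ_SIZE = 0)

    # pin EPSG:5070 on both sides so the conversion cannot silently reproject
    system("ogr2ogr -s_srs EPSG:5070 -t_srs EPSG:5070 staging.geojson in.gpkg")

    # delete-then-write with append = FALSE guarantees a clean overwrite
    # instead of appending a second layer to an existing GeoPackage
    fema     <- sf::read_sf("staging_clean.gpkg")  # hypothetical cleaned layer
    out_path <- "out.gpkg"
    if (file.exists(out_path)) file.remove(out_path)
    sf::write_sf(fema, out_path, append = FALSE)
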
From 550c85c66b07ab99b69f7c3d2fa2b4d3127b1b37 Mon Sep 17 00:00:00 2001
From: anguswg-ucsb <anguswatters@gmail.com>
Date: Fri, 26 Jul 2024 12:42:41 -0700
Subject: [PATCH 51/64] testing out new fema simplification amounts...

---
 runners/cs_runner/01_transects.R | 58 +++++++++++++++++++++++++++-----
 1 file changed, 50 insertions(+), 8 deletions(-)

diff --git a/runners/cs_runner/01_transects.R b/runners/cs_runner/01_transects.R
index 31467ae..3bf707a 100644
--- a/runners/cs_runner/01_transects.R
+++ b/runners/cs_runner/01_transects.R
@@ -25,9 +25,9 @@ path_df <- align_files_by_vpu(
                 )
 
 # loop over each VPU and generate cross sections, then save locally and upload to S3 bucket
-for(i in 1:nrow(path_df)) {
-  # i = 8
-  
+for(i in 4:nrow(path_df)) {
+  # i = 2
+  # i = 4
   # nextgen file and full path
   nextgen_file <- path_df$x[i]
   nextgen_path <- paste0(nextgen_dir, nextgen_file)
@@ -103,7 +103,6 @@ for(i in 1:nrow(path_df)) {
   out_file <- paste0("nextgen_", path_df$vpu[i], "_transects.gpkg")
   out_path <- paste0(transects_dir, out_file)
   
-  
   # add cs_source column and rename cs_widths to cs_lengthm
   transects <- 
     transects %>%
@@ -119,15 +118,51 @@ for(i in 1:nrow(path_df)) {
   # fema polygons and transect lines
   fema <- sf::read_sf(vpu_fema_file)
   
-  mapview::npts(fema)
+  # mapview::npts(fema)
+  
+  # mapview::npts(fema)
   message("Simplifying FEMA polygons...")
+  message(" - Number of points BEFORE simplifying: ", mapview::npts(fema))
+  message(" - Number of geoms BEFORE simplifying: ", nrow(fema))
   
   # TODO: this should be a function argument OR removed; we probably shouldn't forcibly and silently simplify the input polygons without the user knowing.
   # keep 1% of the original points for speed
-  fema <- rmapshaper::ms_simplify(fema, keep_shapes = T, keep = 0.01, sys = TRUE, sys_mem = 16)
+  fema <- rmapshaper::ms_simplify(fema, keep_shapes = T, keep = 0.02, sys = TRUE, sys_mem = 16)
+  
+  message(" - Number of points AFTER simplifying: ", mapview::npts(fema))
+  message(" - Number of geoms AFTER simplifying: ", nrow(fema))
+  # fema_keep <- rmapshaper::ms_simplify(fema, keep_shapes = T, explode = TRUE, keep = 0.05, snap = F, sys = TRUE, sys_mem = 16)
+  # fema_nokeep <- rmapshaper::ms_simplify(fema, keep_shapes = F, keep = 0.05, sys = TRUE, sys_mem = 16)
+   # mapview::npts(fema, by_feature = F)
+ #  mapview::npts(fema_keep, by_feature = F)
+ #  mapview::npts(fema_keep, by_feature = T)
+ # 
+ #  mapview::npts(fema_nokeep, by_feature = F)
+ #  mapview::npts(fema_nokeep, by_feature = T)
+ # 
+ # fema_keep <- 
+ #    fema_keep %>% 
+ #    dplyr::mutate(nid = 1:dplyr::n()) %>% 
+ #    dplyr::group_by(nid) %>% 
+ #    dplyr::mutate(npts = mapview::npts(geom)) %>% 
+ #    dplyr::arrange(-npts)
+ #  
+ #  fema_nokeep <- 
+ #    fema_nokeep %>% 
+ #    dplyr::mutate(nid = 1:dplyr::n()) %>% 
+ #    dplyr::group_by(nid) %>% 
+ #    dplyr::mutate(npts = mapview::npts(geom)) %>% 
+ #    dplyr::arrange(-npts)
+  
   # fema <- rmapshaper::ms_simplify(fema, keep_shapes = F, keep = 0.1, sys = TRUE, sys_mem = 16)
-  # mapview::npts(fema)
-
+  
+  # mapview::npts(fema_nokeep, by_feature = T)
+  
+  # fema <-
+  #   fema %>% 
+  #   sf::st_make_valid()
+  # fema %>% sf::st_is_valid() %>% all()
+  
   message("Extending transects out to FEMA 100yr floodplain polygon boundaries - (", Sys.time(), ")")
   
   transects <- 
@@ -140,6 +175,13 @@ for(i in 1:nrow(path_df)) {
       by = "hy_id" 
     )
   
+  # TODO: TESTING DATA
+  # sf::write_sf(transects2, "/Users/anguswatters/Desktop/test_transects_02.gpkg")
+  # sf::write_sf(flines, "/Users/anguswatters/Desktop/test_flines_02.gpkg")
+  # fema <- sf::read_sf("/Users/anguswatters/Desktop/lynker-spatial/FEMA_BY_VPU/VPU_02/fema_vpu_02_output.gpkg")
+  # transects3 <- sf::read_sf("/Users/anguswatters/Desktop/test_transects_02.gpkg")
+  # flines2 <- sf::read_sf("/Users/anguswatters/Desktop/test_flines_02.gpkg")
+ 
   # system.time({
     
   # TODO: make sure this 3000m extension distance is appropriate across VPUs 

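The keep values trialed here (0.01-0.1) trade boundary fidelity for speed. A small sketch for comparing candidates on one VPU file (hypothetical input path; mapview is only used for its vertex counter, as in the runner):

    library(sf)

    fema   <- sf::read_sf("fema_vpu_06_output.gpkg")  # hypothetical VPU file
    before <- mapview::npts(fema)

    # keep_shapes = TRUE stops small polygons from being simplified out of
    # existence; sys = TRUE shells out to the system mapshaper with ~16 GB heap
    for (keep_pct in c(0.01, 0.02, 0.1)) {
      simplified <- rmapshaper::ms_simplify(fema, keep = keep_pct,
                                            keep_shapes = TRUE,
                                            sys = TRUE, sys_mem = 16)
      message("keep = ", keep_pct, ": ", before,
              " pts -> ", mapview::npts(simplified), " pts")
    }
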
From b99bbd9deead721cba025f76ed5f91fa6b08a04c Mon Sep 17 00:00:00 2001
From: anguswg-ucsb <anguswatters@gmail.com>
Date: Wed, 31 Jul 2024 15:12:17 -0700
Subject: [PATCH 52/64] small cleanups

---
 runners/cs_runner/01_transects.R | 62 +++++---------------------------
 1 file changed, 9 insertions(+), 53 deletions(-)

diff --git a/runners/cs_runner/01_transects.R b/runners/cs_runner/01_transects.R
index 3bf707a..804e2dc 100644
--- a/runners/cs_runner/01_transects.R
+++ b/runners/cs_runner/01_transects.R
@@ -25,9 +25,8 @@ path_df <- align_files_by_vpu(
                 )
 
 # loop over each VPU and generate cross sections, then save locally and upload to S3 bucket
-for(i in 4:nrow(path_df)) {
-  # i = 2
-  # i = 4
+for(i in 1:nrow(path_df)) {
+  
   # nextgen file and full path
   nextgen_file <- path_df$x[i]
   nextgen_path <- paste0(nextgen_dir, nextgen_file)
@@ -49,6 +48,10 @@ for(i in 4:nrow(path_df)) {
           )
   
   # message("Creating VPU ", path_df$vpu[i], " transects:\n - flowpaths: '", nextgen_file, "'\n - model attributes: '", model_attr_file, "'")
+  # sf::write_sf(
+  #   dplyr::slice(dplyr::filter(flines, order == 2), 2), 
+  #   "/Users/anguswatters/Desktop/example_flowline.gpkg"
+  #   )
   
   # read in nextgen data
   flines <- sf::read_sf(nextgen_path, layer = "flowpaths")
@@ -118,51 +121,15 @@ for(i in 4:nrow(path_df)) {
   # fema polygons and transect lines
   fema <- sf::read_sf(vpu_fema_file)
   
-  # mapview::npts(fema)
-  
-  # mapview::npts(fema)
   message("Simplifying FEMA polygons...")
-  message(" - Number of points BEFORE simplifying: ", mapview::npts(fema))
   message(" - Number of geoms BEFORE simplifying: ", nrow(fema))
   
   # TODO: this should be a function argument OR removed; we probably shouldn't forcibly and silently simplify the input polygons without the user knowing.
   # keep 1% of the original points for speed
-  fema <- rmapshaper::ms_simplify(fema, keep_shapes = T, keep = 0.02, sys = TRUE, sys_mem = 16)
+  fema <- rmapshaper::ms_simplify(fema, keep_shapes = T, keep = 0.01, sys = TRUE, sys_mem = 16)
+  # fema <- rmapshaper::ms_simplify(fema, keep_shapes = T, keep = 0.1, sys = TRUE, sys_mem = 16)
   
-  message(" - Number of points AFTER simplifying: ", mapview::npts(fema))
   message(" - Number of geoms AFTER simplifying: ", nrow(fema))
-  # fema_keep <- rmapshaper::ms_simplify(fema, keep_shapes = T, explode = TRUE, keep = 0.05, snap = F, sys = TRUE, sys_mem = 16)
-  # fema_nokeep <- rmapshaper::ms_simplify(fema, keep_shapes = F, keep = 0.05, sys = TRUE, sys_mem = 16)
-   # mapview::npts(fema, by_feature = F)
- #  mapview::npts(fema_keep, by_feature = F)
- #  mapview::npts(fema_keep, by_feature = T)
- # 
- #  mapview::npts(fema_nokeep, by_feature = F)
- #  mapview::npts(fema_nokeep, by_feature = T)
- # 
- # fema_keep <- 
- #    fema_keep %>% 
- #    dplyr::mutate(nid = 1:dplyr::n()) %>% 
- #    dplyr::group_by(nid) %>% 
- #    dplyr::mutate(npts = mapview::npts(geom)) %>% 
- #    dplyr::arrange(-npts)
- #  
- #  fema_nokeep <- 
- #    fema_nokeep %>% 
- #    dplyr::mutate(nid = 1:dplyr::n()) %>% 
- #    dplyr::group_by(nid) %>% 
- #    dplyr::mutate(npts = mapview::npts(geom)) %>% 
- #    dplyr::arrange(-npts)
-  
-  # fema <- rmapshaper::ms_simplify(fema, keep_shapes = F, keep = 0.1, sys = TRUE, sys_mem = 16)
-  
-  # mapview::npts(fema_nokeep, by_feature = T)
-  
-  # fema <-
-  #   fema %>% 
-  #   sf::st_make_valid()
-  # fema %>% sf::st_is_valid() %>% all()
-  
   message("Extending transects out to FEMA 100yr floodplain polygon boundaries - (", Sys.time(), ")")
   
   transects <- 
@@ -175,15 +142,6 @@ for(i in 4:nrow(path_df)) {
       by = "hy_id" 
     )
   
-  # TODO: TESTING DATA
-  # sf::write_sf(transects2, "/Users/anguswatters/Desktop/test_transects_02.gpkg")
-  # sf::write_sf(flines, "/Users/anguswatters/Desktop/test_flines_02.gpkg")
-  # fema <- sf::read_sf("/Users/anguswatters/Desktop/lynker-spatial/FEMA_BY_VPU/VPU_02/fema_vpu_02_output.gpkg")
-  # transects3 <- sf::read_sf("/Users/anguswatters/Desktop/test_transects_02.gpkg")
-  # flines2 <- sf::read_sf("/Users/anguswatters/Desktop/test_flines_02.gpkg")
- 
-  # system.time({
-    
   # TODO: make sure this 3000m extension distance is appropriate across VPUs 
   # TODO: also got to make sure that this will be feasible on memory on the larger VPUs...
   transects <- hydrofabric3D::extend_transects_to_polygons(
@@ -191,12 +149,10 @@ for(i in 4:nrow(path_df)) {
     polygons               = fema, 
     flowlines              = flines, 
     crosswalk_id           = "hy_id",
-    intersect_group_id     = "mainstem", 
+    grouping_id     = "mainstem", 
     max_extension_distance = 3000 
   )
   
-  # })
-  
   message("FEMA extensions complete! - ( ", Sys.time(), " )")
   
   transects  <- dplyr::select(transects, -tmp_id)

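The `intersect_group_id` -> `grouping_id` change above tracks an argument rename in hydrofabric3D. A defensive sketch (base R only) for failing fast when the installed package predates the rename, rather than erroring mid-loop:

    # inspect the formal arguments of the extension function before the loop
    fn_args <- names(formals(hydrofabric3D::extend_transects_to_polygons))

    if (!"grouping_id" %in% fn_args) {
      stop("hydrofabric3D is outdated: extend_transects_to_polygons() has no ",
           "'grouping_id' argument (found: ", paste(fn_args, collapse = ", "), ")")
    }
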
From 92fad96457d04d56d2fa0d760bc14ea023ad3679 Mon Sep 17 00:00:00 2001
From: anguswg-ucsb <anguswatters@gmail.com>
Date: Fri, 2 Aug 2024 09:04:47 -0700
Subject: [PATCH 53/64] huge overhaul of variable declarations and path variable
 creation for cs_runner/, split download_nextgen.R into separate scripts for
 downloading nextgen, ref_features, and ML outputs, moved util functions to a
 utils.R script, updated 04_driver.R script, still more cleanups needed

---
 runners/cs_runner/01_transects.R          |  26 ++--
 runners/cs_runner/02_cs_pts.R             |  50 +++---
 runners/cs_runner/03_inject_ml.R          |  27 ++--
 runners/cs_runner/04_driver.R             |  12 +-
 runners/cs_runner/config.R                | 108 ++++++++-----
 runners/cs_runner/config_vars.R           | 158 ++++++++++++++-----
 runners/cs_runner/download_fema100.R      |   8 +-
 runners/cs_runner/download_ml_outputs.R   |  16 ++
 runners/cs_runner/download_nextgen.R      | 178 +++++++---------------
 runners/cs_runner/download_ref_features.R |  35 +++++
 runners/cs_runner/partition_fema_by_vpu.R |   4 +-
 runners/cs_runner/set_path_variables.R    |  10 ++
 runners/cs_runner/utils.R                 | 149 ++++++++++++++++++
 13 files changed, 521 insertions(+), 260 deletions(-)
 create mode 100644 runners/cs_runner/download_ml_outputs.R
 create mode 100644 runners/cs_runner/download_ref_features.R
 create mode 100644 runners/cs_runner/set_path_variables.R

diff --git a/runners/cs_runner/01_transects.R b/runners/cs_runner/01_transects.R
index 804e2dc..2c2ce2f 100644
--- a/runners/cs_runner/01_transects.R
+++ b/runners/cs_runner/01_transects.R
@@ -1,5 +1,6 @@
 # Generate the flowlines layer for the final cross_sections_<VPU>.gpkg for each VPU
 source("runners/cs_runner/config.R")
+source("runners/cs_runner/utils.R")
 
 # # # # load libraries
 # library(hydrofabric3D)
@@ -8,20 +9,17 @@ source("runners/cs_runner/config.R")
 # install.packages("devtools")
 
 # transect bucket prefix
-transects_prefix <- paste0(s3_bucket, version_prefix, "/3D/transects/")
+S3_TRANSECTS_DIR <- paste0(S3_BUCKET_URI, VERSION, "/3D/transects/")
 
 # paths to nextgen datasets and model attribute parquet files
-nextgen_files    <- list.files(nextgen_dir, full.names = FALSE)
-model_attr_files <- list.files(model_attr_dir, full.names = FALSE)
-
-# string to fill in "cs_source" column in output datasets
-net_source <- "hydrofabric3D"
+NEXTGEN_FILES    <- list.files(NEXTGEN_DIR, full.names = FALSE)
+# model_attr_files <- list.files(MODEL_ATTR_DIR, full.names = FALSE)
 
 # ensure the files are in the same order and matched up by VPU
 path_df <- align_files_by_vpu(
-                x    = nextgen_files,
-                y    = model_attr_files,
-                base = base_dir
+                x    = NEXTGEN_FILES,
+                y    = NEXTGEN_FILES,
+                base = BASE_DIR
                 )
 
 # loop over each VPU and generate cross sections, then save locally and upload to S3 bucket
@@ -29,7 +27,7 @@ for(i in 1:nrow(path_df)) {
   
   # nextgen file and full path
   nextgen_file <- path_df$x[i]
-  nextgen_path <- paste0(nextgen_dir, nextgen_file)
+  nextgen_path <- paste0(NEXTGEN_DIR, nextgen_file)
   
   vpu <- path_df$vpu[i]
 
@@ -104,13 +102,13 @@ for(i in 1:nrow(path_df)) {
   
   # name of file and path to save transects gpkg too
   out_file <- paste0("nextgen_", path_df$vpu[i], "_transects.gpkg")
-  out_path <- paste0(transects_dir, out_file)
+  out_path <- paste0(TRANSECTS_DIR, out_file)
   
   # add cs_source column and rename cs_widths to cs_lengthm
   transects <- 
     transects %>%
     dplyr::mutate(
-      cs_source = net_source
+      cs_source = CS_SOURCE
     )
   
   # ---------------------------------------------------------------------
@@ -196,8 +194,8 @@ for(i in 1:nrow(path_df)) {
     )
   
   # command to copy transects geopackage to S3
-  copy_to_s3 <- paste0("aws s3 cp ", out_path, " ", transects_prefix, out_file, 
-                   ifelse(is.null(aws_profile), "", paste0(" --profile ", aws_profile))
+  copy_to_s3 <- paste0("aws s3 cp ", out_path, " ", S3_TRANSECTS_DIR, out_file, 
+                   ifelse(is.null(AWS_PROFILE), "", paste0(" --profile ", AWS_PROFILE))
                    )
   
   message("Copy VPU ", path_df$vpu[i], " transects to S3:\n - S3 copy command:\n'", 
diff --git a/runners/cs_runner/02_cs_pts.R b/runners/cs_runner/02_cs_pts.R
index 4b18b93..d28e43e 100644
--- a/runners/cs_runner/02_cs_pts.R
+++ b/runners/cs_runner/02_cs_pts.R
@@ -11,33 +11,32 @@ library(hydrofabric3D)
 library(dplyr)
 library(sf)
 
-# cross section bucket prefix
-cs_pts_prefix <- paste0(s3_bucket, version_prefix, "/3D/dem-cross-sections/")
-
-# transect bucket prefix
-transects_prefix <- paste0(s3_bucket, version_prefix, "/3D/transects/")
+# # cross section bucket prefix
+# S3_CS_PTS_DIR <- paste0(S3_BUCKET_URI, VERSION, "/3D/dem-cross-sections/")
+# 
+# # transect bucket prefix
+# S3_TRANSECTS_DIR <- paste0(S3_BUCKET_URI, VERSION, "/3D/transects/")
 
 # paths to nextgen datasets
-nextgen_files <- list.files(nextgen_dir, full.names = FALSE)
+NEXTGEN_FILES <- list.files(NEXTGEN_DIR, full.names = FALSE)
 
 # paths to nextgen datasets
-transect_files <- list.files(transects_dir, full.names = FALSE)
+transect_files <- list.files(TRANSECTS_DIR, full.names = FALSE)
 transect_files <- transect_files[!grepl("updated_", transect_files)]
 
-# string to fill in "cs_source" column in output datasets
-cs_source <- "hydrofabric3D"
+REF_FEATURES <- list.files(REF_FEATURES_GPKG_DIR, full.names = FALSE)
 
 # reference features dataframe
 ref_df <- data.frame(
-  vpu      = sapply(strsplit(ref_features, "_", fixed = TRUE), function(i) { i[1] }), 
-  ref_file = ref_features
+  vpu      = sapply(strsplit(REF_FEATURES, "_", fixed = TRUE), function(i) { i[1] }), 
+  ref_file = REF_FEATURES
 )
 
 # ensure the files are in the same order and matched up by VPU
 path_df <- align_files_by_vpu(
-  x    = nextgen_files,
+  x    = NEXTGEN_FILES,
   y    = transect_files,
-  base = base_dir
+  base = BASE_DIR
 ) %>% 
   dplyr::left_join(
     ref_df,
@@ -48,21 +47,21 @@ path_df <- align_files_by_vpu(
 # then classify the points, and create a parquet file with hy_id, cs_id, pt_id, X, Y, Z data.
 # Save parquet locally and upload to specified S3 bucket
 for (i in 1:nrow(path_df)) {
-
+  # i = 8
   
   start <- Sys.time()
   
   # nextgen file and full path
   nextgen_file <- path_df$x[i]
-  nextgen_path <- paste0(nextgen_dir, nextgen_file)
+  nextgen_path <- paste0(NEXTGEN_DIR, nextgen_file)
   
   # model attributes file and full path
   transect_file <- path_df$y[i]
-  transect_path <- paste0(transects_dir, transect_file)
+  transect_path <- paste0(TRANSECTS_DIR, transect_file)
   
   # model attributes file and full path
   ref_file <- path_df$ref_file[i]
-  ref_path <- paste0(ref_features_dir, "gpkg/", ref_file)
+  ref_path <- paste0(REF_FEATURES_DIR, "gpkg/", ref_file)
   
   # current VPU being processed
   VPU = path_df$vpu[i]
@@ -76,6 +75,7 @@ for (i in 1:nrow(path_df)) {
   
   ################### 
   
+  
   # read in transects data
   transects <- sf::read_sf(transect_path)
   
@@ -339,7 +339,7 @@ for (i in 1:nrow(path_df)) {
   fixed_pts <-
     fixed_pts %>%
     dplyr::mutate(
-      Z_source = cs_source
+      Z_source = CS_SOURCE
     ) %>%
     dplyr::relocate(hy_id, cs_id, pt_id, cs_lengthm, relative_distance, X, Y, Z, Z_source, 
                     class, point_type, 
@@ -399,8 +399,8 @@ for (i in 1:nrow(path_df)) {
   )
   
   # command to copy transects geopackage to S3
-  trans_to_s3 <- paste0("aws s3 cp ", updated_path, " ", transects_prefix, transect_file, 
-                        ifelse(is.null(aws_profile), "", paste0(" --profile ", aws_profile)))
+  trans_to_s3 <- paste0("aws s3 cp ", updated_path, " ", S3_TRANSECTS_DIR, transect_file, 
+                        ifelse(is.null(AWS_PROFILE), "", paste0(" --profile ", AWS_PROFILE)))
   
   message("Copy VPU ", path_df$vpu[i], " transects to S3:\n - S3 copy command:\n'", 
           trans_to_s3, 
@@ -416,7 +416,7 @@ for (i in 1:nrow(path_df)) {
   
   # name of file and path to save transects gpkg too
   out_file <- paste0("nextgen_", path_df$vpu[i], "_cross_sections.parquet")
-  out_path <- paste0(cs_pts_dir, out_file)
+  out_path <- paste0(CS_PTS_DIR, out_file)
   
   message("Saving cross section points to:\n - filepath: '", out_path, "'")
   
@@ -424,12 +424,12 @@ for (i in 1:nrow(path_df)) {
   arrow::write_parquet(fixed_pts, out_path)
   
   # command to copy cross section points parquet to S3
-  copy_cs_pts_to_s3 <- paste0("aws s3 cp ", out_path, " ", cs_pts_prefix, out_file,
-                              ifelse(is.null(aws_profile), "", paste0(" --profile ", aws_profile)))
+  copy_cs_pts_to_s3 <- paste0("aws s3 cp ", out_path, " ", S3_CS_PTS_DIR, out_file,
+                              ifelse(is.null(AWS_PROFILE), "", paste0(" --profile ", AWS_PROFILE)))
   
   message("Copy VPU ", path_df$vpu[i], " cross sections to S3:\n - S3 copy command:\n'", 
-          paste0("aws s3 cp ", out_path, " ", cs_pts_prefix, out_file,
-                 ifelse(is.null(aws_profile), "", paste0(" --profile ", aws_profile))), 
+          paste0("aws s3 cp ", out_path, " ", S3_CS_PTS_DIR, out_file,
+                 ifelse(is.null(AWS_PROFILE), "", paste0(" --profile ", AWS_PROFILE))), 
           "'\n==========================")
   
   system(copy_cs_pts_to_s3, intern = TRUE)
diff --git a/runners/cs_runner/03_inject_ml.R b/runners/cs_runner/03_inject_ml.R
index c607e9f..0321003 100644
--- a/runners/cs_runner/03_inject_ml.R
+++ b/runners/cs_runner/03_inject_ml.R
@@ -8,24 +8,25 @@ library(patchwork)
 
 # Generate the flowlines layer for the final cross_sections_<VPU>.gpkg for each VPU
 source("runners/cs_runner/config.R")
+source("runners/cs_runner/utils.R")
 
 # cross section bucket prefix
-CS_ML_PTS_S3_PREFIX <- paste0(s3_bucket, version_prefix, "/3D/cross-sections/")
-# cs_pts_prefix <- paste0(s3_bucket, "v20/3D/dem-cross-sections/")
+S3_CS_ML_PTS_DIR <- paste0(S3_BUCKET_URI, VERSION, "/3D/cross-sections/")
+# S3_CS_PTS_DIR <- paste0(S3_BUCKET_URI, "v20/3D/dem-cross-sections/")
 
 ML_OUTPUTS_PATH <- list.files(ML_OUTPUTS_DIR, full.names = TRUE)
 
 # paths to nextgen datasets
-nextgen_files <- list.files(nextgen_dir, full.names = FALSE)
+NEXTGEN_FILES   <- list.files(NEXTGEN_DIR, full.names = FALSE)
 
 # paths to nextgen datasets
-cs_files <- list.files(cs_pts_dir, full.names = FALSE)
+cs_files        <- list.files(CS_PTS_DIR, full.names = FALSE)
 
 # ensure the files are in the same order and matched up by VPU
 path_df <- align_files_by_vpu(
-  x    = nextgen_files,
+  x    = NEXTGEN_FILES,
   y    = cs_files,
-  base = base_dir
+  base = BASE_DIR
 ) 
 # dplyr::left_join(
 #   ref_df,
@@ -44,11 +45,11 @@ for (i in 1:nrow(path_df)) {
   
   # nextgen file and full path
   nextgen_file <- path_df$x[i]
-  nextgen_path <- paste0(nextgen_dir, nextgen_file)
+  nextgen_path <- paste0(NEXTGEN_DIR, nextgen_file)
   
   # model attributes file and full path
   cs_file      <- path_df$y[i]
-  cs_pts_path  <- paste0(cs_pts_dir, cs_file)
+  cs_pts_path  <- paste0(CS_PTS_DIR, cs_file)
   
   # current VPU being processed
   VPU = path_df$vpu[i]
@@ -56,8 +57,8 @@ for (i in 1:nrow(path_df)) {
   message("Augmenting DEM cross sections with ML estimated widths/depths: ", VPU, 
           " cross section points:",
           "'\n - cross section points: '", cs_file, "'", 
-          "'\n - ML estimated widths/depths: '", ML_OUTPUTS_FILE, "'", 
-          # "'\n - ML estimated widths/depths: '", ML_OUTPUTS_URI, "'", 
+          "'\n - ML estimated widths/depths: '", ML_OUTPUTS_S3_FILE, "'", 
+          # "'\n - ML estimated widths/depths: '", ML_OUTPUTS_S3_URI, "'", 
           "\n - CONUS network file: '", CONUS_NETWORK_URI, "'",
           "\n - flowpaths: '", nextgen_file,
           # "\n - waterbodies: '", ref_file, "'",
@@ -326,21 +327,21 @@ for (i in 1:nrow(path_df)) {
 
   # name of file and path to save transects gpkg too
   out_file <- paste0("nextgen_", path_df$vpu[i], "_cross_sections.parquet")
-  out_path <- paste0(final_dir, out_file)
+  out_path <- paste0(CS_OUTPUT_DIR, out_file)
   
   message(round(Sys.time()), " - Saving ML augmented cross section points to:\n - filepath: '", out_path, "'")
   
   # save cross section points as a parquet to out_path (lynker-spatial/02_cs_pts/cs_pts_<VPU num>.parquet)
   arrow::write_parquet(final_cs, out_path)
   
-  s3_save_uri <- paste0(CS_ML_PTS_S3_PREFIX, out_file)
+  s3_save_uri <- paste0(S3_CS_ML_PTS_DIR, out_file)
   
   # command to copy cross section points parquet to S3
   copy_cs_pts_to_s3 <- paste0("aws s3 cp ", 
                               out_path,
                               " ", 
                               s3_save_uri,
-                              ifelse(is.null(aws_profile), "", paste0(" --profile ", aws_profile))
+                              ifelse(is.null(AWS_PROFILE), "", paste0(" --profile ", AWS_PROFILE))
                               )
   
   message(
diff --git a/runners/cs_runner/04_driver.R b/runners/cs_runner/04_driver.R
index a19a7c4..212e1ac 100644
--- a/runners/cs_runner/04_driver.R
+++ b/runners/cs_runner/04_driver.R
@@ -3,10 +3,14 @@
 # downloads nextgen datasets 
 source("runners/cs_runner/config.R")
 
-# downloads nextgen datasets 
+# downloads datasets
+# - Nextgen data
+# - Reference features (for waterbody filtering)
+# - ML outputs
+# - FEMA 100 year floodplain polygons (FGBs)
 source("runners/cs_runner/download_nextgen.R")
-
-# download FEMA100 year FGBs
+source("runners/cs_runner/download_ref_features.R")
+source("runners/cs_runner/download_ml_outputs.R")
 source("runners/cs_runner/download_fema100.R")
 
 # simplify, dissolve, FEMA polygons and partition FEMA polygons by VPU 
@@ -19,4 +23,4 @@ source("runners/cs_runner/01_transects.R")
 source("runners/cs_runner/02_cs_pts.R")
 
 # Apply machine learning topwidths and depths estimates to DEM cross section points
-source("runners/cs_runner/02_cs_pts.R")
+source("runners/cs_runner/03_inject_ml.R")
diff --git a/runners/cs_runner/config.R b/runners/cs_runner/config.R
index 8d22554..a839f61 100644
--- a/runners/cs_runner/config.R
+++ b/runners/cs_runner/config.R
@@ -10,69 +10,99 @@ pacman::p_load(
 
 # load root directory 
 source("runners/cs_runner/config_vars.R")
+source("runners/cs_runner/utils.R")
 
 sf::sf_use_s2(FALSE)
 
-### Cross section point 
+### Cross section point
 
-### S3 names
+# # -------------------------------------------------------------------------------------
+# # ----- S3 names ------
+# # -------------------------------------------------------------------------------------
 
-# name of S3 bucket
-s3_bucket <- "s3://lynker-spatial/"
+# # AWS S3 bucket URI 
+# S3_BUCKET_URI <- "s3://lynker-spatial/"
 
-# name of bucket with nextgen data
-nextgen_bucket <- "lynker-spatial"
+# # name of bucket with nextgen data
+# S3_BUCKET_NAME <- "lynker-spatial"
 
-# nextgen bucket folder name
-nextgen_bucket_folder <- "v20.1/gpkg/"
+# # the name of the folder in the S3 bucket with the nextgen data
+# S3_BUCKET_NEXTGEN_DIR <- "v20.1/gpkg/"
 
-# nextgen bucket name
-nextgen_prefix  <- paste0(s3_bucket, nextgen_bucket_folder)
+# # full URI to the S3 bucket folder with the nextgen data 
+# S3_BUCKET_NEXTGEN_DIR_URI  <- paste0(S3_BUCKET_URI, S3_BUCKET_NEXTGEN_DIR)
 
-# reference features S3 bucket prefix
-ref_features_prefix <- "s3://lynker-spatial/00_reference_features/gpkg/"
+# # reference features S3 bucket prefix
+# S3_BUCKET_REF_FEATURES_URI <- "s3://lynker-spatial/00_reference_features/gpkg/"
 
-# S3 prefix/folder of version run
-version_prefix <- "v20.1"
-# version_prefix <- "v20"
+# # S3 prefix/folder of version run
+# VERSION <- "v20.1"
 
-### LOCAL DIRS
+# # -------------------------------------------------------------------------------------
 
-# directory to copy nextgen bucket data too
-nextgen_dir <- paste0(base_dir, "/", nextgen_bucket_folder)
-# nextgen_dir <- paste0(base_dir, "/pre-release/")
+# # -------------------------------------------------------------------------------------
+# # ----- Local directories ------
+# # -------------------------------------------------------------------------------------
 
-# model attributes directory
-model_attr_dir <- paste0(base_dir, "/model_attributes/")
+# ### LOCAL DIRS
 
-# cross-section data model data directories
-transects_dir <- paste0(base_dir, "/01_transects/")
-cs_pts_dir <- paste0(base_dir, "/02_cs_pts/")
+# # directory to copy nextgen bucket data too
+# NEXTGEN_DIR      <- paste0(BASE_DIR, "/", S3_BUCKET_NEXTGEN_DIR)
+# # NEXTGEN_DIR <- paste0(BASE_DIR, "/pre-release/")
 
-# final output directory with geopackages per VPU
-final_dir <- paste0(base_dir, "/cross_sections/")
+# # model attributes directory
+# MODEL_ATTR_DIR   <- paste0(BASE_DIR, "/model_attributes/")
 
-# directory to copy nextgen bucket data too
-ref_features_dir <- paste0(base_dir, "/00_reference_features/")
+# # cross-section data model data directories
+# TRANSECTS_DIR    <- paste0(BASE_DIR, "/01_transects/")
+# CS_PTS_DIR       <- paste0(BASE_DIR, "/02_cs_pts/")
 
-# make a directory for the ML outputs data
-ML_OUTPUTS_DIR <- paste0(base_dir, "/ml-outputs/")
+# # final output directory with geopackages per VPU
+# CS_OUTPUT_DIR    <- paste0(BASE_DIR, "/cross_sections/")
 
+# # directory to copy nextgen bucket data too
+# REF_FEATURES_DIR <- paste0(BASE_DIR, "/00_reference_features/")
+
+# # make a directory for the ML outputs data
+# ML_OUTPUTS_DIR   <- paste0(BASE_DIR, "/ml-outputs/")
+
+# -------------------------------------------------------------------------------------
+# ----- Create local directories ------
+# -------------------------------------------------------------------------------------
 # create directories 
-dir.create(transects_dir, showWarnings = FALSE)
-dir.create(cs_pts_dir,    showWarnings = FALSE)
-dir.create(ref_features_dir,     showWarnings = FALSE)
-dir.create(paste0(ref_features_dir, "gpkg/"),     showWarnings = FALSE)
-dir.create(final_dir,     showWarnings = FALSE)
+dir.create(TRANSECTS_DIR, showWarnings = FALSE)
+dir.create(CS_PTS_DIR,    showWarnings = FALSE)
+dir.create(REF_FEATURES_DIR,     showWarnings = FALSE)
+dir.create(paste0(REF_FEATURES_DIR, "gpkg/"),     showWarnings = FALSE)
+dir.create(CS_OUTPUT_DIR,     showWarnings = FALSE)
 dir.create(ML_OUTPUTS_DIR,     showWarnings = FALSE)
-# dir.create(model_attr_dir,  showWarnings = FALSE)
+
+# create the directory if it does NOT exist
+if(!dir.exists(NEXTGEN_DIR)) {
+  message("Directory does not exist at: \n\t'", NEXTGEN_DIR, "'\nCreating directory at: \n\t'", NEXTGEN_DIR, "'")
+  
+  dir.create(NEXTGEN_DIR)
+}
+
+# # create the directory if it does NOT exist
+# if(!dir.exists(MODEL_ATTR_DIR)) {
+#   message("Directory does not exist at: \n\t'", MODEL_ATTR_DIR, "'\nCreating directory at: \n\t'", MODEL_ATTR_DIR, "'")
+#   dir.create(MODEL_ATTR_DIR)
+# }
+# dir.create(MODEL_ATTR_DIR,  showWarnings = FALSE)
+
+# -------------------------------------------------------------------------------------
+
+# -------------------------------------------------------------------------------------
+# ----- Get the paths / locations of reference_features data ------
+# -------------------------------------------------------------------------------------
 
 ## Go get a list of the reference features geopackages from S3 and create a save path using the S3 file names to save reference features to local directory
 
 # list objects in S3 bucket, and regular expression match to nextgen_.gpkg pattern
 list_ref_features <- paste0('#!/bin/bash
             # AWS S3 Bucket and Directory information
-            S3_BUCKET="', ref_features_prefix , '"
+            S3_BUCKET="', S3_BUCKET_REF_FEATURES_URI , '"
             
             # Regular expression pattern to match object keys
             PATTERN="reference_features.gpkg"
@@ -88,8 +118,8 @@ list_ref_features <- paste0('#!/bin/bash
 ref_features <- system(list_ref_features, intern = TRUE)
 
 # ref features datasets
-ref_features_keys <- paste0(ref_features_prefix, ref_features)
-ref_features_files <- paste0(ref_features_dir, "gpkg/", ref_features)
+ref_features_keys  <- paste0(S3_BUCKET_REF_FEATURES_URI, ref_features)
+ref_features_files <- paste0(REF_FEATURES_DIR, "gpkg/", ref_features)
 
 ###
 ### UTILITY FUNCTION FOR MATCHING FILES BASED ON VPU STRING ###
diff --git a/runners/cs_runner/config_vars.R b/runners/cs_runner/config_vars.R
index 492611f..5c4a713 100644
--- a/runners/cs_runner/config_vars.R
+++ b/runners/cs_runner/config_vars.R
@@ -1,47 +1,142 @@
-### EDIT base_dir, aws_profile, and DEM_URL ###
+### EDIT BASE_DIR, AWS_PROFILE, and DEM_URL ###
 
-# ----------------------------------------------------------------------------
+# ---------------------------------------------------------------------------------
 # ---- General paths and constants variables ----
-# ----------------------------------------------------------------------------
-base_dir     <- '/Users/anguswatters/Desktop/lynker-spatial'
+# - edit to match your local environment
+# - BASE_DIR: base directory for local file storage
+# - AWS_PROFILE: AWS profile to run CLI commands
+# - VERSION: S3 prefix/folder of version to run / generate hydrofabric data for
+# ---------------------------------------------------------------------------------
 
-# AWS profile to run CLI commands 
-aws_profile  <- "angus-lynker"
+# Base directory for local file storage
+BASE_DIR           <- '/Users/anguswatters/Desktop/lynker-spatial'
 
-# name of S3 bucket
-s3_bucket    <- "s3://lynker-spatial/"
+# AWS profile to run CLI commands 
+AWS_PROFILE        <- "angus-lynker"
 
 # S3 prefix/folder of version run
-version_prefix <- "v20.1"
+VERSION            <- "v20.1"
+
+# string to fill in the "cs_source" column in output datasets
+CS_SOURCE <- "hydrofabric3D"
+
+# name of bucket with nextgen data
+S3_BUCKET_NAME     <- "lynker-spatial"
+S3_BUCKET_SUBDIR   <- "hydrofabric"
+
+# AWS S3 bucket URI 
+S3_BUCKET_BASE_URI <- paste0("s3://", S3_BUCKET_NAME, "/")
+S3_BUCKET_URI      <- paste0(S3_BUCKET_BASE_URI, S3_BUCKET_SUBDIR, "/")
+# S3_BUCKET_URI      <- "s3://lynker-spatial/"
+
+# -------------------------------------------------------------------------------------
+# ---- S3 output directories -----
+# - transects 
+# - cross section points
+# - ML cross section points
+# -------------------------------------------------------------------------------------
+
+# transect bucket prefix
+S3_TRANSECTS_DIR   <- paste0(S3_BUCKET_URI, VERSION, "/3D/transects/")
+
+# cross section bucket prefix
+S3_CS_PTS_DIR      <- paste0(S3_BUCKET_URI, VERSION, "/3D/dem-cross-sections/")
+
+# cross section bucket prefix
+S3_CS_ML_PTS_DIR   <- paste0(S3_BUCKET_URI, VERSION, "/3D/cross-sections/")
+
+# -------------------------------------------------------------------------------------
+# ---- S3 nextgen data paths / directories -----
+# -------------------------------------------------------------------------------------
+
+# the name of the folder in the S3 bucket with the nextgen data
+S3_BUCKET_NEXTGEN_DIR       <- paste0(VERSION, "/gpkg/")
+# S3_BUCKET_NEXTGEN_DIR <- "v20.1/gpkg/"
+
+# full URI to the S3 bucket folder with the nextgen data 
+S3_BUCKET_NEXTGEN_DIR_URI   <- paste0(S3_BUCKET_URI, S3_BUCKET_NEXTGEN_DIR)
+
+# reference features S3 bucket prefix
+S3_BUCKET_REF_FEATURES_URI  <- paste0("s3://", S3_BUCKET_NAME, "/00_reference_features/gpkg/")
+# S3_BUCKET_REF_FEATURES_URI  <- "s3://lynker-spatial/00_reference_features/gpkg/"
+
+# ----------------------------------------------------------------------------
+# ---- Machine learning data path variables ----
+# ----------------------------------------------------------------------------
+
+ML_OUTPUTS_S3_FILE   <- "channel_ml_outputs.parquet"
+
+# ML_OUTPUTS_S3_DIR    <- paste0(VERSION, "/3D/ml-outputs/")
+# ML_OUTPUTS_S3_DIR  <- "v20.1/3D/ml-outputs/"
+
+ML_OUTPUTS_S3_URI      <- paste0(S3_BUCKET_URI, VERSION, "/3D/ml-outputs/", ML_OUTPUTS_S3_FILE)
+# ML_OUTPUTS_S3_URI    <- paste0(S3_BUCKET_URI, ML_OUTPUTS_S3_DIR, ML_OUTPUTS_S3_FILE)
+
+ML_OUTPUTS_PATH      <- paste0(BASE_DIR, "/ml-outputs/", ML_OUTPUTS_S3_FILE)
+
+# path to the remote CONUS net parquet file
+CONUS_NETWORK_FILE   <- "conus_net.parquet"
+CONUS_NETWORK_URI    <- paste0(S3_BUCKET_URI, VERSION, "/", CONUS_NETWORK_FILE)
+
+# ----------------------------------------------------------------------------
+# -------------------------------------------------------------------------------------
+# ---- Local directory / path variables ----
+# -------------------------------------------------------------------------------------
+
+# directory to copy nextgen bucket data to
+NEXTGEN_DIR      <- paste0(BASE_DIR, "/", S3_BUCKET_NEXTGEN_DIR)
+# NEXTGEN_DIR <- paste0(BASE_DIR, "/pre-release/")
+
+# # model attributes directory
+# MODEL_ATTR_DIR   <- paste0(BASE_DIR, "/model_attributes/")
+
+# cross-section data model data directories
+TRANSECTS_DIR    <- paste0(BASE_DIR, "/01_transects/")
+CS_PTS_DIR       <- paste0(BASE_DIR, "/02_cs_pts/")
+
+# final output directory with geopackages per VPU
+CS_OUTPUT_DIR    <- paste0(BASE_DIR, "/cross_sections/")
+
+# directory to copy reference features bucket data to
+REF_FEATURES_DIR      <- paste0(BASE_DIR, "/00_reference_features/")
+REF_FEATURES_GPKG_DIR <- paste0(REF_FEATURES_DIR, "gpkg/")
+
+# make a directory for the ML outputs data
+ML_OUTPUTS_DIR   <- paste0(BASE_DIR, "/ml-outputs/")
+
+# -------------------------------------------------------------------------------------
+# ---- Create local directory / path variables (FEMA data) ----
+# -------------------------------------------------------------------------------------
 
 # location of FEMA 100 year flood plain FGB files
-FEMA_S3_BUCKET <- "s3://lynker-hydrofabric/"
-FEMA_S3_BUCKET_PREFIX <- "FEMA100/"
-FEMA_S3_DIR <- paste0(FEMA_S3_BUCKET, FEMA_S3_BUCKET_PREFIX)
+FEMA_S3_BUCKET         <- "s3://lynker-hydrofabric/"
+FEMA_S3_BUCKET_PREFIX  <- "FEMA100/"
+FEMA_S3_DIR            <- paste0(FEMA_S3_BUCKET, FEMA_S3_BUCKET_PREFIX)
 
 # FEMA100 year flood map FGB save location (temporary, will be deleted after processing)
-FEMA_FGB_PATH <- paste0(base_dir, "/FEMA100")
-FEMA_GEOJSON_PATH    <- paste0(base_dir, "/FEMA100_geojson")
-FEMA_CLEAN_PATH      <- paste0(base_dir, "/FEMA100_clean")
-FEMA_GPKG_PATH       <- paste0(base_dir, "/FEMA100_gpkg")
-FEMA_GPKG_BB_PATH    <- paste0(base_dir, "/FEMA100_bounding_box") # TODO: Probably can be deleted too, not sure yet
+FEMA_FGB_PATH        <- paste0(BASE_DIR, "/FEMA100")
+FEMA_GEOJSON_PATH    <- paste0(BASE_DIR, "/FEMA100_geojson")
+FEMA_CLEAN_PATH      <- paste0(BASE_DIR, "/FEMA100_clean")
+FEMA_GPKG_PATH       <- paste0(BASE_DIR, "/FEMA100_gpkg")
+FEMA_GPKG_BB_PATH    <- paste0(BASE_DIR, "/FEMA100_bounding_box") # TODO: Probably can be deleted too, not sure yet
 
-FEMA_BY_VPU_PATH     <- paste0(base_dir, "/FEMA_BY_VPU")
+FEMA_BY_VPU_PATH     <- paste0(BASE_DIR, "/FEMA_BY_VPU")
 VPU_IDS              <- sf::st_drop_geometry(nhdplusTools::get_boundaries())$VPUID
 
-FEMA_VPU_SUBFOLDERS <- paste0(FEMA_BY_VPU_PATH, "/VPU_", VPU_IDS)
+FEMA_VPU_SUBFOLDERS  <- paste0(FEMA_BY_VPU_PATH, "/VPU_", VPU_IDS)
 # FEMA_VPU_SUBFOLDERS <- paste0(
 #                         FEMA_BY_VPU_PATH, "/VPU_",
 #                         unlist(
-#                           lapply(list.files(nextgen_dir, full.names = FALSE), function(vpu_file_names) {
+#                           lapply(list.files(NEXTGEN_DIR, full.names = FALSE), function(vpu_file_names) {
 #                             unlist(regmatches(vpu_file_names,  gregexpr("\\d+[A-Za-z]*", vpu_file_names)))})
 #                           )
 #                         )
 
 # -------------------------------------------------------------------------------------
-# ---- OVERWRITE_FEMA_FILES constant logical ----
+# ---- OVERWRITE_FEMA_FILES constant logicals ----
 # ---- > if TRUE, processing steps will be run again 
 #          and overwrite existing previously processed files
+# TODO: Describe these variables
 # -------------------------------------------------------------------------------------
 
 # Default is TRUE (i.e. a fresh processing run is done from start to finish)
@@ -53,12 +148,12 @@ DELETE_STAGING_GPKGS  <- TRUE # remove intermediary files from the main output f
 # ----------------------------------------------------------------------------
 
 # DEM URL
-DEM_URL      <- "/vsicurl/https://prd-tnm.s3.amazonaws.com/StagedProducts/Elevation/1/TIFF/USGS_Seamless_DEM_1.vrt"
+DEM_URL        <- "/vsicurl/https://prd-tnm.s3.amazonaws.com/StagedProducts/Elevation/1/TIFF/USGS_Seamless_DEM_1.vrt"
 
 # scale argument for cross_section_pts() function. 
 # The percentage of the length of the transect line to try and extend a transect to see if viable Z values can be found by extending transect line
 # Default setting is 50% of the original transect lines length (0.5)
-EXTENSION_PCT <- 0.5
+EXTENSION_PCT  <- 0.5
 
 # percentage of the length each cross section that should be used as a threshold for classifying a cross section as having relief or not
 # 1% of the cross sections length is the default value we are using 
@@ -72,22 +167,7 @@ COLLECT_META <- TRUE
 # Where should meta data CSVs be saved to? 
 # Local path to save CSVs of cross section meta data during each iteration
 # TODO: Probably delete this 
-META_PATH <- '/Users/anguswatters/Desktop/cs_meta/'
+META_PATH    <- '/Users/anguswatters/Desktop/cs_meta/'
 # META_PATH <-  "/local/path/to/save/cross_section_meta_data/"
 
-# ----------------------------------------------------------------------------
-# ---- Machine learning data path variables ----
-# ----------------------------------------------------------------------------
-
-ML_OUTPUTS_FILE   = "channel_ml_outputs.parquet"
-ML_OUTPUTS_PREFIX = "v20.1/3D/ml-outputs/"
-ML_OUTPUTS_URI    = paste0(s3_bucket, ML_OUTPUTS_PREFIX, ML_OUTPUTS_FILE)
-# ML_OUTPUTS_URI    = "s3://lynker-spatial/v20.1/3D/ml-outputs/channel_ml_outputs.parquet"
-
-ML_OUTPUTS_PATH <- paste0(base_dir, "/ml-outputs/", ML_OUTPUTS_FILE)
-
-# path to the remote CONUS net parquet file
-CONUS_NETWORK_FILENAME <- "conus_net.parquet"
-CONUS_NETWORK_URI      <- paste0(s3_bucket, version_prefix, "/", CONUS_NETWORK_FILENAME)
 
-### EDIT ###
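
For orientation, the new constants above compose into full S3 prefixes like so (values are the defaults from this hunk; BASE_DIR and AWS_PROFILE stay machine-specific). Note the layout change: URIs now include the hydrofabric/ subdirectory, so v20.1 data moves from s3://lynker-spatial/v20.1/... to s3://lynker-spatial/hydrofabric/v20.1/...:

    S3_BUCKET_NAME     <- "lynker-spatial"
    S3_BUCKET_SUBDIR   <- "hydrofabric"
    S3_BUCKET_BASE_URI <- paste0("s3://", S3_BUCKET_NAME, "/")              # "s3://lynker-spatial/"
    S3_BUCKET_URI      <- paste0(S3_BUCKET_BASE_URI, S3_BUCKET_SUBDIR, "/") # ".../hydrofabric/"
    VERSION            <- "v20.1"

    # transects land at "s3://lynker-spatial/hydrofabric/v20.1/3D/transects/"
    S3_TRANSECTS_DIR   <- paste0(S3_BUCKET_URI, VERSION, "/3D/transects/")
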
diff --git a/runners/cs_runner/download_fema100.R b/runners/cs_runner/download_fema100.R
index 0606142..9fa2f8a 100644
--- a/runners/cs_runner/download_fema100.R
+++ b/runners/cs_runner/download_fema100.R
@@ -1,5 +1,5 @@
-# Running this script goes and pulls the desired FEMA100 flood fgb datasets from the lynker-hydrofabric S3 bucket then saves them into a directory within "base_dir"
-# base_dir is defined within runners/workflow/root_dir.R
+# Running this script goes and pulls the desired FEMA100 flood fgb datasets from the lynker-hydrofabric S3 bucket then saves them into a directory within "BASE_DIR"
+# BASE_DIR is defined within runners/workflow/root_dir.R
 
 # NOTE: The lynker-hydrofabric S3 bucket is private at the moment
 
@@ -75,7 +75,7 @@ fema_list_command <- paste0('#!/bin/bash
             PATTERN=".fgb$"
             
             # AWS CLI command to list objects in the S3 bucket and use grep to filter them
-            S3_OBJECTS=$(aws s3 ls "$S3_BUCKET" --profile ', aws_profile, ' | awk \'{print $4}\' | grep -E "$PATTERN")
+            S3_OBJECTS=$(aws s3 ls "$S3_BUCKET" --profile ', AWS_PROFILE, ' | awk \'{print $4}\' | grep -E "$PATTERN")
             
             echo "$S3_OBJECTS"'
 )
@@ -99,7 +99,7 @@ for (key in FEMA_BUCKET_KEYS) {
   local_save_path <- paste0(FEMA_FGB_PATH, "/", key)
   
   if(!file.exists(local_save_path)) {
-    copy_cmd <- paste0('aws s3 cp ', FEMA_S3_BUCKET, FEMA_S3_BUCKET_PREFIX, key, " ", local_save_path, " --profile ", aws_profile)
+    copy_cmd <- paste0('aws s3 cp ', FEMA_S3_BUCKET, FEMA_S3_BUCKET_PREFIX, key, " ", local_save_path, " --profile ", AWS_PROFILE)
     
     message("S3 object:\n > '", FEMA_S3_BUCKET, FEMA_S3_BUCKET_PREFIX, key, "'")
     message("Downloading S3 object to:\n > '", local_save_path, "'")
diff --git a/runners/cs_runner/download_ml_outputs.R b/runners/cs_runner/download_ml_outputs.R
new file mode 100644
index 0000000..a3751cc
--- /dev/null
+++ b/runners/cs_runner/download_ml_outputs.R
@@ -0,0 +1,16 @@
+# download_ml_outputs.R
+# This script pulls the ML outputs data from http://www.lynker-spatial.com/ and saves it into a directory within "BASE_DIR"
+
+# load config variables
+source("runners/cs_runner/config_vars.R")
+
+# ---------------------------------------------------------------------------
+# ---- Get ML outputs data from S3 bucket ----
+# ---------------------------------------------------------------------------
+
+ml_copy_cmd <- paste0('aws s3 cp ', ML_OUTPUTS_S3_URI, ' ', paste0(ML_OUTPUTS_DIR, basename(ML_OUTPUTS_S3_URI)))
+
+message("Copying S3 object:\n", ML_OUTPUTS_S3_URI)
+system(ml_copy_cmd)
+message("Download '", paste0(ML_OUTPUTS_DIR, basename(ML_OUTPUTS_S3_URI)), "' complete!")
+message("------------------")
diff --git a/runners/cs_runner/download_nextgen.R b/runners/cs_runner/download_nextgen.R
index dd0b807..9c455ee 100644
--- a/runners/cs_runner/download_nextgen.R
+++ b/runners/cs_runner/download_nextgen.R
@@ -1,43 +1,17 @@
-# Running this script goes and pulls the desired NextGen geopackage datasets from http://www.lynker-spatial.com/, saves them into a directory within "base_dir"
-# base_dir is defined within runners/workflow/root_dir.R
+# Running this script pulls the desired NextGen geopackage datasets from http://www.lynker-spatial.com/ and saves them into a directory within "BASE_DIR"
+# BASE_DIR is defined within runners/workflow/root_dir.R
 
 # load config variables
 source("runners/cs_runner/config_vars.R")
 
-# name of S3 bucket
-s3_bucket <- "s3://lynker-spatial/"
-
-# nextgen bucket folder name
-nextgen_bucket_folder <- "v20.1/gpkg/"
-
-# nextgen bucket name
-nextgen_prefix  <- paste0(s3_bucket, nextgen_bucket_folder)
-
-# prerelease_prefix     <- "s3://lynker-spatial/pre-release/"
-
-# # reference features S3 bucket prefix
-# ref_features_prefix <- "s3://lynker-spatial/00_reference_features/gpkg/"
-
-# nextgen model attributes folder in S3 bucket with parquet files
-model_attr_prefix <- paste0(s3_bucket, "v20/3D/model_attributes/")
-
 # directory to copy nextgen bucket data to
-nextgen_dir <- paste0(base_dir, "/", nextgen_bucket_folder)
+NEXTGEN_DIR <- paste0(BASE_DIR, "/", S3_BUCKET_NEXTGEN_DIR)
 
 # create the directory if it does NOT exist
-if(!dir.exists(nextgen_dir)) {
-  message("Directory does not exist at: \n\t'", nextgen_dir, "'\nCreating directory at: \n\t'", nextgen_dir, "'")
+if(!dir.exists(NEXTGEN_DIR)) {
+  message("Directory does not exist at: \n\t'", NEXTGEN_DIR, "'\nCreating directory at: \n\t'", NEXTGEN_DIR, "'")
   
-  dir.create(nextgen_dir)
-}
-
-# model attributes directory
-model_attr_dir <- paste0(base_dir, "/model_attributes/")
-
-# create the directory if it does NOT exist
-if(!dir.exists(model_attr_dir)) {
-  message("Directory does not exist at: \n\t'", model_attr_dir, "'\nCreating directory at: \n\t'", model_attr_dir, "'")
-  dir.create(model_attr_dir)
+  dir.create(NEXTGEN_DIR)
 }
 
 # ---------------------------------------------------------------------------
@@ -47,8 +21,8 @@ if(!dir.exists(model_attr_dir)) {
 # list objects in the S3 bucket and use a regular expression to match keys to the nextgen_*.gpkg pattern
 command <- paste0('#!/bin/bash
             # AWS S3 Bucket and Directory information
-            S3_BUCKET="', nextgen_prefix, '"
-            DESTINATION_DIR=', nextgen_dir, '
+            S3_BUCKET="', S3_BUCKET_NEXTGEN_DIR_URI, '"
+            DESTINATION_DIR=', NEXTGEN_DIR, '
             
             # Regular expression pattern to match object keys
             PATTERN="^nextgen_[0-9][0-9][A-Za-z]*\\.gpkg$"
@@ -66,8 +40,8 @@ bucket_keys <- system(command, intern = TRUE)
 # Parse the selected S3 objects keys and copy them to the destination directory
 for (key in bucket_keys) {
   
-  copy_cmd <- paste0('aws s3 cp ', nextgen_prefix, key, " ", nextgen_dir, key)
-  message("Copying S3 object:\n", paste0(nextgen_prefix, key))
+  copy_cmd <- paste0('aws s3 cp ', S3_BUCKET_NEXTGEN_DIR_URI, key, " ", NEXTGEN_DIR, key)
+  message("Copying S3 object:\n", paste0(S3_BUCKET_NEXTGEN_DIR_URI, key))
   
   system(copy_cmd)
   
@@ -75,87 +49,51 @@ for (key in bucket_keys) {
   message("------------------")
 }
 
-# ---------------------------------------------------------------------------
-# ---- List/Get nextgen model attributes parquets from S3 bucket ----
-# ---------------------------------------------------------------------------
-
-# aws s3 ls s3://lynker-spatial/v20/3D/model_attributes/
-
-# list objects in S3 bucket, and regular expression match to nextgen_.gpkg pattern
-list_model_attr_cmd <- paste0('#!/bin/bash
-            # AWS S3 Bucket and Directory information
-            S3_BUCKET="', model_attr_prefix, '"
-            
-            # Regular expression pattern to match object keys
-            PATTERN="^nextgen_[0-9][0-9][A-Za-z]*\\_model_attributes.parquet$"
-            
-            # AWS CLI command to list objects in the S3 bucket and use grep to filter them
-            S3_OBJECTS=$(aws s3 ls "$S3_BUCKET" | awk \'{print $4}\' | grep -E "$PATTERN")
-            
-            echo "$S3_OBJECTS"'
-)
-
-# get a list of the model attributes objects in S3
-model_attr_keys <- system(list_model_attr_cmd, intern = TRUE)
-
-# Parse the selected S3 objects keys and copy them to the destination directory
-for (key in model_attr_keys) {
-  
-  copy_cmd <- paste0('aws s3 cp ', model_attr_prefix, key, ' ', model_attr_dir, key)
-  
-  message("Copying S3 object:\n", paste0(model_attr_prefix, key))
-  
-  system(copy_cmd)
-  
-  message("Download '", paste0(model_attr_prefix, key), "' complete!")
-  message("------------------")
-}
-
-# ---------------------------------------------------------------------------
-# ---- List/Get reference features from S3 bucket ----
-# ---------------------------------------------------------------------------
-
-## Go get a list of the reference features geopackages from S3 and create a save path using the S3 file names to save reference features to local directory
-
-# list objects in S3 bucket, and regular expression match to nextgen_.gpkg pattern
-list_ref_features <- paste0('#!/bin/bash
-            # AWS S3 Bucket and Directory information
-            S3_BUCKET="', ref_features_prefix , '"
-            
-            # Regular expression pattern to match object keys
-            PATTERN="reference_features.gpkg"
-
-            S3_OBJECTS=$(aws s3 ls "$S3_BUCKET" | awk \'{print $4}\' | grep -E "$PATTERN")
-            
-            echo "$S3_OBJECTS"'
-)
-
-# ---- Get a list of reference features geopackages geopackages ----
-# Run the script to get a list of the nextgen geopackages that matched the regular expression above
-ref_features <- system(list_ref_features, intern = TRUE)
-
-## Download reference features geopackages and save them to a local directory
-# Parse the selected S3 objects keys and copy them to the destination directory
-for (key in ref_features) {
-  # paste0(ref_features_dir, "gpkg/")
-  copy_cmd <- paste0('aws s3 cp ', ref_features_prefix, key, ' ', paste0(ref_features_dir, "gpkg/"), key)
-  
-  message("Copying S3 object:\n", paste0(ref_features_prefix, key))
-  system(copy_cmd)
-  
-  message("Download '", paste0(ref_features_dir, "gpkg/", key), "' complete!")
-  message("------------------")
-}
-
-# ---------------------------------------------------------------------------
-# ---- Get ML outputs data from S3 bucket ----
-# ---------------------------------------------------------------------------
-
-ml_copy_cmd <- paste0('aws s3 cp ', ML_OUTPUTS_URI, ' ', paste0(ML_OUTPUTS_DIR, basename(ML_OUTPUTS_URI)))
-
-message("Copying S3 object:\n", ML_OUTPUTS_URI)
-system(ml_copy_cmd)
-
-message("Download '", paste0(ML_OUTPUTS_DIR, basename(ML_OUTPUTS_URI)), "' complete!")
-message("------------------")
-
+# # ---------------------------------------------------------------------------
+# # ---- List/Get reference features from S3 bucket ----
+# # ---------------------------------------------------------------------------
+# 
+# ## Go get a list of the reference features geopackages from S3 and create a save path using the S3 file names to save reference features to local directory
+# 
+# # list objects in S3 bucket, and regular expression match to nextgen_.gpkg pattern
+# list_ref_features <- paste0('#!/bin/bash
+#             # AWS S3 Bucket and Directory information
+#             S3_BUCKET="', S3_BUCKET_REF_FEATURES_URI , '"
+#             
+#             # Regular expression pattern to match object keys
+#             PATTERN="reference_features.gpkg"
+# 
+#             S3_OBJECTS=$(aws s3 ls "$S3_BUCKET" | awk \'{print $4}\' | grep -E "$PATTERN")
+#             
+#             echo "$S3_OBJECTS"'
+# )
+# 
+# # ---- Get a list of reference features geopackages ----
+# # Run the script to get a list of the nextgen geopackages that matched the regular expression above
+# ref_features <- system(list_ref_features, intern = TRUE)
+# 
+# ## Download reference features geopackages and save them to a local directory
+# # Parse the selected S3 objects keys and copy them to the destination directory
+# for (key in ref_features) {
+#   # paste0(REF_FEATURES_DIR, "gpkg/")
+#   copy_cmd <- paste0('aws s3 cp ', S3_BUCKET_REF_FEATURES_URI, key, ' ', paste0(REF_FEATURES_DIR, "gpkg/"), key)
+#   
+#   message("Copying S3 object:\n", paste0(S3_BUCKET_REF_FEATURES_URI, key))
+#   system(copy_cmd)
+#   
+#   message("Download '", paste0(REF_FEATURES_DIR, "gpkg/", key), "' complete!")
+#   message("------------------")
+# }
+# 
+# # ---------------------------------------------------------------------------
+# # ---- Get ML outputs data from S3 bucket ----
+# # ---------------------------------------------------------------------------
+# 
+# ml_copy_cmd <- paste0('aws s3 cp ', ML_OUTPUTS_S3_URI, ' ', paste0(ML_OUTPUTS_DIR, basename(ML_OUTPUTS_S3_URI)))
+# 
+# message("Copying S3 object:\n", ML_OUTPUTS_S3_URI)
+# system(ml_copy_cmd)
+# 
+# message("Download '", paste0(ML_OUTPUTS_DIR, basename(ML_OUTPUTS_S3_URI)), "' complete!")
+# message("------------------")
+# 
diff --git a/runners/cs_runner/download_ref_features.R b/runners/cs_runner/download_ref_features.R
new file mode 100644
index 0000000..fc10e80
--- /dev/null
+++ b/runners/cs_runner/download_ref_features.R
@@ -0,0 +1,35 @@
+# download_ref_features.R
+# This script pulls the reference features geopackage datasets from http://www.lynker-spatial.com/ and saves them into a directory within "BASE_DIR"
+
+# load config variables
+source("runners/cs_runner/config_vars.R")
+
+# ---------------------------------------------------------------------------
+# ---- List/Get reference features from S3 bucket ----
+# ---------------------------------------------------------------------------
+
+# list objects in the S3 bucket and use a regular expression to match keys to the reference_features.gpkg pattern
+list_ref_features <- paste0('#!/bin/bash
+            # AWS S3 Bucket and Directory information
+            S3_BUCKET="', S3_BUCKET_REF_FEATURES_URI, '"
+            
+            # Regular expression pattern to match object keys
+            PATTERN="reference_features.gpkg"
+
+            S3_OBJECTS=$(aws s3 ls "$S3_BUCKET" | awk \'{print $4}\' | grep -E "$PATTERN")
+            
+            echo "$S3_OBJECTS"'
+)
+
+# ---- Get a list of reference features geopackages ----
+# Run the script to get a list of the reference features geopackages that matched the regular expression above
+ref_features <- system(list_ref_features, intern = TRUE)
+
+# Parse the selected S3 objects keys and copy them to the destination directory
+for (key in ref_features) {
+  copy_cmd <- paste0('aws s3 cp ', S3_BUCKET_REF_FEATURES_URI, key, ' ', paste0(REF_FEATURES_DIR, "gpkg/"), key)
+  message("Copying S3 object:\n", paste0(S3_BUCKET_REF_FEATURES_URI, key))
+  system(copy_cmd)
+  message("Download '", paste0(REF_FEATURES_DIR, "gpkg/", key), "' complete!")
+  message("------------------")
+}
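+
+# A possible safeguard (a sketch, following the dir.create() pattern used in
+# download_nextgen.R): ensure the "gpkg/" destination directory exists before copying
+#   ref_gpkg_dir <- paste0(REF_FEATURES_DIR, "gpkg/")
+#   if (!dir.exists(ref_gpkg_dir)) {
+#     dir.create(ref_gpkg_dir, recursive = TRUE)
+#   }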
diff --git a/runners/cs_runner/partition_fema_by_vpu.R b/runners/cs_runner/partition_fema_by_vpu.R
index 13ea3ba..8e413eb 100644
--- a/runners/cs_runner/partition_fema_by_vpu.R
+++ b/runners/cs_runner/partition_fema_by_vpu.R
@@ -261,8 +261,8 @@ for (file_path in FEMA_gpkg_paths) {
 FEMA_CLEAN_GPKG_PATHS      <- list.files(FEMA_GPKG_PATH, full.names = TRUE)
 
 # paths to nextgen datasets and model attribute parquet files
-NEXTGEN_FILENAMES    <- list.files(nextgen_dir, full.names = FALSE)
-NEXTGEN_FILE_PATHS   <- paste0(nextgen_dir, NEXTGEN_FILENAMES)
+NEXTGEN_FILENAMES    <- list.files(NEXTGEN_DIR, full.names = FALSE)
+NEXTGEN_FILE_PATHS   <- paste0(NEXTGEN_DIR, NEXTGEN_FILENAMES)
 
 for (file_path in FEMA_CLEAN_GPKG_PATHS) {
   
diff --git a/runners/cs_runner/set_path_variables.R b/runners/cs_runner/set_path_variables.R
new file mode 100644
index 0000000..b590024
--- /dev/null
+++ b/runners/cs_runner/set_path_variables.R
@@ -0,0 +1,10 @@
+# Generate the list of file paths for locally stored nextgen datasets:
+# - NEXTGEN_FILES --> (list NEXTGEN_DIR files that were downloaded via download_nextgen.R)
+
+
+# load config variables
+source("runners/cs_runner/config_vars.R")
+
+# NOTE: SET VARIABLE FOR REST OF PROCESSING 
+# paths to nextgen datasets
+NEXTGEN_FILES   <- list.files(NEXTGEN_DIR, full.names = FALSE)
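+
+# Downstream runners can then build full file paths from these, e.g. (index is illustrative):
+#   nextgen_path <- paste0(NEXTGEN_DIR, NEXTGEN_FILES[1])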
diff --git a/runners/cs_runner/utils.R b/runners/cs_runner/utils.R
index 8bfd145..c4495d7 100644
--- a/runners/cs_runner/utils.R
+++ b/runners/cs_runner/utils.R
@@ -1,3 +1,152 @@
+# Given 2 character vectors of filenames both including VPU strings after a "nextgen_" string, match them together to
+# make sure they are aligned and in the same order
+# x is a character vector of file paths with a VPU ID preceded by a "nextgen_" string 
+# y is a character vector of file paths with a VPU ID preceded by a "nextgen_" string 
+# base is a character vector of the base directory of the files. Defaults to NULL
+# Returns a dataframe with VPU, x, and y columns
+align_files_by_vpu <- function(
+    x, 
+    y, 
+    base = NULL
+) {
+
+  # Regular expression pattern to match numeric pattern after "nextgen_" and remove everything after the ending period
+  regex_pattern <- "nextgen_(\\d+[A-Za-z]?).*"
+  
+  # path dataframe for X filepaths
+  x_paths <- data.frame(x = x)
+  
+  # path dataframe for Y filepaths
+  y_paths <- data.frame(y = y)
+  
+  # generate VPU IDs based on file path regular expression matching with "regex_pattern" above
+  x_paths$vpu <- gsub(regex_pattern, "\\1", x_paths$x)
+  y_paths$vpu <- gsub(regex_pattern, "\\1", y_paths$y)
+  
+  # match paths based on VPU column
+  matched_paths <- dplyr::left_join(
+    x_paths,
+    y_paths,
+    by = "vpu"
+  ) 
+  
+  # reorder columns
+  matched_paths <- dplyr::relocate(matched_paths, vpu, x, y)
+  
+  if(!is.null(base)) {
+    matched_paths$base_dir <- base
+  }
+  
+  return(matched_paths)
+  
+}
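+
+# Illustrative usage (a sketch; the transect filename shown is hypothetical):
+#   path_df <- align_files_by_vpu(
+#     x    = list.files(NEXTGEN_DIR, full.names = FALSE),
+#     y    = list.files(TRANSECTS_DIR, full.names = FALSE),
+#     base = BASE_DIR
+#   )
+#   # e.g. one row: vpu = "06", x = "nextgen_06.gpkg", y = "nextgen_06_transects.gpkg"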
+
+# Update flowlines and transects to remove flowlines and transects that intersect with reference_features waterbodies
+# flowlines: flowlines linestring sf object
+# trans: transects linestring sf object
+# waterbodies: waterbodies polygon sf object
+# Returns a list of length 2 with logical vectors ("valid_flowlines", "valid_transects") that subset the "flowlines" and "trans" sf objects, removing flowlines and transects that intersect waterbodies
+wb_intersects <- function(flowlines, trans, waterbodies) {
+  
+  ########  ########  ########  ########  ########  ########
+  
+  flowlines_geos <- geos::as_geos_geometry(flowlines)
+  wbs_geos <- geos::as_geos_geometry(waterbodies)
+  
+  # temporary ID for transects that is the "hy_id", underscore, "cs_id", used for subsetting in future steps
+  trans$tmp_id <- paste0(trans$hy_id, "_", trans$cs_id)
+  
+  message("Checking flowlines against waterbodies...")
+  
+  # create an index between flowlines and waterbodies 
+  wb_index <- geos::geos_intersects_matrix(flowlines_geos, wbs_geos)
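+  # (geos::geos_intersects_matrix() returns, for each flowline, the indices of the
+  #  waterbodies it intersects, so lengths(wb_index) == 0 identifies flowlines that
+  #  touch no waterbody at all)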
+  
+  # split flowlines into those that intersect NO waterbodies (to keep) and those that intersect at least one (to check further)
+  to_keep  <- flowlines[lengths(wb_index) == 0, ]
+  to_check <- flowlines[lengths(wb_index) != 0, ]
+  
+  # subset transects to the hy_ids in "to_check" set of flowlines
+  trans_check <- trans[trans$hy_id %in% unique(to_check$id), ]
+  # trans_check <- trans_geos[trans$hy_id %in% unique(to_check$id)]
+  
+  # check where the transects linestrings intersect with the waterbodies
+  trans_geos_check <- geos::as_geos_geometry(trans_check)
+  
+  message("Checking transects against waterbodies (v2) ...")
+  wb_trans_index <- geos::geos_intersects_matrix(trans_geos_check, wbs_geos)                    # (NEW METHOD)
+  # wb_trans_index <- geos::geos_intersects_any(trans_geos_check, wbs_geos[unlist(wb_index)])   # (OLD METHOD)
+  
+  # sum(lengths(wb_trans_index) == 0)
+  # length(wb_trans_index)
+  
+  # within the transect lines that are on a flowline that crosses a waterbody, 
+  # check if any of these transect lines DO NOT CROSS A WATERBODY AT ALL
+  trans_keep <- trans_check[lengths(wb_trans_index) == 0, ]                       # (NEW METHOD)
+  # trans_keep <- trans_check[!wb_trans_index, ]                                  # (OLD METHOD)
+  
+  # preserve any flowlines that CROSS A WATERBODY BUT ALSO HAVE A TRANSECT LINE that does NOT cross any waterbodies
+  to_check <- to_check[to_check$id %in% unique(trans_keep$hy_id), ]
+  
+  # update the set of flowlines to keep, adding back flowlines that intersect a waterbody
+  # BUT STILL have transects that are NOT in the waterbody
+  to_keep <- dplyr::bind_rows(to_keep, to_check)
+  
+  # 'tmp_ids' of transects that are being checked and also the transects within trans_check 
+  # that were determined to be valid (are being kept)
+  check_ids <- unique(trans_check$tmp_id)
+  keep_ids <- unique(trans_keep$tmp_id)
+  
+  # logical vectors of which flowlines/transects to keep (KEEP == TRUE)
+  # - Remove any transects that are on flowlines that cross a waterbody AND the transect crosses the waterbody too.
+  # - Keep original transects that are not on flowlines that intersect waterbodies AND 
+  # also the transects that do NOT intersect waterbodies but are on a flowline that DOES intersect a waterbody
+  valid_flowlines <- flowlines$id %in% to_keep$id
+  valid_transects <- trans$tmp_id %in% dplyr::filter(trans,
+                                                     !tmp_id %in% check_ids[!check_ids %in% keep_ids])$tmp_id
+  
+  # return a list of logical vectors flagging the valid flowlines and transects 
+  return(
+    list(
+      "valid_flowlines" =  valid_flowlines,
+      "valid_transects" =  valid_transects
+    )
+  )
+  
+  # # within the transect lines that are on a flowline that crosses a waterbody, 
+  # # check if any of these transect lines DO NOT CROSS A WATERBODY AT ALL
+  # trans_keep <- trans_check[!trans_wb_index, ]
+  # # trans_keep <- trans_check[lengths(trans_wb_index2) == 0, ]
+  # 
+  # # preserve any flowlines that CROSS A WATERBODY BUT ALSO HAVE A TRANSECT LINE that does NOT cross any waterbodies
+  # to_check <- to_check[to_check$id %in% unique(trans_keep$hy_id), ]
+  # 
+  # # update flowlines to keep with flowlines that intersect a waterbody BUT STILL,
+  # # have transects that are NOT in the waterbody
+  # to_keep <- dplyr::bind_rows(to_keep, to_check)
+  # 
+  # # 'tmp_ids' of transects that are being checked and also the transects within trans_check 
+  # # that were determined to be valid (are being kept)
+  # check_ids <- unique(trans_check$tmp_id)
+  # keep_ids <- unique(trans_keep$tmp_id)
+  # 
+  # # logical vectors of which flowlines/transects to keep (KEEP == TRUE)
+  # # - Remove any transects that are on flowlines that cross a waterbody AND the transect crosses the waterbody too.
+  # # - Keep original transects that are not on flowlines that intersect waterbodies AND 
+  # # also the transects that do NOT intersect waterbodies but are on a flowline that DOES intersect a waterbody
+  # valid_flowlines <- flowlines$id %in% to_keep$id
+  # valid_transects <- trans$tmp_id %in% dplyr::filter(trans,
+  #                                                    !tmp_id %in% check_ids[!check_ids %in% keep_ids])$tmp_id
+  # 
+  # # return a list of updated flowlines and transects 
+  # return(
+  #   list(
+  #     "valid_flowlines" =  valid_flowlines,
+  #     "valid_transects" =  valid_transects
+  #   )
+  # )
+}
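+
+# Example use (a sketch; assumes the "flines", "transects", and "waterbodies"
+# objects read in by the 02_cs_pts.R runner):
+#   feature_subsets <- wb_intersects(flines, transects, waterbodies)
+#   flines    <- flines[feature_subsets$valid_flowlines, ]
+#   transects <- transects[feature_subsets$valid_transects, ]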
+
 #' Get the polygons that intersect with any of the linestring geometries
 #' This is just a wrapper around geos::geos_intersects_matrix. Takes in sf dataframes, uses geos, then outputs sf dataframes
 #' @param polygons polygon sf object. Default is NULL

From 6c6b7a559a1bbdbda3bb0e4655f3e2b64a530d50 Mon Sep 17 00:00:00 2001
From: anguswg-ucsb <anguswatters@gmail.com>
Date: Fri, 2 Aug 2024 09:05:44 -0700
Subject: [PATCH 54/64] removed duplicate S3_TRANSECTS_DIR variable declaration

---
 runners/cs_runner/01_transects.R | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/runners/cs_runner/01_transects.R b/runners/cs_runner/01_transects.R
index 2c2ce2f..01bc78a 100644
--- a/runners/cs_runner/01_transects.R
+++ b/runners/cs_runner/01_transects.R
@@ -8,8 +8,8 @@ source("runners/cs_runner/utils.R")
 # library(sf)
 # install.packages("devtools")
 
-# transect bucket prefix
-S3_TRANSECTS_DIR <- paste0(S3_BUCKET_URI, VERSION, "/3D/transects/")
+# # transect bucket prefix
+# S3_TRANSECTS_DIR <- paste0(S3_BUCKET_URI, VERSION, "/3D/transects/")
 
 # paths to nextgen datasets and model attribute parquet files
 NEXTGEN_FILES    <- list.files(NEXTGEN_DIR, full.names = FALSE)

From 967f40b1bd7e2991e86a6a3e1fd4667066270160 Mon Sep 17 00:00:00 2001
From: anguswg-ucsb <anguswatters@gmail.com>
Date: Mon, 26 Aug 2024 14:25:16 -0700
Subject: [PATCH 55/64] updated 02_cs_pts.R to use new ID-based hydrofabric3D
 updates

---
 runners/cs_runner/02_cs_pts.R    | 94 +++++++++++++++++++++++---------
 runners/cs_runner/03_inject_ml.R | 10 +++-
 2 files changed, 76 insertions(+), 28 deletions(-)

diff --git a/runners/cs_runner/02_cs_pts.R b/runners/cs_runner/02_cs_pts.R
index d28e43e..f91991b 100644
--- a/runners/cs_runner/02_cs_pts.R
+++ b/runners/cs_runner/02_cs_pts.R
@@ -1,8 +1,3 @@
-
-##############################################################################################################################
-################################################     REDO EVERYTHING   #######################################################
-##############################################################################################################################
-
 # Generate the flowlines layer for the final cross_sections_<VPU>.gpkg for each VPU
 source("runners/cs_runner/config.R")
 
@@ -11,20 +6,14 @@ library(hydrofabric3D)
 library(dplyr)
 library(sf)
 
-# # cross section bucket prefix
-# S3_CS_PTS_DIR <- paste0(S3_BUCKET_URI, VERSION, "/3D/dem-cross-sections/")
-# 
-# # transect bucket prefix
-# S3_TRANSECTS_DIR <- paste0(S3_BUCKET_URI, VERSION, "/3D/transects/")
-
 # paths to nextgen datasets
-NEXTGEN_FILES <- list.files(NEXTGEN_DIR, full.names = FALSE)
+NEXTGEN_FILES  <- list.files(NEXTGEN_DIR, full.names = FALSE)
 
 # paths to nextgen datasets
 transect_files <- list.files(TRANSECTS_DIR, full.names = FALSE)
 transect_files <- transect_files[!grepl("updated_", transect_files)]
 
-REF_FEATURES <- list.files(REF_FEATURES_GPKG_DIR, full.names = FALSE)
+REF_FEATURES   <- list.files(REF_FEATURES_GPKG_DIR, full.names = FALSE)
 
 # reference features dataframe
 ref_df <- data.frame(
@@ -46,11 +35,10 @@ path_df <- align_files_by_vpu(
 # loop over the nextgen and transect datasets (by VPU) and extract point elevations across points on each transect line,
 # then classify the points, and create a parquet file with hy_id, cs_id, pt_id, X, Y, Z data.
 # Save parquet locally and upload to specified S3 bucket
-for (i in 1:nrow(path_df)) {
-  # i = 8
-  
+for (i in 11:nrow(path_df)) {
   start <- Sys.time()
-  
+  # i = 8
+  message("Using newest Hydrofabric3D!!!")
   # nextgen file and full path
   nextgen_file <- path_df$x[i]
   nextgen_path <- paste0(NEXTGEN_DIR, nextgen_file)
@@ -64,7 +52,9 @@ for (i in 1:nrow(path_df)) {
   ref_path <- paste0(REF_FEATURES_DIR, "gpkg/", ref_file)
   
   # current VPU being processed
-  VPU = path_df$vpu[i]
+  VPU   <- path_df$vpu[i]
+  
+  start <- Sys.time()
   
   message("Creating VPU ", VPU, 
           " cross section points:\n - flowpaths: '", nextgen_file,
@@ -75,7 +65,6 @@ for (i in 1:nrow(path_df)) {
   
   ################### 
   
-  
   # read in transects data
   transects <- sf::read_sf(transect_path)
   
@@ -96,46 +85,94 @@ for (i in 1:nrow(path_df)) {
   gc()
   
   start_cs_pts <- Sys.time()
+  # # ------------------------------------------------------------------------
+  # # ------ TESTING DATA -------
+  # # ------------------------------------------------------------------------
+  #   flines <-
+  #     flines %>%
+  #     dplyr::slice(1:3500)
+  # 
+  # transects <-
+  #   transects %>%
+  #   dplyr::filter(hy_id %in% flines$id)
+
+  # ------------------------------------------------------------------------
+  
   message("Extracting cross section points (", start_cs_pts, ")")
   # ----------------------------------------------------------------------------------------------------------------
   # ---- STEP 1: Extract cs points from DEM ----
   # ----------------------------------------------------------------------------------------------------------------
-
+  # system.time({
+  
   # get cross section point elevations
   cs_pts <- hydrofabric3D::cross_section_pts(
+    
     cs             = transects,
+    crosswalk_id   = "hy_id",
     points_per_cs  = NULL,
     min_pts_per_cs = 10,
     dem            = DEM_URL
   )
 
+  # })
+  
   # ----------------------------------------------------------------------------------------------------------------
   # ---- STEP 2: Remove any cross section that has ANY missing (NA) Z values, and classify the points ----
   # ----------------------------------------------------------------------------------------------------------------
 
   # system.time({
-    
+  
   # STEP 2: Remove any cross section that has ANY missing (NA) Z values, and classify the points 
   cs_pts <- 
     cs_pts %>% 
     dplyr::group_by(hy_id, cs_id) %>% 
     dplyr::filter(!any(is.na(Z))) %>% 
     dplyr::ungroup() %>% 
-    hydrofabric3D::classify_points(pct_of_length_for_relief = PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF)  
+    hydrofabric3D::classify_points(
+      crosswalk_id             = "hy_id", 
+      pct_of_length_for_relief = PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF
+      )  
   
   # })
   
   ids_original_cs_pts <- hydrofabric3D::add_tmp_id(cs_pts)$tmp_id
   
+  # sf::write_sf(cs_pts, "/Users/anguswatters/Desktop/test_improve_cs_pts_06.gpkg")
+  # sf::write_sf(flines, "/Users/anguswatters/Desktop/test_improve_flines_06.gpkg")
+  # sf::write_sf(transects, "/Users/anguswatters/Desktop/test_improve_transects_06.gpkg")
+  # # 
+  
+  
   # ----------------------------------------------------------------------------------------------------------------
   # ---- STEP 3: Try to rectify any no relief and invalid banks cross sections ----
   # ----------------------------------------------------------------------------------------------------------------
+  # dplyr::rename(flines, hy_id = id)
+  # profvis::profvis({
   # system.time({
-  # NOTE: new inplace method for improving (rectifying) any invalid cross sections where we dont have banks and relief
-  fixed_pts <- hydrofabric3D::improve_invalid_cs(
+    
+  # # NOTE: new in-place method for improving (rectifying) any invalid cross sections where we don't have banks and relief
+  # fixed_pts <- hydrofabric3D::improve_invalid_cs2(
+  #   cs_pts         = cs_pts,    # cross section points generated from hydrofabric3D::cross_section_pts()
+  #   net            = dplyr::rename(flines, hy_id = id),    # original flowline network
+  #   # net            = flines,    # original flowline network
+  #   transects      = transects, # original transect lines
+  #   points_per_cs  = NULL, 
+  #   min_pts_per_cs = 10, # number of points per cross sections
+  #   dem            = DEM_URL, # DEM to extract points from
+  #   scale          = EXTENSION_PCT, # How far to extend transects if the points need to be rechecked
+  #   pct_of_length_for_relief = PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF, # percent of cross sections length to be needed in relief calculation to consider cross section to "have relief"
+  #   fix_ids = FALSE,
+  #   verbose = TRUE
+  # )
+  
+  
+  # system.time({
+  fixed_pts <- hydrofabric3D::get_improved_cs_pts(
     cs_pts         = cs_pts,    # cross section points generated from hydrofabric3D::cross_section_pts()
-    net            = flines,    # original flowline network
+    net            = dplyr::rename(flines, hy_id = id),    # original flowline network
+    # net            = flines,    # original flowline network
     transects      = transects, # original transect lines
+    crosswalk_id   = "hy_id",
     points_per_cs  = NULL, 
     min_pts_per_cs = 10, # number of points per cross sections
     dem            = DEM_URL, # DEM to extract points from
@@ -146,6 +183,8 @@ for (i in 1:nrow(path_df)) {
   )
   # })
   
+  # fixed_pts2$is_extended %>% sum()
+  
   ids_after_fixed_pts <- hydrofabric3D::add_tmp_id(fixed_pts)$tmp_id
 
 # # TODO: This is taking A LOT of time to process as inputs get larger; an improvement should be looked into more
@@ -354,8 +393,9 @@ for (i in 1:nrow(path_df)) {
   
   message("Reclassifying cross section points...")
   fixed_pts <- hydrofabric3D::classify_points(
-                  cs_pts                   = fixed_pts, 
-                  pct_of_length_for_relief = PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF
+                  cs_pts                    = fixed_pts, 
+                  crosswalk_id              = "hy_id",
+                  pct_of_length_for_relief  = PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF
                   )
   
   ids_after_reclassify <- hydrofabric3D::add_tmp_id(fixed_pts)$tmp_id
diff --git a/runners/cs_runner/03_inject_ml.R b/runners/cs_runner/03_inject_ml.R
index 0321003..834dec1 100644
--- a/runners/cs_runner/03_inject_ml.R
+++ b/runners/cs_runner/03_inject_ml.R
@@ -225,6 +225,10 @@ for (i in 1:nrow(path_df)) {
       DINGMAN_R = owp_dingman_r
     )
   
+  # bankful_cs %>% 
+  #   dplyr::slice(1000:1200) %>% 
+  #   hydrofabric3D::plot_cs_pts(color = "point_type")
+  
   # sanity check that all rows are accounted for after splitting up data
   split_kept_all_rows <- nrow(cs_pts) == nrow(bankful_cs) + nrow(inchannel_cs) + nrow(missing_cs)
   # split_kept_all_rows <- nrow(cs_pts) == nrow(bankful_cs) + nrow(inchannel_cs)
@@ -242,7 +246,11 @@ for (i in 1:nrow(path_df)) {
   inchannel_cs <- hydrofabric3D::add_cs_bathymetry(
       cross_section_pts = inchannel_cs
     )
-  
+  # arrow::write_parquet(inchannel_cs, "/Users/anguswatters/Desktop/test_ml_cs_pts_06.parquet")
+  # ml_subset %>%
+  #   dplyr::filter(hy_id == "wb-1005207") %>%
+  #   dplyr::select(owp_y_inchan, owp_tw_inchan) %>% 
+  #   .$owp_y_inchan
   message(round(Sys.time()), " - Adding cross section bathymetry using bankful widths/depths estimates...")
 
   # Add bathymetry using "bankful" estimates

From 1998c4282c8090a80e2ed7787e21898c2ea52ba5 Mon Sep 17 00:00:00 2001
From: anguswg-ucsb <anguswatters@gmail.com>
Date: Mon, 26 Aug 2024 14:26:47 -0700
Subject: [PATCH 56/64] added crosswalk_id parameter to classify_points
 function in ml_inject workflow

---
 runners/cs_runner/03_inject_ml.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/runners/cs_runner/03_inject_ml.R b/runners/cs_runner/03_inject_ml.R
index 834dec1..9b34c03 100644
--- a/runners/cs_runner/03_inject_ml.R
+++ b/runners/cs_runner/03_inject_ml.R
@@ -288,7 +288,7 @@ for (i in 1:nrow(path_df)) {
   message(round(Sys.time()), " - Reclassifying cross section point types...")
   
   # reclassify
-  final_cs <- hydrofabric3D::classify_points(final_cs)
+  final_cs <- hydrofabric3D::classify_points(final_cs, crosswalk_id = "hy_id")
 
   # final_uids <- final_cs %>% hydrofabric3D::get_unique_tmp_ids()
   # random_uids <- sample(x=final_uids, size=12)

From 3ac563da09a9138fe55f856bf215f269f1ce9ccf Mon Sep 17 00:00:00 2001
From: anguswg-ucsb <anguswatters@gmail.com>
Date: Wed, 25 Sep 2024 15:50:51 -0700
Subject: [PATCH 57/64] set up new independent file (domain_with_fema.R) for
 generating a FEMA-informed set of transect lines and cross section points
 given a CONUS network and an AOI for subsetting that network

---
 runners/cs_runner/02_cs_pts.R        |  75 ++--
 runners/cs_runner/config.R           | 245 ++-----------
 runners/cs_runner/config_vars.R      |  69 ++++
 runners/cs_runner/domain_with_fema.R | 471 +++++++++++++++++++++++++
 runners/cs_runner/new_domain.R       | 510 +++++++++++++++++++++++++++
 runners/cs_runner/utils.R            | 229 ++++++++++++
 6 files changed, 1346 insertions(+), 253 deletions(-)
 create mode 100644 runners/cs_runner/domain_with_fema.R
 create mode 100644 runners/cs_runner/new_domain.R

diff --git a/runners/cs_runner/02_cs_pts.R b/runners/cs_runner/02_cs_pts.R
index f91991b..4713c4b 100644
--- a/runners/cs_runner/02_cs_pts.R
+++ b/runners/cs_runner/02_cs_pts.R
@@ -35,10 +35,10 @@ path_df <- align_files_by_vpu(
 # loop over the nextgen and transect datasets (by VPU) and extract point elevations across points on each transect line,
 # then classify the points, and create a parquet file with hy_id, cs_id, pt_id, X, Y, Z data.
 # Save parquet locally and upload to specified S3 bucket
-for (i in 11:nrow(path_df)) {
+for (i in 20:nrow(path_df)) {
+  
   start <- Sys.time()
-  # i = 8
-  message("Using newest Hydrofabric3D!!!")
+  
   # nextgen file and full path
   nextgen_file <- path_df$x[i]
   nextgen_path <- paste0(NEXTGEN_DIR, nextgen_file)
@@ -64,13 +64,15 @@ for (i in 11:nrow(path_df)) {
   )
   
   ################### 
-  
+  message("Reading in transects...\n > ", transect_file)
   # read in transects data
   transects <- sf::read_sf(transect_path)
   
+  message("Reading in flowlines... \n > ", nextgen_file)
   # read in nextgen data
   flines <- sf::read_sf(nextgen_path, layer = "flowpaths")
   
+  message("Reading in waterbodies... \n > ", ref_file)
   # read in waterbodies reference features layer
   waterbodies <- sf::read_sf(ref_path, layer = "waterbodies")
 
@@ -119,15 +121,31 @@ for (i in 11:nrow(path_df)) {
   # ----------------------------------------------------------------------------------------------------------------
   # ---- STEP 2: Remove any cross section that has ANY missing (NA) Z values, and classify the points ----
   # ----------------------------------------------------------------------------------------------------------------
-
+  
+  # sf::write_sf(cs_pts, "/Users/anguswatters/Desktop/test_improve_cs_pts_11.gpkg")
+  # sf::write_sf(flines, "/Users/anguswatters/Desktop/test_improve_flines_11.gpkg")
+  # sf::write_sf(transects, "/Users/anguswatters/Desktop/test_improve_transects_11.gpkg")
+  
+  # sf::write_sf(flines, "/Users/anguswatters/Desktop/test_improve_flines_11_2.gpkg")
+  # sf::write_sf(transects, "/Users/anguswatters/Desktop/test_improve_transects_11_2.gpkg")
+  # cs_pts %>% 
+  #   dplyr::group_by(hy_id, cs_id) %>% 
+  #   dplyr::filter(!any(is.na(Z))) %>% 
+  #   dplyr::ungroup()
+  # 
+  # cs_pts %>% 
+  #   hydrofabric3D::drop_incomplete_cs_pts("hy_id")
+  
   # system.time({
   
   # STEP 2: Remove any cross section that has ANY missing (NA) Z values, and classify the points 
   cs_pts <- 
+  # cs_pts2 <- 
     cs_pts %>% 
-    dplyr::group_by(hy_id, cs_id) %>% 
-    dplyr::filter(!any(is.na(Z))) %>% 
-    dplyr::ungroup() %>% 
+    # dplyr::group_by(hy_id, cs_id) %>% 
+    # dplyr::filter(!any(is.na(Z))) %>% 
+    # dplyr::ungroup() %>% 
+    hydrofabric3D::drop_incomplete_cs_pts("hy_id") %>% 
     hydrofabric3D::classify_points(
       crosswalk_id             = "hy_id", 
       pct_of_length_for_relief = PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF
@@ -135,12 +153,11 @@ for (i in 11:nrow(path_df)) {
   
   # })
   
-  ids_original_cs_pts <- hydrofabric3D::add_tmp_id(cs_pts)$tmp_id
+  ids_original_cs_pts <- hydrofabric3D::add_tmp_id(cs_pts)$tmp_id  
+  # ids_original_cs_pts <- hydrofabric3D::add_tmp_id(cs_pts2)$tmp_id
   
-  # sf::write_sf(cs_pts, "/Users/anguswatters/Desktop/test_improve_cs_pts_06.gpkg")
-  # sf::write_sf(flines, "/Users/anguswatters/Desktop/test_improve_flines_06.gpkg")
-  # sf::write_sf(transects, "/Users/anguswatters/Desktop/test_improve_transects_06.gpkg")
-  # # 
+  # sf::write_sf(cs_pts2, "/Users/anguswatters/Desktop/test_improve_cs_pts_classified_11.gpkg")
+  # sf::write_sf(cs_pts, "/Users/anguswatters/Desktop/test_improve_cs_pts_classified_11_2.gpkg")
   
   
   # ----------------------------------------------------------------------------------------------------------------
@@ -167,20 +184,20 @@ for (i in 11:nrow(path_df)) {
   
   
   # system.time({
-  fixed_pts <- hydrofabric3D::get_improved_cs_pts(
-    cs_pts         = cs_pts,    # cross section points generated from hydrofabric3D::cross_section_pts()
-    net            = dplyr::rename(flines, hy_id = id),    # original flowline network
-    # net            = flines,    # original flowline network
-    transects      = transects, # original transect lines
-    crosswalk_id   = "hy_id",
-    points_per_cs  = NULL, 
-    min_pts_per_cs = 10, # number of points per cross sections
-    dem            = DEM_URL, # DEM to extract points from
-    scale          = EXTENSION_PCT, # How far to extend transects if the points need to be rechecked
-    pct_of_length_for_relief = PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF, # percent of cross sections length to be needed in relief calculation to consider cross section to "have relief"
-    fix_ids = FALSE,
-    verbose = TRUE
-  )
+    fixed_pts <- hydrofabric3D::get_improved_cs_pts(
+      cs_pts         = cs_pts,    # cross section points generated from hydrofabric3D::cross_section_pts()
+      net            = dplyr::rename(flines, hy_id = id),    # original flowline network
+      # net            = flines,    # original flowline network
+      transects      = transects, # original transect lines
+      crosswalk_id   = "hy_id",
+      points_per_cs  = NULL, 
+      min_pts_per_cs = 10, # number of points per cross section
+      dem            = DEM_URL, # DEM to extract points from
+      scale          = EXTENSION_PCT, # How far to extend transects if the points need to be rechecked
+      pct_of_length_for_relief = PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF, # percent of a cross section's length that the relief must span for the cross section to "have relief"
+      fix_ids = FALSE,
+      verbose = TRUE
+    )
   # })
   
   # fixed_pts2$is_extended %>% sum()
@@ -219,6 +236,7 @@ for (i in 11:nrow(path_df)) {
   
   # get the counts of each point type to add this data to the transects dataset
   point_type_counts <- hydrofabric3D::get_point_type_counts(fixed_pts)
+  # point_type_counts <- hydrofabric3D::get_point_type_counts(fixed_pts, crosswalk_id = "hy_id")
   
   # # check the number of cross sections that were extended
   # fixed_pts$is_extended %>% table()
@@ -387,7 +405,8 @@ for (i in 11:nrow(path_df)) {
   ids_before_align <- hydrofabric3D::add_tmp_id(fixed_pts)$tmp_id
   
   message("Aligning banks and smoothing bottoms...")
-  fixed_pts <- hydrofabric3D::align_banks_and_bottoms(fixed_pts)
+  fixed_pts <- hydrofabric3D::align_banks_and_bottoms(cs_pts = fixed_pts)
+  # fixed_pts <- hydrofabric3D::align_banks_and_bottoms(cs_pts = fixed_pts, crosswalk_id = "hy_id")
   
   ids_after_align <- hydrofabric3D::add_tmp_id(fixed_pts)$tmp_id
   
diff --git a/runners/cs_runner/config.R b/runners/cs_runner/config.R
index a839f61..5a435b3 100644
--- a/runners/cs_runner/config.R
+++ b/runners/cs_runner/config.R
@@ -112,7 +112,27 @@ list_ref_features <- paste0('#!/bin/bash
             echo "$S3_OBJECTS"'
 )
 
+# --------------------------------------------------------------------------
+# ---- Create empty file structure for a "new_domain" ----
+# --------------------------------------------------------------------------
+
+create_new_domain_dirs(BASE_DIR, NEW_DOMAIN_DIRNAME)
+
+# --------------------------------------------------------------------------
+# ---- Create empty file structure for a "domain_with_fema" ----
+# --------------------------------------------------------------------------
+
+create_new_domain_dirs(BASE_DIR, DOMAIN_WITH_FEMA_DIRNAME)
+
+# --------------------------------------------------------------------------
+# ---- Create empty file structure for a "new_conus_domain" ----
+# --------------------------------------------------------------------------
+
+create_new_domain_dirs(BASE_DIR, NEW_CONUS_DOMAIN_DIRNAME)
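+
+# (create_new_domain_dirs() is assumed to be defined in runners/cs_runner/utils.R;
+# judging by the path variables in config_vars.R, it presumably creates
+# BASE_DIR/<dirname> with flowlines/, dem/, transects/, cs_pts/, and
+# cross_sections/ subdirectories.)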
+
+# --------------------------------------------------------------------------
 # ---- Get a list of reference features geopackages ----
+# --------------------------------------------------------------------------
 
 # Run the script to get a list of the reference features geopackages that matched the regular expression above
 ref_features <- system(list_ref_features, intern = TRUE)
@@ -120,228 +140,3 @@ ref_features <- system(list_ref_features, intern = TRUE)
 # ref features datasets
 ref_features_keys  <- paste0(S3_BUCKET_REF_FEATURES_URI, ref_features)
 ref_features_files <- paste0(REF_FEATURES_DIR, "gpkg/", ref_features)
-
-###
-### UTILITY FUNCTION FOR MATCHING FILES BASED ON VPU STRING ###
-###
-
-# Given 2 character vectors of filenames both including VPU strings after a "nextgen_" string, match them together to
-# make sure they are aligned and in the same order
-# x is a character vector of file paths with a VPU ID preceeded by a "nextgen_" string 
-# y is a character vector of file paths with a VPU ID preceeded by a "nextgen_" string 
-# base is a character vector of the base directory of the files. Defaults to NULL
-# Returns a dataframe with VPU, x, and y columns
-align_files_by_vpu <- function(
-    x, 
-    y, 
-    base = NULL
-) {
-
-  # Regular expression pattern to match numeric pattern after "nextgen_" and remove everything after the ending period
-  regex_pattern <- "nextgen_(\\d+[A-Za-z]?).*"
-  
-  # path dataframe for X filepaths
-  x_paths <- data.frame(x = x)
-  
-  # path dataframe for Y filepaths
-  y_paths <- data.frame(y = y)
-  
-  # generate VPU IDs based on file path regular expression matching with "regex_pattern" above
-  x_paths$vpu <- gsub(regex_pattern, "\\1", x_paths$x)
-  y_paths$vpu <- gsub(regex_pattern, "\\1", y_paths$y)
-  
-  # match paths based on VPU column
-  matched_paths <- dplyr::left_join(
-    x_paths,
-    y_paths,
-    by = "vpu"
-  ) 
-  
-  # reorder columns
-  matched_paths <- dplyr::relocate(matched_paths, vpu, x, y)
-  
-  if(!is.null(base)) {
-    matched_paths$base_dir <- base
-  }
-  
-  return(matched_paths)
-  
-}
-
-# Update flowlines and transects to remove flowlines and transects that intersect with reference_features waterbodies
-# flowlines: flowlines linestring sf object
-# trans: transects linestring sf object
-# waterbodies: waterbodies polygon sf object
-# Returns a list of length 2 with logical vectors that subsets the "flowlines" and "transects" sf objects to remove flowlines and transects that intersect waterbodies
-### Returns a list of length 2 with updated "flowlines" and "transects" sf objects
-wb_intersects <- function(flowlines, trans, waterbodies) {
-  
-  ########  ########  ########  ########  ########  ########
-  
-  flowlines_geos <- geos::as_geos_geometry(flowlines)
-  wbs_geos <- geos::as_geos_geometry(waterbodies)
-  
-  # temporary ID for transects that is the "hy_id", underscore, "cs_id", used for subsetting in future steps
-  trans$tmp_id <- paste0(trans$hy_id, "_", trans$cs_id)
-  
-  message("Checking flowlines against waterbodies...")
-  
-  # create an index between flowlines and waterbodies 
-  wb_index <- geos::geos_intersects_matrix(flowlines_geos, wbs_geos)
-  
-  # remove any flowlines that cross more than 1 waterbody
-  to_keep  <- flowlines[lengths(wb_index) == 0, ]
-  to_check <- flowlines[lengths(wb_index) != 0, ]
-  
-  # subset transects to the hy_ids in "to_check" set of flowlines
-  trans_check <- trans[trans$hy_id %in% unique(to_check$id), ]
-  # trans_check <- trans_geos[trans$hy_id %in% unique(to_check$id)]
-  
-  # check where the transects linestrings intersect with the waterbodies
-  trans_geos_check <- geos::as_geos_geometry(trans_check)
-  
-  message("Checking transects against waterbodies (v2) ...")
-  wb_trans_index <- geos::geos_intersects_matrix(trans_geos_check, wbs_geos)                    # (NEW METHOD)
-  # wb_trans_index <- geos::geos_intersects_any(trans_geos_check, wbs_geos[unlist(wb_index)])   # (OLD METHOD)
-  
-  # sum(lengths(wb_trans_index) == 0)
-  # length(wb_trans_index)
-  
-  # within the transects lines that are on a flowline that crosses a waterbody, 
-  # check if any of these transects line DO NOT CROSS A WATERBODY AT ALL
-  trans_keep <- trans_check[lengths(wb_trans_index) == 0, ]                       # (NEW METHOD)
-  # trans_keep <- trans_check[!wb_trans_index, ]                                  # (OLD METHOD)
-  
-  # preserve any flowlines that CROSS A WATERBODY BUT ALSO HAVE A TRANSECT LINE that does NOT cross any waterbodies
-  to_check <- to_check[to_check$id %in% unique(trans_keep$hy_id), ]
-  
-  # update flowlines to keep with flowlines that intersect a waterbody BUT STILL,
-  # have transects that are NOT in the waterbody
-  to_keep <- dplyr::bind_rows(to_keep, to_check)
-  
-  # 'tmp_ids' of transects that are being checked and also the transects within trans_check 
-  # that were determined to be valid (are being kept)
-  check_ids <- unique(trans_check$tmp_id)
-  keep_ids <- unique(trans_keep$tmp_id)
-  
-  # logical vectors of which flowlines/transects to keep (KEEP == TRUE)
-  # - Remove any transects that are on flowlines that cross a waterbody AND the transect crosses the waterbody too.
-  # - Keep original transects that are not on flowlines that intersect waterbodies AND 
-  # also the transects that do NOT intersect waterbodies but are on a flowline that DOES intersect a waterbody
-  valid_flowlines <- flowlines$id %in% to_keep$id
-  valid_transects <- trans$tmp_id %in% dplyr::filter(trans,
-                                                     !tmp_id %in% check_ids[!check_ids %in% keep_ids])$tmp_id
-  
-  # return alist of updated flowlines and transects 
-  return(
-    list(
-      "valid_flowlines" =  valid_flowlines,
-      "valid_transects" =  valid_transects
-    )
-  )
-  
-  # # within the transects lines that are on a flowline that crosses a waterbody, 
-  # # check if any of these transects line DO NOT CROSS A WATERBODY AT ALL
-  # trans_keep <- trans_check[!trans_wb_index, ]
-  # # trans_keep <- trans_check[lengths(trans_wb_index2) == 0, ]
-  # 
-  # # preserve any flowlines that CROSS A WATERBODY BUT ALSO HAVE A TRANSECT LINE that does NOT cross any waterbodies
-  # to_check <- to_check[to_check$id %in% unique(trans_keep$hy_id), ]
-  # 
-  # # update flowlines to keep with flowlines that intersect a waterbody BUT STILL,
-  # # have transects that are NOT in the waterbody
-  # to_keep <- dplyr::bind_rows(to_keep, to_check)
-  # 
-  # # 'tmp_ids' of transects that are being checked and also the transects within trans_check 
-  # # that were determined to be valid (are being kept)
-  # check_ids <- unique(trans_check$tmp_id)
-  # keep_ids <- unique(trans_keep$tmp_id)
-  # 
-  # # logical vectors of which flowlines/transects to keep (KEEP == TRUE)
-  # # - Remove any transects that are on flowlines that cross a waterbody AND the transect crosses the waterbody too.
-  # # - Keep original transects that are not on flowlines that intersect waterbodies AND 
-  # # also the transects that do NOT intersect waterbodies but are on a flowline that DOES intersect a waterbody
-  # valid_flowlines <- flowlines$id %in% to_keep$id
-  # valid_transects <- trans$tmp_id %in% dplyr::filter(trans,
-  #                                                    !tmp_id %in% check_ids[!check_ids %in% keep_ids])$tmp_id
-  # 
-  # # return alist of updated flowlines and transects 
-  # return(
-  #   list(
-  #     "valid_flowlines" =  valid_flowlines,
-  #     "valid_transects" =  valid_transects
-  #   )
-  # )
-}
-
-# Update flowlines and transects to remove flowlines and transects that intersect with reference_features waterbodies
-# flowlines: flowlines linestring sf object
-# trans: transects linestring sf object
-# waterbodies: waterbodies polygon sf object
-# Returns a list of length 2 with logical vectors that subsets the "flowlines" and "transects" sf objects to remove flowlines and transects that intersect waterbodies
-### Returns a list of length 2 with updated "flowlines" and "transects" sf objects
-wb_intersects_v1 <- function(flowlines, trans, waterbodies) {
-  
-  ########  ########  ########  ########  ########  ########
-  
-  flowlines_geos <- geos::as_geos_geometry(flowlines)
-  wbs_geos <- geos::as_geos_geometry(waterbodies)
-  
-  # temporary ID for transects that is the "hy_id", underscore, "cs_id", used for subsetting in future steps
-  trans$tmp_id <- paste0(trans$hy_id, "_", trans$cs_id)
-  
-  message("Checking flowlines against waterbodies...")
-  
-  # create an index between flowlines and waterbodies 
-  wb_index <- geos::geos_intersects_matrix(flowlines_geos, wbs_geos)
-  
-  # remove any flowlines that cross more than 1 waterbody
-  to_keep  <- flowlines[lengths(wb_index) == 0, ]
-  to_check <- flowlines[lengths(wb_index) != 0, ]
-  
-  # subset transects to the hy_ids in "to_check" set of flowlines
-  trans_check <- trans[trans$hy_id %in% unique(to_check$id), ]
-  # trans_check <- trans_geos[trans$hy_id %in% unique(to_check$id)]
-  
-  # check where the transects linestrings intersect with the waterbodies
-  trans_geos_check <- geos::as_geos_geometry(trans_check)
-  
-  message("Checking transects against waterbodies...")
-  trans_wb_index <- geos::geos_intersects_any(
-    trans_geos_check,  
-    wbs_geos[unlist(wb_index)]
-  )
-  
-  # within the transects lines that are on a flowline that crosses a waterbody, 
-  # check if any of these transects line DO NOT CROSS A WATERBODY AT ALL
-  trans_keep <- trans_check[!trans_wb_index, ]
-  # trans_keep <- trans_check[lengths(trans_wb_index2) == 0, ]
-  
-  # preserve any flowlines that CROSS A WATERBODY BUT ALSO HAVE A TRANSECT LINE that does NOT cross any waterbodies
-  to_check <- to_check[to_check$id %in% unique(trans_keep$hy_id), ]
-  
-  # update flowlines to keep with flowlines that intersect a waterbody BUT STILL,
-  # have transects that are NOT in the waterbody
-  to_keep <- dplyr::bind_rows(to_keep, to_check)
-  
-  # 'tmp_ids' of transects that are being checked and also the transects within trans_check 
-  # that were determined to be valid (are being kept)
-  check_ids <- unique(trans_check$tmp_id)
-  keep_ids <- unique(trans_keep$tmp_id)
-  
-  # logical vectors of which flowlines/transects to keep (KEEP == TRUE)
-  # - Remove any transects that are on flowlines that cross a waterbody AND the transect crosses the waterbody too.
-  # - Keep original transects that are not on flowlines that intersect waterbodies AND 
-  # also the transects that do NOT intersect waterbodies but are on a flowline that DOES intersect a waterbody
-  valid_flowlines <- flowlines$id %in% to_keep$id
-  valid_transects <- trans$tmp_id %in% dplyr::filter(trans,
-                                                     !tmp_id %in% check_ids[!check_ids %in% keep_ids])$tmp_id
-  
-  # return alist of updated flowlines and transects 
-  return(
-    list(
-      "valid_flowlines" =  valid_flowlines,
-      "valid_transects" =  valid_transects
-    )
-  )
-}
diff --git a/runners/cs_runner/config_vars.R b/runners/cs_runner/config_vars.R
index 5c4a713..dab3c25 100644
--- a/runners/cs_runner/config_vars.R
+++ b/runners/cs_runner/config_vars.R
@@ -170,4 +170,73 @@ COLLECT_META <- TRUE
 META_PATH    <- '/Users/anguswatters/Desktop/cs_meta/'
 # META_PATH <-  "/local/path/to/save/cross_section_meta_data/"
 
+# -------------------------------------------------------------------------------------
+# ---- (New single domain) Local directory / path variables ----
+# -------------------------------------------------------------------------------------
+
+# directory for new domain data 
+NEW_DOMAIN_DIRNAME  <- "new_domain"
+NEW_DOMAIN_DIR      <- paste0(BASE_DIR, "/", NEW_DOMAIN_DIRNAME)
+
+NEW_DOMAIN_FLOWLINES_DIRNAME <- "flowlines"
+NEW_DOMAIN_FLOWLINES_DIR     <- paste0(NEW_DOMAIN_DIR, "/", NEW_DOMAIN_FLOWLINES_DIRNAME)
+
+NEW_DOMAIN_DEM_DIRNAME <- "dem"
+NEW_DOMAIN_DEM_DIR     <- paste0(NEW_DOMAIN_DIR, "/", NEW_DOMAIN_DEM_DIRNAME)
+
+NEW_DOMAIN_FLOWLINES_FILE  <- "AllDiffusiveCombined.gpkg"
+NEW_DOMAIN_FLOWLINES_PATH  <- paste0(NEW_DOMAIN_FLOWLINES_DIR, "/", NEW_DOMAIN_FLOWLINES_FILE)
+
+# # Local DEM file
+# NEW_DOMAIN_DEM_FILE        <- "hi_dem.tif"
+# NEW_DOMAIN_DEM_PATH        <- paste0(NEW_DOMAIN_DEM_DIR, "/", NEW_DOMAIN_DEM_FILE)
+
+# Remote DEM file
+NEW_DOMAIN_DEM_PATH        <- "/vsicurl/https://prd-tnm.s3.amazonaws.com/StagedProducts/Elevation/1/TIFF/USGS_Seamless_DEM_1.vrt"
+
+NEW_DOMAIN_TRANSECTS_DIRNAME         <- "transects"
+NEW_DOMAIN_CS_PTS_DIRNAME            <- "cs_pts"
+NEW_DOMAIN_CROSS_SECTIONS_DIRNAME    <- "cross_sections"
+
+NEW_DOMAIN_TRANSECTS_DIR          <- paste0(NEW_DOMAIN_DIR, "/", NEW_DOMAIN_TRANSECTS_DIRNAME)
+NEW_DOMAIN_CS_PTS_DIR             <- paste0(NEW_DOMAIN_DIR, "/", NEW_DOMAIN_CS_PTS_DIRNAME)
+NEW_DOMAIN_CROSS_SECTIONS_DIR     <- paste0(NEW_DOMAIN_DIR, "/", NEW_DOMAIN_CROSS_SECTIONS_DIRNAME)
+
+# -------------------------------------------------------------------------------------
+# ---- (Domain with FEMA) Local directory / path variables ----
+# -------------------------------------------------------------------------------------
+
+# directory for domain-with-FEMA data 
+DOMAIN_WITH_FEMA_DIRNAME  <- "domain_with_fema"
+DOMAIN_WITH_FEMA_DIR      <- paste0(BASE_DIR, "/", DOMAIN_WITH_FEMA_DIRNAME)
+
+DOMAIN_WITH_FEMA_FLOWLINES_DIRNAME <- "flowlines"
+DOMAIN_WITH_FEMA_FLOWLINES_DIR     <- paste0(DOMAIN_WITH_FEMA_DIR, "/", DOMAIN_WITH_FEMA_FLOWLINES_DIRNAME)
+
+DOMAIN_WITH_FEMA_SUBSET_DIRNAME  <- "domain_subset"
+DOMAIN_WITH_FEMA_SUBSET_DIR      <- paste0(DOMAIN_WITH_FEMA_DIR, "/", DOMAIN_WITH_FEMA_SUBSET_DIRNAME)
+
+DOMAIN_WITH_FEMA_DEM_DIRNAME     <- "dem"
+DOMAIN_WITH_FEMA_DEM_DIR         <- paste0(DOMAIN_WITH_FEMA_DIR, "/", DOMAIN_WITH_FEMA_DEM_DIRNAME)
+
+DOMAIN_WITH_FEMA_FLOWLINES_FILE  <- "ls_conus.gpkg"
+DOMAIN_WITH_FEMA_FLOWLINES_PATH  <- paste0(DOMAIN_WITH_FEMA_FLOWLINES_DIR, "/", DOMAIN_WITH_FEMA_FLOWLINES_FILE)
+
+# Geopackage containing area to subset flowlines to before processing
+DOMAIN_WITH_FEMA_SUBSET_FILE     <- "AllDiffusiveCombined.gpkg"
+DOMAIN_WITH_FEMA_SUBSET_PATH     <- paste0(DOMAIN_WITH_FEMA_SUBSET_DIR, "/", DOMAIN_WITH_FEMA_SUBSET_FILE)
+
+# # Local DEM file
+# DOMAIN_WITH_FEMA_DEM_FILE        <- "hi_dem.tif"
+# DOMAIN_WITH_FEMA_DEM_PATH        <- paste0(DOMAIN_WITH_FEMA_DEM_DIR, "/", DOMAIN_WITH_FEMA_DEM_FILE)
+
+# Remote DEM file
+DOMAIN_WITH_FEMA_DEM_PATH        <- "/vsicurl/https://prd-tnm.s3.amazonaws.com/StagedProducts/Elevation/1/TIFF/USGS_Seamless_DEM_1.vrt"
+
+DOMAIN_WITH_FEMA_TRANSECTS_DIRNAME         <- "transects"
+DOMAIN_WITH_FEMA_CS_PTS_DIRNAME            <- "cs_pts"
+DOMAIN_WITH_FEMA_CROSS_SECTIONS_DIRNAME    <- "cross_sections"
 
+DOMAIN_WITH_FEMA_TRANSECTS_DIR          <- paste0(DOMAIN_WITH_FEMA_DIR, "/", DOMAIN_WITH_FEMA_TRANSECTS_DIRNAME)
+DOMAIN_WITH_FEMA_CS_PTS_DIR             <- paste0(DOMAIN_WITH_FEMA_DIR, "/", DOMAIN_WITH_FEMA_CS_PTS_DIRNAME)
+DOMAIN_WITH_FEMA_CROSS_SECTIONS_DIR     <- paste0(DOMAIN_WITH_FEMA_DIR, "/", DOMAIN_WITH_FEMA_CROSS_SECTIONS_DIRNAME)
diff --git a/runners/cs_runner/domain_with_fema.R b/runners/cs_runner/domain_with_fema.R
new file mode 100644
index 0000000..b425de5
--- /dev/null
+++ b/runners/cs_runner/domain_with_fema.R
@@ -0,0 +1,471 @@
+# Generate the transects + cs_pts + cross sections layers for a single flowlines domain file and DEM file 
+source("runners/cs_runner/config.R")
+source("runners/cs_runner/utils.R")
+
+# # # # load libraries
+# library(hydrofabric3D)
+# library(dplyr)
+# library(sf)
+# install.packages("devtools")
+
+# # transect bucket prefix
+# S3_TRANSECTS_DIR <- paste0(S3_BUCKET_URI, VERSION, "/3D/transects/")
+
+# paths to NEW DOMAIN datasets 
+# NEW_DOMAIN_FILES    <- list.files(NEW_DOMAIN_FLOWLINES_DIR, full.names = TRUE)
+
+# Unique Flowline ID column name
+CROSSWALK_ID <- "id"
+
+# ---------------------------------------------------------------------
+# --- Read in flowlines
+# ---------------------------------------------------------------------
+# sf::st_layers(DOMAIN_WITH_FEMA_SUBSET_PATH)
+
+# Subsetting area
+aoi     <- sf::read_sf(DOMAIN_WITH_FEMA_SUBSET_PATH, layer = "divides")
+aoi     <- rmapshaper::ms_simplify(aoi, keep = 0.05)
+
+id_col <- "id"
+# id_col <- "divide_id"
+
+# query the conus.gpkg for matching IDs
+ids        <- unique(aoi[[id_col]])
+query_ids  <- paste0(paste0("'", ids, "'"), collapse= ", ")
+
+gpkg_layers <- sf::st_layers(DOMAIN_WITH_FEMA_FLOWLINES_PATH)
+layer       <- gpkg_layers$name[gpkg_layers$name == "flowpaths"]
+
+wkt <- 
+  aoi %>% 
+  rmapshaper::ms_dissolve() %>% 
+  # rmapshaper::ms_explode() %>% 
+  # sf::st_as_sfc() %>%
+  sf::st_sf() %>%
+  sf::st_geometry() %>%
+  sf::st_as_text()
+
+# wkt   <- sf::st_as_text(sf::st_geometry(aoi))
+# wkt <-
+#   aoi %>%
+#   # sf::st_bbox() %>%
+#   sf::st_as_sfc() %>%
+#   sf::st_sf() %>%
+#   sf::st_geometry() %>%
+#   sf::st_as_text()
+
+# length(wkt)
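+
+# NOTE: wkt is a single WKT string for the dissolved AOI (e.g. "MULTIPOLYGON (((...)))");
+# passing it as "wkt_filter" below has GDAL read only the flowlines intersecting that geometry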
+
+# read in flowlines based on IDs in AOI
+flines <- sf::read_sf(DOMAIN_WITH_FEMA_FLOWLINES_PATH, layer = "flowpaths",
+                      wkt_filter = wkt
+                      # query = sprintf("SELECT * FROM \"%s\" WHERE %s IN (%s)", layer, id_col, query_ids),
+                      # query = query
+                      )
+
+# ---------------------------------------------------------------------
+# --- Split flowlines by VPU
+# ---------------------------------------------------------------------
+
+# VPUs polygons
+VPU_boundaries   <- sf::st_transform(nhdplusTools::vpu_boundaries, sf::st_crs(flines))
+
+# add a VPU ID column to each flowline
+flines <- add_intersects_ids(x = flines, y = VPU_boundaries, id_col = "VPUID")
+
+# set of unique VPUs
+VPU_IDS <- unnest_ids(flines$VPUID)
+
+# all possible FEMA dirs
+AOI_FEMA_DIRS <- FEMA_VPU_SUBFOLDERS[basename(FEMA_VPU_SUBFOLDERS) %in% paste0("VPU_", VPU_IDS) ]
+
+# flines %>% 
+#   sf::st_drop_geometry() %>% 
+#   dplyr::group_by(VPUID) %>% 
+#   dplyr::count()
+# unique(flines$VPUID)
+
+# calculate bankfull width
+flines <-
+  flines %>%
+  dplyr::mutate(
+    bf_width = hydrofabric3D::calc_powerlaw_bankful_width(tot_drainage_areasqkm)
+    # bf_width = exp(0.700    + 0.365* log(tot_drainage_areasqkm))
+  ) %>%
+  dplyr::select(
+    dplyr::any_of(CROSSWALK_ID),
+    VPUID,
+    # hy_id = id,
+    lengthkm,
+    tot_drainage_areasqkm,
+    bf_width,
+    mainstem
+  ) %>% 
+  hydroloom::rename_geometry("geometry")
+
+# save the flowlines subset 
+DOMAIN_WITH_FEMA_FLOWLINE_SUBSET_PATH <- paste0(DOMAIN_WITH_FEMA_FLOWLINES_DIR, "/flowlines_subset.gpkg")
+sf::write_sf(
+  flines, 
+  DOMAIN_WITH_FEMA_FLOWLINE_SUBSET_PATH
+)
+
+# split the flowlines into groups by VPU
+fline_groups <- dplyr::group_split(flines, VPUID)
+
+# loop through each VPU group of flowlines and save a local copy
+for (i in seq_along(fline_groups)) {
+  flowlines  <- fline_groups[[i]]
+  VPU        <- unique(flowlines$VPUID)
+  
+  # save the flowlines subset 
+  vpu_flowlines_path <- paste0(DOMAIN_WITH_FEMA_FLOWLINES_DIR, "/", gsub(", ", "_", VPU), "_flowlines.gpkg")
+  
+  sf::write_sf(
+    flowlines, 
+    vpu_flowlines_path
+  )
+}
+
+for (i in seq_along(fline_groups)) {
+
+  flowlines  <- fline_groups[[i]]
+  VPU        <-  unique(flowlines$VPUID)
+  
+  message("(", i, "/", length(fline_groups), ")\n", 
+          " > Processing flowlines in VPU group: '", VPU, "'")
+  
+  # unique VPUs for group
+  GROUP_VPU_IDS <- unnest_ids(flowlines$VPUID)
+  
+  # all FEMA dirs for the current area
+  GROUP_FEMA_DIRS  <- FEMA_VPU_SUBFOLDERS[basename(FEMA_VPU_SUBFOLDERS) %in% paste0("VPU_", GROUP_VPU_IDS) ]
+  GROUP_FEMA_FILES <- list.files(GROUP_FEMA_DIRS, full.names = TRUE)
+  GROUP_FEMA_FILES <- GROUP_FEMA_FILES[grepl("_output.gpkg", GROUP_FEMA_FILES)]
+  
+  # create transect lines
+  transects <- hydrofabric3D::cut_cross_sections(
+    net               = flowlines,                        # flowlines network
+    id                = CROSSWALK_ID,                     # Unique feature ID
+    cs_widths         = pmax(50, flowlines$bf_width * 11),     # cross section width of each "id" linestring ("hy_id")
+    # cs_widths         = pmax(50, flines$bf_width),     # cross section width of each "id" linestring ("hy_id")
+    num               = 10,                            # number of cross sections per "id" linestring ("hy_id")
+    smooth            = TRUE,                          # smooth lines
+    densify           = 3,                             # densify linestring points
+    rm_self_intersect = TRUE,                          # remove self intersecting transects
+    fix_braids        = FALSE,                         # whether to fix braided flowlines or not
+    #### Arguments used for when fix_braids = TRUE     # TODO: these methods need revision in hydrofabric3D to allow for more flexible processing for data that is NOT COMID based (i.e. hy_id)    
+    # terminal_id       = NULL,
+    # braid_threshold   = NULL,
+    # version           = 2,
+    # braid_method      = "comid",
+    # precision         = 1,
+    add               = TRUE                           # whether to add back the original data
+  )
+  
+  # add cs_source column
+  transects <- 
+    transects %>%
+    dplyr::mutate(
+      cs_source = CS_SOURCE
+    )
+
+  # ---------------------------------------------------------------------
+  # --- Extend transects out to FEMA 100yr floodplains
+  # ---------------------------------------------------------------------
+  message("Reading in FEMA polygons...") 
+  
+  # unique(flowlines$VPUID)
+  
+  # fema polygons and transect lines
+  # fema <- sf::read_sf(vpu_fema_file)
+  
+  # read each FEMA geopackage into a list 
+  fema <- lapply(GROUP_FEMA_FILES, function(gpkg) sf::read_sf(gpkg))
+  
+  fema <- 
+    fema %>% 
+    dplyr::bind_rows() %>% 
+    dplyr::mutate(
+      fema_id = 1:dplyr::n()
+    ) 
+
+  message("Simplifying FEMA polygons...")
+  message(" - Number of geoms BEFORE simplifying: ", nrow(fema))
+  
+  # TODO: this should be a function argument OR removed; we probably shouldn't
+  # TODO: forcibly and silently simplify the input polygons without the user knowing
+  # keep 1% of the original points for speed
+  fema <- rmapshaper::ms_simplify(fema, keep_shapes = TRUE, keep = 0.01, sys = TRUE, sys_mem = 16)
+  # fema <- rmapshaper::ms_simplify(fema, keep_shapes = TRUE, keep = 0.1, sys = TRUE, sys_mem = 16)
+  
+  message(" - Number of geoms AFTER simplifying: ", nrow(fema))
+  message("Extending transects out to FEMA 100yr floodplain polygon boundaries - (", Sys.time(), ")")
+  
+  transects <- 
+    transects  %>%
+    dplyr::left_join(
+      dplyr::select(sf::st_drop_geometry(flowlines),
+                    dplyr::any_of(CROSSWALK_ID),
+                    mainstem
+      ),
+      by = CROSSWALK_ID
+    )
+  
+  # TODO: make sure this 3000m extension distance is appropriate across VPUs 
+  # TODO: also got to make sure that this will be feasible on memory on the larger VPUs...
+  transects <- hydrofabric3D::extend_transects_to_polygons(
+    transect_lines         = transects, 
+    polygons               = fema, 
+    flowlines              = flowlines, 
+    crosswalk_id           = CROSSWALK_ID,
+    grouping_id            = "mainstem", 
+    max_extension_distance = 3000 
+  )
+  
+  # mapview::mapview(transects, color = "green") + 
+   # mapview::mapview(transects2, color = "red")
+  
+  transects <- 
+    transects %>% 
+    hydrofabric3D::add_tmp_id(x = CROSSWALK_ID) %>% 
+    dplyr::mutate(is_extended = FALSE) %>%
+    dplyr::select(
+      dplyr::any_of(CROSSWALK_ID),
+      cs_id, 
+      cs_lengthm,
+      cs_source,
+      cs_measure,
+      is_extended,
+      geometry
+    )
+  
+  out_path <- paste0(DOMAIN_WITH_FEMA_TRANSECTS_DIR, "/",   
+                     paste(GROUP_VPU_IDS, collapse = "_"), "_transects.gpkg" 
+                     )
+  
+  message("Writting transect lines for VPU group: '", VPU, "'",
+          "\n > '", out_path, "'")
+  
+  sf::write_sf(transects, out_path)
+  
+  message("Finished writting transects!")
+  
+}
+# ---------------------------------------------------------------------
+# --- Bind together transects into single dataset
+# ---------------------------------------------------------------------
+
+transect_paths <- list.files(DOMAIN_WITH_FEMA_TRANSECTS_DIR, full.names = T)
+flowline_paths <- list.files(DOMAIN_WITH_FEMA_FLOWLINES_DIR, full.names = T)
+flowline_paths <- flowline_paths[!basename(flowline_paths) %in% c("flowlines_subset.gpkg", "ls_conus.gpkg")]
+
+# read each transect geopackage into a list 
+transects <- lapply(transect_paths, function(gpkg) sf::read_sf(gpkg))
+# transects[[5]]   # (debugging) inspect one element
+
+transects <- 
+  transects %>% 
+  dplyr::bind_rows()
+
+paths_df <- data.frame(
+  t = transect_paths,
+  f = flowline_paths
+) %>% 
+  dplyr::mutate(
+    vpu = gsub("_transects.gpkg", "", basename(t))
+  )
+
+for (i in 1:nrow(paths_df)) {
+  
+  VPU     <- paths_df$vpu[i]
+  t_path  <- paths_df$t[i]
+  f_path  <- paths_df$f[i]
+  
+  message("Creating VPU ", VPU, " cross section points:", 
+          "\n - flowpaths: '", f_path, "'",
+          "\n - transects: '", t_path, "'"
+  )
+  
+  ################### 
+  message("Reading in transects...\n > ", t_path)
+  # read in transects data
+  transects <- sf::read_sf(t_path)
+  
+  message("Reading in flowlines... \n > ", f_path)
+  # read in nextgen data
+  flines <- sf::read_sf(f_path)
+  
+  message("Extracting cross section points...")
+  # ----------------------------------------------------------------------------------------------------------------
+  # ---- STEP 1: Extract cs points from DEM ----
+  # ----------------------------------------------------------------------------------------------------------------
+  # system.time({
+  # get cross section point elevations
+  cs_pts <- hydrofabric3D::cross_section_pts(
+    cs             = transects,
+    crosswalk_id   = CROSSWALK_ID,
+    points_per_cs  = NULL,
+    min_pts_per_cs = 10,
+    dem            = DEM_URL
+  )
+  
+  # ----------------------------------------------------------------------------------------------------------------
+  # ---- STEP 2: Remove any cross section that has ANY missing (NA) Z values, and classify the points  ----
+  # ----------------------------------------------------------------------------------------------------------------
+  # cs_pts2 %>% 
+  #   dplyr::slice(1:200) %>% 
+  #   dplyr::rename(hy_id = id) %>% 
+  #   hydrofabric3D::plot_cs_pts(x = "pt_id", color = "point_type")
+  
+  cs_pts <- 
+    # cs_pts2 <- 
+    cs_pts %>% 
+    hydrofabric3D::drop_incomplete_cs_pts(CROSSWALK_ID) %>% 
+    hydrofabric3D::classify_points(
+      crosswalk_id             = CROSSWALK_ID, 
+      pct_of_length_for_relief = PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF
+    )  
+  
+  # })
+  
+  ids_original_cs_pts <- hydrofabric3D::add_tmp_id(cs_pts, x = CROSSWALK_ID)$tmp_id  
+  # ids_original_cs_pts <- hydrofabric3D::add_tmp_id(cs_pts2)$tmp_id
+  
+  # sf::write_sf(cs_pts2, "/Users/anguswatters/Desktop/test_improve_cs_pts_classified_11.gpkg")
+  # sf::write_sf(cs_pts, "/Users/anguswatters/Desktop/test_improve_cs_pts_classified_11_2.gpkg")
+  
+  
+  # ----------------------------------------------------------------------------------------------------------------
+  # ---- STEP 3: Try to rectify any no relief and invalid banks cross sections ----
+  # ----------------------------------------------------------------------------------------------------------------
+  
+  # system.time({
+    fixed_pts <- hydrofabric3D::get_improved_cs_pts(
+      cs_pts         = cs_pts,    # cross section points generated from hydrofabric3D::cross_section_pts()
+      net            = flines,    # original flowline network
+      # net            = flines,    # original flowline network
+      transects      = transects, # original transect lines
+      crosswalk_id   = CROSSWALK_ID,
+      points_per_cs  = NULL, 
+      min_pts_per_cs = 10, # number of points per cross sections
+      dem            = DEM_URL, # DEM to extract points from
+      scale          = EXTENSION_PCT, # How far to extend transects if the points need to be rechecked
+      pct_of_length_for_relief = PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF, # percent of cross sections length to be needed in relief calculation to consider cross section to "have relief"
+      fix_ids = FALSE,
+      verbose = TRUE
+    )
+  # })
+  
+  ids_after_fixed_pts <- hydrofabric3D::add_tmp_id(fixed_pts, x = CROSSWALK_ID)$tmp_id  
+  
+  # ----------------------------------------------------------------------------------------------------------------
+  # ---- Update transects with extended transects (if exists) ----
+  # ----------------------------------------------------------------------------------------------------------------
+  
+  out_transects <- match_transects_to_extended_cs_pts(
+    transect_lines = transects, 
+    fixed_cs_pts   = fixed_pts, 
+    crosswalk_id   = CROSSWALK_ID
+  )
+  
+  # ----------------------------------------------------------------------------------------------------------------
+  # ---- Re-enumerate the transects & cross section points "cs_id" ----
+  # ----------------------------------------------------------------------------------------------------------------
+  
+  # fixed_pts      <- hydrofabric3D:::renumber_cs_ids(df = fixed_pts, crosswalk_id = "hy_id")
+  # out_transects  <- hydrofabric3D:::renumber_cs_ids(
+  #                                                   df            = dplyr::mutate(out_transects, pt_id = 1), 
+  #                                                   crosswalk_id  = "hy_id"
+  #                                                   ) %>% 
+  #                                                   dplyr::select(-pt_id)
+  
+  fixed_pts      <- hydrofabric3D:::renumber_cs_ids(df = fixed_pts, crosswalk_id = CROSSWALK_ID)
+  out_transects  <- hydrofabric3D:::renumber_cs_ids(df = out_transects, crosswalk_id = CROSSWALK_ID)
+  
+  # ----------------------------------------------------------------------------------------------------------------
+  # ---- STEP 4: Update transects with extended transects (if exists) ----
+  # ----------------------------------------------------------------------------------------------------------------
+  
+  # classify the cross section points
+  fixed_pts <-
+    fixed_pts %>% 
+    dplyr::mutate(
+      X = sf::st_coordinates(.)[,1],
+      Y = sf::st_coordinates(.)[,2]
+    ) %>%
+    sf::st_drop_geometry() %>% 
+    dplyr::select(
+      dplyr::any_of(CROSSWALK_ID),
+      cs_id, 
+      pt_id,
+      cs_lengthm,
+      relative_distance,
+      X, Y, Z,
+      class, point_type,
+      bottom, left_bank, right_bank, valid_banks, has_relief # newly added columns (03/06/2024)
+    )
+  
+  # add Z_source column for source of elevation data
+  fixed_pts <-
+    fixed_pts %>%
+    dplyr::mutate(
+      Z_source = CS_SOURCE
+    ) %>%
+    dplyr::relocate(
+      dplyr::any_of(CROSSWALK_ID),
+      cs_id, pt_id, cs_lengthm, relative_distance, X, Y, Z, Z_source, 
+                    class, point_type, 
+                    bottom, left_bank, right_bank, valid_banks, has_relief)
+  
+  ids_before_align <- hydrofabric3D::add_tmp_id(fixed_pts, x = CROSSWALK_ID)$tmp_id
+  
+  message("Aligning banks and smoothing bottoms...")
+  fixed_pts <- hydrofabric3D::align_banks_and_bottoms(cs_pts = fixed_pts, crosswalk_id = CROSSWALK_ID)
+  
+  ids_after_align <- hydrofabric3D::add_tmp_id(fixed_pts, x = CROSSWALK_ID)$tmp_id
+  
+  message("Reclassifying cross section points...")
+  
+  fixed_pts <- hydrofabric3D::classify_points(
+    cs_pts                    = fixed_pts, 
+    crosswalk_id              = CROSSWALK_ID,
+    pct_of_length_for_relief  = PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF
+  )
+  
+  ids_after_reclassify <- hydrofabric3D::add_tmp_id(fixed_pts, x = CROSSWALK_ID)$tmp_id
+  
+  if(all(ids_original_cs_pts %in% ids_after_fixed_pts)) {
+    message("All hy_id/cs_ids in ORIGINAL DEM point extraction were found in the FIXED points")
+  } else {
+    message(" >>> Missing hy_id/cs_ids in ORIGINAL DEM point extraction compared to the FIXED points")
+  }
+  
+  if(all(ids_before_align %in% ids_after_align)) {
+    message("All hy_id/cs_ids are kept in tact after bank alignment and bottom smoothing")
+  } else {
+    message(" >>> Missing hy_id/cs_ids after bank alignment and bottom smoothing")
+  }
+  
+  if(all(ids_after_align %in% ids_after_reclassify)) {
+    message("All hy_id/cs_ids are kept in tact after RECLASSIFICATION")
+  } else {
+    message(" >>> Missing hy_id/cs_ids after RECLASSIFICATION")
+  }
+  
+  sf::write_sf(
+    out_transects,
+    paste0(DOMAIN_WITH_FEMA_TRANSECTS_DIR, "/", VPU, "_transects.gpkg") 
+  )
+  
+  arrow::write_parquet(
+    fixed_pts,
+    paste0(DOMAIN_WITH_FEMA_CS_PTS_DIR, "/", VPU, "_cs_pts.parquet") 
+  )
+  
+}
+
+# TODO: Save all cross section points + transects into single geopackages (transects.gpkg, cs_pts.gpkg)
+
+
diff --git a/runners/cs_runner/new_domain.R b/runners/cs_runner/new_domain.R
new file mode 100644
index 0000000..5aae38e
--- /dev/null
+++ b/runners/cs_runner/new_domain.R
@@ -0,0 +1,510 @@
+# Generate the transects + cs_pts + cross sections layers for a single flowlines domain file and DEM file 
+source("runners/cs_runner/config.R")
+source("runners/cs_runner/utils.R")
+
+# # # # load libraries
+# library(hydrofabric3D)
+# library(dplyr)
+# library(sf)
+# install.packages("devtools")
+
+# # transect bucket prefix
+# S3_TRANSECTS_DIR <- paste0(S3_BUCKET_URI, VERSION, "/3D/transects/")
+
+# paths to NEW DOMAIN datasets 
+# NEW_DOMAIN_FILES    <- list.files(NEW_DOMAIN_FLOWLINES_DIR, full.names = TRUE)
+
+# ---------------------------------------------------------------------
+# --- Read in flowlines
+# ---------------------------------------------------------------------
+# read in nextgen data
+flines <- sf::read_sf(NEW_DOMAIN_FLOWLINES_PATH, layer = "flowpaths")
+
+VPUS           <- sf::st_transform(nhdplusTools::vpu_boundaries, sf::st_crs(flines))
+vpu_flines_int <- sf::st_intersects(VPUS, flines)
+vpu_aoi        <- VPUS[lengths(vpu_flines_int) > 0, ]
+
+# VPU IDs of interest 
+VPU_IDS        <- vpu_aoi$VPUID
+
+# sf::write_sf(aoi, "all_diffusive_combined_vpus.gpkg")
+# VPUS[lengths(vpu_fl_int) > 0, ] %>% mapview::mapview()
+# nhdplusTools::vpu_boundaries
+
+# flines %>% 
+#   dplyr::filter(order >= 3) %>% 
+# # flines %>% 
+#   sf::st_transform(4326) %>% 
+#   mapview::mapview()
+# # ggplot2::ggplot() +
+# #   ggplot2::geom_sf()
+#   
+# 
+# flines %>% 
+#   sf::st_transform(4326) %>% 
+#   sf::st_bbox() %>% 
+#   sf::st_as_sfc() %>% 
+#   sf::st_as_sf() %>% 
+#   mapview::mapview()
+
+# ---------------------------------------------------------------------
+# --- Add bankful width estimate
+# ---------------------------------------------------------------------
+
+# calculate bankfull width
+flines <-
+  flines %>%
+  sf::st_transform(26904) %>% 
+  dplyr::mutate(
+    bf_width = exp(0.700 + 0.365 * log(tot_drainage_areasqkm))
+  ) %>%
+  dplyr::select(
+    hy_id = id,
+    lengthkm,
+    tot_drainage_areasqkm,
+    bf_width,
+    mainstem,
+    geometry = geom
+  )
+
+# ---------------------------------------------------------------------
+# --- Create transect lines
+# ---------------------------------------------------------------------
+system.time({
+  
+# create transect lines
+transects <- hydrofabric3D::cut_cross_sections(
+  net               = flines,                        # flowlines network
+  id                = "hy_id",                       # Unique feature ID
+  cs_widths         = pmax(50, flines$bf_width * 11),     # cross section width of each "id" linestring ("hy_id")
+  # cs_widths         = pmax(50, flines$bf_width),     # cross section width of each "id" linestring ("hy_id")
+  num               = 10,                            # number of cross sections per "id" linestring ("hy_id")
+  smooth            = TRUE,                          # smooth lines
+  densify           = 3,                             # densify linestring points
+  rm_self_intersect = TRUE,                          # remove self intersecting transects
+  fix_braids        = FALSE,                         # whether to fix braided flowlines or not
+  #### Arguments used for when fix_braids = TRUE     # TODO: these methods need revision in hydrofabric3D to allow for more flexible processing for data that is NOT COMID based (i.e. hy_id)    
+  # terminal_id       = NULL,
+  # braid_threshold   = NULL,
+  # version           = 2,
+  # braid_method      = "comid",
+  # precision         = 1,
+  add               = TRUE                           # whether to add back the original data
+)
+
+})
+
+# add cs_source column
+transects <- 
+  transects %>%
+  dplyr::mutate(
+    cs_source = CS_SOURCE
+  )
+
+# ---------------------------------------------------------------------
+# --- Extract cross section points from DEM 
+# ---------------------------------------------------------------------
+# dem <- terra::rast(NEW_DOMAIN_DEM_PATH)
+
+system.time({
+
+# get cross section point elevations
+cs_pts <- hydrofabric3D::cross_section_pts(
+  
+  cs             = transects,
+  crosswalk_id   = "hy_id",
+  points_per_cs  = NULL,
+  min_pts_per_cs = 10,
+  dem            = NEW_DOMAIN_DEM_PATH
+)
+
+})
+
+
+# ---------------------------------------------------------------------
+# --- Classify cross section points
+# ---------------------------------------------------------------------
+# STEP 2: Remove any cross section that has ANY missing (NA) Z values, and classify the points 
+
+cs_pts <-
+# cs_pts2 <-
+  cs_pts %>% 
+  # dplyr::group_by(hy_id, cs_id) %>% 
+  # dplyr::filter(!any(is.na(Z))) %>% 
+  # dplyr::ungroup() %>% 
+  hydrofabric3D::drop_incomplete_cs_pts("hy_id") %>% 
+  hydrofabric3D::classify_points(
+    crosswalk_id             = "hy_id", 
+    pct_of_length_for_relief = PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF
+  )  
+
+# cs_pts3 <-
+#   cs_pts %>% 
+#   # dplyr::group_by(hy_id, cs_id) %>% 
+#   # dplyr::filter(!any(is.na(Z))) %>% 
+#   # dplyr::ungroup() %>% 
+#   hydrofabric3D::drop_incomplete_cs_pts("hy_id") %>% 
+#   hydrofabric3D::classify_points2(
+#     crosswalk_id             = "hy_id", 
+#     pct_of_length_for_relief = PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF
+#   )  
+# 
+# old_plot <- 
+#   cs_pts2 %>% 
+#   dplyr::slice(1:100) %>% 
+#   hydrofabric3D::plot_cs_pts(color = 'point_type', size = 4)+
+#   ggplot2::labs(title = "OLD CLASSES")
+# 
+# new_plot <- 
+#   cs_pts3 %>% 
+#   dplyr::slice(1:100) %>% 
+#   hydrofabric3D::plot_cs_pts(color = 'point_type', size = 4) +
+#   ggplot2::labs(title = "NEW CLASSES")
+# 
+# library(patchwork)
+# old_plot / new_plot
+# })
+
+ids_original_cs_pts <- hydrofabric3D::add_tmp_id(cs_pts)$tmp_id  
+
+# ----------------------------------------------------------------------------------------------------------------
+# ---- Improve cross section points based on the bank validity & amount of relief in a cross section
+# ----------------------------------------------------------------------------------------------------------------
+
+# system.time({
+fixed_pts <- hydrofabric3D::get_improved_cs_pts(
+  cs_pts         = cs_pts,    # cross section points generated from hydrofabric3D::cross_section_pts()
+  net            = flines,     # original flowline network
+  transects      = transects, # original transect lines
+  crosswalk_id   = "hy_id",
+  points_per_cs  = NULL, 
+  min_pts_per_cs = 10, # number of points per cross sections
+  dem            = NEW_DOMAIN_DEM_PATH, # DEM to extract points from
+  scale          = EXTENSION_PCT, # How far to extend transects if the points need to be rechecked
+  pct_of_length_for_relief = PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF, # percent of cross sections length to be needed in relief calculation to consider cross section to "have relief"
+  fix_ids = FALSE,
+  verbose = TRUE
+)
+# })
+
+# stash the IDs after the improvement step for the validation checks further down
+ids_after_fixed_pts <- hydrofabric3D::add_tmp_id(fixed_pts)$tmp_id
+
+# ----------------------------------------------------------------------------------------------------------------
+# ---- Update transects with extended transects (if exists) ----
+# ----------------------------------------------------------------------------------------------------------------
+
+out_transects <- match_transects_to_extended_cs_pts(
+                      transect_lines = transects, 
+                      fixed_cs_pts   = fixed_pts, 
+                      crosswalk_id   = "hy_id"
+                      )
+
+# ----------------------------------------------------------------------------------------------------------------
+# ---- Re-enumerate the transects & cross section points "cs_id" ----
+# ----------------------------------------------------------------------------------------------------------------
+
+# fixed_pts      <- hydrofabric3D:::renumber_cs_ids(df = fixed_pts, crosswalk_id = "hy_id")
+# out_transects  <- hydrofabric3D:::renumber_cs_ids(
+#                                                   df            = dplyr::mutate(out_transects, pt_id = 1), 
+#                                                   crosswalk_id  = "hy_id"
+#                                                   ) %>% 
+#                                                   dplyr::select(-pt_id)
+
+fixed_pts      <- hydrofabric3D:::renumber_cs_ids(df = fixed_pts, crosswalk_id = "hy_id")
+out_transects  <- hydrofabric3D:::renumber_cs_ids(df = out_transects, crosswalk_id = "hy_id")
+
+# ----------------------------------------------------------------------------------------------------------------
+# ---- STEP 4: Update transects with extended transects (if exists) ----
+# ----------------------------------------------------------------------------------------------------------------
+
+# classify the cross section points
+fixed_pts <-
+  fixed_pts %>% 
+  dplyr::mutate(
+    X = sf::st_coordinates(.)[,1],
+    Y = sf::st_coordinates(.)[,2]
+  ) %>%
+  sf::st_drop_geometry() %>% 
+  dplyr::select(
+    hy_id, 
+    cs_id, 
+    pt_id,
+    cs_lengthm,
+    relative_distance,
+    X, Y, Z,
+    class, point_type,
+    bottom, left_bank, right_bank, valid_banks, has_relief # newly added columns (03/06/2024)
+  )
+
+# add Z_source column for source of elevation data
+fixed_pts <-
+  fixed_pts %>%
+  dplyr::mutate(
+    Z_source = CS_SOURCE
+  ) %>%
+  dplyr::relocate(hy_id, cs_id, pt_id, cs_lengthm, relative_distance, X, Y, Z, Z_source, 
+                  class, point_type, 
+                  bottom, left_bank, right_bank, valid_banks, has_relief)
+
+ids_before_align <- hydrofabric3D::add_tmp_id(fixed_pts)$tmp_id
+
+message("Aligning banks and smoothing bottoms...")
+fixed_pts <- hydrofabric3D::align_banks_and_bottoms(fixed_pts)
+
+ids_after_align <- hydrofabric3D::add_tmp_id(fixed_pts)$tmp_id
+
+message("Reclassifying cross section points...")
+fixed_pts <- hydrofabric3D::classify_points(
+  cs_pts                    = fixed_pts, 
+  crosswalk_id              = "hy_id",
+  pct_of_length_for_relief  = PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF
+)
+
+ids_after_reclassify <- hydrofabric3D::add_tmp_id(fixed_pts)$tmp_id
+
+if(all(ids_original_cs_pts %in% ids_after_fixed_pts)) {
+  message("All hy_id/cs_ids in ORIGINAL DEM point extraction were found in the FIXED points")
+} else {
+  message(" >>> Missing hy_id/cs_ids in ORIGINAL DEM point extraction compared to the FIXED points")
+}
+
+if(all(ids_before_align %in% ids_after_align)) {
+  message("All hy_id/cs_ids are kept in tact after bank alignment and bottom smoothing")
+} else {
+  message(" >>> Missing hy_id/cs_ids after bank alignment and bottom smoothing")
+}
+
+if(all(ids_after_align %in% ids_after_reclassify)) {
+  message("All hy_id/cs_ids are kept in tact after RECLASSIFICATION")
+} else {
+  message(" >>> Missing hy_id/cs_ids after RECLASSIFICATION")
+}
+
+sf::write_sf(
+  out_transects,
+  paste0(NEW_DOMAIN_TRANSECTS_DIR, "/hi_transects.gpkg")
+)
+
+arrow::write_parquet(
+  fixed_pts,
+  paste0(NEW_DOMAIN_CS_PTS_DIR, "/hi_cs_pts.parquet")
+)
+
+# ----------------------------------------------------------------------------------------------------------------
+# ----------------------------------------------------------------------------------------------------------------
+# ----------------------------------------------------------------------------------------------------------------
+
+# ----------------------------------------------------------------------------------------------------------------
+# ---- STEP 4: Update transects with extended transects (if exists) ----
+# ----------------------------------------------------------------------------------------------------------------
+
+# get the counts of each point type to add this data to the transects dataset
+point_type_counts <- hydrofabric3D::get_point_type_counts(fixed_pts)
+
+# # check the number of cross sections that were extended
+message("Subsetting cross section points generated after extending transects...")
+
+# extract cross section points that have an "is_extended" value of TRUE
+extended_pts <-
+  fixed_pts %>% 
+  dplyr::filter(is_extended) %>% 
+  hydrofabric3D::add_tmp_id()
+
+# extract transects that have a "hy_id" in the "extended_pts" dataset
+update_transects <-
+  transects %>% 
+  hydrofabric3D::add_tmp_id() %>% 
+  dplyr::filter(tmp_id %in% unique(extended_pts$tmp_id))
+
+# if any transects were extended, update the transects dataset, and overwrite local and S3 transects geopackages
+if (nrow(update_transects) > 0) {
+  message("Updating ", nrow(update_transects), " transects")
+  
+  update_transects <-
+    update_transects %>%
+    # apply extend_by_percent function to each transect line: 
+    hydrofabric3D:::extend_by_percent(
+      pct        = EXTENSION_PCT,
+      length_col = "cs_lengthm"
+    )
+  
+  # Filter down to ONLY points that were finalized and rectified from rectify_cs_pts()
+  # remove old transects that have "tmp_id" in "extended_pts" (transects that were unchanged and are "good_to_go")
+  # and then replace with old transects with the "update_transects" 
+  out_transects <- 
+    transects %>% 
+    hydrofabric3D::add_tmp_id() %>% 
+    # dplyr::filter(!tmp_id %in% unique(extended_pts$tmp_id)) %>%
+    dplyr::filter(tmp_id %in% unique(hydrofabric3D::add_tmp_id(fixed_pts)$tmp_id)) %>% # Subset down to the remaining tmp_ids in the fixed points
+    dplyr::filter(!tmp_id %in% unique(extended_pts$tmp_id)) %>% # remove the tmp_ids that we are going add back in with the extended versions of those tmp_ids
+    dplyr::bind_rows( # bring in the new updated extended transects
+      dplyr::mutate(
+        update_transects,
+        is_extended = TRUE
+      )
+    )  
+  
+} else {
+  
+  out_transects <- 
+    transects %>% 
+    hydrofabric3D::add_tmp_id() %>% 
+    dplyr::filter(tmp_id %in% unique(hydrofabric3D::add_tmp_id(fixed_pts)$tmp_id)) %>% 
+    dplyr::filter(!tmp_id %in% unique(extended_pts$tmp_id))
+}
+
+# finalize new transects
+out_transects <- 
+  out_transects %>% 
+  dplyr::left_join(
+    point_type_counts,
+    by = c("hy_id", "cs_id")
+  ) %>% 
+  dplyr::left_join(
+    dplyr::ungroup(
+      dplyr::slice( 
+        
+        dplyr::group_by(
+          dplyr::select(sf::st_drop_geometry(fixed_pts), 
+                        hy_id, cs_id, bottom, left_bank, right_bank, valid_banks, has_relief), 
+          hy_id, cs_id),
+        1)
+    ),
+    by = c("hy_id", "cs_id")
+  ) %>% 
+  dplyr::select(hy_id, cs_source, cs_id, cs_measure, cs_lengthm, 
+                # sinuosity, 
+                is_extended, 
+                left_bank_count, right_bank_count, channel_count, bottom_count, 
+                bottom, left_bank, right_bank, valid_banks, has_relief,
+                geom
+  )
+
+# ----------------------------------------------------------------------------------------------------------------
+# ---- Re-enumerate the transects & cross section points "cs_id" ----
+# ----------------------------------------------------------------------------------------------------------------
+# (debugging) sanity check that renumbering preserves the set of unique tmp_ids
+# renumb <- hydrofabric3D:::renumber_cs_ids(fixed_pts, crosswalk_id = "hy_id")
+# unumb  <- hydrofabric3D::get_unique_tmp_ids(renumb)
+# nnumb  <- hydrofabric3D::get_unique_tmp_ids(fixed_pts)
+# all(unumb %in% nnumb)
+# all(nnumb %in% unumb)
+# length(unumb)
+# length(nnumb)
+
+# make a dataframe that has a new_cs_id column that has 
+# the cs_id renumbered to fill in any missing IDs,
+# so each hy_id has cs_ids that go from 1 - number of cross sections on hy_id
+# The dataframe below will be used to join the "new_cs_id" with 
+# the original "cs_ids" in the final cross section POINTS and UPDATED TRANSECTS output datasets
+renumbered_ids <-
+  fixed_pts %>% 
+  sf::st_drop_geometry() %>% 
+  # dplyr::filter(hy_id %in% c("wb-2402800", "wb-2398282", "wb-2400351")) %>%
+  dplyr::select(hy_id, cs_id, pt_id, cs_measure) %>% 
+  dplyr::group_by(hy_id, cs_id) %>% 
+  dplyr::slice(1) %>% 
+  dplyr::ungroup() %>% 
+  hydrofabric3D::add_tmp_id() %>% 
+  dplyr::group_by(hy_id) %>% 
+  dplyr::mutate(
+    new_cs_id = 1:dplyr::n()
+  ) %>% 
+  dplyr::ungroup() %>% 
+  dplyr::select(new_cs_id, tmp_id)
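+
+# e.g. if a hy_id had cross sections with cs_id = c(1, 3, 7) (gaps left by dropped or
+# filtered out cross sections), the rows above map them to new_cs_id = c(1, 2, 3) via tmp_id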
+
+# Renumber the transects to have correct CS IDs
+out_transects2 <- dplyr::left_join(
+  hydrofabric3D::add_tmp_id(out_transects),
+  renumbered_ids,
+  by = "tmp_id"
+) %>% 
+  dplyr::select(-cs_id, -tmp_id) %>% 
+  dplyr::select(hy_id, cs_source, 
+                cs_id = new_cs_id, 
+                cs_measure, cs_lengthm, 
+                # sinuosity,
+                is_extended, 
+                left_bank_count, right_bank_count, channel_count, bottom_count, 
+                bottom, left_bank, right_bank, valid_banks, has_relief,
+                geometry = geom
+  )
+
+# Renumber the cross sections points to have correct CS IDs
+fixed_pts <- 
+  dplyr::left_join(
+      hydrofabric3D::add_tmp_id(fixed_pts),
+      renumbered_ids,
+      by = "tmp_id"
+  ) %>% 
+  dplyr::select(-cs_id, -tmp_id) %>% 
+  dplyr::rename(cs_id = new_cs_id)
+
+# NOTE: the block below appears to be a generic draft of the renumbering logic above; it
+# references a placeholder data frame `df` and a `crosswalk_id` string, neither of which
+# is defined in this script, so it is kept commented out for reference only
+# renumbered_ids <-
+#   df %>%
+#   sf::st_drop_geometry() %>%
+#   dplyr::select(
+#     dplyr::any_of(crosswalk_id),
+#     cs_id, pt_id, cs_measure
+#   ) %>%
+#   dplyr::group_by(dplyr::across(dplyr::any_of(c(crosswalk_id, "cs_id")))) %>%
+#   dplyr::slice(1) %>%
+#   dplyr::ungroup() %>%
+#   dplyr::group_by(dplyr::across(dplyr::any_of(c(crosswalk_id)))) %>%
+#   dplyr::mutate(
+#     new_cs_id = 1:dplyr::n()
+#   ) %>%
+#   add_tmp_id(x = crosswalk_id) %>%
+#   dplyr::ungroup() %>%
+#   dplyr::select(new_cs_id, tmp_id)
+#
+# # Join the new cs_ids back with the final output data to replace the old cs_ids
+# df <- dplyr::left_join(
+#   add_tmp_id(df, x = crosswalk_id),
+#   renumbered_ids,
+#   by = "tmp_id"
+# ) %>%
+#   dplyr::select(-cs_id, -tmp_id) %>%
+#   dplyr::rename("cs_id" = "new_cs_id") %>%
+#   dplyr::relocate(dplyr::any_of(crosswalk_id), cs_id)
+
+# ----------------------------------------------------------------------------------------------------------------
+# ---- Cross section points parquet to S3 ----
+# ----------------------------------------------------------------------------------------------------------------
+
+# classify the cross section points
+fixed_pts <-
+  fixed_pts %>% 
+  dplyr::mutate(
+    X = sf::st_coordinates(.)[,1],
+    Y = sf::st_coordinates(.)[,2]
+  ) %>%
+  sf::st_drop_geometry() %>% 
+  dplyr::select(
+    hy_id, cs_id, pt_id,
+    cs_lengthm,
+    relative_distance,
+    X, Y, Z,
+    class, point_type,
+    bottom, left_bank, right_bank, valid_banks, has_relief # newly added columns (03/06/2024)
+  )
+
diff --git a/runners/cs_runner/utils.R b/runners/cs_runner/utils.R
index c4495d7..9d27a61 100644
--- a/runners/cs_runner/utils.R
+++ b/runners/cs_runner/utils.R
@@ -1,3 +1,73 @@
+# Create an empty file structure 
+# base_dir: character, top level directory path
+# domain_dirname: character, name of the new domain directory; if the folder already exists,
+#                 only the missing required subdirectories are created
+
+# Directory tree:
+# base_dir/
+#   └── domain_dirname/
+#     ├── flowlines/
+#     ├── dem/
+#     ├── transects/
+#     ├── cross_sections/
+#     └── cs_pts/
+create_new_domain_dirs <- function(base_dir, domain_dirname) {
+  
+  # build paths
+  domain_dir         <- paste0(base_dir, "/", domain_dirname)
+  flowlines_dir      <- paste0(domain_dir, "/flowlines")
+  domain_subset_dir  <- paste0(domain_dir, "/domain_subset")
+  dem_dir            <- paste0(domain_dir, "/dem")
+  transects_dir      <- paste0(domain_dir, "/transects")
+  cross_sections_dir <- paste0(domain_dir, "/cross_sections")
+  cs_pts_dir         <- paste0(domain_dir, "/cs_pts")
+  
+  create_if_not_exists <- function(dir_path) {
+    if (!dir.exists(dir_path)) {
+      dir.create(dir_path, recursive = TRUE)
+      message("Created directory: '", dir_path, "'\n")
+    }
+  }
+  
+  # create directories
+  create_if_not_exists(domain_dir)
+  create_if_not_exists(flowlines_dir)
+  create_if_not_exists(domain_subset_dir)
+  create_if_not_exists(dem_dir)
+  create_if_not_exists(transects_dir)
+  create_if_not_exists(cross_sections_dir)
+  create_if_not_exists(cs_pts_dir)
+}
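+
+# A minimal usage sketch (paths are hypothetical); calling it twice is safe because
+# create_if_not_exists() only creates directories that are missing:
+# create_new_domain_dirs(base_dir = "/Users/me/data", domain_dirname = "my_new_domain")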
+
+# get path strings for a domain dir (based on a base dir and domain dirname)
+# NOTE: this does NOT guarantee that these folders exist, 
+# NOTE: it just gets the paths as they would be created by create_new_domain_dirs() 
+get_new_domain_paths <- function(base_dir, domain_dirname) {
+  
+  # build paths
+  domain_dir         <- paste0(base_dir, "/", domain_dirname)
+  flowlines_dir      <- paste0(domain_dir, "/flowlines")
+  domain_subset_dir  <- paste0(domain_dir, "/domain_subset")
+  dem_dir            <- paste0(domain_dir, "/dem")
+  transects_dir      <- paste0(domain_dir, "/transects")
+  cross_sections_dir <- paste0(domain_dir, "/cross_sections")
+  cs_pts_dir         <- paste0(domain_dir, "/cs_pts")
+  
+  # named list of file paths
+  return(
+    list(
+      base_dir           = base_dir, 
+      domain_dir         = domain_dir,
+      flowlines_dir      = flowlines_dir,
+      domain_subset_dir  = domain_subset_dir,
+      dem_dir            = dem_dir,
+      transects_dir      = transects_dir,
+      cross_sections_dir = cross_sections_dir,
+      cs_pts_dir         = cs_pts_dir
+    )
+  )
+  
+}
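+
+# A minimal usage sketch (paths are hypothetical); the returned list avoids rebuilding
+# the path strings by hand:
+# paths <- get_new_domain_paths(base_dir = "/Users/me/data", domain_dirname = "my_new_domain")
+# paths$flowlines_dir   # "/Users/me/data/my_new_domain/flowlines"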
+
 # Given 2 character vectors of filenames both including VPU strings after a "nextgen_" string, match them together to
 # make sure they are aligned and in the same order
 # x is a character vector of file paths with a VPU ID preceded by a "nextgen_" string 
@@ -147,6 +217,31 @@ wb_intersects <- function(flowlines, trans, waterbodies) {
   # )
 }
 
+add_intersects_ids <- function(x, y, id_col) {
+  # make sure the CRS are the same
+  y <- sf::st_transform(y, sf::st_crs(x))
+  
+  # Perform the intersection
+  intersections <- sf::st_intersects(x, y)
+  
+  # add the intersected values to the first dataframe
+  x[[id_col]] <- unlist(lapply(intersections, function(idx) {
+    if (length(idx) > 0) {
+      paste0(unlist(y[[id_col]][idx]), collapse = ", ")
+    } else {
+      NA
+    }
+  }))
+  
+  return(x)
+}
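+
+# A minimal sketch of the expected behavior (mirrors the usage in domain_with_fema.R):
+# a flowline crossing 2 VPU polygons gets a comma separated ID string, e.g. "11, 12",
+# which unnest_ids() below can split back into individual IDs
+# flines <- add_intersects_ids(x = flines, y = VPU_boundaries, id_col = "VPUID")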
+
+unnest_ids <- function(ids) {
+  return(
+    unique(unlist(strsplit(unique(ids), ", ")))
+  )
+}
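+
+# e.g. unnest_ids(c("11, 12", "12, 13")) returns c("11", "12", "13")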
+
 #' Get the polygons that intersect with any of the linestring geometries
 #' This is just a wrapper around geos::geos_intersects_matrix. Takes in sf dataframes, uses geos, then outputs sf dataframes
 #' @param polygons polygon sf object. Default is NULL
@@ -292,3 +387,137 @@ add_predicate_group_id <- function(polys, predicate) {
   
 }
 
+
+# Utility function for matching transects to the cross section points that went through
+# get_improved_cs_pts() and were extended for improvement.
+# Returns the transects, with extended transects replacing their original counterparts.
+match_transects_to_extended_cs_pts <- function(transect_lines, fixed_cs_pts, crosswalk_id) {
+  # transect_lines = transects
+  # fixed_cs_pts   = fixed_pts
+  # crosswalk_id   = CROSSWALK_ID
+  
+  fixed_cs_pts <- nhdplusTools::rename_geometry(fixed_cs_pts, "geometry")
+  transect_lines    <- nhdplusTools::rename_geometry(transect_lines, "geometry")
+  
+  # get the counts of each point type to add this data to the transect_lines dataset
+  point_type_counts <- hydrofabric3D::get_point_type_counts(classified_pts = fixed_cs_pts, 
+                                                            crosswalk_id = crosswalk_id)
+  
+  # Check the number of cross sections that were extended
+  message("Subsetting cross section points generated after extending transect_lines...")
+  
+  # extract cross section points that have an "is_extended" value of TRUE
+  extended_pts <- 
+    fixed_cs_pts %>%
+    dplyr::filter(is_extended) %>%
+    hydrofabric3D::add_tmp_id(x = crosswalk_id)
+  
+  # extended_pts %>% 
+  #   get_unique_tmp_ids() %>% 
+  #   length()
+  
+  # extract transect_lines that have a "crosswalk_id" in the "extended_pts" dataset
+  update_transect_lines <- 
+    transect_lines %>%
+    hydrofabric3D::add_tmp_id(x = crosswalk_id) %>%
+    dplyr::filter(tmp_id %in% unique(extended_pts$tmp_id))
+  
+  cs_pt_uids    <- unique(hydrofabric3D::add_tmp_id(fixed_cs_pts, x = crosswalk_id)$tmp_id)
+  
+  # If any transect_lines were extended, update the transect_lines dataset, and overwrite local and S3 transect_lines geopackages
+  if (nrow(update_transect_lines) > 0) {
+    message("Updating ", nrow(update_transect_lines), " transect_lines")
+    
+    
+    update_transect_lines <- 
+      update_transect_lines %>% 
+      dplyr::rename(hy_id := !!sym(crosswalk_id)) 
+    
+    update_transect_lines <- 
+      update_transect_lines %>%
+      # apply extend_by_percent function to each transect line:
+      hydrofabric3D:::extend_by_percent(
+        pct        = EXTENSION_PCT,
+        length_col = "cs_lengthm"
+      )
+    
+    update_transect_lines <- 
+      update_transect_lines %>%  
+      dplyr::rename(!!sym(crosswalk_id) := hy_id)
+
+    # cs_pt_uids    <- unique(hydrofabric3D::add_tmp_id(fixed_cs_pts, x = get(crosswalk_id))$tmp_id)
+    # transect_uids <- unique(hydrofabric3D::add_tmp_id(transect_lines, x = get(crosswalk_id))$tmp_id)
+    
+    # Filter down to ONLY points that were finalized and rectified from rectify_cs_pts()
+    # Remove old transect_lines that have "tmp_id" in "extended_pts" (transect_lines that were unchanged and are "good_to_go")
+    # and then replace with old transect_lines with the "update_transect_lines"
+    out_transect_lines <-
+      transect_lines %>%
+      hydrofabric3D::add_tmp_id(x = crosswalk_id) %>%
+      dplyr::filter(tmp_id %in% cs_pt_uids) %>% 
+      dplyr::filter(!tmp_id %in% unique(extended_pts$tmp_id)) %>%
+      dplyr::bind_rows(
+        dplyr::mutate(update_transect_lines, is_extended = TRUE)
+      )
+    
+    # transect_lines %>% 
+    #   hydrofabric3D::add_tmp_id(x = "hy_id") %>%
+    #   # dplyr::filter(!tmp_id %in% unique(extended_pts$tmp_id)) %>%
+    #   dplyr::filter(tmp_id %in% unique(hydrofabric3D::add_tmp_id(fixed_pts, x = "hy_id")$tmp_id)) %>% # Subset down to the remaining tmp_ids in the fixed points
+    #   dplyr::filter(!tmp_id %in% unique(extended_pts$tmp_id)) %>% # remove the tmp_ids that we are going add back in with the extended versions of those tmp_ids
+    #   dplyr::bind_rows( # bring in the new updated extended transect_lines
+    #     dplyr::mutate(
+    #       update_transect_lines,
+    #       is_extended = TRUE
+    #     )
+    #   )  
+  } else {
+    # If no transect_lines were extended
+    out_transect_lines <- 
+      transect_lines %>%
+      hydrofabric3D::add_tmp_id(x = crosswalk_id) %>%
+      dplyr::filter(tmp_id %in% cs_pt_uids) %>% 
+      # dplyr::filter(tmp_id %in% unique(hydrofabric3D::add_tmp_id(fixed_cs_pts, x = get(crosswalk_id))$tmp_id)) %>%
+      dplyr::filter(!tmp_id %in% unique(extended_pts$tmp_id))
+  }
+  
+  # Finalize new transect_lines
+  out_transect_lines <- 
+    out_transect_lines %>%
+    dplyr::left_join(
+      point_type_counts, 
+      by = c(crosswalk_id, "cs_id")
+    ) %>%
+    dplyr::left_join(
+      dplyr::ungroup(
+        dplyr::slice(
+          dplyr::group_by(
+            dplyr::select(sf::st_drop_geometry(fixed_cs_pts),
+                          dplyr::any_of(crosswalk_id), 
+                          cs_id, bottom, left_bank, right_bank, valid_banks, has_relief
+            ),
+            dplyr::across(dplyr::any_of(c(crosswalk_id, "cs_id")))
+          ),
+          1
+        )
+      ),
+      by = c(crosswalk_id, "cs_id")
+    ) %>%
+    dplyr::select(
+      dplyr::any_of(crosswalk_id),
+      cs_source, cs_id, cs_measure, cs_lengthm,
+      # sinuosity,
+      is_extended,
+      left_bank_count, right_bank_count, channel_count, bottom_count,
+      bottom, left_bank, right_bank, valid_banks, has_relief,
+      geometry
+    ) %>% 
+    dplyr::mutate(
+      is_extended = ifelse(is.na(is_extended), FALSE, is_extended)
+    )  
+  
+  return(out_transect_lines)
+}
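+
+# A minimal usage sketch (mirrors the call sites in domain_with_fema.R / new_domain.R):
+# out_transects <- match_transects_to_extended_cs_pts(
+#   transect_lines = transects, 
+#   fixed_cs_pts   = fixed_pts, 
+#   crosswalk_id   = "hy_id"
+# )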
+
+
+

From f8ff3d499ec5693c66a0f36bb3fb287d5388e658 Mon Sep 17 00:00:00 2001
From: anguswg-ucsb <anguswatters@gmail.com>
Date: Fri, 27 Sep 2024 16:23:34 -0700
Subject: [PATCH 58/64] updated new domain runner to use new ML inputs and
 added a loop for replacing DEM points with diffusive DEM points where needed

---
 runners/cs_runner/03_inject_ml.R     |   5 +-
 runners/cs_runner/config.R           | 163 +++++-
 runners/cs_runner/config_vars.R      |  43 ++
 runners/cs_runner/domain_with_fema.R | 765 ++++++++++++++++++++++++++-
 runners/cs_runner/download_fema100.R |   2 +-
 runners/cs_runner/utils.R            |  53 +-
 6 files changed, 989 insertions(+), 42 deletions(-)

diff --git a/runners/cs_runner/03_inject_ml.R b/runners/cs_runner/03_inject_ml.R
index 9b34c03..8c4940b 100644
--- a/runners/cs_runner/03_inject_ml.R
+++ b/runners/cs_runner/03_inject_ml.R
@@ -296,8 +296,8 @@ for (i in 1:nrow(path_df)) {
                         # tmp_id %in% random_uids) 
   # hydrofabric3D::classify_points(cs_subset) %>%  hydrofabric3D::plot_cs_pts(color = "point_type")
   
-  starting_uids <- hydrofabric3D::get_unique_tmp_ids(cs_pts)
-  ending_uids   <- hydrofabric3D::get_unique_tmp_ids(final_cs)
+  starting_uids <- hydrofabric3D::get_unique_tmp_ids(cs_pts, x = "hy_id")
+  ending_uids   <- hydrofabric3D::get_unique_tmp_ids(final_cs, x = "hy_id")
 
   has_same_number_of_uids          <- length(starting_uids) == length(ending_uids)
   all_starting_uids_in_ending_uids <- all(starting_uids %in% ending_uids)
@@ -374,5 +374,6 @@ for (i in 1:nrow(path_df)) {
   
 }
   
+
     
   
\ No newline at end of file
diff --git a/runners/cs_runner/config.R b/runners/cs_runner/config.R
index 5a435b3..c70320d 100644
--- a/runners/cs_runner/config.R
+++ b/runners/cs_runner/config.R
@@ -112,31 +112,174 @@ list_ref_features <- paste0('#!/bin/bash
             echo "$S3_OBJECTS"'
 )
 
+# --------------------------------------------------------------------------
+# ---- Get a list of reference features geopackages ----
+# --------------------------------------------------------------------------
+
+# Run the script to get a list of the nextgen geopackages that matched the regular expression above
+ref_features <- system(list_ref_features, intern = TRUE)
+
+# ref features datasets
+ref_features_keys  <- paste0(S3_BUCKET_REF_FEATURES_URI, ref_features)
+ref_features_files <- paste0(REF_FEATURES_DIR, "gpkg/", ref_features)
+
 # --------------------------------------------------------------------------
 # ---- Create empty file structure for a "new_domain" ----
 # --------------------------------------------------------------------------
 
-create_new_domain_dirs(BASE_DIR, NEW_DOMAIN_DIRNAME)
+create_new_domain_dirs(BASE_DIR, NEW_DOMAIN_DIRNAME, with_output = TRUE)
 
 # --------------------------------------------------------------------------
 # ---- Create empty file structure for a "domain_with_fema" ----
 # --------------------------------------------------------------------------
 
-create_new_domain_dirs(BASE_DIR, DOMAIN_WITH_FEMA_DIRNAME)
+create_new_domain_dirs(BASE_DIR, DOMAIN_WITH_FEMA_DIRNAME, with_output = TRUE)
 
 # --------------------------------------------------------------------------
-# ---- Create empty file structure for a "new_conus_domain" ----
+# ---- Get locations of ML parquet files in S3 ---
 # --------------------------------------------------------------------------
 
-create_new_domain_dirs(BASE_DIR, NEW_CONUS_DOMAIN_DIRNAME)
+VPU_ML_BATHYMETRY_URIS <- unlist(
+  lapply(VPU_ML_BATHYMETRY_S3_DIRS, function(s3_dir) {
+    s3_file <- list_s3_objects(s3_dir, ".parquet$", AWS_PROFILE)
+    paste0(s3_dir, s3_file)
+  })
+)
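+
+# NOTE: each element should be a full S3 URI to a VPU's ML parquet file; if a VPU had no
+# matching parquet, the element is just the directory URI, which the endsWith(".parquet")
+# check in the download loop below will skip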
+
+# -------------------------------------------------------------------------------------
+# ---- Download ML parquets for DOMAIN_WITH_FEMA ----
+# -------------------------------------------------------------------------------------
+
+# Parse the selected S3 object keys for the ML parquet files and copy them to the local destination directory if the file does NOT already exist there
+for (s3_uri in VPU_ML_BATHYMETRY_URIS) {
+  # message(s3_uri)
+  # s3_uri <- "s3://lynker-hydrofabric/hydrofabric/nextgen/bathymetry/multisource_river_attributes/vpuid=18/part-0.parquet"
+  # s3_uri <- "s3://lynker-hydrofabric/hydrofabric/nextgen/bathymetry/multisource_river_attributes/vpuid=03W/part-0.parquet"
+  # s3_uri <- "s3://lynker-hydrofabric/hydrofabric/nextgen/bathymetry/multisource_river_attributes/vpuid=21/"
+  
+  is_parquet <- endsWith(s3_uri, ".parquet")
+  vpu_id     <- gsub(".*vpuid=([a-zA-Z0-9]+).*", "\\1", s3_uri)
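+  # e.g. ".../multisource_river_attributes/vpuid=03W/part-0.parquet" -> "03W"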
+  
+  message("Checking S3 bucket for VPU ", vpu_id, " ML data...")
+  
+  if (is_parquet) {
+    s3_file    <- basename(s3_uri)
+    # vpu_id <- gsub(".*vpuid=([a-zA-Z0-9]+).*", "\\1", s3_uri)
+    new_file_name <-     paste0(vpu_id, "_ml.parquet")
+    # new_file_name <- gsub("-", "_", paste0(vpu_id, "_", s3_file))
+    
+    local_save_path <- paste0(DOMAIN_WITH_FEMA_ML_DIR, "/", new_file_name)
+    
+    if(!file.exists(local_save_path)) {
+      copy_cmd <- paste0('aws s3 cp ', s3_uri,
+                         " ", local_save_path, " --profile ", AWS_PROFILE)
+      
+      message("S3 object:\n > '", s3_uri, "'")
+      message("Downloading S3 object to:\n > '", local_save_path, "'")
+      # message("Copy command:\n > '", copy_cmd, "'")
+      
+      system(copy_cmd)
+      
+      message(" > '", new_file_name, "' download complete!")
+      message("----------------------------------")
+    } else {
+      message("File already exists at:\n > '", local_save_path, "'")
+    }
+    
+  } else {
+    message("No S3 bucket ML data for VPU ", vpu_id, "...")
+  }
+
+}
+
+# VPU_ML_BATHYMETRY_PATHS <- list.files(DOMAIN_WITH_FEMA_ML_DIR, full.names = T)
+# 
+# ml_outputs <- lapply(VPU_ML_BATHYMETRY_PATHS, function(prq) {
+#       vpu_id     <- gsub(".*ml/([a-zA-Z0-9]+).*", "\\1", prq)
+#       arrow::read_parquet(prq) %>% 
+#         dplyr::mutate(vpu_id = vpu_id) 
+#       }
+#     ) %>% 
+#   dplyr::bind_rows()
 
 # --------------------------------------------------------------------------
-# ---- Get a list of reference features geopackages ----
+# ---- Get locations of diffusive domain DEM files in S3 ----
 # --------------------------------------------------------------------------
 
-# Run the script to get a list of the nextgen geopackages that matched the regular expression above
-ref_features <- system(list_ref_features, intern = TRUE)
+COASTAL_BATHY_DEM_S3_URIS    <- paste0(COASTAL_BATHY_DEM_S3_DIR_URI, 
+                                       list_s3_objects(COASTAL_BATHY_DEM_S3_DIR_URI, ".tif$", AWS_PROFILE)
+                                       )
+
+# -------------------------------------------------------------------------------------
+# ---- Download diffusive domain DEM files from S3 for DOMAIN_WITH_FEMA ----
+# -------------------------------------------------------------------------------------
+
+# Parse the selected S3 object keys for the diffusive domain DEM files and copy them to the local destination directory if the file does NOT already exist there
+for (s3_uri in COASTAL_BATHY_DEM_S3_URIS) {
+  message(s3_uri)
+  
+  is_tif <- endsWith(s3_uri, ".tif")
+  # vpu_id     <- gsub(".*vpuid=([a-zA-Z0-9]+).*", "\\1", s3_uri)
+  
+  message("Checking S3 bucket for DEM data...")
+  
+  if (is_tif) {
+    
+    s3_file    <- basename(s3_uri)
+    # vpu_id <- gsub(".*vpuid=([a-zA-Z0-9]+).*", "\\1", s3_uri)
+    # new_file_name <-     paste0(vpu_id, "_ml.parquet")
+    # new_file_name <- gsub("-", "_", paste0(vpu_id, "_", s3_file))
+    
+    local_save_path <- paste0(DOMAIN_WITH_FEMA_DEM_DIR, "/", s3_file)
+    
+    if (!file.exists(local_save_path)) {
+      copy_cmd <- paste0('aws s3 cp ', s3_uri,
+                         " ", local_save_path, " --profile ", AWS_PROFILE)
+      
+      message("S3 object:\n > '", s3_uri, "'")
+      message("Downloading S3 object to:\n > '", local_save_path, "'")
+      # message("Copy command:\n > '", copy_cmd, "'")
+      
+      system(copy_cmd)
+      
+      message(" > '", s3_file, "' download complete!")
+      message("----------------------------------")
+    } else {
+      message("File already exists at:\n > '", local_save_path, "'")
+    }
+    
+  } else {
+    message("No S3 bucket DEM data... ")
+  }
+  
+}
+
+COASTAL_BATHY_DEM_PATHS <- list.files(DOMAIN_WITH_FEMA_DEM_DIR, full.names = TRUE)
+
 
-# ref features datasets
-ref_features_keys  <- paste0(S3_BUCKET_REF_FEATURES_URI, ref_features)
-ref_features_files <- paste0(REF_FEATURES_DIR, "gpkg/", ref_features)
diff --git a/runners/cs_runner/config_vars.R b/runners/cs_runner/config_vars.R
index dab3c25..7b86b59 100644
--- a/runners/cs_runner/config_vars.R
+++ b/runners/cs_runner/config_vars.R
@@ -236,7 +236,50 @@ DOMAIN_WITH_FEMA_DEM_PATH        <- "/vsicurl/https://prd-tnm.s3.amazonaws.com/S
 DOMAIN_WITH_FEMA_TRANSECTS_DIRNAME         <- "transects"
 DOMAIN_WITH_FEMA_CS_PTS_DIRNAME            <- "cs_pts"
 DOMAIN_WITH_FEMA_CROSS_SECTIONS_DIRNAME    <- "cross_sections"
+DOMAIN_WITH_FEMA_OUTPUT_DIRNAME            <- "outputs"
+DOMAIN_WITH_FEMA_VPU_SUBSETS_DIRNAME       <- "vpu-subsets"
+DOMAIN_WITH_FEMA_ML_DIRNAME                <- "ml"
 
 DOMAIN_WITH_FEMA_TRANSECTS_DIR          <- paste0(DOMAIN_WITH_FEMA_DIR, "/", DOMAIN_WITH_FEMA_TRANSECTS_DIRNAME)
 DOMAIN_WITH_FEMA_CS_PTS_DIR             <- paste0(DOMAIN_WITH_FEMA_DIR, "/", DOMAIN_WITH_FEMA_CS_PTS_DIRNAME)
 DOMAIN_WITH_FEMA_CROSS_SECTIONS_DIR     <- paste0(DOMAIN_WITH_FEMA_DIR, "/", DOMAIN_WITH_FEMA_CROSS_SECTIONS_DIRNAME)
+DOMAIN_WITH_FEMA_OUTPUT_DIR             <- paste0(DOMAIN_WITH_FEMA_DIR, "/", DOMAIN_WITH_FEMA_OUTPUT_DIRNAME)
+DOMAIN_WITH_FEMA_VPU_SUBSETS_DIR        <- paste0(DOMAIN_WITH_FEMA_DIR, "/", DOMAIN_WITH_FEMA_VPU_SUBSETS_DIRNAME)
+DOMAIN_WITH_FEMA_ML_DIR                 <- paste0(DOMAIN_WITH_FEMA_DIR, "/", DOMAIN_WITH_FEMA_ML_DIRNAME)
+
+ML_AUXILIARY_DATA_S3_URI <- paste0(S3_BUCKET_BASE_URI, "bathymetry/ml_auxiliary_data")
+
+LYNKER_HYDROFABRIC_S3_BUCKET_BASE_URI   <- "s3://lynker-hydrofabric/"
+ML_BATHYMETRY_S3_DATA_DIR               <- "hydrofabric/nextgen/bathymetry/multisource_river_attributes/"
+ML_BATHYMETRY_S3_URI                    <- paste0(LYNKER_HYDROFABRIC_S3_BUCKET_BASE_URI, ML_BATHYMETRY_S3_DATA_DIR)
+VPU_ML_BATHYMETRY_S3_DIRS               <- paste0(ML_BATHYMETRY_S3_URI, "vpuid=", nhdplusTools::vpu_boundaries$VPUID, "/")
+
+COASTAL_BATHY_DEM_S3_DIR     <- "coastal_bathy/diffusive_domain/"
+COASTAL_BATHY_DEM_S3_DIR_URI <- paste0(LYNKER_HYDROFABRIC_S3_BUCKET_BASE_URI, COASTAL_BATHY_DEM_S3_DIR)
+# COASTAL_BATHY_DEM_S3_URIS    <- paste0(COASTAL_BATHY_DEM_S3_DIR_URI, list_s3_objects(COASTAL_BATHY_DEM_S3_DIR_URI, ".tif$", AWS_PROFILE))
+
+
+# # # --------------------------------------------------------------------------
+# # # ---- Get locations of diffusive domain DEM files in S3 ----
+# # # --------------------------------------------------------------------------
+# COASTAL_BATHY_DEM_S3_URIS    <- paste0(COASTAL_BATHY_DEM_S3_DIR_URI, list_s3_objects(COASTAL_BATHY_DEM_S3_DIR_URI, ".tif$", AWS_PROFILE))
+# 
+# # COASTAL_BATHY_DEM_S3_URIS    <- paste0(COASTAL_BATHY_DEM_S3_DIR_URI, 
+# #                                        list_s3_objects(COASTAL_BATHY_DEM_S3_DIR_URI, ".tif$", AWS_PROFILE)
+# #                                        )
+
+
diff --git a/runners/cs_runner/domain_with_fema.R b/runners/cs_runner/domain_with_fema.R
index b425de5..51ed175 100644
--- a/runners/cs_runner/domain_with_fema.R
+++ b/runners/cs_runner/domain_with_fema.R
@@ -2,17 +2,10 @@
 source("runners/cs_runner/config.R")
 source("runners/cs_runner/utils.R")
 
-# # # # load libraries
+# load libraries
 # library(hydrofabric3D)
 # library(dplyr)
 # library(sf)
-# install.packages("devtools")
-
-# # transect bucket prefix
-# S3_TRANSECTS_DIR <- paste0(S3_BUCKET_URI, VERSION, "/3D/transects/")
-
-# paths to NEW DOMAIN datasets 
-# NEW_DOMAIN_FILES    <- list.files(NEW_DOMAIN_FLOWLINES_DIR, full.names = TRUE)
 
 # Unique Flowline ID column name
 CROSSWALK_ID <- "id"
@@ -63,6 +56,10 @@ flines <- sf::read_sf(DOMAIN_WITH_FEMA_FLOWLINES_PATH, layer = "flowpaths",
                       # query = query
                       )
 
+# bad_ids <- c("wb-14538", "wb-14686",  "wb-14687")
+# flines <- 
+#   flines %>% 
+#   dplyr::filter(id %in% bad_ids)
 # ---------------------------------------------------------------------
 # --- Split flowlines by VPU
 # ---------------------------------------------------------------------
@@ -73,16 +70,31 @@ VPU_boundaries   <- sf::st_transform(nhdplusTools::vpu_boundaries, sf::st_crs(fl
 # add a VPU ID column to each flowline
 flines <- add_intersects_ids(x = flines, y = VPU_boundaries, id_col = "VPUID")
 
+# TODO: improve this; manually remap some VPUIDs so that there are 
+# TODO: fewer small flowline subsets created when only a small bit of a different VPU is intersected 
+flines <- 
+  flines %>% 
+  dplyr::mutate(
+    VPUID = dplyr::case_when(
+      VPUID == "12"     ~ "11, 12, 13",
+      VPUID == "12, 13" ~ "11, 12, 13",
+      VPUID == "11, 12" ~ "11, 12, 13",
+      VPUID == "01"     ~ "01, 02",
+      TRUE              ~ VPUID
+    )
+  ) 
+
 # set of unique VPUs
 VPU_IDS <- unnest_ids(flines$VPUID)
+VPU_IDS
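+
+# unnest_ids() (defined in utils.R) is assumed to split the comma separated
+# VPUID strings (e.g. "11, 12, 13") into a unique character vector, roughly:
+# unique(trimws(unlist(strsplit(flines$VPUID, ","))))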
 
 # all possible FEMA dirs
 AOI_FEMA_DIRS <- FEMA_VPU_SUBFOLDERS[basename(FEMA_VPU_SUBFOLDERS) %in% paste0("VPU_", VPU_IDS) ]
 
-# flines %>% 
-#   sf::st_drop_geometry() %>% 
-#   dplyr::group_by(VPUID) %>% 
-#   dplyr::count()
+flines %>%
+  sf::st_drop_geometry() %>%
+  dplyr::group_by(VPUID) %>%
+  dplyr::count()
 # unique(flines$VPUID)
 
 # calculate bankfull width
@@ -92,6 +104,7 @@ flines <-
     bf_width = hydrofabric3D::calc_powerlaw_bankful_width(tot_drainage_areasqkm)
     # bf_width = exp(0.700    + 0.365* log(tot_drainage_areasqkm))
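+    # e.g. per the commented power-law above, tot_drainage_areasqkm = 100 gives
+    # exp(0.700 + 0.365 * log(100)) ~ 10.8 m of bankfull width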
   ) %>%
+  # hydrofabric3D::add_powerlaw_bankful_width("tot_drainage_areasqkm", 50) %>% 
   dplyr::select(
     dplyr::any_of(CROSSWALK_ID),
     VPUID,
@@ -115,11 +128,12 @@ fline_groups <- dplyr::group_split(flines, VPUID)
 
 # loop through each VPU group of flowlines and save a local copy
 for (i in seq_along(fline_groups)) {
+  
   flowlines  <- fline_groups[[i]]
   VPU        <- unique(flowlines$VPUID)
   
   # save the flowlines subset 
-  vpu_flowlines_path <- paste0(DOMAIN_WITH_FEMA_FLOWLINES_DIR, "/", gsub(", ", "_", VPU), "_flowlines.gpkg")
+  vpu_flowlines_path <- paste0(DOMAIN_WITH_FEMA_VPU_SUBSETS_DIR, "/", gsub(", ", "_", VPU), "_flowlines.gpkg")
   
   sf::write_sf(
     flowlines, 
@@ -149,7 +163,7 @@ for (i in seq_along(fline_groups)) {
     net               = flowlines,                        # flowlines network
     id                = CROSSWALK_ID,                     # Unique feature ID
     cs_widths         = pmax(50, flowlines$bf_width * 11),     # cross section width of each "id" linestring ("hy_id")
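+    # (so a flowline with bf_width = 10 m gets a ~110 m transect, floored at 50 m)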
-    # cs_widths         = pmax(50, flines$bf_width),     # cross section width of each "id" linestring ("hy_id")
+    # cs_widths         = pmax(50, flowlines$bf_width),     # cross section width of each "id" linestring ("hy_id")
     num               = 10,                            # number of cross sections per "id" linestring ("hy_id")
     smooth            = TRUE,                          # smooth lines
     densify           = 3,                             # densify linestring points
@@ -241,7 +255,7 @@ for (i in seq_along(fline_groups)) {
       geometry
     )
   
-  out_path <- paste0(DOMAIN_WITH_FEMA_TRANSECTS_DIR, "/",   
+  out_path <- paste0(DOMAIN_WITH_FEMA_VPU_SUBSETS_DIR, "/",   
                      paste(GROUP_VPU_IDS, collapse = "_"), "_transects.gpkg" 
                      )
   
@@ -253,17 +267,18 @@ for (i in seq_along(fline_groups)) {
   message("Finished writting transects!")
   
 }
+
 # ---------------------------------------------------------------------
 # --- Bind together transects into single dataset
 # ---------------------------------------------------------------------
+vpu_subset_paths <- list.files(DOMAIN_WITH_FEMA_VPU_SUBSETS_DIR, full.names = T)
 
-transect_paths <- list.files(DOMAIN_WITH_FEMA_TRANSECTS_DIR, full.names = T)
-flowline_paths <- list.files(DOMAIN_WITH_FEMA_FLOWLINES_DIR, full.names = T)
-flowline_paths <- flowline_paths[!basename(flowline_paths) %in% c("flowlines_subset.gpkg", "ls_conus.gpkg")]
+transect_paths <- vpu_subset_paths[grepl("_transects.gpkg", vpu_subset_paths)]
+flowline_paths <- vpu_subset_paths[grepl("_flowlines.gpkg", vpu_subset_paths)]
 
 # read each transect geopackages into a list 
 transects <- lapply(transect_paths, function(gpkg) sf::read_sf(gpkg))
-transects[[5]]
+# transects[[5]]
 
 transects <- 
   transects %>% 
@@ -278,7 +293,7 @@ paths_df <- data.frame(
   )
 
 for (i in 1:nrow(paths_df)) {
-  
+  # i = 2
   VPU     <- paths_df$vpu[i]
   t_path  <- paths_df$t[i]
   f_path  <- paths_df$f[i]
@@ -295,13 +310,14 @@ for (i in 1:nrow(paths_df)) {
   
   message("Reading in flowlines... \n > ", f_path)
   # read in nextgen data
-  flines <- sf::read_sf(f_path)
+  flowlines <- sf::read_sf(f_path)
   
   message("Extracting cross section points...")
   # ----------------------------------------------------------------------------------------------------------------
   # ---- STEP 1: Extract cs points from DEM ----
   # ----------------------------------------------------------------------------------------------------------------
   # system.time({
+  
   # get cross section point elevations
   cs_pts <- hydrofabric3D::cross_section_pts(
     cs             = transects,
@@ -344,8 +360,8 @@ for (i in 1:nrow(paths_df)) {
   # system.time({
     fixed_pts <- hydrofabric3D::get_improved_cs_pts(
       cs_pts         = cs_pts,    # cross section points generated from hydrofabric3D::cross_section_pts()
-      net            = flines,    # original flowline network
-      # net            = flines,    # original flowline network
+      net            = flowlines,    # original flowline network
+      # net            = flowlines,    # original flowline network
       transects      = transects, # original transect lines
       crosswalk_id   = CROSSWALK_ID,
       points_per_cs  = NULL, 
@@ -456,16 +472,715 @@ for (i in 1:nrow(paths_df)) {
   
   sf::write_sf(
     out_transects,
-    paste0(DOMAIN_WITH_FEMA_TRANSECTS_DIR, "/", VPU, "_transects.gpkg") 
+    paste0(DOMAIN_WITH_FEMA_VPU_SUBSETS_DIR, "/", VPU, "_transects.gpkg") 
   )
   
   arrow::write_parquet(
     fixed_pts,
-    paste0(DOMAIN_WITH_FEMA_CS_PTS_DIR, "/", VPU, "_cs_pts.gpkg") 
+    paste0(DOMAIN_WITH_FEMA_VPU_SUBSETS_DIR, "/", VPU, "_cs_pts.parquet") 
   )
   
 }
 
 # TODO: Save all cross section points + transects into single geopackages (transects.gpkg, cs_pts.gpkg)
 
+# ---------------------------------------------------------------------------------
+# ---- Merge all the transects / cs points into single files  
+# ---------------------------------------------------------------------------------
+
+vpu_subset_paths <- list.files(DOMAIN_WITH_FEMA_VPU_SUBSETS_DIR, full.names = T)
+
+# ---------------------------------------------------------------------------------
+# ---- Read all transects and save to single gpkg ----
+# ---------------------------------------------------------------------------------
+
+transect_paths <- vpu_subset_paths[grepl("_transects.gpkg", vpu_subset_paths)]
+
+transects <- lapply(transect_paths, function(gpkg) {sf::read_sf(gpkg) }) %>% 
+  dplyr::bind_rows() %>% 
+  hydroloom::rename_geometry("geometry")
+
+TRANSECTS_OUTPUT_PATH <- paste0(DOMAIN_WITH_FEMA_OUTPUT_DIR, "/transects.gpkg") 
+
+sf::write_sf(
+  transects,
+  TRANSECTS_OUTPUT_PATH
+)
+
+# ---------------------------------------------------------------------------------
+# ---- Read all CS PTs and save to single parquet ----
+# ---------------------------------------------------------------------------------
+
+cs_pts_paths   <- vpu_subset_paths[grepl("_cs_pts.parquet", vpu_subset_paths)]
+cs_pts <- lapply(cs_pts_paths, function(prq) arrow::read_parquet(prq)) %>% 
+  dplyr::bind_rows()
+
+CS_PTS_OUTPUT_PATH <- paste0(DOMAIN_WITH_FEMA_OUTPUT_DIR, "/cs_pts.parquet")
+
+arrow::write_parquet(
+  cs_pts,
+  CS_PTS_OUTPUT_PATH
+)
+
+# ---------------------------------------------------------------------------------
+# ---- Inject ML predicted top widths / Dingman's R ----
+# ---------------------------------------------------------------------------------
+
+# ---------------------------------------------------------------------------------
+# ---- Read in ML data ----
+# ---------------------------------------------------------------------------------
+VPU_ML_BATHYMETRY_PATHS <- list.files(DOMAIN_WITH_FEMA_ML_DIR, full.names = T)
+
+ML_CROSSWALK_ID <- "id"
+
+ml_outputs <- lapply(VPU_ML_BATHYMETRY_PATHS, function(prq) {
+  vpu_id     <- gsub(".*ml/([a-zA-Z0-9]+).*", "\\1", prq)
+  arrow::read_parquet(prq) %>% 
+    dplyr::mutate(vpu_id = vpu_id) 
+}
+) %>% 
+  dplyr::bind_rows() %>% 
+  dplyr::select(
+    dplyr::any_of(ML_CROSSWALK_ID),
+    vpu_id,
+    owp_y_bf, owp_y_inchan, 
+    owp_tw_bf, owp_tw_inchan,
+    owp_dingman_r
+                )
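+
+# NOTE: the gsub() above extracts the VPU ID from the parquet filename,
+# e.g. a path like "<DOMAIN_WITH_FEMA_ML_DIR>/01.parquet" yields vpu_id = "01"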
+
+# rename ML_CROSSWALK_ID (unique ID) to match the CROSSWALK_ID in CS PTS
+# TODO: This assumes the IDs correspond with each other (built from the same flowlines network)
+names(ml_outputs)[names(ml_outputs) == ML_CROSSWALK_ID] = CROSSWALK_ID
+
+# Keep only distinct ID rows
+ml_outputs <- 
+  ml_outputs %>% 
+  dplyr::distinct(
+    dplyr::across(dplyr::any_of(CROSSWALK_ID)),
+    vpu_id, 
+    owp_y_bf, owp_y_inchan, 
+    owp_tw_bf, owp_tw_inchan,
+    owp_dingman_r
+  )
+
+# ---------------------------------------------------------------------------------
+# ---- Read in CS PTS data ----
+# ---------------------------------------------------------------------------------
+CS_PTS_OUTPUT_PATH <- paste0(DOMAIN_WITH_FEMA_OUTPUT_DIR, "/cs_pts.parquet")
+
+cs_pts  <- arrow::read_parquet(CS_PTS_OUTPUT_PATH)
+
+# ---------------------------------------------------------------------------------
+# ---- Join CS PTS  with ML data ---
+# ---------------------------------------------------------------------------------
+message(round(Sys.time()), " - Joining ML width/depths estimates to cross section points...")
+
+# ml_outputs %>% 
+#   dplyr::group_by(id) %>% 
+#   dplyr::count(id) %>% 
+#   dplyr::arrange(-n)
+
+# join the ML outputs data to the cross section points
+cs_pts <-
+  cs_pts %>% 
+  dplyr::left_join(
+    dplyr::select(ml_outputs, 
+                  dplyr::any_of(CROSSWALK_ID),
+                  owp_tw_inchan, 
+                  owp_y_inchan, 
+                  owp_tw_bf, 
+                  owp_y_bf, 
+                  owp_dingman_r
+                  ),
+    by = CROSSWALK_ID
+  ) 
+
+# ---------------------------------------------------------------------------------
+# ---- Fixing negative depths/widths estimates ----
+# ---------------------------------------------------------------------------------
+message(round(Sys.time()), " - Replacing any negative width/depth estimates with cross section bottom lengths...")
+
+cs_bottom_lengths <- hydrofabric3D::get_cs_bottom_length(cross_section_pts = cs_pts, crosswalk_id = CROSSWALK_ID)
+
+# TODO: for now we replace any negative TW values with the length of the bottom of the cross section
+# TODO: This method + the negative model output values both need to be looked into (04/05/2024)
+cs_pts <-
+  cs_pts %>% 
+  dplyr::left_join(
+    cs_bottom_lengths, 
+    by = c(CROSSWALK_ID, "cs_id")
+    # by = c("hy_id", "cs_id")
+  ) %>% 
+  dplyr::mutate(
+    owp_tw_inchan = dplyr::case_when(
+      owp_tw_inchan <= 0 ~ bottom_length,
+      TRUE               ~ owp_tw_inchan
+    ),
+    owp_tw_bf = dplyr::case_when(
+      owp_tw_bf <= 0     ~ bottom_length,
+      TRUE               ~ owp_tw_bf
+    )
+  ) %>% 
+  dplyr::select(-bottom_length)
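+
+# e.g. a cross section with owp_tw_inchan = -2.5 and a 12 m bottom length gets
+# owp_tw_inchan = 12; non-negative estimates pass through unchanged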
+
+# extract any cross sections that didn't get matched with an "hf_id" and/or have no ML data
+# TODO: look at this stuff with Arash (04/09/2024)
+missing_cs <- 
+  cs_pts %>% 
+  dplyr::filter(
+          is.na(.data[[CROSSWALK_ID]]) | 
+          is.na(owp_tw_inchan) | is.na(owp_y_inchan) | 
+          is.na(owp_tw_bf) | is.na(owp_y_bf) |
+          is.na(owp_dingman_r)
+        ) %>% 
+  hydrofabric3D::add_tmp_id(x = CROSSWALK_ID)
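+
+# (add_tmp_id() is assumed to build a "<id>_<cs_id>" key per cross section,
+#  used below to filter whole cross sections by set membership)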
+
+# TODO: Delete this; keeping it for now to inspect the mismatch between "hy_id" and "hf_id"
+# readr::write_csv(
+#   dplyr::select(missing_cs, -tmp_id), 
+#   paste0(META_PATH, "nextgen_", path_df$vpu[i], "_cross_sections_missing_hf_ids.csv")
+# )
+
+# Split the cross sections into 2 groups:
+# - "Inchannel cs" group are points with BOTH valid banks AND relief --> These get the INCHANNEL TW and Y values from the ML model
+# - "Bankful cs" group are points WITHOUT valid banks OR any relief  --> These get the BANKFUL TW and Y values from the ML model
+inchannel_cs <-
+  cs_pts %>% 
+  hydrofabric3D::add_tmp_id(x = CROSSWALK_ID) %>% 
+  dplyr::filter(!tmp_id %in% unique(missing_cs$tmp_id)) %>%  # NOTE: makes sure to remove any of the "missing" cross sections without data
+  dplyr::select(-tmp_id) %>% 
+  dplyr::filter(valid_banks & has_relief) %>% 
+  # NOTE: temporarily rename the top widths, depths, and dingman's R columns so they 
+  # work nicely with the "hydrofabric3D::add_cs_bathymetry()" function which takes a dataframe of cross section points 
+  # with "TW", "DEPTH", and "DINGMAN_R" columns for each cross section
+  dplyr::rename(        
+    TW        = owp_tw_inchan,    
+    DEPTH     = owp_y_inchan,
+    DINGMAN_R = owp_dingman_r
+  )
+
+bankful_cs   <- 
+  cs_pts %>% 
+  hydrofabric3D::add_tmp_id(x = CROSSWALK_ID) %>% 
+  dplyr::filter(!tmp_id %in% unique(missing_cs$tmp_id)) %>% 
+  dplyr::select(-tmp_id) %>% 
+  dplyr::filter(!valid_banks | !has_relief) %>% 
+  dplyr::rename(
+    TW        = owp_tw_bf, 
+    DEPTH     = owp_y_bf,
+    DINGMAN_R = owp_dingman_r
+  )
+
+
+# sanity check that all rows are accounted for after splitting up data
+split_kept_all_rows <- nrow(cs_pts) == (nrow(bankful_cs) + nrow(inchannel_cs) + nrow(missing_cs))
+# split_kept_all_rows <- nrow(cs_pts) == nrow(bankful_cs) + nrow(inchannel_cs)
+
+if (!split_kept_all_rows) {
+  warning(paste0("When splitting cross section points into 'bankful' and 'inchannel' groups,", 
+                 "\nsome points were not put in either group.", 
+                 "\nLikely due to 'valid_banks' and/or 'has_relief' columns have either missing ", 
+                 "values or contain values other than TRUE/FALSE")
+  )
+}
+
+message(round(Sys.time()), " - Adding cross section bathymetry using inchannel widths/depths estimates...")
+# tmp <- 
+#   inchannel_cs %>% 
+#   dplyr::slice(1:10000)
+system.time({
+  
+# Add bathymetry using "inchannel" estimates
+inchannel_cs <- hydrofabric3D::add_cs_bathymetry(
+  cross_section_pts = inchannel_cs,
+  # cross_section_pts = tmp,
+  crosswalk_id      = CROSSWALK_ID
+)
+
+})
+
+# arrow::write_parquet(inchannel_cs, "/Users/anguswatters/Desktop/test_ml_cs_pts_06.parquet")
+# ml_subset %>%
+#   dplyr::filter(hy_id == "wb-1005207") %>%
+#   dplyr::select(owp_y_inchan, owp_tw_inchan) %>% 
+#   .$owp_y_inchan
+message(round(Sys.time()), " - Adding cross section bathymetry using bankful widths/depths estimates...")
+system.time({
+  
+# Add bathymetry using "bankful" estimates
+bankful_cs <- hydrofabric3D::add_cs_bathymetry(
+  cross_section_pts = bankful_cs,
+  # cross_section_pts = dplyr::slice(bankful_cs, 1:10000),
+  crosswalk_id      = CROSSWALK_ID
+)
+
+})
+
+# combine the inchannel and bankful cross section points back together, fill out missing values and reclassify the points
+final_cs <- dplyr::bind_rows(
+                dplyr::select(
+                  inchannel_cs,
+                  # inchannel_cs2,
+                  # -hf_id, 
+                  -TW, -DEPTH, -DINGMAN_R, 
+                  # -is_dem_point,
+                  -dplyr::starts_with("owp")
+                ),
+                dplyr::select(
+                  bankful_cs,
+                  # bankful_cs2,
+                  # -hf_id, 
+                  -TW, -DEPTH, -DINGMAN_R, 
+                  # -is_dem_point,
+                  -dplyr::starts_with("owp")
+                ),
+                dplyr::select(
+                  dplyr::mutate(
+                    missing_cs,
+                    is_dem_point = FALSE
+                  ),
+                  # -hf_id, 
+                  # -is_dem_point,
+                  -dplyr::starts_with("owp"), 
+                  -tmp_id
+                )
+              ) %>%
+  dplyr::group_by(dplyr::across(dplyr::any_of(c(CROSSWALK_ID, "cs_id")))) %>% 
+  # dplyr::group_by(hy_id, cs_id) %>%
+  tidyr::fill(
+    c(cs_lengthm, Z_source)
+  ) %>%
+  dplyr::ungroup() %>%
+  dplyr::select(
+    -point_type,
+    -class,
+    -bottom, -left_bank, -right_bank,
+    -has_relief, -valid_banks
+  )
+
+# arrow::write_parquet(final_cs, "/Users/anguswatters/Desktop/tmp.parquet")
+# final_cs <- arrow::read_parquet("/Users/anguswatters/Desktop/tmp.parquet")
+
+
+message(round(Sys.time()), " - Reclassifying cross section point types...")
+
+# reclassify
+final_cs <- hydrofabric3D::classify_points(cs_pts = final_cs, 
+                                            crosswalk_id = CROSSWALK_ID,
+                                            pct_of_length_for_relief = PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF
+                                            )
+
+starting_uids <- hydrofabric3D::get_unique_tmp_ids(cs_pts, x = CROSSWALK_ID)
+ending_uids   <- hydrofabric3D::get_unique_tmp_ids(final_cs, x = CROSSWALK_ID)
+
+has_same_number_of_uids          <- length(starting_uids) == length(ending_uids)
+all_starting_uids_in_ending_uids <- all(starting_uids %in% ending_uids)
+all_ending_uids_in_starting_uids <- all(ending_uids %in% starting_uids)
+
+# throw some warnings if:
+# - the number of uids in the input is different from the output
+# - there are missing hy_id/cs_id
+if (!has_same_number_of_uids) {
+  warning(paste0("The number of unique hy_id/cs_id is different in the ",
+                 "starting cross section points from the final cross section points:",
+                 "\n > Starting number of unique hy_id/cs_id: ", length(starting_uids),
+                 "\n > Ending number of unique hy_id/cs_id: ", length(ending_uids)
+  ))
+}
+
+if (!all_starting_uids_in_ending_uids) {
+  number_uids_not_in_ending_uids <- length(starting_uids[!starting_uids %in% ending_uids])
+  
+  # starting_uids %in% ending_uids
+  warning(
+    paste0("Missing hy_id/cs_id in output that are in the starting input cross section points: ",
+           "\n > Number of hy_id/cs_id missing: ", number_uids_not_in_ending_uids
+    )
+  )
+  
+  # warning(paste0(number_uids_not_in_ending_uids, " hy_id/cs_id from the input cross section points ",
+  #          "is missing from the output cross section points"))
+  
+}
+
+# ---------------------------------------------------------------------------------
+# ---- Write final cross section points data ----
+# ---------------------------------------------------------------------------------
+
+CROSS_SECTIONS_OUTPUT_PATH <- paste0(DOMAIN_WITH_FEMA_OUTPUT_DIR, "/cross_sections.parquet")
+message(round(Sys.time()), " - Saving ML augmented cross section points to:\n - filepath: '", CROSS_SECTIONS_OUTPUT_PATH, "'")
+
+# save cross section points as a parquet to out_path (domain/outputs/cross_sections.parquet)
+arrow::write_parquet(
+  dplyr::select(final_cs, 
+                -is_dem_point
+  ), 
+  CROSS_SECTIONS_OUTPUT_PATH
+)
+
+INTERNAL_CROSS_SECTIONS_PATH <- paste0("/Users/anguswatters/Desktop/cross_sections_is_dem_point.parquet")
+arrow::write_parquet(
+  final_cs,
+  INTERNAL_CROSS_SECTIONS_PATH
+)
+
+# ---------------------------------------------------------------------------------
+# ---- Substitute diffusive domain DEM Z values ---- 
+# ---------------------------------------------------------------------------------
+final_cs <- arrow::read_parquet(CROSS_SECTIONS_OUTPUT_PATH)
+# INTERNAL_CROSS_SECTIONS_PATH <- paste0("/Users/anguswatters/Desktop/cross_sections2.parquet")
+
+bb_df <- lapply(COASTAL_BATHY_DEM_PATHS, function(path) {
+                      r       <- terra::rast(path)
+                      extent  <- terra::ext(r)
+                      # r <- terra::project(r, "EPSG:5070")
+                      # terra::set.crs(r, "EPSG:5070")
+                      ext_df          <- data.frame(lapply(extent, function(i) {i}))
+                      ext_df$crs      <- terra::crs(r)
+                      ext_df$file     <- basename(path)
+                      ext_df$path     <- path 
+                      
+                      return(ext_df)
+              }) %>% 
+  dplyr::bind_rows()
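+
+# bb_df now holds one row per DEM: xmin/xmax/ymin/ymax, crs, file, and path.
+# A quick way to eyeball the extents (assuming the columns built above):
+# bb_df[, c("file", "xmin", "xmax", "ymin", "ymax")]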
+# final_cs
+
+# INTERNAL_CROSS_SECTIONS_PATH <- paste0("/Users/anguswatters/Desktop/cross_sections_is_dem_point.parquet")
+# arrow::write_parquet(
+#   final_cs,
+#   INTERNAL_CROSS_SECTIONS_PATH
+# )
+
+# seed the iterative update file: each pass of the loop below reads START_PATH,
+# substitutes Z values for points covered by that DEM, and overwrites the file
+# (this assignment was commented out, leaving START_PATH undefined in the loop)
+START_PATH <- paste0("/Users/anguswatters/Desktop/cross_sections2.parquet")
+
+arrow::write_parquet(
+  final_cs,
+  START_PATH
+)
+
+# rm(i, cs, df, pts_subset, has_pts_in_bb, bb, updated_depths)
+
+for (i in 1:nrow(bb_df)) {
+  
+  # i = 1
+  
+  message(i, " - Checking DEM '", bb_df$file[i], "' for CS PTS...")
+  
+  df <- bb_df[i, ]
+  
+  START_EPSG_CODE <- 5070
+  
+  cs <- arrow::read_parquet(START_PATH)
+  cs <- 
+    cs %>% 
+    # dplyr::slice(1:10000) %>% 
+    sf::st_as_sf(coords = c("X", "Y"), crs = START_EPSG_CODE)
+  
+  # convert to bounding box CRS 
+  cs <- 
+    cs %>% 
+    sf::st_transform(df$crs[1])
+  
+  # create bounding box shape
+  bb <- sf::st_as_sf(
+          sf::st_as_sfc(
+            sf::st_bbox(
+                c(xmin =   df$xmin, 
+                  xmax =   df$xmax, 
+                  ymax =   df$ymax, 
+                  ymin =   df$ymin
+                  ), 
+                crs = sf::st_crs(df$crs)
+              )
+            )
+          )
+  
+  # get pts that are in the bounding box
+  pts_subset <- sf::st_filter(
+    cs, 
+    bb
+  )  
+  
+  has_pts_in_bb <- nrow(pts_subset) > 0
+  
+  message(nrow(pts_subset), " cs points found within '", df$file, "' DEM's bounding box")
+  
+  if(!has_pts_in_bb) {
+    message("   > No points to update!")
+    next
+  }
+  
+  if(has_pts_in_bb) {
+    
+    message("   > Loading Raster")
+    
+    # load DEM
+    dem <- terra::rast(df$path)   
+    
+    message("   > Extracting new cross section depth values from DEM...")
+    
+    updated_depths <- 
+      pts_subset %>% 
+      dplyr::mutate(
+        Z2 = hydrofabric3D:::extract_pt_val(dem, .),
+        Z_source2 = df$file
+        # Z = extract_pt_val(terra::rast(dem), .)
+        ) %>% 
+      sf::st_drop_geometry() %>% 
+      dplyr::select(
+        dplyr::any_of(CROSSWALK_ID),
+        cs_id, pt_id, 
+        # Z,
+        Z2,
+        # Z_source,
+        Z_source2
+        )
+    
+    message("   > Replacing old depth values with new depth values...")
+    cs <- 
+      cs %>% 
+      dplyr::left_join(
+        updated_depths,
+        by = c(CROSSWALK_ID, "cs_id", "pt_id")
+      ) %>% 
+      dplyr::mutate(
+        Z = dplyr::case_when(
+          !is.na(Z2)  ~ Z2,
+          TRUE        ~ Z
+        ),
+        Z_source = dplyr::case_when(
+          !is.na(Z_source2)  ~ Z_source2,
+          TRUE               ~ Z_source
+        )
+      ) %>% 
+      dplyr::select(-Z2, -Z_source2)
+    
+    message("   > Projecting CS PTs back to starting CRS (", START_EPSG_CODE, ") ...")
+    cs <- 
+      cs %>% 
+      sf::st_transform(START_EPSG_CODE) 
+    
+    message("   > Dropping point geometries and preserving X / Y coordinates...")
+    
+    cs <- 
+      cs %>% 
+      dplyr::mutate(
+        X = sf::st_coordinates(.)[,1],
+        Y = sf::st_coordinates(.)[,2]
+      ) %>%
+      sf::st_drop_geometry() %>% 
+      dplyr::select(
+        # hy_id, 
+        dplyr::any_of(CROSSWALK_ID),
+        cs_id, pt_id,
+        cs_lengthm,
+        relative_distance,
+        X, Y, 
+        Z,
+        class, point_type,
+        Z_source,
+        bottom, left_bank, right_bank, valid_banks,
+        has_relief 
+        
+        # newly added columns (03/06/2024)
+      )
+    
+    message("   > Overwritting original cross section points parquet with updated depth values ...")
+    message("      > '", START_PATH, "'")
+    
+    arrow::write_parquet(cs, START_PATH)
+    
+  }
+  
+}
+
+cross_sections <- arrow::read_parquet(START_PATH)
+
+cross_sections <-
+  cross_sections %>% 
+  dplyr::select(
+          -point_type,
+          -class,
+          -bottom, -left_bank, -right_bank,
+          -has_relief, -valid_banks
+          )
+
+# reclassify
+# system.time({
+cross_sections <- hydrofabric3D::classify_points(cs_pts = cross_sections, 
+                                           crosswalk_id = CROSSWALK_ID,
+                                           pct_of_length_for_relief = PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF
+                                           )
+# })
+
+# final_cross_sections %>% 
+#   dplyr::filter(id == "wb-1000") %>% 
+#   dplyr::rename(hy_id = id) %>% 
+#   hydrofabric3D::plot_cs_pts(color = "point_type")
+
+# ---------------------------------------------------------------------------------
+# ---- Write final cross section points data ----
+# ---- Diffusive Domain DEM + FEMA + ML 
+# ---------------------------------------------------------------------------------
+
+CROSS_SECTIONS_ML_OUTPUT_PATH <- paste0(DOMAIN_WITH_FEMA_OUTPUT_DIR, "/cross_sections.parquet")
+
+message(round(Sys.time()), " - Saving Diffusive DEM + FEMA + ML augmented cross section points to:\n - filepath: '", CROSS_SECTIONS_ML_OUTPUT_PATH, "'")
+
+# sum(is.na(final_cross_sections$id))
+# sum(is.na(final_cross_sections$cs_id))
+# sum(is.na(final_cross_sections$pt_id))
+# sum(is.na(final_cross_sections$X))
+
+# save cross section points as a parquet to out_path (domain/outputs/cross_sections.parquet)
+arrow::write_parquet(
+  # dplyr::select(final_cs, 
+  #               -is_dem_point
+  # ), 
+  cross_sections,
+  CROSS_SECTIONS_ML_OUTPUT_PATH
+)
+
+# cross_sections %>% 
+#   dplyr::select(id, Z_source) %>% 
+#   dplyr::group_by(Z_source) %>% 
+#   dplyr::count(Z_source) %>% 
+#   dplyr::arrange(-n)
+
+# CROSS_SECTIONS_ML_OUTPUT_PATH
+# 
+# CROSS_SECTIONS_OUTPUT_PATH
+# 
+# CROSS_SECTIONS_ML_OUTPUT_PATH <- paste0(DOMAIN_WITH_FEMA_OUTPUT_DIR, "/cross_sections.parquet")
+# 
+# CROSS_SECTIONS_PATH      <- paste0(DOMAIN_WITH_FEMA_OUTPUT_DIR, "/cross_sections.parquet")
+# DEM_CROSS_SECTIONS_PATH  <- paste0(DOMAIN_WITH_FEMA_OUTPUT_DIR, "/dem_cross_sections.parquet")
+# 
+# cs <- arrow::read_parquet(CROSS_SECTIONS_PATH) %>% 
+#   dplyr::rename(hy_id = id)
+# 
+# dem_cs <- arrow::read_parquet(DEM_CROSS_SECTIONS_PATH) %>% 
+#   dplyr::rename(hy_id = id)
+# 
+# z_ids <- 
+#   cs %>% 
+#   dplyr::group_by(Z_source) %>% 
+#   dplyr::slice(1:4) %>% 
+#   dplyr::pull(hy_id)
+# 
+# # subset_ids <- z_ids[1:1]
+# # z_ids <- z_ids[1:2]
+# # for (i in seq_along(z_ids)) {
+# for (i in seq_along(z_ids)) {
+#   message(i) 
+#   # i = 1
+#   subset_id <- z_ids[i]
+#   
+#   new_cs <- 
+#     cs %>% 
+#     dplyr::filter(hy_id %in% subset_id)
+#   
+#   old_cs <- 
+#     dem_cs %>% 
+#     dplyr::filter(hy_id %in% subset_id)
+#   
+#   title_str <-  paste0(
+#                 unique(new_cs$Z_source)[1], " vs. ",
+#                 unique(old_cs$Z_source)[1]
+#               )
+#   
+#   save_path = paste0(
+#     "/Users/anguswatters/Desktop/z_source_plots/",
+#                   gsub(".tif", "", unique(new_cs$Z_source)[1]), 
+#                   "_vs_",
+#                   unique(old_cs$Z_source)[1], "_", i, ".png"
+#                   )
+#   
+#   message("making plot at \n > '", save_path, "'")
+#   
+#   comp_plot <- dplyr::bind_rows(
+#                   new_cs,
+#                   old_cs
+#                 ) %>% 
+#                   ggplot2::ggplot() + 
+#                   ggplot2::geom_point(ggplot2::aes(x = pt_id, 
+#                                                    y = Z,
+#                                                    color = Z_source
+#                   ),
+#                   size = 3,
+#                   alpha = 0.9
+#                   ) + 
+#                   ggplot2::labs(title = title_str) + 
+#                   ggplot2::facet_wrap(hy_id~cs_id) +
+#                   ggplot2::theme_bw() +
+#                   ggplot2::theme(
+#                     plot.title = ggplot2::element_text(size = 14),
+#                     legend.text  = ggplot2::element_text(size = 14),
+#                     legend.title = ggplot2::element_text(size = 14, face = "bold")
+#                   )
+#   
+#   ggplot2::ggsave(plot = comp_plot,
+#                   filename = save_path,
+#                   scale = 1
+#                   )
+#   
+#   }
+# 
+# 
+# dplyr::bind_rows(
+#   cs %>% 
+#     dplyr::filter(hy_id %in% subset_ids),
+#   dem_cs %>% 
+#     dplyr::filter(hy_id %in% subset_ids) 
+#     
+# ) %>% 
+#   ggplot2::ggplot() + 
+#   ggplot2::geom_point(ggplot2::aes(x = pt_id, 
+#                                    y = Z,
+#                                    color = Z_source
+#                                    
+#   )) + 
+#   ggplot2::labs(title = "al_nwfl_ncei_1 vs hydrofabric3D") + 
+#   ggplot2::facet_wrap(hy_id~cs_id)
+# 
+# cs %>% 
+#   dplyr::filter(hy_id %in% subset_ids) %>% 
+#   ggplot2::ggplot() + 
+#   ggplot2::geom_point(ggplot2::aes(x = pt_id, 
+#                                    y = Z,
+#                                    color = point_type
+#                                    
+#   )) + 
+#   ggplot2::facet_wrap(hy_id~cs_id)
+# 
+# 
+# dem_cs %>% 
+#   dplyr::filter(hy_id %in% subset_ids) %>% 
+#   ggplot2::ggplot() + 
+#   ggplot2::geom_point(ggplot2::aes(x = pt_id, 
+#                                    y = Z,
+#                                    color = point_type
+#                                    
+#                                    )) + 
+#   ggplot2::facet_wrap(hy_id~cs_id)
+# 
 
diff --git a/runners/cs_runner/download_fema100.R b/runners/cs_runner/download_fema100.R
index 9fa2f8a..b23b438 100644
--- a/runners/cs_runner/download_fema100.R
+++ b/runners/cs_runner/download_fema100.R
@@ -105,7 +105,7 @@ for (key in FEMA_BUCKET_KEYS) {
     message("Downloading S3 object to:\n > '", local_save_path, "'")
     # message("Copy command:\n > '", copy_cmd, "'")
     
-    # system(copy_cmd)
+    system(copy_cmd)
     
     message(" > '", key, "' download complete!")
     message("----------------------------------")
diff --git a/runners/cs_runner/utils.R b/runners/cs_runner/utils.R
index 9d27a61..e52352b 100644
--- a/runners/cs_runner/utils.R
+++ b/runners/cs_runner/utils.R
@@ -10,7 +10,7 @@
 #     ├── transects/
 #     ├── cross_sections/
 #     └── cs_pts/
-create_new_domain_dirs <- function(base_dir, domain_dirname) {
+create_new_domain_dirs <- function(base_dir, domain_dirname, with_output = FALSE) {
   
   # build paths
   domain_dir         <- paste0(base_dir, "/", domain_dirname)
@@ -20,6 +20,11 @@ create_new_domain_dirs <- function(base_dir, domain_dirname) {
   transects_dir      <- paste0(domain_dir, "/transects")
   cross_sections_dir <- paste0(domain_dir, "/cross_sections")
   cs_pts_dir         <- paste0(domain_dir, "/cs_pts")
+  vpu_subsets_dir    <- paste0(domain_dir, "/vpu-subsets")
+  
+  if(with_output) {
+    output_dir       <- paste0(domain_dir, "/outputs")
+  }
   
   create_if_not_exists <- function(dir_path) {
     if (!dir.exists(dir_path)) {
@@ -36,12 +41,18 @@ create_new_domain_dirs <- function(base_dir, domain_dirname) {
   create_if_not_exists(transects_dir)
   create_if_not_exists(cross_sections_dir)
   create_if_not_exists(cs_pts_dir)
+  create_if_not_exists(vpu_subsets_dir)
+  
+  if(with_output) {
+    create_if_not_exists(output_dir)
+  }
+  
 }
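+
+# Example (hypothetical base directory): build the full domain folder tree,
+# including the optional outputs/ directory
+# create_new_domain_dirs("/data", "domain_with_fema", with_output = TRUE)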
 
 # get path strings for a domain dir (based off a base dir and domain dirname)
 # NOTE: this does NOT guarantee that these folders exist, 
 # NOTE: it just gets the paths as they would be created by create_new_domain_dirs() 
-get_new_domain_paths <- function(base_dir, domain_dirname) {
+get_new_domain_paths <- function(base_dir, domain_dirname, with_output = FALSE) {
   
   # build paths
   domain_dir         <- paste0(base_dir, "/", domain_dirname)
@@ -51,6 +62,9 @@ get_new_domain_paths <- function(base_dir, domain_dirname) {
   transects_dir      <- paste0(domain_dir, "/transects")
   cross_sections_dir <- paste0(domain_dir, "/cross_sections")
   cs_pts_dir         <- paste0(domain_dir, "/cs_pts")
+  vpu_subsets_dir    <- paste0(domain_dir, "/vpu-subsets")
+  output_dir         <- ifelse(with_output, paste0(domain_dir, "/outputs"), NA)
+
   
   # named list of file paths
   return(
@@ -62,12 +76,41 @@ get_new_domain_paths <- function(base_dir, domain_dirname) {
       dem_dir            = dem_dir,
       transects_dir      = transects_dir,
       cross_sections_dir = cross_sections_dir,
-      cs_pts_dir         = cs_pts_dir
+      cs_pts_dir         = cs_pts_dir,
+      vpu_subsets_dir    = vpu_subsets_dir,
+      output_dir         = output_dir
     )
   )
   
 }
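+
+# Example (hypothetical base directory):
+# paths <- get_new_domain_paths("/data", "domain_with_fema", with_output = TRUE)
+# paths$output_dir   # "/data/domain_with_fema/outputs"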
 
+list_s3_objects <- function(s3_bucket, pattern = NULL, aws_profile = NULL) {
+  
+  profile_option <- if (!is.null(aws_profile)) paste0("--profile ", aws_profile) else ""
+  
+  if (is.null(pattern) || pattern == "") {
+    grep_command <- ""  # no filtering if empty or NULL
+  } else {
+    grep_command <- paste0(" | grep -E \"", pattern, "\"")  # grep if a pattern is given
+  }
+  
+  cmd <- paste0(
+    '#!/bin/bash\n',
+    'S3_BUCKET="', s3_bucket, '"\n',
+    'PATTERN="', pattern, '"\n',
+    'S3_OBJECTS=$(aws s3 ls "$S3_BUCKET" ', profile_option, ' | awk \'{print $4}\' | grep -E "$PATTERN")\n',
+    'echo "$S3_OBJECTS"'
+  )
+  # cmd <- paste0(
+  #   '#!/bin/bash\n',
+  #   'S3_BUCKET="', s3_bucket, '"\n',
+  #   'S3_OBJECTS=$(aws s3 ls "$S3_BUCKET" ', profile_option, ' | awk \'{print $4}\'', grep_command, ')\n',
+  #   'echo "$S3_OBJECTS"'
+  # )
+  ls_output <- system(cmd, intern = TRUE)
+  return(ls_output)
+}
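+
+# Example (assumes AWS_PROFILE grants access to the bucket), mirroring the
+# commented call in config_vars.R:
+# list_s3_objects(COASTAL_BATHY_DEM_S3_DIR_URI, ".tif$", AWS_PROFILE)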
+
 # Given 2 character vectors of filenames both including VPU strings after a "nextgen_" string, match them together to
 # make sure they are aligned and in the same order
 # x is a character vector of file paths with a VPU ID preceded by a "nextgen_" string 
@@ -392,6 +435,7 @@ add_predicate_group_id <- function(polys, predicate) {
 # matching cross section points that went through "get_improved_cs_pts()" and that were extended for improvement
 # returns the extended version of the transects 
 match_transects_to_extended_cs_pts <- function(transect_lines, fixed_cs_pts, crosswalk_id) {
+  
   # transect_lines = transects
   # fixed_cs_pts   = fixed_pts
   # crosswalk_id   = CROSSWALK_ID
@@ -402,7 +446,6 @@ match_transects_to_extended_cs_pts <- function(transect_lines, fixed_cs_pts, cro
   # get the counts of each point type to add this data to the transect_lines dataset
   point_type_counts <- hydrofabric3D::get_point_type_counts(classified_pts = fixed_cs_pts, 
                                                             crosswalk_id = crosswalk_id)
-  
   # Check the number of cross sections that were extended
   message("Subsetting cross section points generated after extending transect_lines...")
   
@@ -441,6 +484,8 @@ match_transects_to_extended_cs_pts <- function(transect_lines, fixed_cs_pts, cro
         length_col = "cs_lengthm"
       )
     
+    update_transect_lines <- hydroloom::rename_geometry(update_transect_lines, "geometry")
+    
     update_transect_lines <- 
       update_transect_lines %>%  
       dplyr::rename(!!sym(crosswalk_id) := hy_id)

From 3b61de2221ae193665d48748cf73bb71ce7863d5 Mon Sep 17 00:00:00 2001
From: anguswg-ucsb <anguswatters@gmail.com>
Date: Mon, 30 Sep 2024 21:38:11 -0700
Subject: [PATCH 59/64] made a temp bathy.R file (to be deleted tomorrow) for
 running cs point generation from the terminal

---
 runners/cs_runner/02_cs_pts.R        |   4 +-
 runners/cs_runner/bathy.R            | 337 +++++++++++++++++++++++++++
 runners/cs_runner/config_vars.R      |   3 +-
 runners/cs_runner/domain_with_fema.R |  86 +++++--
 4 files changed, 407 insertions(+), 23 deletions(-)
 create mode 100644 runners/cs_runner/bathy.R

diff --git a/runners/cs_runner/02_cs_pts.R b/runners/cs_runner/02_cs_pts.R
index 4713c4b..264a2ab 100644
--- a/runners/cs_runner/02_cs_pts.R
+++ b/runners/cs_runner/02_cs_pts.R
@@ -200,8 +200,10 @@ for (i in 20:nrow(path_df)) {
     )
   # })
   
+  # transects %>% 
+  #   sf::st_drop_geometry() %>% 
+  #   dplyr::group_by(id, cs_id)
   # fixed_pts2$is_extended %>% sum()
-  
   ids_after_fixed_pts <- hydrofabric3D::add_tmp_id(fixed_pts)$tmp_id
 
   # # TODO: This is taking A LOT time to process as inputs get larger, an improvement should be looked into more
diff --git a/runners/cs_runner/bathy.R b/runners/cs_runner/bathy.R
new file mode 100644
index 0000000..7a550f9
--- /dev/null
+++ b/runners/cs_runner/bathy.R
@@ -0,0 +1,337 @@
+# Generate the transects + cs_pts + cross sections layers for a single flowlines domain file and DEM file 
+source("runners/cs_runner/config.R")
+source("runners/cs_runner/utils.R")
+
+# load libraries
+# library(hydrofabric3D)
+# library(dplyr)
+# library(sf)
+
+# Unique Flowline ID column name
+CROSSWALK_ID <- "id"
+
+VPU_ML_BATHYMETRY_PATHS <- list.files(DOMAIN_WITH_FEMA_ML_DIR, full.names = T)
+
+ML_CROSSWALK_ID <- "id"
+
+# ml_outputs <- lapply(VPU_ML_BATHYMETRY_PATHS, function(prq) {
+#   vpu_id     <- gsub(".*ml/([a-zA-Z0-9]+).*", "\\1", prq)
+#   arrow::read_parquet(prq) %>% 
+#     dplyr::mutate(vpu_id = vpu_id) 
+# }
+# ) %>% 
+#   dplyr::bind_rows() %>% 
+#   dplyr::select(
+#     dplyr::any_of(ML_CROSSWALK_ID),
+#     vpu_id,
+#     owp_y_bf, owp_y_inchan, 
+#     owp_tw_bf, owp_tw_inchan,
+#     owp_dingman_r
+#                 )
+# 
+# # rename ML_CROSSWALK_ID (unique ID) to match the CROSSWALK_ID in CS PTS
+# # TODO: This assumes the IDs correspond with each other (built from the same flowlines network)
+# names(ml_outputs)[names(ml_outputs) == ML_CROSSWALK_ID] = CROSSWALK_ID
+# 
+# # Keep only distinct ID rows
+# ml_outputs <- 
+# ml_outputs %>%
+# dplyr::distinct(
+#   dplyr::across(dplyr::any_of(CROSSWALK_ID)),
+#   vpu_id,
+#   owp_y_bf, owp_y_inchan,
+#   owp_tw_bf, owp_tw_inchan,
+#   owp_dingman_r
+# )
+
+# sf::st_layers(DOMAIN_WITH_FEMA_FLOWLINES_PATH)
+# rm(ml_outputs, ml)
+ml_outputs <- sf::read_sf(DOMAIN_WITH_FEMA_FLOWLINES_PATH, layer = "flowpath-attributes-ml")
+
+ml_outputs <- 
+  ml_outputs %>% 
+  dplyr::select(
+    dplyr::any_of(ML_CROSSWALK_ID),
+    vpuid,
+    owp_y_bf = YCC, 
+    owp_y_inchan = Y,
+    owp_tw_bf = TopWdthCC, 
+    owp_tw_inchan = TopWdth,
+    owp_dingman_r = dingman_r
+  )
+# 
+# # rename ML_CROSSWALK_ID (unique ID) to match the CROSSWALK_ID in CS PTS
+# # TODO: This assumes the IDs correspond with each other (built from the same flowlines network)
+names(ml_outputs)[names(ml_outputs) == ML_CROSSWALK_ID] = CROSSWALK_ID
+# 
+# # Keep only distinct ID rows
+ml_outputs <-
+  ml_outputs %>%
+  dplyr::distinct(
+    dplyr::across(dplyr::any_of(CROSSWALK_ID)),
+    vpuid,   # NOTE: column is "vpuid" (as selected above), not "vpu_id"
+    owp_y_bf, owp_y_inchan,
+    owp_tw_bf, owp_tw_inchan,
+    owp_dingman_r
+  )
+
+# ---------------------------------------------------------------------------------
+# ---- Read in CS PTS data ----
+# ---------------------------------------------------------------------------------
+CS_PTS_OUTPUT_PATH <- paste0(DOMAIN_WITH_FEMA_OUTPUT_DIR, "/cs_pts.parquet")
+
+cs_pts  <- arrow::read_parquet(CS_PTS_OUTPUT_PATH)
+
+# ---------------------------------------------------------------------------------
+# ---- Join CS PTS  with ML data ---
+# ---------------------------------------------------------------------------------
+message(round(Sys.time()), " - Joining ML width/depths estimates to cross section points...")
+
+# ml_outputs %>% 
+#   dplyr::group_by(id) %>% 
+#   dplyr::count(id) %>% 
+#   dplyr::arrange(-n)
+
+# join the ML outputs data to the cross section points
+cs_pts <-
+  cs_pts %>% 
+  dplyr::left_join(
+    dplyr::select(ml_outputs, 
+                  dplyr::any_of(CROSSWALK_ID),
+                  owp_tw_inchan, 
+                  owp_y_inchan, 
+                  owp_tw_bf, 
+                  owp_y_bf, 
+                  owp_dingman_r
+    ),
+    by = CROSSWALK_ID
+  ) 
+
+# ---------------------------------------------------------------------------------
+# ---- Fixing negative depths/widths estimates ----
+# ---------------------------------------------------------------------------------
+message(round(Sys.time()), " - Replacing any negative width/depth estimates with cross section bottom lengths...")
+
+cs_bottom_lengths <- hydrofabric3D::get_cs_bottom_length(cross_section_pts = cs_pts, crosswalk_id = CROSSWALK_ID)
+
+# TODO: for now we replace any negative TW values with the length of the bottom of the cross section
+# TODO: This method + the negative model output values both need to be looked into (04/05/2024)
+cs_pts <-
+  cs_pts %>% 
+  dplyr::left_join(
+    cs_bottom_lengths, 
+    by = c(CROSSWALK_ID, "cs_id")
+    # by = c("hy_id", "cs_id")
+  ) %>% 
+  dplyr::mutate(
+    owp_tw_inchan = dplyr::case_when(
+      owp_tw_inchan <= 0 ~ bottom_length,
+      TRUE               ~ owp_tw_inchan
+    ),
+    owp_tw_bf = dplyr::case_when(
+      owp_tw_bf <= 0     ~ bottom_length,
+      TRUE               ~ owp_tw_bf
+    )
+  ) %>% 
+  dplyr::select(-bottom_length)
+
+# extract any cross sections that didn't get matched with an "hf_id" and/or have no ML data
+# TODO: look at this stuff with Arash (04/09/2024)
+missing_cs <- 
+  cs_pts %>% 
+  dplyr::filter(
+    is.na(.data[[CROSSWALK_ID]]) | 
+      is.na(owp_tw_inchan) | is.na(owp_y_inchan) | 
+      is.na(owp_tw_bf) | is.na(owp_y_bf) |
+      is.na(owp_dingman_r)
+  ) %>% 
+  hydrofabric3D::add_tmp_id(x = CROSSWALK_ID)
+
+# TODO: Delete this; keeping it for now to inspect the mismatch between "hy_id" and "hf_id"
+# readr::write_csv(
+#   dplyr::select(missing_cs, -tmp_id), 
+#   paste0(META_PATH, "nextgen_", path_df$vpu[i], "_cross_sections_missing_hf_ids.csv")
+# )
+
+# Split the cross sections into 2 groups:
+# - "Inchannel cs" group are points with BOTH valid banks AND relief --> These get the INCHANNEL TW and Y values from the ML model
+# - "Bankful cs" group are points WITHOUT valid banks OR any relief  --> These get the BANKFUL TW and Y values from the ML model
+inchannel_cs <-
+  cs_pts %>% 
+  hydrofabric3D::add_tmp_id(x = CROSSWALK_ID) %>% 
+  dplyr::filter(!tmp_id %in% unique(missing_cs$tmp_id)) %>%  # NOTE: makes sure to remove any of the "missing" cross sections without data
+  dplyr::select(-tmp_id) %>% 
+  dplyr::filter(valid_banks & has_relief) %>% 
+  # NOTE: temporarily rename the top widths, depths, and dingman's R columns so they 
+  # work nicely with the "hydrofabric3D::add_cs_bathymetry()" function which takes a dataframe of cross section points 
+  # with "TW", "DEPTH", and "DINGMAN_R" columns for each cross section
+  dplyr::rename(        
+    TW        = owp_tw_inchan,    
+    DEPTH     = owp_y_inchan,
+    DINGMAN_R = owp_dingman_r
+  ) 
+  # dplyr::slice(1:1000)
+
+bankful_cs   <- 
+  cs_pts %>% 
+  hydrofabric3D::add_tmp_id(x = CROSSWALK_ID) %>% 
+  dplyr::filter(!tmp_id %in% unique(missing_cs$tmp_id)) %>% 
+  dplyr::select(-tmp_id) %>% 
+  dplyr::filter(!valid_banks | !has_relief) %>% 
+  dplyr::rename(
+    TW        = owp_tw_bf, 
+    DEPTH     = owp_y_bf,
+    DINGMAN_R = owp_dingman_r
+  ) 
+  # dplyr::slice(1:1000)
+
+# # Replace any topwidth values that are GREATER than the actual cross section length (meters)
+# bankful_cs2 <- hydrofabric3D:::fix_oversized_topwidths(
+#   cross_section_pts = bankful_cs2,
+#   crosswalk_id = CROSSWALK_ID
+# )
+# bankful_cs %>% 
+#   dplyr::filter(TW >= cs_lengthm) %>% 
+#   dplyr::select(TW)
+
+# sanity check that all rows are accounted for after splitting up data
+split_kept_all_rows <- nrow(cs_pts) == (nrow(bankful_cs) + nrow(inchannel_cs) + nrow(missing_cs))
+# split_kept_all_rows <- nrow(cs_pts) == nrow(bankful_cs) + nrow(inchannel_cs)
+
+if (!split_kept_all_rows) {
+  warning(paste0("When splitting cross section points into 'bankful' and 'inchannel' groups,", 
+                 "\nsome points were not put in either group.", 
+                 "\nLikely due to 'valid_banks' and/or 'has_relief' columns have either missing ", 
+                 "values or contain values other than TRUE/FALSE")
+  )
+}
+
+message(round(Sys.time()), " - Adding cross section bathymetry using inchannel widths/depths estimates...")
+# tmp <- 
+#   inchannel_cs %>% 
+#   dplyr::slice(1:10000)
+# system.time({
+  
+  # Add bathymetry using "inchannel" estimates
+  inchannel_cs <- hydrofabric3D::add_cs_bathymetry(
+    cross_section_pts = inchannel_cs,
+    # cross_section_pts = tmp,
+    crosswalk_id      = CROSSWALK_ID
+  )
+  
+# })
+
+  gc()
+
+
+# arrow::write_parquet(inchannel_cs, "/Users/anguswatters/Desktop/test_ml_cs_pts_06.parquet")
+# ml_subset %>%
+#   dplyr::filter(hy_id == "wb-1005207") %>%
+#   dplyr::select(owp_y_inchan, owp_tw_inchan) %>% 
+#   .$owp_y_inchan
+message(round(Sys.time()), " - Adding cross section bathymetry using bankful widths/depths estimates...")
+# system.time({
+  
+  # Add bathymetry using "bankful" estimates
+  bankful_cs <- hydrofabric3D::add_cs_bathymetry(
+    cross_section_pts = bankful_cs,
+    # cross_section_pts = dplyr::slice(bankful_cs, 1:10000),
+    crosswalk_id      = CROSSWALK_ID
+  )
+  
+# })
+
+# combine the inchannel and bankful cross section points back together, fill out missing values and reclassify the points
+final_cs <- dplyr::bind_rows(
+  dplyr::select(
+    inchannel_cs,
+    # inchannel_cs2,
+    # -hf_id, 
+    -TW, -DEPTH, -DINGMAN_R, 
+    # -is_dem_point,
+    -dplyr::starts_with("owp")
+  ),
+  dplyr::select(
+    bankful_cs,
+    # bankful_cs2,
+    # -hf_id, 
+    -TW, -DEPTH, -DINGMAN_R, 
+    # -is_dem_point,
+    -dplyr::starts_with("owp")
+  ),
+  dplyr::select(
+    dplyr::mutate(
+      missing_cs,
+      is_dem_point = FALSE
+    ),
+    # -hf_id, 
+    # -is_dem_point,
+    -dplyr::starts_with("owp"), 
+    -tmp_id
+  )
+) %>%
+  dplyr::group_by(dplyr::across(dplyr::any_of(c(CROSSWALK_ID, "cs_id")))) %>% 
+  # dplyr::group_by(hy_id, cs_id) %>%
+  tidyr::fill(
+    c(cs_lengthm, Z_source)
+  ) %>%
+  dplyr::ungroup() %>%
+  dplyr::select(
+    -point_type,
+    -class,
+    -bottom, -left_bank, -right_bank,
+    -has_relief, -valid_banks
+  )
+
+# arrow::write_parquet(final_cs, "/Users/anguswatters/Desktop/tmp.parquet")
+# final_cs <- arrow::read_parquet("/Users/anguswatters/Desktop/tmp.parquet")
+
+
+message(round(Sys.time()), " - Reclassifying cross section point types...")
+
+# reclassify
+final_cs <- hydrofabric3D::classify_points(cs_pts = final_cs, 
+                                           crosswalk_id = CROSSWALK_ID,
+                                           pct_of_length_for_relief = PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF
+)
+
+starting_uids <- hydrofabric3D::get_unique_tmp_ids(cs_pts, x = CROSSWALK_ID)
+ending_uids   <- hydrofabric3D::get_unique_tmp_ids(final_cs, x = CROSSWALK_ID)
+
+has_same_number_of_uids          <- length(starting_uids) == length(ending_uids)
+all_starting_uids_in_ending_uids <- all(starting_uids %in% ending_uids)
+all_ending_uids_in_starting_uids <- all(ending_uids %in% starting_uids)
+
+# throw some warnings if:
+# - the number of uids in the input is different from the output
+# - there are missing hy_id/cs_id
+if (!has_same_number_of_uids) {
+  warning(paste0("The number of unique hy_id/cs_id is different in the ",
+                 "starting cross section points from the final cross section points:",
+                 "\n > Starting number of unique hy_id/cs_id: ", length(starting_uids),
+                 "\n > Ending number of unique hy_id/cs_id: ", length(ending_uids)
+  ))
+}
+
+if (!all_starting_uids_in_ending_uids) {
+  number_uids_not_in_ending_uids <- length(starting_uids[!starting_uids %in% ending_uids])
+  
+  # starting_uids %in% ending_uids
+  warning(
+    paste0("Missing hy_id/cs_id in output that are in the starting input cross section points: ",
+           "\n > Number of hy_id/cs_id missing: ", number_uids_not_in_ending_uids
+    )
+  )
+  
+  # warning(paste0(number_uids_not_in_ending_uids, " hy_id/cs_id from the input cross section points ",
+  #          "is missing from the output cross section points"))
+  
+}
+tmp_path <- paste0(DOMAIN_WITH_FEMA_OUTPUT_DIR, "/final_cs.parquet")
+message("Saving file: ", tmp_path)
+
+arrow::write_parquet(
+  final_cs,
+  tmp_path
+)
+
diff --git a/runners/cs_runner/config_vars.R b/runners/cs_runner/config_vars.R
index 7b86b59..d95e534 100644
--- a/runners/cs_runner/config_vars.R
+++ b/runners/cs_runner/config_vars.R
@@ -248,8 +248,7 @@ DOMAIN_WITH_FEMA_VPU_SUBSETS_DIR        <- paste0(DOMAIN_WITH_FEMA_DIR, "/", DOM
 DOMAIN_WITH_FEMA_ML_DIR                 <- paste0(DOMAIN_WITH_FEMA_DIR, "/", DOMAIN_WITH_FEMA_ML_DIRNAME)
 
 ML_AUXILIARY_DATA_S3_URI <- paste0(S3_BUCKET_BASE_URI, "bathymetry/ml_auxiliary_data")
-S3_BUCKET_BASE_URI
-
+# S3_BUCKET_BASE_URI
 LYNKER_HYDROFABRIC_S3_BUCKET_BASE_URI   <- "s3://lynker-hydrofabric/"
 ML_BATHYMETRY_S3_DATA_DIR               <- "hydrofabric/nextgen/bathymetry/multisource_river_attributes/"
 ML_BATHYMETRY_S3_URI                    <- paste0(LYNKER_HYDROFABRIC_S3_BUCKET_BASE_URI, ML_BATHYMETRY_S3_DATA_DIR)
diff --git a/runners/cs_runner/domain_with_fema.R b/runners/cs_runner/domain_with_fema.R
index 51ed175..ec13e6e 100644
--- a/runners/cs_runner/domain_with_fema.R
+++ b/runners/cs_runner/domain_with_fema.R
@@ -533,32 +533,63 @@ VPU_ML_BATHYMETRY_PATHS <- list.files(DOMAIN_WITH_FEMA_ML_DIR, full.names = T)
 
 ML_CROSSWALK_ID <- "id"
 
-ml_outputs <- lapply(VPU_ML_BATHYMETRY_PATHS, function(prq) {
-  vpu_id     <- gsub(".*ml/([a-zA-Z0-9]+).*", "\\1", prq)
-  arrow::read_parquet(prq) %>% 
-    dplyr::mutate(vpu_id = vpu_id) 
-}
-) %>% 
-  dplyr::bind_rows() %>% 
+# ml_outputs <- lapply(VPU_ML_BATHYMETRY_PATHS, function(prq) {
+#   vpu_id     <- gsub(".*ml/([a-zA-Z0-9]+).*", "\\1", prq)
+#   arrow::read_parquet(prq) %>% 
+#     dplyr::mutate(vpu_id = vpu_id) 
+# }
+# ) %>% 
+#   dplyr::bind_rows() %>% 
+#   dplyr::select(
+#     dplyr::any_of(ML_CROSSWALK_ID),
+#     vpu_id,
+#     owp_y_bf, owp_y_inchan, 
+#     owp_tw_bf, owp_tw_inchan,
+#     owp_dingman_r
+#                 )
+# 
+# # rename ML_CROSSWALK_ID (unique ID) to match the CROSSWALK_ID in CS PTS
+# # TODO: This assumes the IDs correspond with each other (built from the same flowlines network)
+# names(ml_outputs)[names(ml_outputs) == ML_CROSSWALK_ID] = CROSSWALK_ID
+# 
+# # Keep only distinct ID rows
+# ml_outputs <- 
+  # ml_outputs %>%
+  # dplyr::distinct(
+  #   dplyr::across(dplyr::any_of(CROSSWALK_ID)),
+  #   vpu_id,
+  #   owp_y_bf, owp_y_inchan,
+  #   owp_tw_bf, owp_tw_inchan,
+  #   owp_dingman_r
+  # )
+
+# sf::st_layers(DOMAIN_WITH_FEMA_FLOWLINES_PATH)
+# rm(ml_outputs, ml)
+ml_outputs <- sf::read_sf(DOMAIN_WITH_FEMA_FLOWLINES_PATH, layer = "flowpath-attributes-ml")
+
+ml_outputs <- 
+  ml_outputs %>% 
   dplyr::select(
     dplyr::any_of(ML_CROSSWALK_ID),
-    vpu_id,
-    owp_y_bf, owp_y_inchan, 
-    owp_tw_bf, owp_tw_inchan,
-    owp_dingman_r
+    vpuid,
+    owp_y_bf = YCC, 
+    owp_y_inchan = Y,
+    owp_tw_bf = TopWdthCC, 
+    owp_tw_inchan = TopWdth,
+    owp_dingman_r = dingman_r
                 )
-
-# rename ML_CROSSWALK_ID (unique ID) to match the CROSSWALK_ID in CS PTS
-# TODO: This assumes the IDs do correspond with eachother... (built from same flowlines network)
+# 
+# # rename ML_CROSSWALK_ID (unique ID) to match the CROSSWALK_ID in CS PTS
+# # TODO: This assumes the IDs correspond with each other (built from the same flowlines network)
 names(ml_outputs)[names(ml_outputs) == ML_CROSSWALK_ID] = CROSSWALK_ID
-
-# Keep only distinct ID rows
-ml_outputs <- 
-  ml_outputs %>% 
+# 
+# # Keep only distinct ID rows
+ml_outputs <-
+  ml_outputs %>%
   dplyr::distinct(
     dplyr::across(dplyr::any_of(CROSSWALK_ID)),
-    vpu_id, 
-    owp_y_bf, owp_y_inchan, 
+    vpuid,   # NOTE: column is "vpuid" (as selected above), not "vpu_id"
+    owp_y_bf, owp_y_inchan,
     owp_tw_bf, owp_tw_inchan,
     owp_dingman_r
   )
@@ -699,6 +730,8 @@ inchannel_cs <- hydrofabric3D::add_cs_bathymetry(
 
 })
 
+
+
 # arrow::write_parquet(inchannel_cs, "/Users/anguswatters/Desktop/test_ml_cs_pts_06.parquet")
 # ml_subset %>%
 #   dplyr::filter(hy_id == "wb-1005207") %>%
@@ -803,6 +836,19 @@ if (!all_starting_uids_in_ending_uids) {
   
 }
 
+# tmp_path <-   paste0(DOMAIN_WITH_FEMA_OUTPUT_DIR, "/final_cs2.parquet")
+# message("saving file", tmp_path)
+# 
+# final_cs <- arrow::read_parquet(tmp_path)
+# 
+# # save cross section points as a parquet to out_path (domain/outputs/cross_sections.parquet)
+# arrow::write_parquet(
+#   dplyr::select(final_cs, 
+#                 -is_dem_point
+#   ), 
+#   paste0(DOMAIN_WITH_FEMA_OUTPUT_DIR, "/final_cs2.parquet")
+# )
+
 # ---------------------------------------------------------------------------------
 # ---- Write final cross section points data ----
 # ---------------------------------------------------------------------------------

From 7c8ad1026901fbfe2eae4c2d7bbe792a1b9b7a8a Mon Sep 17 00:00:00 2001
From: anguswg-ucsb <anguswatters@gmail.com>
Date: Tue, 8 Oct 2024 10:16:24 -0700
Subject: [PATCH 60/64] removed excess code from new_domain.R cs runner file

---
 runners/cs_runner/config_vars.R |   2 +-
 runners/cs_runner/new_domain.R  | 219 +-------------------------------
 2 files changed, 2 insertions(+), 219 deletions(-)

diff --git a/runners/cs_runner/config_vars.R b/runners/cs_runner/config_vars.R
index d95e534..1a834a4 100644
--- a/runners/cs_runner/config_vars.R
+++ b/runners/cs_runner/config_vars.R
@@ -239,7 +239,7 @@ DOMAIN_WITH_FEMA_CROSS_SECTIONS_DIRNAME    <- "cross_sections"
 DOMAIN_WITH_FEMA_OUTPUT_DIRNAME            <- "outputs"
 DOMAIN_WITH_FEMA_VPU_SUBSETS_DIRNAME       <- "vpu-subsets"
 DOMAIN_WITH_FEMA_ML_DIRNAME                <- "ml"
-
+# aws s3 cp s3://prd-tnm/StagedProducts/Elevation/1/TIFF/USGS_Seamless_DEM_1.vrt /Users/anguswatters/Desktop/3DEP/3DEP.vrt
 DOMAIN_WITH_FEMA_TRANSECTS_DIR          <- paste0(DOMAIN_WITH_FEMA_DIR, "/", DOMAIN_WITH_FEMA_TRANSECTS_DIRNAME)
 DOMAIN_WITH_FEMA_CS_PTS_DIR             <- paste0(DOMAIN_WITH_FEMA_DIR, "/", DOMAIN_WITH_FEMA_CS_PTS_DIRNAME)
 DOMAIN_WITH_FEMA_CROSS_SECTIONS_DIR     <- paste0(DOMAIN_WITH_FEMA_DIR, "/", DOMAIN_WITH_FEMA_CROSS_SECTIONS_DIRNAME)
diff --git a/runners/cs_runner/new_domain.R b/runners/cs_runner/new_domain.R
index 5aae38e..4d2f223 100644
--- a/runners/cs_runner/new_domain.R
+++ b/runners/cs_runner/new_domain.R
@@ -290,221 +290,4 @@ arrow::write_parquet(
 
 # ----------------------------------------------------------------------------------------------------------------
 # ----------------------------------------------------------------------------------------------------------------
-# ----------------------------------------------------------------------------------------------------------------
-
-# ----------------------------------------------------------------------------------------------------------------
-# ---- STEP 4: Update transects with extended transects (if exists) ----
-# ----------------------------------------------------------------------------------------------------------------
-
-# get the counts of each point type to add this data to the transects dataset
-point_type_counts <- hydrofabric3D::get_point_type_counts(fixed_pts)
-
-# # check the number of cross sections that were extended
-message("Subsetting cross section points generated after extending transects...")
-
-# extract cross section points that have an "is_extended" value of TRUE
-extended_pts <-
-  fixed_pts %>% 
-  dplyr::filter(is_extended) %>% 
-  hydrofabric3D::add_tmp_id()
-
-# extract transects that have a "hy_id" in the "extended_pts" dataset
-update_transects <-
-  transects %>% 
-  hydrofabric3D::add_tmp_id() %>% 
-  dplyr::filter(tmp_id %in% unique(extended_pts$tmp_id))
-
-# if any transects were extended, update the transects dataset, and overwrite local and S3 transects geopackages
-if (nrow(update_transects) > 0) {
-  message("Updating ", nrow(update_transects), " transects")
-  
-  update_transects <-
-    update_transects %>%
-    # apply extend_by_percent function to each transect line: 
-    hydrofabric3D:::extend_by_percent(
-      pct        = EXTENSION_PCT,
-      length_col = "cs_lengthm"
-    )
-  
-  # Filter down to ONLY points that were finalized and rectified from rectify_cs_pts()
-  # remove old transects that have "tmp_id" in "extended_pts" (transects that were unchanged and are "good_to_go")
-  # and then replace with old transects with the "update_transects" 
-  out_transects <- 
-    transects %>% 
-    hydrofabric3D::add_tmp_id() %>% 
-    # dplyr::filter(!tmp_id %in% unique(extended_pts$tmp_id)) %>%
-    dplyr::filter(tmp_id %in% unique(hydrofabric3D::add_tmp_id(fixed_pts)$tmp_id)) %>% # Subset down to the remaining tmp_ids in the fixed points
-    dplyr::filter(!tmp_id %in% unique(extended_pts$tmp_id)) %>% # remove the tmp_ids that we are going add back in with the extended versions of those tmp_ids
-    dplyr::bind_rows( # bring in the new updated extended transects
-      dplyr::mutate(
-        update_transects,
-        is_extended = TRUE
-      )
-    )  
-  
-} else {
-  
-  out_transects <- 
-    transects %>% 
-    hydrofabric3D::add_tmp_id() %>% 
-    dplyr::filter(tmp_id %in% unique(hydrofabric3D::add_tmp_id(fixed_pts)$tmp_id)) %>% 
-    dplyr::filter(!tmp_id %in% unique(extended_pts$tmp_id))
-}
-
-# finalize new transects
-out_transects <- 
-  out_transects %>% 
-  dplyr::left_join(
-    point_type_counts,
-    by = c("hy_id", "cs_id")
-  ) %>% 
-  dplyr::left_join(
-    dplyr::ungroup(
-      dplyr::slice( 
-        
-        dplyr::group_by(
-          dplyr::select(sf::st_drop_geometry(fixed_pts), 
-                        hy_id, cs_id, bottom, left_bank, right_bank, valid_banks, has_relief), 
-          hy_id, cs_id),
-        1)
-    ),
-    by = c("hy_id", "cs_id")
-  ) %>% 
-  dplyr::select(hy_id, cs_source, cs_id, cs_measure, cs_lengthm, 
-                # sinuosity, 
-                is_extended, 
-                left_bank_count, right_bank_count, channel_count, bottom_count, 
-                bottom, left_bank, right_bank, valid_banks, has_relief,
-                geom
-  )
-
-# ----------------------------------------------------------------------------------------------------------------
-# ---- Re enumerate the transects & cross section points "cs_id" ----
-# ----------------------------------------------------------------------------------------------------------------
-renumb <- hydrofabric3D:::renumber_cs_ids(fixed_pts, crosswalk_id = "hy_id")
-unumb <- renumb %>% get_unique_tmp_ids()
-nnumb <- fixed_pts %>% get_unique_tmp_ids()
-all(unumb %in% nnumb)
-all(nnumb %in% unumb)
-
-
-length(unumb)
-length(nnumb)
-
-# make a dataframe that has a new_cs_id column that has 
-# the cs_id renumbered to fill in any missing IDs,
-# so each hy_id has cs_ids that go from 1 - number of cross sections on hy_id
-# The dataframe below will be used to join the "new_cs_id" with 
-# the original "cs_ids" in the final cross section POINTS and UPDATED TRANSECTS output datasets
-renumbered_ids <-
-  fixed_pts %>% 
-  sf::st_drop_geometry() %>% 
-  # dplyr::filter(hy_id %in% c("wb-2402800", "wb-2398282", "wb-2400351")) %>%
-  dplyr::select(hy_id, cs_id, pt_id, cs_measure) %>% 
-  dplyr::group_by(hy_id, cs_id) %>% 
-  dplyr::slice(1) %>% 
-  dplyr::ungroup() %>% 
-  hydrofabric3D::add_tmp_id() %>% 
-  dplyr::group_by(hy_id) %>% 
-  dplyr::mutate(
-    new_cs_id = 1:dplyr::n()
-  ) %>% 
-  dplyr::ungroup() %>% 
-  dplyr::select(new_cs_id, tmp_id)
-
-# Renumber the transects to have correct CS IDs
-out_transects2 <- dplyr::left_join(
-  hydrofabric3D::add_tmp_id(out_transects),
-  renumbered_ids,
-  by = "tmp_id"
-) %>% 
-  dplyr::select(-cs_id, -tmp_id) %>% 
-  dplyr::select(hy_id, cs_source, 
-                cs_id = new_cs_id, 
-                cs_measure, cs_lengthm, 
-                # sinuosity,
-                is_extended, 
-                left_bank_count, right_bank_count, channel_count, bottom_count, 
-                bottom, left_bank, right_bank, valid_banks, has_relief,
-                geometry = geom
-  )
-
-# Renumber the cross sections points to have correct CS IDs
-fixed_pts <- 
-  dplyr::left_join(
-      hydrofabric3D::add_tmp_id(fixed_pts),
-      renumbered_ids,
-      by = "tmp_id"
-  ) %>% 
-  dplyr::select(-cs_id, -tmp_id) %>% 
-  dplyr::rename(cs_id = new_cs_id)
-
-renumbered_ids <- 
-  df %>%
-  sf::st_drop_geometry() %>%
-  dplyr::select(
-    # hy_id, 
-    dplyr::any_of(crosswalk_id),
-    cs_id, pt_id, cs_measure
-  ) %>%
-  dplyr::group_by(dplyr::across(dplyr::any_of(c(crosswalk_id, "cs_id")))) %>% 
-  # dplyr::group_by(hy_id, cs_id) %>%
-  dplyr::slice(1) %>%
-  dplyr::ungroup() %>%
-  dplyr::group_by(dplyr::across(dplyr::any_of(c(crosswalk_id)))) %>% 
-  # dplyr::group_by(hy_id) %>%
-  dplyr::mutate(
-    new_cs_id = 1:dplyr::n()
-    # tmp_id = paste0(hy_id, "_", cs_id)
-  ) %>%
-  add_tmp_id(x = get(crosswalk_id)) %>% 
-  dplyr::ungroup() %>%
-  dplyr::select(new_cs_id, tmp_id)
-
-# Join the new cs_ids back with the final output data to replace the old cs_ids
-df <- dplyr::left_join(
-  add_tmp_id(df, x = get(crosswalk_id)),
-  # dplyr::mutate(df,tmp_id = paste0(hy_id, "_", cs_id)),
-  renumbered_ids,
-  by = "tmp_id"
-) %>%
-  dplyr::select(-cs_id, -tmp_id) %>%
-  dplyr::rename("cs_id" = "new_cs_id") %>%
-  dplyr::relocate(dplyr::any_of(crosswalk_id), cs_id)
-# dplyr::relocate(hy_id, cs_id)
-
-# ----------------------------------------------------------------------------------------------------------------
-# ---- Cross section points parquet to S3 ----
-# ----------------------------------------------------------------------------------------------------------------
-
-# classify the cross section points
-fixed_pts <-
-  fixed_pts %>% 
-  dplyr::mutate(
-    X = sf::st_coordinates(.)[,1],
-    Y = sf::st_coordinates(.)[,2]
-  ) %>%
-  sf::st_drop_geometry() %>% 
-  dplyr::select(
-    hy_id, cs_id, pt_id,
-    cs_lengthm,
-    relative_distance,
-    X, Y, Z,
-    class, point_type,
-    bottom, left_bank, right_bank, valid_banks, has_relief # newly added columns (03/06/2024)
-  )
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+# ----------------------------------------------------------------------------------------------------------------
\ No newline at end of file

From 3441b885ff7e9a3fa01ca21d528bb5765ada4e8f Mon Sep 17 00:00:00 2001
From: anguswg-ucsb <anguswatters@gmail.com>
Date: Wed, 16 Oct 2024 15:54:01 -0700
Subject: [PATCH 61/64] updated inject ml script to use new version of
 hydrofabric3D

---
 runners/cs_runner/03_inject_ml.R | 35 +++++++++++++++++++++-----------
 1 file changed, 23 insertions(+), 12 deletions(-)

diff --git a/runners/cs_runner/03_inject_ml.R b/runners/cs_runner/03_inject_ml.R
index 8c4940b..ec777ae 100644
--- a/runners/cs_runner/03_inject_ml.R
+++ b/runners/cs_runner/03_inject_ml.R
@@ -40,7 +40,7 @@ ml_output <- arrow::read_parquet(ML_OUTPUTS_PATH)
 # then classify the points, and create a parquet file with hy_id, cs_id, pt_id, X, Y, Z data.
 # Save parquet locally and upload to specified S3 bucket
 for (i in 1:nrow(path_df)) {
-  
+  # i = 8
   start <- round(Sys.time())
   
   # nextgen file and full path
@@ -157,7 +157,7 @@ for (i in 1:nrow(path_df)) {
   
   message(round(Sys.time()), " - Replacing any negative width/depth estimates with cross section bottom lengths...")
   
-  cs_bottom_lengths <- hydrofabric3D::get_cs_bottom_length(cs_pts)
+  cs_bottom_lengths <- hydrofabric3D::get_cs_bottom_length(cross_section_pts = cs_pts, crosswalk_id = "hy_id")
   
   # TODO: for now we replace any negative TW values with the length of the bottom of the cross section
   # TODO: This method + the negative model output values both need to be looked into (04/05/2024)
@@ -241,23 +241,31 @@ for (i in 1:nrow(path_df)) {
     )
   }
   message(round(Sys.time()), " - Adding cross section bathymetry using inchannel widths/depths estimates...")
-
-  # Add bathymetry using "inchannel" estimates
-  inchannel_cs <- hydrofabric3D::add_cs_bathymetry(
-      cross_section_pts = inchannel_cs
-    )
+  
+  # system.time({
+    
+    # Add bathymetry using "inchannel" estimates
+    inchannel_cs <- hydrofabric3D::add_cs_bathymetry(
+        cross_section_pts = inchannel_cs,
+        crosswalk_id = "hy_id"
+      )
+    
+  # })
   # arrow::write_parquet(inchannel_cs, "/Users/anguswatters/Desktop/test_ml_cs_pts_06.parquet")
   # ml_subset %>%
   #   dplyr::filter(hy_id == "wb-1005207") %>%
   #   dplyr::select(owp_y_inchan, owp_tw_inchan) %>% 
   #   .$owp_y_inchan
   message(round(Sys.time()), " - Adding cross section bathymetry using bankful widths/depths estimates...")
-
+  
+  # system.time({
   # Add bathymetry using "bankful" estimates
   bankful_cs <- hydrofabric3D::add_cs_bathymetry(
-    cross_section_pts = bankful_cs
+    cross_section_pts = bankful_cs,
+    crosswalk_id = "hy_id"
   )
-    
+  # })
+  
   # combine the inchannel and bankful cross section points back together, fill out missing values and reclassify the points
   final_cs <- dplyr::bind_rows(
                 dplyr::select(
@@ -287,9 +295,12 @@ for (i in 1:nrow(path_df)) {
   
   message(round(Sys.time()), " - Reclassifying cross section point types...")
   
+  # system.time({
   # reclassify
-  final_cs <- hydrofabric3D::classify_points(final_cs, crosswalk_id = "hy_id")
-
+  final_cs <- hydrofabric3D::classify_points(cs_pts = final_cs, 
+                                             crosswalk_id = "hy_id")
+  # })
+  
   # final_uids <- final_cs %>% hydrofabric3D::get_unique_tmp_ids()
   # random_uids <- sample(x=final_uids, size=12)
   # cs_subset <-  dplyr::filter(hydrofabric3D::add_tmp_id(final_cs), 

From 22cabf362dffec94d2a948ebf02ad61bcb759219 Mon Sep 17 00:00:00 2001
From: anguswg-ucsb <anguswatters@gmail.com>
Date: Fri, 15 Nov 2024 14:54:50 -0800
Subject: [PATCH 62/64] begin moving cs_runner files and code over to a new
 cs_runner2 folder for cleanliness; the old folder will be deleted once the
 migration is complete

---
 runners/cs_runner/01_transects.R              |   25 +-
 runners/cs_runner/02_cs_pts.R                 |   10 +-
 runners/cs_runner/03_inject_ml.R              |    4 +-
 runners/cs_runner/config.R                    |    6 +-
 runners/cs_runner/config_vars.R               |   51 +-
 runners/cs_runner/domain_with_fema.R          |  363 ++-
 runners/cs_runner/domain_with_fema2.R         | 2245 +++++++++++++++
 runners/cs_runner/download_fema100.R          |  123 -
 runners/cs_runner/new_domain.R                |    2 +-
 runners/cs_runner/test_cs_pts.R               |  256 ++
 runners/cs_runner/test_fix_cs_pts.R           |  278 ++
 runners/cs_runner/utils.R                     | 2315 +++++++++++++--
 runners/cs_runner2/base_variables.R           |  396 +++
 runners/cs_runner2/config_env.R               |   26 +
 runners/cs_runner2/download_conus_nextgen.R   |   28 +
 .../cs_runner2/download_conus_ref_features.R  |   31 +
 runners/cs_runner2/download_dem_from_vrt.R    |   32 +
 runners/cs_runner2/download_fema100.R         |  123 +
 .../partition_fema_by_vpu.R                   |  384 ++-
 runners/cs_runner2/utils.R                    | 2562 +++++++++++++++++
 20 files changed, 8764 insertions(+), 496 deletions(-)
 create mode 100644 runners/cs_runner/domain_with_fema2.R
 delete mode 100644 runners/cs_runner/download_fema100.R
 create mode 100644 runners/cs_runner/test_cs_pts.R
 create mode 100644 runners/cs_runner/test_fix_cs_pts.R
 create mode 100644 runners/cs_runner2/base_variables.R
 create mode 100644 runners/cs_runner2/config_env.R
 create mode 100644 runners/cs_runner2/download_conus_nextgen.R
 create mode 100644 runners/cs_runner2/download_conus_ref_features.R
 create mode 100644 runners/cs_runner2/download_dem_from_vrt.R
 create mode 100644 runners/cs_runner2/download_fema100.R
 rename runners/{cs_runner => cs_runner2}/partition_fema_by_vpu.R (69%)
 create mode 100644 runners/cs_runner2/utils.R

diff --git a/runners/cs_runner/01_transects.R b/runners/cs_runner/01_transects.R
index 01bc78a..ee609aa 100644
--- a/runners/cs_runner/01_transects.R
+++ b/runners/cs_runner/01_transects.R
@@ -9,7 +9,7 @@ source("runners/cs_runner/utils.R")
 # install.packages("devtools")
 
 # # transect bucket prefix
-# S3_TRANSECTS_DIR <- paste0(S3_BUCKET_URI, VERSION, "/3D/transects/")
+# S3_TRANSECTS_DIR <- paste0(LYNKER_SPATIAL_HF_S3_URI, VERSION, "/3D/transects/")
 
 # paths to nextgen datasets and model attribute parquet files
 NEXTGEN_FILES    <- list.files(NEXTGEN_DIR, full.names = FALSE)
@@ -57,9 +57,10 @@ for(i in 1:nrow(path_df)) {
   # calculate bankfull width
   flines <-
     flines %>%
-    dplyr::mutate(
-      bf_width = exp(0.700    + 0.365* log(tot_drainage_areasqkm))
-    ) %>%
+    hydrofabric3D::add_powerlaw_bankful_width(
+      total_drainage_area_sqkm_col = "tot_drainage_areasqkm", 
+      min_bf_width = 50
+      ) %>% 
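+    # note: add_powerlaw_bankful_width() appears to apply the same power law as the inline
+    # mutate it replaces (bf_width = exp(0.700 + 0.365 * log(tot_drainage_areasqkm)), see the
+    # commented-out block below), flooring the result at min_bf_width (50 m here)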
     dplyr::select(
       hy_id = id,
       lengthkm,
@@ -68,6 +69,17 @@ for(i in 1:nrow(path_df)) {
       mainstem,
       geometry = geom
     )
+    # dplyr::mutate(
+    #   bf_width = exp(0.700    + 0.365* log(tot_drainage_areasqkm))
+    # ) %>%
+    # dplyr::select(
+    #   hy_id = id,
+    #   lengthkm,
+    #   tot_drainage_areasqkm,
+    #   bf_width,
+    #   mainstem,
+    #   geometry = geom
+    # )
 
   # flines$bf_width <- ifelse(is.na(flines$bf_width),  exp(0.700    + 0.365* log(flines$tot_drainage_areasqkm)), flines$bf_width)
 
@@ -76,8 +88,9 @@ for(i in 1:nrow(path_df)) {
   # create transect lines
   transects <- hydrofabric3D::cut_cross_sections(
     net               = flines,                        # flowlines network
-    id                = "hy_id",                       # Unique feature ID
-    cs_widths         = pmax(50, flines$bf_width * 11),     # cross section width of each "id" linestring ("hy_id")
+    crosswalk_id      = "hy_id",                       # Unique feature ID
+    cs_widths         = flines$bf_width,     # cross section width of each "id" linestring ("hy_id")
+    # cs_widths         = pmax(50, flines$bf_width * 11),     # cross section width of each "id" linestring ("hy_id")
     # cs_widths         = pmax(50, flines$bf_width),     # cross section width of each "id" linestring ("hy_id")
     num               = 10,                            # number of cross sections per "id" linestring ("hy_id")
     smooth            = TRUE,                          # smooth lines
diff --git a/runners/cs_runner/02_cs_pts.R b/runners/cs_runner/02_cs_pts.R
index 264a2ab..1614e10 100644
--- a/runners/cs_runner/02_cs_pts.R
+++ b/runners/cs_runner/02_cs_pts.R
@@ -113,7 +113,7 @@ for (i in 20:nrow(path_df)) {
     crosswalk_id   = "hy_id",
     points_per_cs  = NULL,
     min_pts_per_cs = 10,
-    dem            = DEM_URL
+    dem            = DEM_PATH
   )
 
   # })
@@ -175,7 +175,7 @@ for (i in 20:nrow(path_df)) {
   #   transects      = transects, # original transect lines
   #   points_per_cs  = NULL, 
   #   min_pts_per_cs = 10, # number of points per cross sections
-  #   dem            = DEM_URL, # DEM to extract points from
+  #   dem            = DEM_PATH, # DEM to extract points from
   #   scale          = EXTENSION_PCT, # How far to extend transects if the points need to be rechecked
   #   pct_of_length_for_relief = PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF, # percent of cross sections length to be needed in relief calculation to consider cross section to "have relief"
   #   fix_ids = FALSE,
@@ -192,7 +192,7 @@ for (i in 20:nrow(path_df)) {
       crosswalk_id   = "hy_id",
       points_per_cs  = NULL, 
       min_pts_per_cs = 10, # number of points per cross sections
-      dem            = DEM_URL, # DEM to extract points from
+      dem            = DEM_PATH, # DEM to extract points from
       scale          = EXTENSION_PCT, # How far to extend transects if the points need to be rechecked
       pct_of_length_for_relief = PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF, # percent of cross sections length to be needed in relief calculation to consider cross section to "have relief"
       fix_ids = FALSE,
@@ -214,7 +214,7 @@ for (i in 20:nrow(path_df)) {
   #   transects      = transects, # original transect lines
   #   points_per_cs  = NULL, 
   #   min_pts_per_cs = 10, # number of points per cross sections
-  #   dem            = DEM_URL, # DEM to extract points from
+  #   dem            = DEM_PATH, # DEM to extract points from
   #   scale          = EXTENSION_PCT, # How far to extend transects if the points need to be rechecked
   #   pct_of_length_for_relief = PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF, # percent of cross sections length to be needed in relief calculation to consider cross section to "have relief"
   #   fix_ids = FALSE,
@@ -407,7 +407,7 @@ for (i in 20:nrow(path_df)) {
   ids_before_align <- hydrofabric3D::add_tmp_id(fixed_pts)$tmp_id
   
   message("Aligning banks and smoothing bottoms...")
-  fixed_pts <- hydrofabric3D::align_banks_and_bottoms(cs_pts = fixed_pts)
+  fixed_pts <- hydrofabric3D::align_banks_and_bottoms(cs_pts = fixed_pts, crosswalk_id = "hy_id")
   # fixed_pts <- hydrofabric3D::align_banks_and_bottoms(cs_pts = fixed_pts, crosswalk_id = "hy_id")
   
   ids_after_align <- hydrofabric3D::add_tmp_id(fixed_pts)$tmp_id
diff --git a/runners/cs_runner/03_inject_ml.R b/runners/cs_runner/03_inject_ml.R
index ec777ae..f29efc7 100644
--- a/runners/cs_runner/03_inject_ml.R
+++ b/runners/cs_runner/03_inject_ml.R
@@ -11,8 +11,8 @@ source("runners/cs_runner/config.R")
 source("runners/cs_runner/utils.R")
 
 # cross section bucket prefix
-S3_CS_ML_PTS_DIR <- paste0(S3_BUCKET_URI, VERSION, "/3D/cross-sections/")
-# S3_CS_PTS_DIR <- paste0(S3_BUCKET_URI, "v20/3D/dem-cross-sections/")
+S3_CS_ML_PTS_DIR <- paste0(LYNKER_SPATIAL_HF_S3_URI, VERSION, "/3D/cross-sections/")
+# S3_CS_PTS_DIR <- paste0(LYNKER_SPATIAL_HF_S3_URI, "v20/3D/dem-cross-sections/")
 
 ML_OUTPUTS_PATH <- list.files(ML_OUTPUTS_DIR, full.names = TRUE)
 
diff --git a/runners/cs_runner/config.R b/runners/cs_runner/config.R
index c70320d..b70ef37 100644
--- a/runners/cs_runner/config.R
+++ b/runners/cs_runner/config.R
@@ -21,16 +21,16 @@ sf::sf_use_s2(FALSE)
 # # -------------------------------------------------------------------------------------
 
 # # AWS S3 bucket URI 
-# S3_BUCKET_URI <- "s3://lynker-spatial/"
+# LYNKER_SPATIAL_HF_S3_URI <- "s3://lynker-spatial/"
 
 # # name of bucket with nextgen data
-# S3_BUCKET_NAME <- "lynker-spatial"
+# LYNKER_SPATIAL_S3_BUCKET_NAME <- "lynker-spatial"
 
 # # the name of the folder in the S3 bucket with the nextgen data
 # S3_BUCKET_NEXTGEN_DIR <- "v20.1/gpkg/"
 
 # # full URI to the S3 bucket folder with the nextgen data 
-# S3_BUCKET_NEXTGEN_DIR_URI  <- paste0(S3_BUCKET_URI, S3_BUCKET_NEXTGEN_DIR)
+# S3_BUCKET_NEXTGEN_DIR_URI  <- paste0(LYNKER_SPATIAL_HF_S3_URI, S3_BUCKET_NEXTGEN_DIR)
 
 # # reference features S3 bucket prefix
 # S3_BUCKET_REF_FEATURES_URI <- "s3://lynker-spatial/00_reference_features/gpkg/"
diff --git a/runners/cs_runner/config_vars.R b/runners/cs_runner/config_vars.R
index 1a834a4..bee5b45 100644
--- a/runners/cs_runner/config_vars.R
+++ b/runners/cs_runner/config_vars.R
@@ -1,4 +1,4 @@
-### EDIT BASE_DIR, AWS_PROFILE, and DEM_URL ###
+### EDIT BASE_DIR, AWS_PROFILE, and DEM_PATH ###
 
 # ---------------------------------------------------------------------------------
 # ---- General paths and constants variables ----
@@ -7,7 +7,6 @@
 # - AWS_PROFILE: AWS profile to run CLI commands
 # - VERSION: S3 prefix/folder of version to run / generate hydrofabric data for
 # ---------------------------------------------------------------------------------
-
 # Base directory for local file storage
 BASE_DIR           <- '/Users/anguswatters/Desktop/lynker-spatial'
 
@@ -18,16 +17,16 @@ AWS_PROFILE        <- "angus-lynker"
 VERSION            <- "v20.1"
 
 # string to fill in "CS_SOURCE" column in output datasets
-CS_SOURCE <- "hydrofabric3D"
+CS_SOURCE          <- "hydrofabric3D"
 
 # name of bucket with nextgen data
-S3_BUCKET_NAME     <- "lynker-spatial"
-S3_BUCKET_SUBDIR   <- "hydrofabric"
+LYNKER_SPATIAL_S3_BUCKET_NAME     <- "lynker-spatial"
+LYNKER_SPATIAL_HF_S3_PREFIX   <- "hydrofabric"
 
 # AWS S3 bucket URI 
-S3_BUCKET_BASE_URI <- paste0("s3://", S3_BUCKET_NAME, "/")
-S3_BUCKET_URI      <- paste0(S3_BUCKET_BASE_URI, S3_BUCKET_SUBDIR, "/")
-# S3_BUCKET_URI      <- "s3://lynker-spatial/"
+LYNKER_SPATIAL_BASE_S3_URI <- paste0("s3://", LYNKER_SPATIAL_S3_BUCKET_NAME, "/")
+LYNKER_SPATIAL_HF_S3_URI      <- paste0(LYNKER_SPATIAL_BASE_S3_URI, LYNKER_SPATIAL_HF_S3_PREFIX, "/")
+# LYNKER_SPATIAL_HF_S3_URI      <- "s3://lynker-spatial/"
 
 # -------------------------------------------------------------------------------------
 # ---- S3 output directories -----
@@ -37,13 +36,13 @@ S3_BUCKET_URI      <- paste0(S3_BUCKET_BASE_URI, S3_BUCKET_SUBDIR, "/")
 # -------------------------------------------------------------------------------------
 
 # transect bucket prefix
-S3_TRANSECTS_DIR   <- paste0(S3_BUCKET_URI, VERSION, "/3D/transects/")
+S3_TRANSECTS_DIR   <- paste0(LYNKER_SPATIAL_HF_S3_URI, VERSION, "/3D/transects/")
 
 # cross section bucket prefix
-S3_CS_PTS_DIR      <- paste0(S3_BUCKET_URI, VERSION, "/3D/dem-cross-sections/")
+S3_CS_PTS_DIR      <- paste0(LYNKER_SPATIAL_HF_S3_URI, VERSION, "/3D/dem-cross-sections/")
 
 # cross section bucket prefix
-S3_CS_ML_PTS_DIR   <- paste0(S3_BUCKET_URI, VERSION, "/3D/cross-sections/")
+S3_CS_ML_PTS_DIR   <- paste0(LYNKER_SPATIAL_HF_S3_URI, VERSION, "/3D/cross-sections/")
 
 # -------------------------------------------------------------------------------------
 # ---- S3 nextgen data paths / directories -----
@@ -54,10 +53,10 @@ S3_BUCKET_NEXTGEN_DIR       <- paste0(VERSION, "/gpkg/")
 # S3_BUCKET_NEXTGEN_DIR <- "v20.1/gpkg/"
 
 # full URI to the S3 bucket folder with the nextgen data 
-S3_BUCKET_NEXTGEN_DIR_URI   <- paste0(S3_BUCKET_URI, S3_BUCKET_NEXTGEN_DIR)
+S3_BUCKET_NEXTGEN_DIR_URI   <- paste0(LYNKER_SPATIAL_HF_S3_URI, S3_BUCKET_NEXTGEN_DIR)
 
 # reference features S3 bucket prefix
-S3_BUCKET_REF_FEATURES_URI  <- paste0("s3://", S3_BUCKET_NAME, "/00_reference_features/gpkg/")
+S3_BUCKET_REF_FEATURES_URI  <- paste0("s3://", LYNKER_SPATIAL_S3_BUCKET_NAME, "/00_reference_features/gpkg/")
 # S3_BUCKET_REF_FEATURES_URI  <- "s3://lynker-spatial/00_reference_features/gpkg/"
 
 # ----------------------------------------------------------------------------
@@ -69,16 +68,18 @@ ML_OUTPUTS_S3_FILE   <- "channel_ml_outputs.parquet"
 # ML_OUTPUTS_S3_DIR    <- paste0(VERSION, "/3D/ml-outputs/")
 # ML_OUTPUTS_S3_DIR  <- "v20.1/3D/ml-outputs/"
 
-ML_OUTPUTS_S3_URI      <- paste0(S3_BUCKET_URI, VERSION, "/3D/ml-outputs/", ML_OUTPUTS_S3_FILE)
-# ML_OUTPUTS_S3_URI    <- paste0(S3_BUCKET_URI, ML_OUTPUTS_S3_DIR, ML_OUTPUTS_S3_FILE)
+ML_OUTPUTS_S3_URI      <- paste0(LYNKER_SPATIAL_HF_S3_URI, VERSION, "/3D/ml-outputs/", ML_OUTPUTS_S3_FILE)
+# ML_OUTPUTS_S3_URI    <- paste0(LYNKER_SPATIAL_HF_S3_URI, ML_OUTPUTS_S3_DIR, ML_OUTPUTS_S3_FILE)
 
 ML_OUTPUTS_PATH      <- paste0(BASE_DIR, "/ml-outputs/", ML_OUTPUTS_S3_FILE)
 
 # path to the remote CONUS net parquet file
 CONUS_NETWORK_FILE   <- "conus_net.parquet"
-CONUS_NETWORK_URI    <- paste0(S3_BUCKET_URI, VERSION, "/", CONUS_NETWORK_FILE)
+CONUS_NETWORK_URI    <- paste0(LYNKER_SPATIAL_HF_S3_URI, VERSION, "/", CONUS_NETWORK_FILE)
 
 # ----------------------------------------------------------------------------
+
+
 # -------------------------------------------------------------------------------------
 # ---- Local directory / path variables ----
 # -------------------------------------------------------------------------------------
@@ -104,14 +105,18 @@ REF_FEATURES_GPKG_DIR <- paste0(REF_FEATURES_DIR, "gpkg/")
 # make a directory for the ML outputs data
 ML_OUTPUTS_DIR   <- paste0(BASE_DIR, "/ml-outputs/")
 
+DEM_DIR      <- paste0(BASE_DIR, "/dem")
+DEM_VRT_DIR  <- paste0(DEM_DIR, "/vrt")
+DEM_TIF_DIR  <- paste0(DEM_DIR, "/tif")
+
 # -------------------------------------------------------------------------------------
 # ---- Create local directory / path variables (FEMA data) ----
 # -------------------------------------------------------------------------------------
 
 # location of FEMA 100 year flood plain FGB files
 FEMA_S3_BUCKET         <- "s3://lynker-hydrofabric/"
-FEMA_S3_BUCKET_PREFIX  <- "FEMA100/"
-FEMA_S3_DIR            <- paste0(FEMA_S3_BUCKET, FEMA_S3_BUCKET_PREFIX)
+LYNKER_HF_FEMA_S3_PREFIX  <- "FEMA100/"
+LYNKER_HF_FEMA_S3_URI            <- paste0(FEMA_S3_BUCKET, LYNKER_HF_FEMA_S3_PREFIX)
 
 # FEMA100 year flood map FGB save location (temporary, will be deleted after processing)
 FEMA_FGB_PATH        <- paste0(BASE_DIR, "/FEMA100")
@@ -132,6 +137,10 @@ FEMA_VPU_SUBFOLDERS  <- paste0(FEMA_BY_VPU_PATH, "/VPU_", VPU_IDS)
 #                           )
 #                         )
 
+CS_EXTENSION_POLYGONS_DIR <- paste0(BASE_DIR, "/cs-extension-polygons")
+CONUS_FEMA_GPKG_PATH      <- file.path(CS_EXTENSION_POLYGONS_DIR, 'conus_fema.gpkg')
+
 # -------------------------------------------------------------------------------------
 # ---- OVERWRITE_FEMA_FILES constant logicals----
 # ---- > if TRUE, processing steps will be run again 
@@ -148,7 +157,7 @@ DELETE_STAGING_GPKGS  <- TRUE # remove intermediary files from the main output f
 # ----------------------------------------------------------------------------
 
 # DEM URL
-DEM_URL        <- "/vsicurl/https://prd-tnm.s3.amazonaws.com/StagedProducts/Elevation/1/TIFF/USGS_Seamless_DEM_1.vrt"
+DEM_PATH        <- "/vsicurl/https://prd-tnm.s3.amazonaws.com/StagedProducts/Elevation/1/TIFF/USGS_Seamless_DEM_1.vrt"
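+# If the seamless 3DEP VRT has been copied locally (see the `aws s3 cp` note earlier in this
+# file), DEM_PATH can point at the local copy instead of the /vsicurl/ URL, e.g. (hypothetical
+# local path, assuming the file was saved under DEM_VRT_DIR):
+# DEM_PATH <- paste0(DEM_VRT_DIR, "/USGS_Seamless_DEM_1.vrt")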
 
 # scale argument for cross_section_pts() function. 
 # The percentage of the length of the transect line to try and extend a transect to see if viable Z values can be found by extending transect line
@@ -247,8 +256,8 @@ DOMAIN_WITH_FEMA_OUTPUT_DIR             <- paste0(DOMAIN_WITH_FEMA_DIR, "/", DOM
 DOMAIN_WITH_FEMA_VPU_SUBSETS_DIR        <- paste0(DOMAIN_WITH_FEMA_DIR, "/", DOMAIN_WITH_FEMA_VPU_SUBSETS_DIRNAME)
 DOMAIN_WITH_FEMA_ML_DIR                 <- paste0(DOMAIN_WITH_FEMA_DIR, "/", DOMAIN_WITH_FEMA_ML_DIRNAME)
 
-ML_AUXILIARY_DATA_S3_URI <- paste0(S3_BUCKET_BASE_URI, "bathymetry/ml_auxiliary_data")
-# S3_BUCKET_BASE_URI
+ML_AUXILIARY_DATA_S3_URI <- paste0(LYNKER_SPATIAL_BASE_S3_URI, "bathymetry/ml_auxiliary_data")
+# LYNKER_SPATIAL_BASE_S3_URI
 LYNKER_HYDROFABRIC_S3_BUCKET_BASE_URI   <- "s3://lynker-hydrofabric/"
 ML_BATHYMETRY_S3_DATA_DIR               <- "hydrofabric/nextgen/bathymetry/multisource_river_attributes/"
 ML_BATHYMETRY_S3_URI                    <- paste0(LYNKER_HYDROFABRIC_S3_BUCKET_BASE_URI, ML_BATHYMETRY_S3_DATA_DIR)
diff --git a/runners/cs_runner/domain_with_fema.R b/runners/cs_runner/domain_with_fema.R
index ec13e6e..f673ffb 100644
--- a/runners/cs_runner/domain_with_fema.R
+++ b/runners/cs_runner/domain_with_fema.R
@@ -83,9 +83,13 @@ flines <-
       TRUE              ~ VPUID
     )
   ) 
+# rm(flines2)
+# unnest_ids(flines2$VPUID2)
 
 # set of unique VPUs
+
 VPU_IDS <- unnest_ids(flines$VPUID)
+# VPU_IDS <- unnest_ids(flines2$VPUID2)
 VPU_IDS
 
 # all possible FEMA dirs
@@ -98,13 +102,9 @@ flines %>%
 # unique(flines$VPUID)
 
 # calculate bankfull width
-flines <-
-  flines %>%
-  dplyr::mutate(
-    bf_width = hydrofabric3D::calc_powerlaw_bankful_width(tot_drainage_areasqkm)
-    # bf_width = exp(0.700    + 0.365* log(tot_drainage_areasqkm))
-  ) %>%
-  # hydrofabric3D::add_powerlaw_bankful_width("tot_drainage_areasqkm", 50) %>% 
+flines <- 
+  flines %>% 
+  hydrofabric3D::add_powerlaw_bankful_width("tot_drainage_areasqkm", 50) %>% 
   dplyr::select(
     dplyr::any_of(CROSSWALK_ID),
     VPUID,
@@ -116,8 +116,309 @@ flines <-
   ) %>% 
   hydroloom::rename_geometry("geometry")
 
+transects <- hydrofabric3D::cut_cross_sections(
+  net               = flines,                        # flowlines network
+  crosswalk_id      = CROSSWALK_ID,                     # Unique feature ID
+  cs_widths         = flines$bf_width,
+  # cs_widths         = pmax(50, flowlines$bf_width * 11),     # cross section width of each "id" linestring ("hy_id")
+  # cs_widths         = pmax(50, flowlines$bf_width),     # cross section width of each "id" linestring ("hy_id")
+  num               = 10,                            # number of cross sections per "id" linestring ("hy_id")
+  smooth            = TRUE,                          # smooth lines
+  densify           = 3,                             # densify linestring points
+  rm_self_intersect = TRUE,                          # remove self intersecting transects
+  fix_braids        = FALSE,                         # whether to fix braided flowlines or not
+  #### Arguments used for when fix_braids = TRUE     # TODO: these methods need revision in hydrofabric3D to allow for more flexible processing for data that is NOT COMID based (i.e. hy_id)    
+  # terminal_id       = NULL,
+  # braid_threshold   = NULL,
+  # version           = 2,
+  # braid_method      = "comid",
+  # precision         = 1,
+  add               = TRUE                           # whether to add back the original data
+)
+
+sf::write_sf(
+  transects,
+  "/Users/anguswatters/Desktop/tmp.gpkg"
+)
+
+# flines <-
+#   flines %>%
+#   dplyr::mutate(
+#     bf_width = hydrofabric3D::calc_powerlaw_bankful_width(tot_drainage_areasqkm)
+#     # bf_width = exp(0.700    + 0.365* log(tot_drainage_areasqkm))
+#   ) %>%
+#   # hydrofabric3D::add_powerlaw_bankful_width("tot_drainage_areasqkm", 50) %>% 
+#   dplyr::select(
+#     dplyr::any_of(CROSSWALK_ID),
+#     VPUID,
+#     # hy_id = id,
+#     lengthkm,
+#     tot_drainage_areasqkm,
+#     bf_width,
+#     mainstem
+#   ) %>% 
+#   hydroloom::rename_geometry("geometry")
+
+
+transects <- 
+  transects %>% 
+  dplyr::left_join(
+    flines %>% 
+      dplyr::select(id, VPUID, tot_drainage_areasqkm) %>% 
+      sf::st_drop_geometry(),
+    by = "id"
+  )
+
+transects$VPUID %>% unique()
+
+GROUP_VPU_IDS <- unnest_ids(transects$VPUID)
+
+# all FEMA dirs for the current area
+GROUP_FEMA_DIRS  <- FEMA_VPU_SUBFOLDERS[basename(FEMA_VPU_SUBFOLDERS) %in% paste0("VPU_", GROUP_VPU_IDS) ]
+GROUP_FEMA_FILES <- list.files(GROUP_FEMA_DIRS, full.names = T)[grepl("_output.gpkg", list.files(GROUP_FEMA_DIRS, full.names = T))]
+
+# read each FEMA geopackage into a list (GROUP_FEMA_FILES is defined above)
+fema <- lapply(GROUP_FEMA_FILES, function(gpkg) sf::read_sf(gpkg))
+
+fema <- 
+  fema %>% 
+  dplyr::bind_rows() %>% 
+  dplyr::mutate(
+    fema_id = 1:dplyr::n()
+  ) 
+
+message("Simplifying FEMA polygons...")
+message(" - Number of geoms BEFORE simplifying: ", nrow(fema))
+
+# TODO: this should be a function argument OR removed; we probably shouldn't forcibly and silently simplify the input polygons without the user knowing
+# keep 1% of the original points for speed
+fema <- rmapshaper::ms_simplify(fema, keep_shapes = T, keep = 0.01, sys = TRUE, sys_mem = 16)
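+# keep = 0.01 retains ~1% of each polygon's vertices and keep_shapes = TRUE prevents small
+# polygons from being dropped entirely; sys = TRUE shells out to the system mapshaper binary
+# (sys_mem = 16 gives it 16 GB of memory), which handles large inputs much faster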
+# fema <- rmapshaper::ms_simplify(fema, keep_shapes = T, keep = 0.1, sys = TRUE, sys_mem = 16)
+
+message(" - Number of geoms AFTER simplifying: ", nrow(fema))
+message("Extending transects out to FEMA 100yr floodplain polygon boundaries - (", Sys.time(), ")")
+
+transects <- 
+  transects  %>%
+  dplyr::left_join(
+    dplyr::select(sf::st_drop_geometry(flines),
+                  dplyr::any_of(CROSSWALK_ID),
+                  mainstem
+    ),
+    by = CROSSWALK_ID
+  )
+
+# TODO: make sure this 3000m extension distance is appropriate across VPUs 
+# TODO: also got to make sure that this will be feasible on memory on the larger VPUs...
+ext_transects <- hydrofabric3D::extend_transects_to_polygons(
+  transect_lines         = transects, 
+  polygons               = fema, 
+  flowlines              = flines, 
+  crosswalk_id           = CROSSWALK_ID,
+  grouping_id            = "mainstem", 
+  max_extension_distance = 3000 
+)
+
+sf::write_sf(
+  ext_transects,
+  "/Users/anguswatters/Desktop/tmp_ext.gpkg"
+)
+
+ext_transects <- 
+  ext_transects %>% 
+  dplyr::select(id, cs_id, cs_lengthm, cs_measure, ds_distance, lengthm, sinuosity, geometry)
+
+# get cross section point elevations
+cs_pts <- hydrofabric3D::cross_section_pts(
+  cs             = ext_transects,
+  crosswalk_id   = CROSSWALK_ID,
+  points_per_cs  = NULL,
+  min_pts_per_cs = 10,
+  dem            = DEM_PATH
+)
+
+sf::write_sf(
+  cs_pts,
+  "/Users/anguswatters/Desktop/tmp_cs_pts.gpkg"
+)
+# ----------------------------------------------------------------------------------------------------------------
+# ---- STEP 2: Remove any cross section that has ANY missing (NA) Z values, and classify the points  ----
+# ----------------------------------------------------------------------------------------------------------------
+# cs_pts2 %>% 
+#   dplyr::slice(1:200) %>% 
+#   dplyr::rename(hy_id = id) %>% 
+#   hydrofabric3D::plot_cs_pts(x = "pt_id", color = "point_type")
+
+cs_pts <- 
+  # cs_pts2 <- 
+  cs_pts %>% 
+  hydrofabric3D::drop_incomplete_cs_pts(CROSSWALK_ID) %>% 
+  hydrofabric3D::classify_points(
+    crosswalk_id             = CROSSWALK_ID, 
+    pct_of_length_for_relief = PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF
+  )  
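+# optional sanity check: no NA Z values should remain after drop_incomplete_cs_pts()
+# any(is.na(cs_pts$Z))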
+
+# })
+
+ids_original_cs_pts <- hydrofabric3D::add_tmp_id(cs_pts, x = CROSSWALK_ID)$tmp_id  
+# ids_original_cs_pts <- hydrofabric3D::add_tmp_id(cs_pts2)$tmp_id
+
+# sf::write_sf(cs_pts2, "/Users/anguswatters/Desktop/test_improve_cs_pts_classified_11.gpkg")
+# sf::write_sf(cs_pts, "/Users/anguswatters/Desktop/test_improve_cs_pts_classified_11_2.gpkg")
+
+
+# ----------------------------------------------------------------------------------------------------------------
+# ---- STEP 3: Try to rectify any no relief and invalid banks cross sections ----
+# ----------------------------------------------------------------------------------------------------------------
+
+# system.time({
+fixed_pts <- hydrofabric3D::get_improved_cs_pts(
+  cs_pts         = cs_pts,    # cross section points generated from hydrofabric3D::cross_section_pts()
+  net            = flines,    # original flowline network
+  # net            = flowlines,    # original flowline network
+  transects      = ext_transects, # original transect lines
+  crosswalk_id   = CROSSWALK_ID,
+  points_per_cs  = NULL, 
+  min_pts_per_cs = 10, # number of points per cross sections
+  dem            = DEM_PATH, # DEM to extract points from
+  scale          = EXTENSION_PCT, # How far to extend transects if the points need to be rechecked
+  pct_of_length_for_relief = PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF, # percent of cross sections length to be needed in relief calculation to consider cross section to "have relief"
+  fix_ids = FALSE,
+  verbose = TRUE
+)
+# })
+
+ids_after_fixed_pts <- hydrofabric3D::add_tmp_id(fixed_pts, x = CROSSWALK_ID)$tmp_id  
+
+# ----------------------------------------------------------------------------------------------------------------
+# ---- Update transects with extended transects (if exists) ----
+# ----------------------------------------------------------------------------------------------------------------
+ext_transects <- 
+  ext_transects %>% 
+  dplyr::mutate(cs_source = CS_SOURCE)
+
+out_transects <- match_transects_to_extended_cs_pts(
+  transect_lines = ext_transects, 
+  fixed_cs_pts   = fixed_pts, 
+  crosswalk_id   = CROSSWALK_ID,
+  extension_pct  = EXTENSION_PCT
+)
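+# match_transects_to_extended_cs_pts() (sourced from runners/cs_runner/utils.R) appears to
+# replicate the logic removed from new_domain.R: keep only transects whose id/cs_id combos
+# survive in fixed_pts, and swap in versions extended by extension_pct for any transects
+# whose cross section points were re-extracted after extension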
+
+sf::write_sf(
+  out_transects,
+  "/Users/anguswatters/Desktop/tmp_trans_improved.gpkg"
+)
+
+trans_uids <- hydrofabric3D::get_unique_tmp_ids(out_transects, "id")
+ext_trans_uids <- hydrofabric3D::get_unique_tmp_ids(ext_transects, "id")
+fixed_pts_uids <- hydrofabric3D::get_unique_tmp_ids(fixed_pts, "id")
+
+all(trans_uids %in% ext_trans_uids)
+all(ext_trans_uids %in% trans_uids)
+
+all(trans_uids %in% fixed_pts_uids)
+all(fixed_pts_uids %in% trans_uids)
+
+all(ext_trans_uids %in% fixed_pts_uids)
+all(fixed_pts_uids %in% ext_trans_uids)
+
+# ----------------------------------------------------------------------------------------------------------------
+# ---- Re-enumerate the transects & cross section points "cs_id" ----
+# ----------------------------------------------------------------------------------------------------------------
+
+# fixed_pts      <- hydrofabric3D:::renumber_cs_ids(df = fixed_pts, crosswalk_id = "hy_id")
+# out_transects  <- hydrofabric3D:::renumber_cs_ids(
+#                                                   df            = dplyr::mutate(out_transects, pt_id = 1), 
+#                                                   crosswalk_id  = "hy_id"
+#                                                   ) %>% 
+#                                                   dplyr::select(-pt_id)
+
+fixed_pts      <- hydrofabric3D:::renumber_cs_ids(df = fixed_pts, crosswalk_id = CROSSWALK_ID)
+out_transects  <- hydrofabric3D:::renumber_cs_ids(df = out_transects, crosswalk_id = CROSSWALK_ID)
+sf::write_sf(
+  out_transects,
+  "/Users/anguswatters/Desktop/tmp_trans_improved2.gpkg"
+)
+object.size(out_transects)
+# classify the cross section points
+fixed_pts <-
+  fixed_pts %>% 
+  dplyr::mutate(
+    X = sf::st_coordinates(.)[,1],
+    Y = sf::st_coordinates(.)[,2]
+  ) %>%
+  sf::st_drop_geometry() %>% 
+  dplyr::select(
+    dplyr::any_of(CROSSWALK_ID),
+    cs_id, 
+    pt_id,
+    cs_lengthm,
+    relative_distance,
+    X, Y, Z,
+    class, point_type,
+    bottom, left_bank, right_bank, valid_banks, has_relief # newly added columns (03/06/2024)
+  )
+
+# add Z_source column for source of elevation data
+fixed_pts <-
+  fixed_pts %>%
+  dplyr::mutate(
+    Z_source = CS_SOURCE
+  ) %>%
+  dplyr::relocate(
+    dplyr::any_of(CROSSWALK_ID),
+    cs_id, pt_id, cs_lengthm, relative_distance, X, Y, Z, Z_source, 
+    class, point_type, 
+    bottom, left_bank, right_bank, valid_banks, has_relief)
+
+ids_before_align <- hydrofabric3D::add_tmp_id(fixed_pts, x = CROSSWALK_ID)$tmp_id
+
+message("Aligning banks and smoothing bottoms...")
+fixed_pts <- hydrofabric3D::align_banks_and_bottoms(cs_pts = fixed_pts, crosswalk_id = CROSSWALK_ID)
+
+ids_after_align <- hydrofabric3D::add_tmp_id(fixed_pts, x = CROSSWALK_ID)$tmp_id
+
+message("Reclassifying cross section points...")
+
+fixed_pts <- hydrofabric3D::classify_points(
+  cs_pts                    = fixed_pts, 
+  crosswalk_id              = CROSSWALK_ID,
+  pct_of_length_for_relief  = PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF
+)
+
+ids_after_reclassify <- hydrofabric3D::add_tmp_id(fixed_pts, x = CROSSWALK_ID)$tmp_id
+
+if(all(ids_original_cs_pts %in% ids_after_fixed_pts)) {
+  message("All hy_id/cs_ids in ORIGINAL DEM point extraction were found in the FIXED points")
+} else {
+  message(" >>> Missing hy_id/cs_ids in ORIGINAL DEM point extraction compared to the FIXED points")
+}
+
+if(all(ids_before_align %in% ids_after_align)) {
+  message("All hy_id/cs_ids are kept in tact after bank alignment and bottom smoothing")
+} else {
+  message(" >>> Missing hy_id/cs_ids after bank alignment and bottom smoothing")
+}
+
+if(all(ids_after_align %in% ids_after_reclassify)) {
+  message("All hy_id/cs_ids are kept in tact after RECLASSIFICATION")
+} else {
+  message(" >>> Missing hy_id/cs_ids after RECLASSIFICATION")
+}
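+# these three checks could also be made fatal rather than informational, e.g.:
+# stopifnot(
+#   all(ids_original_cs_pts %in% ids_after_fixed_pts),
+#   all(ids_before_align %in% ids_after_align),
+#   all(ids_after_align %in% ids_after_reclassify)
+# )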
+
+sf::write_sf(
+  out_transects,
+  paste0(DOMAIN_WITH_FEMA_VPU_SUBSETS_DIR, "/", VPU, "_transects.gpkg") 
+)
+
+arrow::write_parquet(
+  fixed_pts,
+  paste0(DOMAIN_WITH_FEMA_VPU_SUBSETS_DIR, "/", VPU, "_cs_pts.parquet") 
+)
 # save the flowlines subset 
 DOMAIN_WITH_FEMA_FLOWLINE_SUBSET_PATH <- paste0(DOMAIN_WITH_FEMA_FLOWLINES_DIR, "/flowlines_subset.gpkg")
+
 sf::write_sf(
   flines, 
   DOMAIN_WITH_FEMA_FLOWLINE_SUBSET_PATH
@@ -142,7 +443,8 @@ for (i in seq_along(fline_groups)) {
 }
 
 for (i in seq_along(fline_groups)) {
-
+  
+  # i = 3
   flowlines  <- fline_groups[[i]]
   VPU        <-  unique(flowlines$VPUID)
   
@@ -158,11 +460,14 @@ for (i in seq_along(fline_groups)) {
   # GROUP_FEMA_FILES <- list.files(GROUP_FEMA_DIRS, full.names = T)
   # GROUP_FEMA_FILES <- GROUP_FEMA_FILES[grepl("_output.gpkg", GROUP_FEMA_FILES)]
   
+  # ((flowlines$bf_width) / 11)[1] * 11
+  
   # create transect lines
   transects <- hydrofabric3D::cut_cross_sections(
     net               = flowlines,                        # flowlines network
-    id                = CROSSWALK_ID,                     # Unique feature ID
-    cs_widths         = pmax(50, flowlines$bf_width * 11),     # cross section width of each "id" linestring ("hy_id")
+    crosswalk_id      = CROSSWALK_ID,                     # Unique feature ID
+    cs_widths         = flowlines$bf_width,
+    # cs_widths         = pmax(50, flowlines$bf_width * 11),     # cross section width of each "id" linestring ("hy_id")
     # cs_widths         = pmax(50, flowlines$bf_width),     # cross section width of each "id" linestring ("hy_id")
     num               = 10,                            # number of cross sections per "id" linestring ("hy_id")
     smooth            = TRUE,                          # smooth lines
@@ -229,7 +534,7 @@ for (i in seq_along(fline_groups)) {
   
   # TODO: make sure this 3000m extension distance is appropriate across VPUs 
   # TODO: also got to make sure that this will be feasible on memory on the larger VPUs...
-  transects <- hydrofabric3D::extend_transects_to_polygons(
+  ext_transects <- hydrofabric3D::extend_transects_to_polygons(
     transect_lines         = transects, 
     polygons               = fema, 
     flowlines              = flowlines, 
@@ -241,8 +546,10 @@ for (i in seq_along(fline_groups)) {
   # mapview::mapview(transects, color = "green") + 
    # mapview::mapview(transects2, color = "red")
   
-  transects <- 
-    transects %>% 
+  # transects <- 
+  #   transects %>% 
+  ext_transects <- 
+    ext_transects %>% 
     hydrofabric3D::add_tmp_id(x = CROSSWALK_ID) %>% 
     dplyr::mutate(is_extended = FALSE) %>%
     dplyr::select(
@@ -259,10 +566,28 @@ for (i in seq_along(fline_groups)) {
                      paste(GROUP_VPU_IDS, collapse = "_"), "_transects.gpkg" 
                      )
   
+  # ext_transects %>%
+  #   dplyr::filter(id == "wb-2414904") %>%
+  #   .$geometry %>% 
+  #   mapview::mapview()
+  
+  # ext_transects %>%
+  #   dplyr::slice(90000:100000) %>% 
+  #   .$geometry %>% 
+  #   mapview::mapview()
+  
+  # wb-2414904
+  
+  # length_data <- 
+  #   ext_transects %>% 
+  #   dplyr::mutate(
+  #     line_len = as.numeric(sf::st_length(geometry))
+  #   )
+  
   message("Writting transect lines for VPU group: '", VPU, "'",
           "\n > '", out_path, "'")
   
-  sf::write_sf(transects, out_path)
+  sf::write_sf(ext_transects, out_path)
   
   message("Finished writting transects!")
   
@@ -293,6 +618,7 @@ paths_df <- data.frame(
   )
 
 for (i in 1:nrow(paths_df)) {
+  # i =3
   # i = 2
   VPU     <- paths_df$vpu[i]
   t_path  <- paths_df$t[i]
@@ -324,7 +650,7 @@ for (i in 1:nrow(paths_df)) {
     crosswalk_id   = CROSSWALK_ID,
     points_per_cs  = NULL,
     min_pts_per_cs = 10,
-    dem            = DEM_URL
+    dem            = DEM_PATH
   )
   
   # ----------------------------------------------------------------------------------------------------------------
@@ -366,7 +692,7 @@ for (i in 1:nrow(paths_df)) {
       crosswalk_id   = CROSSWALK_ID,
       points_per_cs  = NULL, 
       min_pts_per_cs = 10, # number of points per cross sections
-      dem            = DEM_URL, # DEM to extract points from
+      dem            = DEM_PATH, # DEM to extract points from
       scale          = EXTENSION_PCT, # How far to extend transects if the points need to be rechecked
       pct_of_length_for_relief = PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF, # percent of cross sections length to be needed in relief calculation to consider cross section to "have relief"
       fix_ids = FALSE,
@@ -383,7 +709,8 @@ for (i in 1:nrow(paths_df)) {
   out_transects <- match_transects_to_extended_cs_pts(
     transect_lines = transects, 
     fixed_cs_pts   = fixed_pts, 
-    crosswalk_id   = CROSSWALK_ID
+    crosswalk_id   = CROSSWALK_ID,
+    extension_pct = EXTENSION_PCT
   )
   
   # ----------------------------------------------------------------------------------------------------------------
@@ -400,6 +727,8 @@ for (i in 1:nrow(paths_df)) {
   fixed_pts      <- hydrofabric3D:::renumber_cs_ids(df = fixed_pts, crosswalk_id = CROSSWALK_ID)
   out_transects  <- hydrofabric3D:::renumber_cs_ids(df = out_transects, crosswalk_id = CROSSWALK_ID)
   
+  # out_transects %>% 
+    # dplyr::filter(id == "wb-2425607") %>% .$geometry %>% plot()
   # ----------------------------------------------------------------------------------------------------------------
   # ---- STEP 4: Update transects with extended transects (if exists) ----
   # ----------------------------------------------------------------------------------------------------------------
diff --git a/runners/cs_runner/domain_with_fema2.R b/runners/cs_runner/domain_with_fema2.R
new file mode 100644
index 0000000..3d574e0
--- /dev/null
+++ b/runners/cs_runner/domain_with_fema2.R
@@ -0,0 +1,2245 @@
+# Generate the transects + cs_pts + cross sections layers for a single flowlines domain file and DEM file 
+source("runners/cs_runner/config.R")
+source("runners/cs_runner/utils.R")
+
+# load libraries
+# library(hydrofabric3D)
+# library(dplyr)
+# library(sf)
+
+# Unique Flowline ID column name
+CROSSWALK_ID <- "id"
+
+# ---------------------------------------------------------------------
+# --- Read in flowlines
+# ---------------------------------------------------------------------
+
+# Subsetting area
+aoi     <- sf::read_sf(DOMAIN_WITH_FEMA_SUBSET_PATH, layer = "divides")
+aoi     <- rmapshaper::ms_simplify(aoi, keep = 0.05)
+
+id_col <- "id"
+# id_col <- "divide_id"
+
+# query the conus.gpkg for matching IDs
+ids        <- unique(aoi[[id_col]])
+query_ids  <- paste0(paste0("'", ids, "'"), collapse= ", ")
+
+gpkg_layers <- sf::st_layers(DOMAIN_WITH_FEMA_FLOWLINES_PATH)
+layer       <- gpkg_layers$name[gpkg_layers$name == "flowpaths"]
+
+wkt <- 
+  aoi %>% 
+  rmapshaper::ms_dissolve() %>% 
+  # rmapshaper::ms_explode() %>% 
+  # sf::st_as_sfc() %>%
+  sf::st_sf() %>%
+  sf::st_geometry() %>%
+  sf::st_as_text()
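+# passing this WKT to read_sf()'s wkt_filter below pushes a spatial intersection filter down
+# to GDAL, so only flowpaths intersecting the dissolved AOI geometry are read from the geopackage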
+
+# read in flowlines based on IDs in AOI
+flines <- sf::read_sf(DOMAIN_WITH_FEMA_FLOWLINES_PATH, layer = "flowpaths",
+                      wkt_filter = wkt
+                      # query = sprintf("SELECT * FROM \"%s\" WHERE %s IN (%s)", layer, id_col, query_ids),
+                      # query = query
+                      )
+
+# flines <- 
+#   flines %>% 
+#   dplyr::slice(1:1000)
+
+# bad_ids <- c("wb-14538", "wb-14686",  "wb-14687")
+# flines <- 
+#   flines %>% 
+#   dplyr::filter(id %in% bad_ids)
+# ---------------------------------------------------------------------
+# --- Split flowlines by VPU
+# ---------------------------------------------------------------------
+
+# VPUs polygons
+VPU_boundaries   <- sf::st_transform(nhdplusTools::vpu_boundaries, sf::st_crs(flines))
+
+# add a VPU ID column to each flowline
+flines <- add_intersects_ids(x = flines, y = VPU_boundaries, id_col = "VPUID")
+
+# TODO: improve this, manual remap some VPUIDs so that there are 
+# TODO: less small subsets of flowlines because a small bit of a different VPU is intersected 
+flines <- 
+  flines %>% 
+  dplyr::mutate(
+    VPUID = dplyr::case_when(
+      VPUID == "12"     ~ "11, 12, 13",
+      VPUID == "12, 13" ~ "11, 12, 13",
+      VPUID == "11, 12" ~ "11, 12, 13",
+      VPUID == "01"     ~ "01, 02",
+      TRUE              ~ VPUID
+    )
+  ) 
+
+# rm(flines2)
+# unnest_ids(flines2$VPUID2)
+
+# set of unique VPUs
+
+VPU_IDS <- unnest_ids(flines$VPUID)
+# VPU_IDS <- unnest_ids(flines2$VPUID2)
+VPU_IDS
+
+# all possible FEMA dirs
+AOI_FEMA_DIRS <- FEMA_VPU_SUBFOLDERS[basename(FEMA_VPU_SUBFOLDERS) %in% paste0("VPU_", VPU_IDS) ]
+
+flines %>%
+  sf::st_drop_geometry() %>%
+  dplyr::group_by(VPUID) %>%
+  dplyr::count()
+
+flines %>%
+  sf::st_drop_geometry() %>%
+  dplyr::group_by(order) %>%
+  dplyr::count()
+# unique(flines$VPUID)
+
+# calculate bankfull width
+flines <- 
+  flines %>% 
+  hydrofabric3D::add_powerlaw_bankful_width("tot_drainage_areasqkm", 50) %>% 
+  dplyr::select(
+    dplyr::any_of(CROSSWALK_ID),
+    VPUID,
+    # hy_id = id,
+    lengthkm,
+    tot_drainage_areasqkm,
+    bf_width,
+    order,
+    mainstem
+  ) %>% 
+  hydroloom::rename_geometry("geometry")
+
+
+sf::write_sf(
+  flines,
+  paste0(DOMAIN_WITH_FEMA_FLOWLINES_DIR, "/flowlines_subset.gpkg")
+)
+
+transects <- hydrofabric3D::cut_cross_sections(
+  net               = flines,                        # flowlines network
+  crosswalk_id      = CROSSWALK_ID,                     # Unique feature ID
+  cs_widths         = flines$bf_width,
+  # cs_widths         = pmax(50, flowlines$bf_width * 11),     # cross section width of each "id" linestring ("hy_id")
+  # cs_widths         = pmax(50, flowlines$bf_width),     # cross section width of each "id" linestring ("hy_id")
+  num               = 10,                            # number of cross sections per "id" linestring ("hy_id")
+  smooth            = TRUE,                          # smooth lines
+  densify           = 3,                             # densify linestring points
+  rm_self_intersect = TRUE,                          # remove self intersecting transects
+  fix_braids        = FALSE,                         # whether to fix braided flowlines or not
+  #### Arguments used for when fix_braids = TRUE     # TODO: these methods need revision in hydrofabric3D to allow for more flexible processing for data that is NOT COMID based (i.e. hy_id)    
+  # terminal_id       = NULL,
+  # braid_threshold   = NULL,
+  # version           = 2,
+  # braid_method      = "comid",
+  # precision         = 1,
+  add               = TRUE                           # whether to add back the original data
+)
+
+transects <- 
+  transects %>% 
+  dplyr::mutate(cs_source = CS_SOURCE) %>% 
+  dplyr::select(id,
+                cs_source,
+                cs_id,
+                cs_measure,
+                ds_distance,
+                cs_lengthm,
+                sinuosity,
+                geometry
+  )
+
+# paste0(DOMAIN_WITH_FEMA_TRANSECTS_DIR, "/pre_extension_transects.gpkg")
+
+sf::write_sf(
+  transects,
+  paste0(DOMAIN_WITH_FEMA_TRANSECTS_DIR, "/pre-extension-transects.gpkg")
+)
+
+# [1] "wb-2416492" "wb-2414869" "wb-2415765" "wb-2416657" "wb-2415479" "wb-2420307" "wb-2425952" "wb-2421269" "wb-2425750"
+# [10] "wb-2422028" "wb-2419212" "wb-2426092" "wb-2414971" "wb-2416120" "wb-2414960" "wb-2433259" "wb-14381"   "wb-18022"  
+# [19] "wb-15173"   "wb-15160"   "wb-571"     "wb-2413739" "wb-508015"  "wb-507770"  "wb-2424752" "wb-2424545" "wb-2422849"
+# [28] "wb-2417939" "wb-2425090" "wb-2422212" "wb-2422714" "wb-2419650" "wb-2419650" "wb-2416492" "wb-2414869" "wb-2415765"
+# [37] "wb-2416657" "wb-2415479" "wb-2420307" "wb-2419694" "wb-2419694" "wb-2419974" "wb-2419974" "wb-2420065" "wb-2420065"
+# [46] "wb-2425952" "wb-2420678" "wb-2420678" "wb-2421269" "wb-2419695" "wb-2419695" "wb-2425750" "wb-2422028" "wb-2419212"
+# [55] "wb-2426091" "wb-2414971" "wb-2407999" "wb-2407999" "wb-2426131" "wb-2426131" "wb-2416120" "wb-2433525" "wb-2433525"
+# [64] "wb-2433259" "wb-14381"   "wb-14576"   "wb-14576"   "wb-18022"   "wb-15173"   "wb-15160"   "wb-11097"   "wb-11097"  
+# [73] "wb-14804"   "wb-14804"   "wb-571"     "wb-2408123" "wb-2408123" "wb-2413739" "wb-508015"  "wb-507770"  "wb-2424752"
+# [82] "wb-2435791" "wb-2435791" "wb-2435798" "wb-2435798" "wb-2435779" "wb-2435779" "wb-2435781" "wb-2435781" "wb-2424545"
+# [91] "wb-2422849" "wb-2417939" "wb-2425090" "wb-2422212" "wb-2422714"
+
+# transects <- sf::read_sf(paste0(DOMAIN_WITH_FEMA_TRANSECTS_DIR, "/pre-extension-transects.gpkg"))
+
+# out_transects
+# out_transects2
+
+transects <- 
+  transects %>% 
+  dplyr::left_join(
+    flines %>% 
+      dplyr::select(id, VPUID, mainstem, tot_drainage_areasqkm) %>% 
+      sf::st_drop_geometry(),
+    by = "id"
+  )
+
+# sf::write_sf(
+#   transects,
+#   paste0("/Users/anguswatters/Desktop/tmp_trans.gpkg")
+# )
+
+GROUP_VPU_IDS <- unnest_ids(transects$VPUID)
+
+# all FEMA dirs for the current area
+GROUP_FEMA_DIRS  <- FEMA_VPU_SUBFOLDERS[basename(FEMA_VPU_SUBFOLDERS) %in% paste0("VPU_", GROUP_VPU_IDS) ]
+GROUP_FEMA_FILES <- list.files(GROUP_FEMA_DIRS, full.names = T)[grepl("_output.gpkg", list.files(GROUP_FEMA_DIRS, full.names = T))]
+
+
+# read each FEMA geopackage into a list 
+fema <- lapply(GROUP_FEMA_FILES, function(gpkg) sf::read_sf(gpkg))
+# transects$VPUID %>% unique()
+
+fema <- 
+  fema %>% 
+  dplyr::bind_rows() %>% 
+  dplyr::mutate(
+    fema_id = 1:dplyr::n()
+  ) 
+
+message("Simplifying FEMA polygons...")
+message(" - Number of geoms BEFORE simplifying: ", nrow(fema))
+
+# TODO: this should be a function argument OR removed; we probably shouldn't forcibly and silently simplify the input polygons without the user knowing
+# keep 1% of the original points for speed
+fema <- rmapshaper::ms_simplify(fema, keep_shapes = T, keep = 0.01, sys = TRUE, sys_mem = 16)
+# fema <- rmapshaper::ms_simplify(fema, keep_shapes = T, keep = 0.1, sys = TRUE, sys_mem = 16)
+
+# sf::write_sf(
+#   fema,
+#   paste0("/Users/anguswatters/Desktop/tmp_fema.gpkg")
+# )
+
+message(" - Number of geoms AFTER simplifying: ", nrow(fema))
+message("Extending transects out to FEMA 100yr floodplain polygon boundaries - (", Sys.time(), ")")
+
+# transects <- 
+#   transects  %>%
+#   dplyr::left_join(
+#     dplyr::select(sf::st_drop_geometry(flines),
+#                   dplyr::any_of(CROSSWALK_ID),
+#                   mainstem
+#     ),
+#     by = CROSSWALK_ID
+#   )
+
+# test_ids <- c("wb-2416492", "wb-2414869", "wb-2415765" , "wb-2416657" , "wb-2415479",
+#               "wb-2420307" , "wb-2425952" , "wb-2421269" , "wb-2425750",
+#               "wb-14804",   "wb-14804",   "wb-571",     "wb-2408123", "wb-2408123"
+#               )
+# test_trans <- 
+#   transects %>% 
+#   dplyr::filter(id %in% test_ids)
+# 
+# test_flines <- 
+#   flines %>% 
+#   dplyr::filter(id %in% test_ids)
+
+# test_fema
+# test_trans$VPUID
+
+# TODO: make sure this 3000m extension distance is appropriate across VPUs 
+# TODO: also got to make sure that this will be feasible on memory on the larger VPUs...
+# transects <- hydrofabric3D::extend_transects_to_polygons(
+ext_transects <- hydrofabric3D::extend_transects_to_polygons(
+  transect_lines         = transects,
+  polygons               = fema,
+  flowlines              = flines,
+  # transect_lines         = test_trans, 
+  # polygons               = fema, 
+  # flowlines              = test_flines,
+  crosswalk_id           = CROSSWALK_ID,
+  grouping_id            = "mainstem", 
+  max_extension_distance = 3000 
+)
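+# grouping_id = "mainstem" presumably controls which flowlines are grouped together when
+# checking extended transects for conflicts; max_extension_distance caps each extension at
+# 3000 m (see the TODOs above about whether that distance is appropriate across VPUs)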
+
+# sf::write_sf(
+#   flines,
+#   paste0("/Users/anguswatters/Desktop/tmp_flines.gpkg")
+# )
+
+# flines <- sf::read_sf( paste0("/Users/anguswatters/Desktop/tmp_flines.gpkg"))
+# transects <- sf::read_sf(paste0("/Users/anguswatters/Desktop/tmp_trans.gpkg"))
+# fema <- sf::read_sf( paste0("/Users/anguswatters/Desktop/tmp_fema.gpkg"))
+# 
+# transect_lines <- transects
+# polygons <- fema
+# flowlines <- flines
+# bad_id   <- "wb-10813"
+# mainstem <- "1977479"
+# crosswalk_id           = "id"
+# grouping_id            = "mainstem"
+# max_extension_distance = 3000 
+
+
+# hydrofabric3D:::rm_self_intersections(ext_transects)
+# transects <- 
+#   transects %>% 
+ext_transects <-
+  ext_transects %>%
+  dplyr::mutate(cs_source = CS_SOURCE) %>% 
+  dplyr::select(id,
+                cs_source,
+                cs_id,
+                cs_measure,
+                ds_distance,
+                cs_lengthm,
+                sinuosity,
+                geometry
+                )
+
+# ext_transects <-
+#   ext_transects %>% 
+#   dplyr::select(id, cs_id, cs_lengthm, cs_measure, ds_distance, sinuosity, 
+#                 # tot_drainage_areasqkm, 
+#                 geometry)
+
+sf::write_sf(
+  # transects,
+  ext_transects,
+  paste0(DOMAIN_WITH_FEMA_TRANSECTS_DIR, "/post-extension-transects.gpkg")
+)
+
+# get cross section point elevations
+cs_pts <- hydrofabric3D::cross_section_pts(
+  # cs             = transects,
+  cs             = ext_transects,
+  crosswalk_id   = CROSSWALK_ID,
+  points_per_cs  = NULL,
+  min_pts_per_cs = 10,
+  dem            = DEM_PATH
+)
+
+# mapview::mapview(cs_pts) + test_trans
+
+sf::write_sf(
+  cs_pts,
+  paste0(DOMAIN_WITH_FEMA_CS_PTS_DIR, "/raw-cs-pts.gpkg")
+)
+
+# DOMAIN_WITH_FEMA_CS_PTS_DIR
+# ----------------------------------------------------------------------------------------------------------------
+# ---- STEP 2: Remove any cross section that has ANY missing (NA) Z values, and classify the points  ----
+# ----------------------------------------------------------------------------------------------------------------
+# cs_pts2 %>% 
+#   dplyr::slice(1:200) %>% 
+#   dplyr::rename(hy_id = id) %>% 
+#   hydrofabric3D::plot_cs_pts(x = "pt_id", color = "point_type")
+
+cs_pts <- 
+  # cs_pts2 <- 
+  cs_pts %>% 
+  hydrofabric3D::drop_incomplete_cs_pts(CROSSWALK_ID) %>% 
+  hydrofabric3D::classify_points(
+    crosswalk_id             = CROSSWALK_ID, 
+    pct_of_length_for_relief = PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF
+  )  
+
+# cs_pts %>% 
+#   dplyr::slice(150:250) %>% 
+#   hydrofabric3D::plot_cs_pts("id", color = "point_type", size = 4)
+
+
+ids_original_cs_pts <- hydrofabric3D::add_tmp_id(cs_pts, x = CROSSWALK_ID)$tmp_id  
+# ids_original_cs_pts <- hydrofabric3D::add_tmp_id(cs_pts2)$tmp_id
+
+# sf::write_sf(cs_pts2, "/Users/anguswatters/Desktop/test_improve_cs_pts_classified_11.gpkg")
+# sf::write_sf(cs_pts, "/Users/anguswatters/Desktop/test_improve_cs_pts_classified_11_2.gpkg")
+
+
+# ----------------------------------------------------------------------------------------------------------------
+# ---- STEP 3: Try to rectify any no relief and invalid banks cross sections ----
+# ----------------------------------------------------------------------------------------------------------------
+
+system.time({
+fixed_pts <- hydrofabric3D::get_improved_cs_pts(
+  cs_pts         = cs_pts,    # cross section points generated from hydrofabric3D::cross_section_pts()
+  net            = flines,    # original flowline network
+  # net = test_flines,
+  # net            = flowlines,    # original flowline network
+  # transects      = transects, # original transect lines
+  transects      = ext_transects, # original transect lines
+  crosswalk_id   = CROSSWALK_ID,
+  points_per_cs  = NULL, 
+  min_pts_per_cs = 10, # minimum number of points per cross section
+  dem            = DEM_PATH, # DEM to extract points from
+  scale          = EXTENSION_PCT, # How far to extend transects if the points need to be rechecked
+  pct_of_length_for_relief = PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF, # percent of cross section length needed as relief for the cross section to be considered to "have relief"
+  fix_ids = FALSE,
+  verbose = TRUE
+)
+})
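+
+# Hedged improvement summary (a sketch; it assumes `valid_banks` and `has_relief`
+# are logical, per-point columns repeated within each cross section): count cross
+# sections with BOTH valid banks and relief, before vs after the rectification pass.
+count_good_cs <- function(pts) {
+  pts %>%
+    sf::st_drop_geometry() %>%
+    dplyr::filter(valid_banks & has_relief) %>%
+    hydrofabric3D::add_tmp_id(x = CROSSWALK_ID) %>%
+    dplyr::pull(tmp_id) %>%
+    dplyr::n_distinct()
+}
+
+message("Cross sections with valid banks AND relief:",
+        "\n > before: ", count_good_cs(cs_pts),
+        "\n > after:  ", count_good_cs(fixed_pts))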
+
+# sf::write_sf(
+#   test_flines,
+#   "/Users/anguswatters/Desktop/test_flines.gpkg"
+# )
+# 
+# sf::write_sf(
+#   test_trans,
+#   "/Users/anguswatters/Desktop/test_start_trans.gpkg"
+# )
+# 
+# sf::write_sf(
+#   ext_transects,
+#   "/Users/anguswatters/Desktop/test_ext_trans.gpkg"
+# )
+# 
+# sf::write_sf(
+#   cs_pts,
+#   "/Users/anguswatters/Desktop/test_cs_pts.gpkg"
+# )
+
+# mapview::mapview(fixed_pts) +
+#   mapview::mapview(ext_transects, color = "green") +
+#   mapview::mapview(out_transects, color = "red")
+
+# sanity check: Z must exist and contain no missing values
+if (is.null(fixed_pts$Z) || any(is.na(fixed_pts$Z))) {
+  warning("fixed_pts contains missing (NULL/NA) Z values")
+}
+
+ids_after_fixed_pts <- hydrofabric3D::add_tmp_id(fixed_pts, x = CROSSWALK_ID)$tmp_id  
+
+# ----------------------------------------------------------------------------------------------------------------
+# ---- Update transects with extended transects (if exists) ----
+# ----------------------------------------------------------------------------------------------------------------
+# ext_transects <- 
+#   ext_transects %>% 
+#   dplyr::mutate(cs_source = CS_SOURCE)
+
+# end_pts <- 
+#   fixed_pts %>% 
+#   dplyr::filter(id == "wb-2435034", cs_id == 1) %>% 
+#   # hydrofabric3D::plot_cs_pts("id")
+#   dplyr::slice(
+#     which.min(pt_id),
+#     which.max(pt_id)
+#   )
+# mapview::mapview(end_pts) + tt
+# tt <- 
+#   ext_transects %>% 
+#   dplyr::filter(id == "wb-2435034", cs_id == 1)
+# hydrofabric3D:::match_transects_to_extended_cs_pts()
+
+out_transects <- hydrofabric3D:::match_transects_to_extended_cs_pts(
+  # transect_lines = transects, 
+  transect_lines = ext_transects,
+  fixed_cs_pts   = fixed_pts, 
+  crosswalk_id   = CROSSWALK_ID
+  # extension_pct  = EXTENSION_PCT
+)
+
+# sf::write_sf(
+#   out_transects,
+#   paste0(DOMAIN_WITH_FEMA_TRANSECTS_DIR, "/cs-point-extension-transects.gpkg")
+#   # "/Users/anguswatters/Desktop/tmp_trans_improved.gpkg"
+# )
+
+# fixed_pts2      <- hydrofabric3D:::renumber_cs_ids(df = fixed_pts, crosswalk_id = CROSSWALK_ID)
+# out_transects2  <- hydrofabric3D:::renumber_cs_ids(df = out_transects, crosswalk_id = CROSSWALK_ID)
+
+fixed_pts <- 
+  fixed_pts %>% 
+# fixed_pts2 <- 
+  # fixed_pts %>% 
+  hydrofabric3D:::renumber_cs_ids(crosswalk_id = CROSSWALK_ID) %>% 
+  dplyr::group_by(id) %>% 
+  dplyr::arrange(cs_id, .by_group = TRUE) %>% 
+  dplyr::ungroup() %>% 
+  dplyr::group_by(id, cs_id) %>% 
+  dplyr::arrange(pt_id, .by_group = TRUE) %>% 
+  dplyr::ungroup()
+
+
+out_transects <- 
+  out_transects %>% 
+# out_transects2 <- 
+#   out_transects %>% 
+  hydrofabric3D:::renumber_cs_ids(crosswalk_id = CROSSWALK_ID) %>% 
+  dplyr::group_by(id) %>% 
+  dplyr::arrange(cs_id, .by_group = TRUE) %>% 
+  dplyr::ungroup() %>% 
+  dplyr::select(
+    -left_bank_count,
+    -right_bank_count, 
+    -channel_count,
+    -bottom_count,
+    -bottom,
+    -left_bank,
+    -right_bank,
+    -valid_banks,
+    -has_relief,
+    -is_extended
+  ) 
+
+sf::write_sf(
+  out_transects,
+  # out_transects2,
+  paste0(DOMAIN_WITH_FEMA_TRANSECTS_DIR, "/cs-point-extension-transects.gpkg")
+  # "/Users/anguswatters/Desktop/tmp_trans_improved.gpkg"
+)
+
+# sf::write_sf(
+#   fixed_pts2,
+#   paste0(DOMAIN_WITH_FEMA_CS_PTS_DIR, "/fixed_cs_pts.gpkg")
+# )
+
+# transects %>% 
+#   dplyr::group_by(id) %>% 
+#   dplyr::arrange(cs_id, .by_group = TRUE) %>% 
+#   dplyr::ungroup()
+# 
+# out_transects2 %>% 
+#   # dplyr::filter(id %in% c("wb-2435034", "wb-2435045")) %>% 
+#   dplyr::group_by(id) %>% 
+#   dplyr::arrange(cs_id, .by_group = TRUE) %>% 
+#   dplyr::ungroup()
+
+# sf::write_sf(
+#   out_transects2,
+#   paste0(DOMAIN_WITH_FEMA_TRANSECTS_DIR, "/cs-point-extension-transects.gpkg")
+#   # "/Users/anguswatters/Desktop/tmp_trans_improved.gpkg"
+# )
+
+# trans_uids <- hydrofabric3D::get_unique_tmp_ids(out_transects, "id")
+# ext_trans_uids <- hydrofabric3D::get_unique_tmp_ids(ext_transects, "id")
+# fixed_pts_uids <- hydrofabric3D::get_unique_tmp_ids(fixed_pts, "id")
+
+# all(trans_uids %in% ext_trans_uids)
+# all(ext_trans_uids %in% trans_uids)
+# all(trans_uids %in% fixed_pts_uids)
+# all(fixed_pts_uids %in% trans_uids)
+# all(ext_trans_uids %in% fixed_pts_uids)
+# all(fixed_pts_uids %in% ext_trans_uids)
+
+# ----------------------------------------------------------------------------------------------------------------
+# ---- Re-enumerate the transects & cross section points "cs_id" ----
+# ----------------------------------------------------------------------------------------------------------------
+
+# fixed_pts      <- hydrofabric3D:::renumber_cs_ids(df = fixed_pts, crosswalk_id = "hy_id")
+# out_transects  <- hydrofabric3D:::renumber_cs_ids(
+#                                                   df            = dplyr::mutate(out_transects, pt_id = 1), 
+#                                                   crosswalk_id  = "hy_id"
+#                                                   ) %>% 
+#                                                   dplyr::select(-pt_id)
+# length(out_transects_uids)
+# length(fixed_pts_uids)
+# 
+# out_transects_uids <- hydrofabric3D::get_unique_tmp_ids(out_transects, "id")
+# fixed_pts_uids <- hydrofabric3D::get_unique_tmp_ids(fixed_pts, "id")
+# 
+# length(out_transects_uids) == length(fixed_pts_uids) & all(out_transects_uids %in% fixed_pts_uids) & all(fixed_pts_uids %in% out_transects_uids)
+# 
+# fixed_pts2      <- hydrofabric3D:::renumber_cs_ids(df = fixed_pts, crosswalk_id = CROSSWALK_ID)
+# out_transects2  <- hydrofabric3D:::renumber_cs_ids(df = out_transects, crosswalk_id = CROSSWALK_ID)
+# 
+# out_transects2_uids <- hydrofabric3D::get_unique_tmp_ids(out_transects2, "id")
+# fixed_pts2_uids <- hydrofabric3D::get_unique_tmp_ids(fixed_pts2, "id")
+# 
+# length(out_transects2_uids) == length(fixed_pts2_uids) & all(out_transects2_uids %in% fixed_pts2_uids) & all(fixed_pts2_uids %in% out_transects2_uids)
+
+  # paste0(DOMAIN_WITH_FEMA_TRANSECTS_DIR, "/extended_transects.gpkg")
+  
+# sf::write_sf(
+#   out_transects2,
+#   # "/Users/anguswatters/Desktop/tmp_trans_improved2.gpkg"
+#   paste0(DOMAIN_WITH_FEMA_TRANSECTS_DIR, "/final_transects.gpkg")
+# )
+
+# object.size(out_transects2)
+
+# extract X/Y coordinates, drop point geometries, and order the output columns
+fixed_pts <-
+  fixed_pts %>% 
+# fixed_pts2 <-
+  # fixed_pts2 %>% 
+  dplyr::mutate(
+    X = sf::st_coordinates(.)[,1],
+    Y = sf::st_coordinates(.)[,2]
+  ) %>%
+  sf::st_drop_geometry() %>% 
+  dplyr::select(
+    dplyr::any_of(CROSSWALK_ID),
+    cs_id, 
+    pt_id,
+    cs_lengthm,
+    relative_distance,
+    X, Y, Z,
+    class, point_type,
+    bottom, left_bank, right_bank, valid_banks, has_relief # newly added columns (03/06/2024)
+  ) %>% 
+  dplyr::mutate(
+    Z_source = CS_SOURCE
+  ) %>%
+  dplyr::relocate(
+    dplyr::any_of(CROSSWALK_ID),
+    cs_id, pt_id, cs_lengthm, relative_distance, X, Y, Z, Z_source,
+    class, point_type,
+    bottom, left_bank, right_bank, valid_banks, has_relief
+  )
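+
+# NOTE: `fixed_pts` is now a plain table with X/Y columns rather than an sf object;
+# if point geometries are needed again they can be rebuilt from X/Y (EPSG:5070 is
+# the working CRS used when the points are re-read further below):
+# fixed_pts_sf <- sf::st_as_sf(fixed_pts, coords = c("X", "Y"), crs = 5070)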
+
+# # add Z_source column for source of elevation data
+# fixed_pts2 <-
+#   fixed_pts2 %>%
+#   dplyr::mutate(
+#     Z_source = CS_SOURCE
+#   ) %>%
+#   dplyr::relocate(
+#     dplyr::any_of(CROSSWALK_ID),
+#     cs_id, pt_id, cs_lengthm, relative_distance, X, Y, Z, Z_source
+#     # class, point_type, 
+#     # bottom, left_bank, right_bank, valid_banks, has_relief
+#     )
+
+ids_before_align <- hydrofabric3D::add_tmp_id(fixed_pts, x = CROSSWALK_ID)$tmp_id
+
+message("Aligning banks and smoothing bottoms...")
+fixed_pts <- hydrofabric3D::align_banks_and_bottoms(cs_pts = fixed_pts, crosswalk_id = CROSSWALK_ID)
+
+ids_after_align <- hydrofabric3D::add_tmp_id(fixed_pts, x = CROSSWALK_ID)$tmp_id
+
+message("Reclassifying cross section points...")
+
+fixed_pts <- hydrofabric3D::classify_points(
+  cs_pts                    = fixed_pts, 
+  crosswalk_id              = CROSSWALK_ID,
+  pct_of_length_for_relief  = PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF
+)
+
+ids_after_reclassify <- hydrofabric3D::add_tmp_id(fixed_pts, x = CROSSWALK_ID)$tmp_id
+
+if(all(ids_original_cs_pts %in% ids_after_fixed_pts)) {
+  message("All hy_id/cs_ids in ORIGINAL DEM point extraction were found in the FIXED points")
+} else {
+  message(" >>> Missing hy_id/cs_ids in ORIGINAL DEM point extraction compared to the FIXED points")
+}
+
+if(all(ids_before_align %in% ids_after_align)) {
+  message("All hy_id/cs_ids are kept in tact after bank alignment and bottom smoothing")
+} else {
+  message(" >>> Missing hy_id/cs_ids after bank alignment and bottom smoothing")
+}
+
+if(all(ids_after_align %in% ids_after_reclassify)) {
+  message("All hy_id/cs_ids are kept in tact after RECLASSIFICATION")
+} else {
+  message(" >>> Missing hy_id/cs_ids after RECLASSIFICATION")
+}
+
+# sf::write_sf(
+#   out_transects,
+#   paste0(DOMAIN_WITH_FEMA_VPU_SUBSETS_DIR, "/", VPU, "_transects.gpkg") 
+# )
+# 
+
+arrow::write_parquet(
+  fixed_pts,
+  paste0(DOMAIN_WITH_FEMA_CS_PTS_DIR, "/final-cs-pts.parquet")
+)
+# paste0(DOMAIN_WITH_FEMA_CS_PTS_DIR, "/final-cs-pts.parquet")
+
+
+# ---------------------------------------------------------------------------------
+# ---- Inject ML predicted top widths / Dingman's R ----
+# ---------------------------------------------------------------------------------
+
+# ---------------------------------------------------------------------------------
+# ---- Read in ML data ----
+# ---------------------------------------------------------------------------------
+
+VPU_ML_BATHYMETRY_PATHS <- list.files(DOMAIN_WITH_FEMA_ML_DIR, full.names = TRUE)
+
+ML_CROSSWALK_ID <- "id"
+
+# ml_outputs <- lapply(VPU_ML_BATHYMETRY_PATHS, function(prq) {
+#   vpu_id     <- gsub(".*ml/([a-zA-Z0-9]+).*", "\\1", prq)
+#   arrow::read_parquet(prq) %>% 
+#     dplyr::mutate(vpu_id = vpu_id) 
+# }
+# ) %>% 
+#   dplyr::bind_rows() %>% 
+#   dplyr::select(
+#     dplyr::any_of(ML_CROSSWALK_ID),
+#     vpu_id,
+#     owp_y_bf, owp_y_inchan, 
+#     owp_tw_bf, owp_tw_inchan,
+#     owp_dingman_r
+#                 )
+# 
+# # rename ML_CROSSWALK_ID (unique ID) to match the CROSSWALK_ID in CS PTS
+# # TODO: this assumes the IDs correspond with each other (built from the same flowlines network)
+# names(ml_outputs)[names(ml_outputs) == ML_CROSSWALK_ID] = CROSSWALK_ID
+# 
+# # Keep only distinct ID rows
+# ml_outputs <- 
+# ml_outputs %>%
+# dplyr::distinct(
+#   dplyr::across(dplyr::any_of(CROSSWALK_ID)),
+#   vpu_id,
+#   owp_y_bf, owp_y_inchan,
+#   owp_tw_bf, owp_tw_inchan,
+#   owp_dingman_r
+# )
+
+# sf::st_layers(DOMAIN_WITH_FEMA_FLOWLINES_PATH)
+# rm(ml_outputs, ml)
+ml_outputs <- sf::read_sf(DOMAIN_WITH_FEMA_FLOWLINES_PATH, layer = "flowpath-attributes-ml")
+
+ml_outputs <- 
+  ml_outputs %>% 
+  dplyr::select(
+    dplyr::any_of(ML_CROSSWALK_ID),
+    vpu_id = vpuid, # renamed to match the vpu_id column used in dplyr::distinct() below
+    owp_y_bf = YCC, 
+    owp_y_inchan = Y,
+    owp_tw_bf = TopWdthCC, 
+    owp_tw_inchan = TopWdth,
+    owp_dingman_r = dingman_r
+  )
+
+
+# rename ML_CROSSWALK_ID (unique ID) to match the CROSSWALK_ID in CS PTS
+# TODO: this assumes the IDs correspond with each other (built from the same flowlines network)
+names(ml_outputs)[names(ml_outputs) == ML_CROSSWALK_ID] <- CROSSWALK_ID
+
+# keep only distinct ID rows
+ml_outputs <-
+  ml_outputs %>%
+  dplyr::distinct(
+    dplyr::across(dplyr::any_of(CROSSWALK_ID)),
+    vpu_id,
+    owp_y_bf, owp_y_inchan,
+    owp_tw_bf, owp_tw_inchan,
+    owp_dingman_r
+  )
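+
+# Hedged guard (a sketch; it assumes the ML join below requires exactly one row per
+# flowline id, otherwise the left_join would duplicate cross section points): warn
+# if dplyr::distinct() left any duplicated ids behind.
+n_dup_ml_ids <- sum(duplicated(ml_outputs[[CROSSWALK_ID]]))
+
+if (n_dup_ml_ids > 0) {
+  warning("ml_outputs still contains ", n_dup_ml_ids,
+          " duplicated '", CROSSWALK_ID, "' values; the ML join may duplicate cross section points")
+}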
+
+# ---------------------------------------------------------------------------------
+# ---- Read in CS PTS data ----
+# ---------------------------------------------------------------------------------
+
+# CS_PTS_OUTPUT_PATH <- paste0(DOMAIN_WITH_FEMA_OUTPUT_DIR, "/cs_pts.parquet")
+CS_PTS_OUTPUT_PATH <- paste0(DOMAIN_WITH_FEMA_CS_PTS_DIR, "/final-cs-pts.parquet")
+
+cs_pts  <- arrow::read_parquet(CS_PTS_OUTPUT_PATH)
+
+# ---------------------------------------------------------------------------------
+# ---- Join CS PTS  with ML data ---
+# ---------------------------------------------------------------------------------
+message(round(Sys.time()), " - Joining ML width/depths estimates to cross section points...")
+
+# ml_outputs %>% 
+#   dplyr::group_by(id) %>% 
+#   dplyr::count(id) %>% 
+#   dplyr::arrange(-n)
+
+# join the ML outputs data to the cross section points
+cs_pts <-
+  cs_pts %>% 
+  dplyr::left_join(
+    dplyr::select(ml_outputs, 
+                  dplyr::any_of(CROSSWALK_ID),
+                  owp_tw_inchan, 
+                  owp_y_inchan, 
+                  owp_tw_bf, 
+                  owp_y_bf, 
+                  owp_dingman_r
+    ),
+    by = CROSSWALK_ID
+  ) 
+
+# ---------------------------------------------------------------------------------
+# ---- Fixing negative depths/widths estimates ----
+# ---------------------------------------------------------------------------------
+message(round(Sys.time()), " - Replacing any negative width/depth estimates with cross section bottom lengths...")
+
+cs_bottom_lengths <- hydrofabric3D::get_cs_bottom_length(cross_section_pts = cs_pts, crosswalk_id = CROSSWALK_ID)
+
+# TODO: for now we replace any negative TW values with the length of the bottom of the cross section
+# TODO: This method + the negative model output values both need to be looked into (04/05/2024)
+cs_pts <-
+  cs_pts %>% 
+  dplyr::left_join(
+    cs_bottom_lengths, 
+    by = c(CROSSWALK_ID, "cs_id")
+    # by = c("hy_id", "cs_id")
+  ) %>% 
+  dplyr::mutate(
+    owp_tw_inchan = dplyr::case_when(
+      owp_tw_inchan <= 0 ~ bottom_length,
+      TRUE               ~ owp_tw_inchan
+    ),
+    owp_tw_bf = dplyr::case_when(
+      owp_tw_bf <= 0     ~ bottom_length,
+      TRUE               ~ owp_tw_bf
+    )
+  ) %>% 
+  dplyr::select(-bottom_length)
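+
+# Hedged post-check (assumes bottom lengths are strictly positive, so the
+# substitution above should have removed all non-positive top widths):
+n_bad_tw <- sum(cs_pts$owp_tw_inchan <= 0 | cs_pts$owp_tw_bf <= 0, na.rm = TRUE)
+
+if (n_bad_tw > 0) {
+  warning(n_bad_tw, " cross section points still have non-positive top width estimates after substitution")
+}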
+
+# extract any cross sections that didn't get matched with an "hf_id" and/or have no ML data
+# TODO: look at this stuff with Arash (04/09/2024)
+missing_cs <- 
+  cs_pts %>% 
+  dplyr::filter(
+    is.na(.data[[CROSSWALK_ID]]) | 
+      is.na(owp_tw_inchan) | is.na(owp_y_inchan) | 
+      is.na(owp_tw_bf) | is.na(owp_y_bf) |
+      is.na(owp_dingman_r)
+  ) %>% 
+  hydrofabric3D::add_tmp_id(x = CROSSWALK_ID)
+
+# TODO: delete this, but for the time being keeping it to inspect the mismatch between "hy_id" and "hf_id"
+# readr::write_csv(
+#   dplyr::select(missing_cs, -tmp_id), 
+#   paste0(META_PATH, "nextgen_", path_df$vpu[i], "_cross_sections_missing_hf_ids.csv")
+# )
+
+# Split the cross sections into 2 groups:
+# - "Inchannel cs" group are points with BOTH valid banks AND relief --> These get the INCHANNEL TW and Y values from the ML model
+# - "Bankful cs" group are points WITHOUT valid banks OR any relief  --> These get the BANKFUL TW and Y values from the ML model
+inchannel_cs <-
+  cs_pts %>% 
+  hydrofabric3D::add_tmp_id(x = CROSSWALK_ID) %>% 
+  dplyr::filter(!tmp_id %in% unique(missing_cs$tmp_id)) %>%  # NOTE: makes sure to remove any of the "missing" cross sections without data
+  dplyr::select(-tmp_id) %>% 
+  dplyr::filter(valid_banks & has_relief) %>% 
+  # NOTE: temporarily rename the top widths, depths, and dingman's R columns so they 
+  # work nicely with the "hydrofabric3D::add_cs_bathymetry()" function which takes a dataframe of cross section points 
+  # with "TW", "DEPTH", and "DINGMAN_R" columns for each cross section
+  dplyr::rename(        
+    TW        = owp_tw_inchan,    
+    DEPTH     = owp_y_inchan,
+    DINGMAN_R = owp_dingman_r
+  )
+
+bankful_cs   <- 
+  cs_pts %>% 
+  hydrofabric3D::add_tmp_id(x = CROSSWALK_ID) %>% 
+  dplyr::filter(!tmp_id %in% unique(missing_cs$tmp_id)) %>% 
+  dplyr::select(-tmp_id) %>% 
+  dplyr::filter(!valid_banks | !has_relief) %>% 
+  dplyr::rename(
+    TW        = owp_tw_bf, 
+    DEPTH     = owp_y_bf,
+    DINGMAN_R = owp_dingman_r
+  )
+
+
+# sanity check that all rows are accounted for after splitting up data
+split_kept_all_rows <- nrow(cs_pts) == (nrow(bankful_cs) + nrow(inchannel_cs) + nrow(missing_cs))
+# split_kept_all_rows <- nrow(cs_pts) == nrow(bankful_cs) + nrow(inchannel_cs)
+
+if (!split_kept_all_rows) {
+  warning(paste0("When splitting cross section points into 'bankful' and 'inchannel' groups,", 
+                 "\nsome points were not put in either group.", 
+                 "\nLikely due to 'valid_banks' and/or 'has_relief' columns have either missing ", 
+                 "values or contain values other than TRUE/FALSE")
+  )
+}
+
+message(round(Sys.time()), " - Adding cross section bathymetry using inchannel widths/depths estimates...")
+# tmp <- 
+#   inchannel_cs %>% 
+#   dplyr::slice(1:10000)
+system.time({
+  
+  # Add bathymetry using "inchannel" estimates
+  inchannel_cs <- hydrofabric3D::add_cs_bathymetry(
+    cross_section_pts = inchannel_cs,
+    # cross_section_pts = tmp,
+    crosswalk_id      = CROSSWALK_ID
+  )
+  
+})
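+
+# Hedged check on the injected bathymetry (assumes add_cs_bathymetry() marks the
+# original DEM-sampled points with the `is_dem_point` column that is dropped
+# before the final write below):
+if ("is_dem_point" %in% names(inchannel_cs)) {
+  message("Inchannel group: ",
+          sum(inchannel_cs$is_dem_point, na.rm = TRUE), " DEM points, ",
+          sum(!inchannel_cs$is_dem_point, na.rm = TRUE), " injected bathymetry points")
+}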
+
+
+
+# arrow::write_parquet(inchannel_cs, "/Users/anguswatters/Desktop/test_ml_cs_pts_06.parquet")
+# ml_subset %>%
+#   dplyr::filter(hy_id == "wb-1005207") %>%
+#   dplyr::select(owp_y_inchan, owp_tw_inchan) %>% 
+#   .$owp_y_inchan
+message(round(Sys.time()), " - Adding cross section bathymetry using bankful widths/depths estimates...")
+
+system.time({
+  
+  # Add bathymetry using "bankful" estimates
+  bankful_cs <- hydrofabric3D::add_cs_bathymetry(
+    cross_section_pts = bankful_cs,
+    # cross_section_pts = dplyr::slice(bankful_cs, 1:10000),
+    crosswalk_id      = CROSSWALK_ID
+  )
+  
+})
+
+# combine the inchannel and bankful cross section points back together, fill in missing values, and reclassify the points
+final_cs <- dplyr::bind_rows(
+              dplyr::select(
+                inchannel_cs,
+                # inchannel_cs2,
+                # -hf_id, 
+                -TW, -DEPTH, -DINGMAN_R, 
+                # -is_dem_point,
+                -dplyr::starts_with("owp")
+              ),
+              dplyr::select(
+                bankful_cs,
+                # bankful_cs2,
+                # -hf_id, 
+                -TW, -DEPTH, -DINGMAN_R, 
+                # -is_dem_point,
+                -dplyr::starts_with("owp")
+              ),
+              dplyr::select(
+                dplyr::mutate(
+                  missing_cs,
+                  is_dem_point = FALSE
+                ),
+                # -hf_id, 
+                # -is_dem_point,
+                -dplyr::starts_with("owp"), 
+                -tmp_id
+              )
+            ) %>%
+  dplyr::group_by(dplyr::across(dplyr::any_of(c(CROSSWALK_ID, "cs_id")))) %>% 
+  # dplyr::group_by(hy_id, cs_id) %>%
+  tidyr::fill(
+    c(cs_lengthm, Z_source)
+  ) %>%
+  dplyr::ungroup() %>%
+  dplyr::select(
+    -point_type,
+    -class,
+    -bottom, -left_bank, -right_bank,
+    -has_relief, -valid_banks
+  )
+
+# arrow::write_parquet(final_cs, "/Users/anguswatters/Desktop/tmp.parquet")
+# final_cs <- arrow::read_parquet("/Users/anguswatters/Desktop/tmp.parquet")
+
+
+message(round(Sys.time()), " - Reclassifying cross section point types...")
+
+# reclassify
+final_cs <- hydrofabric3D::classify_points(cs_pts = final_cs, 
+                                           crosswalk_id = CROSSWALK_ID,
+                                           pct_of_length_for_relief = PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF
+)
+
+starting_uids  <- hydrofabric3D::get_unique_tmp_ids(cs_pts, x = CROSSWALK_ID)
+ending_uids    <- hydrofabric3D::get_unique_tmp_ids(final_cs, x = CROSSWALK_ID)
+
+has_same_number_of_uids           <- length(starting_uids) == length(ending_uids)
+all_starting_uids_in_ending_uids  <- all(starting_uids %in% ending_uids)
+all_ending_uids_in_starting_uids  <- all(ending_uids %in% starting_uids)
+
+# throw some warnings if:
+# - the number of uids in the input is different from the output
+# - there are missing hy_id/cs_id
+if (!has_same_number_of_uids) {
+  warning(paste0("The number of unique hy_id/cs_id is different in the ",
+                 "starting cross section points from the final cross section points:",
+                 "\n > Starting number of unique hy_id/cs_id: ", length(starting_uids),
+                 "\n > Ending number of unique hy_id/cs_id: ", length(ending_uids)
+  ))
+}
+
+if (!all_starting_uids_in_ending_uids) {
+  number_uids_not_in_ending_uids <- length(starting_uids[!starting_uids %in% ending_uids])
+  
+  # starting_uids %in% ending_uids
+  warning(
+    paste0("Missing hy_id/cs_id in output that are in the starting input cross section points: ",
+           "\n > Number of hy_id/cs_id missing: ", number_uids_not_in_ending_uids
+    )
+  )
+  
+  # warning(paste0(number_uids_not_in_ending_uids, " hy_id/cs_id from the input cross section points ",
+  #          "is missing from the output cross section points"))
+  
+}
+
+# tmp_path <-   paste0(DOMAIN_WITH_FEMA_OUTPUT_DIR, "/final_cs2.parquet")
+# message("saving file", tmp_path)
+# 
+# final_cs <- arrow::read_parquet(tmp_path)
+# 
+# # save cross section points as a parquet to out_path (domain/outputs/cross-sections.parquet)
+# arrow::write_parquet(
+#   dplyr::select(final_cs,
+#                 -is_dem_point
+#   ),
+#   paste0(DOMAIN_WITH_FEMA_OUTPUT_DIR, "/final_cs2.parquet")
+# )
+
+# ---------------------------------------------------------------------------------
+# ---- Write final cross section points data ----
+# ---------------------------------------------------------------------------------
+
+# CROSS_SECTIONS_OUTPUT_PATH <- paste0(DOMAIN_WITH_FEMA_OUTPUT_DIR, "/cross-sections.parquet")
+CROSS_SECTIONS_OUTPUT_PATH <- paste0(DOMAIN_WITH_FEMA_CROSS_SECTIONS_DIR,  "/cross-sections.parquet")
+message(round(Sys.time()), " - Saving ML augmented cross section points to:\n - filepath: '", CROSS_SECTIONS_OUTPUT_PATH, "'")
+
+# save cross section points as a parquet to out_path (domain/outputs/cross-sections.parquet)
+arrow::write_parquet(
+  dplyr::select(final_cs, 
+                -is_dem_point
+  ), 
+  CROSS_SECTIONS_OUTPUT_PATH
+)
+
+INTERNAL_CROSS_SECTIONS_PATH <- paste0(DOMAIN_WITH_FEMA_CROSS_SECTIONS_DIR, "/cross-sections-is-dem-point.parquet")
+
+arrow::write_parquet(
+  final_cs,
+  INTERNAL_CROSS_SECTIONS_PATH
+)
+
+# ---------------------------------------------------------------------------------
+# ---- Substitute diffusive domain DEM Z values ---- 
+# ---------------------------------------------------------------------------------
+
+final_cs <- arrow::read_parquet(CROSS_SECTIONS_OUTPUT_PATH)
+# INTERNAL_CROSS_SECTIONS_PATH <- paste0("/Users/anguswatters/Desktop/cross_sections2.parquet")
+
+# build an inventory of the coastal bathymetry DEMs (extent, CRS, filename, path),
+# one row per raster, to test cross section points against below
+bb_df <- lapply(COASTAL_BATHY_DEM_PATHS, function(path) {
+  r       <- terra::rast(path)
+  extent  <- terra::ext(r)
+  # r <- terra::project(r, "EPSG:5070")
+  # terra::set.crs(r, "EPSG:5070")
+  ext_df          <- data.frame(lapply(extent, function(i) {i}))
+  ext_df$crs      <- terra::crs(r)
+  ext_df$file     <- basename(path)
+  ext_df$path     <- path 
+  
+  return(ext_df)
+}) %>% 
+  dplyr::bind_rows()
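+
+# quick look at the assembled DEM inventory (one row per coastal bathymetry
+# raster, with its extent and filename)
+print(bb_df[, c("file", "xmin", "xmax", "ymin", "ymax")])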
+
+# final_cs
+
+# INTERNAL_CROSS_SECTIONS_PATH <- paste0("/Users/anguswatters/Desktop/cross-sections-is-dem-point.parquet")
+# arrow::write_parquet(
+#   final_cs,
+#   INTERNAL_CROSS_SECTIONS_PATH
+# )
+
+# CROSS_SECTIONS_OUTPUT_PATH <- paste0(DOMAIN_WITH_FEMA_OUTPUT_DIR, "/cross-sections.parquet")
+# final_cs <- arrow::read_parquet(CROSS_SECTIONS_OUTPUT_PATH)
+# START_PATH <- paste0("/Users/anguswatters/Desktop/cross_sections2.parquet")
+# 
+# arrow::write_parquet(
+#   final_cs,
+#   START_PATH
+# )
+
+# rm(i, count, cs, df, tmp, aoi, new_tmp, pts_subset, dem, has_pts_in_bb, bb, updated_depths)
+
+
+# CROSS_SECTIONS_OUTPUT_PATH
+# START_CROSS_SECTIONS_OUTPUT_PATH <- paste0(DOMAIN_WITH_FEMA_CROSS_SECTIONS_DIR,  "/cross-sections.parquet")
+START_PATH    <-  paste0(DOMAIN_WITH_FEMA_CROSS_SECTIONS_DIR,  "/cross-sections.parquet")
+UPDATED_PATH  <-  paste0(DOMAIN_WITH_FEMA_CROSS_SECTIONS_DIR,  "/coastal-bathy_cross-sections.parquet")
+
+count <- 0
+
+for (i in 1:nrow(bb_df)) {
+  
+  # read the original parquet on the first pass, then keep layering each
+  # DEM's updates onto the UPDATED_PATH parquet on subsequent passes
+  is_first_iter <- count == 0
+  count         <- count + 1
+  CURR_PATH     <- ifelse(is_first_iter, START_PATH, UPDATED_PATH)
+  
+  message(i, " - Checking DEM '", bb_df$file[i], "' for CS PTS...")
+  
+  df <- bb_df[i, ]
+  
+  START_EPSG_CODE <- 5070
+  
+  cs <- arrow::read_parquet(CURR_PATH)
+  
+  cs <- 
+    cs %>% 
+    # dplyr::slice(1:10000) %>% 
+    sf::st_as_sf(coords = c("X", "Y"), crs = START_EPSG_CODE)
+  
+  # convert to bounding box CRS 
+  cs <- 
+    cs %>% 
+    sf::st_transform(df$crs[1])
+  
+  # create bounding box shape
+  bb <- sf::st_as_sf(
+    sf::st_as_sfc(
+      sf::st_bbox(
+        c(xmin =   df$xmin, 
+          xmax =   df$xmax, 
+          ymax =   df$ymax, 
+          ymin =   df$ymin
+        ), 
+        crs = sf::st_crs(df$crs)
+      )
+    )
+  )
+  
+  # get pts that are in the bounding box
+  pts_subset <- sf::st_filter(
+    cs, 
+    bb
+  )  
+  
+  has_pts_in_bb <- nrow(pts_subset) > 0
+  
+  message(nrow(pts_subset), " cs points found within '", df$file, "' DEM's bounding box")
+  
+  if(!has_pts_in_bb) {
+    
+    message("   > No points to update!")
+    message("   > Writing cross section points through unchanged ...")
+    message("      > '", UPDATED_PATH, "'")
+    
+    cs <- 
+      cs %>% 
+      sf::st_transform(START_EPSG_CODE) %>% 
+      dplyr::mutate(
+        X = sf::st_coordinates(.)[,1],
+        Y = sf::st_coordinates(.)[,2]
+      ) %>%
+      sf::st_drop_geometry() %>% 
+      dplyr::select(
+        # hy_id, 
+        dplyr::any_of(CROSSWALK_ID),
+        cs_id, pt_id,
+        cs_lengthm,
+        relative_distance,
+        X, Y, 
+        Z,
+        class, point_type,
+        Z_source,
+        bottom, left_bank, right_bank, valid_banks,
+        has_relief 
+      )
+    
+    arrow::write_parquet(cs, UPDATED_PATH)
+    
+    next
+  }
+  
+  if(has_pts_in_bb) {
+    
+    message("   > Loading Raster")
+    
+    # load DEM
+    dem <- terra::rast(df$path)   
+    
+    message("   > Extracting new cross section depth values from DEM...")
+    
+    updated_depths <- 
+      pts_subset %>% 
+      dplyr::mutate(
+        Z2 = hydrofabric3D:::extract_pt_val(dem, .),
+        Z_source2 = df$file
+        # Z = extract_pt_val(terra::rast(dem), .)
+      ) %>% 
+      sf::st_drop_geometry() %>% 
+      dplyr::select(
+        dplyr::any_of(CROSSWALK_ID),
+        cs_id, pt_id, 
+        # Z,
+        Z2,
+        # Z_source,
+        Z_source2
+      )
+    
+    message("   > Replacing old depth values with new depth values...")
+    cs <- 
+      cs %>% 
+      dplyr::left_join(
+        updated_depths,
+        by = c(CROSSWALK_ID, "cs_id", "pt_id")
+      ) %>% 
+      dplyr::mutate(
+        Z = dplyr::case_when(
+          !is.na(Z2)  ~ Z2,
+          TRUE        ~ Z
+        ),
+        Z_source = dplyr::case_when(
+          !is.na(Z_source2)  ~ Z_source2,
+          TRUE               ~ Z_source
+        )
+      ) %>% 
+      dplyr::select(-Z2, -Z_source2)
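+    
+    # an equivalent, more compact form of the replacement above uses
+    # dplyr::coalesce(), which keeps the old value wherever no new DEM value
+    # was extracted (kept here as a sketch, not swapped in):
+    # cs <- cs %>%
+    #   dplyr::mutate(
+    #     Z        = dplyr::coalesce(Z2, Z),
+    #     Z_source = dplyr::coalesce(Z_source2, Z_source)
+    #   ) %>%
+    #   dplyr::select(-Z2, -Z_source2)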
+    
+    message("   > Projecting CS PTs back to starting CRS (", START_EPSG_CODE, ") ...")
+    cs <- 
+      cs %>% 
+      sf::st_transform(START_EPSG_CODE) 
+    
+    message("   > Dropping point geometries and preserving X / Y coordinates...")
+    
+    cs <- 
+      cs %>% 
+      dplyr::mutate(
+        X = sf::st_coordinates(.)[,1],
+        Y = sf::st_coordinates(.)[,2]
+      ) %>%
+      sf::st_drop_geometry() %>% 
+      dplyr::select(
+        # hy_id, 
+        dplyr::any_of(CROSSWALK_ID),
+        cs_id, pt_id,
+        cs_lengthm,
+        relative_distance,
+        X, Y, 
+        Z,
+        class, point_type,
+        Z_source,
+        bottom, left_bank, right_bank, valid_banks,
+        has_relief # newly added columns (03/06/2024)
+      )
+    
+    
+    # cs %>% 
+    #   dplyr::slice(1:27) %>% 
+    #   hydrofabric3D::plot_cs_pts(crosswalk_id = "id", color = "point_type", size = 4)
+    # hydrofabric3D::classify_points(crosswalk_id = "id")
+    # hydrofabric3D::plot_cs_pts(crosswalk_id = "id", color = "point_type", size = 4)
+      
+    # cs <- 
+    #   cs %>% 
+    #   # dplyr::slice(1:27) %>% 
+    #   dplyr::select(-class, -point_type, 
+    #                 -valid_banks, -has_relief, 
+    #                 -bottom, -left_bank, -right_bank
+    #                 ) %>% 
+    #   hydrofabric3D::classify_points(crosswalk_id = "id") %>% 
+    #   dplyr::select(
+    #     # hy_id, 
+    #     dplyr::any_of(CROSSWALK_ID),
+    #     cs_id, pt_id,
+    #     Z,
+    #     relative_distance,
+    #     cs_lengthm,
+    #     class,
+    #     point_type,
+    #     X, Y, 
+    #     Z_source,
+    #     bottom, left_bank, right_bank, 
+    #     valid_banks, has_relief 
+    #     # newly added columns (03/06/2024)
+    #   )
+      # hydrofabric3D::plot_cs_pts(crosswalk_id = "id", color = "point_type", size = 4)
+      
+    # tmp <- final_cs %>% dplyr::filter(id == "wb-507785", cs_id == 4)
+    # new_tmp <- cs %>% dplyr::filter(id == "wb-507785", cs_id == 4)
+    
+    # hydrofabric3D::plot_cs_pts(tmp, crosswalk_id = "id", color = "point_type", size = 4)
+    # hydrofabric3D::plot_cs_pts(new_tmp, crosswalk_id = "id", color = "point_type", size = 4)
+    
+    message("   > Overwritting original cross section points parquet with updated depth values ...")
+    message("      > '", UPDATED_PATH, "'")
+    
+    arrow::write_parquet(cs, UPDATED_PATH)
+    
+  }
+  
+}
+
+cross_sections <- arrow::read_parquet(UPDATED_PATH)
+
+# cross_sections %>% 
+#   dplyr::select(Z_source) %>% 
+#   dplyr::group_by(Z_source) %>% 
+#   dplyr::count()
+
+cross_sections <-
+  cross_sections %>% 
+  dplyr::select(
+    -point_type,
+    -class,
+    -bottom, -left_bank, -right_bank,
+    -has_relief, -valid_banks
+  )
+
+# reclassify
+# system.time({
+cross_sections <- hydrofabric3D::classify_points(cs_pts = cross_sections, 
+                                                 crosswalk_id = CROSSWALK_ID,
+                                                 pct_of_length_for_relief = PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF
+)
+
+cross_sections <- 
+  cross_sections %>% 
+  dplyr::select(
+    # hy_id, 
+    dplyr::any_of(CROSSWALK_ID),
+    cs_id, pt_id,
+    Z,
+    relative_distance,
+    cs_lengthm,
+    class,
+    point_type,
+    X, Y, 
+    Z_source,
+    bottom, left_bank, right_bank, 
+    valid_banks, has_relief 
+    # newly added columns (03/06/2024)
+    )
+# })
+
+# START_PATH    <-  paste0(DOMAIN_WITH_FEMA_CROSS_SECTIONS_DIR,  "/cross-sections.parquet")
+# UPDATED_PATH  <-  paste0(DOMAIN_WITH_FEMA_CROSS_SECTIONS_DIR,  "/coastal-bathy_cross-sections.parquet")
+
+arrow::write_parquet(
+  cross_sections, 
+  paste0(DOMAIN_WITH_FEMA_CROSS_SECTIONS_DIR,  "/coastal-bathy_cross-sections.parquet")
+  )
+
+# -------------------------------------------------
+# ---- Move final datasets to outputs/ ----
+# -------------------------------------------------
+
+select_cs_pts <- function(cs_pts, crosswalk_id = NULL) {
+  
+  if(is.null(crosswalk_id)) {
+    # crosswalk_id  <- 'hydrofabric_id'
+    stop("Please provide a valid 'crosswalk_id' which uniquely identifies each cross section in 'cs_pts'")
+  }
+  
+  cs_pts <- 
+    cs_pts %>% 
+    dplyr::select(
+      dplyr::any_of(c(
+        crosswalk_id,
+        "cs_id",
+        "pt_id",
+        "relative_distance",
+        "cs_lengthm",
+        "X", 
+        "Y",
+        "Z",
+        "Z_source",
+        "class", 
+        "point_type",
+        "valid_banks",
+        "has_relief"
+      ))
+    )
+  
+  return(cs_pts)
+}
+
+select_transects <- function(transects, crosswalk_id = NULL) {
+  
+  if(is.null(crosswalk_id)) {
+    # crosswalk_id  <- 'hydrofabric_id'
+    stop("Please provide a valid 'crosswalk_id' which uniquely identifies the flowline associated with each transect in 'transects'")
+  }
+  
+  transects <- hydroloom::rename_geometry(transects, "geometry")
+  
+  transects <- 
+    transects %>% 
+    dplyr::select(
+      dplyr::any_of(c(
+        crosswalk_id,
+        "cs_source",
+        "cs_id",
+        "cs_measure",
+        "cs_lengthm",
+        "geometry"
+      )
+      )
+    )
+  
+  return(transects)
+}
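+
+# Example usage (mirrors the calls below; select_transects() standardizes the
+# geometry column name and keeps only the canonical transect columns):
+# sf::read_sf(paste0(DOMAIN_WITH_FEMA_TRANSECTS_DIR, "/cs-point-extension-transects.gpkg")) %>% 
+#   select_transects("id")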
+
+
+# arrow::read_parquet(paste0(DOMAIN_WITH_FEMA_CS_PTS_DIR, "/final-cs-pts.parquet")) %>% 
+#   select_cs_pts("id")
+
+# DEM based cross section points parquet
+# - FEMA extended
+# - CS based extensions
+arrow::write_parquet(
+  arrow::read_parquet(paste0(DOMAIN_WITH_FEMA_CS_PTS_DIR, "/final-cs-pts.parquet")) %>% 
+    select_cs_pts("id"),
+  # arrow::read_parquet(paste0(DOMAIN_WITH_FEMA_CS_PTS_DIR, "/final-cs-pts.parquet")),
+  paste0(DOMAIN_WITH_FEMA_OUTPUT_DIR, "/dem_cross-sections.parquet")
+)
+
+# DEM based transect geometry gpkg 
+# - FEMA extended
+# - CS based extensions
+sf::write_sf(
+  sf::read_sf(paste0(DOMAIN_WITH_FEMA_TRANSECTS_DIR, "/cs-point-extension-transects.gpkg")) %>% 
+    select_transects("id"),
+  # dplyr::rename(geometry = geom),
+  paste0(DOMAIN_WITH_FEMA_OUTPUT_DIR, "/transects.gpkg")
+)
+
+
+# DEM + ML cross section points parquet 
+# - FEMA extended
+# - CS based extensions
+# - ML based Z value updates
+arrow::write_parquet(
+  arrow::read_parquet(paste0(DOMAIN_WITH_FEMA_CROSS_SECTIONS_DIR, "/cross-sections.parquet")) %>% 
+    select_cs_pts("id"),
+  paste0(DOMAIN_WITH_FEMA_OUTPUT_DIR, "/ml_cross-sections.parquet")
+)
+
+# DEM + COASTAL BATHY cross section points parquet 
+# - FEMA extended
+# - CS based extensions
+# - ML based Z value updates
+# - Coastal bathy DEM Z value updates (where applicable)
+arrow::write_parquet(
+  arrow::read_parquet(paste0(DOMAIN_WITH_FEMA_CROSS_SECTIONS_DIR, "/coastal-bathy_cross-sections.parquet")) %>% 
+    select_cs_pts("id"),
+  paste0(DOMAIN_WITH_FEMA_OUTPUT_DIR, "/coastal-bathy_cross-sections.parquet")
+)
+
+# -------------------------------------------------
+# ---- Data validation ----
+# -------------------------------------------------
+
+# paste0(DOMAIN_WITH_FEMA_TRANSECTS_DIR, "/cs-point-extension-transects.gpkg")
+# new_trans_path <- paste0(DOMAIN_WITH_FEMA_OUTPUT_DIR, "/transects.gpkg")
+
+# sf::st_layers(paste0(DOMAIN_WITH_FEMA_TRANSECTS_DIR, "/cs-point-extension-transects.gpkg"))
+# sf::st_layers(paste0(DOMAIN_WITH_FEMA_OUTPUT_DIR, "/transects.gpkg"))
+
+# og_transects  <- sf::read_sf(paste0(DOMAIN_WITH_FEMA_TRANSECTS_DIR, "/cs-point-extension-transects.gpkg"))
+# new_transects <- sf::read_sf(paste0(DOMAIN_WITH_FEMA_OUTPUT_DIR, "/transects.gpkg"))
+
+# DOMAIN_WITH_FEMA_CROSS_SECTIONS_DIR
+
+output_files <- list.files(DOMAIN_WITH_FEMA_OUTPUT_DIR, full.names = TRUE)
+# rm(transects, dropped_trans, cs_pts, flowlines, lengths_check, transect_lengths, rel_dist_check, wrong_lengths, mismatches, duplicate_ids, cs_pt_lengths, cs_pts_ids)
+
+tmp_ids_list <- lapply(output_files, function(i) {
+  is_gpkg    <- endsWith(i, ".gpkg")
+  is_parquet <- endsWith(i, ".parquet")
+  
+  if (is_gpkg) {
+    return(
+      sf::read_sf(i) %>% 
+        hydrofabric3D::get_unique_tmp_ids(x = "id")
+    )
+  }
+  
+  if (is_parquet) {
+    return(
+      arrow::read_parquet(i) %>% 
+        hydrofabric3D::get_unique_tmp_ids(x = "id")
+    )
+  }
+  
+  return(NULL)
+})
+
+
+
+# tmp_ids_list[[1]] %in% tmp_ids_list[[2]]
+
+res <- list()
+idxs <- seq_along(tmp_ids_list)
+
+# check that every output file contains exactly the same set of id/cs_id pairs
+for (i in seq_along(tmp_ids_list)) {
+  curr       <- tmp_ids_list[[i]]
+  other_idxs <- idxs[idxs != i]
+  
+  in_all_others <- 
+    lapply(tmp_ids_list[other_idxs], function(k) { 
+      all(k %in% curr) && all(curr %in% k) 
+    }) %>% 
+    unlist() %>% 
+    all()
+  
+  res[[i]] <- in_all_others
+}
+
+all_ids_are_matching <- 
+  res %>% 
+  unlist() %>% 
+  all()
+
+# validate_transects(transects, "id")
+if (!all_ids_are_matching) {
+  warning("Not all id/cs_id pairs are present in every transect / cross section point dataset")
+} else {
+  message("All id/cs_id pairs are present in every transect / cross section point dataset")
+}
+
+trans_path   <- paste0(DOMAIN_WITH_FEMA_OUTPUT_DIR, "/transects.gpkg")
+transects    <- sf::read_sf(trans_path) %>% 
+                dplyr::rename(geometry = geom)
+
+# NOTE: the validate_*() helpers called below are defined further down in this
+# file, so they need to be sourced before running this section top-to-bottom
+is_validated_transects           <- validate_transects(transects, "id")
+
+is_flowline_validated_transects  <- validate_transects_against_flowlines(transects, flines, "id")
+
+if (!(is_validated_transects && is_flowline_validated_transects)) {
+  stop(trans_path, "  transects failed validity check:",
+       "\n > is_validated_transects: ", is_validated_transects,
+       "\n > is_flowline_validated_transects: ", is_flowline_validated_transects
+  )
+} else {
+  message("Transects look good to go!")
+}
+
+output_files <- list.files(DOMAIN_WITH_FEMA_OUTPUT_DIR, full.names = TRUE)
+cs_pts_files <- output_files[grepl(".parquet", output_files)]
+
+for (i in seq_along(cs_pts_files)) {
+  path <- cs_pts_files[i]
+  message(i, " - Checking cross section points validity for > '",   basename(path), "'")
+  
+  cs_pts <- arrow::read_parquet(path)
+  
+  is_validated_cs_pts              <- validate_cs_pts(cs_pts, crosswalk_id = "id")
+  is_transect_validated_cs_pts     <- validate_cs_pts_against_transects(cs_pts, transects, crosswalk_id = "id")
+  
+  is_valid <- is_validated_cs_pts & is_transect_validated_cs_pts
+  
+  if(!is_valid) {
+    stop(path, " cs_pts failed validity check:",
+         "\n > is_validated_cs_pts: ", is_validated_cs_pts,
+         "\n > is_transect_validated_cs_pts: ", is_transect_validated_cs_pts
+         )
+  } else {
+    message("'",   basename(path), "' is valid and is good to go!")
+  }
+  
+}
+
+# og_trans <- sf::read_sf(paste0(DOMAIN_WITH_FEMA_TRANSECTS_DIR, "/pre-extension-transects.gpkg"))
+# post_trans <- sf::read_sf(paste0(DOMAIN_WITH_FEMA_TRANSECTS_DIR, "/post-extension-transects.gpkg"))
+# dplyr::filter(transects, id == "wb-10813") %>% 
+#   sf::st_buffer(2500) 
+# 
+# bb <- 
+#   dplyr::filter(transects, id == "wb-10813") %>% 
+#   sf::st_buffer(5000) %>% 
+#   sf::st_bbox() %>% 
+#   sf::st_as_sfc() %>% 
+#   sf::st_sf()
+# 
+#   sf::st_filter(transects, bb) 
+# 
+#   mapview::mapview(dplyr::filter(flines, id == "wb-10813")) +
+#   mapview::mapview(    sf::st_filter(transects, bb) , color  = "green") + 
+#   mapview::mapview(  sf::st_filter(og_trans, bb) , color = "red") + 
+#     mapview::mapview(  sf::st_filter(post_trans, bb) , color = "gold")
+# 
+# transects %>% 
+#   sf::st_filter(
+#     bb
+#   ) %>% mapview::mapview()
+# 
+# mapview::mapview() + mapview::mapview(dplyr::filter(flines, id == "wb-10813")) +
+#   mapview::mapview(  dplyr::filter(transects, id == "wb-10813"), color  = "green") + 
+#   mapview::mapview(dplyr::filter(og_trans, id == "wb-10813"), color = "red")
+# 
+# 
+# dplyr::filter(og_trans, id == "wb-10813")
+# dplyr::filter(post_trans, id == "wb-10813")
+# dplyr::filter(transects, id == "wb-10813")
+# 
+# mapview::mapview(dplyr::filter(flines, id == "wb-10813")) +
+# mapview::mapview(  dplyr::filter(transects, id == "wb-10813"), color  = "green") + 
+# mapview::mapview(dplyr::filter(og_trans, id == "wb-10813"), color = "red")
+# 
+# transects %>% 
+#   dplyr::group_by(id)  %>% 
+#   dplyr::filter(cs_id == max(cs_id)) %>% 
+#   dplyr::arrange(cs_id)
+# 
+# # cs_pts_files <- 
+# cs_pts <-arrow::read_parquet(paste0(DOMAIN_WITH_FEMA_OUTPUT_DIR, "/dem_cross-sections.parquet"))
+# 
+# is_validated_transects           <- validate_transects(transects, "id")
+# is_flowline_validated_transects  <- validate_transects_against_flowlines(transects, flines, "id")
+# 
+# is_validated_cs_pts              <- validate_cs_pts(cs_pts, crosswalk_id = "id")
+# is_transect_validated_cs_pts     <- validate_cs_pts_against_transects(cs_pts, transects, crosswalk_id = "id")
+
+# sf::st_layers(paste0(DOMAIN_WITH_FEMA_OUTPUT_DIR, "/transects.gpkg"))
+
+validate_transects_self_intersections <- function(transects) {
+  return(
+    nrow(transects) == nrow(hydrofabric3D:::rm_self_intersections(transects))
+  )
+}
+
+validate_transects_cs_id_enumeration <- function(transects, crosswalk_id = NULL) {
+  
+  # re-enumerate the cs_ids for each transect based on cs_measure sorting, and make sure all cross sections are correctly numbered
+  mismatches <-
+    transects %>%
+    sf::st_drop_geometry() %>% 
+    dplyr::group_by(dplyr::across(dplyr::any_of(c(crosswalk_id)))) %>% 
+    dplyr::arrange(cs_measure, .by_group = TRUE) %>% 
+    dplyr::mutate(
+      new_cs_id = 1:dplyr::n()
+    ) %>% 
+    dplyr::ungroup() %>% 
+    dplyr::filter(cs_id != new_cs_id)
+  
+  # FALSE if there are any transects with different cs_ids to the newly created cs_id 
+  # Otherwise TRUE
+  has_valid_cs_ids <- !(nrow(mismatches) > 0)
+  
+  return(
+    has_valid_cs_ids
+  )
+  
+}
+
+validate_transects_cs_length <- function(transects, crosswalk_id = NULL) {
+  
+  # recalculate each transect's geometry length and compare it to the cs_lengthm column
+  # NOTE: an exact `!=` comparison would flag nearly every row due to floating point
+  # noise, so (matching the 1 meter tolerance used elsewhere in this file) lengths
+  # within 1 meter are treated as equal
+  wrong_lengths <-
+    transects %>%
+    dplyr::mutate(
+      new_cs_length = as.numeric(sf::st_length(.)) 
+    ) %>% 
+    dplyr::filter(abs(cs_lengthm - new_cs_length) > 1)
+  
+  # FALSE if any transects have a cs_lengthm different from the freshly calculated new_cs_length
+  has_correct_lengths <- !(nrow(wrong_lengths) > 0)
+  
+  return(
+    has_correct_lengths
+  )
+  
+}
+
+validate_transects_unique_ids <- function(transects, crosswalk_id = NULL) {
+    
+  duplicate_ids <- 
+    transects %>%
+    sf::st_drop_geometry() %>% 
+    hydrofabric3D::add_tmp_id(x = crosswalk_id) %>% 
+    dplyr::select(tmp_id) %>% 
+    dplyr::group_by(tmp_id) %>% 
+    dplyr::count() %>% 
+    dplyr::ungroup() %>% 
+    dplyr::filter(n > 1)
+    
+  # FALSE if there are ANY rows in the duplicate_ids dataframe above 
+  # (i.e. a count of greater than 1 for any tmp_id (<crosswalk_id>_cs_id))
+  has_unique_ids <- !(nrow(duplicate_ids) > 0)
+  
+  return(
+    has_unique_ids
+  )
+  
+}
+
+validate_transects_cs_measure <- function(transects) {
+  
+  min_cs_measure <-
+    transects %>%
+    sf::st_drop_geometry() %>% 
+    dplyr::pull(cs_measure) %>% 
+    min()
+  
+  max_cs_measure <-
+    transects %>%
+    sf::st_drop_geometry() %>% 
+    dplyr::pull(cs_measure) %>% 
+    max()
+  
+  # cs_measure should always be:
+  # greater than or equal to 0 AND 
+  # less than or equal to 100, as it's a percentage along a flowline
+  has_valid_cs_measure <- min_cs_measure >= 0 & max_cs_measure <= 100
+  
+  return(
+    has_valid_cs_measure
+  )
+  
+}
+
+validate_transects_has_complete_geometries <- function(transects, crosswalk_id = NULL) {
+  
+  has_empty_geoms <- 
+    transects %>% 
+    sf::st_is_empty() %>% 
+    any()
+  
+  return(
+    !has_empty_geoms
+  )
+  
+}
+
+validate_transects_has_crs <- function(transects) {
+  
+  missing_crs <- 
+    transects %>% 
+    sf::st_crs() %>% 
+    is.na()
+    
+  return(
+    !missing_crs
+  )
+  
+}
+
+validate_transects <- function(transects, 
+                               crosswalk_id = NULL
+                               ) {
+  
+  # # standardize geometry name
+  # transects <- hydroloom::rename_geometry(transects, "geometry")
+  
+  REQUIRED_COLS <- c(crosswalk_id, "cs_id", "cs_source", "cs_measure", "cs_lengthm", "geometry")
+  
+  # validate dataframe has all correct columns  
+  has_all_valid_cols         <- hydrofabric3D:::validate_df(
+                                          x = transects, 
+                                          cols = REQUIRED_COLS,
+                                          obj_name = "transects"
+                                          )  
+  
+  # Validate every flowline (id) has a cs_id of 1:number of transects
+  has_valid_cs_ids           <- validate_transects_cs_id_enumeration(transects, crosswalk_id = crosswalk_id)
+  
+  # Validate there are no self intersections
+  has_no_self_intersections  <- validate_transects_self_intersections(transects)
+  
+  # validate the cs_lengthm column equals the actual transect geometry length
+  has_correct_lengths        <- validate_transects_cs_length(transects, crosswalk_id = crosswalk_id)
+  
+  # validate no duplicate id / cs_id combos
+  has_unique_ids             <- validate_transects_unique_ids(transects, crosswalk_id = crosswalk_id)
+  
+  # validate cs_measure is always within [0, 100] (a percentage along the flowline)
+  has_valid_cs_measure       <- validate_transects_cs_measure(transects)
+  
+  # make sure transects have no empty geometries 
+  has_complete_geometries    <- validate_transects_has_complete_geometries(transects)
+  
+  # make sure transects have a CRS
+  has_crs                    <- validate_transects_has_crs(transects)
+  
+  # if everything is TRUE, return true, otherwise return FALSE (or throw an error...?)
+  is_validated_transects <- all(
+                              c(
+                                has_all_valid_cols,
+                                has_valid_cs_ids,
+                                has_no_self_intersections,
+                                has_correct_lengths,
+                                has_unique_ids,
+                                has_valid_cs_measure,
+                                has_complete_geometries,
+                                has_crs
+                              )
+                            )
+  
+  return(is_validated_transects)
+  
+}
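+
+# Example: validate a transects layer read from disk (returns a single logical;
+# this mirrors the validation calls made earlier in this file):
+# validate_transects(
+#   sf::read_sf(paste0(DOMAIN_WITH_FEMA_OUTPUT_DIR, "/transects.gpkg")) %>% 
+#     dplyr::rename(geometry = geom),
+#   crosswalk_id = "id"
+# )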
+
+# all ids in transects are in flowlines
+validate_transects_ids_in_flowlines <- function(transects, flowlines, crosswalk_id = NULL) {
+  
+  # flowlines <- flines
+  
+  transect_ids <- 
+    transects %>% 
+    sf::st_drop_geometry() %>% 
+    dplyr::select(dplyr::any_of(crosswalk_id)) %>% 
+    dplyr::pull(dplyr::any_of(crosswalk_id)) %>% 
+    unique()
+  
+  flowline_ids <-
+    flowlines %>% 
+    sf::st_drop_geometry() %>% 
+    dplyr::select(dplyr::any_of(crosswalk_id)) %>% 
+    dplyr::pull(dplyr::any_of(crosswalk_id)) %>% 
+    unique()
+  
+  all_transect_ids_in_flowline_ids <- all(transect_ids %in% flowline_ids)
+  
+  return(
+    all_transect_ids_in_flowline_ids
+  )
+  
+}
+
+# make sure no transect crosses more than a single flowline, a single time
+validate_transects_flowline_intersections <- function(transects, flowlines) {
+  
+  # flowlines <- flines
+  return (
+    nrow(transects) == nrow(hydrofabric3D:::rm_multiflowline_intersections(transects, flowlines)) 
+    )
+}
+
+# validate 2 SF objects have the same CRS
+validate_same_crs <- function(x, y) {
+  
+  return (
+    sf::st_crs(x) == sf::st_crs(y)
+  )
+  
+}
+
+validate_transects_against_flowlines <- function(transects, 
+                                                 flowlines,  
+                                                 crosswalk_id = NULL
+                                                ) {
+  
+  # # standardize geometry name
+  # transects <- hydroloom::rename_geometry(transects, "geometry")
+  
+  REQUIRED_COLS <- c(crosswalk_id, "cs_id", "cs_source", "cs_measure", "cs_lengthm", "geometry")
+  
+  # validate dataframe has all correct columns  
+  has_all_valid_cols         <- hydrofabric3D:::validate_df(
+                                                x = transects, 
+                                                cols = REQUIRED_COLS,
+                                                obj_name = "transects"
+                                              )  
+  
+  # all ids in transects are in flowlines
+  all_transect_ids_in_flowline_ids  <- validate_transects_ids_in_flowlines(transects, flowlines, crosswalk_id = crosswalk_id)
+  
+  # transects only intersects a single flowline, a single time
+  has_valid_flowline_intersects     <- validate_transects_flowline_intersections(transects, flowlines)
+  
+  # transects and flowlines have the same CRS
+  has_same_crs                      <- validate_same_crs(transects, flowlines)
+  
+  
+  # if everything is TRUE, return true, otherwise return FALSE (or throw an error...?)
+  is_flowline_validated_transects <- all(
+                                        c(
+                                          has_all_valid_cols,
+                                          all_transect_ids_in_flowline_ids,
+                                          has_valid_flowline_intersects,
+                                          has_same_crs
+                                        )
+                                      )
+  
+  return(is_flowline_validated_transects)
+  
+}
+
+# validate_transects_against_cs_pts <- function(
+#                                           transects, 
+#                                           cs_pts,  
+#                                           crosswalk_id = NULL
+#                                           ) {
+#   
+#   # # standardize geometry name
+#   # transects <- hydroloom::rename_geometry(transects, "geometry")
+#   
+#   REQUIRED_COLS <- c(crosswalk_id, "cs_id", "cs_source", "cs_measure", "cs_lengthm", "geometry")
+#   
+#   # validate dataframe has all correct columns  
+#   has_all_valid_cols         <- hydrofabric3D:::validate_df(
+#     x = transects, 
+#     cols = REQUIRED_COLS,
+#     obj_name = "transects"
+#   )  
+# }
+
+validate_cs_pts_cs_id_enumeration <- function(cs_pts, crosswalk_id = NULL) {
+  
+  # re-enumerate the cs_ids for each cross section based on cs_id sorting, and make sure all cross sections are correctly numbered
+  mismatches <-
+    cs_pts %>%
+    # dplyr::slice(1:150) %>% 
+    sf::st_drop_geometry() %>% 
+    dplyr::select(dplyr::any_of(crosswalk_id), cs_id) %>% 
+    dplyr::group_by(dplyr::across(dplyr::any_of(c(crosswalk_id, "cs_id")))) %>% 
+    dplyr::slice(1) %>% 
+    dplyr::ungroup() %>% 
+    dplyr::group_by(dplyr::across(dplyr::any_of(c(crosswalk_id)))) %>% 
+    dplyr::arrange(cs_id, .by_group = TRUE) %>% 
+    dplyr::mutate(
+      new_cs_id = 1:dplyr::n()
+    ) %>% 
+    dplyr::ungroup() %>% 
+    dplyr::filter(cs_id != new_cs_id)
+  
+  # FALSE if there are any transects with different cs_ids to the newly created cs_id 
+  # Otherwise TRUE
+  has_valid_cs_ids <- !(nrow(mismatches) > 0)
+  
+  return(
+    has_valid_cs_ids
+  )
+  
+}
+
+validate_cs_pts_pt_id_enumeration <- function(cs_pts, crosswalk_id = NULL) {
+  
+  # re-enumerate the pt_ids to make sure the pt_ids are valid values of 1:(number of points in the cross section)
+  mismatches <-
+    cs_pts %>%
+    sf::st_drop_geometry() %>% 
+    dplyr::select(dplyr::any_of(crosswalk_id), cs_id, pt_id) %>% 
+    dplyr::group_by(dplyr::across(dplyr::any_of(c(crosswalk_id, "cs_id")))) %>% 
+    dplyr::arrange(pt_id, .by_group = TRUE) %>% 
+    dplyr::mutate(
+      new_pt_id = 1:dplyr::n()
+    ) %>% 
+    dplyr::ungroup() %>% 
+    dplyr::filter(pt_id != new_pt_id)
+  
+  # FALSE if there are any cs_pts with  pt_ids different from the newly created new_pt_id 
+  # Otherwise TRUE
+  has_valid_pt_ids <- !(nrow(mismatches) > 0)
+  
+  return(
+    has_valid_pt_ids
+  )
+  
+}
+
+validate_cs_pts_relative_distance <- function(cs_pts, crosswalk_id = NULL) {
+  
+  # make sure relative distance is greater than or equal to 0
+  min_relative_distance <-
+    cs_pts %>%
+    # dplyr::slice(1:50) %>% 
+    sf::st_drop_geometry() %>% 
+    dplyr::pull(relative_distance) %>% 
+    min()
+  
+  
+  # check that each cross section's maximum relative_distance is consistent with its cs_lengthm
+  rel_dist_check <-
+    cs_pts %>%
+    # dplyr::slice(1:50) %>% 
+    sf::st_drop_geometry() %>% 
+    dplyr::select(dplyr::any_of(crosswalk_id), cs_id, pt_id, relative_distance, cs_lengthm) %>% 
+    dplyr::group_by(dplyr::across(dplyr::any_of(c(crosswalk_id, "cs_id")))) %>% 
+    dplyr::summarise(
+      cs_lengthm = max(cs_lengthm),
+      # min_rel_dist = min(relative_distance),
+      max_rel_dist = max(relative_distance)
+    ) %>% 
+    dplyr::ungroup() %>% 
+    dplyr::mutate(
+        # TODO: as long as the lengths are within 1 meter, that's equal
+        is_valid_relative_dist = abs(cs_lengthm - max_rel_dist) <= 1
+        # approx_equal_lengths = all.equal(cs_lengthm, cs_pts_lengthm, tolerance = 0.01)
+      ) 
+    
+    # dplyr::filter(max_rel_dist > cs_lengthm)
+  
+  # relative distance is always greater than or equal to 0 and less than the cross sections length
+  has_valid_relative_dist_min        <- min_relative_distance >= 0
+  has_valid_relative_dist_maximums   <- all(rel_dist_check$is_valid_relative_dist)
+  
+  return(
+    has_valid_relative_dist_min && has_valid_relative_dist_maximums
+  )
+  
+}
+
+validate_cs_pts_point_types <- function(cs_pts) {
+  
+  # make sure only "left_bank", "right_bank", "channel", and "bottom" values exist in cs_pts point_type column
+  valid_point_types  <- c("left_bank", "right_bank", "channel", "bottom")
+  
+  # unique point types in cs_pts
+  unique_point_types <- unique(cs_pts$point_type)
+  
+  has_only_valid_point_types <- all(unique_point_types %in% valid_point_types)
+  
+  return(
+    has_only_valid_point_types
+  )
+  
+}
+
+validate_cs_pts <- function(
+    cs_pts,  
+    crosswalk_id = NULL
+) {
+  
+  # # standardize geometry name
+  # transects <- hydroloom::rename_geometry(transects, "geometry")
+  
+  REQUIRED_COLS <- c(crosswalk_id, "cs_id", "pt_id", 
+                     "relative_distance", "cs_lengthm", "X", "Y", "Z", "Z_source",
+                     "class", "point_type", "valid_banks", "has_relief"
+  )
+  
+  # validate dataframe has all correct columns  
+  has_all_valid_cols         <- hydrofabric3D:::validate_df(
+    x = cs_pts, 
+    cols = REQUIRED_COLS,
+    obj_name = "cs_pts"
+  )  
+  
+  # make sure valid cs_ids
+  has_valid_cs_pts_cs_ids       <- validate_cs_pts_cs_id_enumeration(cs_pts, crosswalk_id = crosswalk_id)
+  
+  # make sure valid pt_ids
+  has_valid_cs_pts_pt_ids       <- validate_cs_pts_pt_id_enumeration(cs_pts, crosswalk_id = crosswalk_id)
+  
+  # check cs_pts have only valid relative_distance values
+  has_valid_relative_distances  <- validate_cs_pts_relative_distance(cs_pts, crosswalk_id = crosswalk_id)
+  
+  has_valid_point_types         <- validate_cs_pts_point_types(cs_pts)
+  
+  # if everything is TRUE, return true, otherwise return FALSE (or throw an error...?)
+  is_validated_cs_pts <- all(
+    c(
+      has_all_valid_cols,
+      has_valid_cs_pts_cs_ids,
+      has_valid_cs_pts_pt_ids,
+      has_valid_relative_distances,
+      has_valid_point_types
+    )
+  )
+  
+  return(is_validated_cs_pts)
+  
+}
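+
+# Minimal usage sketch (hypothetical file path; assumes cs_pts has the REQUIRED_COLS above):
+# cs_pts_check <- arrow::read_parquet("path/to/cross-sections.parquet")
+# stopifnot(validate_cs_pts(cs_pts_check, crosswalk_id = "hy_id"))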
+
+# validate all cs_pts id/cs_ids are in the transects
+validate_cs_pt_ids_in_transects <- function(cs_pts, transects, crosswalk_id = NULL) {
+  
+  # flowlines <- flines
+  cs_pts_ids <-
+    cs_pts %>% 
+    sf::st_drop_geometry() %>% 
+    hydrofabric3D::get_unique_tmp_ids(x = crosswalk_id)
+  
+  transect_ids <-
+    transects %>% 
+    sf::st_drop_geometry() %>% 
+    hydrofabric3D::get_unique_tmp_ids(x = crosswalk_id)
+  
+  
+  all_cs_pts_ids_in_transects <- all(cs_pts_ids %in% transect_ids)
+  all_transect_ids_in_cs_pts  <- all(transect_ids %in% cs_pts_ids)
+  same_number_of_ids          <- length(cs_pts_ids) == length(transect_ids)  
+  
+  is_valid_cs_pts_ids <- all(
+    c(
+      all_cs_pts_ids_in_transects,
+      all_transect_ids_in_cs_pts,
+      same_number_of_ids
+    )
+  )
+  
+  return(
+    is_valid_cs_pts_ids
+  )
+  
+}
+
+validate_cs_pts_length_against_transects <- function(cs_pts, transects, crosswalk_id = NULL) {
+  
+  cs_pt_lengths <-
+    cs_pts %>% 
+    sf::st_drop_geometry() %>% 
+    dplyr::select(dplyr::any_of(crosswalk_id), cs_id, cs_lengthm) %>% 
+    dplyr::group_by(dplyr::across(dplyr::any_of(c(crosswalk_id, "cs_id")))) %>% 
+    dplyr::slice(1) %>% 
+    dplyr::ungroup() %>% 
+    dplyr::rename(
+      cs_pts_lengthm = cs_lengthm
+    )
+  
+  transect_lengths <-
+    transects %>% 
+    sf::st_drop_geometry() %>% 
+    dplyr::select(dplyr::any_of(crosswalk_id), cs_id, cs_lengthm) %>% 
+    dplyr::group_by(dplyr::across(dplyr::any_of(c(crosswalk_id, "cs_id")))) %>% 
+    dplyr::slice(1) %>% 
+    dplyr::ungroup()
+  
+  lengths_check <- 
+    dplyr::left_join(
+      transect_lengths,
+      cs_pt_lengths,
+      by = c(crosswalk_id, "cs_id")
+    ) %>% 
+    dplyr::mutate(
+      # TODO: treat lengths within 1 meter of each other as equal
+      approx_equal_lengths = abs(cs_lengthm - cs_pts_lengthm) <= 1
+      # approx_equal_lengths = all.equal(cs_lengthm, cs_pts_lengthm, tolerance = 0.01)
+    ) 
+  
+  all_lengths_are_equal <- all(lengths_check$approx_equal_lengths)
+  
+  return(
+    all_lengths_are_equal
+  )
+  
+}
+
+validate_cs_pts_against_transects <- function(
+    cs_pts,  
+    transects, 
+    crosswalk_id = NULL
+) {
+  
+  # # standardize geometry name
+  # transects <- hydroloom::rename_geometry(transects, "geometry")
+  
+  # make sure all id/cs_id combos are in both transects and cs_pts
+  has_valid_cs_pts_ids  <- validate_cs_pt_ids_in_transects(cs_pts, transects, crosswalk_id = crosswalk_id)
+  
+  # make sure cs_lengthm matches from transects to cs_pts
+  has_matching_lengths  <- validate_cs_pts_length_against_transects(cs_pts, transects, crosswalk_id = crosswalk_id)
+  
+  # if every check is TRUE, return TRUE, otherwise return FALSE (or throw an error...?)
+  is_transect_validated_cs_pts <- all(
+    c(
+      has_valid_cs_pts_ids,
+      has_matching_lengths
+    )
+  )
+  
+  return(is_transect_validated_cs_pts)
+  
+}
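+
+# Usage sketch: combine the standalone checks with the transect-based checks
+# (assumes cs_pts and transects objects are already in memory)
+# is_valid <- validate_cs_pts(cs_pts, crosswalk_id = "hy_id") &&
+#   validate_cs_pts_against_transects(cs_pts, transects, crosswalk_id = "hy_id")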
+
+# og_transects   <- sf::read_sf(paste0(DOMAIN_WITH_FEMA_TRANSECTS_DIR, "/cs-point-extension-transects.gpkg"))
+# new_transects  <- sf::read_sf(paste0(DOMAIN_WITH_FEMA_OUTPUT_DIR, "/transects.gpkg"))
+
+# new_transects %>% 
+#   dplyr::arrange(cs_lengthm) %>% 
+#   dplyr::slice(1:1000) %>% 
+#   mapview::mapview()
+# 
+# hydrofabric3D:::rm_self_intersections(new_transects)
+
+# --------------------------------------------------------------------------------------------------------------------------
+# --------------------------------------------------------------------------------------------------------------------------
+# --------------------------------------------------------------------------------------------------------------------------
+# 
+og_trans <- sf::read_sf( "/Users/anguswatters/Desktop/lynker-spatial/domain_with_fema/transects/pre_extension_transects.gpkg")
+
+transects <-  sf::read_sf(output_files[[4]])
+
+transects2 <- hydrofabric3D:::rm_self_intersections(transects)
+
+old_ids <- transects %>% hydrofabric3D::get_unique_tmp_ids("id")
+new_ids <- transects2 %>% hydrofabric3D::get_unique_tmp_ids("id")
+diff_tmp_ids <- old_ids[!old_ids %in% new_ids]
+
+diff_ids <- lapply(diff_tmp_ids, function(i) {
+  strsplit(i, "_")[[1]][1]
+}) %>% unlist()
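+
+# e.g. a (hypothetical) tmp_id of "wb-1000_3" yields the id "wb-1000"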
+
+diff_trans <- 
+  transects %>% 
+  # hydrofabric3D::add_tmp_id(x = "id") %>% 
+  dplyr::filter(id %in% diff_ids)
+
+# bounding box around the differing transects (used below to find intersecting transects)
+diff_bb <- 
+  diff_trans %>% 
+  sf::st_buffer(50) %>% 
+  sf::st_bbox() %>% 
+  sf::st_as_sfc() %>% 
+  sf::st_sf()
+
+og_diff_trans <- 
+  og_trans %>% 
+  # hydrofabric3D::add_tmp_id(x = "id") %>% 
+  dplyr::filter(id %in% diff_ids)
+
+mapview::mapview(diff_trans, color = "green") +
+  mapview::mapview(og_diff_trans, color = "red")
+
+old_trans <- transects[lengths(sf::st_intersects(transects, diff_bb)) > 0, ]
+
+# final_cross_sections %>% 
+#   dplyr::filter(id == "wb-1000") %>% 
+#   dplyr::rename(hy_id = id) %>% 
+#   hydrofabric3D::plot_cs_pts(color = "point_type")
+
+# ---------------------------------------------------------------------------------
+# ---- Write final cross section points data ----
+# ---- Diffusive Domain DEM + FEMA + ML 
+# ---------------------------------------------------------------------------------
+
+CROSS_SECTIONS_ML_OUTPUT_PATH <- paste0(DOMAIN_WITH_FEMA_OUTPUT_DIR, "/cross-sections.parquet")
+
+message(round(Sys.time()), " - Saving Diffusive DEM + FEMA + ML augmented cross section points to:\n - filepath: '", CROSS_SECTIONS_ML_OUTPUT_PATH, "'")
+# transects <- 
+# sum(is.na(final_cross_sections$id))
+# sum(is.na(final_cross_sections$cs_id))
+# sum(is.na(final_cross_sections$pt_id))
+# sum(is.na(final_cross_sections$X))
+
+# save cross section points as a parquet to out_path (domain/outputs/cross-sections.parquet)
+arrow::write_parquet(
+  # dplyr::select(final_cs, 
+  #               -is_dem_point
+  # ), 
+  cross_sections,
+  CROSS_SECTIONS_ML_OUTPUT_PATH
+)
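+
+# Optional sanity check (sketch): read the parquet back and re-run the validators above
+# check_pts <- arrow::read_parquet(CROSS_SECTIONS_ML_OUTPUT_PATH)
+# validate_cs_pts(check_pts, crosswalk_id = "id")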
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/runners/cs_runner/download_fema100.R b/runners/cs_runner/download_fema100.R
deleted file mode 100644
index b23b438..0000000
--- a/runners/cs_runner/download_fema100.R
+++ /dev/null
@@ -1,123 +0,0 @@
-# Running this script goes and pulls the desired FEMA100 flood fgb datasets from the lynker-hydrofabric S3 bucket then saves them into a directory within "BASE_DIR"
-# BASE_DIR is defined within runners/workflow/root_dir.R
-
-# NOTE: The lynker-hydrofabric S3 bucket is private at the moment
-
-# load config variables
-source("runners/cs_runner/config_vars.R")
-
-# -------------------------------------------------------------------------------------
-# ---- Create FEMA100/ directory and bounding box dir (if it does NOT exist) ----
-# -------------------------------------------------------------------------------------
-
-if (!dir.exists(FEMA_FGB_PATH)) {
-  message(paste0("FEMA100/ directory does not exist...\nCreating directory:\n > '", FEMA_FGB_PATH, "'"))
-  dir.create(FEMA_FGB_PATH)
-}
-
-# create geojsons directory (if not exists) 
-if (!dir.exists(FEMA_GEOJSON_PATH)) {
-  message(paste0(FEMA_GEOJSON_PATH, " directory does not exist...\nCreating directory:\n > '", FEMA_GEOJSON_PATH, "'"))
-  dir.create(FEMA_GEOJSON_PATH)
-}
-
-# create directory for cleaned FEMA geometries (if not exists) 
-if (!dir.exists(FEMA_CLEAN_PATH)) {
-  message(paste0(FEMA_CLEAN_PATH, " directory does not exist...\nCreating directory:\n > '", FEMA_CLEAN_PATH, "'"))
-  dir.create(FEMA_CLEAN_PATH)
-}
-
-# create directory for cleaned FEMA geometries as geopackages (if not exists) 
-if (!dir.exists(FEMA_GPKG_PATH)) {
-  message(paste0(FEMA_GPKG_PATH, " directory does not exist...\nCreating directory:\n > '", FEMA_GPKG_PATH, "'"))
-  dir.create(FEMA_GPKG_PATH)
-}
-
-# create simplified geojsons directory (if not exists)
-if (!dir.exists(FEMA_SIMPLIFIED_PATH)) {
-  message(paste0(FEMA_SIMPLIFIED_PATH, " directory does not exist...\nCreating directory:\n > '", FEMA_SIMPLIFIED_PATH, "'"))
-  dir.create(FEMA_SIMPLIFIED_PATH)
-}
-
-# create simplified geojsons directory (if not exists)
-if (!dir.exists(FEMA_DISSOLVED_PATH)) {
-  message(paste0(FEMA_DISSOLVED_PATH, " directory does not exist...\nCreating directory:\n > '", FEMA_DISSOLVED_PATH, "'"))
-  dir.create(FEMA_DISSOLVED_PATH)
-}
-
-# create exploded geojsons directory (if not exists)
-if (!dir.exists(FEMA_EXPLODED_PATH)) {
-  message(paste0(FEMA_EXPLODED_PATH, " directory does not exist...\nCreating directory:\n > '", FEMA_EXPLODED_PATH, "'"))
-  dir.create(FEMA_EXPLODED_PATH)
-}
-
-# create FEMA GPKG Bounding Boxes directory (if not exists)
-if (!dir.exists(FEMA_GPKG_BB_PATH)) {
-  message(paste0(FEMA_GPKG_BB_PATH, " directory does not exist...\nCreating directory:\n > '", FEMA_GPKG_BB_PATH, "'"))
-  dir.create(FEMA_GPKG_BB_PATH)
-}
-
-if (!dir.exists(FEMA_FGB_BB_PATH)) {
-  message(paste0(FEMA_FGB_BB_PATH, " directory does not exist...\nCreating directory:\n > '", FEMA_FGB_BB_PATH, "'"))
-  dir.create(FEMA_FGB_BB_PATH)
-}
-
-# -------------------------------------------------------------------------------------
-# ---- Get list of FEMA FGB files in S3 bucket ----
-# -------------------------------------------------------------------------------------
-
-# list objects in S3 bucket, and regular expression match to nextgen_.gpkg pattern
-fema_list_command <- paste0('#!/bin/bash
-            # AWS S3 Bucket and Directory information
-            S3_BUCKET="', FEMA_S3_DIR, '" 
-            
-            # Regular expression pattern to match object keys
-            PATTERN=".fgb$"
-            
-            # AWS CLI command to list objects in the S3 bucket and use grep to filter them
-            S3_OBJECTS=$(aws s3 ls "$S3_BUCKET" --profile ', AWS_PROFILE, ' | awk \'{print $4}\' | grep -E "$PATTERN")
-            
-            echo "$S3_OBJECTS"'
-)
-
-# -------------------------------------------------------------------------------------
-# ---- Get the S3 buckets object keys for FEMA 100 FGB files ----
-# -------------------------------------------------------------------------------------
-
-# Run the script to get a list of the nextgen geopackages that matched the regular expression above
-FEMA_BUCKET_KEYS <- system(fema_list_command, intern = TRUE)
-
-# create bucket object URIs
-# FEMA_BUCKET_OBJECTS <- paste0(FEMA_S3_BUCKET, FEMA_S3_BUCKET_PREFIX, FEMA_BUCKET_KEYS)
-
-# -------------------------------------------------------------------------------------
-# ---- Download FEMA 100 year FGB files from S3 ----
-# -------------------------------------------------------------------------------------
-
-# Parse the selected S3 objects keys from the FEMA100 bucket directory copy them to the local destination directory if the file does NOT exist yet
-for (key in FEMA_BUCKET_KEYS) {
-  local_save_path <- paste0(FEMA_FGB_PATH, "/", key)
-  
-  if(!file.exists(local_save_path)) {
-    copy_cmd <- paste0('aws s3 cp ', FEMA_S3_BUCKET, FEMA_S3_BUCKET_PREFIX, key, " ", local_save_path, " --profile ", AWS_PROFILE)
-    
-    message("S3 object:\n > '", FEMA_S3_BUCKET, FEMA_S3_BUCKET_PREFIX, key, "'")
-    message("Downloading S3 object to:\n > '", local_save_path, "'")
-    # message("Copy command:\n > '", copy_cmd, "'")
-    
-    system(copy_cmd)
-    
-    message(" > '", key, "' download complete!")
-    message("----------------------------------")
-  } else {
-    message("File already exists at:\n > '", local_save_path, "'")
-  }
-}
-
-
-
-  
-
-
-
-
diff --git a/runners/cs_runner/new_domain.R b/runners/cs_runner/new_domain.R
index 4d2f223..9dda641 100644
--- a/runners/cs_runner/new_domain.R
+++ b/runners/cs_runner/new_domain.R
@@ -9,7 +9,7 @@ source("runners/cs_runner/utils.R")
 # install.packages("devtools")
 
 # # transect bucket prefix
-# S3_TRANSECTS_DIR <- paste0(S3_BUCKET_URI, VERSION, "/3D/transects/")
+# S3_TRANSECTS_DIR <- paste0(LYNKER_SPATIAL_HF_S3_URI, VERSION, "/3D/transects/")
 
 # paths to NEW DOMAIN datasets 
 # NEW_DOMAIN_FILES    <- list.files(NEW_DOMAIN_FLOWLINES_DIR, full.names = TRUE)
diff --git a/runners/cs_runner/test_cs_pts.R b/runners/cs_runner/test_cs_pts.R
new file mode 100644
index 0000000..7031bb6
--- /dev/null
+++ b/runners/cs_runner/test_cs_pts.R
@@ -0,0 +1,256 @@
+# Generate cross section points along transects for each VPU (test runner)
+source("runners/cs_runner/config.R")
+
+# # load libraries
+library(hydrofabric3D)
+library(dplyr)
+library(sf)
+
+# paths to nextgen datasets
+NEXTGEN_FILES  <- list.files(NEXTGEN_DIR, full.names = FALSE)
+
+# paths to transect datasets
+transect_files <- list.files(TRANSECTS_DIR, full.names = FALSE)
+transect_files <- transect_files[!grepl("updated_", transect_files)]
+
+REF_FEATURES   <- list.files(REF_FEATURES_GPKG_DIR, full.names = FALSE)
+
+# reference features dataframe
+ref_df <- data.frame(
+  vpu      = sapply(strsplit(REF_FEATURES, "_", fixed = TRUE), function(i) { i[1] }), 
+  ref_file = REF_FEATURES
+)
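+
+# e.g. a (hypothetical) reference features filename "01_reference_features.gpkg" yields vpu = "01"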
+
+# ensure the files are in the same order and matched up by VPU
+path_df <- align_files_by_vpu(
+  x    = NEXTGEN_FILES,
+  y    = transect_files,
+  base = BASE_DIR
+) %>% 
+  dplyr::left_join(
+    ref_df,
+    by = "vpu"
+  )
+
+# loop over the nextgen and transect datasets (by VPU) and extract point elevations across points on each transect line,
+# then classify the points, and create a parquet file with hy_id, cs_id, pt_id, X, Y, Z data.
+# Save parquet locally and upload to specified S3 bucket
+
+# output_path <- paste0(BASE_DIR, "/test_out/")
+
+for (i in 20:nrow(path_df)) {
+    
+  # i = 8
+  
+  start <- Sys.time()
+  
+  # nextgen file and full path
+  nextgen_file <- path_df$x[i]
+  nextgen_path <- paste0(NEXTGEN_DIR, nextgen_file)
+  
+  # transects file and full path
+  transect_file <- path_df$y[i]
+  transect_path <- paste0(TRANSECTS_DIR, transect_file)
+  
+  # reference features file and full path
+  ref_file <- path_df$ref_file[i]
+  ref_path <- paste0(REF_FEATURES_DIR, "gpkg/", ref_file)
+  
+  # current VPU being processed
+  VPU   <- path_df$vpu[i]
+  
+  start <- Sys.time()
+  
+  message("Creating VPU ", VPU, 
+          " cross section points:\n - flowpaths: '", nextgen_file,
+          "'\n - transects: '", transect_file, "'", 
+          "\n - waterbodies: '", ref_file, "'",
+          "'\n - start time: '", start, "'"
+  )
+  
+  ################### 
+  message("Reading in transects...\n > ", transect_file)
+  # read in transects data
+  transects <- sf::read_sf(transect_path)
+  
+  message("Reading in flowlines... \n > ", nextgen_file)
+  # read in nextgen data
+  flines <- sf::read_sf(nextgen_path, layer = "flowpaths")
+  
+  message("Reading in waterbodies... \n > ", ref_file)
+  # read in waterbodies reference features layer
+  waterbodies <- sf::read_sf(ref_path, layer = "waterbodies")
+  
+  # Update flowlines and transects to remove flowlines and transects that intersect with reference_features waterbodies
+  feature_subsets <- wb_intersects(flines, transects, waterbodies)
+  
+  # replace flowlines and transects objects with the updated subsets from "feature_subsets"
+  flines    <- flines[feature_subsets$valid_flowlines, ]
+  transects <- transects[feature_subsets$valid_transects, ]
+  
+  rm(waterbodies)
+  gc()
+  
+  start_cs_pts <- Sys.time()
+  
+  # # ------------------------------------------------------------------------
+  # # ------ TESTING DATA -------
+  # # ------------------------------------------------------------------------
+  #   flines <-
+  #     flines %>%
+  #     dplyr::slice(1:3500)
+  # 
+  # transects <-
+  #   transects %>%
+  #   dplyr::filter(hy_id %in% flines$id)
+  
+  # ------------------------------------------------------------------------
+  
+  message("Extracting cross section points (", start_cs_pts, ")")
+  # ----------------------------------------------------------------------------------------------------------------
+  # ---- STEP 1: Extract cs points from DEM ----
+  # ----------------------------------------------------------------------------------------------------------------
+  # system.time({
+  # tmp <- transects[1:10,]
+  # 
+  # get cross section point elevations
+  cs_pts <- hydrofabric3D::cross_section_pts(
+    
+    cs             = transects,
+    crosswalk_id   = "hy_id",
+    points_per_cs  = NULL,
+    min_pts_per_cs = 10,
+    dem            = DEM_PATH
+  )
+  
+  # })
+  
+  # ----------------------------------------------------------------------------------------------------------------
+  # ---- STEP 2: Remove any cross section that has ANY missing (NA) Z values, and classify the points ----
+  # ----------------------------------------------------------------------------------------------------------------
+  
+  # sf::write_sf(cs_pts, "/Users/anguswatters/Desktop/test_improve_cs_pts_11.gpkg")
+  # sf::write_sf(flines, "/Users/anguswatters/Desktop/test_improve_flines_11.gpkg")
+  # sf::write_sf(transects, "/Users/anguswatters/Desktop/test_improve_transects_11.gpkg")
+  
+  # sf::write_sf(flines, "/Users/anguswatters/Desktop/test_improve_flines_11_2.gpkg")
+  # sf::write_sf(transects, "/Users/anguswatters/Desktop/test_improve_transects_11_2.gpkg")
+  # cs_pts %>% 
+  #   dplyr::group_by(hy_id, cs_id) %>% 
+  #   dplyr::filter(!any(is.na(Z))) %>% 
+  #   dplyr::ungroup()
+  # 
+  # cs_pts %>% 
+  #   hydrofabric3D::drop_incomplete_cs_pts("hy_id")
+  
+  # system.time({
+  
+  # STEP 2: Remove any cross section that has ANY missing (NA) Z values, and classify the points 
+  cs_pts <- 
+    # cs_pts2 <- 
+    cs_pts %>% 
+    # dplyr::group_by(hy_id, cs_id) %>% 
+    # dplyr::filter(!any(is.na(Z))) %>% 
+    # dplyr::ungroup() %>% 
+    hydrofabric3D::drop_incomplete_cs_pts("hy_id") %>% 
+    hydrofabric3D::classify_points(
+      crosswalk_id             = "hy_id", 
+      pct_of_length_for_relief = PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF
+    )  
+  
+  # })
+  
+  ids_original_cs_pts <- hydrofabric3D::add_tmp_id(cs_pts)$tmp_id  
+  
+  # ids_original_cs_pts <- hydrofabric3D::add_tmp_id(cs_pts2)$tmp_id
+  
+  # output_path <- paste0(BASE_DIR, "/test_out/")
+  # ----------------------------------------------------------------------------------------------------------------
+  # ---- Cross section points parquet to S3 ----
+  # ----------------------------------------------------------------------------------------------------------------
+  
+  # convert geometries to X/Y columns and select/reorder the output columns
+  # fixed_pts <-
+  cs_pts <- 
+    cs_pts %>% 
+    hydrofabric3D:::pts_to_XY() %>% 
+    dplyr::select(
+      hy_id, cs_id, pt_id,
+      cs_lengthm,
+      relative_distance,
+      X, Y, Z,
+      class, point_type,
+      bottom, left_bank, right_bank, valid_banks, has_relief # newly added columns (03/06/2024)
+    ) %>% 
+    dplyr::mutate(
+      Z_source = CS_SOURCE
+    ) %>%
+    dplyr::relocate(hy_id, cs_id, pt_id, cs_lengthm, relative_distance, X, Y, Z, Z_source, 
+                    class, point_type, 
+                    bottom, left_bank, right_bank, valid_banks, has_relief)
+  
+  ids_before_align <- hydrofabric3D::add_tmp_id(cs_pts)$tmp_id
+  
+  message("Aligning banks and smoothing bottoms...")
+  cs_pts <- hydrofabric3D::align_banks_and_bottoms(cs_pts = cs_pts, crosswalk_id = "hy_id")
+  # fixed_pts <- hydrofabric3D::align_banks_and_bottoms(cs_pts = fixed_pts, crosswalk_id = "hy_id")
+  
+  ids_after_align <- hydrofabric3D::add_tmp_id(cs_pts)$tmp_id
+  
+  message("Reclassifying cross section points...")
+  cs_pts <- hydrofabric3D::classify_points(
+    cs_pts                    = cs_pts, 
+    crosswalk_id              = "hy_id",
+    pct_of_length_for_relief  = PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF
+  )
+  
+  ids_after_reclassify <- hydrofabric3D::add_tmp_id(cs_pts)$tmp_id
+  
+  # if(all(ids_original_cs_pts %in% ids_after_fixed_pts)) {
+  #   message("All hy_id/cs_ids in ORIGINAL DEM point extraction were found in the FIXED points")
+  # } else {
+  #   message(" >>> Missing hy_id/cs_ids in ORIGINAL DEM point extraction compared to the FIXED points")
+  # }
+  # 
+  if(all(ids_before_align %in% ids_after_align)) {
+    message("All hy_id/cs_ids are kept in tact after bank alignment and bottom smoothing")
+  } else {
+    message(" >>> Missing hy_id/cs_ids after bank alignment and bottom smoothing")
+  }
+  
+  if(all(ids_after_align %in% ids_after_reclassify)) {
+    message("All hy_id/cs_ids are kept in tact after RECLASSIFICATION")
+  } else {
+    message(" >>> Missing hy_id/cs_ids after RECLASSIFICATION")
+  }
+  
+  # all(hydrofabric3D::add_tmp_id(fixed_pts2)$tmp_id %in% hydrofabric3D::add_tmp_id(fixed_pts)$tmp_id)
+  # all(hydrofabric3D::add_tmp_id(fixed_pts4)$tmp_id %in% hydrofabric3D::add_tmp_id(fixed_pts)$tmp_id)
+  
+  ############################################################################## 
+  
+  # ----------------------------------------------------------------------------------------------------------------
+  # ---- Upload the cross section points parquet to S3 ----
+  # ----------------------------------------------------------------------------------------------------------------
+  
+  # name of file and path to save the cross section points parquet to
+  out_file <- paste0("nextgen_", path_df$vpu[i], "_cross_sections.parquet")
+  out_path <- paste0(BASE_DIR, "/test_out/", out_file)
+  
+  message("Saving cross section points to:\n - filepath: '", out_path, "'")
+  
+  # save cross section points as a parquet to out_path (<BASE_DIR>/test_out/nextgen_<VPU>_cross_sections.parquet)
+  arrow::write_parquet(cs_pts, out_path)
+  # sf::write_sf(cs_pts2, "/Users/anguswatters/Desktop/test_improve_cs_pts_classified_11.gpkg")
+  # sf::write_sf(cs_pts, "/Users/anguswatters/Desktop/test_improve_cs_pts_classified_11_2.gpkg")
+  
+  end <- Sys.time()
+  
+  message("Finished cross section point generation for VPU ", VPU)
+  message("- Completed at: ", end)
+  message("==========================")
+  
+  rm(cs_pts)
+  gc()
+  gc()
+}
diff --git a/runners/cs_runner/test_fix_cs_pts.R b/runners/cs_runner/test_fix_cs_pts.R
new file mode 100644
index 0000000..cbfaae8
--- /dev/null
+++ b/runners/cs_runner/test_fix_cs_pts.R
@@ -0,0 +1,278 @@
+# Rerun / fix cross section points from a previous run for each VPU (test runner)
+source("runners/cs_runner/config.R")
+
+# # load libraries
+library(hydrofabric3D)
+library(dplyr)
+library(sf)
+
+# paths to nextgen datasets
+NEXTGEN_FILES  <- list.files(NEXTGEN_DIR, full.names = FALSE)
+
+# paths to transect datasets
+transect_files <- list.files(TRANSECTS_DIR, full.names = FALSE)
+transect_files <- transect_files[!grepl("updated_", transect_files)]
+
+REF_FEATURES   <- list.files(REF_FEATURES_GPKG_DIR, full.names = FALSE)
+
+START_CS_PTS_DIR <-  '/Users/anguswatters/Desktop/lynker-spatial/test_out'
+start_cs_pts_paths     <- list.files(START_CS_PTS_DIR, full.names = TRUE)
+
+END_CS_PTS_DIR <-  '/Users/anguswatters/Desktop/lynker-spatial/test_fix_out'
+
+# reference features dataframe
+ref_df <- data.frame(
+  vpu      = sapply(strsplit(REF_FEATURES, "_", fixed = TRUE), function(i) { i[1] }), 
+  ref_file = REF_FEATURES
+)
+
+# ensure the files are in the same order and matched up by VPU
+path_df <- align_files_by_vpu(
+  x    = NEXTGEN_FILES,
+  y    = transect_files,
+  base = BASE_DIR
+) %>% 
+  dplyr::left_join(
+    ref_df,
+    by = "vpu"
+  )
+path_df$cs_pts_path <- start_cs_pts_paths
+
+# path_df
+
+# loop over the nextgen and transect datasets (by VPU) and extract point elevations across points on each transect line,
+# then classify the points, and create a parquet file with hy_id, cs_id, pt_id, X, Y, Z data.
+# Save parquet locally and upload to specified S3 bucket
+
+# output_path <- paste0(BASE_DIR, "/test_out/")
+
+for (i in 15:nrow(path_df)) {
+  # i = 8
+  
+  start <- Sys.time()
+  
+  start_cs_pts_path <- path_df$cs_pts_path[i]
+  
+  # nextgen file and full path
+  nextgen_file <- path_df$x[i]
+  nextgen_path <- paste0(NEXTGEN_DIR, nextgen_file)
+  
+  # transects file and full path
+  transect_file <- path_df$y[i]
+  transect_path <- paste0(TRANSECTS_DIR, transect_file)
+  
+  # reference features file and full path
+  ref_file <- path_df$ref_file[i]
+  ref_path <- paste0(REF_FEATURES_DIR, "gpkg/", ref_file)
+  
+  # current VPU being processed
+  VPU   <- path_df$vpu[i]
+  
+  start <- Sys.time()
+  
+  message("Creating VPU ", VPU, 
+          " cross section points:\n - flowpaths: '", nextgen_file,
+          "'\n - start cs pts: '", basename(start_cs_pts_path), "'", 
+          "'\n - transects: '", transect_file, "'", 
+          "\n - waterbodies: '", ref_file, "'",
+          "'\n - start time: '", start, "'"
+  )
+  
+  ################### 
+  message("Reading in starting CS points...\n > ", basename(start_cs_pts_path))
+  cs_pts <- arrow::read_parquet(start_cs_pts_path)
+  
+  message("Reading in transects...\n > ", transect_file)
+  # read in transects data
+  transects <- sf::read_sf(transect_path)
+  
+  message("Reading in flowlines... \n > ", nextgen_file)
+  # read in nextgen data
+  flines <- sf::read_sf(nextgen_path, layer = "flowpaths")
+  
+  message("Reading in waterbodies... \n > ", ref_file)
+  # read in waterbodies reference features layer
+  waterbodies <- sf::read_sf(ref_path, layer = "waterbodies")
+  
+  # Update flowlines and transects to remove flowlines and transects that intersect with reference_features waterbodies
+  feature_subsets <- wb_intersects(flines, transects, waterbodies)
+  
+  # replace flowlines and transects objects with the updated subsets from "feature_subsets"
+  flines    <- flines[feature_subsets$valid_flowlines, ]
+  transects <- transects[feature_subsets$valid_transects, ]
+  
+  rm(waterbodies)
+  gc()
+  
+  start_cs_pts <- Sys.time()
+  flines <- dplyr::rename(flines, hy_id = id)
+  
+  # test_ids <- 
+  #   flines %>% 
+  #   dplyr::slice(1:100) %>% 
+  #   .$hy_id %>% 
+  #   unique()
+  # flines <-
+  #   flines %>% 
+  #   dplyr::filter(
+  #     hy_id %in% test_ids
+  #   )
+  # transects <-
+  #   transects %>% 
+  #   dplyr::filter(
+  #     hy_id %in% test_ids
+  #   )
+  # cs_pts <-
+  #   cs_pts %>% 
+  #   dplyr::filter(
+  #     hy_id %in% test_ids
+  #   )
+  
+  ids_original_cs_pts <- hydrofabric3D::add_tmp_id(cs_pts)$tmp_id 
+  
+  # # ------------------------------------------------------------------------
+  # # ------ TESTING DATA -------
+  # # ------------------------------------------------------------------------
+  # system.time({
+    fixed_pts <- hydrofabric3D::get_improved_cs_pts(
+      cs_pts         = cs_pts,    # cross section points generated from hydrofabric3D::cross_section_pts()
+      net            = flines,    # original flowline network
+      # net            = flines,    # original flowline network
+      transects      = transects, # original transect lines
+      crosswalk_id   = "hy_id",
+      points_per_cs  = NULL, 
+      min_pts_per_cs = 10, # number of points per cross sections
+      dem            = DEM_PATH, # DEM to extract points from
+      scale          = EXTENSION_PCT, # How far to extend transects if the points need to be rechecked
+      pct_of_length_for_relief = PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF, # percent of cross sections length to be needed in relief calculation to consider cross section to "have relief"
+      fix_ids = FALSE,
+      verbose = TRUE
+    )
+  # })
+  # fixed_pts 
+  
+  # fixed_pts %>%
+  #   # cs_pts %>% 
+  #   sf::st_drop_geometry() %>%
+  #   # dplyr::left_join(
+  #   #   stream_order,
+  #   #   by = "hy_id"
+  #   # ) %>%
+  #   dplyr::select(dplyr::any_of("hy_id"), cs_id, 
+  #                 # vpu, order,
+  #                 valid_banks, has_relief) %>%
+  #   dplyr::group_by(
+  #     dplyr::across(dplyr::any_of(c("hy_id", "cs_id")))
+  #   ) %>%
+  #   dplyr::slice(1) %>%
+  #   dplyr::ungroup() %>%
+  #   # dplyr::group_by(
+  #   #   dplyr::across(dplyr::any_of(c("vpu", "order")))
+  #   # ) %>%
+  #   dplyr::count(valid_banks, has_relief)
+  
+  
+  # transects %>% 
+  #   sf::st_drop_geometry() %>% 
+  #   dplyr::group_by(id, cs_id)
+  # fixed_pts2$is_extended %>% sum()
+  ids_after_fixed_pts <- hydrofabric3D::add_tmp_id(fixed_pts)$tmp_id
+  # ----------------------------------------------------------------------------------------------------------------
+  # ---- Cross section points parquet to S3 ----
+  # ----------------------------------------------------------------------------------------------------------------
+  
+  # drop geometries and select/reorder the output columns
+  fixed_pts <-
+    fixed_pts %>% 
+    # dplyr::mutate(
+    #   X = sf::st_coordinates(.)[,1],
+    #   Y = sf::st_coordinates(.)[,2]
+    # ) %>%
+    sf::st_drop_geometry() %>% 
+    dplyr::select(
+      hy_id, cs_id, pt_id,
+      cs_lengthm,
+      relative_distance,
+      X, Y, Z,
+      class, point_type,
+      bottom, left_bank, right_bank, valid_banks, has_relief # newly added columns (03/06/2024)
+    )
+  
+  # # Drop point geometries, leaving just X, Y, Z values
+  # fixed_pts <- sf::st_drop_geometry(fixed_pts)
+  
+  # add Z_source column for source of elevation data
+  fixed_pts <-
+    fixed_pts %>%
+    dplyr::mutate(
+      Z_source = CS_SOURCE
+    ) %>%
+    dplyr::relocate(hy_id, cs_id, pt_id, cs_lengthm, relative_distance, X, Y, Z, Z_source, 
+                    class, point_type, 
+                    bottom, left_bank, right_bank, valid_banks, has_relief)
+  
+  ids_before_align <- hydrofabric3D::add_tmp_id(fixed_pts)$tmp_id
+  
+  message("Aligning banks and smoothing bottoms...")
+  fixed_pts <- hydrofabric3D::align_banks_and_bottoms(cs_pts = fixed_pts, crosswalk_id = "hy_id")
+  # fixed_pts <- hydrofabric3D::align_banks_and_bottoms(cs_pts = fixed_pts, crosswalk_id = "hy_id")
+  
+  ids_after_align <- hydrofabric3D::add_tmp_id(fixed_pts)$tmp_id
+  
+  message("Reclassifying cross section points...")
+  fixed_pts <- hydrofabric3D::classify_points(
+    cs_pts                    = fixed_pts, 
+    crosswalk_id              = "hy_id",
+    pct_of_length_for_relief  = PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF
+  )
+  
+  ids_after_reclassify <- hydrofabric3D::add_tmp_id(fixed_pts)$tmp_id
+  
+  if(all(ids_original_cs_pts %in% ids_after_fixed_pts)) {
+    message("All hy_id/cs_ids in ORIGINAL DEM point extraction were found in the FIXED points")
+  } else {
+    message(" >>> Missing hy_id/cs_ids in ORIGINAL DEM point extraction compared to the FIXED points")
+  }
+  
+  if(all(ids_before_align %in% ids_after_align)) {
+    message("All hy_id/cs_ids are kept in tact after bank alignment and bottom smoothing")
+  } else {
+    message(" >>> Missing hy_id/cs_ids after bank alignment and bottom smoothing")
+  }
+  
+  if(all(ids_after_align %in% ids_after_reclassify)) {
+    message("All hy_id/cs_ids are kept in tact after RECLASSIFICATION")
+  } else {
+    message(" >>> Missing hy_id/cs_ids after RECLASSIFICATION")
+  }
+  
+  # all(hydrofabric3D::add_tmp_id(fixed_pts2)$tmp_id %in% hydrofabric3D::add_tmp_id(fixed_pts)$tmp_id)
+  # all(hydrofabric3D::add_tmp_id(fixed_pts4)$tmp_id %in% hydrofabric3D::add_tmp_id(fixed_pts)$tmp_id)
+  
+  ############################################################################## 
+  
+  # ----------------------------------------------------------------------------------------------------------------
+  # ---- Upload the cross section points parquet to S3 ----
+  # ----------------------------------------------------------------------------------------------------------------
+  
+  # name of file and path to save the fixed cross section points parquet to
+  out_file <- paste0("nextgen_fixed_", path_df$vpu[i], "_cross_sections.parquet")
+  out_path <- paste0(BASE_DIR, "/test_fix_out/", out_file)
+  
+  message("Saving cross section points to:\n - filepath: '", out_path, "'")
+  
+  # save cross section points as a parquet to out_path (<BASE_DIR>/test_fix_out/nextgen_fixed_<VPU>_cross_sections.parquet)
+  arrow::write_parquet(fixed_pts, out_path)
+  # sf::write_sf(cs_pts2, "/Users/anguswatters/Desktop/test_improve_cs_pts_classified_11.gpkg")
+  # sf::write_sf(cs_pts, "/Users/anguswatters/Desktop/test_improve_cs_pts_classified_11_2.gpkg")
+  
+  end <- Sys.time()
+  
+  message("Finished cross section point generation for VPU ", VPU)
+  message("- Completed at: ", end)
+  message("==========================")
+  
+  rm(cs_pts, fixed_pts)
+  gc()
+  gc()
+}
diff --git a/runners/cs_runner/utils.R b/runners/cs_runner/utils.R
index e52352b..ef43e18 100644
--- a/runners/cs_runner/utils.R
+++ b/runners/cs_runner/utils.R
@@ -1,47 +1,160 @@
 # Create an empty file structure 
 # base_dir: character, top level directory path
-# domain_dirname: character, name of the intended new domain directory, if folder exists, then the required subdirectories are created (if they DO NOT exist)
-
 # Directory tree:
 # base_dir/
-#   └── domain_dirname/
-#     ├── flowlines/
+#   └── lynker-spatial/
+#     ├── hydrofabric/
 #     ├── dem/
-#     ├── transects/
-#     ├── cross_sections/
-#     └── cs_pts/
-create_new_domain_dirs <- function(base_dir, domain_dirname, with_output = FALSE) {
+#         ├── vrt/
+#         ├── tif/
+#     ├── cs-extension-polygons/
+create_local_hydrofabric_base_dirs <- function(base_dir) {
+  
   
   # build paths
-  domain_dir         <- paste0(base_dir, "/", domain_dirname)
-  flowlines_dir      <- paste0(domain_dir, "/flowlines")
-  domain_subset_dir  <- paste0(domain_dir, "/domain_subset")
-  dem_dir            <- paste0(domain_dir, "/dem")
-  transects_dir      <- paste0(domain_dir, "/transects")
-  cross_sections_dir <- paste0(domain_dir, "/cross_sections")
-  cs_pts_dir         <- paste0(domain_dir, "/cs_pts")
-  vpu_subsets_dir    <- paste0(domain_dir, "/vpu-subsets")
+  hydrofabric_dir <- paste0(base_dir, "/hydrofabric")
   
-  if(with_output) {
-    output_dir       <- paste0(domain_dir, "/outputs")
+  # DEM dirs
+  dem_dir                    <- file.path(base_dir, "dem")
+  dem_vrt_dir                <- file.path(dem_dir, "vrt")
+  dem_tif_dir                <- file.path(dem_dir, "tif")
+  
+  # polygons for transect extensions
+  cs_extension_polygons_dir     <- file.path(base_dir, "cs-extension-polygons")
+  
+  # FEMA data 
+  fema_dir                      <- file.path(base_dir, "fema")
+  
+  fema_fgb_dir     <- file.path(fema_dir, "fema-fgb")
+  fema_geojson_dir <- file.path(fema_dir, "fema-geojson")
+  fema_clean_dir   <- file.path(fema_dir, "fema-clean")
+  fema_gpkg_dir    <- file.path(fema_dir, "fema-gpkg")
+  
+  # BY VPU folders 
+  VPU_IDS               <- get_vpu_ids()
+  fema_by_vpu_dir       <- file.path(fema_dir, "fema-by-vpu")
+  fema_by_vpu_subdirs   <- paste0(fema_by_vpu_dir, "/vpu-", VPU_IDS)
+  
+  # create base directories
+  create_if_not_exists(base_dir)
+  create_if_not_exists(hydrofabric_dir)
+  
+  # DEM dirs
+  create_if_not_exists(dem_dir)
+  create_if_not_exists(dem_vrt_dir)
+  create_if_not_exists(dem_tif_dir)
+  
+  # extension polygons
+  create_if_not_exists(cs_extension_polygons_dir)
+  
+  
+  # Create FEMA folders
+  create_if_not_exists(fema_dir)
+  create_if_not_exists(fema_fgb_dir)
+  create_if_not_exists(fema_geojson_dir)
+  create_if_not_exists(fema_clean_dir)
+  create_if_not_exists(fema_gpkg_dir)
+  create_if_not_exists(fema_by_vpu_dir)
+  
+  for (path in fema_by_vpu_subdirs) {
+    create_if_not_exists(path)
   }
   
-  create_if_not_exists <- function(dir_path) {
-    if (!dir.exists(dir_path)) {
-      dir.create(dir_path, recursive = TRUE)
-      message("Created directory: '", dir_path, "'\n")
-    }
+}
+
+get_vpu_ids <- function() {
+  VPU_IDS              <- c('01', '02', '03N', '03S', '03W', '04', '05', '06', '07', '08', '09', 
+                            '10L', '10U', '11', '12', '13', '14', '15', '16', '17', '18', '20', '21')
+  # VPU_IDS              <- sf::st_drop_geometry(nhdplusTools::get_boundaries())$VPUID
+  
+  return(VPU_IDS)
+  
+}
+
+# # Base directory for local file storage
+# BASE_DIR    <- '/Volumes/T7SSD/lynker-spatial'
+# base_dir <- BASE_DIR
+# # FEMA100 year flood map FGB save location (temporary, will be deleted after processing)
+# FEMA_FGB_PATH        <- file.path(BASE_DIRS_LIST$fema_dir, "fema_fgb")
+# FEMA_GEOJSON_PATH    <- file.path(BASE_DIRS_LIST$fema_dir, "fema_geojson")
+# FEMA_CLEAN_PATH      <- file.path(BASE_DIRS_LIST$fema_dir, "fema_clean")
+# FEMA_GPKG_PATH       <- file.path(BASE_DIRS_LIST$fema_dir, "fema_gpkg")
+# FEMA_BY_VPU_PATH     <- file.path(BASE_DIRS_LIST$fema_dir, "FEMA_BY_VPU")
+# 
+# 
+# VPU_IDS              <- c('01', '02', '03N', '03S', '03W', '04', '05', '06', '07', '08', '09', 
+#                           '10L', '10U', '11', '12', '13', '14', '15', '16', '17', '18', '20', '21')
+# # VPU_IDS              <- sf::st_drop_geometry(nhdplusTools::get_boundaries())$VPUID
+# 
+# paste0("'", sf::st_drop_geometry(nhdplusTools::get_boundaries())$VPUID, "'", collapse = ", ")
+# FEMA_VPU_SUBFOLDERS  <- paste0(FEMA_BY_VPU_PATH, "/VPU_", VPU_IDS)
+
+# Create an empty file structure for a new version within a specified base_dir
+# base_dir: character, top level directory path
+# Directory tree:
+# base_dir/
+#   └── lynker-spatial/
+#     ├── hydrofabric/
+#         ├── version_number/
+#             ├── network/
+#             ├── transects/
+#             ├── cross-sections/
+#                 ├── dem/
+#                 ├── dem-ml/
+#                 ├── dem-coastal-bathy/
+#                 ├── dem-points/
+create_new_version_dirs <- function(base_dir, version, with_output = FALSE) {
+  # version = "v3.0"
+  # base_dir <- BASE_DIR
+  
+  # build paths
+  hydrofabric_dir   <- paste0(base_dir, "/hydrofabric")
+  version_base_dir  <- paste0(hydrofabric_dir, "/", version)
+  
+  # polygons for transect extensions
+  ml_dir                     <- paste0(version_base_dir, "/ml")
+  
+  # reference features 
+  ref_features_dir           <- paste0(version_base_dir, "/reference-features")
+  
+  # conus network gpkg
+  network_dir                <- paste0(version_base_dir, "/network")
+  
+  # transects
+  transects_dir              <- paste0(version_base_dir, "/transects")
+  
+  # cross sections dirs
+  cross_sections_dir                 <- paste0(version_base_dir, "/cross-sections")
+  cross_sections_dem_dir             <- paste0(cross_sections_dir, "/dem")
+  cross_sections_ml_dir              <- paste0(cross_sections_dir, "/dem-ml")
+  cross_sections_coastal_bathy_dir   <- paste0(cross_sections_dir, "/dem-coastal-bathy")
+  cross_sections_dem_pts_dir         <- paste0(cross_sections_dir, "/dem-points")
+  
+  if(with_output) {
+    output_dir       <- paste0(version_base_dir, "/outputs")
   }
   
-  # create directories
-  create_if_not_exists(domain_dir)
-  create_if_not_exists(flowlines_dir)
-  create_if_not_exists(domain_subset_dir)
-  create_if_not_exists(dem_dir)
+  # create version BASE dir
+  create_if_not_exists(version_base_dir)
+  
+  # CONUS dir
+  create_if_not_exists(network_dir)
+  
+  # ML data
+  create_if_not_exists(ml_dir)
+  
+  # reference features data
+  create_if_not_exists(ref_features_dir)
+  
+  # transects
   create_if_not_exists(transects_dir)
+  
+  # CS pts
   create_if_not_exists(cross_sections_dir)
-  create_if_not_exists(cs_pts_dir)
-  create_if_not_exists(vpu_subsets_dir)
+  create_if_not_exists(cross_sections_dem_dir)
+  create_if_not_exists(cross_sections_ml_dir)
+  create_if_not_exists(cross_sections_coastal_bathy_dir)
+  create_if_not_exists(cross_sections_dem_pts_dir)
   
   if(with_output) {
     create_if_not_exists(output_dir)
@@ -49,39 +162,101 @@ create_new_domain_dirs <- function(base_dir, domain_dirname, with_output = FALSE
   
 }
 
-# get path strings for a domain dir (based of a base dir and domain dirname)
-# NOTE: this does NOT guarentee that these folders exist, 
-# NOTE: it just gets the paths if they were created by create_new_domain_dirs() 
-get_new_domain_paths <- function(base_dir, domain_dirname, with_output = FALSE) {
+create_if_not_exists <- function(dir_path) {
+  if (!dir.exists(dir_path)) {
+    dir.create(dir_path, recursive = TRUE)
+    message("Created directory: '", dir_path, "'\n")
+  }
+}
+
+# get a list of top level directories for main directory
+get_base_dir_paths <- function(base_dir) {
+  # base_dir = BASE_DIR
+  # version = "v3.0"
   
-  # build paths
-  domain_dir         <- paste0(base_dir, "/", domain_dirname)
-  flowlines_dir      <- paste0(domain_dir, "/flowlines")
-  domain_subset_dir  <- paste0(domain_dir, "/domain_subset")
-  dem_dir            <- paste0(domain_dir, "/dem")
-  transects_dir      <- paste0(domain_dir, "/transects")
-  cross_sections_dir <- paste0(domain_dir, "/cross_sections")
-  cs_pts_dir         <- paste0(domain_dir, "/cs_pts")
-  vpu_subsets_dir    <- paste0(domain_dir, "/vpu-subsets")
-  output_dir         <- ifelse(with_output, paste0(domain_dir, "/outputs"), NA)
+  hydrofabric_dir  <- file.path(base_dir, "hydrofabric")
+  
+  dem_dir          <- file.path(base_dir, "dem")
+  dem_vrt_dir      <- file.path(base_dir, "dem", "vrt")
+  dem_tif_dir      <- file.path(base_dir, "dem", "tif")
+  
+  cs_extension_polygons_dir <- file.path(base_dir, "cs-extension-polygons")
+  
+  # FEMA data 
+  
+  fema_dir         <- file.path(base_dir, "fema")
+  
+  fema_fgb_dir     <- file.path(fema_dir, "fema-fgb")
+  fema_geojson_dir <- file.path(fema_dir, "fema-geojson")
+  fema_clean_dir   <- file.path(fema_dir, "fema-clean")
+  fema_gpkg_dir    <- file.path(fema_dir, "fema-gpkg")
+  
+  # BY VPU folders 
+  VPU_IDS               <- get_vpu_ids()
+  fema_by_vpu_dir       <- file.path(fema_dir, "fema-by-vpu")
+  fema_by_vpu_subdirs   <- paste0(fema_by_vpu_dir, "/vpu-", VPU_IDS)
+  
+  return(
+      list(
+      hydrofabric_dir = hydrofabric_dir,
+      dem_dir = dem_dir,
+      dem_vrt_dir = dem_vrt_dir, 
+      dem_tif_dir = dem_tif_dir,
+      cs_extension_polygons_dir = cs_extension_polygons_dir,
+      fema_dir = fema_dir,
+      fema_fgb_dir = fema_fgb_dir,
+      fema_geojson_dir = fema_geojson_dir,
+      fema_clean_dir = fema_clean_dir,
+      fema_gpkg_dir = fema_gpkg_dir,
+      fema_by_vpu_dir = fema_by_vpu_dir,
+      fema_by_vpu_subdirs = fema_by_vpu_subdirs
+    )
+  )
+}
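+
+# Usage sketch (assumes BASE_DIR is set, e.g. in config_vars.R):
+# base_dirs <- get_base_dir_paths(BASE_DIR)
+# base_dirs$fema_gpkg_dir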
 
+# get list of a specific directories in a version directory
+get_version_base_dir_paths <- function(base_dir, version) {
+  # base_dir = BASE_DIR
+  # version = "v3.0"
+  
+  hydrofabric_dir   <- file.path(base_dir, "hydrofabric")
+  
+  version_base_dir  <- file.path(hydrofabric_dir, version)
+  
+  # polygons for transect extensions
+  ml_dir                     <- file.path(version_base_dir, "ml")
+  
+  # reference features 
+  ref_features_dir           <- file.path(version_base_dir, "reference-features")
+  
+  # conus network gpkg
+  network_dir                <- file.path(version_base_dir, "network")
+  
+  # transects
+  transects_dir              <- file.path(version_base_dir, "transects")
+  
+  # cross sections dirs
+  cross_sections_dir                 <- file.path(version_base_dir, "cross-sections")
+  cross_sections_dem_dir             <- file.path(cross_sections_dir, "dem")
+  cross_sections_ml_dir              <- file.path(cross_sections_dir, "dem-ml")
+  cross_sections_coastal_bathy_dir   <- file.path(cross_sections_dir, "dem-coastal-bathy")
+  cross_sections_dem_pts_dir         <- file.path(cross_sections_dir, "dem-points")
   
-  # named list of file paths
   return(
     list(
-      base_dir           = base_dir, 
-      domain_dir         = domain_dir,
-      flowlines_dir      = flowlines_dir,
-      domain_subset_dir  = domain_subset_dir,
-      dem_dir            = dem_dir,
+      hydrofabric_dir    = hydrofabric_dir,
+      version_base_dir   = version_base_dir,
+      ref_features_dir   = ref_features_dir, 
+      network_dir        = network_dir,
+      ml_dir = ml_dir,
       transects_dir      = transects_dir,
       cross_sections_dir = cross_sections_dir,
-      cs_pts_dir         = cs_pts_dir,
-      vpu_subsets_dir    = vpu_subsets_dir,
-      output_dir         = output_dir
+      cross_sections_dem_dir     = cross_sections_dem_dir,
+      cross_sections_dem_pts_dir = cross_sections_dem_pts_dir,
+      cross_sections_ml_dir      = cross_sections_ml_dir,
+      cross_sections_coastal_bathy_dir = cross_sections_coastal_bathy_dir
     )
   )
-  
 }
 
 list_s3_objects <- function(s3_bucket, pattern = NULL, aws_profile = NULL) {
@@ -111,6 +286,49 @@ list_s3_objects <- function(s3_bucket, pattern = NULL, aws_profile = NULL) {
   return(ls_output)
 }
 
+download_tiles <- function(tile_paths, output_dir) {
+  # output_dir <- DEM_TIF_DIR
+  # tile_paths
+  # error_tiles <- 
+  tif_save_paths       <- paste0(output_dir, "/", basename(tile_paths))
+  
+  error_tiles <- data.frame(
+    tile   = basename(tile_paths),
+    status = TRUE
+  )
+  
+  for (i in seq_along(tile_paths)) {
+    # i = 1
+    tile_path         <- tile_paths[i]
+    tif_save_path     <- tif_save_paths[i]
+    
+    message("[", i, "]", 
+            "\n > Tile: ", basename(tile_path),
+            "\n > Output path: ", tif_save_path
+    )
+    
+    download_tif_cmd    <- paste0("curl -o ", tif_save_path, " ", tile_path)
+    
+    tryCatch({
+      
+      tif_download_output <- system(download_tif_cmd, intern = TRUE)
+      message("   > Succesfully downloaded tile: ", basename(tile_path))
+      
+    }, error = function(e) {
+      
+      message("Error downloading tile: ", basename(tile_path))
+      message("ERROW below: \n  ", e)
+      
+      error_tiles[error_tiles$tile == basename(tile_path), ] <- FALSE
+      
+    })
+    
+  }
+  
+  return(error_tiles)
+  
+}
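+
+# Usage sketch (hypothetical tile URL; assumes DEM_TIF_DIR is defined in config):
+# tile_paths  <- c("https://prd-tnm.s3.amazonaws.com/StagedProducts/Elevation/13/TIFF/current/n30w090/USGS_13_n30w090.tif")
+# error_tiles <- download_tiles(tile_paths, output_dir = DEM_TIF_DIR)
+# error_tiles[!error_tiles$status, ]  # tiles that failed to download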
+
 # Given 2 character vectors of filenames both including VPU strings after a "nextgen_" string, match them together to
 # make sure they are aligned and in the same order
 # x is a character vector of file paths with a VPU ID preceeded by a "nextgen_" string 
@@ -430,139 +648,1872 @@ add_predicate_group_id <- function(polys, predicate) {
   
 }
 
+# TODO: Delete these NEW DOMAIN functions...
+# Create an empty file structure 
+# base_dir: character, top level directory path
+# domain_dirname: character, name of the intended new domain directory, if folder exists, then the required subdirectories are created (if they DO NOT exist)
 
-# utility function for getting transects extended and 
-# matching cross section points that went through "get_improved_cs_pts()" and that were extended for improvement
-# returns the extended version of the transects 
-match_transects_to_extended_cs_pts <- function(transect_lines, fixed_cs_pts, crosswalk_id) {
-  
-  # transect_lines = transects
-  # fixed_cs_pts   = fixed_pts
-  # crosswalk_id   = CROSSWALK_ID
-  
-  fixed_cs_pts <- nhdplusTools::rename_geometry(fixed_cs_pts, "geometry")
-  transect_lines    <- nhdplusTools::rename_geometry(transect_lines, "geometry")
-  
-  # get the counts of each point type to add this data to the transect_lines dataset
-  point_type_counts <- hydrofabric3D::get_point_type_counts(classified_pts = fixed_cs_pts, 
-                                                            crosswalk_id = crosswalk_id)
-  # Check the number of cross sections that were extended
-  message("Subsetting cross section points generated after extending transect_lines...")
-  
-  # extract cross section points that have an "is_extended" value of TRUE
-  extended_pts <- 
-    fixed_cs_pts %>%
-    dplyr::filter(is_extended) %>%
-    hydrofabric3D::add_tmp_id(x = crosswalk_id)
-  
-  # extended_pts %>% 
-  #   get_unique_tmp_ids() %>% 
-  #   length()
-  
-  # extract transect_lines that have a "crosswalk_id" in the "extended_pts" dataset
-  update_transect_lines <- 
-    transect_lines %>%
-    hydrofabric3D::add_tmp_id(x = crosswalk_id) %>%
-    dplyr::filter(tmp_id %in% unique(extended_pts$tmp_id))
-  
-  cs_pt_uids    <- unique(hydrofabric3D::add_tmp_id(fixed_cs_pts, x = crosswalk_id)$tmp_id)
-  
-  # If any transect_lines were extended, update the transect_lines dataset, and overwrite local and S3 transect_lines geopackages
-  if (nrow(update_transect_lines) > 0) {
-    message("Updating ", nrow(update_transect_lines), " transect_lines")
-    
-    
-    update_transect_lines <- 
-      update_transect_lines %>% 
-      dplyr::rename(hy_id := !!sym(crosswalk_id)) 
-    
-    update_transect_lines <- 
-      update_transect_lines %>%
-      # apply extend_by_percent function to each transect line:
-      hydrofabric3D:::extend_by_percent(
-        pct        = EXTENSION_PCT,
-        length_col = "cs_lengthm"
-      )
-    
-    update_transect_lines <- hydroloom::rename_geometry(update_transect_lines, "geometry")
-    
-    update_transect_lines <- 
-      update_transect_lines %>%  
-      dplyr::rename(!!sym(crosswalk_id) := hy_id)
-
-    # cs_pt_uids    <- unique(hydrofabric3D::add_tmp_id(fixed_cs_pts, x = get(crosswalk_id))$tmp_id)
-    # transect_uids <- unique(hydrofabric3D::add_tmp_id(transect_lines, x = get(crosswalk_id))$tmp_id)
-    
-    # Filter down to ONLY points that were finalized and rectified from rectify_cs_pts()
-    # Remove old transect_lines that have "tmp_id" in "extended_pts" (transect_lines that were unchanged and are "good_to_go")
-    # and then replace with old transect_lines with the "update_transect_lines"
-    out_transect_lines <-
-      transect_lines %>%
-      hydrofabric3D::add_tmp_id(x = crosswalk_id) %>%
-      dplyr::filter(tmp_id %in% cs_pt_uids) %>% 
-      dplyr::filter(!tmp_id %in% unique(extended_pts$tmp_id)) %>%
-      dplyr::bind_rows(
-        dplyr::mutate(update_transect_lines, is_extended = TRUE)
-      )
-    
-    # transect_lines %>% 
-    #   hydrofabric3D::add_tmp_id(x = "hy_id") %>%
-    #   # dplyr::filter(!tmp_id %in% unique(extended_pts$tmp_id)) %>%
-    #   dplyr::filter(tmp_id %in% unique(hydrofabric3D::add_tmp_id(fixed_pts, x = "hy_id")$tmp_id)) %>% # Subset down to the remaining tmp_ids in the fixed points
-    #   dplyr::filter(!tmp_id %in% unique(extended_pts$tmp_id)) %>% # remove the tmp_ids that we are going add back in with the extended versions of those tmp_ids
-    #   dplyr::bind_rows( # bring in the new updated extended transect_lines
-    #     dplyr::mutate(
-    #       update_transect_lines,
-    #       is_extended = TRUE
-    #     )
-    #   )  
-  } else {
-    # If no transect_lines were extended
-    out_transect_lines <- 
-      transect_lines %>%
-      hydrofabric3D::add_tmp_id(x = crosswalk_id) %>%
-      dplyr::filter(tmp_id %in% cs_pt_uids) %>% 
-      # dplyr::filter(tmp_id %in% unique(hydrofabric3D::add_tmp_id(fixed_cs_pts, x = get(crosswalk_id))$tmp_id)) %>%
-      dplyr::filter(!tmp_id %in% unique(extended_pts$tmp_id))
+# Directory tree:
+# base_dir/
+#   └── domain_dirname/
+#     ├── flowlines/
+#     ├── dem/
+#     ├── transects/
+#     ├── cross_sections/
+#     └── cs_pts/
+create_new_domain_dirs <- function(base_dir, domain_dirname, with_output = FALSE) {
+  
+  # build paths
+  domain_dir         <- paste0(base_dir, "/", domain_dirname)
+  flowlines_dir      <- paste0(domain_dir, "/flowlines")
+  domain_subset_dir  <- paste0(domain_dir, "/domain_subset")
+  dem_dir            <- paste0(domain_dir, "/dem")
+  transects_dir      <- paste0(domain_dir, "/transects")
+  cross_sections_dir <- paste0(domain_dir, "/cross_sections")
+  cs_pts_dir         <- paste0(domain_dir, "/cs_pts")
+  vpu_subsets_dir    <- paste0(domain_dir, "/vpu-subsets")
+  
+  if(with_output) {
+    output_dir       <- paste0(domain_dir, "/outputs")
   }
   
-  # Finalize new transect_lines
-  out_transect_lines <- 
-    out_transect_lines %>%
-    dplyr::left_join(
-      point_type_counts, 
-      by = c(crosswalk_id, "cs_id")
-    ) %>%
-    dplyr::left_join(
-      dplyr::ungroup(
-        dplyr::slice(
-          dplyr::group_by(
-            dplyr::select(sf::st_drop_geometry(fixed_cs_pts),
-                          dplyr::any_of(crosswalk_id), 
-                          cs_id, bottom, left_bank, right_bank, valid_banks, has_relief
-            ),
-            dplyr::across(dplyr::any_of(c(crosswalk_id, "cs_id")))
-          ),
-          1
-        )
-      ),
-      by = c(crosswalk_id, "cs_id")
-    ) %>%
-    dplyr::select(
-      dplyr::any_of(crosswalk_id),
-      cs_source, cs_id, cs_measure, cs_lengthm,
-      # sinuosity,
-      is_extended,
-      left_bank_count, right_bank_count, channel_count, bottom_count,
-      bottom, left_bank, right_bank, valid_banks, has_relief,
-      geometry
-    ) %>% 
-    dplyr::mutate(
-      is_extended = ifelse(is.na(is_extended), FALSE, is_extended)
-    )  
+  # create directories
+  create_if_not_exists(domain_dir)
+  create_if_not_exists(flowlines_dir)
+  create_if_not_exists(domain_subset_dir)
+  create_if_not_exists(dem_dir)
+  create_if_not_exists(transects_dir)
+  create_if_not_exists(cross_sections_dir)
+  create_if_not_exists(cs_pts_dir)
+  create_if_not_exists(vpu_subsets_dir)
+  
+  if(with_output) {
+    create_if_not_exists(output_dir)
+  }
   
-  return(out_transect_lines)
 }
 
+# get path strings for a domain dir (based off a base dir and domain dirname)
+# NOTE: this does NOT guarantee that these folders exist, 
+# NOTE: it just gets the paths if they were created by create_new_domain_dirs() 
+get_new_domain_paths <- function(base_dir, domain_dirname, with_output = FALSE) {
+  
+  # build paths
+  domain_dir         <- paste0(base_dir, "/", domain_dirname)
+  flowlines_dir      <- paste0(domain_dir, "/flowlines")
+  domain_subset_dir  <- paste0(domain_dir, "/domain_subset")
+  dem_dir            <- paste0(domain_dir, "/dem")
+  transects_dir      <- paste0(domain_dir, "/transects")
+  cross_sections_dir <- paste0(domain_dir, "/cross_sections")
+  cs_pts_dir         <- paste0(domain_dir, "/cs_pts")
+  vpu_subsets_dir    <- paste0(domain_dir, "/vpu-subsets")
+  output_dir         <- ifelse(with_output, paste0(domain_dir, "/outputs"), NA)
+  
+  
+  # named list of file paths
+  return(
+    list(
+      base_dir           = base_dir, 
+      domain_dir         = domain_dir,
+      flowlines_dir      = flowlines_dir,
+      domain_subset_dir  = domain_subset_dir,
+      dem_dir            = dem_dir,
+      transects_dir      = transects_dir,
+      cross_sections_dir = cross_sections_dir,
+      cs_pts_dir         = cs_pts_dir,
+      vpu_subsets_dir    = vpu_subsets_dir,
+      output_dir         = output_dir
+    )
+  )
+  
+}
+
+download_3dep_vrt <- function(base_dir) {
+  
+  ## Cmd+A/Cmd+C from: http://prd-tnm.s3.amazonaws.com/index.html?prefix=StagedProducts/Elevation/13/TIFF/current/
+  ### paste w/ `datapasta::vector_paste_vertical()`
+  ### Some manual cleaning of header and footer mess...
+  ### Reason? Un-scrapable page, and no index.gpkg...
+  
+  t <- c(
+    "0              n06e162/",
+    "0              n06e163/",
+    "0              n07e134/",
+    "0              n07e151/",
+    "0              n07e152/",
+    "0              n07e158/",
+    "0              n08e134/",
+    "0              n08e151/",
+    "0              n08e152/",
+    "0              n08e158/",
+    "0              n09e134/",
+    "0              n10e138/",
+    "0              n14e144/",
+    "0              n15e145/",
+    "0              n16e145/",
+    "0              n18w065/",
+    "0              n18w066/",
+    "0              n18w067/",
+    "0              n18w068/",
+    "0              n19w065/",
+    "0              n19w066/",
+    "0              n19w067/",
+    "0              n19w068/",
+    "0              n19w156/",
+    "0              n20w155/",
+    "0              n20w156/",
+    "0              n20w157/",
+    "0              n21w156/",
+    "0              n21w157/",
+    "0              n21w158/",
+    "0              n22w157/",
+    "0              n22w158/",
+    "0              n22w159/",
+    "0              n22w160/",
+    "0              n22w161/",
+    "0              n23w160/",
+    "0              n23w161/",
+    "0              n25w081/",
+    "0              n25w082/",
+    "0              n25w083/",
+    "0              n26w081/",
+    "0              n26w082/",
+    "0              n26w098/",
+    "0              n26w099/",
+    "0              n27w081/",
+    "0              n27w082/",
+    "0              n27w083/",
+    "0              n27w098/",
+    "0              n27w099/",
+    "0              n27w100/",
+    "0              n28w081/",
+    "0              n28w082/",
+    "0              n28w083/",
+    "0              n28w097/",
+    "0              n28w098/",
+    "0              n28w099/",
+    "0              n28w100/",
+    "0              n28w101/",
+    "0              n29w081/",
+    "0              n29w082/",
+    "0              n29w083/",
+    "0              n29w090/",
+    "0              n29w096/",
+    "0              n29w097/",
+    "0              n29w098/",
+    "0              n29w099/",
+    "0              n29w100/",
+    "0              n29w101/",
+    "0              n29w104/",
+    "0              n30w081/",
+    "0              n30w082/",
+    "0              n30w083/",
+    "0              n30w084/",
+    "0              n30w085/",
+    "0              n30w086/",
+    "0              n30w089/",
+    "0              n30w090/",
+    "0              n30w091/",
+    "0              n30w092/",
+    "0              n30w093/",
+    "0              n30w094/",
+    "0              n30w095/",
+    "0              n30w096/",
+    "0              n30w097/",
+    "0              n30w098/",
+    "0              n30w099/",
+    "0              n30w100/",
+    "0              n30w101/",
+    "0              n30w102/",
+    "0              n30w103/",
+    "0              n30w104/",
+    "0              n30w105/",
+    "0              n31w082/",
+    "0              n31w083/",
+    "0              n31w084/",
+    "0              n31w085/",
+    "0              n31w086/",
+    "0              n31w087/",
+    "0              n31w088/",
+    "0              n31w089/",
+    "0              n31w090/",
+    "0              n31w091/",
+    "0              n31w092/",
+    "0              n31w093/",
+    "0              n31w094/",
+    "0              n31w095/",
+    "0              n31w096/",
+    "0              n31w097/",
+    "0              n31w098/",
+    "0              n31w099/",
+    "0              n31w100/",
+    "0              n31w101/",
+    "0              n31w102/",
+    "0              n31w103/",
+    "0              n31w104/",
+    "0              n31w105/",
+    "0              n31w106/",
+    "0              n31w107/",
+    "0              n32w081/",
+    "0              n32w082/",
+    "0              n32w083/",
+    "0              n32w084/",
+    "0              n32w085/",
+    "0              n32w086/",
+    "0              n32w087/",
+    "0              n32w088/",
+    "0              n32w089/",
+    "0              n32w090/",
+    "0              n32w091/",
+    "0              n32w092/",
+    "0              n32w093/",
+    "0              n32w094/",
+    "0              n32w095/",
+    "0              n32w096/",
+    "0              n32w097/",
+    "0              n32w098/",
+    "0              n32w099/",
+    "0              n32w100/",
+    "0              n32w101/",
+    "0              n32w102/",
+    "0              n32w103/",
+    "0              n32w104/",
+    "0              n32w105/",
+    "0              n32w106/",
+    "0              n32w107/",
+    "0              n32w108/",
+    "0              n32w109/",
+    "0              n32w110/",
+    "0              n32w111/",
+    "0              n32w112/",
+    "0              n32w113/",
+    "0              n32w114/",
+    "0              n33w080/",
+    "0              n33w081/",
+    "0              n33w082/",
+    "0              n33w083/",
+    "0              n33w084/",
+    "0              n33w085/",
+    "0              n33w086/",
+    "0              n33w087/",
+    "0              n33w088/",
+    "0              n33w089/",
+    "0              n33w090/",
+    "0              n33w091/",
+    "0              n33w092/",
+    "0              n33w093/",
+    "0              n33w094/",
+    "0              n33w095/",
+    "0              n33w096/",
+    "0              n33w097/",
+    "0              n33w098/",
+    "0              n33w099/",
+    "0              n33w100/",
+    "0              n33w101/",
+    "0              n33w102/",
+    "0              n33w103/",
+    "0              n33w104/",
+    "0              n33w105/",
+    "0              n33w106/",
+    "0              n33w107/",
+    "0              n33w108/",
+    "0              n33w109/",
+    "0              n33w110/",
+    "0              n33w111/",
+    "0              n33w112/",
+    "0              n33w113/",
+    "0              n33w114/",
+    "0              n33w115/",
+    "0              n33w116/",
+    "0              n33w117/",
+    "0              n33w118/",
+    "0              n33w119/",
+    "0              n34w078/",
+    "0              n34w079/",
+    "0              n34w080/",
+    "0              n34w081/",
+    "0              n34w082/",
+    "0              n34w083/",
+    "0              n34w084/",
+    "0              n34w085/",
+    "0              n34w086/",
+    "0              n34w087/",
+    "0              n34w088/",
+    "0              n34w089/",
+    "0              n34w090/",
+    "0              n34w091/",
+    "0              n34w092/",
+    "0              n34w093/",
+    "0              n34w094/",
+    "0              n34w095/",
+    "0              n34w096/",
+    "0              n34w097/",
+    "0              n34w098/",
+    "0              n34w099/",
+    "0              n34w100/",
+    "0              n34w101/",
+    "0              n34w102/",
+    "0              n34w103/",
+    "0              n34w104/",
+    "0              n34w105/",
+    "0              n34w106/",
+    "0              n34w107/",
+    "0              n34w108/",
+    "0              n34w109/",
+    "0              n34w110/",
+    "0              n34w111/",
+    "0              n34w112/",
+    "0              n34w113/",
+    "0              n34w114/",
+    "0              n34w115/",
+    "0              n34w116/",
+    "0              n34w117/",
+    "0              n34w118/",
+    "0              n34w119/",
+    "0              n34w120/",
+    "0              n34w121/",
+    "0              n35w076/",
+    "0              n35w077/",
+    "0              n35w078/",
+    "0              n35w079/",
+    "0              n35w080/",
+    "0              n35w081/",
+    "0              n35w082/",
+    "0              n35w083/",
+    "0              n35w084/",
+    "0              n35w085/",
+    "0              n35w086/",
+    "0              n35w087/",
+    "0              n35w088/",
+    "0              n35w089/",
+    "0              n35w090/",
+    "0              n35w091/",
+    "0              n35w092/",
+    "0              n35w093/",
+    "0              n35w094/",
+    "0              n35w095/",
+    "0              n35w096/",
+    "0              n35w097/",
+    "0              n35w098/",
+    "0              n35w099/",
+    "0              n35w100/",
+    "0              n35w101/",
+    "0              n35w102/",
+    "0              n35w103/",
+    "0              n35w104/",
+    "0              n35w105/",
+    "0              n35w106/",
+    "0              n35w107/",
+    "0              n35w108/",
+    "0              n35w109/",
+    "0              n35w110/",
+    "0              n35w111/",
+    "0              n35w112/",
+    "0              n35w113/",
+    "0              n35w114/",
+    "0              n35w115/",
+    "0              n35w116/",
+    "0              n35w117/",
+    "0              n35w118/",
+    "0              n35w119/",
+    "0              n35w120/",
+    "0              n35w121/",
+    "0              n36w076/",
+    "0              n36w077/",
+    "0              n36w078/",
+    "0              n36w079/",
+    "0              n36w080/",
+    "0              n36w081/",
+    "0              n36w082/",
+    "0              n36w083/",
+    "0              n36w084/",
+    "0              n36w085/",
+    "0              n36w086/",
+    "0              n36w087/",
+    "0              n36w088/",
+    "0              n36w089/",
+    "0              n36w090/",
+    "0              n36w091/",
+    "0              n36w092/",
+    "0              n36w093/",
+    "0              n36w094/",
+    "0              n36w095/",
+    "0              n36w096/",
+    "0              n36w097/",
+    "0              n36w098/",
+    "0              n36w099/",
+    "0              n36w100/",
+    "0              n36w101/",
+    "0              n36w102/",
+    "0              n36w103/",
+    "0              n36w104/",
+    "0              n36w105/",
+    "0              n36w106/",
+    "0              n36w107/",
+    "0              n36w108/",
+    "0              n36w109/",
+    "0              n36w110/",
+    "0              n36w111/",
+    "0              n36w112/",
+    "0              n36w113/",
+    "0              n36w114/",
+    "0              n36w115/",
+    "0              n36w116/",
+    "0              n36w117/",
+    "0              n36w118/",
+    "0              n36w119/",
+    "0              n36w120/",
+    "0              n36w121/",
+    "0              n36w122/",
+    "0              n37w076/",
+    "0              n37w077/",
+    "0              n37w078/",
+    "0              n37w079/",
+    "0              n37w080/",
+    "0              n37w081/",
+    "0              n37w082/",
+    "0              n37w083/",
+    "0              n37w084/",
+    "0              n37w085/",
+    "0              n37w086/",
+    "0              n37w087/",
+    "0              n37w088/",
+    "0              n37w089/",
+    "0              n37w090/",
+    "0              n37w091/",
+    "0              n37w092/",
+    "0              n37w093/",
+    "0              n37w094/",
+    "0              n37w095/",
+    "0              n37w096/",
+    "0              n37w097/",
+    "0              n37w098/",
+    "0              n37w099/",
+    "0              n37w100/",
+    "0              n37w101/",
+    "0              n37w102/",
+    "0              n37w103/",
+    "0              n37w104/",
+    "0              n37w105/",
+    "0              n37w106/",
+    "0              n37w107/",
+    "0              n37w108/",
+    "0              n37w109/",
+    "0              n37w110/",
+    "0              n37w111/",
+    "0              n37w112/",
+    "0              n37w113/",
+    "0              n37w114/",
+    "0              n37w115/",
+    "0              n37w116/",
+    "0              n37w117/",
+    "0              n37w118/",
+    "0              n37w119/",
+    "0              n37w120/",
+    "0              n37w121/",
+    "0              n37w122/",
+    "0              n37w123/",
+    "0              n38w076/",
+    "0              n38w077/",
+    "0              n38w078/",
+    "0              n38w079/",
+    "0              n38w080/",
+    "0              n38w081/",
+    "0              n38w082/",
+    "0              n38w083/",
+    "0              n38w084/",
+    "0              n38w085/",
+    "0              n38w086/",
+    "0              n38w087/",
+    "0              n38w088/",
+    "0              n38w089/",
+    "0              n38w090/",
+    "0              n38w091/",
+    "0              n38w092/",
+    "0              n38w093/",
+    "0              n38w094/",
+    "0              n38w095/",
+    "0              n38w096/",
+    "0              n38w097/",
+    "0              n38w098/",
+    "0              n38w099/",
+    "0              n38w100/",
+    "0              n38w101/",
+    "0              n38w102/",
+    "0              n38w103/",
+    "0              n38w104/",
+    "0              n38w105/",
+    "0              n38w106/",
+    "0              n38w107/",
+    "0              n38w108/",
+    "0              n38w109/",
+    "0              n38w110/",
+    "0              n38w111/",
+    "0              n38w112/",
+    "0              n38w113/",
+    "0              n38w114/",
+    "0              n38w115/",
+    "0              n38w116/",
+    "0              n38w117/",
+    "0              n38w118/",
+    "0              n38w119/",
+    "0              n38w120/",
+    "0              n38w121/",
+    "0              n38w122/",
+    "0              n38w123/",
+    "0              n38w124/",
+    "0              n39w075/",
+    "0              n39w076/",
+    "0              n39w077/",
+    "0              n39w078/",
+    "0              n39w079/",
+    "0              n39w080/",
+    "0              n39w081/",
+    "0              n39w082/",
+    "0              n39w083/",
+    "0              n39w084/",
+    "0              n39w085/",
+    "0              n39w086/",
+    "0              n39w087/",
+    "0              n39w088/",
+    "0              n39w089/",
+    "0              n39w090/",
+    "0              n39w091/",
+    "0              n39w092/",
+    "0              n39w093/",
+    "0              n39w094/",
+    "0              n39w095/",
+    "0              n39w096/",
+    "0              n39w097/",
+    "0              n39w098/",
+    "0              n39w099/",
+    "0              n39w100/",
+    "0              n39w101/",
+    "0              n39w102/",
+    "0              n39w103/",
+    "0              n39w104/",
+    "0              n39w105/",
+    "0              n39w106/",
+    "0              n39w107/",
+    "0              n39w108/",
+    "0              n39w109/",
+    "0              n39w110/",
+    "0              n39w111/",
+    "0              n39w112/",
+    "0              n39w113/",
+    "0              n39w114/",
+    "0              n39w115/",
+    "0              n39w116/",
+    "0              n39w117/",
+    "0              n39w118/",
+    "0              n39w119/",
+    "0              n39w120/",
+    "0              n39w121/",
+    "0              n39w122/",
+    "0              n39w123/",
+    "0              n39w124/",
+    "0              n40w075/",
+    "0              n40w076/",
+    "0              n40w077/",
+    "0              n40w078/",
+    "0              n40w079/",
+    "0              n40w080/",
+    "0              n40w081/",
+    "0              n40w082/",
+    "0              n40w083/",
+    "0              n40w084/",
+    "0              n40w085/",
+    "0              n40w086/",
+    "0              n40w087/",
+    "0              n40w088/",
+    "0              n40w089/",
+    "0              n40w090/",
+    "0              n40w091/",
+    "0              n40w092/",
+    "0              n40w093/",
+    "0              n40w094/",
+    "0              n40w095/",
+    "0              n40w096/",
+    "0              n40w097/",
+    "0              n40w098/",
+    "0              n40w099/",
+    "0              n40w100/",
+    "0              n40w101/",
+    "0              n40w102/",
+    "0              n40w103/",
+    "0              n40w104/",
+    "0              n40w105/",
+    "0              n40w106/",
+    "0              n40w107/",
+    "0              n40w108/",
+    "0              n40w109/",
+    "0              n40w110/",
+    "0              n40w111/",
+    "0              n40w112/",
+    "0              n40w113/",
+    "0              n40w114/",
+    "0              n40w115/",
+    "0              n40w116/",
+    "0              n40w117/",
+    "0              n40w118/",
+    "0              n40w119/",
+    "0              n40w120/",
+    "0              n40w121/",
+    "0              n40w122/",
+    "0              n40w123/",
+    "0              n40w124/",
+    "0              n40w125/",
+    "0              n41w073/",
+    "0              n41w074/",
+    "0              n41w075/",
+    "0              n41w076/",
+    "0              n41w077/",
+    "0              n41w078/",
+    "0              n41w079/",
+    "0              n41w080/",
+    "0              n41w081/",
+    "0              n41w082/",
+    "0              n41w083/",
+    "0              n41w084/",
+    "0              n41w085/",
+    "0              n41w086/",
+    "0              n41w087/",
+    "0              n41w088/",
+    "0              n41w089/",
+    "0              n41w090/",
+    "0              n41w091/",
+    "0              n41w092/",
+    "0              n41w093/",
+    "0              n41w094/",
+    "0              n41w095/",
+    "0              n41w096/",
+    "0              n41w097/",
+    "0              n41w098/",
+    "0              n41w099/",
+    "0              n41w100/",
+    "0              n41w101/",
+    "0              n41w102/",
+    "0              n41w103/",
+    "0              n41w104/",
+    "0              n41w105/",
+    "0              n41w106/",
+    "0              n41w107/",
+    "0              n41w108/",
+    "0              n41w109/",
+    "0              n41w110/",
+    "0              n41w111/",
+    "0              n41w112/",
+    "0              n41w113/",
+    "0              n41w114/",
+    "0              n41w115/",
+    "0              n41w116/",
+    "0              n41w117/",
+    "0              n41w118/",
+    "0              n41w119/",
+    "0              n41w120/",
+    "0              n41w121/",
+    "0              n41w122/",
+    "0              n41w123/",
+    "0              n41w124/",
+    "0              n41w125/",
+    "0              n42w070/",
+    "0              n42w071/",
+    "0              n42w072/",
+    "0              n42w073/",
+    "0              n42w074/",
+    "0              n42w075/",
+    "0              n42w076/",
+    "0              n42w077/",
+    "0              n42w078/",
+    "0              n42w079/",
+    "0              n42w080/",
+    "0              n42w081/",
+    "0              n42w082/",
+    "0              n42w083/",
+    "0              n42w084/",
+    "0              n42w085/",
+    "0              n42w086/",
+    "0              n42w087/",
+    "0              n42w088/",
+    "0              n42w089/",
+    "0              n42w090/",
+    "0              n42w091/",
+    "0              n42w092/",
+    "0              n42w093/",
+    "0              n42w094/",
+    "0              n42w095/",
+    "0              n42w096/",
+    "0              n42w097/",
+    "0              n42w098/",
+    "0              n42w099/",
+    "0              n42w100/",
+    "0              n42w101/",
+    "0              n42w102/",
+    "0              n42w103/",
+    "0              n42w104/",
+    "0              n42w105/",
+    "0              n42w106/",
+    "0              n42w107/",
+    "0              n42w108/",
+    "0              n42w109/",
+    "0              n42w110/",
+    "0              n42w111/",
+    "0              n42w112/",
+    "0              n42w113/",
+    "0              n42w114/",
+    "0              n42w115/",
+    "0              n42w116/",
+    "0              n42w117/",
+    "0              n42w118/",
+    "0              n42w119/",
+    "0              n42w120/",
+    "0              n42w121/",
+    "0              n42w122/",
+    "0              n42w123/",
+    "0              n42w124/",
+    "0              n42w125/",
+    "0              n43w071/",
+    "0              n43w072/",
+    "0              n43w073/",
+    "0              n43w074/",
+    "0              n43w075/",
+    "0              n43w076/",
+    "0              n43w077/",
+    "0              n43w078/",
+    "0              n43w079/",
+    "0              n43w080/",
+    "0              n43w081/",
+    "0              n43w082/",
+    "0              n43w083/",
+    "0              n43w084/",
+    "0              n43w085/",
+    "0              n43w086/",
+    "0              n43w087/",
+    "0              n43w088/",
+    "0              n43w089/",
+    "0              n43w090/",
+    "0              n43w091/",
+    "0              n43w092/",
+    "0              n43w093/",
+    "0              n43w094/",
+    "0              n43w095/",
+    "0              n43w096/",
+    "0              n43w097/",
+    "0              n43w098/",
+    "0              n43w099/",
+    "0              n43w100/",
+    "0              n43w101/",
+    "0              n43w102/",
+    "0              n43w103/",
+    "0              n43w104/",
+    "0              n43w105/",
+    "0              n43w106/",
+    "0              n43w107/",
+    "0              n43w108/",
+    "0              n43w109/",
+    "0              n43w110/",
+    "0              n43w111/",
+    "0              n43w112/",
+    "0              n43w113/",
+    "0              n43w114/",
+    "0              n43w115/",
+    "0              n43w116/",
+    "0              n43w117/",
+    "0              n43w118/",
+    "0              n43w119/",
+    "0              n43w120/",
+    "0              n43w121/",
+    "0              n43w122/",
+    "0              n43w123/",
+    "0              n43w124/",
+    "0              n43w125/",
+    "0              n44w069/",
+    "0              n44w070/",
+    "0              n44w071/",
+    "0              n44w072/",
+    "0              n44w073/",
+    "0              n44w074/",
+    "0              n44w075/",
+    "0              n44w076/",
+    "0              n44w077/",
+    "0              n44w078/",
+    "0              n44w079/",
+    "0              n44w080/",
+    "0              n44w081/",
+    "0              n44w083/",
+    "0              n44w084/",
+    "0              n44w085/",
+    "0              n44w086/",
+    "0              n44w087/",
+    "0              n44w088/",
+    "0              n44w089/",
+    "0              n44w090/",
+    "0              n44w091/",
+    "0              n44w092/",
+    "0              n44w093/",
+    "0              n44w094/",
+    "0              n44w095/",
+    "0              n44w096/",
+    "0              n44w097/",
+    "0              n44w098/",
+    "0              n44w099/",
+    "0              n44w100/",
+    "0              n44w101/",
+    "0              n44w102/",
+    "0              n44w103/",
+    "0              n44w104/",
+    "0              n44w105/",
+    "0              n44w106/",
+    "0              n44w107/",
+    "0              n44w108/",
+    "0              n44w109/",
+    "0              n44w110/",
+    "0              n44w111/",
+    "0              n44w112/",
+    "0              n44w113/",
+    "0              n44w114/",
+    "0              n44w115/",
+    "0              n44w116/",
+    "0              n44w117/",
+    "0              n44w118/",
+    "0              n44w119/",
+    "0              n44w120/",
+    "0              n44w121/",
+    "0              n44w122/",
+    "0              n44w123/",
+    "0              n44w124/",
+    "0              n44w125/",
+    "0              n45w067/",
+    "0              n45w068/",
+    "0              n45w069/",
+    "0              n45w070/",
+    "0              n45w071/",
+    "0              n45w072/",
+    "0              n45w073/",
+    "0              n45w074/",
+    "0              n45w075/",
+    "0              n45w076/",
+    "0              n45w077/",
+    "0              n45w083/",
+    "0              n45w084/",
+    "0              n45w085/",
+    "0              n45w086/",
+    "0              n45w087/",
+    "0              n45w088/",
+    "0              n45w089/",
+    "0              n45w090/",
+    "0              n45w091/",
+    "0              n45w092/",
+    "0              n45w093/",
+    "0              n45w094/",
+    "0              n45w095/",
+    "0              n45w096/",
+    "0              n45w097/",
+    "0              n45w098/",
+    "0              n45w099/",
+    "0              n45w100/",
+    "0              n45w101/",
+    "0              n45w102/",
+    "0              n45w103/",
+    "0              n45w104/",
+    "0              n45w105/",
+    "0              n45w106/",
+    "0              n45w107/",
+    "0              n45w108/",
+    "0              n45w109/",
+    "0              n45w110/",
+    "0              n45w111/",
+    "0              n45w112/",
+    "0              n45w113/",
+    "0              n45w114/",
+    "0              n45w115/",
+    "0              n45w116/",
+    "0              n45w117/",
+    "0              n45w118/",
+    "0              n45w119/",
+    "0              n45w120/",
+    "0              n45w121/",
+    "0              n45w122/",
+    "0              n45w123/",
+    "0              n45w124/",
+    "0              n45w125/",
+    "0              n46w068/",
+    "0              n46w069/",
+    "0              n46w070/",
+    "0              n46w071/",
+    "0              n46w072/",
+    "0              n46w073/",
+    "0              n46w074/",
+    "0              n46w075/",
+    "0              n46w084/",
+    "0              n46w085/",
+    "0              n46w086/",
+    "0              n46w087/",
+    "0              n46w088/",
+    "0              n46w089/",
+    "0              n46w090/",
+    "0              n46w091/",
+    "0              n46w092/",
+    "0              n46w093/",
+    "0              n46w094/",
+    "0              n46w095/",
+    "0              n46w096/",
+    "0              n46w097/",
+    "0              n46w098/",
+    "0              n46w099/",
+    "0              n46w100/",
+    "0              n46w101/",
+    "0              n46w102/",
+    "0              n46w103/",
+    "0              n46w104/",
+    "0              n46w105/",
+    "0              n46w106/",
+    "0              n46w107/",
+    "0              n46w108/",
+    "0              n46w109/",
+    "0              n46w110/",
+    "0              n46w111/",
+    "0              n46w112/",
+    "0              n46w113/",
+    "0              n46w114/",
+    "0              n46w115/",
+    "0              n46w116/",
+    "0              n46w117/",
+    "0              n46w118/",
+    "0              n46w119/",
+    "0              n46w120/",
+    "0              n46w121/",
+    "0              n46w122/",
+    "0              n46w123/",
+    "0              n46w124/",
+    "0              n46w125/",
+    "0              n47w068/",
+    "0              n47w069/",
+    "0              n47w070/",
+    "0              n47w071/",
+    "0              n47w084/",
+    "0              n47w085/",
+    "0              n47w086/",
+    "0              n47w087/",
+    "0              n47w088/",
+    "0              n47w089/",
+    "0              n47w090/",
+    "0              n47w091/",
+    "0              n47w092/",
+    "0              n47w093/",
+    "0              n47w094/",
+    "0              n47w095/",
+    "0              n47w096/",
+    "0              n47w097/",
+    "0              n47w098/",
+    "0              n47w099/",
+    "0              n47w100/",
+    "0              n47w101/",
+    "0              n47w102/",
+    "0              n47w103/",
+    "0              n47w104/",
+    "0              n47w105/",
+    "0              n47w106/",
+    "0              n47w107/",
+    "0              n47w108/",
+    "0              n47w109/",
+    "0              n47w110/",
+    "0              n47w111/",
+    "0              n47w112/",
+    "0              n47w113/",
+    "0              n47w114/",
+    "0              n47w115/",
+    "0              n47w116/",
+    "0              n47w117/",
+    "0              n47w118/",
+    "0              n47w119/",
+    "0              n47w120/",
+    "0              n47w121/",
+    "0              n47w122/",
+    "0              n47w123/",
+    "0              n47w124/",
+    "0              n47w125/",
+    "0              n48w068/",
+    "0              n48w069/",
+    "0              n48w070/",
+    "0              n48w087/",
+    "0              n48w088/",
+    "0              n48w089/",
+    "0              n48w090/",
+    "0              n48w091/",
+    "0              n48w092/",
+    "0              n48w093/",
+    "0              n48w094/",
+    "0              n48w095/",
+    "0              n48w096/",
+    "0              n48w097/",
+    "0              n48w098/",
+    "0              n48w099/",
+    "0              n48w100/",
+    "0              n48w101/",
+    "0              n48w102/",
+    "0              n48w103/",
+    "0              n48w104/",
+    "0              n48w105/",
+    "0              n48w106/",
+    "0              n48w107/",
+    "0              n48w108/",
+    "0              n48w109/",
+    "0              n48w110/",
+    "0              n48w111/",
+    "0              n48w112/",
+    "0              n48w113/",
+    "0              n48w114/",
+    "0              n48w115/",
+    "0              n48w116/",
+    "0              n48w117/",
+    "0              n48w118/",
+    "0              n48w119/",
+    "0              n48w120/",
+    "0              n48w121/",
+    "0              n48w122/",
+    "0              n48w123/",
+    "0              n48w124/",
+    "0              n48w125/",
+    "0              n49w089/",
+    "0              n49w090/",
+    "0              n49w091/",
+    "0              n49w092/",
+    "0              n49w093/",
+    "0              n49w094/",
+    "0              n49w095/",
+    "0              n49w096/",
+    "0              n49w097/",
+    "0              n49w098/",
+    "0              n49w099/",
+    "0              n49w100/",
+    "0              n49w101/",
+    "0              n49w102/",
+    "0              n49w103/",
+    "0              n49w104/",
+    "0              n49w105/",
+    "0              n49w106/",
+    "0              n49w107/",
+    "0              n49w108/",
+    "0              n49w109/",
+    "0              n49w110/",
+    "0              n49w111/",
+    "0              n49w112/",
+    "0              n49w113/",
+    "0              n49w114/",
+    "0              n49w115/",
+    "0              n49w116/",
+    "0              n49w117/",
+    "0              n49w118/",
+    "0              n49w119/",
+    "0              n49w120/",
+    "0              n49w121/",
+    "0              n49w122/",
+    "0              n49w123/",
+    "0              n49w124/",
+    "0              n49w125/",
+    "0              n50w095/",
+    "0              n50w096/",
+    "0              n50w097/",
+    "0              n50w098/",
+    "0              n50w099/",
+    "0              n50w100/",
+    "0              n50w101/",
+    "0              n50w107/",
+    "0              n50w108/",
+    "0              n50w122/",
+    "0              n50w123/",
+    "0              n50w124/",
+    "0              n52e177/",
+    "0              n52e178/",
+    "0              n52e179/",
+    "0              n52w174/",
+    "0              n52w176/",
+    "0              n52w177/",
+    "0              n52w178/",
+    "0              n52w179/",
+    "0              n52w180/",
+    "0              n53e172/",
+    "0              n53e173/",
+    "0              n53e174/",
+    "0              n53e175/",
+    "0              n53e177/",
+    "0              n53e178/",
+    "0              n53e179/",
+    "0              n53w169/",
+    "0              n53w170/",
+    "0              n53w171/",
+    "0              n53w172/",
+    "0              n53w173/",
+    "0              n53w174/",
+    "0              n53w175/",
+    "0              n53w176/",
+    "0              n53w177/",
+    "0              n54e172/",
+    "0              n54e173/",
+    "0              n54w167/",
+    "0              n54w168/",
+    "0              n54w169/",
+    "0              n54w170/",
+    "0              n55w131/",
+    "0              n55w132/",
+    "0              n55w133/",
+    "0              n55w134/",
+    "0              n55w160/",
+    "0              n55w161/",
+    "0              n55w162/",
+    "0              n55w163/",
+    "0              n55w164/",
+    "0              n55w165/",
+    "0              n55w166/",
+    "0              n55w167/",
+    "0              n56w130/",
+    "0              n56w131/",
+    "0              n56w132/",
+    "0              n56w133/",
+    "0              n56w134/",
+    "0              n56w135/",
+    "0              n56w156/",
+    "0              n56w157/",
+    "0              n56w159/",
+    "0              n56w160/",
+    "0              n56w161/",
+    "0              n56w162/",
+    "0              n56w163/",
+    "0              n56w164/",
+    "0              n57w131/",
+    "0              n57w132/",
+    "0              n57w133/",
+    "0              n57w134/",
+    "0              n57w135/",
+    "0              n57w136/",
+    "0              n57w153/",
+    "0              n57w154/",
+    "0              n57w155/",
+    "0              n57w156/",
+    "0              n57w157/",
+    "0              n57w158/",
+    "0              n57w159/",
+    "0              n57w160/",
+    "0              n57w161/",
+    "0              n57w162/",
+    "0              n57w170/",
+    "0              n57w171/",
+    "0              n58w133/",
+    "0              n58w134/",
+    "0              n58w135/",
+    "0              n58w136/",
+    "0              n58w137/",
+    "0              n58w153/",
+    "0              n58w154/",
+    "0              n58w155/",
+    "0              n58w156/",
+    "0              n58w157/",
+    "0              n58w158/",
+    "0              n58w159/",
+    "0              n58w170/",
+    "0              n58w171/",
+    "0              n59w134/",
+    "0              n59w135/",
+    "0              n59w136/",
+    "0              n59w137/",
+    "0              n59w138/",
+    "0              n59w139/",
+    "0              n59w152/",
+    "0              n59w153/",
+    "0              n59w154/",
+    "0              n59w155/",
+    "0              n59w156/",
+    "0              n59w157/",
+    "0              n59w158/",
+    "0              n59w159/",
+    "0              n59w160/",
+    "0              n59w161/",
+    "0              n59w162/",
+    "0              n59w163/",
+    "0              n60w135/",
+    "0              n60w136/",
+    "0              n60w137/",
+    "0              n60w138/",
+    "0              n60w139/",
+    "0              n60w140/",
+    "0              n60w141/",
+    "0              n60w142/",
+    "0              n60w143/",
+    "0              n60w144/",
+    "0              n60w145/",
+    "0              n60w146/",
+    "0              n60w147/",
+    "0              n60w148/",
+    "0              n60w149/",
+    "0              n60w150/",
+    "0              n60w151/",
+    "0              n60w152/",
+    "0              n60w153/",
+    "0              n60w154/",
+    "0              n60w155/",
+    "0              n60w156/",
+    "0              n60w157/",
+    "0              n60w158/",
+    "0              n60w159/",
+    "0              n60w160/",
+    "0              n60w161/",
+    "0              n60w162/",
+    "0              n60w163/",
+    "0              n60w164/",
+    "0              n60w165/",
+    "0              n60w166/",
+    "0              n60w167/",
+    "0              n60w168/",
+    "0              n61w140/",
+    "0              n61w141/",
+    "0              n61w142/",
+    "0              n61w143/",
+    "0              n61w144/",
+    "0              n61w145/",
+    "0              n61w146/",
+    "0              n61w147/",
+    "0              n61w148/",
+    "0              n61w149/",
+    "0              n61w150/",
+    "0              n61w151/",
+    "0              n61w152/",
+    "0              n61w153/",
+    "0              n61w154/",
+    "0              n61w155/",
+    "0              n61w156/",
+    "0              n61w157/",
+    "0              n61w158/",
+    "0              n61w159/",
+    "0              n61w160/",
+    "0              n61w161/",
+    "0              n61w162/",
+    "0              n61w163/",
+    "0              n61w164/",
+    "0              n61w165/",
+    "0              n61w166/",
+    "0              n61w167/",
+    "0              n61w168/",
+    "0              n61w173/",
+    "0              n61w174/",
+    "0              n62w142/",
+    "0              n62w143/",
+    "0              n62w144/",
+    "0              n62w145/",
+    "0              n62w146/",
+    "0              n62w147/",
+    "0              n62w148/",
+    "0              n62w149/",
+    "0              n62w150/",
+    "0              n62w151/",
+    "0              n62w152/",
+    "0              n62w153/",
+    "0              n62w154/",
+    "0              n62w155/",
+    "0              n62w156/",
+    "0              n62w157/",
+    "0              n62w158/",
+    "0              n62w159/",
+    "0              n62w160/",
+    "0              n62w161/",
+    "0              n62w162/",
+    "0              n62w163/",
+    "0              n62w164/",
+    "0              n62w165/",
+    "0              n62w166/",
+    "0              n62w167/",
+    "0              n63w142/",
+    "0              n63w143/",
+    "0              n63w144/",
+    "0              n63w145/",
+    "0              n63w146/",
+    "0              n63w147/",
+    "0              n63w148/",
+    "0              n63w149/",
+    "0              n63w150/",
+    "0              n63w151/",
+    "0              n63w152/",
+    "0              n63w153/",
+    "0              n63w154/",
+    "0              n63w155/",
+    "0              n63w156/",
+    "0              n63w157/",
+    "0              n63w158/",
+    "0              n63w159/",
+    "0              n63w160/",
+    "0              n63w161/",
+    "0              n63w162/",
+    "0              n63w163/",
+    "0              n63w164/",
+    "0              n63w165/",
+    "0              n63w166/",
+    "0              n63w167/",
+    "0              n63w170/",
+    "0              n64w141/",
+    "0              n64w142/",
+    "0              n64w143/",
+    "0              n64w144/",
+    "0              n64w145/",
+    "0              n64w146/",
+    "0              n64w147/",
+    "0              n64w148/",
+    "0              n64w149/",
+    "0              n64w150/",
+    "0              n64w151/",
+    "0              n64w152/",
+    "0              n64w153/",
+    "0              n64w154/",
+    "0              n64w155/",
+    "0              n64w156/",
+    "0              n64w157/",
+    "0              n64w158/",
+    "0              n64w159/",
+    "0              n64w160/",
+    "0              n64w161/",
+    "0              n64w162/",
+    "0              n64w163/",
+    "0              n64w164/",
+    "0              n64w165/",
+    "0              n64w169/",
+    "0              n64w170/",
+    "0              n64w171/",
+    "0              n64w172/",
+    "0              n65w141/",
+    "0              n65w142/",
+    "0              n65w143/",
+    "0              n65w144/",
+    "0              n65w145/",
+    "0              n65w146/",
+    "0              n65w147/",
+    "0              n65w148/",
+    "0              n65w149/",
+    "0              n65w150/",
+    "0              n65w151/",
+    "0              n65w152/",
+    "0              n65w153/",
+    "0              n65w154/",
+    "0              n65w155/",
+    "0              n65w156/",
+    "0              n65w157/",
+    "0              n65w158/",
+    "0              n65w159/",
+    "0              n65w160/",
+    "0              n65w161/",
+    "0              n65w162/",
+    "0              n65w163/",
+    "0              n65w164/",
+    "0              n65w165/",
+    "0              n65w166/",
+    "0              n65w167/",
+    "0              n66w141/",
+    "0              n66w142/",
+    "0              n66w143/",
+    "0              n66w144/",
+    "0              n66w145/",
+    "0              n66w146/",
+    "0              n66w147/",
+    "0              n66w148/",
+    "0              n66w149/",
+    "0              n66w150/",
+    "0              n66w151/",
+    "0              n66w152/",
+    "0              n66w153/",
+    "0              n66w154/",
+    "0              n66w155/",
+    "0              n66w156/",
+    "0              n66w157/",
+    "0              n66w158/",
+    "0              n66w159/",
+    "0              n66w160/",
+    "0              n66w161/",
+    "0              n66w162/",
+    "0              n66w163/",
+    "0              n66w164/",
+    "0              n66w165/",
+    "0              n66w166/",
+    "0              n66w167/",
+    "0              n66w168/",
+    "0              n66w169/",
+    "0              n67w141/",
+    "0              n67w142/",
+    "0              n67w143/",
+    "0              n67w144/",
+    "0              n67w145/",
+    "0              n67w146/",
+    "0              n67w147/",
+    "0              n67w148/",
+    "0              n67w149/",
+    "0              n67w150/",
+    "0              n67w151/",
+    "0              n67w152/",
+    "0              n67w153/",
+    "0              n67w154/",
+    "0              n67w155/",
+    "0              n67w156/",
+    "0              n67w157/",
+    "0              n67w158/",
+    "0              n67w159/",
+    "0              n67w160/",
+    "0              n67w161/",
+    "0              n67w162/",
+    "0              n67w163/",
+    "0              n67w164/",
+    "0              n67w165/",
+    "0              n67w166/",
+    "0              n67w167/",
+    "0              n67w168/",
+    "0              n68w141/",
+    "0              n68w142/",
+    "0              n68w143/",
+    "0              n68w144/",
+    "0              n68w145/",
+    "0              n68w146/",
+    "0              n68w147/",
+    "0              n68w148/",
+    "0              n68w149/",
+    "0              n68w150/",
+    "0              n68w151/",
+    "0              n68w152/",
+    "0              n68w153/",
+    "0              n68w154/",
+    "0              n68w155/",
+    "0              n68w156/",
+    "0              n68w157/",
+    "0              n68w158/",
+    "0              n68w159/",
+    "0              n68w160/",
+    "0              n68w161/",
+    "0              n68w162/",
+    "0              n68w163/",
+    "0              n68w164/",
+    "0              n68w165/",
+    "0              n68w166/",
+    "0              n69w141/",
+    "0              n69w142/",
+    "0              n69w143/",
+    "0              n69w144/",
+    "0              n69w145/",
+    "0              n69w146/",
+    "0              n69w147/",
+    "0              n69w148/",
+    "0              n69w149/",
+    "0              n69w150/",
+    "0              n69w151/",
+    "0              n69w152/",
+    "0              n69w153/",
+    "0              n69w154/",
+    "0              n69w155/",
+    "0              n69w156/",
+    "0              n69w157/",
+    "0              n69w158/",
+    "0              n69w159/",
+    "0              n69w160/",
+    "0              n69w161/",
+    "0              n69w162/",
+    "0              n69w163/",
+    "0              n69w164/",
+    "0              n69w165/",
+    "0              n69w166/",
+    "0              n69w167/",
+    "0              n70w141/",
+    "0              n70w142/",
+    "0              n70w143/",
+    "0              n70w144/",
+    "0              n70w145/",
+    "0              n70w146/",
+    "0              n70w147/",
+    "0              n70w148/",
+    "0              n70w149/",
+    "0              n70w150/",
+    "0              n70w151/",
+    "0              n70w152/",
+    "0              n70w153/",
+    "0              n70w154/",
+    "0              n70w155/",
+    "0              n70w156/",
+    "0              n70w157/",
+    "0              n70w158/",
+    "0              n70w159/",
+    "0              n70w160/",
+    "0              n70w161/",
+    "0              n70w162/",
+    "0              n70w163/",
+    "0              n70w164/",
+    "0              n70w165/",
+    "0              n71w143/",
+    "0              n71w144/",
+    "0              n71w145/",
+    "0              n71w146/",
+    "0              n71w147/",
+    "0              n71w148/",
+    "0              n71w149/",
+    "0              n71w150/",
+    "0              n71w151/",
+    "0              n71w152/",
+    "0              n71w153/",
+    "0              n71w154/",
+    "0              n71w155/",
+    "0              n71w156/",
+    "0              n71w157/",
+    "0              n71w158/",
+    "0              n71w159/",
+    "0              n71w160/",
+    "0              n71w161/",
+    "0              n71w162/",
+    "0              n71w163/",
+    "0              n71w164/",
+    "0              n72w155/",
+    "0              n72w156/",
+    "0              n72w157/",
+    "0              n72w158/",
+    "0              s14w170/",
+    "0              s14w171/"
+  )
+  
+  
+  # swap the copied index prefix ("0" + padding) for the /vsicurl/ S3 URL
+  t2 <- gsub("0              ",
+             "/vsicurl/https://prd-tnm.s3.amazonaws.com/StagedProducts/Elevation/13/TIFF/current/",
+             t)
+  
+  # append tile filenames following the NED naming scheme (USGS_13_<tile>.tif)
+  t3 <- paste0(t2,
+               "USGS_13_",
+               basename(t2),
+               ".tif")
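+  
+  # worked example: "0              n06e162/" becomes
+  # "/vsicurl/https://prd-tnm.s3.amazonaws.com/StagedProducts/Elevation/13/TIFF/current/n06e162/USGS_13_n06e162.tif"
+  # (basename() drops the trailing "/" before the filename is appended)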
+  
+  # write the tile list to <base_dir>/ned_list_USGS_13.txt
+  write.table(t3,
+              paste0(base_dir, "/ned_list_USGS_13.txt"),
+              row.names = FALSE,
+              col.names = FALSE,
+              quote = FALSE)
+  
+  
+}
+# dem_base_dir <- "/Users/anguswatters/Desktop/transects_paper/data/dem"
+# download_3dep_vrt(dem_base_dir)
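+# 
+# # The list written above can then feed gdalbuildvrt (a sketch, assuming GDAL
+# # is on the PATH; the output VRT name here is arbitrary):
+# system(paste(
+#   "gdalbuildvrt -input_file_list",
+#   paste0(dem_base_dir, "/ned_list_USGS_13.txt"),
+#   paste0(dem_base_dir, "/ned_USGS_13.vrt")
+# ))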
+# 
+# # Create a metadata object of NED resources (only the 10m product is active below)
+# ned <- data.frame(rbind(
+#   # c(id        = "USGS_1",
+#   #   URL       = "/vsicurl/https://prd-tnm.s3.amazonaws.com/StagedProducts/Elevation/1",
+#   #   varname   = "30m elevation",
+#   #   long_name = "30m (1 arcsec) National Elevation Dataset",
+#   #   units     = "m"),
+#   # 
+#   # c(id        = "USGS_2",
+#   #   URL       = "/vsicurl/https://prd-tnm.s3.amazonaws.com/StagedProducts/Elevation/2",
+#   #   varname   = "60m elevation",
+#   #   long_name = "60m (2 arcsec) National Elevation Dataset Alaska",
+#   #   units     = "m"),
+#   
+#   c(id        = "USGS_13",
+#     URL       = "/vsicurl/https://prd-tnm.s3.amazonaws.com/StagedProducts/Elevation/13",
+#     varname   = "10m elevation",
+#     long_name = "10m (1/3th arcsec) National Elevation Dataset",
+#     units     = "m")
+# ))
+# 
+# 
+# 
+# 
+# # Loop over the NED resolutions (use nrow(), not length(), to iterate over rows)
+# for (i in seq_len(nrow(ned))) {
+#   
+#   # Define output text file path
+#   txt_file  <- paste0(dem_base_dir, "/ned_list_", ned$id[i], "_2.txt")
+#   
+#   # Define output VRT path
+#   vrt_file <- paste0(paste0(dem_base_dir, "/ned_", ned$id[i], ".vrt"))
+#   
+#   # If VRT does NOT exist, build VRT
+#   if (!file.exists(vrt_file)) {
+#     
+#     # read the corresponding index.gpkg
+#     files <- sf::read_sf(ned$domain_url[i])   # NOTE: assumes a "domain_url" column (not defined in `ned` above)
+#     
+#     # Build full HTTPS paths to "./current/"
+#     files <- c(file.path(ned$URL[i], "TIFF/current", gsub("[.]/", "", files$location)))
+#     
+#     # or point straight at the seamless 1 arcsec national VRT
+#     files <- "/vsicurl/https://prd-tnm.s3.amazonaws.com/StagedProducts/Elevation/1/TIFF/USGS_Seamless_DEM_1.vrt"
+#     
+#     # write list of files to text file
+#     write.table(files, txt_file, row.names = FALSE, col.names = FALSE, quote = FALSE)
+#     
+#     # build VRT from text file input using GDAL system call ...
+#     system(paste("gdalbuildvrt -input_file_list", txt_file, vrt_file))
+#   }
+#   
+#   logger::log_info("Finished ", ned$id[i], "...")
+# }
+
+# 
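+# variant of match_transects_to_extended_cs_pts() that takes the extension
+# percentage as an argument (extension_pct, default 0.5) instead of using the
+# global EXTENSION_PCT like the version further below; kept commented out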
+# match_transects_to_extended_cs_pts <- function(transect_lines, fixed_cs_pts, crosswalk_id, extension_pct = 0.5 ) {
+#   
+#   # transect_lines = transects
+#   # fixed_cs_pts   = fixed_pts
+#   # crosswalk_id   = CROSSWALK_ID
+#   
+#   fixed_cs_pts <- nhdplusTools::rename_geometry(fixed_cs_pts, "geometry")
+#   transect_lines    <- nhdplusTools::rename_geometry(transect_lines, "geometry")
+#   
+#   # get the counts of each point type to add this data to the transect_lines dataset
+#   point_type_counts <- hydrofabric3D::get_point_type_counts(classified_pts = fixed_cs_pts, 
+#                                                             crosswalk_id = crosswalk_id)
+#   # Check the number of cross sections that were extended
+#   message("Subsetting cross section points generated after extending transect_lines...")
+#   
+#   # extract cross section points that have an "is_extended" value of TRUE
+#   extended_pts <- 
+#     fixed_cs_pts %>%
+#     dplyr::filter(is_extended) %>%
+#     hydrofabric3D::add_tmp_id(x = crosswalk_id)
+#   
+#   # extended_pts %>% 
+#   #   get_unique_tmp_ids() %>% 
+#   #   length()
+#   
+#   # extract transect_lines that have a "crosswalk_id" in the "extended_pts" dataset
+#   update_transect_lines <- 
+#     transect_lines %>%
+#     hydrofabric3D::add_tmp_id(x = crosswalk_id) %>%
+#     dplyr::filter(tmp_id %in% unique(extended_pts$tmp_id))
+#   
+#   cs_pt_uids    <- unique(hydrofabric3D::add_tmp_id(fixed_cs_pts, x = crosswalk_id)$tmp_id)
+#   
+#   # If any transect_lines were extended, update the transect_lines dataset, and overwrite local and S3 transect_lines geopackages
+#   if (nrow(update_transect_lines) > 0) {
+#     message("Updating ", nrow(update_transect_lines), " transect_lines")
+#     
+#     
+#     # update_transect_lines <- 
+#     #   update_transect_lines %>% 
+#     #   dplyr::rename(hy_id := !!sym(crosswalk_id)) 
+#     # 
+#     update_transect_lines <- 
+#       update_transect_lines %>%
+#       # apply extend_by_percent function to each transect line:
+#       hydrofabric3D:::extend_by_percent(
+#         crosswalk_id = crosswalk_id,
+#         pct        = extension_pct,
+#         length_col = "cs_lengthm"
+#       )
+#     
+#     update_transect_lines <- hydroloom::rename_geometry(update_transect_lines, "geometry")
+#     
+#     # update_transect_lines <- 
+#     #   update_transect_lines %>%  
+#     #   dplyr::rename(!!sym(crosswalk_id) := hy_id)
+#     
+#     # cs_pt_uids    <- unique(hydrofabric3D::add_tmp_id(fixed_cs_pts, x = get(crosswalk_id))$tmp_id)
+#     # transect_uids <- unique(hydrofabric3D::add_tmp_id(transect_lines, x = get(crosswalk_id))$tmp_id)
+#     
+#     # Filter down to ONLY points that were finalized and rectified from rectify_cs_pts()
+#     # Remove old transect_lines whose "tmp_id" is in "extended_pts" (i.e. the ones that were extended),
+#     # then replace them with the "update_transect_lines"
+#     out_transect_lines <-
+#       transect_lines %>%
+#       hydrofabric3D::add_tmp_id(x = crosswalk_id) %>%
+#       dplyr::filter(tmp_id %in% cs_pt_uids) %>% 
+#       dplyr::filter(!tmp_id %in% unique(extended_pts$tmp_id)) %>%
+#       dplyr::bind_rows(
+#         dplyr::mutate(update_transect_lines, is_extended = TRUE)
+#       )
+#     
+#     # transect_lines %>% 
+#     #   hydrofabric3D::add_tmp_id(x = "hy_id") %>%
+#     #   # dplyr::filter(!tmp_id %in% unique(extended_pts$tmp_id)) %>%
+#     #   dplyr::filter(tmp_id %in% unique(hydrofabric3D::add_tmp_id(fixed_pts, x = "hy_id")$tmp_id)) %>% # Subset down to the remaining tmp_ids in the fixed points
+#     #   dplyr::filter(!tmp_id %in% unique(extended_pts$tmp_id)) %>% # remove the tmp_ids that we are going to add back in with the extended versions of those tmp_ids
+#     #   dplyr::bind_rows( # bring in the new updated extended transect_lines
+#     #     dplyr::mutate(
+#     #       update_transect_lines,
+#     #       is_extended = TRUE
+#     #     )
+#     #   )  
+#   } else {
+#     # If no transect_lines were extended
+#     out_transect_lines <- 
+#       transect_lines %>%
+#       hydrofabric3D::add_tmp_id(x = crosswalk_id) %>%
+#       dplyr::filter(tmp_id %in% cs_pt_uids) %>% 
+#       # dplyr::filter(tmp_id %in% unique(hydrofabric3D::add_tmp_id(fixed_cs_pts, x = get(crosswalk_id))$tmp_id)) %>%
+#       dplyr::filter(!tmp_id %in% unique(extended_pts$tmp_id))
+#   }
+#   
+#   # Finalize new transect_lines
+#   out_transect_lines <- 
+#     out_transect_lines %>%
+#     dplyr::left_join(
+#       point_type_counts, 
+#       by = c(crosswalk_id, "cs_id")
+#     ) %>%
+#     dplyr::left_join(
+#       dplyr::ungroup(
+#         dplyr::slice(
+#           dplyr::group_by(
+#             dplyr::select(sf::st_drop_geometry(fixed_cs_pts),
+#                           dplyr::any_of(crosswalk_id), 
+#                           cs_id, bottom, left_bank, right_bank, valid_banks, has_relief
+#             ),
+#             dplyr::across(dplyr::any_of(c(crosswalk_id, "cs_id")))
+#           ),
+#           1
+#         )
+#       ),
+#       by = c(crosswalk_id, "cs_id")
+#     ) %>%
+#     dplyr::select(
+#       dplyr::any_of(crosswalk_id),
+#       cs_source, cs_id, cs_measure, cs_lengthm,
+#       # sinuosity,
+#       is_extended,
+#       left_bank_count, right_bank_count, channel_count, bottom_count,
+#       bottom, left_bank, right_bank, valid_banks, has_relief,
+#       geometry
+#     ) %>% 
+#     dplyr::mutate(
+#       is_extended = ifelse(is.na(is_extended), FALSE, is_extended)
+#     )  
+#   
+#   return(out_transect_lines)
+# }
+
+# utility function for matching transects to the cross section points that went through
+# "get_improved_cs_pts()" and were extended for improvement;
+# returns the extended version of the transects
+# match_transects_to_extended_cs_pts <- function(transect_lines, fixed_cs_pts, crosswalk_id) {
+#   
+#   # transect_lines = transects
+#   # fixed_cs_pts   = fixed_pts
+#   # crosswalk_id   = CROSSWALK_ID
+#   
+#   fixed_cs_pts <- nhdplusTools::rename_geometry(fixed_cs_pts, "geometry")
+#   transect_lines    <- nhdplusTools::rename_geometry(transect_lines, "geometry")
+#   
+#   # get the counts of each point type to add this data to the transect_lines dataset
+#   point_type_counts <- hydrofabric3D::get_point_type_counts(classified_pts = fixed_cs_pts, 
+#                                                             crosswalk_id = crosswalk_id)
+#   # Check the number of cross sections that were extended
+#   message("Subsetting cross section points generated after extending transect_lines...")
+#   
+#   # extract cross section points that have an "is_extended" value of TRUE
+#   extended_pts <- 
+#     fixed_cs_pts %>%
+#     dplyr::filter(is_extended) %>%
+#     hydrofabric3D::add_tmp_id(x = crosswalk_id)
+#   
+#   # extended_pts %>% 
+#   #   get_unique_tmp_ids() %>% 
+#   #   length()
+#   
+#   # extract transect_lines that have a "crosswalk_id" in the "extended_pts" dataset
+#   update_transect_lines <- 
+#     transect_lines %>%
+#     hydrofabric3D::add_tmp_id(x = crosswalk_id) %>%
+#     dplyr::filter(tmp_id %in% unique(extended_pts$tmp_id))
+#   
+#   cs_pt_uids    <- unique(hydrofabric3D::add_tmp_id(fixed_cs_pts, x = crosswalk_id)$tmp_id)
+#   
+#   # If any transect_lines were extended, update the transect_lines dataset, and overwrite local and S3 transect_lines geopackages
+#   if (nrow(update_transect_lines) > 0) {
+#     message("Updating ", nrow(update_transect_lines), " transect_lines")
+#     
+#     
+#     update_transect_lines <- 
+#       update_transect_lines %>% 
+#       dplyr::rename(hy_id := !!sym(crosswalk_id)) 
+#     
+#     update_transect_lines <- 
+#       update_transect_lines %>%
+#       # apply extend_by_percent function to each transect line:
+#       hydrofabric3D:::extend_by_percent(
+#         pct        = EXTENSION_PCT,
+#         length_col = "cs_lengthm"
+#       )
+#     
+#     update_transect_lines <- hydroloom::rename_geometry(update_transect_lines, "geometry")
+#     
+#     update_transect_lines <- 
+#       update_transect_lines %>%  
+#       dplyr::rename(!!sym(crosswalk_id) := hy_id)
+# 
+#     # cs_pt_uids    <- unique(hydrofabric3D::add_tmp_id(fixed_cs_pts, x = get(crosswalk_id))$tmp_id)
+#     # transect_uids <- unique(hydrofabric3D::add_tmp_id(transect_lines, x = get(crosswalk_id))$tmp_id)
+#     
+#     # Filter down to ONLY points that were finalized and rectified from rectify_cs_pts()
+#     # Remove old transect_lines whose "tmp_id" is in "extended_pts" (i.e. the ones that were extended),
+#     # then replace them with the "update_transect_lines"
+#     out_transect_lines <-
+#       transect_lines %>%
+#       hydrofabric3D::add_tmp_id(x = crosswalk_id) %>%
+#       dplyr::filter(tmp_id %in% cs_pt_uids) %>% 
+#       dplyr::filter(!tmp_id %in% unique(extended_pts$tmp_id)) %>%
+#       dplyr::bind_rows(
+#         dplyr::mutate(update_transect_lines, is_extended = TRUE)
+#       )
+#     
+#     # transect_lines %>% 
+#     #   hydrofabric3D::add_tmp_id(x = "hy_id") %>%
+#     #   # dplyr::filter(!tmp_id %in% unique(extended_pts$tmp_id)) %>%
+#     #   dplyr::filter(tmp_id %in% unique(hydrofabric3D::add_tmp_id(fixed_pts, x = "hy_id")$tmp_id)) %>% # Subset down to the remaining tmp_ids in the fixed points
+#     #   dplyr::filter(!tmp_id %in% unique(extended_pts$tmp_id)) %>% # remove the tmp_ids that we are going to add back in with the extended versions of those tmp_ids
+#     #   dplyr::bind_rows( # bring in the new updated extended transect_lines
+#     #     dplyr::mutate(
+#     #       update_transect_lines,
+#     #       is_extended = TRUE
+#     #     )
+#     #   )  
+#   } else {
+#     # If no transect_lines were extended
+#     out_transect_lines <- 
+#       transect_lines %>%
+#       hydrofabric3D::add_tmp_id(x = crosswalk_id) %>%
+#       dplyr::filter(tmp_id %in% cs_pt_uids) %>% 
+#       # dplyr::filter(tmp_id %in% unique(hydrofabric3D::add_tmp_id(fixed_cs_pts, x = get(crosswalk_id))$tmp_id)) %>%
+#       dplyr::filter(!tmp_id %in% unique(extended_pts$tmp_id))
+#   }
+#   
+#   # Finalize new transect_lines
+#   out_transect_lines <- 
+#     out_transect_lines %>%
+#     dplyr::left_join(
+#       point_type_counts, 
+#       by = c(crosswalk_id, "cs_id")
+#     ) %>%
+#     dplyr::left_join(
+#       dplyr::ungroup(
+#         dplyr::slice(
+#           dplyr::group_by(
+#             dplyr::select(sf::st_drop_geometry(fixed_cs_pts),
+#                           dplyr::any_of(crosswalk_id), 
+#                           cs_id, bottom, left_bank, right_bank, valid_banks, has_relief
+#             ),
+#             dplyr::across(dplyr::any_of(c(crosswalk_id, "cs_id")))
+#           ),
+#           1
+#         )
+#       ),
+#       by = c(crosswalk_id, "cs_id")
+#     ) %>%
+#     dplyr::select(
+#       dplyr::any_of(crosswalk_id),
+#       cs_source, cs_id, cs_measure, cs_lengthm,
+#       # sinuosity,
+#       is_extended,
+#       left_bank_count, right_bank_count, channel_count, bottom_count,
+#       bottom, left_bank, right_bank, valid_banks, has_relief,
+#       geometry
+#     ) %>% 
+#     dplyr::mutate(
+#       is_extended = ifelse(is.na(is_extended), FALSE, is_extended)
+#     )  
+#   
+#   return(out_transect_lines)
+# }
+
 
 
diff --git a/runners/cs_runner2/base_variables.R b/runners/cs_runner2/base_variables.R
new file mode 100644
index 0000000..79792c2
--- /dev/null
+++ b/runners/cs_runner2/base_variables.R
@@ -0,0 +1,396 @@
+### EDIT BASE_DIR, AWS_PROFILE, and DEM_PATH ###
+# load utils 
+source("runners/cs_runner2/utils.R")
+
+# ---------------------------------------------------------------------------------
+# ---- General paths and constants variables ----
+# - edit to match your local environment
+# - BASE_DIR: base directory for local file storage
+# - AWS_PROFILE: AWS profile to run CLI commands
+# - VERSION: S3 prefix/folder of version to run / generate hydrofabric data for
+# ---------------------------------------------------------------------------------
+# Base directory for local file storage
+BASE_DIR    <- '/Volumes/T7SSD/lynker-spatial'
+# BASE_DIR           <- '/Users/anguswatters/Desktop/lynker-spatial'
+
+BASE_DIRS_LIST   <- get_base_dir_paths(BASE_DIR)
+
+# AWS profile to run CLI commands 
+AWS_PROFILE        <- "angus-lynker"
+
+# S3 prefix/folder of version run
+VERSION            <- "v3.0"
+CONUS_VERSION      <- "v2.2"
+
+VERSION_DIRS_LIST            <- get_version_base_dir_paths(BASE_DIR, VERSION)
+
+# string to fill in "CS_SOURCE" column in output datasets
+CS_SOURCE          <- "hydrofabric3D"
+
+# -------------------------------------------------------------------------------------
+# ---- S3 BUCKET NAMES ----
+# -------------------------------------------------------------------------------------
+
+# name of bucket with nextgen data
+LYNKER_SPATIAL_S3_BUCKET_NAME   <- "lynker-spatial"
+LYNKER_SPATIAL_HF_S3_PREFIX     <- "hydrofabric"
+
+# AWS S3 bucket URI 
+LYNKER_SPATIAL_BASE_S3_URI  <- paste0("s3://", LYNKER_SPATIAL_S3_BUCKET_NAME, "/")
+LYNKER_SPATIAL_HF_S3_URI    <- paste0(LYNKER_SPATIAL_BASE_S3_URI, LYNKER_SPATIAL_HF_S3_PREFIX, "/")
+
+# name of bucket with nextgen data
+LYNKER_HF_S3_BUCKET_NAME        <- "lynker-hydrofabric"
+
+# -------------------------------------------------------------------------------------
+# ---- VPU IDs  ----
+# -------------------------------------------------------------------------------------
+VPU_IDS              <- c('01', '02', '03N', '03S', '03W', 
+                          '04', '05', '06', '07', '08', '09', 
+                          '10L', '10U', '11', '12', '13', '14', 
+                          '15', '16', '17', '18', '20', '21')
+# VPU_IDS              <- sf::st_drop_geometry(nhdplusTools::get_boundaries())$VPUID
+
+# -------------------------------------------------------------------------------------
+# ---- CONUS NEXTGEN  ----
+# -------------------------------------------------------------------------------------
+
+CONUS_NEXTGEN_S3_URI    <- paste0("s3://", LYNKER_SPATIAL_S3_BUCKET_NAME, "/", LYNKER_SPATIAL_HF_S3_PREFIX, "/", CONUS_VERSION, "/conus/conus_nextgen.gpkg")
+CONUS_NEXTGEN_GPKG_PATH <- file.path(VERSION_DIRS_LIST$network_dir, "conus_nextgen.gpkg")
+
+# -------------------------------------------------------------------------------------
+# ---- CONUS REFERENCE FEATURES  ----
+# -------------------------------------------------------------------------------------
+
+CONUS_REF_FEATURES_S3_URI    <- paste0("s3://", LYNKER_SPATIAL_S3_BUCKET_NAME, "/", LYNKER_SPATIAL_HF_S3_PREFIX, "/", CONUS_VERSION, "/conus/conus_reference.gpkg")
+CONUS_REF_FEATURES_GPKG_PATH <- file.path(VERSION_DIRS_LIST$ref_features_dir, "conus_reference.gpkg")
+
+# -------------------------------------------------------------------------------------
+# ---- CONUS ML DATA  ----
+# -------------------------------------------------------------------------------------
+
+CONUS_ML_S3_URI          <- paste0("s3://", LYNKER_SPATIAL_S3_BUCKET_NAME, "/", LYNKER_SPATIAL_HF_S3_PREFIX, "/", CONUS_VERSION, "/conus/bathymetry/ml_auxiliary_data.parquet")
+CONUS_ML_PARQUET_PATH    <- file.path(VERSION_DIRS_LIST$ml_dir, "ml_auxiliary_data.parquet")
+
+# -------------------------------------------------------------------------------------
+# ----  FEMA 100 year flood plain data  ----
+# -------------------------------------------------------------------------------------
+
+LYNKER_HF_FEMA_S3_PREFIX        <- "FEMA100"
+LYNKER_HF_FEMA_S3_URI           <- paste0("s3://", LYNKER_HF_S3_BUCKET_NAME, "/", LYNKER_HF_FEMA_S3_PREFIX, "/")
+
+FEMA_FGB_PATH       <- BASE_DIRS_LIST$fema_fgb_dir
+FEMA_GEOJSON_PATH   <- BASE_DIRS_LIST$fema_geojson_dir
+FEMA_CLEAN_PATH     <- BASE_DIRS_LIST$fema_clean_dir
+FEMA_GPKG_PATH      <- BASE_DIRS_LIST$fema_gpkg_dir
+FEMA_BY_VPU_PATH    <- BASE_DIRS_LIST$fema_by_vpu_dir
+FEMA_VPU_SUBFOLDERS <- BASE_DIRS_LIST$fema_by_vpu_subdirs
+
+# -------------------------------------------------------------------------------------
+# ---- CONUS CS extension polygons ----
+# Derived from the FEMA 100 year flood plain data; all FEMA VPU datasets are combined 
+# into a single geopackage for CONUS
+# -------------------------------------------------------------------------------------
+
+CS_EXTENSION_POLYGONS_DIR  <- BASE_DIRS_LIST$cs_extension_polygons_dir
+CONUS_FEMA_GPKG_PATH       <- file.path(CS_EXTENSION_POLYGONS_DIR, 'conus_fema.gpkg')
+
+# -------------------------------------------------------------------------------------
+# ---- DEM ----
+# -------------------------------------------------------------------------------------
+
+DEM_DIR        <- BASE_DIRS_LIST$dem_dir
+DEM_VRT_DIR    <- BASE_DIRS_LIST$dem_vrt_dir
+DEM_TIF_DIR    <- BASE_DIRS_LIST$dem_tif_dir
+
+# DEM URL
+DEM_PATH        <- "/vsicurl/https://prd-tnm.s3.amazonaws.com/StagedProducts/Elevation/1/TIFF/USGS_Seamless_DEM_1.vrt"
+
+# ----------------------------------------------------------------------------
+# ---- Cross section point extraction constant variables ----
+# ----------------------------------------------------------------------------
+
+# scale argument for the cross_section_pts() function. 
+# Percentage of a transect line's length by which to try extending the transect 
+# when searching for viable Z values
+# Default is 50% (0.5) of the original transect line's length
+EXTENSION_PCT  <- 0.5
+
+# Percentage of each cross section's length used as the threshold for classifying 
+# whether a cross section has relief. Default is 1% (0.01) of the cross section's length
+# (i.e. a 100m long cross section needs a minimum of 1 meter of relief in its 
+# cross section points to be classified as "having relief")
+PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF <- 0.01
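+
+# A minimal sketch of how these two constants are applied (illustrative only; the 
+# real logic lives in hydrofabric3D, and the variable names below are hypothetical):
+# extended_lengthm <- cs_lengthm * (1 + EXTENSION_PCT)                    # e.g. 100m transect -> try up to 150m
+# relief_threshold <- cs_lengthm * PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF # e.g. 100m cross section -> 1m
+# has_relief       <- (max(pts$Z) - min(pts$Z)) >= relief_threshold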
+
+# # -------------------------------------------------------------------------------------
+# # ---- Create local directory / path variables (FEMA data) ----
+# # -------------------------------------------------------------------------------------
+
+# LYNKER_HF_FEMA_S3_PREFIX        <- "FEMA100"
+# LYNKER_HF_FEMA_S3_URI           <- paste0("s3://", LYNKER_HF_S3_BUCKET_NAME, "/", LYNKER_HF_FEMA_S3_PREFIX, "/")
+
+# # location of FEMA 100 year flood plain FGB files
+# FEMA_S3_BUCKET         <- "s3://lynker-hydrofabric/"
+# LYNKER_HF_FEMA_S3_PREFIX  <- "FEMA100/"
+# LYNKER_HF_FEMA_S3_URI            <- paste0(FEMA_S3_BUCKET, LYNKER_HF_FEMA_S3_PREFIX)
+
+# # FEMA100 year flood map FGB save location (temporary, will be deleted after processing)
+# FEMA_FGB_PATH        <- file.path(BASE_DIRS_LIST$fema_dir, "fema_fgb")
+# FEMA_GEOJSON_PATH    <- file.path(BASE_DIRS_LIST$fema_dir, "fema_geojson")
+# FEMA_CLEAN_PATH      <- file.path(BASE_DIRS_LIST$fema_dir, "fema_clean")
+# FEMA_GPKG_PATH       <- file.path(BASE_DIRS_LIST$fema_dir, "fema_gpkg")
+# FEMA_BY_VPU_PATH     <- file.path(BASE_DIRS_LIST$fema_dir, "FEMA_BY_VPU")
+
+# VPU_IDS              <- sf::st_drop_geometry(nhdplusTools::get_boundaries())$VPUID
+
+# FEMA_VPU_SUBFOLDERS  <- paste0(FEMA_BY_VPU_PATH, "/VPU_", VPU_IDS)
+
+# CS_EXTENSION_POLYGONS_DIR  <- paste0(BASE_DIR, "/cs-extension-polygons")
+# CONUS_FEMA_GPKG_PATH       <- file.path(CS_EXTENSION_POLYGONS_DIR, 'conus_fema.gpkg')
+
+# DEM_DIR      <- BASE_DIRS_LIST$dem_dir
+# DEM_VRT_DIR  <- BASE_DIRS_LIST$dem_vrt_dir
+# DEM_TIF_DIR  <- BASE_DIRS_LIST$dem_tif_dir
+
+# # ----------------------------------------------------------------------------
+# # ---- Cross section point extraction constant variables ----
+# # ----------------------------------------------------------------------------
+
+# # DEM URL
+# DEM_PATH        <- "/vsicurl/https://prd-tnm.s3.amazonaws.com/StagedProducts/Elevation/1/TIFF/USGS_Seamless_DEM_1.vrt"
+
+# # scale argument for cross_section_pts() function. 
+# # The percentage of the length of the transect line to try and extend a transect to see if viable Z values can be found by extending transect line
+# # Default setting is 50% of the original transect lines length (0.5)
+# EXTENSION_PCT  <- 0.5
+
+# # percentage of the length each cross section that should be used as a threshold for classifying a cross section as having relief or not
+# # 1% of the cross sections length is the default value we are using 
+# # (i.e. a 100m long cross section needs a minimum of 1 meter (1%) of relief in its cross section points to be classified as "having relief")
+# PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF <- 0.01
+
+
+# # -------------------------------------------------------------------------------------
+# # ---- S3 output directories -----
+# # - transects 
+# # - cross section points
+# # - ML cross section points
+# # -------------------------------------------------------------------------------------
+# 
+# # transect bucket prefix
+# S3_TRANSECTS_DIR   <- paste0(LYNKER_SPATIAL_HF_S3_URI, VERSION, "/3D/transects/")
+# 
+# # cross section bucket prefix
+# S3_CS_PTS_DIR      <- paste0(LYNKER_SPATIAL_HF_S3_URI, VERSION, "/3D/dem-cross-sections/")
+# 
+# # cross section bucket prefix
+# S3_CS_ML_PTS_DIR   <- paste0(LYNKER_SPATIAL_HF_S3_URI, VERSION, "/3D/cross-sections/")
+# 
+# # -------------------------------------------------------------------------------------
+# # ---- S3 nextgen data paths / directories -----
+# # -------------------------------------------------------------------------------------
+# 
+# # the name of the folder in the S3 bucket with the nextgen data
+# S3_BUCKET_NEXTGEN_DIR       <- paste0(VERSION, "/gpkg/")
+# # S3_BUCKET_NEXTGEN_DIR <- "v20.1/gpkg/"
+# 
+# # full URI to the S3 bucket folder with the nextgen data 
+# S3_BUCKET_NEXTGEN_DIR_URI   <- paste0(LYNKER_SPATIAL_HF_S3_URI, S3_BUCKET_NEXTGEN_DIR)
+# 
+# # reference features S3 bucket prefix
+# S3_BUCKET_REF_FEATURES_URI  <- paste0("s3://", LYNKER_SPATIAL_S3_BUCKET_NAME, "/00_reference_features/gpkg/")
+# # S3_BUCKET_REF_FEATURES_URI  <- "s3://lynker-spatial/00_reference_features/gpkg/"
+# 
+# # ----------------------------------------------------------------------------
+# # ---- Machine learning data path variables ----
+# # ----------------------------------------------------------------------------
+# 
+# ML_OUTPUTS_S3_FILE   <- "channel_ml_outputs.parquet"
+# 
+# # ML_OUTPUTS_S3_DIR    <- paste0(VERSION, "/3D/ml-outputs/")
+# # ML_OUTPUTS_S3_DIR  <- "v20.1/3D/ml-outputs/"
+# 
+# ML_OUTPUTS_S3_URI      <- paste0(LYNKER_SPATIAL_HF_S3_URI, VERSION, "/3D/ml-outputs/", ML_OUTPUTS_S3_FILE)
+# # ML_OUTPUTS_S3_URI    <- paste0(LYNKER_SPATIAL_HF_S3_URI, ML_OUTPUTS_S3_DIR, ML_OUTPUTS_S3_FILE)
+# 
+# ML_OUTPUTS_PATH      <- paste0(BASE_DIR, "/ml-outputs/", ML_OUTPUTS_S3_FILE)
+# 
+# # path to the remote CONUS net parquet file
+# CONUS_NETWORK_FILE   <- "conus_net.parquet"
+# CONUS_NETWORK_URI    <- paste0(LYNKER_SPATIAL_HF_S3_URI, VERSION, "/", CONUS_NETWORK_FILE)
+# 
+# # ----------------------------------------------------------------------------
+# 
+# 
+# # -------------------------------------------------------------------------------------
+# # ---- Local directory / path variables ----
+# # -------------------------------------------------------------------------------------
+# 
+# # directory to copy nextgen bucket data too
+# NEXTGEN_DIR      <- paste0(BASE_DIR, "/", S3_BUCKET_NEXTGEN_DIR)
+# # NEXTGEN_DIR <- paste0(BASE_DIR, "/pre-release/")
+# 
+# # # model attributes directory
+# # MODEL_ATTR_DIR   <- paste0(BASE_DIR, "/model_attributes/")
+# 
+# # cross-section data model data directories
+# TRANSECTS_DIR    <- paste0(BASE_DIR, "/01_transects/")
+# CS_PTS_DIR       <- paste0(BASE_DIR, "/02_cs_pts/")
+# 
+# # final output directory with geopackages per VPU
+# CS_OUTPUT_DIR    <- paste0(BASE_DIR, "/cross_sections/")
+# 
+# # directory to copy nextgen bucket data too
+# REF_FEATURES_DIR      <- paste0(BASE_DIR, "/00_reference_features/")
+# REF_FEATURES_GPKG_DIR <- paste0(REF_FEATURES_DIR, "gpkg/")
+# 
+# # make a directory for the ML outputs data
+# ML_OUTPUTS_DIR   <- paste0(BASE_DIR, "/ml-outputs/")
+# 
+# DEM_DIR      <- paste0(BASE_DIR, "/dem")
+# DEM_VRT_DIR  <- paste0(DEM_DIR, "/vrt")
+# DEM_TIF_DIR  <- paste0(DEM_DIR, "/tif")
+# 
+# # -------------------------------------------------------------------------------------
+# # ---- Create local directory / path variables (FEMA data) ----
+# # -------------------------------------------------------------------------------------
+# 
+# # location of FEMA 100 year flood plain FGB files
+# FEMA_S3_BUCKET         <- "s3://lynker-hydrofabric/"
+# LYNKER_HF_FEMA_S3_PREFIX  <- "FEMA100/"
+# LYNKER_HF_FEMA_S3_URI            <- paste0(FEMA_S3_BUCKET, LYNKER_HF_FEMA_S3_PREFIX)
+# 
+# # FEMA100 year flood map FGB save location (temporary, will be deleted after processing)
+# FEMA_FGB_PATH        <- paste0(BASE_DIR, "/FEMA100")
+# FEMA_GEOJSON_PATH    <- paste0(BASE_DIR, "/FEMA100_geojson")
+# FEMA_CLEAN_PATH      <- paste0(BASE_DIR, "/FEMA100_clean")
+# FEMA_GPKG_PATH       <- paste0(BASE_DIR, "/FEMA100_gpkg")
+# FEMA_GPKG_BB_PATH    <- paste0(BASE_DIR, "/FEMA100_bounding_box") # TODO: Probably can be deleted too, not sure yet
+# 
+# FEMA_BY_VPU_PATH     <- paste0(BASE_DIR, "/FEMA_BY_VPU")
+# VPU_IDS              <- sf::st_drop_geometry(nhdplusTools::get_boundaries())$VPUID
+# 
+# FEMA_VPU_SUBFOLDERS  <- paste0(FEMA_BY_VPU_PATH, "/VPU_", VPU_IDS)
+# # FEMA_VPU_SUBFOLDERS <- paste0(
+# #                         FEMA_BY_VPU_PATH, "/VPU_",
+# #                         unlist(
+# #                           lapply(list.files(NEXTGEN_DIR, full.names = FALSE), function(vpu_file_names) {
+# #                             unlist(regmatches(vpu_file_names,  gregexpr("\\d+[A-Za-z]*", vpu_file_names)))})
+# #                           )
+# #                         )
+# 
+# # -------------------------------------------------------------------------------------
+# # ---- OVERWRITE_FEMA_FILES constant logicals----
+# # ---- > if TRUE, processing steps will be run again 
+# #          and overwrite existing previously processed files
+# # TODO: Describe these variables
+# # -------------------------------------------------------------------------------------
+# 
+# # Default is TRUE (i.e. a fresh processing run is done from start to finish)
+# OVERWRITE_FEMA_FILES  <- TRUE
+# DELETE_STAGING_GPKGS  <- TRUE # remove intermediary files from the main output folder
+# 
+
+# # -------------------------------------------------------------------------------------
+# # ---- (New single domain) Local directory / path variables ----
+# # -------------------------------------------------------------------------------------
+# 
+# # directory for new domain data 
+# NEW_DOMAIN_DIRNAME  <- "new_domain"
+# NEW_DOMAIN_DIR      <- paste0(BASE_DIR, "/", NEW_DOMAIN_DIRNAME)
+# 
+# NEW_DOMAIN_FLOWLINES_DIRNAME <- "flowlines"
+# NEW_DOMAIN_FLOWLINES_DIR     <- paste0(NEW_DOMAIN_DIR, "/", NEW_DOMAIN_FLOWLINES_DIRNAME)
+# 
+# NEW_DOMAIN_DEM_DIRNAME <- "dem"
+# NEW_DOMAIN_DEM_DIR     <- paste0(NEW_DOMAIN_DIR, "/", NEW_DOMAIN_DEM_DIRNAME)
+# 
+# NEW_DOMAIN_FLOWLINES_FILE  <- "AllDiffusiveCombined.gpkg"
+# NEW_DOMAIN_FLOWLINES_PATH  <- paste0(NEW_DOMAIN_FLOWLINES_DIR, "/", NEW_DOMAIN_FLOWLINES_FILE)
+# 
+# # # Local DEM file
+# # NEW_DOMAIN_DEM_FILE        <- "hi_dem.tif"
+# # NEW_DOMAIN_DEM_PATH        <- paste0(NEW_DOMAIN_DEM_DIR, "/", NEW_DOMAIN_DEM_FILE)
+# 
+# # Remote DEM file
+# NEW_DOMAIN_DEM_PATH        <- "/vsicurl/https://prd-tnm.s3.amazonaws.com/StagedProducts/Elevation/1/TIFF/USGS_Seamless_DEM_1.vrt"
+# 
+# NEW_DOMAIN_TRANSECTS_DIRNAME         <- "transects"
+# NEW_DOMAIN_CS_PTS_DIRNAME            <- "cs_pts"
+# NEW_DOMAIN_CROSS_SECTIONS_DIRNAME    <- "cross_sections"
+# 
+# NEW_DOMAIN_TRANSECTS_DIR          <- paste0(NEW_DOMAIN_DIR, "/", NEW_DOMAIN_TRANSECTS_DIRNAME)
+# NEW_DOMAIN_CS_PTS_DIR             <- paste0(NEW_DOMAIN_DIR, "/", NEW_DOMAIN_CS_PTS_DIRNAME)
+# NEW_DOMAIN_CROSS_SECTIONS_DIR     <- paste0(NEW_DOMAIN_DIR, "/", NEW_DOMAIN_CROSS_SECTIONS_DIRNAME)
+# 
+# # -------------------------------------------------------------------------------------
+# # ---- (New single domain) Local directory / path variables ----
+# # -------------------------------------------------------------------------------------
+# 
+# # directory for new domain data 
+# DOMAIN_WITH_FEMA_DIRNAME  <- "domain_with_fema"
+# DOMAIN_WITH_FEMA_DIR      <- paste0(BASE_DIR, "/", DOMAIN_WITH_FEMA_DIRNAME)
+# 
+# DOMAIN_WITH_FEMA_FLOWLINES_DIRNAME <- "flowlines"
+# DOMAIN_WITH_FEMA_FLOWLINES_DIR     <- paste0(DOMAIN_WITH_FEMA_DIR, "/", DOMAIN_WITH_FEMA_FLOWLINES_DIRNAME)
+# 
+# DOMAIN_WITH_FEMA_SUBSET_DIRNAME  <- "domain_subset"
+# DOMAIN_WITH_FEMA_SUBSET_DIR      <- paste0(DOMAIN_WITH_FEMA_DIR, "/", DOMAIN_WITH_FEMA_SUBSET_DIRNAME)
+# 
+# DOMAIN_WITH_FEMA_DEM_DIRNAME     <- "dem"
+# DOMAIN_WITH_FEMA_DEM_DIR         <- paste0(DOMAIN_WITH_FEMA_DIR, "/", DOMAIN_WITH_FEMA_DEM_DIRNAME)
+# 
+# DOMAIN_WITH_FEMA_FLOWLINES_FILE  <- "ls_conus.gpkg"
+# DOMAIN_WITH_FEMA_FLOWLINES_PATH  <- paste0(DOMAIN_WITH_FEMA_FLOWLINES_DIR, "/", DOMAIN_WITH_FEMA_FLOWLINES_FILE)
+# 
+# # Geopackage containing area to subset flowlines to before processing
+# DOMAIN_WITH_FEMA_SUBSET_FILE     <- "AllDiffusiveCombined.gpkg"
+# DOMAIN_WITH_FEMA_SUBSET_PATH     <- paste0(DOMAIN_WITH_FEMA_SUBSET_DIR, "/", DOMAIN_WITH_FEMA_SUBSET_FILE)
+# 
+# # # Local DEM file
+# # DOMAIN_WITH_FEMA_DEM_FILE        <- "hi_dem.tif"
+# # DOMAIN_WITH_FEMA_DEM_PATH        <- paste0(DOMAIN_WITH_FEMA_DEM_DIR, "/", DOMAIN_WITH_FEMA_DEM_FILE)
+# 
+# # Remote DEM file
+# DOMAIN_WITH_FEMA_DEM_PATH        <- "/vsicurl/https://prd-tnm.s3.amazonaws.com/StagedProducts/Elevation/1/TIFF/USGS_Seamless_DEM_1.vrt"
+# 
+# DOMAIN_WITH_FEMA_TRANSECTS_DIRNAME         <- "transects"
+# DOMAIN_WITH_FEMA_CS_PTS_DIRNAME            <- "cs_pts"
+# DOMAIN_WITH_FEMA_CROSS_SECTIONS_DIRNAME    <- "cross_sections"
+# DOMAIN_WITH_FEMA_OUTPUT_DIRNAME            <- "outputs"
+# DOMAIN_WITH_FEMA_VPU_SUBSETS_DIRNAME       <- "vpu-subsets"
+# DOMAIN_WITH_FEMA_ML_DIRNAME                <- "ml"
+# # aws s3 cp s3://prd-tnm/StagedProducts/Elevation/1/TIFF/USGS_Seamless_DEM_1.vrt /Users/anguswatters/Desktop/3DEP/3DEP.vrt
+# DOMAIN_WITH_FEMA_TRANSECTS_DIR          <- paste0(DOMAIN_WITH_FEMA_DIR, "/", DOMAIN_WITH_FEMA_TRANSECTS_DIRNAME)
+# DOMAIN_WITH_FEMA_CS_PTS_DIR             <- paste0(DOMAIN_WITH_FEMA_DIR, "/", DOMAIN_WITH_FEMA_CS_PTS_DIRNAME)
+# DOMAIN_WITH_FEMA_CROSS_SECTIONS_DIR     <- paste0(DOMAIN_WITH_FEMA_DIR, "/", DOMAIN_WITH_FEMA_CROSS_SECTIONS_DIRNAME)
+# DOMAIN_WITH_FEMA_OUTPUT_DIR             <- paste0(DOMAIN_WITH_FEMA_DIR, "/", DOMAIN_WITH_FEMA_OUTPUT_DIRNAME)
+# DOMAIN_WITH_FEMA_VPU_SUBSETS_DIR        <- paste0(DOMAIN_WITH_FEMA_DIR, "/", DOMAIN_WITH_FEMA_VPU_SUBSETS_DIRNAME)
+# DOMAIN_WITH_FEMA_ML_DIR                 <- paste0(DOMAIN_WITH_FEMA_DIR, "/", DOMAIN_WITH_FEMA_ML_DIRNAME)
+# 
+# ML_AUXILIARY_DATA_S3_URI <- paste0(LYNKER_SPATIAL_BASE_S3_URI, "bathymetry/ml_auxiliary_data")
+# # LYNKER_SPATIAL_BASE_S3_URI
+# LYNKER_HYDROFABRIC_S3_BUCKET_BASE_URI   <- "s3://lynker-hydrofabric/"
+# ML_BATHYMETRY_S3_DATA_DIR               <- "hydrofabric/nextgen/bathymetry/multisource_river_attributes/"
+# ML_BATHYMETRY_S3_URI                    <- paste0(LYNKER_HYDROFABRIC_S3_BUCKET_BASE_URI, ML_BATHYMETRY_S3_DATA_DIR)
+# VPU_ML_BATHYMETRY_S3_DIRS               <- paste0(ML_BATHYMETRY_S3_URI, "vpuid=", nhdplusTools::vpu_boundaries$VPUID, "/")
+# 
+# COASTAL_BATHY_DEM_S3_DIR     <- "coastal_bathy/diffusive_domain/"
+# COASTAL_BATHY_DEM_S3_DIR_URI <- paste0(LYNKER_HYDROFABRIC_S3_BUCKET_BASE_URI, COASTAL_BATHY_DEM_S3_DIR)
+# # COASTAL_BATHY_DEM_S3_URIS    <- paste0(COASTAL_BATHY_DEM_S3_DIR_URI, list_s3_objects(COASTAL_BATHY_DEM_S3_DIR_URI, ".tif$", AWS_PROFILE))
+# 
+# 
+# # # # --------------------------------------------------------------------------
+# # # # ---- Get locations of diffusive domain DEM files in S3 ----
+# # # # --------------------------------------------------------------------------
+# # COASTAL_BATHY_DEM_S3_URIS    <- paste0(COASTAL_BATHY_DEM_S3_DIR_URI, list_s3_objects(COASTAL_BATHY_DEM_S3_DIR_URI, ".tif$", AWS_PROFILE))
+# # 
+# # # COASTAL_BATHY_DEM_S3_URIS    <- paste0(COASTAL_BATHY_DEM_S3_DIR_URI, 
+# # #                                        list_s3_objects(COASTAL_BATHY_DEM_S3_DIR_URI, ".tif$", AWS_PROFILE)
+# # #                                        )
+# 
+# 
+
+
+
+
+
+
diff --git a/runners/cs_runner2/config_env.R b/runners/cs_runner2/config_env.R
new file mode 100644
index 0000000..a9b0b17
--- /dev/null
+++ b/runners/cs_runner2/config_env.R
@@ -0,0 +1,26 @@
+# load required packages
+pacman::p_load(
+  archive,
+  hydrofabric,
+  hydrofabric3D,
+  dplyr,
+  sf
+)
+
+# # install.packages("devtools")
+# devtools::install_github("anguswg-ucsb/hydrofabric3D")
+
+# load root directory 
+source("runners/cs_runner2/base_variables.R")
+source("runners/cs_runner2/utils.R")
+
+sf::sf_use_s2(FALSE)
+
+# create empty base directories 
+create_local_hydrofabric_base_dirs(base_dir = BASE_DIR)
+
+# create a new version directory
+create_new_version_dirs(base_dir = BASE_DIR, 
+                        version = VERSION, 
+                        with_output = TRUE)
+
diff --git a/runners/cs_runner2/download_conus_nextgen.R b/runners/cs_runner2/download_conus_nextgen.R
new file mode 100644
index 0000000..80de910
--- /dev/null
+++ b/runners/cs_runner2/download_conus_nextgen.R
@@ -0,0 +1,28 @@
+# This script pulls the desired NextGen geopackage datasets from http://www.lynker-spatial.com/ 
+# and saves them into a directory within "BASE_DIR"
+# BASE_DIR is defined within runners/cs_runner2/base_variables.R
+
+# load config variables
+source("runners/cs_runner2/base_variables.R")
+# source("runners/cs_runner/config_vars.R")
+
+# ---------------------------------------------------------------------------
+# ---- Download conus_nextgen.gpkg 
+# ---------------------------------------------------------------------------
+
+copy_cmd <- paste0('aws s3 cp ', CONUS_NEXTGEN_S3_URI, " ", CONUS_NEXTGEN_GPKG_PATH)
+
+if (!file.exists(CONUS_NEXTGEN_GPKG_PATH)) {
+  message("Copying S3 object:\n", CONUS_NEXTGEN_S3_URI)
+  tryCatch({
+      system(copy_cmd)
+      message("Download '", basename(CONUS_NEXTGEN_GPKG_PATH), "' complete!")
+      message("------------------")
+  }, error = function(e) {
+      message("Error downloading conus_nextgen.gpkg")
+      message(e)
+      stop()
+  })
+
+} else {
+  message("conus_nextgen.gpkg file already exists at\n > '", CONUS_NEXTGEN_GPKG_PATH, "'")
+}
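+
+# NOTE: system() returns the AWS CLI's exit status rather than raising an R error, 
+# so the tryCatch() above will not catch a failed download. A minimal sketch of an 
+# explicit exit-status check (using the same copy_cmd as above):
+# status <- system(copy_cmd)
+# if (status != 0) {
+#   stop("aws s3 cp failed with exit status ", status)
+# }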
diff --git a/runners/cs_runner2/download_conus_ref_features.R b/runners/cs_runner2/download_conus_ref_features.R
new file mode 100644
index 0000000..b6f6a06
--- /dev/null
+++ b/runners/cs_runner2/download_conus_ref_features.R
@@ -0,0 +1,31 @@
+# This script pulls the desired CONUS reference features geopackage from http://www.lynker-spatial.com/ 
+# and saves it into a directory within "BASE_DIR"
+# BASE_DIR is defined within runners/cs_runner2/base_variables.R
+
+# load config variables
+source("runners/cs_runner2/base_variables.R")
+# source("runners/cs_runner/config_vars.R")
+
+# ---------------------------------------------------------------------------
+# ---- Download conus_reference.gpkg 
+# ---------------------------------------------------------------------------
+
+copy_cmd <- paste0('aws s3 cp ', CONUS_REF_FEATURES_S3_URI, " ", CONUS_REF_FEATURES_GPKG_PATH)
+
+if (!file.exists(CONUS_REF_FEATURES_GPKG_PATH)) {
+  message("Copying S3 object:\n", CONUS_REF_FEATURES_S3_URI)
+  tryCatch({
+    system(copy_cmd)
+    message("Download '", basename(CONUS_REF_FEATURES_GPKG_PATH), "' complete!")
+    message("------------------")
+  }, error = function(e) {
+    message("Error downloading conus_nextgen.gpkg")
+    message(e)
+    stop()
+  })
+  
+} else {
+  message("conus_nextgen.gpkg file already exists at\n > '", CONUS_REF_FEATURES_GPKG_PATH, "'")
+}
+
+
\ No newline at end of file
diff --git a/runners/cs_runner2/download_dem_from_vrt.R b/runners/cs_runner2/download_dem_from_vrt.R
new file mode 100644
index 0000000..edd77bb
--- /dev/null
+++ b/runners/cs_runner2/download_dem_from_vrt.R
@@ -0,0 +1,32 @@
+library(raster)
+library(httr)
+library(terra)
+library(dplyr)
+
+# get the main config variables / paths 
+# source("runners/cs_runner/config_vars.R")
+source("runners/cs_runner2/base_variables.R")
+source("runners/cs_runner2/utils.R")
+
+base_dirs   <- get_base_dir_paths(BASE_DIR)
+
+DEM_VRT_DIR <- base_dirs$dem_vrt_dir
+DEM_TIF_DIR <- base_dirs$dem_tif_dir
+
+# "aws s3 sync s3://prd-tnm/StagedProducts/Elevation/1/TIFF/ /Volumes/T7SSD/lynker-spatial/dem/tif/ --no-sign-request --only-show-errors"
+
+# Parse the VRT file
+vrt_file <- list.files(DEM_VRT_DIR, full.names = TRUE)
+
+vrt_tiles <- terra::vrt_tiles(vrt_file)
+
+# tile_path <- vrt_tiles[500]
+tile_paths <- gsub("/vsicurl/", "",  vrt_tiles)
+
+# TODO: use AWS S3 sync command like below
+s3_sync_cmd <- paste0("aws s3 sync s3://prd-tnm/StagedProducts/Elevation/1/TIFF/ ", DEM_TIF_DIR, " --profile ", AWS_PROFILE, " --no-sign-request --only-show-errors")
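+
+# A minimal sketch of actually running the sync command and checking its exit status 
+# (system() returns the CLI's exit code; non-zero means the sync failed):
+# sync_status <- system(s3_sync_cmd)
+# if (sync_status != 0) {
+#   warning("aws s3 sync exited with status ", sync_status)
+# }
+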
+# TODO: Old method curl request each file individually.... super slow
+# error_tiles <- download_tiles(tile_paths, DEM_TIF_DIR)
\ No newline at end of file
diff --git a/runners/cs_runner2/download_fema100.R b/runners/cs_runner2/download_fema100.R
new file mode 100644
index 0000000..86b7e6d
--- /dev/null
+++ b/runners/cs_runner2/download_fema100.R
@@ -0,0 +1,123 @@
+# This script pulls the desired FEMA100 flood FGB datasets from the lynker-hydrofabric S3 bucket 
+# and saves them into a directory within "BASE_DIR"
+# BASE_DIR is defined within runners/cs_runner2/base_variables.R
+
+# NOTE: The lynker-hydrofabric S3 bucket is private at the moment
+
+# load config variables
+# source("runners/cs_runner/config_vars.R")
+source("runners/cs_runner2/base_variables.R")
+
+# -------------------------------------------------------------------------------------
+# ---- Create FEMA100/ directory and bounding box dir (if it does NOT exist) ----
+# -------------------------------------------------------------------------------------
+
+# if (!dir.exists(FEMA_FGB_PATH)) {
+#   message(paste0("FEMA100/ directory does not exist...\nCreating directory:\n > '", FEMA_FGB_PATH, "'"))
+#   dir.create(FEMA_FGB_PATH)
+# }
+# 
+# # create geojsons directory (if not exists) 
+# if (!dir.exists(FEMA_GEOJSON_PATH)) {
+#   message(paste0(FEMA_GEOJSON_PATH, " directory does not exist...\nCreating directory:\n > '", FEMA_GEOJSON_PATH, "'"))
+#   dir.create(FEMA_GEOJSON_PATH)
+# }
+# 
+# # create directory for cleaned FEMA geometries (if not exists) 
+# if (!dir.exists(FEMA_CLEAN_PATH)) {
+#   message(paste0(FEMA_CLEAN_PATH, " directory does not exist...\nCreating directory:\n > '", FEMA_CLEAN_PATH, "'"))
+#   dir.create(FEMA_CLEAN_PATH)
+# }
+# 
+# # create directory for cleaned FEMA geometries as geopackages (if not exists) 
+# if (!dir.exists(FEMA_GPKG_PATH)) {
+#   message(paste0(FEMA_GPKG_PATH, " directory does not exist...\nCreating directory:\n > '", FEMA_GPKG_PATH, "'"))
+#   dir.create(FEMA_GPKG_PATH)
+# }
+# 
+# # create simplified geojsons directory (if not exists)
+# if (!dir.exists(FEMA_SIMPLIFIED_PATH)) {
+#   message(paste0(FEMA_SIMPLIFIED_PATH, " directory does not exist...\nCreating directory:\n > '", FEMA_SIMPLIFIED_PATH, "'"))
+#   dir.create(FEMA_SIMPLIFIED_PATH)
+# }
+# 
+# # create simplified geojsons directory (if not exists)
+# if (!dir.exists(FEMA_DISSOLVED_PATH)) {
+#   message(paste0(FEMA_DISSOLVED_PATH, " directory does not exist...\nCreating directory:\n > '", FEMA_DISSOLVED_PATH, "'"))
+#   dir.create(FEMA_DISSOLVED_PATH)
+# }
+# 
+# # create exploded geojsons directory (if not exists)
+# if (!dir.exists(FEMA_EXPLODED_PATH)) {
+#   message(paste0(FEMA_EXPLODED_PATH, " directory does not exist...\nCreating directory:\n > '", FEMA_EXPLODED_PATH, "'"))
+#   dir.create(FEMA_EXPLODED_PATH)
+# }
+# 
+# # create FEMA GPKG Bounding Boxes directory (if not exists)
+# if (!dir.exists(FEMA_GPKG_BB_PATH)) {
+#   message(paste0(FEMA_GPKG_BB_PATH, " directory does not exist...\nCreating directory:\n > '", FEMA_GPKG_BB_PATH, "'"))
+#   dir.create(FEMA_GPKG_BB_PATH)
+# }
+# 
+# if (!dir.exists(FEMA_FGB_BB_PATH)) {
+#   message(paste0(FEMA_FGB_BB_PATH, " directory does not exist...\nCreating directory:\n > '", FEMA_FGB_BB_PATH, "'"))
+#   dir.create(FEMA_FGB_BB_PATH)
+# }
+
+# -------------------------------------------------------------------------------------
+# ---- Get list of FEMA FGB files in S3 bucket ----
+# -------------------------------------------------------------------------------------
+
+# list objects in the S3 bucket and regular-expression match object keys to the FEMA ".fgb" file pattern
+fema_list_command <- paste0('#!/bin/bash
+            # AWS S3 Bucket and Directory information
+            S3_BUCKET="', LYNKER_HF_FEMA_S3_URI, '" 
+            
+            # Regular expression pattern to match object keys
+            PATTERN=".fgb$"
+            
+            # AWS CLI command to list objects in the S3 bucket and use grep to filter them
+            S3_OBJECTS=$(aws s3 ls "$S3_BUCKET" --profile ', AWS_PROFILE, ' | awk \'{print $4}\' | grep -E "$PATTERN")
+            
+            echo "$S3_OBJECTS"'
+)
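+
+# A minimal R-side alternative sketch (same result without the embedded bash heredoc):
+# list the bucket once via system2() and filter the returned keys with grepl().
+# s3_ls            <- system2("aws", c("s3", "ls", LYNKER_HF_FEMA_S3_URI, "--profile", AWS_PROFILE), stdout = TRUE)
+# keys             <- sub(".*\\s", "", s3_ls)
+# FEMA_BUCKET_KEYS <- keys[grepl("\\.fgb$", keys)]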
+
+# -------------------------------------------------------------------------------------
+# ---- Get the S3 buckets object keys for FEMA 100 FGB files ----
+# -------------------------------------------------------------------------------------
+
+# Run the script to get a list of the FEMA FGB files that matched the regular expression above
+FEMA_BUCKET_KEYS <- system(fema_list_command, intern = TRUE)
+
+# create bucket object URIs
+# FEMA_BUCKET_OBJECTS <- paste0(FEMA_S3_BUCKET, LYNKER_HF_FEMA_S3_PREFIX, FEMA_BUCKET_KEYS)
+
+# -------------------------------------------------------------------------------------
+# ---- Download FEMA 100 year FGB files from S3 ----
+# -------------------------------------------------------------------------------------
+
+# Parse the selected S3 object keys from the FEMA100 bucket directory and copy them to the local destination directory if the file does NOT exist yet
+for (key in FEMA_BUCKET_KEYS) {
+  local_save_path <- paste0(FEMA_FGB_PATH, "/", key)
+  
+  if (!file.exists(local_save_path)) {
+    copy_cmd <- paste0('aws s3 cp ', LYNKER_HF_FEMA_S3_URI, key, " ", local_save_path, " --profile ", AWS_PROFILE)
+    
+    message("S3 object:\n > '", LYNKER_HF_FEMA_S3_URI, key, "'")
+    message("Downloading S3 object to:\n > '", local_save_path, "'")
+    
+    system(copy_cmd)
+    
+    message(" > '", key, "' download complete!")
+    message("----------------------------------")
+  } else {
+    message("File already exists at:\n > '", local_save_path, "'")
+  }
+}
+
+
+
+  
+
+
+
+
diff --git a/runners/cs_runner/partition_fema_by_vpu.R b/runners/cs_runner2/partition_fema_by_vpu.R
similarity index 69%
rename from runners/cs_runner/partition_fema_by_vpu.R
rename to runners/cs_runner2/partition_fema_by_vpu.R
index 8e413eb..2292588 100644
--- a/runners/cs_runner/partition_fema_by_vpu.R
+++ b/runners/cs_runner2/partition_fema_by_vpu.R
@@ -12,9 +12,10 @@
 # - Get FEMA bounding box geometries (maybe)
 
 # load config variables
-source("runners/cs_runner/config_vars.R")
-source("runners/cs_runner/config.R")
-source("runners/cs_runner/utils.R")
+# source("runners/cs_runner/config_vars.R")
+# source("runners/cs_runner/config.R")
+source("runners/cs_runner2/base_variables.R")
+source("runners/cs_runner2/utils.R")
 
 library(dplyr)
 library(sf)
@@ -38,46 +39,6 @@ OVERWRITE_FEMA_FILES  <- TRUE
 DELETE_STAGING_GPKGS  <- TRUE
 Sys.setenv(OGR_GEOJSON_MAX_OBJ_SIZE=0)
 
-# -------------------------------------------------------------------------------------
-# ---- Create directories (if they do NOT exist) ----
-# -------------------------------------------------------------------------------------
-
-# create directory for cleaned FEMA geometries as geopackages (if not exists) 
-if (!dir.exists(FEMA_GPKG_PATH)) {
-  message(paste0(FEMA_GPKG_PATH, " directory does not exist...\nCreating directory:\n > '", FEMA_GPKG_PATH, "'"))
-  dir.create(FEMA_GPKG_PATH)
-}
-
-# create directory for FEMA geomteries partioned by VPU
-if (!dir.exists(FEMA_BY_VPU_PATH)) {
-  message(paste0(FEMA_BY_VPU_PATH, " directory does not exist...\nCreating directory:\n > '", FEMA_BY_VPU_PATH, "'"))
-  dir.create(FEMA_BY_VPU_PATH)
-}
-
-for (VPU_SUBFOLDER in FEMA_VPU_SUBFOLDERS) {
-  # create directory for FEMA geomteries by VPU
-  # message(VPU_SUBFOLDER)
-  
-  # state_dir  = paste0(VPU_SUBFOLDER, "/states/")
-  # merged_dir = paste0(VPU_SUBFOLDER, "/merged/")
-  
-  if (!dir.exists(VPU_SUBFOLDER)) {
-    message("Creating FEMA VPU subfolder...")
-    message(paste0("'/", basename(VPU_SUBFOLDER), "' directory does not exist...\n  Creating directory:\n > '", VPU_SUBFOLDER, "'"))
-    dir.create(VPU_SUBFOLDER)
-  }
-  # if (!dir.exists(state_dir)) { 
-  #   message("Creating FEMA VPU states subfolder...")
-  #   message(paste0("'/", basename(state_dir), "' directory does not exist...\n  Creating directory:\n > '", state_dir, "'"))
-  #   dir.create(state_dir)
-  # }
-  # if (!dir.exists(merged_dir)) { 
-  #   message("Creating FEMA VPU merged subfolder...")
-  #   message(paste0("'/", basename(merged_dir), "' directory does not exist...\n  Creating directory:\n > '", merged_dir, "'"))
-  #   dir.create(merged_dir)
-  # }
-}
-
 # -------------------------------------------------------------------------------------
 # ---- Get paths to downloaded FEMA 100 FGBs ----
 # -------------------------------------------------------------------------------------
@@ -86,10 +47,14 @@ FEMA_FILENAMES        <- list.files(FEMA_FGB_PATH, full.names = FALSE)
 FEMA_FILE_PATHS       <- paste0(FEMA_FGB_PATH, "/", FEMA_FILENAMES)
 
 for (file in FEMA_FILENAMES) {
+  # message(file)
   
   STAGING_FILES_TO_DELETE <- c()
   
-  # Convert FGB to GeoJSON
+  # -------------------------------------------------------------------------------------------------------------------
+  # ---- Step 1: Convert FGB to GeoJSON 
+  # -------------------------------------------------------------------------------------------------------------------
+ 
   local_fema_path   <- paste0(FEMA_FGB_PATH, "/", file)
   geojson_filename  <- gsub(".fgb", ".geojson", file)
   geojson_save_path <- paste0(FEMA_GPKG_PATH, "/", geojson_filename)
@@ -102,6 +67,7 @@ for (file in FEMA_FILENAMES) {
   message(" >>> '", geojson_filename, "' already exists? ", geojson_exists)
   message(" >>> Overwrite? ", OVERWRITE_FEMA_FILES)
   
+  # Step 1.1 Run FGB to GeoJSON conversion
   ogr2ogr_command <- paste0("ogr2ogr ", geojson_save_path, " ", local_fema_path)
   
   if (OVERWRITE_FEMA_FILES || !geojson_exists) {
@@ -111,7 +77,10 @@ for (file in FEMA_FILENAMES) {
     STAGING_FILES_TO_DELETE <- c(STAGING_FILES_TO_DELETE, geojson_save_path)
   }
   
-  # Clean GeoJSON
+  # -------------------------------------------------------------------------------------------------------------------
+  # ---- Step 2: Clean GeoJSON
+  # -------------------------------------------------------------------------------------------------------------------
+ 
   message("Simplify, dissolve, explode > '", geojson_filename, "'")
   output_clean_filename <- gsub(".geojson", "_clean.geojson", geojson_filename)
   output_clean_geojson_path     <- paste0(FEMA_GPKG_PATH, "/", output_clean_filename)
@@ -124,10 +93,9 @@ for (file in FEMA_FILENAMES) {
                              ' -dissolve2 FLD_AR_ID \\', 
                              ' -simplify 0.1 visvalingam \\', 
                              ' -snap \\',
-                             ' -o ', output_clean_geojson_path
-  )
-  
-  
+                             ' -o ', output_clean_geojson_path)
+
+  # Step 2.1 Run simplify, dissolve, explode on cleaned GeoJSON
   if (OVERWRITE_FEMA_FILES || !clean_geojson_exists) {
     message("Running mapshaper 'simplify', 'dissolve', and 'explode' via CLI...")
     system(mapshaper_command)
@@ -136,9 +104,11 @@ for (file in FEMA_FILENAMES) {
     STAGING_FILES_TO_DELETE <- c(STAGING_FILES_TO_DELETE, output_clean_geojson_path)
   }
   
-  # Convert cleaned GeoJSON to GeoPackage
+  # -------------------------------------------------------------------------------------------------------------------
+  # ---- Step 3: Convert cleaned GeoJSON to GeoPackage
+  # -------------------------------------------------------------------------------------------------------------------
+ 
   message("Fema 100 year flood plain:\n > '", output_clean_filename, "'")
-  
   output_gpkg_filename  <- gsub("_clean.geojson", "_clean.gpkg", output_clean_filename)
   output_gpkg_path      <- paste0(FEMA_GPKG_PATH, "/", output_gpkg_filename)
   
@@ -155,6 +125,32 @@ for (file in FEMA_FILENAMES) {
     message("Writing '", output_gpkg_filename, "' to: \n > '", output_gpkg_path, "'")
   }
   
+  # -------------------------------------------------------------------------------------------------------------------
+  # ---- Step 4: Apply final dissolve/snap and removal of internal boundaries in FEMA geometries  ----
+  # -------------------------------------------------------------------------------------------------------------------
+ 
+  message("Resolving internal boundaries, islands, and topology issues:\n > '", basename(output_gpkg_path), "'")
+  
+  fema <- sf::read_sf(output_gpkg_path)
+  fema <- resolve_internal_fema_boundaries(fema, output_gpkg_path)  
+
+  message("End time: ", Sys.time())
+  
+  if (OVERWRITE_FEMA_FILES) {
+    message("Writting '", basename(output_gpkg_path), "' to: \n > '", output_gpkg_path, "'")
+    sf::write_sf(
+      fema,
+      # fema_clean,
+      output_gpkg_path
+    )
+  }
+  
+
+  
+  # -------------------------------------------------------------------------------------------------------------------
+  # ---- Step 5: Delete intermediary files 
+  # -------------------------------------------------------------------------------------------------------------------
+  
   message("Deleting intermediary files\n")
   for (delete_file in STAGING_FILES_TO_DELETE) {
     if (file.exists(delete_file)) {
@@ -163,7 +159,9 @@ for (file in FEMA_FILENAMES) {
     }
     
   }
-  
+
+  rm(fema)
+
   message()
   
 }
@@ -171,87 +169,71 @@ for (file in FEMA_FILENAMES) {
 # -------------------------------------------------------------------------------------------------------------------
 # ---- Apply final dissolve/snap and removal of internal boundaries in FEMA geometries  ----
 # -------------------------------------------------------------------------------------------------------------------
 
-# paths to FEMA 100 year flood plain files
-FEMA_gpkg_paths      <- list.files(FEMA_GPKG_PATH, full.names = TRUE)
 
-for (file_path in FEMA_gpkg_paths) {
-  message("Resolving internal boundaries, islands, and topology issues:\n > '", basename(file_path), "'")
-  
-  fema <- sf::read_sf(file_path)
-  
-  fema <-
-    fema[!sf::st_is_empty(fema), ] %>% 
-    sf::st_transform(5070)
-  
-  #  TODO: Snap using geos::geos_snap()
-  # fema <-
-  #   geos::geos_snap(
-  #     geos::as_geos_geometry(fema),
-  #     geos::as_geos_geometry(fema),
-  #     tolerance = 1
-  #     ) %>%
-  #   geos::geos_make_valid()  %>%
-  #   sf::st_as_sf()
-  
-  # TODO: we get this error when trying to use the geometry column after geos snapping
-  # TODO: Error = "Error: Not compatible with STRSXP: [type=NULL]."
-  # fema %>%
-  # sf::st_cast("POLYGON")
-  
-  # TODO: Snap using sf::st_snap()
-  # fema <- sf::st_snap(
-  #             fema,
-  #             fema,
-  #             tolerance = 2
-  #             )
-  
-  fema <-
-    fema %>% 
-    # fema[!sf::st_is_empty(fema), ] %>% 
-    dplyr::select(geometry = geom) %>%
-    add_predicate_group_id(sf::st_intersects) %>% 
-    sf::st_make_valid() %>% 
-    dplyr::group_by(group_id) %>% 
-    dplyr::summarise(
-      geometry = sf::st_combine(sf::st_union(geometry))
-    ) %>% 
-    dplyr::ungroup() %>% 
-    dplyr::select(-group_id) %>% 
-    add_predicate_group_id(sf::st_intersects) %>% 
-    rmapshaper::ms_dissolve(sys = TRUE, sys_mem = 16) %>% 
-    rmapshaper::ms_explode(sys = TRUE, sys_mem = 16) %>% 
-    dplyr::mutate(
-      fema_id = as.character(1:dplyr::n())
-    ) %>% 
-    dplyr::select(fema_id, geometry)
+
+# # paths to FEMA 100 year flood plain files
+# FEMA_gpkg_paths      <- list.files(FEMA_GPKG_PATH, full.names = TRUE)
+
+# for (file_path in FEMA_gpkg_paths) {
+#   message("Resolving internal boundaries, islands, and topology issues:\n > '", basename(file_path), "'")
   
-  # mapview::mapview(fema, color = 'cyan', col.regions = "cyan") + 
-  # mapview::mapview(end_fema, color = 'red', col.regions = "white") 
+#   fema <- sf::read_sf(file_path)
+
+#   fema <- resolve_internal_fema_boundaries(fema) 
   
-  fema <- 
-    fema %>% 
-    dplyr::mutate(
-      source = basename(file_path),
-      state  = gsub("-100yr-flood_valid_clean.gpkg", "", source)
-    ) %>%
-    dplyr::select(fema_id, source, state, 
-                  # areasqkm, 
-                  geometry)
+#   # fema <-
+#   #   fema[!sf::st_is_empty(fema), ] %>% 
+#   #   sf::st_transform(5070)
   
-  message("End time: ", Sys.time())
+#   # fema <-
+#   #   fema %>% 
+#   #   # fema[!sf::st_is_empty(fema), ] %>% 
+#   #   dplyr::select(geometry = geom) %>%
+#   #   add_predicate_group_id(sf::st_intersects) %>% 
+#   #   sf::st_make_valid() %>% 
+#   #   dplyr::group_by(group_id) %>% 
+#   #   dplyr::summarise(
+#   #     geometry = sf::st_combine(sf::st_union(geometry))
+#   #   ) %>% 
+#   #   dplyr::ungroup() %>% 
+#   #   dplyr::select(-group_id) %>% 
+#   #   add_predicate_group_id(sf::st_intersects) %>% 
+#   #   rmapshaper::ms_dissolve(sys = TRUE, sys_mem = 16) %>% 
+#   #   rmapshaper::ms_explode(sys = TRUE, sys_mem = 16) %>% 
+#   #   dplyr::mutate(
+#   #     fema_id = as.character(1:dplyr::n())
+#   #   ) %>% 
+#   #   dplyr::select(fema_id, geometry)
+  
+#   # # mapview::mapview(fema, color = 'cyan', col.regions = "cyan") + 
+#   # # mapview::mapview(end_fema, color = 'red', col.regions = "white") 
+  
+#   # fema <- 
+#   #   fema %>% 
+#   #   dplyr::mutate(
+#   #     source = basename(file_path),
+#   #     state  = gsub("-100yr-flood_valid_clean.gpkg", "", source)
+#   #   ) %>%
+#   #   dplyr::select(fema_id, source, state, 
+#   #                 # areasqkm, 
+#   #                 geometry)
+  
+#   message("End time: ", Sys.time())
   
-  if (OVERWRITE_FEMA_FILES) {
-    message("Writting '", basename(file_path), "' to: \n > '", file_path, "'")
-    sf::write_sf(
-      # fema_clean,
-      fema,
-      file_path
-    )
-  }
-  message()
+#   if (OVERWRITE_FEMA_FILES) {
+#     message("Writting '", basename(file_path), "' to: \n > '", file_path, "'")
+#     sf::write_sf(
+#       # fema_clean,
+#       fema,
+#       file_path
+#     )
+#   }
+#   message()
   
-}
+# }
 
 # -------------------------------------------------------------------------------------
 # ---- Partion parts of each FEMA GPKGs to a Nextgen VPU ---- 
@@ -261,11 +243,33 @@ for (file_path in FEMA_gpkg_paths) {
 FEMA_CLEAN_GPKG_PATHS      <- list.files(FEMA_GPKG_PATH, full.names = TRUE)
 
 # paths to nextgen datasets and model attribute parquet files
-NEXTGEN_FILENAMES    <- list.files(NEXTGEN_DIR, full.names = FALSE)
-NEXTGEN_FILE_PATHS   <- paste0(NEXTGEN_DIR, NEXTGEN_FILENAMES)
+# CONUS_NEXTGEN_GPKG_PATH 
+
+# layer_info = sf::st_layers(CONUS_NEXTGEN_GPKG_PATH)
+# layer_info[layer_info$name == "flowpaths", "fields"]
+# layer_inf
+
+# flines = sf::read_sf(CONUS_NEXTGEN_GPKG_PATH, layer = "flowpaths")
+# flines %>% names()
+# (flines$vpuid  %>% unique()) %in% VPU_IDS
+# VPU_IDS
+# VPU_IDS %in% (flines$vpuid  %>% unique())
+
+# query the unique VPU IDs from the "flowpaths" layer of the CONUS nextgen geopackage
+CONUS_VPU_IDS <- 
+  CONUS_NEXTGEN_GPKG_PATH %>% 
+  sf::read_sf(query = "SELECT DISTINCT vpuid FROM flowpaths") %>%
+  dplyr::pull()
+
+
+# NEXTGEN_FILENAMES    <- list.files(NEXTGEN_DIR, full.names = FALSE)
+# NEXTGEN_FILE_PATHS   <- paste0(NEXTGEN_DIR, NEXTGEN_FILENAMES)
+
 
 for (file_path in FEMA_CLEAN_GPKG_PATHS) {
   
+  # file_path = FEMA_CLEAN_GPKG_PATHS[25]
+
   fema_file <- basename(file_path)
   
   message("Partioning FEMA polygons by VPU: \n > FEMA gpkg: '", fema_file, "'")
@@ -273,16 +277,23 @@ for (file_path in FEMA_CLEAN_GPKG_PATHS) {
   # read in fema polygons
   fema <- sf::read_sf(file_path)
   
-  for (nextgen_path in NEXTGEN_FILE_PATHS) {
-    nextgen_basename <- basename(nextgen_path)
-    vpu              <- unlist(regmatches(nextgen_basename, gregexpr("\\d+[A-Za-z]*", nextgen_basename)))
+  for (vpu in CONUS_VPU_IDS) {
+
+    # vpu = CONUS_VPU_IDS[12]
+    
+    # nextgen_basename <- basename(nextgen_path)
+    # vpu              <- unlist(regmatches(nextgen_basename, gregexpr("\\d+[A-Za-z]*", nextgen_basename)))
     
     message("VPU: ", vpu)   
-    message("- nextgen gpkg:\n > '", nextgen_path, "'")   
-    message(" > Checking if '", fema_file, "' intersects with '", nextgen_basename, "'")
+    # message("- nextgen gpkg:\n > '", nextgen_path, "'")   
+    message(" > Checking if '", fema_file, "' intersects with CONUS flowpaths in VPU '", vpu, "'")
     
     # read in nextgen flowlines 
-    flines <- sf::read_sf(nextgen_path, layer = "flowpaths")
+    flines <- 
+      CONUS_NEXTGEN_GPKG_PATH %>% 
+      sf::read_sf(query = paste0("SELECT * FROM flowpaths WHERE vpuid = '", vpu, "'"))
+
+    # flines <- sf::read_sf(nextgen_path, layer = "flowpaths")
     
     # get the FEMA polygons that intersect with the nextgen flowlines
     fema_intersect <- polygons_with_line_intersects(fema, flines)
@@ -294,7 +305,7 @@ for (file_path in FEMA_CLEAN_GPKG_PATHS) {
     if(fema_in_nextgen) {
       
       # create filepaths
-      vpu_subfolder      <- paste0("VPU_", vpu)
+      vpu_subfolder      <- paste0("vpu-", vpu)
       # vpu_subfolder_path <- paste0(FEMA_BY_VPU_PATH, "/", vpu_subfolder, "/states")
       vpu_subfolder_path <- paste0(FEMA_BY_VPU_PATH, "/", vpu_subfolder)
       
@@ -325,7 +336,60 @@ for (file_path in FEMA_CLEAN_GPKG_PATHS) {
       
     }
     message()
-  }
+  } 
+  # for (nextgen_path in NEXTGEN_FILE_PATHS) {
+  #   nextgen_basename <- basename(nextgen_path)
+  #   vpu              <- unlist(regmatches(nextgen_basename, gregexpr("\\d+[A-Za-z]*", nextgen_basename)))
+    
+  #   message("VPU: ", vpu)   
+  #   message("- nextgen gpkg:\n > '", nextgen_path, "'")   
+  #   message(" > Checking if '", fema_file, "' intersects with '", nextgen_basename, "'")
+    
+  #   # read in nextgen flowlines 
+  #   flines <- sf::read_sf(nextgen_path, layer = "flowpaths")
+    
+  #   # get the FEMA polygons that intersect with the nextgen flowlines
+  #   fema_intersect <- polygons_with_line_intersects(fema, flines)
+    
+  #   fema_in_nextgen <-  nrow(fema_intersect) != 0
+    
+  #   message("FEMA intersects with nextgen flowlines? ", fema_in_nextgen)
+    
+  #   if(fema_in_nextgen) {
+      
+  #     # create filepaths
+  #     vpu_subfolder      <- paste0("VPU_", vpu)
+  #     # vpu_subfolder_path <- paste0(FEMA_BY_VPU_PATH, "/", vpu_subfolder, "/states")
+  #     vpu_subfolder_path <- paste0(FEMA_BY_VPU_PATH, "/", vpu_subfolder)
+      
+  #     # vpu_subfolder_path <- FEMA_VPU_SUBFOLDERS[grepl(vpu_subfolder, FEMA_VPU_SUBFOLDERS)]
+      
+  #     fema_intersect <-
+  #       fema_intersect %>%
+  #       dplyr::mutate(
+  #         vpu = vpu
+  #       ) %>%
+  #       dplyr::select(vpu, fema_id, source, state, geom)
+      
+  #     # state <- gsub("-100yr-flood_valid_clean.gpkg", "", fema_file)
+      
+  #     fema_vpu_filename <- gsub(".gpkg", paste0("_", vpu, ".gpkg"), fema_file)
+  #     fema_vpu_path     <- paste0(vpu_subfolder_path, "/", fema_vpu_filename)
+      
+      
+  #     if (OVERWRITE_FEMA_FILES) {
+  #       message("Writting '", basename(fema_vpu_filename), "' to: \n > '", fema_vpu_path, "'")
+        
+  #       sf::write_sf(
+  #         fema_intersect,
+  #         fema_vpu_path
+  #       )
+  #     }
+      
+      
+  #   }
+  #   message()
+  # }
   
   
   message(
@@ -548,6 +612,54 @@ for (i in 1:length(FEMA_VPU_SUBFOLDERS)) {
     
   }
 }
+
+
+# -------------------------------------------------------------------------------------
+# ---- Store all FEMA layers in a single conus_fema.gpkg 
+# -------------------------------------------------------------------------------------
+
+# Combine multiple geopackages into a single output geopackage, one layer per input file.
+# Layer names come from the input file basenames; duplicate names get a numeric suffix.
+combine_gpkg_files <- function(gpkg_paths, output_gpkg) {
+  
+  layer_counter <- list()
+  
+  for (gpkg_path in gpkg_paths) {
+    
+    base_name <- tools::file_path_sans_ext(basename(gpkg_path))
+    # base_name <- gsub("_output.gpkg", "", basename(gpkg_path))
+    
+    # disambiguate duplicate layer names with a numeric suffix
+    if (base_name %in% names(layer_counter)) {
+      layer_counter[[base_name]] <- layer_counter[[base_name]] + 1
+      layer_name <- paste0(base_name, "_", layer_counter[[base_name]])
+    } else {
+      layer_counter[[base_name]] <- 1
+      layer_name <- base_name
+    }
+    
+    tryCatch({
+      sf_layer <- sf::st_read(gpkg_path, quiet = TRUE)
+      
+      sf::st_write(sf_layer, 
+                   dsn = output_gpkg, 
+                   layer = layer_name, 
+                   append = TRUE,
+                   quiet = TRUE)
+      
+      message("Successfully added '", basename(gpkg_path), "' as layer: '", layer_name, "' to\n > '", output_gpkg, "'")
+      
+    }, error = function(e) {
+      warning("Error processing: ",  basename(gpkg_path))
+      warning(e)
+    })
+  }
+}
+
+fema_vpu_layers <- list.files(FEMA_VPU_SUBFOLDERS, full.names = TRUE)[grepl("_output.gpkg", list.files(FEMA_VPU_SUBFOLDERS))]
+
+combine_gpkg_files(fema_vpu_layers, CONUS_FEMA_GPKG_PATH)
+
+
 # # -------------------------------------------------------------------------------------
 # # ---- Union each VPU geopackage (either on state or just touching predicate) ---- 
 # # -------------------------------------------------------------------------------------
diff --git a/runners/cs_runner2/utils.R b/runners/cs_runner2/utils.R
new file mode 100644
index 0000000..6feff0b
--- /dev/null
+++ b/runners/cs_runner2/utils.R
@@ -0,0 +1,2562 @@
+# Create an empty file structure 
+# base_dir: character, top level directory path
+# Directory tree:
+# base_dir/
+#   └── lynker-spatial/
+#     ├── hydrofabric/
+#     ├── dem/
+#         ├── vrt/
+#         ├── tif/
+#     ├── cs-extension-polygons/
+create_local_hydrofabric_base_dirs <- function(base_dir) {
+  
+  
+  # build paths
+  hydrofabric_dir <- paste0(base_dir, "/hydrofabric")
+  
+  # DEM dirs
+  dem_dir                    <- file.path(base_dir, "dem")
+  dem_vrt_dir                <- file.path(dem_dir, "vrt")
+  dem_tif_dir                <- file.path(dem_dir, "tif")
+  
+  # polygons for transect extensions
+  cs_extension_polygons_dir     <- file.path(base_dir, "cs-extension-polygons")
+  
+  # FEMA data 
+  fema_dir                      <- file.path(base_dir, "fema")
+  
+  fema_fgb_dir     <- file.path(fema_dir, "fema-fgb")
+  fema_geojson_dir <- file.path(fema_dir, "fema-geojson")
+  fema_clean_dir   <- file.path(fema_dir, "fema-clean")
+  fema_gpkg_dir    <- file.path(fema_dir, "fema-gpkg")
+  
+  # BY VPU folders 
+  VPU_IDS               <- get_vpu_ids()
+  fema_by_vpu_dir       <- file.path(fema_dir, "fema-by-vpu")
+  fema_by_vpu_subdirs   <- paste0(fema_by_vpu_dir, "/vpu-", VPU_IDS)
+  
+  # create base directories
+  create_if_not_exists(base_dir)
+  create_if_not_exists(hydrofabric_dir)
+  
+  # DEM dirs
+  create_if_not_exists(dem_dir)
+  create_if_not_exists(dem_vrt_dir)
+  create_if_not_exists(dem_tif_dir)
+  
+  # extension polygons
+  create_if_not_exists(cs_extension_polygons_dir)
+  
+  
+  # Create FEMA folders
+  create_if_not_exists(fema_dir)
+  create_if_not_exists(fema_fgb_dir)
+  create_if_not_exists(fema_geojson_dir)
+  create_if_not_exists(fema_clean_dir)
+  create_if_not_exists(fema_gpkg_dir)
+  create_if_not_exists(fema_by_vpu_dir)
+  
+  for (path in fema_by_vpu_subdirs) {
+    create_if_not_exists(path)
+  }
+  
+}
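+
+# Example usage (one-time setup of the local directory tree):
+# create_local_hydrofabric_base_dirs(base_dir = "/Volumes/T7SSD/lynker-spatial")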
+
+get_vpu_ids <- function() {
+  VPU_IDS              <- c('01', '02', '03N', '03S', '03W', '04', '05', '06', '07', '08', '09', 
+                            '10L', '10U', '11', '12', '13', '14', '15', '16', '17', '18', '20', '21')
+  # VPU_IDS              <- sf::st_drop_geometry(nhdplusTools::get_boundaries())$VPUID
+  
+  return(VPU_IDS)
+  
+}
+
+# # Base directory for local file storage
+# BASE_DIR    <- '/Volumes/T7SSD/lynker-spatial'
+# base_dir <- BASE_DIR
+# # FEMA100 year flood map FGB save location (temporary, will be deleted after processing)
+# FEMA_FGB_PATH        <- file.path(BASE_DIRS_LIST$fema_dir, "fema_fgb")
+# FEMA_GEOJSON_PATH    <- file.path(BASE_DIRS_LIST$fema_dir, "fema_geojson")
+# FEMA_CLEAN_PATH      <- file.path(BASE_DIRS_LIST$fema_dir, "fema_clean")
+# FEMA_GPKG_PATH       <- file.path(BASE_DIRS_LIST$fema_dir, "fema_gpkg")
+# FEMA_BY_VPU_PATH     <- file.path(BASE_DIRS_LIST$fema_dir, "FEMA_BY_VPU")
+# 
+# 
+# VPU_IDS              <- c('01', '02', '03N', '03S', '03W', '04', '05', '06', '07', '08', '09', 
+#                           '10L', '10U', '11', '12', '13', '14', '15', '16', '17', '18', '20', '21')
+# # VPU_IDS              <- sf::st_drop_geometry(nhdplusTools::get_boundaries())$VPUID
+# 
+# paste0("'", sf::st_drop_geometry(nhdplusTools::get_boundaries())$VPUID, "'", collapse = ", ")
+# FEMA_VPU_SUBFOLDERS  <- paste0(FEMA_BY_VPU_PATH, "/VPU_", VPU_IDS)
+
+# Create an empty file structure for a new version within a specified base_dir
+# base_dir: character, top level directory path
+# Directory tree:
+# base_dir/
+#   └── lynker-spatial/
+#     ├── hydrofabric/
+#         ├── version_number/
+#             ├── network/
+#             ├── transects/
+#             ├── cross-sections/
+#                 ├── dem/
+#                 ├── dem-ml/
+#                 ├── dem-coastal-bathy/
+#                 ├── dem-points/
+create_new_version_dirs <- function(base_dir, version, with_output = FALSE) {
+  # version = "v3.0"
+  # base_dir <- BASE_DIR
+  
+  # build paths
+  hydrofabric_dir   <- paste0(base_dir, "/hydrofabric")
+  version_base_dir  <- paste0(hydrofabric_dir, "/", version)
+  
+  # machine learning (ML) data
+  ml_dir                     <- paste0(version_base_dir, "/ml")
+  
+  # reference features 
+  ref_features_dir           <- paste0(version_base_dir, "/reference-features")
+  
+  # conus network gpkg
+  network_dir                <- paste0(version_base_dir, "/network")
+  
+  # transects
+  transects_dir              <- paste0(version_base_dir, "/transects")
+  
+  # cross sections dirs
+  cross_sections_dir                 <- paste0(version_base_dir, "/cross-sections")
+  cross_sections_dem_dir             <- paste0(cross_sections_dir, "/dem")
+  cross_sections_ml_dir              <- paste0(cross_sections_dir, "/dem-ml")
+  cross_sections_coastal_bathy_dir   <- paste0(cross_sections_dir, "/dem-coastal-bathy")
+  cross_sections_dem_pts_dir         <- paste0(cross_sections_dir, "/dem-points")
+  
+  if(with_output) {
+    output_dir       <- paste0(version_base_dir, "/outputs")
+  }
+  
+  # create version BASE dir
+  create_if_not_exists(version_base_dir)
+  
+  # CONUS dir
+  create_if_not_exists(network_dir)
+  
+  # ML data
+  create_if_not_exists(ml_dir)
+  
+  # reference features data
+  create_if_not_exists(ref_features_dir)
+  
+  # transects
+  create_if_not_exists(transects_dir)
+  
+  # CS pts
+  create_if_not_exists(cross_sections_dir)
+  create_if_not_exists(cross_sections_dem_dir)
+  create_if_not_exists(cross_sections_ml_dir)
+  create_if_not_exists(cross_sections_coastal_bathy_dir)
+  create_if_not_exists(cross_sections_dem_pts_dir)
+  
+  if(with_output) {
+    create_if_not_exists(output_dir)
+  }
+  
+}
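+
+# A minimal usage sketch (hypothetical base_dir value, following the BASE_DIR convention above):
+# create_new_version_dirs(
+#   base_dir    = "/Volumes/T7SSD/lynker-spatial",
+#   version     = "v3.0",
+#   with_output = TRUE
+# )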
+
+create_if_not_exists <- function(dir_path) {
+  if (!dir.exists(dir_path)) {
+    dir.create(dir_path, recursive = TRUE)
+    message("Created directory: '", dir_path, "'\n")
+  }
+}
+
+# get a list of top level directories for main directory
+get_base_dir_paths <- function(base_dir) {
+  # base_dir = BASE_DIR
+  # version = "v3.0"
+  
+  hydrofabric_dir  <- file.path(base_dir, "hydrofabric")
+  
+  dem_dir          <- file.path(base_dir, "dem")
+  dem_vrt_dir      <- file.path(base_dir, "dem", "vrt")
+  dem_tif_dir      <- file.path(base_dir, "dem", "tif")
+  
+  cs_extension_polygons_dir <- file.path(base_dir, "cs-extension-polygons")
+  
+  # FEMA data 
+  
+  fema_dir         <- file.path(base_dir, "fema")
+  
+  fema_fgb_dir     <- file.path(fema_dir, "fema-fgb")
+  fema_geojson_dir <- file.path(fema_dir, "fema-geojson")
+  fema_clean_dir   <- file.path(fema_dir, "fema-clean")
+  fema_gpkg_dir    <- file.path(fema_dir, "fema-gpkg")
+  
+  # BY VPU folders 
+  VPU_IDS               <- get_vpu_ids()
+  fema_by_vpu_dir       <- file.path(fema_dir, "fema-by-vpu")
+  fema_by_vpu_subdirs   <- paste0(fema_by_vpu_dir, "/vpu-", VPU_IDS)
+  
+  return(
+      list(
+      hydrofabric_dir = hydrofabric_dir,
+      dem_dir = dem_dir,
+      dem_vrt_dir = dem_vrt_dir, 
+      dem_tif_dir = dem_tif_dir,
+      cs_extension_polygons_dir = cs_extension_polygons_dir,
+      fema_dir = fema_dir,
+      fema_fgb_dir = fema_fgb_dir,
+      fema_geojson_dir = fema_geojson_dir,
+      fema_clean_dir = fema_clean_dir,
+      fema_gpkg_dir = fema_gpkg_dir,
+      fema_by_vpu_dir = fema_by_vpu_dir,
+      fema_by_vpu_subdirs = fema_by_vpu_subdirs
+    )
+  )
+}
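+
+# A minimal usage sketch: pull individual paths out of the named list
+# base_dirs <- get_base_dir_paths("/Volumes/T7SSD/lynker-spatial")
+# base_dirs$dem_tif_dir
+# base_dirs$fema_by_vpu_subdirs[1]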
+
+# get list of a specific directories in a version directory
+get_version_base_dir_paths <- function(base_dir, version) {
+  # base_dir = BASE_DIR
+  # version = "v3.0"
+  
+  hydrofabric_dir   <- file.path(base_dir, "hydrofabric")
+  
+  version_base_dir  <- file.path(hydrofabric_dir, version)
+  
+  # polygons for transect extensions
+  ml_dir                     <- file.path(version_base_dir, "ml")
+  
+  # reference features 
+  ref_features_dir           <- file.path(version_base_dir, "reference-features")
+  
+  # conus network gpkg
+  network_dir                <- file.path(version_base_dir, "network")
+  
+  # transects
+  transects_dir              <- file.path(version_base_dir, "transects")
+  
+  # cross sections dirs
+  cross_sections_dir                 <- file.path(version_base_dir, "cross-sections")
+  cross_sections_dem_dir             <- file.path(cross_sections_dir, "dem")
+  cross_sections_ml_dir              <- file.path(cross_sections_dir, "dem-ml")
+  cross_sections_coastal_bathy_dir   <- file.path(cross_sections_dir, "dem-coastal-bathy")
+  cross_sections_dem_pts_dir         <- file.path(cross_sections_dir, "dem-points")
+  
+  return(
+    list(
+      hydrofabric_dir    = hydrofabric_dir,
+      version_base_dir   = version_base_dir,
+      ref_features_dir   = ref_features_dir, 
+      network_dir        = network_dir,
+      ml_dir = ml_dir,
+      transects_dir      = transects_dir,
+      cross_sections_dir = cross_sections_dir,
+      cross_sections_dem_dir     = cross_sections_dem_dir,
+      cross_sections_dem_pts_dir = cross_sections_dem_pts_dir,
+      cross_sections_ml_dir      = cross_sections_ml_dir,
+      cross_sections_coastal_bathy_dir = cross_sections_coastal_bathy_dir
+    )
+  )
+}
+
+list_s3_objects <- function(s3_bucket, pattern = NULL, aws_profile = NULL) {
+  
+  profile_option <- if (!is.null(aws_profile)) paste0("--profile ", aws_profile) else ""
+  
+  # only filter with grep when a non-empty pattern is given
+  if (is.null(pattern) || pattern == "") {
+    grep_command <- ""
+  } else {
+    grep_command <- paste0(" | grep -E \"", pattern, "\"")
+  }
+  
+  cmd <- paste0(
+    'aws s3 ls "', s3_bucket, '" ', profile_option,
+    " | awk '{print $4}'", grep_command
+  )
+  ls_output <- system(cmd, intern = TRUE)
+  return(ls_output)
+}
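+
+# A minimal usage sketch (hypothetical key prefix and pattern; requires the AWS CLI on PATH):
+# keys <- list_s3_objects("s3://lynker-spatial/v20.1/gpkg/", pattern = "nextgen_\\d+", aws_profile = "default")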
+
+download_tiles <- function(tile_paths, output_dir) {
+  # output_dir <- DEM_TIF_DIR
+  # tile_paths
+  # error_tiles <- 
+  tif_save_paths       <- paste0(output_dir, "/", basename(tile_paths))
+  
+  error_tiles <- data.frame(
+    tile   = basename(tile_paths),
+    status = TRUE
+  )
+  
+  for (i in seq_along(tile_paths)) {
+    # i = 1
+    tile_path         <- tile_paths[i]
+    tif_save_path     <- tif_save_paths[i]
+    
+    message("[", i, "]", 
+            "\n > Tile: ", basename(tile_path),
+            "\n > Output path: ", tif_save_path
+    )
+    
+    download_tif_cmd    <- paste0("curl -o ", tif_save_path, " ", tile_path)
+    
+    tryCatch({
+      
+      tif_download_output <- system(download_tif_cmd, intern = TRUE)
+      message("   > Successfully downloaded tile: ", basename(tile_path))
+      
+    }, error = function(e) {
+      
+      message("Error downloading tile: ", basename(tile_path))
+      message("Error below: \n  ", e)
+      
+      # flag the failed tile in the status dataframe ("<<-" so the assignment
+      # reaches the enclosing function environment, not the handler's)
+      error_tiles$status[error_tiles$tile == basename(tile_path)] <<- FALSE
+      
+    })
+    
+  }
+  
+  return(error_tiles)
+  
+}
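+
+# A minimal usage sketch (hypothetical tile URL built from the 3DEP listing below; requires curl on PATH):
+# status_df <- download_tiles(
+#   tile_paths = c("https://prd-tnm.s3.amazonaws.com/StagedProducts/Elevation/13/TIFF/current/n40w105/USGS_13_n40w105.tif"),
+#   output_dir = dem_tif_dir
+# )
+# status_df[!status_df$status, ]   # any tiles that failed to download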
+
+# Given 2 character vectors of filenames both including VPU strings after a "nextgen_" string, match them together to
+# make sure they are aligned and in the same order
+# x is a character vector of file paths with a VPU ID preceded by a "nextgen_" string 
+# y is a character vector of file paths with a VPU ID preceded by a "nextgen_" string 
+# base is a character vector of the base directory of the files. Defaults to NULL
+# Returns a dataframe with VPU, x, and y columns
+align_files_by_vpu <- function(
+    x, 
+    y, 
+    base = NULL
+) {
+
+  # regular expression to capture the VPU ID (digits plus an optional letter) after "nextgen_", dropping the rest
+  regex_pattern <- "nextgen_(\\d+[A-Za-z]?).*"
+  
+  # path dataframe for X filepaths
+  x_paths <- data.frame(x = x)
+  
+  # path dataframe for Y filepaths
+  y_paths <- data.frame(y = y)
+  
+  # generate VPU IDs based on file path regular expression matching with "regex_pattern" above
+  x_paths$vpu <- gsub(regex_pattern, "\\1", x_paths$x)
+  y_paths$vpu <- gsub(regex_pattern, "\\1", y_paths$y)
+  
+  # match paths based on VPU column
+  matched_paths <- dplyr::left_join(
+    x_paths,
+    y_paths,
+    by = "vpu"
+  ) 
+  
+  # reorder columns
+  matched_paths <- dplyr::relocate(matched_paths, vpu, x, y)
+  
+  if(!is.null(base)) {
+    matched_paths$base_dir <- base
+  }
+  
+  return(matched_paths)
+  
+}
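+
+# A minimal example of the expected matching behavior (hypothetical filenames):
+# align_files_by_vpu(
+#   x = c("nextgen_01.gpkg", "nextgen_03N.gpkg"),
+#   y = c("nextgen_03N_attrs.parquet", "nextgen_01_attrs.parquet")
+# )
+# #>   vpu                x                         y
+# #> 1  01  nextgen_01.gpkg  nextgen_01_attrs.parquet
+# #> 2 03N nextgen_03N.gpkg nextgen_03N_attrs.parquet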
+
+# -------------------------------------------------------------------------------------
+# FEMA processing functions: 
+# -------------------------------------------------------------------------------------
+
+resolve_internal_fema_boundaries <- function(fema, source_file = "") {
+  
+  message("Resolving internal boundaries, islands, and topology issues:\n > '", basename(source_file), "'")
+  
+  fema <-
+    fema[!sf::st_is_empty(fema), ] %>% 
+    sf::st_transform(5070)
+  
+  fema <-
+    fema %>% 
+    dplyr::select(geometry = geom) %>%
+    add_predicate_group_id(sf::st_intersects) %>% 
+    sf::st_make_valid() %>% 
+    dplyr::group_by(group_id) %>% 
+    dplyr::summarise(
+      geometry = sf::st_combine(sf::st_union(geometry))
+    ) %>% 
+    dplyr::ungroup() %>% 
+    dplyr::select(-group_id) %>% 
+    add_predicate_group_id(sf::st_intersects) %>% 
+    rmapshaper::ms_dissolve(sys = TRUE, sys_mem = 16) %>% 
+    rmapshaper::ms_explode(sys = TRUE, sys_mem = 16) %>% 
+    dplyr::mutate(
+      fema_id = as.character(1:dplyr::n())
+    ) %>% 
+    dplyr::select(fema_id, geometry)
+  
+  fema <- 
+    fema %>% 
+    dplyr::mutate(
+      source = basename(source_file),
+      state  = gsub("-100yr-flood_valid_clean.gpkg", "", source)
+    ) %>%
+    dplyr::select(fema_id, source, state, geometry)
+  
+  return(fema)
+}
+
+add_predicate_group_id <- function(polys, predicate) {
+  # GROUP BY SPATIAL PREDICATES
+  # ----------------------------------------- 
+  # predicate = sf::st_touches
+  # polys <- sf_df
+  # ----------------------------------------- 
+  
+  
+  relations <- predicate(polys)
+  
+  relations <- lapply(seq_along(relations), function(i) { as.character(sort(unique(c(relations[i][[1]], i)))) })
+  
+  group_ids_map <- fastmap::fastmap()
+  ids_to_groups <- fastmap::fastmap()
+  
+  group_id <- 0
+  
+  for (i in seq_along(relations)) {
+    
+    predicate_ids <- relations[i][[1]]
+    
+    # message("(", i, ") - ", predicate_ids)
+    # message("Start Group ID: ", group_id)
+    
+    id_group_check <- ids_to_groups$has(predicate_ids)
+    
+    if(any(id_group_check)) {
+      
+      known_groups  <- ids_to_groups$mget(predicate_ids)
+      known_group   <- known_groups[unname(sapply(known_groups , function(kg) {
+        !is.null(kg)
+      }))][[1]]
+      
+      # message("IDs part of past group ID > '", known_group, "'")
+      
+      past_group_ids     <- group_ids_map$get(known_group)[[1]]
+      updated_group_ids  <- as.character(
+        sort(as.numeric(unique(c(past_group_ids, predicate_ids))))
+      )
+      
+      group_ids_map$set(known_group, list(updated_group_ids))
+      
+      new_ids <- predicate_ids[!predicate_ids %in% past_group_ids]
+      
+      # message("Adding ", new_ids, " to seen set...")
+      
+      # add any newly added IDs to the seen map
+      for (seen_id in new_ids) {
+        # message(seen_id)
+        ids_to_groups$set(as.character(seen_id), as.character(group_id))
+      }
+      
+    } else {
+      # get a new group ID number
+      group_id <- group_id + 1    
+      # message("IDs form NEW group > '", group_id, "'")
+      
+      # create a new key in the map with the predicate IDs list as the value
+      group_ids_map$set(as.character(group_id), list(predicate_ids))
+      
+      # message("Adding ", predicate_ids, " to seen set...")
+      
+      # add each predicate ID to the map storing the seen indexes and their respective group IDs 
+      for (seen_id in predicate_ids) {
+        # message(seen_id)
+        ids_to_groups$set(as.character(seen_id), as.character(group_id))
+      }
+    }
+    # message("End group ID: ", group_id, "\n") 
+  }
+  
+  group_ids   <- group_ids_map$as_list() 
+  
+  grouping_df <- lapply(seq_along(group_ids), function(i) {
+    # i = 2
+    grouping  <- group_ids[i] 
+    group_id  <- names(grouping)
+    indices   <- grouping[[1]][[1]]
+    
+    data.frame(
+      index      = as.numeric(indices),
+      group_id   = rep(group_id, length(indices))   
+    )
+    
+  }) %>% 
+    dplyr::bind_rows() %>% 
+    dplyr::arrange(index) 
+  
+  # count up the number of IDs for each group; we'll use this to decide which group 
+  # to assign any indices that were part of MULTIPLE groups (use the group with the most members)
+  group_id_counts <- 
+    grouping_df %>% 
+    dplyr::group_by(group_id) %>% 
+    dplyr::count() %>% 
+    # dplyr::arrange(-n) %>% 
+    dplyr::ungroup()
+  
+  # select the IDs with the most other members
+  grouping_df <- 
+    grouping_df %>% 
+    dplyr::left_join(
+      group_id_counts, 
+      by = 'group_id'
+    ) %>% 
+    dplyr::group_by(index) %>% 
+    dplyr::slice_max(n, with_ties = FALSE) %>% 
+    dplyr::ungroup() %>% 
+    dplyr::select(-n) %>% 
+    dplyr::arrange(-index) 
+  
+  polys$group_id <- grouping_df$group_id
+  
+  return(polys)
+  
+}
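+
+# A minimal usage sketch (hypothetical 'fema_polys' object): polygons that satisfy the
+# predicate against one another end up sharing a group_id, so a later group_by(group_id) +
+# summarise() dissolves each connected cluster into a single geometry
+# fema_polys <- add_predicate_group_id(fema_polys, sf::st_intersects)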
+
+# Update flowlines and transects to remove flowlines and transects that intersect with reference_features waterbodies
+# flowlines: flowlines linestring sf object
+# trans: transects linestring sf object
+# waterbodies: waterbodies polygon sf object
+# Returns a list of length 2 ("valid_flowlines", "valid_transects") of logical vectors for subsetting
+# the input "flowlines" and "trans" sf objects to drop features that intersect waterbodies
+wb_intersects <- function(flowlines, trans, waterbodies) {
+  
+  
+  flowlines_geos <- geos::as_geos_geometry(flowlines)
+  wbs_geos <- geos::as_geos_geometry(waterbodies)
+  
+  # temporary ID for transects that is the "hy_id", underscore, "cs_id", used for subsetting in future steps
+  trans$tmp_id <- paste0(trans$hy_id, "_", trans$cs_id)
+  
+  message("Checking flowlines against waterbodies...")
+  
+  # create an index between flowlines and waterbodies 
+  wb_index <- geos::geos_intersects_matrix(flowlines_geos, wbs_geos)
+  
+  # split flowlines into those that do NOT intersect any waterbody (keep) and those that intersect at least one (check further)
+  to_keep  <- flowlines[lengths(wb_index) == 0, ]
+  to_check <- flowlines[lengths(wb_index) != 0, ]
+  
+  # subset transects to the hy_ids in "to_check" set of flowlines
+  trans_check <- trans[trans$hy_id %in% unique(to_check$id), ]
+  # trans_check <- trans_geos[trans$hy_id %in% unique(to_check$id)]
+  
+  # check where the transects linestrings intersect with the waterbodies
+  trans_geos_check <- geos::as_geos_geometry(trans_check)
+  
+  message("Checking transects against waterbodies (v2) ...")
+  wb_trans_index <- geos::geos_intersects_matrix(trans_geos_check, wbs_geos)                    # (NEW METHOD)
+  # wb_trans_index <- geos::geos_intersects_any(trans_geos_check, wbs_geos[unlist(wb_index)])   # (OLD METHOD)
+  
+  # sum(lengths(wb_trans_index) == 0)
+  # length(wb_trans_index)
+  
+  # within the transects lines that are on a flowline that crosses a waterbody, 
+  # check if any of these transects line DO NOT CROSS A WATERBODY AT ALL
+  trans_keep <- trans_check[lengths(wb_trans_index) == 0, ]                       # (NEW METHOD)
+  # trans_keep <- trans_check[!wb_trans_index, ]                                  # (OLD METHOD)
+  
+  # preserve any flowlines that CROSS A WATERBODY BUT ALSO HAVE A TRANSECT LINE that does NOT cross any waterbodies
+  to_check <- to_check[to_check$id %in% unique(trans_keep$hy_id), ]
+  
+  # update flowlines to keep with flowlines that intersect a waterbody BUT STILL,
+  # have transects that are NOT in the waterbody
+  to_keep <- dplyr::bind_rows(to_keep, to_check)
+  
+  # 'tmp_ids' of transects that are being checked and also the transects within trans_check 
+  # that were determined to be valid (are being kept)
+  check_ids <- unique(trans_check$tmp_id)
+  keep_ids <- unique(trans_keep$tmp_id)
+  
+  # logical vectors of which flowlines/transects to keep (KEEP == TRUE)
+  # - Remove any transects that are on flowlines that cross a waterbody AND the transect crosses the waterbody too.
+  # - Keep original transects that are not on flowlines that intersect waterbodies AND 
+  # also the transects that do NOT intersect waterbodies but are on a flowline that DOES intersect a waterbody
+  valid_flowlines <- flowlines$id %in% to_keep$id
+  valid_transects <- !trans$tmp_id %in% check_ids[!check_ids %in% keep_ids]
+  
+  # return a list of logical keep vectors for the flowlines and transects 
+  return(
+    list(
+      "valid_flowlines" =  valid_flowlines,
+      "valid_transects" =  valid_transects
+    )
+  )
+  
+}
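+
+# A minimal usage sketch (hypothetical inputs): apply the returned logical vectors to
+# subset the original objects
+# keep <- wb_intersects(flowlines, transects, waterbodies)
+# flowlines <- flowlines[keep$valid_flowlines, ]
+# transects <- transects[keep$valid_transects, ]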
+
+add_intersects_ids <- function(x, y, id_col) {
+  # make sure the CRS are the same
+  y <- sf::st_transform(y, sf::st_crs(x))
+  
+  # Perform the intersection
+  intersections <- sf::st_intersects(x, y)
+  
+  # add the intersected values to the first dataframe
+  x[[id_col]] <- unlist(lapply(intersections, function(idx) {
+    if (length(idx) > 0) {
+      paste0(unlist(y[[id_col]][idx]), collapse = ", ")
+    } else {
+      NA
+    }
+  }))
+  
+  return(x)
+}
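+
+# A minimal usage sketch (hypothetical layers): tag each transect with the comma-separated
+# "fema_id" values of any FEMA polygons it intersects
+# transects <- add_intersects_ids(transects, fema_polys, id_col = "fema_id")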
+
+unnest_ids <- function(ids) {
+  return(
+    unique(unlist(strsplit(unique(ids), ", ")))
+  )
+}
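+
+# Example: splits the comma-separated ID strings produced by add_intersects_ids()
+# unnest_ids(c("1, 2", "2, 3"))
+# #> [1] "1" "2" "3"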
+
+#' Get the polygons that intersect with any of the linestring geometries
+#' This is just a wrapper around geos::geos_intersects_matrix. Takes in sf dataframes, uses geos, then outputs sf dataframes
+#' @param polygons polygon sf object. Default is NULL
+#' @param lines linestring sf object. Default is NULL.
+#'
+#' @return sf dataframe of polygons that intersect with the linestrings
+polygons_with_line_intersects <- function(polygons = NULL, lines = NULL) {
+  
+  if (is.null(polygons)) {
+    stop("NULL 'polygons' argument, provide an sf dataframe of POLYGON or MULTIPOLYGON geometries")
+  }
+  
+  if (is.null(lines)) {
+    stop("NULL 'lines' argument, provide an sf dataframe of LINESTRING or MULTILINESTRING geometries")
+  }
+  
+  # Convert the SF geometries to geos geometries
+  polygons_geos   <- geos::as_geos_geometry(polygons)
+  lines_geos      <- geos::as_geos_geometry(lines)
+  
+  # create an index between the polygons and linestrings
+  lines_index <-  geos::geos_intersects_matrix(polygons_geos, lines_geos)
+  
+  # get the polygons that have at least 1 intersection with the 'lines'
+  polygons_with_lines <- polygons[lengths(lines_index) != 0, ]
+  
+  return(polygons_with_lines)
+}
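+
+# A minimal usage sketch (hypothetical layers): keep only the FEMA polygons that are
+# crossed by at least one transect line
+# fema_hits <- polygons_with_line_intersects(polygons = fema_polys, lines = transects)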
+
+# TODO: Delete these NEW DOMAIN functions...
+# Create an empty file structure 
+# base_dir: character, top level directory path
+# domain_dirname: character, name of the new domain directory; if the folder already exists, any missing required subdirectories are created
+
+# Directory tree:
+# base_dir/
+#   └── domain_dirname/
+#     ├── flowlines/
+#     ├── domain_subset/
+#     ├── dem/
+#     ├── transects/
+#     ├── cross_sections/
+#     ├── cs_pts/
+#     ├── vpu-subsets/
+#     └── outputs/      (only when with_output = TRUE)
+create_new_domain_dirs <- function(base_dir, domain_dirname, with_output = FALSE) {
+  
+  # build paths
+  domain_dir         <- paste0(base_dir, "/", domain_dirname)
+  flowlines_dir      <- paste0(domain_dir, "/flowlines")
+  domain_subset_dir  <- paste0(domain_dir, "/domain_subset")
+  dem_dir            <- paste0(domain_dir, "/dem")
+  transects_dir      <- paste0(domain_dir, "/transects")
+  cross_sections_dir <- paste0(domain_dir, "/cross_sections")
+  cs_pts_dir         <- paste0(domain_dir, "/cs_pts")
+  vpu_subsets_dir    <- paste0(domain_dir, "/vpu-subsets")
+  
+  if(with_output) {
+    output_dir       <- paste0(domain_dir, "/outputs")
+  }
+  
+  # create directories
+  create_if_not_exists(domain_dir)
+  create_if_not_exists(flowlines_dir)
+  create_if_not_exists(domain_subset_dir)
+  create_if_not_exists(dem_dir)
+  create_if_not_exists(transects_dir)
+  create_if_not_exists(cross_sections_dir)
+  create_if_not_exists(cs_pts_dir)
+  create_if_not_exists(vpu_subsets_dir)
+  
+  if(with_output) {
+    create_if_not_exists(output_dir)
+  }
+  
+}
+
+# get path strings for a domain dir (based of a base dir and domain dirname)
+# NOTE: this does NOT guarantee that these folders exist, 
+# NOTE: it just gets the paths if they were created by create_new_domain_dirs() 
+get_new_domain_paths <- function(base_dir, domain_dirname, with_output = FALSE) {
+  
+  # build paths
+  domain_dir         <- paste0(base_dir, "/", domain_dirname)
+  flowlines_dir      <- paste0(domain_dir, "/flowlines")
+  domain_subset_dir  <- paste0(domain_dir, "/domain_subset")
+  dem_dir            <- paste0(domain_dir, "/dem")
+  transects_dir      <- paste0(domain_dir, "/transects")
+  cross_sections_dir <- paste0(domain_dir, "/cross_sections")
+  cs_pts_dir         <- paste0(domain_dir, "/cs_pts")
+  vpu_subsets_dir    <- paste0(domain_dir, "/vpu-subsets")
+  output_dir         <- ifelse(with_output, paste0(domain_dir, "/outputs"), NA)
+  
+  
+  # named list of file paths
+  return(
+    list(
+      base_dir           = base_dir, 
+      domain_dir         = domain_dir,
+      flowlines_dir      = flowlines_dir,
+      domain_subset_dir  = domain_subset_dir,
+      dem_dir            = dem_dir,
+      transects_dir      = transects_dir,
+      cross_sections_dir = cross_sections_dir,
+      cs_pts_dir         = cs_pts_dir,
+      vpu_subsets_dir    = vpu_subsets_dir,
+      output_dir         = output_dir
+    )
+  )
+  
+}
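+
+# A minimal usage sketch (hypothetical domain name): create the folder structure, then
+# retrieve the same paths as a named list
+# create_new_domain_dirs(base_dir, "test-domain", with_output = TRUE)
+# domain_paths <- get_new_domain_paths(base_dir, "test-domain", with_output = TRUE)
+# domain_paths$transects_dir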
+
+download_3dep_vrt <- function(base_dir) {
+  
+  ## Cmd+A/Cmd+C from: http://prd-tnm.s3.amazonaws.com/index.html?prefix=StagedProducts/Elevation/13/TIFF/current/
+  ### paste w/ `datapasta::vector_paste_vertical()`
+  ### Some manual cleaning of header and footer mess...
+  ### Reason? Un-scrapable page, and no index.gpkg...
+  
+  t <- c(
+    "0              n06e162/",
+    "0              n06e163/",
+    "0              n07e134/",
+    "0              n07e151/",
+    "0              n07e152/",
+    "0              n07e158/",
+    "0              n08e134/",
+    "0              n08e151/",
+    "0              n08e152/",
+    "0              n08e158/",
+    "0              n09e134/",
+    "0              n10e138/",
+    "0              n14e144/",
+    "0              n15e145/",
+    "0              n16e145/",
+    "0              n18w065/",
+    "0              n18w066/",
+    "0              n18w067/",
+    "0              n18w068/",
+    "0              n19w065/",
+    "0              n19w066/",
+    "0              n19w067/",
+    "0              n19w068/",
+    "0              n19w156/",
+    "0              n20w155/",
+    "0              n20w156/",
+    "0              n20w157/",
+    "0              n21w156/",
+    "0              n21w157/",
+    "0              n21w158/",
+    "0              n22w157/",
+    "0              n22w158/",
+    "0              n22w159/",
+    "0              n22w160/",
+    "0              n22w161/",
+    "0              n23w160/",
+    "0              n23w161/",
+    "0              n25w081/",
+    "0              n25w082/",
+    "0              n25w083/",
+    "0              n26w081/",
+    "0              n26w082/",
+    "0              n26w098/",
+    "0              n26w099/",
+    "0              n27w081/",
+    "0              n27w082/",
+    "0              n27w083/",
+    "0              n27w098/",
+    "0              n27w099/",
+    "0              n27w100/",
+    "0              n28w081/",
+    "0              n28w082/",
+    "0              n28w083/",
+    "0              n28w097/",
+    "0              n28w098/",
+    "0              n28w099/",
+    "0              n28w100/",
+    "0              n28w101/",
+    "0              n29w081/",
+    "0              n29w082/",
+    "0              n29w083/",
+    "0              n29w090/",
+    "0              n29w096/",
+    "0              n29w097/",
+    "0              n29w098/",
+    "0              n29w099/",
+    "0              n29w100/",
+    "0              n29w101/",
+    "0              n29w104/",
+    "0              n30w081/",
+    "0              n30w082/",
+    "0              n30w083/",
+    "0              n30w084/",
+    "0              n30w085/",
+    "0              n30w086/",
+    "0              n30w089/",
+    "0              n30w090/",
+    "0              n30w091/",
+    "0              n30w092/",
+    "0              n30w093/",
+    "0              n30w094/",
+    "0              n30w095/",
+    "0              n30w096/",
+    "0              n30w097/",
+    "0              n30w098/",
+    "0              n30w099/",
+    "0              n30w100/",
+    "0              n30w101/",
+    "0              n30w102/",
+    "0              n30w103/",
+    "0              n30w104/",
+    "0              n30w105/",
+    "0              n31w082/",
+    "0              n31w083/",
+    "0              n31w084/",
+    "0              n31w085/",
+    "0              n31w086/",
+    "0              n31w087/",
+    "0              n31w088/",
+    "0              n31w089/",
+    "0              n31w090/",
+    "0              n31w091/",
+    "0              n31w092/",
+    "0              n31w093/",
+    "0              n31w094/",
+    "0              n31w095/",
+    "0              n31w096/",
+    "0              n31w097/",
+    "0              n31w098/",
+    "0              n31w099/",
+    "0              n31w100/",
+    "0              n31w101/",
+    "0              n31w102/",
+    "0              n31w103/",
+    "0              n31w104/",
+    "0              n31w105/",
+    "0              n31w106/",
+    "0              n31w107/",
+    "0              n32w081/",
+    "0              n32w082/",
+    "0              n32w083/",
+    "0              n32w084/",
+    "0              n32w085/",
+    "0              n32w086/",
+    "0              n32w087/",
+    "0              n32w088/",
+    "0              n32w089/",
+    "0              n32w090/",
+    "0              n32w091/",
+    "0              n32w092/",
+    "0              n32w093/",
+    "0              n32w094/",
+    "0              n32w095/",
+    "0              n32w096/",
+    "0              n32w097/",
+    "0              n32w098/",
+    "0              n32w099/",
+    "0              n32w100/",
+    "0              n32w101/",
+    "0              n32w102/",
+    "0              n32w103/",
+    "0              n32w104/",
+    "0              n32w105/",
+    "0              n32w106/",
+    "0              n32w107/",
+    "0              n32w108/",
+    "0              n32w109/",
+    "0              n32w110/",
+    "0              n32w111/",
+    "0              n32w112/",
+    "0              n32w113/",
+    "0              n32w114/",
+    "0              n33w080/",
+    "0              n33w081/",
+    "0              n33w082/",
+    "0              n33w083/",
+    "0              n33w084/",
+    "0              n33w085/",
+    "0              n33w086/",
+    "0              n33w087/",
+    "0              n33w088/",
+    "0              n33w089/",
+    "0              n33w090/",
+    "0              n33w091/",
+    "0              n33w092/",
+    "0              n33w093/",
+    "0              n33w094/",
+    "0              n33w095/",
+    "0              n33w096/",
+    "0              n33w097/",
+    "0              n33w098/",
+    "0              n33w099/",
+    "0              n33w100/",
+    "0              n33w101/",
+    "0              n33w102/",
+    "0              n33w103/",
+    "0              n33w104/",
+    "0              n33w105/",
+    "0              n33w106/",
+    "0              n33w107/",
+    "0              n33w108/",
+    "0              n33w109/",
+    "0              n33w110/",
+    "0              n33w111/",
+    "0              n33w112/",
+    "0              n33w113/",
+    "0              n33w114/",
+    "0              n33w115/",
+    "0              n33w116/",
+    "0              n33w117/",
+    "0              n33w118/",
+    "0              n33w119/",
+    "0              n34w078/",
+    "0              n34w079/",
+    "0              n34w080/",
+    "0              n34w081/",
+    "0              n34w082/",
+    "0              n34w083/",
+    "0              n34w084/",
+    "0              n34w085/",
+    "0              n34w086/",
+    "0              n34w087/",
+    "0              n34w088/",
+    "0              n34w089/",
+    "0              n34w090/",
+    "0              n34w091/",
+    "0              n34w092/",
+    "0              n34w093/",
+    "0              n34w094/",
+    "0              n34w095/",
+    "0              n34w096/",
+    "0              n34w097/",
+    "0              n34w098/",
+    "0              n34w099/",
+    "0              n34w100/",
+    "0              n34w101/",
+    "0              n34w102/",
+    "0              n34w103/",
+    "0              n34w104/",
+    "0              n34w105/",
+    "0              n34w106/",
+    "0              n34w107/",
+    "0              n34w108/",
+    "0              n34w109/",
+    "0              n34w110/",
+    "0              n34w111/",
+    "0              n34w112/",
+    "0              n34w113/",
+    "0              n34w114/",
+    "0              n34w115/",
+    "0              n34w116/",
+    "0              n34w117/",
+    "0              n34w118/",
+    "0              n34w119/",
+    "0              n34w120/",
+    "0              n34w121/",
+    "0              n35w076/",
+    "0              n35w077/",
+    "0              n35w078/",
+    "0              n35w079/",
+    "0              n35w080/",
+    "0              n35w081/",
+    "0              n35w082/",
+    "0              n35w083/",
+    "0              n35w084/",
+    "0              n35w085/",
+    "0              n35w086/",
+    "0              n35w087/",
+    "0              n35w088/",
+    "0              n35w089/",
+    "0              n35w090/",
+    "0              n35w091/",
+    "0              n35w092/",
+    "0              n35w093/",
+    "0              n35w094/",
+    "0              n35w095/",
+    "0              n35w096/",
+    "0              n35w097/",
+    "0              n35w098/",
+    "0              n35w099/",
+    "0              n35w100/",
+    "0              n35w101/",
+    "0              n35w102/",
+    "0              n35w103/",
+    "0              n35w104/",
+    "0              n35w105/",
+    "0              n35w106/",
+    "0              n35w107/",
+    "0              n35w108/",
+    "0              n35w109/",
+    "0              n35w110/",
+    "0              n35w111/",
+    "0              n35w112/",
+    "0              n35w113/",
+    "0              n35w114/",
+    "0              n35w115/",
+    "0              n35w116/",
+    "0              n35w117/",
+    "0              n35w118/",
+    "0              n35w119/",
+    "0              n35w120/",
+    "0              n35w121/",
+    "0              n36w076/",
+    "0              n36w077/",
+    "0              n36w078/",
+    "0              n36w079/",
+    "0              n36w080/",
+    "0              n36w081/",
+    "0              n36w082/",
+    "0              n36w083/",
+    "0              n36w084/",
+    "0              n36w085/",
+    "0              n36w086/",
+    "0              n36w087/",
+    "0              n36w088/",
+    "0              n36w089/",
+    "0              n36w090/",
+    "0              n36w091/",
+    "0              n36w092/",
+    "0              n36w093/",
+    "0              n36w094/",
+    "0              n36w095/",
+    "0              n36w096/",
+    "0              n36w097/",
+    "0              n36w098/",
+    "0              n36w099/",
+    "0              n36w100/",
+    "0              n36w101/",
+    "0              n36w102/",
+    "0              n36w103/",
+    "0              n36w104/",
+    "0              n36w105/",
+    "0              n36w106/",
+    "0              n36w107/",
+    "0              n36w108/",
+    "0              n36w109/",
+    "0              n36w110/",
+    "0              n36w111/",
+    "0              n36w112/",
+    "0              n36w113/",
+    "0              n36w114/",
+    "0              n36w115/",
+    "0              n36w116/",
+    "0              n36w117/",
+    "0              n36w118/",
+    "0              n36w119/",
+    "0              n36w120/",
+    "0              n36w121/",
+    "0              n36w122/",
+    "0              n37w076/",
+    "0              n37w077/",
+    "0              n37w078/",
+    "0              n37w079/",
+    "0              n37w080/",
+    "0              n37w081/",
+    "0              n37w082/",
+    "0              n37w083/",
+    "0              n37w084/",
+    "0              n37w085/",
+    "0              n37w086/",
+    "0              n37w087/",
+    "0              n37w088/",
+    "0              n37w089/",
+    "0              n37w090/",
+    "0              n37w091/",
+    "0              n37w092/",
+    "0              n37w093/",
+    "0              n37w094/",
+    "0              n37w095/",
+    "0              n37w096/",
+    "0              n37w097/",
+    "0              n37w098/",
+    "0              n37w099/",
+    "0              n37w100/",
+    "0              n37w101/",
+    "0              n37w102/",
+    "0              n37w103/",
+    "0              n37w104/",
+    "0              n37w105/",
+    "0              n37w106/",
+    "0              n37w107/",
+    "0              n37w108/",
+    "0              n37w109/",
+    "0              n37w110/",
+    "0              n37w111/",
+    "0              n37w112/",
+    "0              n37w113/",
+    "0              n37w114/",
+    "0              n37w115/",
+    "0              n37w116/",
+    "0              n37w117/",
+    "0              n37w118/",
+    "0              n37w119/",
+    "0              n37w120/",
+    "0              n37w121/",
+    "0              n37w122/",
+    "0              n37w123/",
+    "0              n38w076/",
+    "0              n38w077/",
+    "0              n38w078/",
+    "0              n38w079/",
+    "0              n38w080/",
+    "0              n38w081/",
+    "0              n38w082/",
+    "0              n38w083/",
+    "0              n38w084/",
+    "0              n38w085/",
+    "0              n38w086/",
+    "0              n38w087/",
+    "0              n38w088/",
+    "0              n38w089/",
+    "0              n38w090/",
+    "0              n38w091/",
+    "0              n38w092/",
+    "0              n38w093/",
+    "0              n38w094/",
+    "0              n38w095/",
+    "0              n38w096/",
+    "0              n38w097/",
+    "0              n38w098/",
+    "0              n38w099/",
+    "0              n38w100/",
+    "0              n38w101/",
+    "0              n38w102/",
+    "0              n38w103/",
+    "0              n38w104/",
+    "0              n38w105/",
+    "0              n38w106/",
+    "0              n38w107/",
+    "0              n38w108/",
+    "0              n38w109/",
+    "0              n38w110/",
+    "0              n38w111/",
+    "0              n38w112/",
+    "0              n38w113/",
+    "0              n38w114/",
+    "0              n38w115/",
+    "0              n38w116/",
+    "0              n38w117/",
+    "0              n38w118/",
+    "0              n38w119/",
+    "0              n38w120/",
+    "0              n38w121/",
+    "0              n38w122/",
+    "0              n38w123/",
+    "0              n38w124/",
+    "0              n39w075/",
+    "0              n39w076/",
+    "0              n39w077/",
+    "0              n39w078/",
+    "0              n39w079/",
+    "0              n39w080/",
+    "0              n39w081/",
+    "0              n39w082/",
+    "0              n39w083/",
+    "0              n39w084/",
+    "0              n39w085/",
+    "0              n39w086/",
+    "0              n39w087/",
+    "0              n39w088/",
+    "0              n39w089/",
+    "0              n39w090/",
+    "0              n39w091/",
+    "0              n39w092/",
+    "0              n39w093/",
+    "0              n39w094/",
+    "0              n39w095/",
+    "0              n39w096/",
+    "0              n39w097/",
+    "0              n39w098/",
+    "0              n39w099/",
+    "0              n39w100/",
+    "0              n39w101/",
+    "0              n39w102/",
+    "0              n39w103/",
+    "0              n39w104/",
+    "0              n39w105/",
+    "0              n39w106/",
+    "0              n39w107/",
+    "0              n39w108/",
+    "0              n39w109/",
+    "0              n39w110/",
+    "0              n39w111/",
+    "0              n39w112/",
+    "0              n39w113/",
+    "0              n39w114/",
+    "0              n39w115/",
+    "0              n39w116/",
+    "0              n39w117/",
+    "0              n39w118/",
+    "0              n39w119/",
+    "0              n39w120/",
+    "0              n39w121/",
+    "0              n39w122/",
+    "0              n39w123/",
+    "0              n39w124/",
+    "0              n40w075/",
+    "0              n40w076/",
+    "0              n40w077/",
+    "0              n40w078/",
+    "0              n40w079/",
+    "0              n40w080/",
+    "0              n40w081/",
+    "0              n40w082/",
+    "0              n40w083/",
+    "0              n40w084/",
+    "0              n40w085/",
+    "0              n40w086/",
+    "0              n40w087/",
+    "0              n40w088/",
+    "0              n40w089/",
+    "0              n40w090/",
+    "0              n40w091/",
+    "0              n40w092/",
+    "0              n40w093/",
+    "0              n40w094/",
+    "0              n40w095/",
+    "0              n40w096/",
+    "0              n40w097/",
+    "0              n40w098/",
+    "0              n40w099/",
+    "0              n40w100/",
+    "0              n40w101/",
+    "0              n40w102/",
+    "0              n40w103/",
+    "0              n40w104/",
+    "0              n40w105/",
+    "0              n40w106/",
+    "0              n40w107/",
+    "0              n40w108/",
+    "0              n40w109/",
+    "0              n40w110/",
+    "0              n40w111/",
+    "0              n40w112/",
+    "0              n40w113/",
+    "0              n40w114/",
+    "0              n40w115/",
+    "0              n40w116/",
+    "0              n40w117/",
+    "0              n40w118/",
+    "0              n40w119/",
+    "0              n40w120/",
+    "0              n40w121/",
+    "0              n40w122/",
+    "0              n40w123/",
+    "0              n40w124/",
+    "0              n40w125/",
+    "0              n41w073/",
+    "0              n41w074/",
+    "0              n41w075/",
+    "0              n41w076/",
+    "0              n41w077/",
+    "0              n41w078/",
+    "0              n41w079/",
+    "0              n41w080/",
+    "0              n41w081/",
+    "0              n41w082/",
+    "0              n41w083/",
+    "0              n41w084/",
+    "0              n41w085/",
+    "0              n41w086/",
+    "0              n41w087/",
+    "0              n41w088/",
+    "0              n41w089/",
+    "0              n41w090/",
+    "0              n41w091/",
+    "0              n41w092/",
+    "0              n41w093/",
+    "0              n41w094/",
+    "0              n41w095/",
+    "0              n41w096/",
+    "0              n41w097/",
+    "0              n41w098/",
+    "0              n41w099/",
+    "0              n41w100/",
+    "0              n41w101/",
+    "0              n41w102/",
+    "0              n41w103/",
+    "0              n41w104/",
+    "0              n41w105/",
+    "0              n41w106/",
+    "0              n41w107/",
+    "0              n41w108/",
+    "0              n41w109/",
+    "0              n41w110/",
+    "0              n41w111/",
+    "0              n41w112/",
+    "0              n41w113/",
+    "0              n41w114/",
+    "0              n41w115/",
+    "0              n41w116/",
+    "0              n41w117/",
+    "0              n41w118/",
+    "0              n41w119/",
+    "0              n41w120/",
+    "0              n41w121/",
+    "0              n41w122/",
+    "0              n41w123/",
+    "0              n41w124/",
+    "0              n41w125/",
+    "0              n42w070/",
+    "0              n42w071/",
+    "0              n42w072/",
+    "0              n42w073/",
+    "0              n42w074/",
+    "0              n42w075/",
+    "0              n42w076/",
+    "0              n42w077/",
+    "0              n42w078/",
+    "0              n42w079/",
+    "0              n42w080/",
+    "0              n42w081/",
+    "0              n42w082/",
+    "0              n42w083/",
+    "0              n42w084/",
+    "0              n42w085/",
+    "0              n42w086/",
+    "0              n42w087/",
+    "0              n42w088/",
+    "0              n42w089/",
+    "0              n42w090/",
+    "0              n42w091/",
+    "0              n42w092/",
+    "0              n42w093/",
+    "0              n42w094/",
+    "0              n42w095/",
+    "0              n42w096/",
+    "0              n42w097/",
+    "0              n42w098/",
+    "0              n42w099/",
+    "0              n42w100/",
+    "0              n42w101/",
+    "0              n42w102/",
+    "0              n42w103/",
+    "0              n42w104/",
+    "0              n42w105/",
+    "0              n42w106/",
+    "0              n42w107/",
+    "0              n42w108/",
+    "0              n42w109/",
+    "0              n42w110/",
+    "0              n42w111/",
+    "0              n42w112/",
+    "0              n42w113/",
+    "0              n42w114/",
+    "0              n42w115/",
+    "0              n42w116/",
+    "0              n42w117/",
+    "0              n42w118/",
+    "0              n42w119/",
+    "0              n42w120/",
+    "0              n42w121/",
+    "0              n42w122/",
+    "0              n42w123/",
+    "0              n42w124/",
+    "0              n42w125/",
+    "0              n43w071/",
+    "0              n43w072/",
+    "0              n43w073/",
+    "0              n43w074/",
+    "0              n43w075/",
+    "0              n43w076/",
+    "0              n43w077/",
+    "0              n43w078/",
+    "0              n43w079/",
+    "0              n43w080/",
+    "0              n43w081/",
+    "0              n43w082/",
+    "0              n43w083/",
+    "0              n43w084/",
+    "0              n43w085/",
+    "0              n43w086/",
+    "0              n43w087/",
+    "0              n43w088/",
+    "0              n43w089/",
+    "0              n43w090/",
+    "0              n43w091/",
+    "0              n43w092/",
+    "0              n43w093/",
+    "0              n43w094/",
+    "0              n43w095/",
+    "0              n43w096/",
+    "0              n43w097/",
+    "0              n43w098/",
+    "0              n43w099/",
+    "0              n43w100/",
+    "0              n43w101/",
+    "0              n43w102/",
+    "0              n43w103/",
+    "0              n43w104/",
+    "0              n43w105/",
+    "0              n43w106/",
+    "0              n43w107/",
+    "0              n43w108/",
+    "0              n43w109/",
+    "0              n43w110/",
+    "0              n43w111/",
+    "0              n43w112/",
+    "0              n43w113/",
+    "0              n43w114/",
+    "0              n43w115/",
+    "0              n43w116/",
+    "0              n43w117/",
+    "0              n43w118/",
+    "0              n43w119/",
+    "0              n43w120/",
+    "0              n43w121/",
+    "0              n43w122/",
+    "0              n43w123/",
+    "0              n43w124/",
+    "0              n43w125/",
+    "0              n44w069/",
+    "0              n44w070/",
+    "0              n44w071/",
+    "0              n44w072/",
+    "0              n44w073/",
+    "0              n44w074/",
+    "0              n44w075/",
+    "0              n44w076/",
+    "0              n44w077/",
+    "0              n44w078/",
+    "0              n44w079/",
+    "0              n44w080/",
+    "0              n44w081/",
+    "0              n44w083/",
+    "0              n44w084/",
+    "0              n44w085/",
+    "0              n44w086/",
+    "0              n44w087/",
+    "0              n44w088/",
+    "0              n44w089/",
+    "0              n44w090/",
+    "0              n44w091/",
+    "0              n44w092/",
+    "0              n44w093/",
+    "0              n44w094/",
+    "0              n44w095/",
+    "0              n44w096/",
+    "0              n44w097/",
+    "0              n44w098/",
+    "0              n44w099/",
+    "0              n44w100/",
+    "0              n44w101/",
+    "0              n44w102/",
+    "0              n44w103/",
+    "0              n44w104/",
+    "0              n44w105/",
+    "0              n44w106/",
+    "0              n44w107/",
+    "0              n44w108/",
+    "0              n44w109/",
+    "0              n44w110/",
+    "0              n44w111/",
+    "0              n44w112/",
+    "0              n44w113/",
+    "0              n44w114/",
+    "0              n44w115/",
+    "0              n44w116/",
+    "0              n44w117/",
+    "0              n44w118/",
+    "0              n44w119/",
+    "0              n44w120/",
+    "0              n44w121/",
+    "0              n44w122/",
+    "0              n44w123/",
+    "0              n44w124/",
+    "0              n44w125/",
+    "0              n45w067/",
+    "0              n45w068/",
+    "0              n45w069/",
+    "0              n45w070/",
+    "0              n45w071/",
+    "0              n45w072/",
+    "0              n45w073/",
+    "0              n45w074/",
+    "0              n45w075/",
+    "0              n45w076/",
+    "0              n45w077/",
+    "0              n45w083/",
+    "0              n45w084/",
+    "0              n45w085/",
+    "0              n45w086/",
+    "0              n45w087/",
+    "0              n45w088/",
+    "0              n45w089/",
+    "0              n45w090/",
+    "0              n45w091/",
+    "0              n45w092/",
+    "0              n45w093/",
+    "0              n45w094/",
+    "0              n45w095/",
+    "0              n45w096/",
+    "0              n45w097/",
+    "0              n45w098/",
+    "0              n45w099/",
+    "0              n45w100/",
+    "0              n45w101/",
+    "0              n45w102/",
+    "0              n45w103/",
+    "0              n45w104/",
+    "0              n45w105/",
+    "0              n45w106/",
+    "0              n45w107/",
+    "0              n45w108/",
+    "0              n45w109/",
+    "0              n45w110/",
+    "0              n45w111/",
+    "0              n45w112/",
+    "0              n45w113/",
+    "0              n45w114/",
+    "0              n45w115/",
+    "0              n45w116/",
+    "0              n45w117/",
+    "0              n45w118/",
+    "0              n45w119/",
+    "0              n45w120/",
+    "0              n45w121/",
+    "0              n45w122/",
+    "0              n45w123/",
+    "0              n45w124/",
+    "0              n45w125/",
+    "0              n46w068/",
+    "0              n46w069/",
+    "0              n46w070/",
+    "0              n46w071/",
+    "0              n46w072/",
+    "0              n46w073/",
+    "0              n46w074/",
+    "0              n46w075/",
+    "0              n46w084/",
+    "0              n46w085/",
+    "0              n46w086/",
+    "0              n46w087/",
+    "0              n46w088/",
+    "0              n46w089/",
+    "0              n46w090/",
+    "0              n46w091/",
+    "0              n46w092/",
+    "0              n46w093/",
+    "0              n46w094/",
+    "0              n46w095/",
+    "0              n46w096/",
+    "0              n46w097/",
+    "0              n46w098/",
+    "0              n46w099/",
+    "0              n46w100/",
+    "0              n46w101/",
+    "0              n46w102/",
+    "0              n46w103/",
+    "0              n46w104/",
+    "0              n46w105/",
+    "0              n46w106/",
+    "0              n46w107/",
+    "0              n46w108/",
+    "0              n46w109/",
+    "0              n46w110/",
+    "0              n46w111/",
+    "0              n46w112/",
+    "0              n46w113/",
+    "0              n46w114/",
+    "0              n46w115/",
+    "0              n46w116/",
+    "0              n46w117/",
+    "0              n46w118/",
+    "0              n46w119/",
+    "0              n46w120/",
+    "0              n46w121/",
+    "0              n46w122/",
+    "0              n46w123/",
+    "0              n46w124/",
+    "0              n46w125/",
+    "0              n47w068/",
+    "0              n47w069/",
+    "0              n47w070/",
+    "0              n47w071/",
+    "0              n47w084/",
+    "0              n47w085/",
+    "0              n47w086/",
+    "0              n47w087/",
+    "0              n47w088/",
+    "0              n47w089/",
+    "0              n47w090/",
+    "0              n47w091/",
+    "0              n47w092/",
+    "0              n47w093/",
+    "0              n47w094/",
+    "0              n47w095/",
+    "0              n47w096/",
+    "0              n47w097/",
+    "0              n47w098/",
+    "0              n47w099/",
+    "0              n47w100/",
+    "0              n47w101/",
+    "0              n47w102/",
+    "0              n47w103/",
+    "0              n47w104/",
+    "0              n47w105/",
+    "0              n47w106/",
+    "0              n47w107/",
+    "0              n47w108/",
+    "0              n47w109/",
+    "0              n47w110/",
+    "0              n47w111/",
+    "0              n47w112/",
+    "0              n47w113/",
+    "0              n47w114/",
+    "0              n47w115/",
+    "0              n47w116/",
+    "0              n47w117/",
+    "0              n47w118/",
+    "0              n47w119/",
+    "0              n47w120/",
+    "0              n47w121/",
+    "0              n47w122/",
+    "0              n47w123/",
+    "0              n47w124/",
+    "0              n47w125/",
+    "0              n48w068/",
+    "0              n48w069/",
+    "0              n48w070/",
+    "0              n48w087/",
+    "0              n48w088/",
+    "0              n48w089/",
+    "0              n48w090/",
+    "0              n48w091/",
+    "0              n48w092/",
+    "0              n48w093/",
+    "0              n48w094/",
+    "0              n48w095/",
+    "0              n48w096/",
+    "0              n48w097/",
+    "0              n48w098/",
+    "0              n48w099/",
+    "0              n48w100/",
+    "0              n48w101/",
+    "0              n48w102/",
+    "0              n48w103/",
+    "0              n48w104/",
+    "0              n48w105/",
+    "0              n48w106/",
+    "0              n48w107/",
+    "0              n48w108/",
+    "0              n48w109/",
+    "0              n48w110/",
+    "0              n48w111/",
+    "0              n48w112/",
+    "0              n48w113/",
+    "0              n48w114/",
+    "0              n48w115/",
+    "0              n48w116/",
+    "0              n48w117/",
+    "0              n48w118/",
+    "0              n48w119/",
+    "0              n48w120/",
+    "0              n48w121/",
+    "0              n48w122/",
+    "0              n48w123/",
+    "0              n48w124/",
+    "0              n48w125/",
+    "0              n49w089/",
+    "0              n49w090/",
+    "0              n49w091/",
+    "0              n49w092/",
+    "0              n49w093/",
+    "0              n49w094/",
+    "0              n49w095/",
+    "0              n49w096/",
+    "0              n49w097/",
+    "0              n49w098/",
+    "0              n49w099/",
+    "0              n49w100/",
+    "0              n49w101/",
+    "0              n49w102/",
+    "0              n49w103/",
+    "0              n49w104/",
+    "0              n49w105/",
+    "0              n49w106/",
+    "0              n49w107/",
+    "0              n49w108/",
+    "0              n49w109/",
+    "0              n49w110/",
+    "0              n49w111/",
+    "0              n49w112/",
+    "0              n49w113/",
+    "0              n49w114/",
+    "0              n49w115/",
+    "0              n49w116/",
+    "0              n49w117/",
+    "0              n49w118/",
+    "0              n49w119/",
+    "0              n49w120/",
+    "0              n49w121/",
+    "0              n49w122/",
+    "0              n49w123/",
+    "0              n49w124/",
+    "0              n49w125/",
+    "0              n50w095/",
+    "0              n50w096/",
+    "0              n50w097/",
+    "0              n50w098/",
+    "0              n50w099/",
+    "0              n50w100/",
+    "0              n50w101/",
+    "0              n50w107/",
+    "0              n50w108/",
+    "0              n50w122/",
+    "0              n50w123/",
+    "0              n50w124/",
+    "0              n52e177/",
+    "0              n52e178/",
+    "0              n52e179/",
+    "0              n52w174/",
+    "0              n52w176/",
+    "0              n52w177/",
+    "0              n52w178/",
+    "0              n52w179/",
+    "0              n52w180/",
+    "0              n53e172/",
+    "0              n53e173/",
+    "0              n53e174/",
+    "0              n53e175/",
+    "0              n53e177/",
+    "0              n53e178/",
+    "0              n53e179/",
+    "0              n53w169/",
+    "0              n53w170/",
+    "0              n53w171/",
+    "0              n53w172/",
+    "0              n53w173/",
+    "0              n53w174/",
+    "0              n53w175/",
+    "0              n53w176/",
+    "0              n53w177/",
+    "0              n54e172/",
+    "0              n54e173/",
+    "0              n54w167/",
+    "0              n54w168/",
+    "0              n54w169/",
+    "0              n54w170/",
+    "0              n55w131/",
+    "0              n55w132/",
+    "0              n55w133/",
+    "0              n55w134/",
+    "0              n55w160/",
+    "0              n55w161/",
+    "0              n55w162/",
+    "0              n55w163/",
+    "0              n55w164/",
+    "0              n55w165/",
+    "0              n55w166/",
+    "0              n55w167/",
+    "0              n56w130/",
+    "0              n56w131/",
+    "0              n56w132/",
+    "0              n56w133/",
+    "0              n56w134/",
+    "0              n56w135/",
+    "0              n56w156/",
+    "0              n56w157/",
+    "0              n56w159/",
+    "0              n56w160/",
+    "0              n56w161/",
+    "0              n56w162/",
+    "0              n56w163/",
+    "0              n56w164/",
+    "0              n57w131/",
+    "0              n57w132/",
+    "0              n57w133/",
+    "0              n57w134/",
+    "0              n57w135/",
+    "0              n57w136/",
+    "0              n57w153/",
+    "0              n57w154/",
+    "0              n57w155/",
+    "0              n57w156/",
+    "0              n57w157/",
+    "0              n57w158/",
+    "0              n57w159/",
+    "0              n57w160/",
+    "0              n57w161/",
+    "0              n57w162/",
+    "0              n57w170/",
+    "0              n57w171/",
+    "0              n58w133/",
+    "0              n58w134/",
+    "0              n58w135/",
+    "0              n58w136/",
+    "0              n58w137/",
+    "0              n58w153/",
+    "0              n58w154/",
+    "0              n58w155/",
+    "0              n58w156/",
+    "0              n58w157/",
+    "0              n58w158/",
+    "0              n58w159/",
+    "0              n58w170/",
+    "0              n58w171/",
+    "0              n59w134/",
+    "0              n59w135/",
+    "0              n59w136/",
+    "0              n59w137/",
+    "0              n59w138/",
+    "0              n59w139/",
+    "0              n59w152/",
+    "0              n59w153/",
+    "0              n59w154/",
+    "0              n59w155/",
+    "0              n59w156/",
+    "0              n59w157/",
+    "0              n59w158/",
+    "0              n59w159/",
+    "0              n59w160/",
+    "0              n59w161/",
+    "0              n59w162/",
+    "0              n59w163/",
+    "0              n60w135/",
+    "0              n60w136/",
+    "0              n60w137/",
+    "0              n60w138/",
+    "0              n60w139/",
+    "0              n60w140/",
+    "0              n60w141/",
+    "0              n60w142/",
+    "0              n60w143/",
+    "0              n60w144/",
+    "0              n60w145/",
+    "0              n60w146/",
+    "0              n60w147/",
+    "0              n60w148/",
+    "0              n60w149/",
+    "0              n60w150/",
+    "0              n60w151/",
+    "0              n60w152/",
+    "0              n60w153/",
+    "0              n60w154/",
+    "0              n60w155/",
+    "0              n60w156/",
+    "0              n60w157/",
+    "0              n60w158/",
+    "0              n60w159/",
+    "0              n60w160/",
+    "0              n60w161/",
+    "0              n60w162/",
+    "0              n60w163/",
+    "0              n60w164/",
+    "0              n60w165/",
+    "0              n60w166/",
+    "0              n60w167/",
+    "0              n60w168/",
+    "0              n61w140/",
+    "0              n61w141/",
+    "0              n61w142/",
+    "0              n61w143/",
+    "0              n61w144/",
+    "0              n61w145/",
+    "0              n61w146/",
+    "0              n61w147/",
+    "0              n61w148/",
+    "0              n61w149/",
+    "0              n61w150/",
+    "0              n61w151/",
+    "0              n61w152/",
+    "0              n61w153/",
+    "0              n61w154/",
+    "0              n61w155/",
+    "0              n61w156/",
+    "0              n61w157/",
+    "0              n61w158/",
+    "0              n61w159/",
+    "0              n61w160/",
+    "0              n61w161/",
+    "0              n61w162/",
+    "0              n61w163/",
+    "0              n61w164/",
+    "0              n61w165/",
+    "0              n61w166/",
+    "0              n61w167/",
+    "0              n61w168/",
+    "0              n61w173/",
+    "0              n61w174/",
+    "0              n62w142/",
+    "0              n62w143/",
+    "0              n62w144/",
+    "0              n62w145/",
+    "0              n62w146/",
+    "0              n62w147/",
+    "0              n62w148/",
+    "0              n62w149/",
+    "0              n62w150/",
+    "0              n62w151/",
+    "0              n62w152/",
+    "0              n62w153/",
+    "0              n62w154/",
+    "0              n62w155/",
+    "0              n62w156/",
+    "0              n62w157/",
+    "0              n62w158/",
+    "0              n62w159/",
+    "0              n62w160/",
+    "0              n62w161/",
+    "0              n62w162/",
+    "0              n62w163/",
+    "0              n62w164/",
+    "0              n62w165/",
+    "0              n62w166/",
+    "0              n62w167/",
+    "0              n63w142/",
+    "0              n63w143/",
+    "0              n63w144/",
+    "0              n63w145/",
+    "0              n63w146/",
+    "0              n63w147/",
+    "0              n63w148/",
+    "0              n63w149/",
+    "0              n63w150/",
+    "0              n63w151/",
+    "0              n63w152/",
+    "0              n63w153/",
+    "0              n63w154/",
+    "0              n63w155/",
+    "0              n63w156/",
+    "0              n63w157/",
+    "0              n63w158/",
+    "0              n63w159/",
+    "0              n63w160/",
+    "0              n63w161/",
+    "0              n63w162/",
+    "0              n63w163/",
+    "0              n63w164/",
+    "0              n63w165/",
+    "0              n63w166/",
+    "0              n63w167/",
+    "0              n63w170/",
+    "0              n64w141/",
+    "0              n64w142/",
+    "0              n64w143/",
+    "0              n64w144/",
+    "0              n64w145/",
+    "0              n64w146/",
+    "0              n64w147/",
+    "0              n64w148/",
+    "0              n64w149/",
+    "0              n64w150/",
+    "0              n64w151/",
+    "0              n64w152/",
+    "0              n64w153/",
+    "0              n64w154/",
+    "0              n64w155/",
+    "0              n64w156/",
+    "0              n64w157/",
+    "0              n64w158/",
+    "0              n64w159/",
+    "0              n64w160/",
+    "0              n64w161/",
+    "0              n64w162/",
+    "0              n64w163/",
+    "0              n64w164/",
+    "0              n64w165/",
+    "0              n64w169/",
+    "0              n64w170/",
+    "0              n64w171/",
+    "0              n64w172/",
+    "0              n65w141/",
+    "0              n65w142/",
+    "0              n65w143/",
+    "0              n65w144/",
+    "0              n65w145/",
+    "0              n65w146/",
+    "0              n65w147/",
+    "0              n65w148/",
+    "0              n65w149/",
+    "0              n65w150/",
+    "0              n65w151/",
+    "0              n65w152/",
+    "0              n65w153/",
+    "0              n65w154/",
+    "0              n65w155/",
+    "0              n65w156/",
+    "0              n65w157/",
+    "0              n65w158/",
+    "0              n65w159/",
+    "0              n65w160/",
+    "0              n65w161/",
+    "0              n65w162/",
+    "0              n65w163/",
+    "0              n65w164/",
+    "0              n65w165/",
+    "0              n65w166/",
+    "0              n65w167/",
+    "0              n66w141/",
+    "0              n66w142/",
+    "0              n66w143/",
+    "0              n66w144/",
+    "0              n66w145/",
+    "0              n66w146/",
+    "0              n66w147/",
+    "0              n66w148/",
+    "0              n66w149/",
+    "0              n66w150/",
+    "0              n66w151/",
+    "0              n66w152/",
+    "0              n66w153/",
+    "0              n66w154/",
+    "0              n66w155/",
+    "0              n66w156/",
+    "0              n66w157/",
+    "0              n66w158/",
+    "0              n66w159/",
+    "0              n66w160/",
+    "0              n66w161/",
+    "0              n66w162/",
+    "0              n66w163/",
+    "0              n66w164/",
+    "0              n66w165/",
+    "0              n66w166/",
+    "0              n66w167/",
+    "0              n66w168/",
+    "0              n66w169/",
+    "0              n67w141/",
+    "0              n67w142/",
+    "0              n67w143/",
+    "0              n67w144/",
+    "0              n67w145/",
+    "0              n67w146/",
+    "0              n67w147/",
+    "0              n67w148/",
+    "0              n67w149/",
+    "0              n67w150/",
+    "0              n67w151/",
+    "0              n67w152/",
+    "0              n67w153/",
+    "0              n67w154/",
+    "0              n67w155/",
+    "0              n67w156/",
+    "0              n67w157/",
+    "0              n67w158/",
+    "0              n67w159/",
+    "0              n67w160/",
+    "0              n67w161/",
+    "0              n67w162/",
+    "0              n67w163/",
+    "0              n67w164/",
+    "0              n67w165/",
+    "0              n67w166/",
+    "0              n67w167/",
+    "0              n67w168/",
+    "0              n68w141/",
+    "0              n68w142/",
+    "0              n68w143/",
+    "0              n68w144/",
+    "0              n68w145/",
+    "0              n68w146/",
+    "0              n68w147/",
+    "0              n68w148/",
+    "0              n68w149/",
+    "0              n68w150/",
+    "0              n68w151/",
+    "0              n68w152/",
+    "0              n68w153/",
+    "0              n68w154/",
+    "0              n68w155/",
+    "0              n68w156/",
+    "0              n68w157/",
+    "0              n68w158/",
+    "0              n68w159/",
+    "0              n68w160/",
+    "0              n68w161/",
+    "0              n68w162/",
+    "0              n68w163/",
+    "0              n68w164/",
+    "0              n68w165/",
+    "0              n68w166/",
+    "0              n69w141/",
+    "0              n69w142/",
+    "0              n69w143/",
+    "0              n69w144/",
+    "0              n69w145/",
+    "0              n69w146/",
+    "0              n69w147/",
+    "0              n69w148/",
+    "0              n69w149/",
+    "0              n69w150/",
+    "0              n69w151/",
+    "0              n69w152/",
+    "0              n69w153/",
+    "0              n69w154/",
+    "0              n69w155/",
+    "0              n69w156/",
+    "0              n69w157/",
+    "0              n69w158/",
+    "0              n69w159/",
+    "0              n69w160/",
+    "0              n69w161/",
+    "0              n69w162/",
+    "0              n69w163/",
+    "0              n69w164/",
+    "0              n69w165/",
+    "0              n69w166/",
+    "0              n69w167/",
+    "0              n70w141/",
+    "0              n70w142/",
+    "0              n70w143/",
+    "0              n70w144/",
+    "0              n70w145/",
+    "0              n70w146/",
+    "0              n70w147/",
+    "0              n70w148/",
+    "0              n70w149/",
+    "0              n70w150/",
+    "0              n70w151/",
+    "0              n70w152/",
+    "0              n70w153/",
+    "0              n70w154/",
+    "0              n70w155/",
+    "0              n70w156/",
+    "0              n70w157/",
+    "0              n70w158/",
+    "0              n70w159/",
+    "0              n70w160/",
+    "0              n70w161/",
+    "0              n70w162/",
+    "0              n70w163/",
+    "0              n70w164/",
+    "0              n70w165/",
+    "0              n71w143/",
+    "0              n71w144/",
+    "0              n71w145/",
+    "0              n71w146/",
+    "0              n71w147/",
+    "0              n71w148/",
+    "0              n71w149/",
+    "0              n71w150/",
+    "0              n71w151/",
+    "0              n71w152/",
+    "0              n71w153/",
+    "0              n71w154/",
+    "0              n71w155/",
+    "0              n71w156/",
+    "0              n71w157/",
+    "0              n71w158/",
+    "0              n71w159/",
+    "0              n71w160/",
+    "0              n71w161/",
+    "0              n71w162/",
+    "0              n71w163/",
+    "0              n71w164/",
+    "0              n72w155/",
+    "0              n72w156/",
+    "0              n72w157/",
+    "0              n72w158/",
+    "0              s14w170/",
+    "0              s14w171/"
+  )
+  
+  
+  # substitute the "0" + whitespace prefix (copied from the HTML listing) with the /vsicurl/ URL prefix
+  t2 <- gsub("0              ",
+             "/vsicurl/https://prd-tnm.s3.amazonaws.com/StagedProducts/Elevation/13/TIFF/current/",
+             t)
+  
+  # build full tile file paths following the NED naming scheme (USGS_13_<tile>.tif)
+  t3 <- paste0(t2,
+               "USGS_13_",
+               basename(t2),
+               ".tif")
+  
+  # Write table to data-raw
+  write.table(t3,
+              paste0(base_dir, "/ned_list_USGS_13.txt"),
+              row.names = FALSE,
+              col.names = FALSE,
+              quote = FALSE)
+  
+  
+}
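+
+# A minimal sketch of how the tile list written above could be mosaicked into a
+# single VRT (assumes `base_dir` is the same directory passed to the function
+# above and that GDAL's gdalbuildvrt is on the PATH; the .vrt filename is illustrative):
+# txt_file <- paste0(base_dir, "/ned_list_USGS_13.txt")
+# vrt_file <- paste0(base_dir, "/ned_USGS_13.vrt")
+# system(paste("gdalbuildvrt -input_file_list", txt_file, vrt_file))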
+# dem_base_dir <- "/Users/anguswatters/Desktop/transects_paper/data/dem"
+# download_3dep_vrt(dem_base_dir)
+# 
+# # Create metadata object of three NED resources
+# ned <- data.frame(rbind(
+#   # c(id        = "USGS_1",
+#   #   URL       = "/vsicurl/https://prd-tnm.s3.amazonaws.com/StagedProducts/Elevation/1",
+#   #   varname   = "30m elevation",
+#   #   long_name = "30m (1 arcsec) National Elevation Dataset",
+#   #   units     = "m"),
+#   # 
+#   # c(id        = "USGS_2",
+#   #   URL       = "/vsicurl/https://prd-tnm.s3.amazonaws.com/StagedProducts/Elevation/2",
+#   #   varname   = "60m elevation",
+#   #   long_name = "60m (2 arcsec) National Elevation Dataset Alaska",
+#   #   units     = "m"),
+#   
+#   c(id        = "USGS_13",
+#     URL       = "/vsicurl/https://prd-tnm.s3.amazonaws.com/StagedProducts/Elevation/13",
+#     varname   = "10m elevation",
+#     long_name = "10m (1/3th arcsec) National Elevation Dataset",
+#     units     = "m")
+# ))
+# 
+# 
+# 
+# 
+# # Loop over the three resolutions
+# for (i in 1:nrow(ned)) {
+#   
+#   # Define output text file path
+#   txt_file  <- paste0(dem_base_dir, "/ned_list_", ned$id[i], "_2.txt")
+#   
+#   # Define output VRT path
+#   vrt_file <- paste0(paste0(dem_base_dir, "/ned_", ned$id[i], ".vrt"))
+#   
+#   # If VRT does NOT exist, build VRT
+#   if (!file.exists(vrt_file)) {
+#     
+#     # read the corresponding index.gpkg
+#     files <- sf::read_sf(ned$domain_url[i])
+#     DEM_PATH        <- "/vsicurl/https://prd-tnm.s3.amazonaws.com/StagedProducts/Elevation/1/TIFF/USGS_Seamless_DEM_1.vrt"
+#     # Build full HTTPS paths to "./current/"
+#     files <- c(file.path(ned$URL[i], "TIFF/current", gsub("[.]/", "", files$location)))
+#     
+#     files <- "/vsicurl/https://prd-tnm.s3.amazonaws.com/StagedProducts/Elevation/1/TIFF/USGS_Seamless_DEM_1.vrt"
+#     
+#     # write list of files to text file
+#     write.table(files, txt_file, row.names = FALSE, col.names = FALSE, quote = FALSE)
+#     
+#     # build VRT from text file input using GDAL system call ...
+#     system(paste("gdalbuildvrt -input_file_list", txt_file, vrt_file))
+#   }
+#   
+#   logger::log_info("Finished ", ned$id[i], "...")
+# }
+
+# 
+# match_transects_to_extended_cs_pts <- function(transect_lines, fixed_cs_pts, crosswalk_id, extension_pct = 0.5 ) {
+#   
+#   # transect_lines = transects
+#   # fixed_cs_pts   = fixed_pts
+#   # crosswalk_id   = CROSSWALK_ID
+#   
+#   fixed_cs_pts <- nhdplusTools::rename_geometry(fixed_cs_pts, "geometry")
+#   transect_lines    <- nhdplusTools::rename_geometry(transect_lines, "geometry")
+#   
+#   # get the counts of each point type to add this data to the transect_lines dataset
+#   point_type_counts <- hydrofabric3D::get_point_type_counts(classified_pts = fixed_cs_pts, 
+#                                                             crosswalk_id = crosswalk_id)
+#   # Check the number of cross sections that were extended
+#   message("Subsetting cross section points generated after extending transect_lines...")
+#   
+#   # extract cross section points that have an "is_extended" value of TRUE
+#   extended_pts <- 
+#     fixed_cs_pts %>%
+#     dplyr::filter(is_extended) %>%
+#     hydrofabric3D::add_tmp_id(x = crosswalk_id)
+#   
+#   # extended_pts %>% 
+#   #   get_unique_tmp_ids() %>% 
+#   #   length()
+#   
+#   # extract transect_lines that have a "crosswalk_id" in the "extended_pts" dataset
+#   update_transect_lines <- 
+#     transect_lines %>%
+#     hydrofabric3D::add_tmp_id(x = crosswalk_id) %>%
+#     dplyr::filter(tmp_id %in% unique(extended_pts$tmp_id))
+#   
+#   cs_pt_uids    <- unique(hydrofabric3D::add_tmp_id(fixed_cs_pts, x = crosswalk_id)$tmp_id)
+#   
+#   # If any transect_lines were extended, update the transect_lines dataset, and overwrite local and S3 transect_lines geopackages
+#   if (nrow(update_transect_lines) > 0) {
+#     message("Updating ", nrow(update_transect_lines), " transect_lines")
+#     
+#     
+#     # update_transect_lines <- 
+#     #   update_transect_lines %>% 
+#     #   dplyr::rename(hy_id := !!sym(crosswalk_id)) 
+#     # 
+#     update_transect_lines <- 
+#       update_transect_lines %>%
+#       # apply extend_by_percent function to each transect line:
+#       hydrofabric3D:::extend_by_percent(
+#         crosswalk_id = crosswalk_id,
+#         pct        = extension_pct,
+#         length_col = "cs_lengthm"
+#       )
+#     
+#     update_transect_lines <- hydroloom::rename_geometry(update_transect_lines, "geometry")
+#     
+#     # update_transect_lines <- 
+#     #   update_transect_lines %>%  
+#     #   dplyr::rename(!!sym(crosswalk_id) := hy_id)
+#     
+#     # cs_pt_uids    <- unique(hydrofabric3D::add_tmp_id(fixed_cs_pts, x = get(crosswalk_id))$tmp_id)
+#     # transect_uids <- unique(hydrofabric3D::add_tmp_id(transect_lines, x = get(crosswalk_id))$tmp_id)
+#     
+#     # Filter down to ONLY points that were finalized and rectified from rectify_cs_pts()
+#     # Remove old transect_lines that have "tmp_id" in "extended_pts" (transect_lines that were unchanged and are "good_to_go")
+#     # and then replace with old transect_lines with the "update_transect_lines"
+#     out_transect_lines <-
+#       transect_lines %>%
+#       hydrofabric3D::add_tmp_id(x = crosswalk_id) %>%
+#       dplyr::filter(tmp_id %in% cs_pt_uids) %>% 
+#       dplyr::filter(!tmp_id %in% unique(extended_pts$tmp_id)) %>%
+#       dplyr::bind_rows(
+#         dplyr::mutate(update_transect_lines, is_extended = TRUE)
+#       )
+#     
+#     # transect_lines %>% 
+#     #   hydrofabric3D::add_tmp_id(x = "hy_id") %>%
+#     #   # dplyr::filter(!tmp_id %in% unique(extended_pts$tmp_id)) %>%
+#     #   dplyr::filter(tmp_id %in% unique(hydrofabric3D::add_tmp_id(fixed_pts, x = "hy_id")$tmp_id)) %>% # Subset down to the remaining tmp_ids in the fixed points
+#     #   dplyr::filter(!tmp_id %in% unique(extended_pts$tmp_id)) %>% # remove the tmp_ids that we are going to add back in with the extended versions of those tmp_ids
+#     #   dplyr::bind_rows( # bring in the new updated extended transect_lines
+#     #     dplyr::mutate(
+#     #       update_transect_lines,
+#     #       is_extended = TRUE
+#     #     )
+#     #   )  
+#   } else {
+#     # If no transect_lines were extended
+#     out_transect_lines <- 
+#       transect_lines %>%
+#       hydrofabric3D::add_tmp_id(x = crosswalk_id) %>%
+#       dplyr::filter(tmp_id %in% cs_pt_uids) %>% 
+#       # dplyr::filter(tmp_id %in% unique(hydrofabric3D::add_tmp_id(fixed_cs_pts, x = get(crosswalk_id))$tmp_id)) %>%
+#       dplyr::filter(!tmp_id %in% unique(extended_pts$tmp_id))
+#   }
+#   
+#   # Finalize new transect_lines
+#   out_transect_lines <- 
+#     out_transect_lines %>%
+#     dplyr::left_join(
+#       point_type_counts, 
+#       by = c(crosswalk_id, "cs_id")
+#     ) %>%
+#     dplyr::left_join(
+#       dplyr::ungroup(
+#         dplyr::slice(
+#           dplyr::group_by(
+#             dplyr::select(sf::st_drop_geometry(fixed_cs_pts),
+#                           dplyr::any_of(crosswalk_id), 
+#                           cs_id, bottom, left_bank, right_bank, valid_banks, has_relief
+#             ),
+#             dplyr::across(dplyr::any_of(c(crosswalk_id, "cs_id")))
+#           ),
+#           1
+#         )
+#       ),
+#       by = c(crosswalk_id, "cs_id")
+#     ) %>%
+#     dplyr::select(
+#       dplyr::any_of(crosswalk_id),
+#       cs_source, cs_id, cs_measure, cs_lengthm,
+#       # sinuosity,
+#       is_extended,
+#       left_bank_count, right_bank_count, channel_count, bottom_count,
+#       bottom, left_bank, right_bank, valid_banks, has_relief,
+#       geometry
+#     ) %>% 
+#     dplyr::mutate(
+#       is_extended = ifelse(is.na(is_extended), FALSE, is_extended)
+#     )  
+#   
+#   return(out_transect_lines)
+# }
+
+# Utility function that matches transects to the cross section points that were
+# extended during "get_improved_cs_pts()" and returns the extended version of the transects.
+# match_transects_to_extended_cs_pts <- function(transect_lines, fixed_cs_pts, crosswalk_id) {
+#   
+#   # transect_lines = transects
+#   # fixed_cs_pts   = fixed_pts
+#   # crosswalk_id   = CROSSWALK_ID
+#   
+#   fixed_cs_pts <- nhdplusTools::rename_geometry(fixed_cs_pts, "geometry")
+#   transect_lines    <- nhdplusTools::rename_geometry(transect_lines, "geometry")
+#   
+#   # get the counts of each point type to add this data to the transect_lines dataset
+#   point_type_counts <- hydrofabric3D::get_point_type_counts(classified_pts = fixed_cs_pts, 
+#                                                             crosswalk_id = crosswalk_id)
+#   # Check the number of cross sections that were extended
+#   message("Subsetting cross section points generated after extending transect_lines...")
+#   
+#   # extract cross section points that have an "is_extended" value of TRUE
+#   extended_pts <- 
+#     fixed_cs_pts %>%
+#     dplyr::filter(is_extended) %>%
+#     hydrofabric3D::add_tmp_id(x = crosswalk_id)
+#   
+#   # extended_pts %>% 
+#   #   get_unique_tmp_ids() %>% 
+#   #   length()
+#   
+#   # extract transect_lines that have a "crosswalk_id" in the "extended_pts" dataset
+#   update_transect_lines <- 
+#     transect_lines %>%
+#     hydrofabric3D::add_tmp_id(x = crosswalk_id) %>%
+#     dplyr::filter(tmp_id %in% unique(extended_pts$tmp_id))
+#   
+#   cs_pt_uids    <- unique(hydrofabric3D::add_tmp_id(fixed_cs_pts, x = crosswalk_id)$tmp_id)
+#   
+#   # If any transect_lines were extended, update the transect_lines dataset, and overwrite local and S3 transect_lines geopackages
+#   if (nrow(update_transect_lines) > 0) {
+#     message("Updating ", nrow(update_transect_lines), " transect_lines")
+#     
+#     
+#     update_transect_lines <- 
+#       update_transect_lines %>% 
+#       dplyr::rename(hy_id := !!sym(crosswalk_id)) 
+#     
+#     update_transect_lines <- 
+#       update_transect_lines %>%
+#       # apply extend_by_percent function to each transect line:
+#       hydrofabric3D:::extend_by_percent(
+#         pct        = EXTENSION_PCT,
+#         length_col = "cs_lengthm"
+#       )
+#     
+#     update_transect_lines <- hydroloom::rename_geometry(update_transect_lines, "geometry")
+#     
+#     update_transect_lines <- 
+#       update_transect_lines %>%  
+#       dplyr::rename(!!sym(crosswalk_id) := hy_id)
+# 
+#     # cs_pt_uids    <- unique(hydrofabric3D::add_tmp_id(fixed_cs_pts, x = get(crosswalk_id))$tmp_id)
+#     # transect_uids <- unique(hydrofabric3D::add_tmp_id(transect_lines, x = get(crosswalk_id))$tmp_id)
+#     
+#     # Filter down to ONLY points that were finalized and rectified from rectify_cs_pts()
+#     # Remove old transect_lines that have "tmp_id" in "extended_pts" (transect_lines that were unchanged and are "good_to_go")
+#     # and then replace with old transect_lines with the "update_transect_lines"
+#     out_transect_lines <-
+#       transect_lines %>%
+#       hydrofabric3D::add_tmp_id(x = crosswalk_id) %>%
+#       dplyr::filter(tmp_id %in% cs_pt_uids) %>% 
+#       dplyr::filter(!tmp_id %in% unique(extended_pts$tmp_id)) %>%
+#       dplyr::bind_rows(
+#         dplyr::mutate(update_transect_lines, is_extended = TRUE)
+#       )
+#     
+#     # transect_lines %>% 
+#     #   hydrofabric3D::add_tmp_id(x = "hy_id") %>%
+#     #   # dplyr::filter(!tmp_id %in% unique(extended_pts$tmp_id)) %>%
+#     #   dplyr::filter(tmp_id %in% unique(hydrofabric3D::add_tmp_id(fixed_pts, x = "hy_id")$tmp_id)) %>% # Subset down to the remaining tmp_ids in the fixed points
+#     #   dplyr::filter(!tmp_id %in% unique(extended_pts$tmp_id)) %>% # remove the tmp_ids that we are going to add back in with the extended versions of those tmp_ids
+#     #   dplyr::bind_rows( # bring in the new updated extended transect_lines
+#     #     dplyr::mutate(
+#     #       update_transect_lines,
+#     #       is_extended = TRUE
+#     #     )
+#     #   )  
+#   } else {
+#     # If no transect_lines were extended
+#     out_transect_lines <- 
+#       transect_lines %>%
+#       hydrofabric3D::add_tmp_id(x = crosswalk_id) %>%
+#       dplyr::filter(tmp_id %in% cs_pt_uids) %>% 
+#       # dplyr::filter(tmp_id %in% unique(hydrofabric3D::add_tmp_id(fixed_cs_pts, x = get(crosswalk_id))$tmp_id)) %>%
+#       dplyr::filter(!tmp_id %in% unique(extended_pts$tmp_id))
+#   }
+#   
+#   # Finalize new transect_lines
+#   out_transect_lines <- 
+#     out_transect_lines %>%
+#     dplyr::left_join(
+#       point_type_counts, 
+#       by = c(crosswalk_id, "cs_id")
+#     ) %>%
+#     dplyr::left_join(
+#       dplyr::ungroup(
+#         dplyr::slice(
+#           dplyr::group_by(
+#             dplyr::select(sf::st_drop_geometry(fixed_cs_pts),
+#                           dplyr::any_of(crosswalk_id), 
+#                           cs_id, bottom, left_bank, right_bank, valid_banks, has_relief
+#             ),
+#             dplyr::across(dplyr::any_of(c(crosswalk_id, "cs_id")))
+#           ),
+#           1
+#         )
+#       ),
+#       by = c(crosswalk_id, "cs_id")
+#     ) %>%
+#     dplyr::select(
+#       dplyr::any_of(crosswalk_id),
+#       cs_source, cs_id, cs_measure, cs_lengthm,
+#       # sinuosity,
+#       is_extended,
+#       left_bank_count, right_bank_count, channel_count, bottom_count,
+#       bottom, left_bank, right_bank, valid_banks, has_relief,
+#       geometry
+#     ) %>% 
+#     dplyr::mutate(
+#       is_extended = ifelse(is.na(is_extended), FALSE, is_extended)
+#     )  
+#   
+#   return(out_transect_lines)
+# }
+
+
+

From 8c094982a9c7ae193fefa956111a778cc4617768 Mon Sep 17 00:00:00 2001
From: anguswg-ucsb <anguswatters@gmail.com>
Date: Mon, 18 Nov 2024 14:42:57 -0800
Subject: [PATCH 63/64] building new cs_runner format in cs_runner2 to replace
 first version of cs_runner

---
 runners/cs_runner2/00_driver.R                |  26 ++
 runners/cs_runner2/01_transects.R             |   0
 .../cs_runner2/02_extend_transects_by_fema.R  |   0
 .../cs_runner2/03_extract_initial_cs_pts.R    |   0
 .../04_extend_transects_by_cs_attributes.R    |   0
 runners/cs_runner2/05_extract_output_cs_pts.R |   0
 runners/cs_runner2/06_inject_ml_bathymetry.R  |   0
 runners/cs_runner2/base_variables.R           |  36 ++-
 runners/cs_runner2/download_dem_from_vrt.R    | 145 ++++++++--
 .../{download_fema100.R => download_fema.R}   |   0
 runners/cs_runner2/download_ml_outputs.R      |   0
 runners/cs_runner2/finalize_outputs.R         |   0
 ...partition_fema_by_vpu.R => process_fema.R} | 264 ++++++++----------
 runners/cs_runner2/utils.R                    |  58 +++-
 14 files changed, 350 insertions(+), 179 deletions(-)
 create mode 100644 runners/cs_runner2/00_driver.R
 create mode 100644 runners/cs_runner2/01_transects.R
 create mode 100644 runners/cs_runner2/02_extend_transects_by_fema.R
 create mode 100644 runners/cs_runner2/03_extract_initial_cs_pts.R
 create mode 100644 runners/cs_runner2/04_extend_transects_by_cs_attributes.R
 create mode 100644 runners/cs_runner2/05_extract_output_cs_pts.R
 create mode 100644 runners/cs_runner2/06_inject_ml_bathymetry.R
 rename runners/cs_runner2/{download_fema100.R => download_fema.R} (100%)
 create mode 100644 runners/cs_runner2/download_ml_outputs.R
 create mode 100644 runners/cs_runner2/finalize_outputs.R
 rename runners/cs_runner2/{partition_fema_by_vpu.R => process_fema.R} (79%)

diff --git a/runners/cs_runner2/00_driver.R b/runners/cs_runner2/00_driver.R
new file mode 100644
index 0000000..cb98b6e
--- /dev/null
+++ b/runners/cs_runner2/00_driver.R
@@ -0,0 +1,26 @@
+### Run this file to have all runner scripts run in order
+
+# load environment configuration
+source("runners/cs_runner2/config_env.R")
+
+# downloads datasets
+# - Nextgen data
+# - Reference features (for waterbody filtering)
+# - ML outputs
+# - FEMA 100 year floodplain polygons (FGBs)
+# - 3DEP DEM VRT
+source("runners/cs_runner2/download_conus_nextgen.R")
+source("runners/cs_runner2/download_conus_ref_features.R")
+source("runners/cs_runner2/download_fema.r")
+source("runners/cs_runner2/process_fema.R")
+source("runners/cs_runner2/download_dem_from_vrt.R")
+source("runners/cs_runner2/download_ml_outputs.R")
+
+# generate and upload transects datasets 
+source("runners/cs_runner2/01_transects.R")
+
+# generate and upload cross sections points datasets 
+source("runners/cs_runner2/02_cs_pts.R")
+
+# Apply machine learning top-width and depth estimates to DEM cross section points
+source("runners/cs_runner2/03_inject_ml.R")
diff --git a/runners/cs_runner2/01_transects.R b/runners/cs_runner2/01_transects.R
new file mode 100644
index 0000000..e69de29
diff --git a/runners/cs_runner2/02_extend_transects_by_fema.R b/runners/cs_runner2/02_extend_transects_by_fema.R
new file mode 100644
index 0000000..e69de29
diff --git a/runners/cs_runner2/03_extract_initial_cs_pts.R b/runners/cs_runner2/03_extract_initial_cs_pts.R
new file mode 100644
index 0000000..e69de29
diff --git a/runners/cs_runner2/04_extend_transects_by_cs_attributes.R b/runners/cs_runner2/04_extend_transects_by_cs_attributes.R
new file mode 100644
index 0000000..e69de29
diff --git a/runners/cs_runner2/05_extract_output_cs_pts.R b/runners/cs_runner2/05_extract_output_cs_pts.R
new file mode 100644
index 0000000..e69de29
diff --git a/runners/cs_runner2/06_inject_ml_bathymetry.R b/runners/cs_runner2/06_inject_ml_bathymetry.R
new file mode 100644
index 0000000..e69de29
diff --git a/runners/cs_runner2/base_variables.R b/runners/cs_runner2/base_variables.R
index 79792c2..c716e29 100644
--- a/runners/cs_runner2/base_variables.R
+++ b/runners/cs_runner2/base_variables.R
@@ -99,13 +99,45 @@ CONUS_FEMA_GPKG_PATH       <- file.path(CS_EXTENSION_POLYGONS_DIR, 'conus_fema.g
 # ---- CONUS CS extension polygons ----
 # -------------------------------------------------------------------------------------
 
+# Metadata object of National Elevation Dataset (NED) resources
+NED_META <- data.frame(rbind(
+  # c(id        = "USGS_1",
+  #   URL       = "/vsicurl/https://prd-tnm.s3.amazonaws.com/StagedProducts/Elevation/1",
+  #   varname   = "30m elevation",
+  #   long_name = "30m (1 arcsec) National Elevation Dataset",
+  #   units     = "m",
+  #   s3_uri    = "s3://prd-tnm/StagedProducts/Elevation/1/TIFF/",
+  #   vrt_name  = "USGS_Seamless_DEM_1.vrt"
+  # )
+  # 
+  # # c(id        = "USGS_2",
+  # #   URL       = "/vsicurl/https://prd-tnm.s3.amazonaws.com/StagedProducts/Elevation/2",
+  # #   varname   = "60m elevation",
+  # #   long_name = "60m (2 arcsec) National Elevation Dataset Alaska",
+  # #   units     = "m",
+  # #   s3_uri    = "s3://prd-tnm/StagedProducts/Elevation/2/TIFF/",
+  # #   vrt_name  = "USGS_Seamless_DEM_2.vrt"
+  # #   ),
+  c(id        = "USGS_13",
+    URL       = "/vsicurl/https://prd-tnm.s3.amazonaws.com/StagedProducts/Elevation/13",
+    varname   = "10m elevation",
+    long_name = "10m (1/3th arcsec) National Elevation Dataset",
+    units     = "m",
+    s3_uri    = "s3://prd-tnm/StagedProducts/Elevation/13/TIFF/",
+    vrt_name  = "USGS_Seamless_DEM_13.vrt"
+    )
+  )
+)
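+
+# A hedged example of composing a single tile URL from NED_META (the tile name
+# "n70w148" is illustrative; compare the example path in the comments below):
+# tile     <- "n70w148"
+# tile_url <- paste0(NED_META$URL[1], "/TIFF/current/", tile, "/",
+#                    NED_META$id[1], "_", tile, ".tif")
+# # -> "/vsicurl/https://prd-tnm.s3.amazonaws.com/StagedProducts/Elevation/13/TIFF/current/n70w148/USGS_13_n70w148.tif"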
+
+
 DEM_DIR        <- BASE_DIRS_LIST$dem_dir
 DEM_VRT_DIR    <- BASE_DIRS_LIST$dem_vrt_dir
-DEM_TIF_DIR    <- BASE_DIRS_LIST$dem_tif_dir
+# DEM_TIF_DIR    <- BASE_DIRS_LIST$dem_tif_dir
 
 # DEM URL
 DEM_PATH        <- "/vsicurl/https://prd-tnm.s3.amazonaws.com/StagedProducts/Elevation/1/TIFF/USGS_Seamless_DEM_1.vrt"
-
+# DEM_PATH        <- "/vsicurl/https://prd-tnm.s3.amazonaws.com/StagedProducts/Elevation/13/TIFF/USGS_Seamless_DEM_13.vrt"
+# # /vsicurl/https://prd-tnm.s3.amazonaws.com/StagedProducts/Elevation/13/TIFF/current/n70w148/USGS_13_n70w148.tif
 # ----------------------------------------------------------------------------
 # ---- Cross section point extraction constant variables ----
 # ----------------------------------------------------------------------------
diff --git a/runners/cs_runner2/download_dem_from_vrt.R b/runners/cs_runner2/download_dem_from_vrt.R
index edd77bb..fc18f2c 100644
--- a/runners/cs_runner2/download_dem_from_vrt.R
+++ b/runners/cs_runner2/download_dem_from_vrt.R
@@ -1,5 +1,3 @@
-library(raster)
-library(httr)
 library(terra)
 library(dplyr)
 
@@ -8,25 +6,138 @@ library(dplyr)
 source("runners/cs_runner2/base_variables.R")
 source("runners/cs_runner2/utils.R")
 
-base_dirs   <- get_base_dir_paths(BASE_DIR)
+# specify where VRT should go
+DEM_VRT_DIR <- BASE_DIRS_LIST$dem_vrt_dir
+# # DEM_TIF_DIR <- base_dirs$dem_tif_dir
+# DEM_PATH        <- "/vsicurl/https://prd-tnm.s3.amazonaws.com/StagedProducts/Elevation/13/TIFF/USGS_Seamless_DEM_13.vrt"
+# # /vsicurl/https://prd-tnm.s3.amazonaws.com/StagedProducts/Elevation/13/TIFF/current/n70w148/USGS_13_n70w148.tif
+# message("Syncing 3DEP elevation VRTs...")
+# s3_cp_cmd <- paste0("aws s3 cp ", "s3://prd-tnm/StagedProducts/Elevation/13/TIFF/USGS_Seamless_DEM_13.vrt", " ", "/Users/anguswatters/Desktop/USGS_Seamless_DEM_13.vrt", " --profile ", AWS_PROFILE, " --no-sign-request")
+# s3_cp_cmd
+# system(s3_cp_cmd)
+# "s3://prd-tnm/StagedProducts/Elevation/13/TIFF/USGS_Seamless_DEM_13.vrt"
+# 
+# VRT_file <- "/Users/anguswatters/Desktop/USGS_Seamless_DEM_13.vrt"
+# vrt_tiles <- terra::vrt_tiles(VRT_file)
+# 
+# dem <- terra::rast(vrt_tiles[999])
+# dem
+# terra::writeRaster(dem, "/Users/anguswatters/Desktop/tile1_USGS_Seamless_DEM_13.tif", overwrite = T)
+# length(vrt_tiles)
+# s3://lynker-nypa-inputs/ryan_test/
+  
 
-DEM_VRT_DIR <- base_dirs$dem_vrt_dir
-DEM_TIF_DIR <- base_dirs$dem_tif_dir
+for (i in seq_len(nrow(NED_META))) {
+  
+  ned_uri         <- NED_META$s3_uri[i]
+  ned_current_uri <- paste0(ned_uri, "current/")
+  
+  output_dir         <- file.path(DEM_VRT_DIR,  NED_META$id[i])
+  current_output_dir <- file.path(DEM_VRT_DIR,  NED_META$id[i], "current")
+  
+  # create specific VRT directory if it doesn't already exist
+  create_if_not_exists(output_dir)
+  create_if_not_exists(current_output_dir)
+  
+  message("[", i, "] - ", "Syncing\n '", ned_current_uri, "' > '", current_output_dir, "'")
+  
+  s3_sync_cmd <- paste0("aws s3 sync ", ned_current_uri, " ", current_output_dir, " --profile ", AWS_PROFILE, " --no-sign-request")
+  # s3_sync_cmd <- paste0("aws s3 sync s3://prd-tnm/StagedProducts/Elevation/1/TIFF/ ", DEM_VRT_DIR, " --profile ", AWS_PROFILE, " --no-sign-request")
+  
+  
+  tryCatch({
+    
+    system(s3_sync_cmd)
+    
+  }, error = function(e) {
+    
+    message("Error syncing ", ned_uri, " to local directory...")
+    message(e)
+    
+  })
+  
+}
 
-# "aws s3 sync s3://prd-tnm/StagedProducts/Elevation/1/TIFF/ /Volumes/T7SSD/lynker-spatial/dem/tif/ --no-sign-request --only-show-errors"
+# vrt_file <- file.path(DEM_VRT_DIR, "USGS_Seamless_DEM_1.vrt")
+# 
+# # TODO: use AWS S3 sync command like below
+# s3_sync_cmd <- paste0("aws s3 sync s3://prd-tnm/StagedProducts/Elevation/1/TIFF/ ", DEM_VRT_DIR, " --profile ", AWS_PROFILE, " --no-sign-request")
+# # s3_sync_cmd <- paste0("aws s3 sync s3://prd-tnm/StagedProducts/Elevation/1/TIFF/ ", DEM_VRT_DIR, " --profile ", AWS_PROFILE, " --no-sign-request --only-show-errors")
+# 
+# system(s3_sync_cmd)
 
-# Parse the VRT file
-vrt_file <- list.files(DEM_VRT_DIR, full.names = TRUE)
+# ------------------------------------
+# ---- Build VRTs ----
+# ------------------------------------
 
-vrt_tiles <- terra::vrt_tiles(vrt_file)
+message("Building 3DEP elevation '.vrt' files...")
 
-# tile_path <- vrt_tiles[500]
-tile_paths <- gsub("/vsicurl/", "",  vrt_tiles)
+for (i in seq_len(nrow(NED_META))) {
+  
+  output_dir <- file.path(DEM_VRT_DIR,  NED_META$id[i])
+  
+  # output_dir 
+  
+  
+  # Define output text file path
+  txt_file  <- file.path(output_dir, paste0("ned_list_", NED_META$id[i], ".txt"))
+  
+  # Define output VRT path
+  vrt_file  <- file.path(output_dir, paste0("ned_", NED_META$id[i], ".vrt"))
+  # vrt_file <- paste0(paste0("data-raw/ned_", NED_META$id[i], ".vrt"))
+  
+  # If VRT does NOT exist, build VRT
+  if (!file.exists(vrt_file)) {
+    
+    # txt_file
+  
+    current_dir <- file.path(output_dir, "current")
+    
+    # all the current/ directories that have the TIFs
+    tile_dirs   <- list.files(current_dir, full.names = TRUE)
+    # list.files(tile_dirs[1:2],  full.names = T)
+    # index_gpkg <- sf::read_sf(list.files(tile_dirs[999], full.names = T, pattern = ".gpkg"))
+    
+    # get list of all the .tif files in the "current/" dir
+    tif_paths   <- list.files(tile_dirs, pattern = "\\.tif$", full.names = TRUE)
+    
+    # write list of files to text file
+    write.table(tif_paths, txt_file, row.names = FALSE, col.names = FALSE, quote = FALSE)
+    
+    # build VRT from text file input using GDAL system call ...
+    system(paste("gdalbuildvrt -input_file_list", txt_file, vrt_file))
+    
+  }
+  
+}
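+
+# Optional sanity check, a sketch assuming the loop above built the USGS_13 VRT:
+# vrt_file  <- file.path(DEM_VRT_DIR, "USGS_13", "ned_USGS_13.vrt")
+# vrt_tiles <- terra::vrt_tiles(vrt_file)   # list the component TIFF paths
+# dem       <- terra::rast(vrt_file)        # lazily open the mosaic
+# dem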
 
-# TODO: use AWS S3 sync command like below
-s3_sync_cmd <- paste0("aws s3 sync s3://prd-tnm/StagedProducts/Elevation/1/TIFF/ ", DEM_TIF_DIR, " --profile ", AWS_PROFILE, " --no-sign-request --only-show-errors")
 
-# "aws s3 sync s3://prd-tnm/StagedProducts/Elevation/1/TIFF/ /Volumes/T7SSD/lynker-spatial/dem/tif/ --no-sign-request --only-show-errors"
-
-# TODO: Old method curl request each file individually.... super slow
-# error_tiles <- download_tiles(tile_paths, DEM_TIF_DIR)
\ No newline at end of file
+# file.path(DEM_TIF_DIR, "USGS_Seamless_DEM_1.vrt")
+# length(list.files(file.path(DEM_TIF_DIR, "current"), full.names = T))
+# vrt_tiles <- terra::vrt_tiles(file.path(DEM_TIF_DIR, "USGS_Seamless_DEM_1.vrt"))
+# vrt_tiles[1]
+# # "aws s3 sync s3://prd-tnm/StagedProducts/Elevation/1/TIFF/ /Volumes/T7SSD/lynker-spatial/dem/tif/ --no-sign-request --only-show-errors"
+# 
+# # Parse the VRT file
+# vrt_file <- list.files(DEM_VRT_DIR, full.names = TRUE)
+# file.path(DEM_TIF_DIR, "USGS_Seamless_DEM_1.vrt")
+# vrt_tiles <- terra::vrt_tiles(vrt_file)
+# 
+# # tile_path <- vrt_tiles[500]
+# tile_paths <- gsub("/vsicurl/", "",  vrt_tiles)
+# 
+# # TODO: use AWS S3 sync command like below
+# s3_sync_cmd <- paste0("aws s3 sync s3://prd-tnm/StagedProducts/Elevation/1/TIFF/ ", DEM_VRT_DIR, " --profile ", AWS_PROFILE, " --no-sign-request --only-show-errors")
+# # aws s3 sync s3://prd-tnm/StagedProducts/Elevation/1/TIFF/ /Volumes/T7SSD/lynker-spatial/dem/tif/ --no-sign-request --only-show-errors
+# # "aws s3 sync s3://prd-tnm/StagedProducts/Elevation/1/TIFF/ /Volumes/T7SSD/lynker-spatial/dem/tif/ --no-sign-request --only-show-errors"
+# 
+# # TODO: Old method curl request each file individually.... super slow
+# # error_tiles <- download_tiles(tile_paths, DEM_TIF_DIR)
\ No newline at end of file
diff --git a/runners/cs_runner2/download_fema100.R b/runners/cs_runner2/download_fema.R
similarity index 100%
rename from runners/cs_runner2/download_fema100.R
rename to runners/cs_runner2/download_fema.R
diff --git a/runners/cs_runner2/download_ml_outputs.R b/runners/cs_runner2/download_ml_outputs.R
new file mode 100644
index 0000000..e69de29
diff --git a/runners/cs_runner2/finalize_outputs.R b/runners/cs_runner2/finalize_outputs.R
new file mode 100644
index 0000000..e69de29
diff --git a/runners/cs_runner2/partition_fema_by_vpu.R b/runners/cs_runner2/process_fema.R
similarity index 79%
rename from runners/cs_runner2/partition_fema_by_vpu.R
rename to runners/cs_runner2/process_fema.R
index 2292588..522d427 100644
--- a/runners/cs_runner2/partition_fema_by_vpu.R
+++ b/runners/cs_runner2/process_fema.R
@@ -169,10 +169,6 @@ for (file in FEMA_FILENAMES) {
 # -------------------------------------------------------------------------------------------------------------------
 # ---- Apply final dissolve/snap and removal of internal boundaries in FEMA geometries  ----
 # -------------------------------------------------------------------------------------------------------------------
-source sh/create_tfstate_bucket.sh "mros-webapp-tfstate-bucket" 645515465214 "angus-lynker" us-west-1 "false"
-source sh/create_tfstate_bucket.sh "mros-webapp-tfstate-bucket" 645515465214 "angus-lynker" us-west-1 "false"
-
-
 
 # # paths to FEMA 100 year flood plain files
 # FEMA_gpkg_paths      <- list.files(FEMA_GPKG_PATH, full.names = TRUE)
@@ -267,8 +263,8 @@ CONUS_VPU_IDS <-
 
 
 for (file_path in FEMA_CLEAN_GPKG_PATHS) {
-  
-  # file_path = FEMA_CLEAN_GPKG_PATHS[25]
+   # i = 35
+  # file_path = FEMA_CLEAN_GPKG_PATHS[i]
 
   fema_file <- basename(file_path)
   
@@ -278,8 +274,8 @@ for (file_path in FEMA_CLEAN_GPKG_PATHS) {
   fema <- sf::read_sf(file_path)
   
   for (vpu in CONUS_VPU_IDS) {
-
-    # vpu = CONUS_VPU_IDS[12]
+    # j = 8 
+    # vpu = CONUS_VPU_IDS[j]
     
     # nextgen_basename <- basename(nextgen_path)
     # vpu              <- unlist(regmatches(nextgen_basename, gregexpr("\\d+[A-Za-z]*", nextgen_basename)))
@@ -320,8 +316,10 @@ for (file_path in FEMA_CLEAN_GPKG_PATHS) {
       
       # state <- gsub("-100yr-flood_valid_clean.gpkg", "", fema_file)
       
+      # BASE_DIRS_LIST$fema_by_vpu_subsets_dirs
+      
       fema_vpu_filename <- gsub(".gpkg", paste0("_", vpu, ".gpkg"), fema_file)
-      fema_vpu_path     <- paste0(vpu_subfolder_path, "/", fema_vpu_filename)
+      fema_vpu_path     <- paste0(vpu_subfolder_path, "/subsets/", fema_vpu_filename)
       
       
       if (OVERWRITE_FEMA_FILES) {
@@ -337,60 +335,6 @@ for (file_path in FEMA_CLEAN_GPKG_PATHS) {
     }
     message()
   } 
-  # for (nextgen_path in NEXTGEN_FILE_PATHS) {
-  #   nextgen_basename <- basename(nextgen_path)
-  #   vpu              <- unlist(regmatches(nextgen_basename, gregexpr("\\d+[A-Za-z]*", nextgen_basename)))
-    
-  #   message("VPU: ", vpu)   
-  #   message("- nextgen gpkg:\n > '", nextgen_path, "'")   
-  #   message(" > Checking if '", fema_file, "' intersects with '", nextgen_basename, "'")
-    
-  #   # read in nextgen flowlines 
-  #   flines <- sf::read_sf(nextgen_path, layer = "flowpaths")
-    
-  #   # get the FEMA polygons that intersect with the nextgen flowlines
-  #   fema_intersect <- polygons_with_line_intersects(fema, flines)
-    
-  #   fema_in_nextgen <-  nrow(fema_intersect) != 0
-    
-  #   message("FEMA intersects with nextgen flowlines? ", fema_in_nextgen)
-    
-  #   if(fema_in_nextgen) {
-      
-  #     # create filepaths
-  #     vpu_subfolder      <- paste0("VPU_", vpu)
-  #     # vpu_subfolder_path <- paste0(FEMA_BY_VPU_PATH, "/", vpu_subfolder, "/states")
-  #     vpu_subfolder_path <- paste0(FEMA_BY_VPU_PATH, "/", vpu_subfolder)
-      
-  #     # vpu_subfolder_path <- FEMA_VPU_SUBFOLDERS[grepl(vpu_subfolder, FEMA_VPU_SUBFOLDERS)]
-      
-  #     fema_intersect <-
-  #       fema_intersect %>%
-  #       dplyr::mutate(
-  #         vpu = vpu
-  #       ) %>%
-  #       dplyr::select(vpu, fema_id, source, state, geom)
-      
-  #     # state <- gsub("-100yr-flood_valid_clean.gpkg", "", fema_file)
-      
-  #     fema_vpu_filename <- gsub(".gpkg", paste0("_", vpu, ".gpkg"), fema_file)
-  #     fema_vpu_path     <- paste0(vpu_subfolder_path, "/", fema_vpu_filename)
-      
-      
-  #     if (OVERWRITE_FEMA_FILES) {
-  #       message("Writting '", basename(fema_vpu_filename), "' to: \n > '", fema_vpu_path, "'")
-        
-  #       sf::write_sf(
-  #         fema_intersect,
-  #         fema_vpu_path
-  #       )
-  #     }
-      
-      
-  #   }
-  #   message()
-  # }
-  
   
   message(
     "--------------------------------------------------------------\n", 
@@ -400,23 +344,50 @@ for (file_path in FEMA_CLEAN_GPKG_PATHS) {
   
 }
 
+# for (i in seq_along(BASE_DIRS_LIST$fema_by_vpu_subdirs)) {
+#   vpu_subdir <- BASE_DIRS_LIST$fema_by_vpu_subdirs[i]
+#   
+#   message(i, " - ", vpu_subdir)
+#   
+#   files_to_move = list.files(vpu_subdir, pattern = ".gpkg", full.names = T)
+#   
+#   for (file in files_to_move) {
+#     new_path <- paste0(vpu_subdir, "/subsets/",     basename(file))
+#     message(" > ", file, "\n > ", new_path, "\n")
+#     fs::file_move(
+#       file, 
+#       new_path
+#     )
+#   }
+# }
+
 # -------------------------------------------------------------------------------------
 # ---- Loop through each VPU subfolder and merge all of the Geopackages into one---- 
 # -------------------------------------------------------------------------------------
+# BASE_DIRS_LIST$fema_by_vpu_subdirs
+# BASE_DIRS_LIST$fema_by_vpu_subsets_dirs
+# for (vpu_dir in FEMA_VPU_SUBFOLDERS) {
 
-for (vpu_dir in FEMA_VPU_SUBFOLDERS) {
+for (i in seq_along(BASE_DIRS_LIST$fema_by_vpu_subdirs)) {
   # for (i in 1:4) {
-  # i = 1
-  # vpu_dir = FEMA_VPU_SUBFOLDERS2[i]
+  # i = 8
+  
+  vpu_dir <-  BASE_DIRS_LIST$fema_by_vpu_subdirs[i]
+  # vpu_dir <-  FEMA_VPU_SUBFOLDERS[i]
+  
+  vpu_subset_dir <- BASE_DIRS_LIST$fema_by_vpu_subsets_dirs[i]
+  
   message("Merging files in '", basename(vpu_dir), "' directory...")
-  # }
+  vpu_subdirs    <- list.files(vpu_subset_dir, full.names = TRUE)
+  # vpu_subdirs    <- list.files(vpu_dir, full.names = TRUE)
   
-  vpu_subdirs    <- list.files(vpu_dir, full.names = TRUE)
+  # message(paste0("\n > ", basename(vpu_subdirs), collapse = ""))
   
   # path to the merged directory where the final merged geopackge will end up
-  master_name       <- paste0("fema_", gsub("VPU", "vpu", basename(vpu_dir)))
+  master_name       <- paste0("fema-", gsub("VPU", "vpu", basename(vpu_dir)))
   master_gpkg_name  <- paste0(master_name, ".gpkg")
-  master_filepath   <- paste0(vpu_dir, "/", master_gpkg_name)
+  master_filepath   <- paste0(vpu_dir, "/merged/", master_gpkg_name)
   
   # if the file already exists, remove it so we dont OVER append data to the "master file"
   if (file.exists(master_filepath)) {
@@ -424,11 +395,11 @@ for (vpu_dir in FEMA_VPU_SUBFOLDERS) {
   }
   
   # fema state geopackages partioned for the specific VPU
-  fema_state_gpkgs        <- list.files(vpu_dir, full.names = TRUE)
-  master_output_filepath  <- paste0(vpu_dir, "/", gsub(".gpkg", "_output.gpkg", master_gpkg_name))
-  
-  # make sure to ignore the master file if it already exists
-  fema_state_gpkgs <- fema_state_gpkgs[fema_state_gpkgs != master_filepath & fema_state_gpkgs != master_output_filepath]
+  fema_state_gpkgs        <- list.files(vpu_subset_dir, full.names = TRUE)
+  # master_output_filepath  <- paste0(vpu_dir, "/", gsub(".gpkg", "_output.gpkg", master_gpkg_name))
+  # 
+  # # make sure to ignore the master file if it already exists
+  # fema_state_gpkgs <- fema_state_gpkgs[fema_state_gpkgs != master_filepath & fema_state_gpkgs != master_output_filepath]
   
   for(gpkg_file in fema_state_gpkgs) {
     # message(" - Appending '", basename(gpkg_file), "' to master FEMA VPU gpkg:\n  >  '",
@@ -443,29 +414,29 @@ for (vpu_dir in FEMA_VPU_SUBFOLDERS) {
                                     " ", gpkg_file
     )
 
-    if (OVERWRITE_FEMA_FILES) {
+    # if (OVERWRITE_FEMA_FILES) {
       system(ogr2ogr_merge_command)
-    }
+    # }
+      
   }
   
-  has_fema_state_gpkgs <- length(fema_state_gpkgs) > 0
-  
-  if(DELETE_STAGING_GPKGS && has_fema_state_gpkgs) {
-    message(" - Deleting individual gpkgs from '", vpu_dir, "' directory...")
-    # message("- Deleting individual gpkgs from 'states' directory:\n > '", states_dir, "'")
-    
-    remove_gpkg_cmds <- paste0("rm ", fema_state_gpkgs)
-    
-    for (remove_cmd in remove_gpkg_cmds) {
-      message("  >  '", remove_cmd, "'")
-      system(remove_cmd)
-    }
-  }
+  # has_fema_state_gpkgs <- length(fema_state_gpkgs) > 0
+  # 
+  # if(DELETE_STAGING_GPKGS && has_fema_state_gpkgs) {
+  #   message(" - Deleting individual gpkgs from '", vpu_dir, "' directory...")
+  #   remove_gpkg_cmds <- paste0("rm ", fema_state_gpkgs)
+  #   
+  #   for (remove_cmd in remove_gpkg_cmds) {
+  #     message("  >  '", remove_cmd, "'")
+  #     system(remove_cmd)
+  #   }
+  # }
   
   # message()
   message("Merge complete!")
   message("Merged '", basename(vpu_dir), "' FEMA output geopackage:\n --> '", master_filepath, "'")
   message()
+  
 }
 # -------------------------------------------------------------------------------------
 # ----Apply simplify, dissolve, explode on the MERGED polygons  ---- 
@@ -475,9 +446,13 @@ for (vpu_dir in FEMA_VPU_SUBFOLDERS) {
 # for (i in list.files(FEMA_VPU_SUBFOLDERS, full.names = T)[grepl("_output.gpkg", list.files(FEMA_VPU_SUBFOLDERS, full.names = T))]) {
 #   file.remove(i)
 # }
+# seq_along(BASE_DIRS_LIST$fema_by_vpu_subdirs)
+# rm(STAGING_FILES_TO_DELETE, i, VPU, vpu_dir)
 
 for (i in 1:length(FEMA_VPU_SUBFOLDERS)) {
+
   # i = 8
+  # FEMA_VPU_SUBFOLDERS
   STAGING_FILES_TO_DELETE <- c()
   
   vpu_dir    <- FEMA_VPU_SUBFOLDERS[i]
@@ -486,15 +461,23 @@ for (i in 1:length(FEMA_VPU_SUBFOLDERS)) {
   message(i, " - Attempting to union FEMA polygons for '", VPU, "'...")
   
   # path to the merged directory where the final merged geopackage will end up
-  master_name       <- paste0("fema_", gsub("VPU", "vpu", basename(vpu_dir)))
+  master_name       <- paste0("fema-", gsub("VPU", "vpu", basename(vpu_dir)))
   master_gpkg_name  <- paste0(master_name, ".gpkg")
-  master_filepath   <- paste0(vpu_dir, "/", master_gpkg_name)
+  master_filepath   <- paste0(vpu_dir, "/merged/", master_gpkg_name)
   
   master_geojson_name       <- paste0(master_name, ".geojson")
-  master_geojson_filepath   <- paste0(vpu_dir, "/", master_geojson_name)
+  master_geojson_filepath   <- paste0(vpu_dir, "/merged/", master_geojson_name)
+  
+  # updated_gpkg_name  <- gsub(".gpkg", "-output.gpkg", master_gpkg_name)
+  updated_gpkg_name  <- master_gpkg_name
+  updated_filepath   <- paste0(vpu_dir, "/output/", master_gpkg_name)
+  
+  updated_gpkg_exists  <- file.exists(updated_filepath)
   
-  updated_gpkg_name  <- gsub(".gpkg", "_output.gpkg", master_gpkg_name)
-  updated_filepath   <- paste0(vpu_dir, "/", updated_gpkg_name)
+  # remove output file if it already exists
+  if (updated_gpkg_exists) {
+    file.remove(updated_filepath)
+  }
   
   message("VPU Merged FEMA filename: '", master_gpkg_name, "'")
   message("> Simplifying, dissolve, exploding VPU aggregated FEMA polygons... '", basename(master_filepath), "'")
@@ -516,12 +499,12 @@ for (i in 1:length(FEMA_VPU_SUBFOLDERS)) {
   # gpkg_to_geojson_cmd <- paste0("ogr2ogr ", master_geojson_filepath, " ", master_filepath)
   # file.remove(master_geojson_filepath)
   
-  if (OVERWRITE_FEMA_FILES || !geojson_exists) {
-    system(gpkg_to_geojson_cmd)
-    message("Writing '", master_geojson_name, "' to: \n > '", master_geojson_filepath, "'")
-    
-    STAGING_FILES_TO_DELETE <- c(STAGING_FILES_TO_DELETE, master_geojson_filepath)
-  }
+  # if (OVERWRITE_FEMA_FILES || !geojson_exists) {
+  system(gpkg_to_geojson_cmd)
+  message("Writing '", master_geojson_name, "' to: \n > '", master_geojson_filepath, "'")
+  
+  STAGING_FILES_TO_DELETE <- c(STAGING_FILES_TO_DELETE, master_geojson_filepath)
+  # }
   
   # master_gj <- sf::read_sf(master_geojson_filepath)
   # master_gpkg <- sf::read_sf(master_filepath)
@@ -529,13 +512,15 @@ for (i in 1:length(FEMA_VPU_SUBFOLDERS)) {
   # Clean GeoJSON
   message("Simplify, dissolve, explode > '", master_geojson_name, "'")
   output_clean_filename      <- gsub(".geojson", "_clean.geojson", master_geojson_name)
-  output_clean_geojson_path  <- paste0(vpu_dir, "/", output_clean_filename)
+  output_clean_geojson_path  <- paste0(vpu_dir, "/merged/", output_clean_filename)
   
   clean_geojson_exists  <- file.exists(output_clean_geojson_path)
   message(" >>> '", output_clean_filename, "' already exists? ", clean_geojson_exists)
-  message(" >>> Overwrite? ", OVERWRITE_FEMA_FILES)
+  # message(" >>> Overwrite? ", OVERWRITE_FEMA_FILES)
 
-  # file.remove(output_clean_geojson_path)
+  if (clean_geojson_exists) {
+    file.remove(output_clean_geojson_path)
+  }
   
   mapshaper_command = paste0('node  --max-old-space-size=16000 /opt/homebrew/bin/mapshaper ',
                              master_geojson_filepath, 
@@ -554,33 +539,36 @@ for (i in 1:length(FEMA_VPU_SUBFOLDERS)) {
   # message("Writing '", master_geojson_name, "' to: \n > '", master_geojson_filepath, "'")
   
   STAGING_FILES_TO_DELETE <- c(STAGING_FILES_TO_DELETE, output_clean_geojson_path)
-  output_clean_gpkg_filename   <- gsub(".geojson", ".gpkg", master_geojson_name)
-  output_clean_gpkg_path       <- paste0(vpu_dir, "/", output_clean_gpkg_filename)
+  # output_clean_gpkg_filename   <- gsub(".geojson", ".gpkg", master_geojson_name)
+  # output_clean_gpkg_path       <- paste0(vpu_dir, "/merged/", output_clean_gpkg_filename)
   
   # fema_vpu <- sf::read_sf(master_filepath)
   # geojson_to_gpkg_cmd <- paste0("ogr2ogr -f GPKG ", updated_filepath, " ", output_clean_geojson_path)
-  geojson_to_gpkg_cmd <- paste0("ogr2ogr -nlt MULTIPOLYGON -s_srs EPSG:5070 -t_srs EPSG:5070  ", updated_filepath, " ", output_clean_geojson_path)
+  geojson_to_gpkg_cmd <- paste0("ogr2ogr -nlt MULTIPOLYGON -s_srs EPSG:5070 -t_srs EPSG:5070  ", 
+                                updated_filepath, " ", 
+                                output_clean_geojson_path)
   # geojson_to_gpkg_cmd <- paste0("ogr2ogr ", updated_filepath, " ", output_clean_geojson_path)
 
-  updated_gpkg_exists  <- file.exists(updated_filepath)
-  # updated_gpkg_exists
-  # file.remove(updated_filepath)
+  # updated_gpkg_exists  <- file.exists(updated_filepath)
+  # if (updated_gpkg_exists) {
+  #   file.remove(updated_filepath)
+  # }
   
-  if (OVERWRITE_FEMA_FILES || !updated_gpkg_exists) {
-    system(geojson_to_gpkg_cmd)
-    message("Writing '", updated_gpkg_name, "' to: \n > '", updated_filepath, "'")
-  }
+  
+  # if (OVERWRITE_FEMA_FILES || !updated_gpkg_exists) {
+  system(geojson_to_gpkg_cmd)
+  message("Writing '", updated_gpkg_name, "' to: \n > '", updated_filepath, "'")
+  # }
 
   # sf::st_layers(updated_filepath)
   
   # mapview::npts(fema)
-  
   fema  <- 
     sf::read_sf(updated_filepath) %>%
     # sf::read_sf(output_clean_geojson_path) %>%
     # rmapshaper::ms_explode(sys=TRUE, sys_mem = 16) %>% 
     dplyr::mutate(
-      vpu      = gsub("VPU_", "", VPU),
+      vpu      = gsub("vpu-", "", VPU),
       fema_id = as.character(1:dplyr::n())
     ) %>% 
     dplyr::select(
@@ -595,6 +583,7 @@ for (i in 1:length(FEMA_VPU_SUBFOLDERS)) {
   #   dplyr::mutate(pts = mapview::npts(geom)) %>% 
   #   dplyr::arrange(-pts) 
   
+  # remove before writing updated version
   file.remove(updated_filepath)
   
   sf::write_sf(
@@ -618,47 +607,14 @@ for (i in 1:length(FEMA_VPU_SUBFOLDERS)) {
 # ---- Store all FEMA layers in a single conus_fema.gpkg 
 # -------------------------------------------------------------------------------------
 
-fema_vpu_layers <- list.files(FEMA_VPU_SUBFOLDERS, full.names = T)[grepl("_output.gpkg", list.files(FEMA_VPU_SUBFOLDERS))]
 
-combine_gpkg_files(fema_vpu_layers, '/Volumes/T7SSD/lynker-spatial/cs-extension-polygons/conus_fema.gpkg')
-  
+all_fema_vpu_layers <- list.files(BASE_DIRS_LIST$fema_by_vpu_output_dirs, full.names = TRUE)
 
-combine_gpkg_files <- function(gpkg_paths, output_gpkg) {
-  
-  layer_counter <- list()
-    
-  for (gpkg_path in gpkg_paths) {
-    
-    base_name <- tools::file_path_sans_ext(basename(gpkg_path))
-    # base_name <- gsub("_output.gpkg", "", basename(gpkg_path))
-    
-    if (base_name %in% names(layer_counter)) {
-      layer_counter[[base_name]] <- layer_counter[[base_name]] + 1
-      layer_name <- paste0(base_name, "_", layer_counter[[base_name]])
-    } else {
-      
-      layer_counter[[base_name]] <- 1
-      layer_name <- base_name
-    }
-    
-    tryCatch({
-      sf_layer <- st_read(gpkg_path, quiet = TRUE)
-      
-      sf::st_write(sf_layer, 
-               dsn = output_gpkg, 
-               layer = layer_name, 
-               append = TRUE,
-               quiet = TRUE)
-      
-      message("Successfully added '", basename(gpkg_path), "' as layer: '", layer_name, "' to\n > '", output_gpkg, "'")
-      
-    }, error = function(e) {
-      warning("Error processing: ",  basename(gpkg_path))
-      warning(e)
-    })
-  }
-}
+# fema_vpu_layers <- list.files(FEMA_VPU_SUBFOLDERS, full.names = T)[grepl("_output.gpkg", list.files(FEMA_VPU_SUBFOLDERS))]
 
+combine_gpkg_files(all_fema_vpu_layers, CONUS_FEMA_GPKG_PATH)
+  
 
 # # -------------------------------------------------------------------------------------
 # # ---- Union each VPU geopackage (either on state or just touching predicate) ---- 
diff --git a/runners/cs_runner2/utils.R b/runners/cs_runner2/utils.R
index 6feff0b..cb624ae 100644
--- a/runners/cs_runner2/utils.R
+++ b/runners/cs_runner2/utils.R
@@ -9,7 +9,7 @@
 #         ├── tif/
 #     ├── cs-extension-polygons/
 create_local_hydrofabric_base_dirs <- function(base_dir) {
-  
+  # base_dir <- BASE_DIR
   
   # build paths
   hydrofabric_dir <- paste0(base_dir, "/hydrofabric")
@@ -17,7 +17,7 @@ create_local_hydrofabric_base_dirs <- function(base_dir) {
   # DEM dirs
   dem_dir                    <- file.path(base_dir, "dem")
   dem_vrt_dir                <- file.path(dem_dir, "vrt")
-  dem_tif_dir                <- file.path(dem_dir, "tif")
+  # dem_tif_dir                <- file.path(dem_dir, "tif")
   
   # polygons for transect extensions
   cs_extension_polygons_dir     <- file.path(base_dir, "cs-extension-polygons")
@@ -42,7 +42,7 @@ create_local_hydrofabric_base_dirs <- function(base_dir) {
   # DEM dirs
   create_if_not_exists(dem_dir)
   create_if_not_exists(dem_vrt_dir)
-  create_if_not_exists(dem_tif_dir)
+  # create_if_not_exists(dem_tif_dir)
   
   # extension polygons
   create_if_not_exists(cs_extension_polygons_dir)
@@ -58,6 +58,9 @@ create_local_hydrofabric_base_dirs <- function(base_dir) {
   
   for (path in fema_by_vpu_subdirs) {
     create_if_not_exists(path)
+    create_if_not_exists(file.path(path, "subsets"))
+    create_if_not_exists(file.path(path, "merged"))
+    create_if_not_exists(file.path(path, "output"))
   }
   
 }
@@ -178,7 +181,7 @@ get_base_dir_paths <- function(base_dir) {
   
   dem_dir          <- file.path(base_dir, "dem")
   dem_vrt_dir      <- file.path(base_dir, "dem", "vrt")
-  dem_tif_dir      <- file.path(base_dir, "dem", "tif")
+  # dem_tif_dir      <- file.path(base_dir, "dem", "tif")
   
   cs_extension_polygons_dir <- file.path(base_dir, "cs-extension-polygons")
   
@@ -196,12 +199,16 @@ get_base_dir_paths <- function(base_dir) {
   fema_by_vpu_dir       <- file.path(fema_dir, "fema-by-vpu")
   fema_by_vpu_subdirs   <- paste0(fema_by_vpu_dir, "/vpu-", VPU_IDS)
   
+  fema_by_vpu_subsets_dirs <- file.path(fema_by_vpu_subdirs, "subsets")
+  fema_by_vpu_merged_dirs  <- file.path(fema_by_vpu_subdirs, "merged")
+  fema_by_vpu_output_dirs  <- file.path(fema_by_vpu_subdirs, "output")
+  
   return(
       list(
       hydrofabric_dir = hydrofabric_dir,
       dem_dir = dem_dir,
       dem_vrt_dir = dem_vrt_dir, 
-      dem_tif_dir = dem_tif_dir,
+      # dem_tif_dir = dem_tif_dir,
       cs_extension_polygons_dir = cs_extension_polygons_dir,
       fema_dir = fema_dir,
       fema_fgb_dir = fema_fgb_dir,
@@ -209,7 +216,10 @@ get_base_dir_paths <- function(base_dir) {
       fema_clean_dir = fema_clean_dir,
       fema_gpkg_dir = fema_gpkg_dir,
       fema_by_vpu_dir = fema_by_vpu_dir,
-      fema_by_vpu_subdirs = fema_by_vpu_subdirs
+      fema_by_vpu_subdirs = fema_by_vpu_subdirs,
+      fema_by_vpu_subsets_dirs = fema_by_vpu_subsets_dirs,
+      fema_by_vpu_merged_dirs = fema_by_vpu_merged_dirs,
+      fema_by_vpu_output_dirs = fema_by_vpu_output_dirs
     )
   )
 }
@@ -259,6 +269,42 @@ get_version_base_dir_paths <- function(base_dir, version) {
   )
 }
 
+combine_gpkg_files <- function(gpkg_paths, output_gpkg) {
+  
+  layer_counter <- list()
+  
+  for (gpkg_path in gpkg_paths) {
+    
+    base_name <- tools::file_path_sans_ext(basename(gpkg_path))
+    # base_name <- gsub("_output.gpkg", "", basename(gpkg_path))
+    
+    if (base_name %in% names(layer_counter)) {
+      layer_counter[[base_name]] <- layer_counter[[base_name]] + 1
+      layer_name <- paste0(base_name, "_", layer_counter[[base_name]])
+    } else {
+      
+      layer_counter[[base_name]] <- 1
+      layer_name <- base_name
+    }
+    
+    tryCatch({
+      sf_layer <- sf::st_read(gpkg_path, quiet = TRUE)
+      
+      sf::st_write(sf_layer, 
+                   dsn = output_gpkg, 
+                   layer = layer_name, 
+                   append = TRUE,
+                   quiet = TRUE)
+      
+      message("Successfully added '", basename(gpkg_path), "' as layer: '", layer_name, "' to\n > '", output_gpkg, "'")
+      
+    }, error = function(e) {
+      warning("Error processing: ",  basename(gpkg_path))
+      warning(e)
+    })
+  }
+}
+
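+# usage sketch (hypothetical paths): each input gpkg is written into the output
+# gpkg as its own layer, with a numeric suffix appended on basename collisions
+# combine_gpkg_files(
+#   c("fema-by-vpu/vpu-01/output/vpu-01_output.gpkg",
+#     "fema-by-vpu/vpu-02/output/vpu-02_output.gpkg"),
+#   "conus_fema.gpkg"
+# )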
 list_s3_objects <- function(s3_bucket, pattern = NULL, aws_profile = NULL) {
   
   profile_option <- if (!is.null(aws_profile)) paste0("--profile ", aws_profile) else ""

From 83bb22c12b89ce60d17827ab26298094635db2f4 Mon Sep 17 00:00:00 2001
From: anguswg-ucsb <anguswatters@gmail.com>
Date: Tue, 19 Nov 2024 16:27:33 -0800
Subject: [PATCH 64/64] finish 01_transects script, add extensions script, and
 more utils helper functions for managing file paths/names

---
 runners/cs_runner/utils.R                     |  38 +++-
 runners/cs_runner2/00_driver.R                |   5 +-
 runners/cs_runner2/01_transects.R             | 190 ++++++++++++++++++
 .../cs_runner2/02_extend_transects_by_fema.R  | 118 +++++++++++
 runners/cs_runner2/base_variables.R           |   7 +
 runners/cs_runner2/download_ml_outputs.R      |  28 +++
 runners/cs_runner2/process_fema.R             |  17 +-
 runners/cs_runner2/utils.R                    |  94 ++++++++-
 8 files changed, 474 insertions(+), 23 deletions(-)

diff --git a/runners/cs_runner/utils.R b/runners/cs_runner/utils.R
index ef43e18..47423c1 100644
--- a/runners/cs_runner/utils.R
+++ b/runners/cs_runner/utils.R
@@ -121,7 +121,11 @@ create_new_version_dirs <- function(base_dir, version, with_output = FALSE) {
   network_dir                <- paste0(version_base_dir, "/network")
   
   # transects
-  transects_dir              <- paste0(version_base_dir, "/transects")
+  transects_dir                  <- paste0(version_base_dir, "/transects")
+  transects_base_dir             <- paste0(transects_dir, "/base")
+  transects_fema_extended_dir    <- paste0(transects_dir, "/extended-by-fema")
+  transects_cs_extended_dir      <- paste0(transects_dir, "/extended-by-cs-attributes")
+  transects_output_dir           <- paste0(transects_dir, "/output")
   
   # cross sections dirs
   cross_sections_dir                 <- paste0(version_base_dir, "/cross-sections")
@@ -129,7 +133,9 @@ create_new_version_dirs <- function(base_dir, version, with_output = FALSE) {
   cross_sections_ml_dir              <- paste0(cross_sections_dir, "/dem-ml")
   cross_sections_coastal_bathy_dir   <- paste0(cross_sections_dir, "/dem-coastal-bathy")
   cross_sections_dem_pts_dir         <- paste0(cross_sections_dir, "/dem-points")
+  cross_sections_output_dir          <- paste0(cross_sections_dir, "/output")
   
+ 
   if(with_output) {
     output_dir       <- paste0(version_base_dir, "/outputs")
   }
@@ -148,13 +154,19 @@ create_new_version_dirs <- function(base_dir, version, with_output = FALSE) {
   
   # transects
   create_if_not_exists(transects_dir)
-  
+  create_if_not_exists(transects_base_dir)
+  create_if_not_exists(transects_fema_extended_dir)
+  create_if_not_exists(transects_cs_extended_dir)
+  create_if_not_exists(transects_output_dir)
+    
   # CS pts
   create_if_not_exists(cross_sections_dir)
   create_if_not_exists(cross_sections_dem_dir)
   create_if_not_exists(cross_sections_ml_dir)
   create_if_not_exists(cross_sections_coastal_bathy_dir)
   create_if_not_exists(cross_sections_dem_pts_dir)
+  create_if_not_exists(cross_sections_output_dir)
+  
   
   if(with_output) {
     create_if_not_exists(output_dir)
@@ -233,28 +245,44 @@ get_version_base_dir_paths <- function(base_dir, version) {
   network_dir                <- file.path(version_base_dir, "network")
   
   # transects
-  transects_dir              <- file.path(version_base_dir, "transects")
-  
+  transects_dir                  <- file.path(version_base_dir, "transects")
+  transects_base_dir             <- paste0(transects_dir, "/base")
+  transects_fema_extended_dir    <- paste0(transects_dir, "/extended-by-fema")
+  transects_cs_extended_dir      <- paste0(transects_dir, "/extended-by-cs-attributes")
+  transects_output_dir           <- paste0(transects_dir, "/output")
+   
   # cross sections dirs
   cross_sections_dir                 <- file.path(version_base_dir, "cross-sections")
   cross_sections_dem_dir             <- file.path(cross_sections_dir, "dem")
   cross_sections_ml_dir              <- file.path(cross_sections_dir, "dem-ml")
   cross_sections_coastal_bathy_dir   <- file.path(cross_sections_dir, "dem-coastal-bathy")
   cross_sections_dem_pts_dir         <- file.path(cross_sections_dir, "dem-points")
+  cross_sections_output_dir          <- paste0(cross_sections_dir, "/output")
   
   return(
     list(
       hydrofabric_dir    = hydrofabric_dir,
       version_base_dir   = version_base_dir,
+      
       ref_features_dir   = ref_features_dir, 
+      
       network_dir        = network_dir,
+      
       ml_dir = ml_dir,
+      
       transects_dir      = transects_dir,
+      transects_base_dir = transects_base_dir,
+      transects_fema_extended_dir = transects_fema_extended_dir,
+      transects_cs_extended_dir = transects_cs_extended_dir,
+      transects_output_dir = transects_output_dir,
+      
       cross_sections_dir = cross_sections_dir,
       cross_sections_dem_dir     = cross_sections_dem_dir,
       cross_sections_dem_pts_dir = cross_sections_dem_pts_dir,
       cross_sections_ml_dir      = cross_sections_ml_dir,
-      cross_sections_coastal_bathy_dir = cross_sections_coastal_bathy_dir
+      cross_sections_coastal_bathy_dir = cross_sections_coastal_bathy_dir,
+      cross_sections_output_dir = cross_sections_output_dir
+      
     )
   )
 }
diff --git a/runners/cs_runner2/00_driver.R b/runners/cs_runner2/00_driver.R
index cb98b6e..9c6eaed 100644
--- a/runners/cs_runner2/00_driver.R
+++ b/runners/cs_runner2/00_driver.R
@@ -6,15 +6,16 @@ source("runners/cs_runner2/config_env.R")
 # downloads datasets
 # - Nextgen data
 # - Reference features (for waterbody filtering)
-# - ML outputs
 # - FEMA 100 year floodplain polygons (FGBs)
+#   - Process state-level FEMA polygons to CONUS
+# - ML outputs
 # - 3DEP DEM VRT
 source("runners/cs_runner2/download_conus_nextgen.R")
 source("runners/cs_runner2/download_conus_ref_features.R")
 source("runners/cs_runner2/download_fema.r")
 source("runners/cs_runner2/process_fema.R")
-source("runners/cs_runner2/download_dem_from_vrt.R")
 source("runners/cs_runner2/download_ml_outputs.R")
+source("runners/cs_runner2/download_dem_from_vrt.R")
 
 # generate and upload transects datasets 
 source("runners/cs_runner2/01_transects.R")
diff --git a/runners/cs_runner2/01_transects.R b/runners/cs_runner2/01_transects.R
index e69de29..e11f46c 100644
--- a/runners/cs_runner2/01_transects.R
+++ b/runners/cs_runner2/01_transects.R
@@ -0,0 +1,190 @@
+
+library(sf)
+library(dplyr)
+
+source("runners/cs_runner2/base_variables.R")
+# source("runners/cs_runner2/utils.R")
+
+VPU_IDS <- get_vpu_ids()
+
+for (i in seq_along(VPU_IDS)) {
+# for (i in 21:length(VPU_IDS)) {
+  
+  # --------------------------------------------------------------
+  # ---- Setup variables / paths ----
+  # --------------------------------------------------------------
+  # VERSION_DIRS_LIST
+  vpu                       <- VPU_IDS[i]
+  CROSSWALK_ID              <- "id"
+  
+  transect_filenames        <- get_transect_filenames(vpu, sep = "-")
+  base_output_path          <- paste0(VERSION_DIRS_LIST$transects_base_dir, "/", transect_filenames$transects_base_path)
+  base_transect_file_exists <- file.exists(base_output_path)
+  
+  do_process_transects      <- !base_transect_file_exists || REGENERATE_TRANSECTS
+  # do_not_reprocess_transects <- base_transect_file_exists && !REGENERATE_TRANSECTS
+  
+  if (do_process_transects) {
+    message("Creating VPU ", vpu, " transects:", 
+            "\n - flowpaths: '",
+            basename(CONUS_NEXTGEN_GPKG_PATH), "'"
+    )
+  } else {
+    message(
+      "VPU ", vpu, " transects file already exists at:\n - '", base_output_path, "'", 
+      "\n\n   >>> NOTE: To regenerate transects:\n\t\t- Set REGENERATE_TRANSECTS = TRUE in 'base_variables.R' \n\t\tOR\n\t\t- delete the '", 
+      basename(base_output_path), "' file"
+    )
+    next
+    
+  }
+  
+  # if we are going to create new transects and the transect file already exists, delete it so as not to cause confusion or accidentally append to an old dataset
+  if (do_process_transects && base_transect_file_exists) {
+    message("Deleting old transect file:\n > '", base_output_path, "'")
+    file.remove(base_output_path)
+  }
+  
+  # --------------------------------------------------------------
+  # ---- Load flowpaths for VPU from conus_nextgen.gpkg ----
+  # --------------------------------------------------------------
+  
+  # read in nextgen flowlines 
+  flines <- 
+    CONUS_NEXTGEN_GPKG_PATH  %>% 
+    sf::read_sf(query = paste0("SELECT * FROM flowpaths WHERE vpuid = '", vpu, "'"))
+  
+  has_no_flowlines <- nrow(flines) == 0 
+  
+  if (has_no_flowlines) {
+    message("Skipping VPU ", vpu, " as no flowlines were found in '", basename(CONUS_NEXTGEN_GPKG_PATH), "'...")
+    next
+  }
+  
+  # Add an estimated bankfull width based on total downstream drainage area (sqkm, power-law equation)
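+  # (illustrative form only - the actual coefficients live inside hydrofabric3D):
+  #   bf_width = a * tot_drainage_areasqkm^b, with widths presumably floored
+  #   at min_bf_width (50m here) for small drainage areas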
+  flines <-
+    flines %>% 
+    hydroloom::rename_geometry("geometry") %>% 
+    hydrofabric3D::add_powerlaw_bankful_width(
+      total_drainage_area_sqkm_col = "tot_drainage_areasqkm", 
+      min_bf_width = 50
+    ) %>% 
+    # NOTE: debug-only subsetting left commented out - slicing 10 flowlines per
+    # stream order would drop most of the network in a production run
+    # dplyr::group_by(order) %>%
+    #   dplyr::slice(1:10) %>%
+    #   dplyr::ungroup() %>%
+      dplyr::select(
+        id,
+        lengthkm,
+        tot_drainage_areasqkm,
+        bf_width,
+        mainstem,
+        geometry
+      ) 
+  
+  # --------------------------------------------------------------
+  # ---- Generate transects ---- 
+  # --------------------------------------------------------------
+  # flines$geometry %>% sf::st_is_empty() %>% any()
+  
+  # sf::write_sf(
+  #   flines,
+  #   "/Users/anguswatters/Desktop/wrong_cs_ids_flines_error.gpkg"
+  #   # "/Users/anguswatters/Desktop/empty_geom_flines_error.gpkg"
+  # )
+  
+  # create transect lines
+  transects <- hydrofabric3D::cut_cross_sections(
+    net               = flines,                        # flowlines network
+    crosswalk_id      = CROSSWALK_ID,                  # unique feature ID column
+    cs_widths         = flines$bf_width,               # cross section width of each "id" linestring
+    # cs_widths = 15,
+    num               = 3,                             # number of cross sections per "id" linestring
+    # smooth            = FALSE,
+    # densify           = NULL,
+    smooth            = TRUE,                          # smooth lines
+    densify           = 3,                             # densify linestring points
+    rm_self_intersect = TRUE,                          # remove self-intersecting transects
+    fix_braids        = FALSE,                         # whether to fix braided flowlines
+    add               = TRUE                           # whether to add back the original data
+  )
+  # transects$cs_lengthm 
+  # transects$geometry %>% sf::st_length()
+  
+  # transects %>% 
+  #   dplyr::mutate(
+  #     new_cs_lengthm = as.numeric(sf::st_length(.))
+  #   ) %>% 
+  #   dplyr::filter(!dplyr::near(new_cs_lengthm, cs_lengthm, tol =   2))
+  # .Machine$double.eps^1
+  # crosswalk_id <- "id"
+  # # reenumerate the cs_ids for each transect based on cs_measure sorting, and make sure all cross sections are correctly numbered
+  # mismatches <-
+  #   transects %>%
+  #   sf::st_drop_geometry() %>% 
+  #   dplyr::group_by(dplyr::across(dplyr::any_of(c(crosswalk_id)))) %>% 
+  #   dplyr::arrange(cs_measure, .by_group = TRUE) %>% 
+  #   dplyr::mutate(
+  #     new_cs_id = 1:dplyr::n()
+  #   ) %>% 
+  #   dplyr::ungroup() %>% 
+  #   dplyr::filter(cs_id != new_cs_id)
+  # mismatches
+  # transects %>% 
+  #   dplyr::filter(id %in% c("wb-1023360", "wb-1023363"))
+  # 
+  # transects %>% 
+  #   dplyr::filter(id %in% c("wb-1023360", "wb-1023363")) %>% 
+  #   mapview::mapview()
+  # 
+  # # FALSE if there are any transects with different cs_ids to the newly created cs_id 
+  # # Otherwise TRUE
+  # has_valid_cs_ids <- !(nrow(mismatches) > 0)
+  # 
+  # # t2 <- transects %>% hydrofabric3D:::renumber_cs_ids("id")
+  # # hydrofabric3D::validate_transects(t2, "id")
+  # transects 
+  is_valid_transects                    <- hydrofabric3D::validate_transects(transects, CROSSWALK_ID)
+  is_valid_transects_against_flowlines  <- hydrofabric3D::validate_transects_against_flowlines(transects, flines, CROSSWALK_ID)
+  
+  # is_valid_transects                    <- hydrofabric3D::validate_transects(transects[c(1, 3, 55), ], "id")
+  # is_valid_transects_against_flowlines  <- hydrofabric3D::validate_transects_against_flowlines(transects, flines[c(-1), ], "id")
+  # is_valid_transects_against_flowlines
+  # hydrofabric3D:::rm_multiflowline_intersections(transects, flines[c(-1), ])
+  
+  # select relevant columns
+  transects <-
+    transects %>% 
+    dplyr::mutate(
+      cs_source = CS_SOURCE
+    ) %>% 
+    # hydrofabric3D:::select_transects(CROSSWALK_ID)
+    dplyr::select(
+      dplyr::any_of(CROSSWALK_ID), 
+      cs_id, 
+      cs_lengthm,
+      # cs_lengthm = new_cs_lengthm, 
+      cs_measure,
+      sinuosity,
+      cs_source,
+      geometry
+    )
+  
+  if (is_valid_transects && is_valid_transects_against_flowlines) {
+    message("Saving transects to:\n - filepath: '", base_output_path, "'")
+    sf::write_sf(
+      transects,
+      base_output_path
+    )
+  } else {
+    warning("VPU ", vpu, " transects failed validation and were NOT saved")
+  }
+  
+  message(" --- VPU ", vpu, " transects generation complete! --- ")
+  message()
+  
+}
diff --git a/runners/cs_runner2/02_extend_transects_by_fema.R b/runners/cs_runner2/02_extend_transects_by_fema.R
index e69de29..106dc44 100644
--- a/runners/cs_runner2/02_extend_transects_by_fema.R
+++ b/runners/cs_runner2/02_extend_transects_by_fema.R
@@ -0,0 +1,118 @@
+
+library(sf)
+library(dplyr)
+
+source("runners/cs_runner2/base_variables.R")
+# source("runners/cs_runner2/utils.R")
+
+VPU_IDS <- get_vpu_ids()
+
+for (i in seq_along(VPU_IDS)) {
+  # for (i in 21:length(VPU_IDS)) {
+  
+  # --------------------------------------------------------------
+  # ---- Setup variables / paths ----
+  # --------------------------------------------------------------
+  # VERSION_DIRS_LIST
+  # i = 8
+  vpu                       <- VPU_IDS[i]
+  CROSSWALK_ID              <- "id"
+  # cs_filesnames <- get_cross_section_filenames(vpu, sep = "-")
+  transect_filenames           <- get_transect_filenames(vpu, sep = "-")
+ 
+  # TODO: add a guard so none of this runs if the base transects do NOT exist
+  base_transects_path          <- paste0(VERSION_DIRS_LIST$transects_base_dir, "/", transect_filenames$transects_base_path)
+  fema_transects_output_path   <- paste0(VERSION_DIRS_LIST$transects_fema_extended_dir, "/", transect_filenames$transects_fema_extended_path)
+  
+  base_transect_file_exists    <- file.exists(base_transects_path)
+  fema_transects_file_exists   <- file.exists(fema_transects_output_path)
+  
+  do_process_transects      <- !fema_transects_file_exists || REGENERATE_TRANSECTS
+  
+  if (do_process_transects) {
+    message("Creating VPU ", vpu, " transects:", 
+            "\n - flowpaths: '",
+            basename(CONUS_NEXTGEN_GPKG_PATH), "'",
+            "\n - base transects: '",
+            basename(base_transects_path), "'"
+    )
+  } else {
+    message(
+      "VPU ", vpu, " transects file already exists at:\n - '", fema_transects_output_path, "'", 
+      "\n\n   >>> NOTE: To regenerate transects:\n\t\t- Set REGENERATE_TRANSECTS = TRUE in 'base_variables.R' \n\t\tOR\n\t\t- delete the '", 
+      basename(fema_transects_output_path), "' file"
+    )
+    next
+    
+  }
+  
+  if (do_process_transects && fema_transects_file_exists) {
+    message("Deleting old fema extended transect file:\n > '", fema_transects_output_path, "'")
+    file.remove(fema_transects_output_path)
+  }
+  
+  # read in nextgen flowlines 
+  flines <- 
+    CONUS_NEXTGEN_GPKG_PATH  %>% 
+    sf::read_sf(query = paste0("SELECT * FROM flowpaths WHERE vpuid = '", vpu, "'")) %>% 
+    hydroloom::rename_geometry("geometry")
+  
+  # transects
+  transects <- sf::read_sf(base_transects_path) %>% 
+    hydroloom::rename_geometry("geometry")
+  
+  # FEMA polygons
+  fema <- 
+    CONUS_FEMA_GPKG_PATH %>% 
+    sf::read_sf(layer = get_fema_conus_layer_name(vpu)) %>% 
+    rmapshaper::ms_simplify(keep_shapes = T, keep = 0.01, sys = TRUE, sys_mem = 16) %>% 
+    hydroloom::rename_geometry("geometry")
+  # fema <- rmapshaper::ms_simplify(fema, keep_shapes = T, keep = 0.1, sys = TRUE, sys_mem = 16)
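+  # note: keep = 0.01 retains ~1% of polygon vertices; keep_shapes = TRUE
+  # prevents small polygons from being dropped entirely during simplification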
+  
+  # # add mainstem to  
+  # transects <- 
+  #   transects  %>%
+  #   dplyr::left_join(
+  #       flines %>% 
+  #         sf::st_drop_geometry() %>% 
+  #         dplyr::select(dplyr::any_of(CROSSWALK_ID), mainstem),
+  #     by = CROSSWALK_ID 
+  #   )
+  
+  message("Extending transects out to FEMA 100yr floodplain polygon boundaries - (", Sys.time(), ")")
+  
+  # TODO: make sure this 3000m extension distance is appropriate across VPUs 
+  # TODO: also need to make sure this will be feasible memory-wise on the larger VPUs...
+  extended_transects <- hydrofabric3D::extend_transects_to_polygons(
+    transect_lines         = transects, 
+    polygons               = fema, 
+    flowlines              = flines, 
+    crosswalk_id           = CROSSWALK_ID,
+    grouping_id            = CROSSWALK_ID, 
+    # grouping_id            = "mainstem", 
+    max_extension_distance = 3000,
+    reindex_cs_ids = TRUE
+  )
+  
+  is_valid_transects <- hydrofabric3D::validate_transects(extended_transects, CROSSWALK_ID)
+  is_valid_transects_against_flowlines <- hydrofabric3D::validate_transects_against_flowlines(extended_transects, flines, CROSSWALK_ID)
+  
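+  # NOTE: the original script stops after validation; below is a minimal sketch
+  # of the assumed final step - persist the FEMA-extended transects when they
+  # pass both validation checks (mirrors the write step in 01_transects.R)
+  if (is_valid_transects && is_valid_transects_against_flowlines) {
+    message("Saving FEMA-extended transects to:\n - filepath: '", fema_transects_output_path, "'")
+    sf::write_sf(
+      extended_transects,
+      fema_transects_output_path
+    )
+  } else {
+    warning("VPU ", vpu, " FEMA-extended transects failed validation and were NOT saved")
+  }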
+}
diff --git a/runners/cs_runner2/base_variables.R b/runners/cs_runner2/base_variables.R
index c716e29..7588bb9 100644
--- a/runners/cs_runner2/base_variables.R
+++ b/runners/cs_runner2/base_variables.R
@@ -27,6 +27,13 @@ VERSION_DIRS_LIST            <- get_version_base_dir_paths(BASE_DIR, VERSION)
 # string to fill in "CS_SOURCE" column in output datasets
 CS_SOURCE          <- "hydrofabric3D"
 
+# -------------------------------------------------------------------------------------
+# ---- Processing flags (cache data) ----
+# -------------------------------------------------------------------------------------
+
+# set REGENERATE_TRANSECTS to FALSE to reuse (cache) previously generated transects
+REGENERATE_TRANSECTS      <- TRUE
+
 # -------------------------------------------------------------------------------------
 # ---- S3 BUCKET NAMES ----
 # -------------------------------------------------------------------------------------
diff --git a/runners/cs_runner2/download_ml_outputs.R b/runners/cs_runner2/download_ml_outputs.R
index e69de29..4d14634 100644
--- a/runners/cs_runner2/download_ml_outputs.R
+++ b/runners/cs_runner2/download_ml_outputs.R
@@ -0,0 +1,28 @@
+# Running this script pulls the CONUS ML outputs parquet dataset from http://www.lynker-spatial.com/ and saves it into a directory within "BASE_DIR"
+# BASE_DIR is defined within runners/workflow/root_dir.R
+
+# load config variables
+source("runners/cs_runner2/base_variables.R")
+# source("runners/cs_runner/config_vars.R")
+
+# ---------------------------------------------------------------------------
+# ---- Download CONUS ML outputs parquet 
+# ---------------------------------------------------------------------------
+
+copy_cmd <- paste0('aws s3 cp ', CONUS_ML_S3_URI, " ", CONUS_ML_PARQUET_PATH)
+
+if (!file.exists(CONUS_ML_PARQUET_PATH)) {
+  message("Copying S3 object:\n", CONUS_ML_S3_URI)
+  tryCatch({
+    system(copy_cmd)
+    message("Download '", basename(CONUS_ML_PARQUET_PATH), "' complete!")
+    message("------------------")
+  }, error = function(e) {
+    message("Error downloading conus_nextgen.gpkg")
+    message(e)
+    stop()
+  })
+  
+} else {
+  message("'", basename(CONUS_ML_PARQUET_PATH), "' file already exists at\n > '", CONUS_ML_PARQUET_PATH, "'")
+}
diff --git a/runners/cs_runner2/process_fema.R b/runners/cs_runner2/process_fema.R
index 522d427..50b2106 100644
--- a/runners/cs_runner2/process_fema.R
+++ b/runners/cs_runner2/process_fema.R
@@ -23,11 +23,9 @@ library(geos)
 library(fastmap)
 library(nngeo)
 
-# devtools::install_github("anguswg-ucsb/hydrofabric3D")
-
-# TODO: Steps that converts FGB to geojson and then geojson to gpkg can be put into a single loop
-# TODO: Delete old files as needed
-
+# ONLY runs if the main CONUS FEMA gpkg hasn't been created yet
+if (!file.exists(CONUS_FEMA_GPKG_PATH)) {
+  
 # -------------------------------------------------------------------------------------
 # ---- OVERWRITE_FEMA_FILES constant logical ----
 # ---- > if TRUE, processing steps will be run again 
@@ -616,6 +614,15 @@ all_fema_vpu_layers <- list.files(BASE_DIRS_LIST$fema_by_vpu_output_dirs, full.n
 combine_gpkg_files(all_fema_vpu_layers, CONUS_FEMA_GPKG_PATH)
   
 
+# -------------------------------------------------------------------------------------
+
+} else {
+  message("'", basename(CONUS_FEMA_GPKG_PATH), "'already exists at \n> ", CONUS_FEMA_GPKG_PATH)
+}
+
+# -------------------------------------------------------------------------------------
+# -------------------------------------------------------------------------------------
+
 # # -------------------------------------------------------------------------------------
 # # ---- Union each VPU geopackage (either on state or just touching predicate) ---- 
 # # -------------------------------------------------------------------------------------
diff --git a/runners/cs_runner2/utils.R b/runners/cs_runner2/utils.R
index cb624ae..5413d5c 100644
--- a/runners/cs_runner2/utils.R
+++ b/runners/cs_runner2/utils.R
@@ -98,14 +98,14 @@ get_vpu_ids <- function() {
 # base_dir/
 #   └── lynker-spatial/
 #     ├── hydrofabric/
-    #     ├── version_number/
-    #         ├── network/
-    #         ├── transects/
-    #         ├── cross-sections/
-          #         ├── dem/
-          #         ├── dem-ml/
-          #         ├── dem-coastal-bathy/
-          #         ├── dem-points/
+#     ├── version_number/
+#         ├── network/
+#         ├── transects/
+#         ├── cross-sections/
+#         ├── dem/
+#         ├── dem-ml/
+#         ├── dem-coastal-bathy/
+#         ├── dem-points/
 create_new_version_dirs <- function(base_dir, version, with_output = FALSE) {
   # version = "v3.0"
   # base_dir <- BASE_DIR
@@ -124,7 +124,11 @@ create_new_version_dirs <- function(base_dir, version, with_output = FALSE) {
   network_dir                <- paste0(version_base_dir, "/network")
   
   # transects
-  transects_dir              <- paste0(version_base_dir, "/transects")
+  transects_dir                  <- paste0(version_base_dir, "/transects")
+  transects_base_dir             <- paste0(transects_dir, "/base")
+  transects_fema_extended_dir    <- paste0(transects_dir, "/extended-by-fema")
+  transects_cs_extended_dir      <- paste0(transects_dir, "/extended-by-cs-attributes")
+  transects_output_dir           <- paste0(transects_dir, "/output")
   
   # cross sections dirs
   cross_sections_dir                 <- paste0(version_base_dir, "/cross-sections")
@@ -132,6 +136,8 @@ create_new_version_dirs <- function(base_dir, version, with_output = FALSE) {
   cross_sections_ml_dir              <- paste0(cross_sections_dir, "/dem-ml")
   cross_sections_coastal_bathy_dir   <- paste0(cross_sections_dir, "/dem-coastal-bathy")
   cross_sections_dem_pts_dir         <- paste0(cross_sections_dir, "/dem-points")
+  cross_sections_output_dir          <- paste0(cross_sections_dir, "/output")
+  
   
   if(with_output) {
     output_dir       <- paste0(version_base_dir, "/outputs")
@@ -151,6 +157,10 @@ create_new_version_dirs <- function(base_dir, version, with_output = FALSE) {
   
   # transects
   create_if_not_exists(transects_dir)
+  create_if_not_exists(transects_base_dir)
+  create_if_not_exists(transects_fema_extended_dir)
+  create_if_not_exists(transects_cs_extended_dir)
+  create_if_not_exists(transects_output_dir)
   
   # CS pts
   create_if_not_exists(cross_sections_dir)
@@ -158,6 +168,8 @@ create_new_version_dirs <- function(base_dir, version, with_output = FALSE) {
   create_if_not_exists(cross_sections_ml_dir)
   create_if_not_exists(cross_sections_coastal_bathy_dir)
   create_if_not_exists(cross_sections_dem_pts_dir)
+  create_if_not_exists(cross_sections_output_dir)
+  
   
   if(with_output) {
     create_if_not_exists(output_dir)
@@ -243,7 +255,11 @@ get_version_base_dir_paths <- function(base_dir, version) {
   network_dir                <- file.path(version_base_dir, "network")
   
   # transects
-  transects_dir              <- file.path(version_base_dir, "transects")
+  transects_dir                  <- file.path(version_base_dir, "transects")
+  transects_base_dir             <- paste0(transects_dir, "/base")
+  transects_fema_extended_dir    <- paste0(transects_dir, "/extended-by-fema")
+  transects_cs_extended_dir      <- paste0(transects_dir, "/extended-by-cs-attributes")
+  transects_output_dir           <- paste0(transects_dir, "/output")
   
   # cross sections dirs
   cross_sections_dir                 <- file.path(version_base_dir, "cross-sections")
@@ -251,24 +267,37 @@ get_version_base_dir_paths <- function(base_dir, version) {
   cross_sections_ml_dir              <- file.path(cross_sections_dir, "dem-ml")
   cross_sections_coastal_bathy_dir   <- file.path(cross_sections_dir, "dem-coastal-bathy")
   cross_sections_dem_pts_dir         <- file.path(cross_sections_dir, "dem-points")
+  cross_sections_output_dir          <- paste0(cross_sections_dir, "/output")
   
   return(
     list(
       hydrofabric_dir    = hydrofabric_dir,
       version_base_dir   = version_base_dir,
+      
       ref_features_dir   = ref_features_dir, 
+      
       network_dir        = network_dir,
+      
       ml_dir = ml_dir,
+      
       transects_dir      = transects_dir,
+      transects_base_dir = transects_base_dir,
+      transects_fema_extended_dir = transects_fema_extended_dir,
+      transects_cs_extended_dir = transects_cs_extended_dir,
+      transects_output_dir = transects_output_dir,
+      
       cross_sections_dir = cross_sections_dir,
       cross_sections_dem_dir     = cross_sections_dem_dir,
       cross_sections_dem_pts_dir = cross_sections_dem_pts_dir,
       cross_sections_ml_dir      = cross_sections_ml_dir,
-      cross_sections_coastal_bathy_dir = cross_sections_coastal_bathy_dir
+      cross_sections_coastal_bathy_dir = cross_sections_coastal_bathy_dir,
+      cross_sections_output_dir = cross_sections_output_dir
+      
     )
   )
 }
 
+
 combine_gpkg_files <- function(gpkg_paths, output_gpkg) {
   
   layer_counter <- list()
@@ -305,6 +334,49 @@ combine_gpkg_files <- function(gpkg_paths, output_gpkg) {
   }
 }
 
+get_transect_filenames <- function(vpu, sep = "-", ext = ".gpkg") {
+  # ext <- ".gpkg" 
+  # sep = "_"
+  transects_base_path          <- paste0(paste0(c(vpu, "transects", "base"), collapse = sep) , ext)
+  transects_fema_extended_path <- paste0(paste0(c(vpu, "transects", "extended", "by", "fema"), collapse = sep) , ext)
+  transects_cs_extended_path   <- paste0(paste0(c(vpu, "transects", "extended", "by", "cs", "attributes"), collapse = sep) , ext)
+  transects_output_path        <- paste0(paste0(c(vpu, "transects"), collapse = sep) , ext)
+  
+  return(
+    list(
+      transects_base_path = transects_base_path,
+      transects_fema_extended_path = transects_fema_extended_path,
+      transects_cs_extended_path = transects_cs_extended_path,
+      transects_output_path = transects_output_path
+    )
+  )
+}
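+# e.g. get_transect_filenames("01") returns:
+#   $transects_base_path          : "01-transects-base.gpkg"
+#   $transects_fema_extended_path : "01-transects-extended-by-fema.gpkg"
+#   $transects_cs_extended_path   : "01-transects-extended-by-cs-attributes.gpkg"
+#   $transects_output_path        : "01-transects.gpkg"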
+
+get_cross_section_filenames <- function(vpu, sep = "-", ext = ".parquet") {
+  # ext <- ".parquet" 
+  # sep = "_"
+  cs_pts_base_path           <- paste0(paste0(c(vpu, "cross", "sections", "base"), collapse = sep) , ext)
+  cs_pts_fema_extended_path  <- paste0(paste0(c(vpu, "cross", "sections", "extended", "by", "fema"), collapse = sep) , ext)
+  cs_pts_cs_extended_path    <- paste0(paste0(c(vpu, "cross", "sections", "extended", "by", "cs", "attributes"), collapse = sep) , ext)
+  cs_pts_output_path         <- paste0(paste0(c(vpu, "cross", "sections"), collapse = sep) , ext)
+  
+  return(
+    list(
+      cs_pts_base_path = cs_pts_base_path,
+      cs_pts_fema_extended_path = cs_pts_fema_extended_path,
+      cs_pts_cs_extended_path = cs_pts_cs_extended_path,
+      cs_pts_output_path = cs_pts_output_path
+    )
+  )
+}
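+# e.g. get_cross_section_filenames("01")$cs_pts_base_path returns "01-cross-sections-base.parquet"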
+
+# derive the FEMA VPU layer name used in conus_fema.gpkg
+get_fema_conus_layer_name <- function(vpu) {
+  
+  return(paste0("fema-vpu-", vpu))
+  
+}
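+# e.g. get_fema_conus_layer_name("06") returns "fema-vpu-06"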
+
 list_s3_objects <- function(s3_bucket, pattern = NULL, aws_profile = NULL) {
   
   profile_option <- if (!is.null(aws_profile)) paste0("--profile ", aws_profile) else ""