diff --git a/.gitignore b/.gitignore index 752c28f..f6b39ce 100644 --- a/.gitignore +++ b/.gitignore @@ -12,4 +12,4 @@ vignettes/tutorial check runners/secret runners/data -in-progress +in-progress \ No newline at end of file diff --git a/runners/cs_runner/01_transects.R b/runners/cs_runner/01_transects.R index 0880d33..ee609aa 100644 --- a/runners/cs_runner/01_transects.R +++ b/runners/cs_runner/01_transects.R @@ -1,104 +1,113 @@ # Generate the flowlines layer for the final cross_sections_.gpkg for each VPU -# source("runners/cs_runner/config.R") +source("runners/cs_runner/config.R") +source("runners/cs_runner/utils.R") -# # load libraries -# library(terrainSliceR) +# # # # load libraries +# library(hydrofabric3D) # library(dplyr) # library(sf) +# install.packages("devtools") -# name of S3 bucket -s3_bucket <- "s3://lynker-spatial/" - -# transect bucket prefix -transects_prefix <- paste0(s3_bucket, "v20/3D/transects/") +# # transect bucket prefix +# S3_TRANSECTS_DIR <- paste0(LYNKER_SPATIAL_HF_S3_URI, VERSION, "/3D/transects/") # paths to nextgen datasets and model attribute parquet files -nextgen_files <- list.files(nextgen_dir, full.names = FALSE) -model_attr_files <- list.files(model_attr_dir, full.names = FALSE) - -# string to fill in "cs_source" column in output datasets -net_source <- "terrainSliceR" +NEXTGEN_FILES <- list.files(NEXTGEN_DIR, full.names = FALSE) +# model_attr_files <- list.files(MODEL_ATTR_DIR, full.names = FALSE) # ensure the files are in the same order and matched up by VPU path_df <- align_files_by_vpu( - x = nextgen_files, - y = model_attr_files, - base = base_dir + x = NEXTGEN_FILES, + y = NEXTGEN_FILES, + base = BASE_DIR ) # loop over each VPU and generate cross sections, then save locally and upload to S3 bucket for(i in 1:nrow(path_df)) { - + # nextgen file and full path nextgen_file <- path_df$x[i] - nextgen_path <- paste0(nextgen_dir, nextgen_file) + nextgen_path <- paste0(NEXTGEN_DIR, nextgen_file) - # model attributes file and full path - model_attr_file <- path_df$y[i] - model_attr_path <- paste0(model_attr_dir, model_attr_file) + vpu <- path_df$vpu[i] - message("Creating VPU ", path_df$vpu[i], " transects:\n - flowpaths: '", nextgen_file, "'\n - model attributes: '", model_attr_file, "'") + # Get FEMA by VPU directory and files for current VPU + fema_vpu_dir <- paste0(FEMA_VPU_SUBFOLDERS[grepl(paste0("VPU_", vpu), basename(FEMA_VPU_SUBFOLDERS))]) + # fema_vpu_dir <- paste0(FEMA_VPU_SUBFOLDERS[grepl(paste0("VPU_", vpu), basename(FEMA_VPU_SUBFOLDERS))], "/merged") + + vpu_fema_files <- list.files(fema_vpu_dir, full.names = TRUE) + vpu_fema_file <- vpu_fema_files[grepl(paste0(vpu, "_output.gpkg"), vpu_fema_files)] + + message("Creating VPU ", vpu, " transects:", + "\n - flowpaths: '", + nextgen_file, "'", + "\n - FEMA polygons: '", + basename(vpu_fema_file), "'" + ) + + # message("Creating VPU ", path_df$vpu[i], " transects:\n - flowpaths: '", nextgen_file, "'\n - model attributes: '", model_attr_file, "'") + # sf::write_sf( + # dplyr::slice(dplyr::filter(flines, order == 2), 2), + # "/Users/anguswatters/Desktop/example_flowline.gpkg" + # ) # read in nextgen data flines <- sf::read_sf(nextgen_path, layer = "flowpaths") - - # model attributes - model_attrs <- arrow::read_parquet(model_attr_path) - - # join flowlines with model atttributes - flines <- dplyr::left_join( - flines, - dplyr::select( - model_attrs, - id, eTW - ), - by = "id" - ) # calculate bankfull width flines <- flines %>% - dplyr::mutate( - bf_width = 11 * eTW - ) %>% - dplyr::mutate( # if 
there are any NAs, use exp(0.700 + 0.365* log(tot_drainage_areasqkm)) equation to calculate bf_width - bf_width = dplyr::case_when( - is.na(bf_width) ~ exp(0.700 + 0.365* log(tot_drainage_areasqkm)), - TRUE ~ bf_width - ) - ) %>% + hydrofabric3D::add_powerlaw_bankful_width( + total_drainage_area_sqkm_col = "tot_drainage_areasqkm", + min_bf_width = 50 + ) %>% dplyr::select( hy_id = id, lengthkm, tot_drainage_areasqkm, bf_width, + mainstem, geometry = geom ) - + # dplyr::mutate( + # bf_width = exp(0.700 + 0.365* log(tot_drainage_areasqkm)) + # ) %>% + # dplyr::select( + # hy_id = id, + # lengthkm, + # tot_drainage_areasqkm, + # bf_width, + # mainstem, + # geometry = geom + # ) + # flines$bf_width <- ifelse(is.na(flines$bf_width), exp(0.700 + 0.365* log(flines$tot_drainage_areasqkm)), flines$bf_width) time1 <- Sys.time() - # system.time({ - # create transect lines - transects <- terrainSliceR::cut_cross_sections( - net = flines, # flowlines network - id = "hy_id", # Unique feature ID - cs_widths = pmax(50, flines$bf_width), # cross section width of each "id" linestring ("hy_id") - num = 10, # number of cross sections per "id" linestring ("hy_id") - smooth = TRUE, # smooth lines - densify = 3, # densify linestring points - rm_self_intersect = TRUE, # remove self intersecting transects - fix_braids = FALSE, # whether to fix braided flowlines or not - #### Arguments used for when fix_braids = TRUE - # terminal_id = NULL, - # braid_threshold = NULL, - # version = 2, - # braid_method = "comid", - # precision = 1, - add = TRUE # whether to add back the original data - ) - # }) - + + # create transect lines + transects <- hydrofabric3D::cut_cross_sections( + net = flines, # flowlines network + crosswalk_id = "hy_id", # Unique feature ID + cs_widths = flines$bf_width, # cross section width of each "id" linestring ("hy_id") + # cs_widths = pmax(50, flines$bf_width * 11), # cross section width of each "id" linestring ("hy_id") + # cs_widths = pmax(50, flines$bf_width), # cross section width of each "id" linestring ("hy_id") + num = 10, # number of cross sections per "id" linestring ("hy_id") + smooth = TRUE, # smooth lines + densify = 3, # densify linestring points + rm_self_intersect = TRUE, # remove self intersecting transects + fix_braids = FALSE, # whether to fix braided flowlines or not + #### Arguments used for when fix_braids = TRUE # TODO: these methods need revision in hydrofabric3D to allow for more flexible processing for data that is NOT COMID based (i.e. 
hy_id)
+    # terminal_id = NULL,
+    # braid_threshold = NULL,
+    # version = 2,
+    # braid_method = "comid",
+    # precision = 1,
+    add = TRUE                       # whether to add back the original data
+  )
+  
+  gc()
+  
   time2 <- Sys.time()
   time_diff <- round(as.numeric(time2 - time1 ), 2)
@@ -106,43 +115,126 @@ for(i in 1:nrow(path_df)) {
   # name of file and path to save transects gpkg too
   out_file <- paste0("nextgen_", path_df$vpu[i], "_transects.gpkg")
-  out_path <- paste0(transects_dir, out_file)
-  
-  message("Saving transects to:\n - filepath: '", out_path, "'")
+  out_path <- paste0(TRANSECTS_DIR, out_file)
   
-  # add cs_source column and keep just the desired columns to save and upload to S3
-  transects <-
+  # add cs_source column and rename cs_widths to cs_lengthm
+  transects <- 
     transects %>%
     dplyr::mutate(
-      cs_source = net_source
-    ) %>%
+      cs_source = CS_SOURCE
+    )
+  
+  # ---------------------------------------------------------------------
+  # --- Extend transects out to FEMA 100yr floodplains
+  # ---------------------------------------------------------------------
+  message("Reading in FEMA polygons...")
+  
+  # fema polygons and transect lines
+  fema <- sf::read_sf(vpu_fema_file)
+  
+  message("Simplifying FEMA polygons...")
+  message(" - Number of geoms BEFORE simplifying: ", nrow(fema))
+  
+  # TODO: this should be a function argument OR removed; we probably shouldn't forcibly and silently simplify the input polygons without the user knowing...
+  # keep 1% of the original points for speed
+  fema <- rmapshaper::ms_simplify(fema, keep_shapes = T, keep = 0.01, sys = TRUE, sys_mem = 16)
+  # fema <- rmapshaper::ms_simplify(fema, keep_shapes = T, keep = 0.1, sys = TRUE, sys_mem = 16)
+  
+  message(" - Number of geoms AFTER simplifying: ", nrow(fema))
+  message("Extending transects out to FEMA 100yr floodplain polygon boundaries - (", Sys.time(), ")")
+  
+  transects <- 
+    transects %>% 
+    dplyr::left_join(
+      dplyr::select(sf::st_drop_geometry(flines), 
+                    hy_id, 
+                    mainstem
+      ), 
+      by = "hy_id"
+    ) 
+  
+  # TODO: make sure this 3000m extension distance is appropriate across VPUs 
+  # TODO: also need to make sure this will be feasible memory-wise on the larger VPUs...
+  transects <- hydrofabric3D::extend_transects_to_polygons(
+    transect_lines = transects, 
+    polygons = fema, 
+    flowlines = flines, 
+    crosswalk_id = "hy_id",
+    grouping_id = "mainstem", 
+    max_extension_distance = 3000 
+  )
+  
+  message("FEMA extensions complete! 
- ( ", Sys.time(), " )")
+  
+  transects <- dplyr::select(transects, -tmp_id)
+  transects <- hydrofabric3D::add_tmp_id(transects)
+  
+  transects <-
+    transects %>%
+    # dplyr::select(-cs_lengthm) %>%
+    # dplyr::mutate(is_fema_extended = left_is_extended | right_is_extended) %>%
     dplyr::select(
-      hy_id,
+      hy_id, 
+      cs_id, 
+      cs_lengthm,
+      # cs_lengthm = new_cs_lengthm,
       cs_source,
-      cs_id,
       cs_measure,
-      cs_lengthm = cs_widths,
       geometry
+      # is_extended,
+      # is_fema_extended,
+      # geometry = geom
     )
-  
-  # save flowlines to out_path (lynker-spatial/01_transects/transects_.gpkg)
+  
+  gc()
+  
+  # # ---------------------------------------------------------------------
+  message("Saving transects to:\n - filepath: '", out_path, "'")
+  
+  # save transects with only columns to be uploaded to S3 (lynker-spatial/01_transects/transects_.gpkg)
   sf::write_sf(
-    transects,
+    # save dataset with only subset of columns to upload to S3
+    dplyr::select(transects, 
+                  hy_id,
+                  cs_source,
+                  cs_id,
+                  cs_measure,
+                  cs_lengthm,
+                  # sinuosity,
+                  geometry
+    ),
    out_path
-  )
+  )
   
   # command to copy transects geopackage to S3
-  if (!is.null(aws_profile)) {
-    copy_to_s3 <- paste0("aws s3 cp ", out_path, " ", transects_prefix, out_file,
-                         ifelse(is.null(aws_profile), "", paste0(" --profile ", aws_profile)))
-  } else {
-    copy_to_s3 <- paste0("aws s3 cp ", out_path, " ", transects_prefix, out_file)
-  }
+  copy_to_s3 <- paste0("aws s3 cp ", out_path, " ", S3_TRANSECTS_DIR, out_file,
+                       ifelse(is.null(AWS_PROFILE), "", paste0(" --profile ", AWS_PROFILE))
+  )
   
-  message("Copy VPU ", path_df$vpu[i], " transects to S3:\n - S3 copy command:\n'", copy_to_s3, "'\n==========================")
   
   system(copy_to_s3, intern = TRUE)
+  
+  message("Overwriting local copy of transects to include 'is_extended' column...\n==========================")
+  
+  # Overwrite the local copy of the transects with an additional 'is_extended' column (kept for development purposes)
+  sf::write_sf(
+    dplyr::select(
+      dplyr::mutate(transects, is_extended = FALSE),
+      hy_id,
+      cs_source,
+      cs_id,
+      cs_measure,
+      cs_lengthm,
+      # sinuosity,
+      is_extended,
+      geometry
+    ),
+    out_path
+  )
+  
+  rm(fema, transects, flines)
+  gc()
 }
diff --git a/runners/cs_runner/02_cs_pts.R b/runners/cs_runner/02_cs_pts.R
index da6e3e8..1614e10 100644
--- a/runners/cs_runner/02_cs_pts.R
+++ b/runners/cs_runner/02_cs_pts.R
@@ -1,137 +1,509 @@
 # Generate the flowlines layer for the final cross_sections_.gpkg for each VPU
-# source("runners/cs_runner/config.R")
+source("runners/cs_runner/config.R")
 
 # # load libraries
-# library(terrainSliceR)
-# library(dplyr)
-# library(sf)
-
-# name of S3 bucket
-s3_bucket <- "s3://lynker-spatial/"
-
-# cross section bucket prefix
-cs_pts_prefix <- paste0(s3_bucket, "v20/3D/dem-cross-sections/")
+library(hydrofabric3D)
+library(dplyr)
+library(sf)
 
 # paths to nextgen datasets
-nextgen_files <- list.files(nextgen_dir, full.names = FALSE)
+NEXTGEN_FILES <- list.files(NEXTGEN_DIR, full.names = FALSE)
 
 # paths to nextgen datasets
-transect_files <- list.files(transects_dir, full.names = FALSE)
+transect_files <- list.files(TRANSECTS_DIR, full.names = FALSE)
+transect_files <- transect_files[!grepl("updated_", transect_files)]
+
+REF_FEATURES <- list.files(REF_FEATURES_GPKG_DIR, full.names = FALSE)
 
-# string to fill in "cs_source" column in output datasets
-cs_source <- "terrainSliceR"
+# reference features dataframe
+ref_df <- data.frame(
+  vpu      = sapply(strsplit(REF_FEATURES, "_", fixed = TRUE), function(i) { i[1] }),
+  ref_file = REF_FEATURES
+) # ensure the files are in the same order and matched up by VPU path_df <- align_files_by_vpu( - x = nextgen_files, - y = transect_files, - base = base_dir - ) + x = NEXTGEN_FILES, + y = transect_files, + base = BASE_DIR +) %>% + dplyr::left_join( + ref_df, + by = "vpu" + ) # loop over the nextgen and transect datasets (by VPU) and extract point elevations across points on each transect line, # then classify the points, and create a parquet file with hy_id, cs_id, pt_id, X, Y, Z data. # Save parquet locally and upload to specified S3 bucket -for (i in 1:nrow(path_df)) { - +for (i in 20:nrow(path_df)) { + + start <- Sys.time() + # nextgen file and full path nextgen_file <- path_df$x[i] - nextgen_path <- paste0(nextgen_dir, nextgen_file) + nextgen_path <- paste0(NEXTGEN_DIR, nextgen_file) # model attributes file and full path transect_file <- path_df$y[i] - transect_path <- paste0(transects_dir, transect_file) - message("Creating VPU ", path_df$vpu[i], " cross section points:\n - flowpaths: '", nextgen_file, "'\n - transects: '", transect_file, "'") + transect_path <- paste0(TRANSECTS_DIR, transect_file) + + # model attributes file and full path + ref_file <- path_df$ref_file[i] + ref_path <- paste0(REF_FEATURES_DIR, "gpkg/", ref_file) + + # current VPU being processed + VPU <- path_df$vpu[i] + + start <- Sys.time() + + message("Creating VPU ", VPU, + " cross section points:\n - flowpaths: '", nextgen_file, + "'\n - transects: '", transect_file, "'", + "\n - waterbodies: '", ref_file, "'", + "'\n - start time: '", start, "'" + ) ################### + message("Reading in transects...\n > ", transect_file) + # read in transects data + transects <- sf::read_sf(transect_path) - # read in transects data - transects <- sf::read_sf(transect_path) + message("Reading in flowlines... \n > ", nextgen_file) + # read in nextgen data + flines <- sf::read_sf(nextgen_path, layer = "flowpaths") - # read in nextgen data - flines <- sf::read_sf(nextgen_path, layer = "flowpaths") + message("Reading in waterbodies... 
\n > ", ref_file) + # read in waterbodies reference features layer + waterbodies <- sf::read_sf(ref_path, layer = "waterbodies") + + # Update flowlines and transects to remove flowlines and transects that intersect with reference_features waterbodies + feature_subsets <- wb_intersects(flines, transects, waterbodies) + + # replace flowlines and transects objects with updated versions in "updated_features" + flines <- flines[feature_subsets$valid_flowlines, ] + transects <- transects[feature_subsets$valid_transects, ] + + rm(waterbodies) + gc() - transects <- - transects %>% - dplyr::rename(lengthm = cs_lengthm) + start_cs_pts <- Sys.time() + # # ------------------------------------------------------------------------ + # # ------ TESTING DATA ------- + # # ------------------------------------------------------------------------ + # flines <- + # flines %>% + # dplyr::slice(1:3500) + # + # transects <- + # transects %>% + # dplyr::filter(hy_id %in% flines$id) - # get start time for log messages - time1 <- Sys.time() - - # get cross section point elevations - cs_pts <- terrainSliceR::cross_section_pts( - cs = transects[lengths(sf::st_intersects(transects, flines)) == 1, ], - points_per_cs = NULL, - min_pts_per_cs = 10, - dem = DEM_URL - ) - - # get end time for log messages - time2 <- Sys.time() - time_diff <- round(as.numeric(time2 - time1 ), 2) + # ------------------------------------------------------------------------ + + message("Extracting cross section points (", start_cs_pts, ")") + # ---------------------------------------------------------------------------------------------------------------- + # ---- STEP 1: Extract cs points from DEM ---- + # ---------------------------------------------------------------------------------------------------------------- + # system.time({ + + # get cross section point elevations + cs_pts <- hydrofabric3D::cross_section_pts( - message("\n\n ---> Cross section point elevations processed in ", time_diff) + cs = transects, + crosswalk_id = "hy_id", + points_per_cs = NULL, + min_pts_per_cs = 10, + dem = DEM_PATH + ) + + # }) + + # ---------------------------------------------------------------------------------------------------------------- + # ---- STEP 2: Remove any cross section that has ANY missing (NA) Z values, and classify the points ---- + # ---------------------------------------------------------------------------------------------------------------- + + # sf::write_sf(cs_pts, "/Users/anguswatters/Desktop/test_improve_cs_pts_11.gpkg") + # sf::write_sf(flines, "/Users/anguswatters/Desktop/test_improve_flines_11.gpkg") + # sf::write_sf(transects, "/Users/anguswatters/Desktop/test_improve_transects_11.gpkg") + + # sf::write_sf(flines, "/Users/anguswatters/Desktop/test_improve_flines_11_2.gpkg") + # sf::write_sf(transects, "/Users/anguswatters/Desktop/test_improve_transects_11_2.gpkg") + # cs_pts %>% + # dplyr::group_by(hy_id, cs_id) %>% + # dplyr::filter(!any(is.na(Z))) %>% + # dplyr::ungroup() + # + # cs_pts %>% + # hydrofabric3D::drop_incomplete_cs_pts("hy_id") + + # system.time({ + + # STEP 2: Remove any cross section that has ANY missing (NA) Z values, and classify the points + cs_pts <- + # cs_pts2 <- + cs_pts %>% + # dplyr::group_by(hy_id, cs_id) %>% + # dplyr::filter(!any(is.na(Z))) %>% + # dplyr::ungroup() %>% + hydrofabric3D::drop_incomplete_cs_pts("hy_id") %>% + hydrofabric3D::classify_points( + crosswalk_id = "hy_id", + pct_of_length_for_relief = PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF + ) + + # }) + + ids_original_cs_pts <- 
hydrofabric3D::add_tmp_id(cs_pts)$tmp_id + # ids_original_cs_pts <- hydrofabric3D::add_tmp_id(cs_pts2)$tmp_id + + # sf::write_sf(cs_pts2, "/Users/anguswatters/Desktop/test_improve_cs_pts_classified_11.gpkg") + # sf::write_sf(cs_pts, "/Users/anguswatters/Desktop/test_improve_cs_pts_classified_11_2.gpkg") + + + # ---------------------------------------------------------------------------------------------------------------- + # ---- STEP 3: Try to rectify any no relief and invalid banks cross sections ---- + # ---------------------------------------------------------------------------------------------------------------- + # dplyr::rename(flines, hy_id = id) + # profvis::profvis({ + # system.time({ - # Remove any cross section that has ANY missing (NA) Z values. - cs_pts <- - cs_pts %>% - # dplyr::filter(hy_id %in% c("wb-849054", "wb-845736")) %>% - dplyr::group_by(hy_id, cs_id) %>% - dplyr::filter(!any(is.na(Z))) %>% - dplyr::ungroup() + # # NOTE: new inplace method for improving (rectifying) any invalid cross sections where we dont have banks and relief + # fixed_pts <- hydrofabric3D::improve_invalid_cs2( + # cs_pts = cs_pts, # cross section points generated from hydrofabric3D::cross_section_pts() + # net = dplyr::rename(flines, hy_id = id), # original flowline network + # # net = flines, # original flowline network + # transects = transects, # original transect lines + # points_per_cs = NULL, + # min_pts_per_cs = 10, # number of points per cross sections + # dem = DEM_PATH, # DEM to extract points from + # scale = EXTENSION_PCT, # How far to extend transects if the points need to be rechecked + # pct_of_length_for_relief = PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF, # percent of cross sections length to be needed in relief calculation to consider cross section to "have relief" + # fix_ids = FALSE, + # verbose = TRUE + # ) + + + # system.time({ + fixed_pts <- hydrofabric3D::get_improved_cs_pts( + cs_pts = cs_pts, # cross section points generated from hydrofabric3D::cross_section_pts() + net = dplyr::rename(flines, hy_id = id), # original flowline network + # net = flines, # original flowline network + transects = transects, # original transect lines + crosswalk_id = "hy_id", + points_per_cs = NULL, + min_pts_per_cs = 10, # number of points per cross sections + dem = DEM_PATH, # DEM to extract points from + scale = EXTENSION_PCT, # How far to extend transects if the points need to be rechecked + pct_of_length_for_relief = PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF, # percent of cross sections length to be needed in relief calculation to consider cross section to "have relief" + fix_ids = FALSE, + verbose = TRUE + ) + # }) + + # transects %>% + # sf::st_drop_geometry() %>% + # dplyr::group_by(id, cs_id) + # fixed_pts2$is_extended %>% sum() + ids_after_fixed_pts <- hydrofabric3D::add_tmp_id(fixed_pts)$tmp_id + + # # TODO: This is taking A LOT time to process as inputs get larger, an improvement should be looked into more + # fixed_pts <- hydrofabric3D::rectify_cs( + # # cs_pts <- hydrofabric3D::rectify_flat_cs( + # cs_pts = cs_pts, # cross section points generated from hydrofabric3D::cross_section_pts() + # net = flines, # original flowline network + # transects = transects, # original transect lines + # points_per_cs = NULL, + # min_pts_per_cs = 10, # number of points per cross sections + # dem = DEM_PATH, # DEM to extract points from + # scale = EXTENSION_PCT, # How far to extend transects if the points need to be rechecked + # pct_of_length_for_relief = PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF, # 
percent of cross sections length to be needed in relief calculation to consider cross section to "have relief" + # fix_ids = FALSE, + # verbose = TRUE + # ) + + # get a summary dataframe and print out details message + rectify_summary <- hydrofabric3D::rectify_summary(cs_pts, fixed_pts, verbose = TRUE) + rectify_summary <- + rectify_summary %>% + dplyr::mutate( + vpu = VPU + ) %>% + dplyr::relocate(vpu, metric, value) + + readr::write_csv(rectify_summary, paste0(META_PATH, "nextgen_", VPU, "_cross_sections_metadata.csv")) + + # ---------------------------------------------------------------------------------------------------------------- + # ---- STEP 4: Update transects with extended transects (if exists) ---- + # ---------------------------------------------------------------------------------------------------------------- + + # get the counts of each point type to add this data to the transects dataset + point_type_counts <- hydrofabric3D::get_point_type_counts(fixed_pts) + # point_type_counts <- hydrofabric3D::get_point_type_counts(fixed_pts, crosswalk_id = "hy_id") + + # # check the number of cross sections that were extended + # fixed_pts$is_extended %>% table() + message("Subsetting cross section points generated after extending transects...") + + # extract cross section points that have an "is_extended" value of TRUE + extended_pts <- + fixed_pts %>% + dplyr::filter(is_extended) %>% + hydrofabric3D::add_tmp_id() + + # extract transects that have a "hy_id" in the "extended_pts" dataset + update_transects <- + transects %>% + hydrofabric3D::add_tmp_id() %>% + dplyr::filter(tmp_id %in% unique(extended_pts$tmp_id)) + + # if any transects were extended, update the transects dataset, and overwrite local and S3 transects geopackages + if (nrow(update_transects) > 0) { + message("Updating ", nrow(update_transects), " transects") - # classify the cross section points - cs_pts <- - cs_pts %>% - dplyr::rename(cs_widths = lengthm) %>% - terrainSliceR::classify_points() %>% - dplyr::mutate( - X = sf::st_coordinates(.)[,1], - Y = sf::st_coordinates(.)[,2] - ) %>% - dplyr::select( - hy_id, cs_id, pt_id, - cs_lengthm = cs_widths, - relative_distance, - X, Y, Z, - class + update_transects <- + update_transects %>% + # apply extend_by_percent function to each transect line: + hydrofabric3D:::extend_by_percent( + pct = EXTENSION_PCT, + length_col = "cs_lengthm" + ) + + # Filter down to ONLY points that were finalized and rectified from rectify_cs_pts() + # remove old transects that have "tmp_id" in "extended_pts" (transects that were unchanged and are "good_to_go") + # and then replace with old transects with the "update_transects" + out_transects <- + transects %>% + hydrofabric3D::add_tmp_id() %>% + # dplyr::filter(!tmp_id %in% unique(extended_pts$tmp_id)) %>% + dplyr::filter(tmp_id %in% unique(hydrofabric3D::add_tmp_id(fixed_pts)$tmp_id)) %>% # Subset down to the remaining tmp_ids in the fixed points + dplyr::filter(!tmp_id %in% unique(extended_pts$tmp_id)) %>% # remove the tmp_ids that we are going add back in with the extended versions of those tmp_ids + dplyr::bind_rows( # bring in the new updated extended transects + dplyr::mutate( + update_transects, + is_extended = TRUE ) + ) - # Drop point geometries, leaving just X, Y, Z values - cs_pts <- sf::st_drop_geometry(cs_pts) + } else { + + out_transects <- + transects %>% + hydrofabric3D::add_tmp_id() %>% + dplyr::filter(tmp_id %in% unique(hydrofabric3D::add_tmp_id(fixed_pts)$tmp_id)) %>% + dplyr::filter(!tmp_id %in% 
unique(extended_pts$tmp_id)) + } + + # finalize new transects + out_transects <- + out_transects %>% + dplyr::left_join( + point_type_counts, + by = c("hy_id", "cs_id") + ) %>% + dplyr::left_join( + dplyr::ungroup( + dplyr::slice( + dplyr::group_by( + dplyr::select(sf::st_drop_geometry(fixed_pts), + hy_id, cs_id, bottom, left_bank, right_bank, valid_banks, has_relief), + hy_id, cs_id), + 1) + ), + by = c("hy_id", "cs_id") + ) %>% + dplyr::select(hy_id, cs_source, cs_id, cs_measure, cs_lengthm, + # sinuosity, + is_extended, + left_bank_count, right_bank_count, channel_count, bottom_count, + bottom, left_bank, right_bank, valid_banks, has_relief, + geom + ) + + # ---------------------------------------------------------------------------------------------------------------- + # ---- Re enumerate the transects & cross section points "cs_id" ---- + # ---------------------------------------------------------------------------------------------------------------- + + # make a dataframe that has a new_cs_id column that has + # the cs_id renumbered to fill in any missing IDs, + # so each hy_id has cs_ids that go from 1 - number of cross sections on hy_id + # The dataframe below will be used to join the "new_cs_id" with + # the original "cs_ids" in the final cross section POINTS and UPDATED TRANSECTS output datasets + renumbered_ids <- + fixed_pts %>% + sf::st_drop_geometry() %>% + # dplyr::filter(hy_id %in% c("wb-2402800", "wb-2398282", "wb-2400351")) %>% + dplyr::select(hy_id, cs_id, pt_id, cs_measure) %>% + dplyr::group_by(hy_id, cs_id) %>% + dplyr::slice(1) %>% + dplyr::ungroup() %>% + hydrofabric3D::add_tmp_id() %>% + dplyr::group_by(hy_id) %>% + dplyr::mutate( + new_cs_id = 1:dplyr::n() + ) %>% + dplyr::ungroup() %>% + dplyr::select(new_cs_id, tmp_id) + + # Renumber the transects to have correct CS IDs + out_transects <- dplyr::left_join( + hydrofabric3D::add_tmp_id(out_transects), + renumbered_ids, + by = "tmp_id" + ) %>% + dplyr::select(-cs_id, -tmp_id) %>% + dplyr::select(hy_id, cs_source, + cs_id = new_cs_id, + cs_measure, cs_lengthm, + # sinuosity, + is_extended, + left_bank_count, right_bank_count, channel_count, bottom_count, + bottom, left_bank, right_bank, valid_banks, has_relief, + geometry = geom + ) + + # Renumber the cross sections points to have correct CS IDs + fixed_pts <- dplyr::left_join( + hydrofabric3D::add_tmp_id(fixed_pts), + renumbered_ids, + by = "tmp_id" + ) %>% + dplyr::select(-cs_id, -tmp_id) %>% + dplyr::rename(cs_id = new_cs_id) + # ---------------------------------------------------------------------------------------------------------------- + # ---- Cross section points parquet to S3 ---- + # ---------------------------------------------------------------------------------------------------------------- + + # classify the cross section points + fixed_pts <- + fixed_pts %>% + dplyr::mutate( + X = sf::st_coordinates(.)[,1], + Y = sf::st_coordinates(.)[,2] + ) %>% + sf::st_drop_geometry() %>% + dplyr::select( + hy_id, cs_id, pt_id, + cs_lengthm, + relative_distance, + X, Y, Z, + class, point_type, + bottom, left_bank, right_bank, valid_banks, has_relief # newly added columns (03/06/2024) + ) + + # # Drop point geometries, leaving just X, Y, Z values + # fixed_pts <- sf::st_drop_geometry(fixed_pts) + # add Z_source column for source of elevation data - cs_pts <- - cs_pts %>% + fixed_pts <- + fixed_pts %>% dplyr::mutate( - Z_source = cs_source - ) %>% - dplyr::relocate(hy_id, cs_id, pt_id, cs_lengthm, relative_distance, X, Y, Z, Z_source, class) + Z_source = 
CS_SOURCE
+    ) %>%
+    dplyr::relocate(hy_id, cs_id, pt_id, cs_lengthm, relative_distance, X, Y, Z, Z_source,
+                    class, point_type,
+                    bottom, left_bank, right_bank, valid_banks, has_relief)
-  ###################
+  ids_before_align <- hydrofabric3D::add_tmp_id(fixed_pts)$tmp_id
+  
+  message("Aligning banks and smoothing bottoms...")
+  fixed_pts <- hydrofabric3D::align_banks_and_bottoms(cs_pts = fixed_pts, crosswalk_id = "hy_id")
+  # fixed_pts <- hydrofabric3D::align_banks_and_bottoms(cs_pts = fixed_pts, crosswalk_id = "hy_id")
+  
+  ids_after_align <- hydrofabric3D::add_tmp_id(fixed_pts)$tmp_id
+  
+  message("Reclassifying cross section points...")
+  fixed_pts <- hydrofabric3D::classify_points(
+    cs_pts = fixed_pts, 
+    crosswalk_id = "hy_id",
+    pct_of_length_for_relief = PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF
+  )
+  
+  ids_after_reclassify <- hydrofabric3D::add_tmp_id(fixed_pts)$tmp_id
+  
+  if(all(ids_original_cs_pts %in% ids_after_fixed_pts)) {
+    message("All hy_id/cs_ids in ORIGINAL DEM point extraction were found in the FIXED points")
+  } else {
+    message(" >>> Missing hy_id/cs_ids in ORIGINAL DEM point extraction compared to the FIXED points")
+  }
+  
+  if(all(ids_before_align %in% ids_after_align)) {
+    message("All hy_id/cs_ids are kept intact after bank alignment and bottom smoothing")
+  } else {
+    message(" >>> Missing hy_id/cs_ids after bank alignment and bottom smoothing")
+  }
+  
+  if(all(ids_after_align %in% ids_after_reclassify)) {
+    message("All hy_id/cs_ids are kept intact after RECLASSIFICATION")
+  } else {
+    message(" >>> Missing hy_id/cs_ids after RECLASSIFICATION")
+  }
+  
+  # all(hydrofabric3D::add_tmp_id(fixed_pts2)$tmp_id %in% hydrofabric3D::add_tmp_id(fixed_pts)$tmp_id)
+  # all(hydrofabric3D::add_tmp_id(fixed_pts4)$tmp_id %in% hydrofabric3D::add_tmp_id(fixed_pts)$tmp_id)
+  
+  ##############################################################################
+  
+  # ----------------------------------------------------------------------------------------------------------------
+  # ---- Re-upload the updated transects geopackage to S3 ----
+  # ----------------------------------------------------------------------------------------------------------------
+  updated_path <- gsub(transect_file, paste0("updated_", transect_file), transect_path)
+  
+  ## Save local and REUPLOAD TRANSECTS to S3 to update for any extended cross sections
+  message("Saving updated transects to:\n - filepath: '", updated_path, "'")
+  
+  # save updated transects to updated_path (lynker-spatial/01_transects/transects_.gpkg)
+  sf::write_sf(
+    out_transects,
+    # transect_path
+    updated_path
+  )
+  
+  # command to copy transects geopackage to S3
+  trans_to_s3 <- paste0("aws s3 cp ", updated_path, " ", S3_TRANSECTS_DIR, transect_file,
+                        ifelse(is.null(AWS_PROFILE), "", paste0(" --profile ", AWS_PROFILE)))
+  
+  message("Copy VPU ", path_df$vpu[i], " transects to S3:\n - S3 copy command:\n'", 
+          trans_to_s3, 
+          "'\n==========================")
+  
+  system(trans_to_s3, intern = TRUE)
+  
+  ##############################################################################
+  
+  # ----------------------------------------------------------------------------------------------------------------
+  # ---- Upload the cross section points parquet to S3 ----
+  # ----------------------------------------------------------------------------------------------------------------
 
   # name of file and path to save transects gpkg too
   out_file <- paste0("nextgen_", path_df$vpu[i], "_cross_sections.parquet")
-  out_path <- paste0(cs_pts_dir, out_file)
+  out_path <- 
paste0(CS_PTS_DIR, out_file) message("Saving cross section points to:\n - filepath: '", out_path, "'") # save cross section points as a parquet to out_path (lynker-spatial/02_cs_pts/cs_pts_.parquet) - arrow::write_parquet(cs_pts, out_path) + arrow::write_parquet(fixed_pts, out_path) # command to copy cross section points parquet to S3 - if (!is.null(aws_profile)) { - copy_cs_pts_to_s3 <- paste0("aws s3 cp ", out_path, " ", cs_pts_prefix, out_file, - ifelse(is.null(aws_profile), "", paste0(" --profile ", aws_profile))) - } else { - copy_cs_pts_to_s3 <- paste0("aws s3 cp ", out_path, " ", cs_pts_prefix, out_file) - - } - + copy_cs_pts_to_s3 <- paste0("aws s3 cp ", out_path, " ", S3_CS_PTS_DIR, out_file, + ifelse(is.null(AWS_PROFILE), "", paste0(" --profile ", AWS_PROFILE))) + message("Copy VPU ", path_df$vpu[i], " cross sections to S3:\n - S3 copy command:\n'", - paste0("aws s3 cp ", out_path, " ", cs_pts_prefix, out_file, - ifelse(is.null(aws_profile), "", paste0(" --profile ", aws_profile))), + paste0("aws s3 cp ", out_path, " ", S3_CS_PTS_DIR, out_file, + ifelse(is.null(AWS_PROFILE), "", paste0(" --profile ", AWS_PROFILE))), "'\n==========================") system(copy_cs_pts_to_s3, intern = TRUE) - + + end <- Sys.time() + + message("Finished cross section point generation for VPU ", VPU) + message("- Completed at: ", end) + message("==========================") + + rm(fixed_pts) + gc() + gc() } + +# ########################################################################################################################################### diff --git a/runners/cs_runner/03_driver.R b/runners/cs_runner/03_driver.R deleted file mode 100644 index cce576d..0000000 --- a/runners/cs_runner/03_driver.R +++ /dev/null @@ -1,13 +0,0 @@ -### Run this file to have all runner scripts run in order - -# downloads nextgen datasets -source("runners/cs_runner/config.R") - -# downloads nextgen datasets -source("runners/cs_runner/download_nextgen.R") - -# generate and upload transects datasets -source("runners/cs_runner/01_transects.R") - -# generate and upload cross sections points datasets -source("runners/cs_runner/02_cs_pts.R") diff --git a/runners/cs_runner/03_inject_ml.R b/runners/cs_runner/03_inject_ml.R new file mode 100644 index 0000000..f29efc7 --- /dev/null +++ b/runners/cs_runner/03_inject_ml.R @@ -0,0 +1,390 @@ +# ---------------------------------------------------------------------------------------------------------------- +# ---- data paths ----- +# ---------------------------------------------------------------------------------------------------------------- +library(dplyr) +library(hydrofabric3D) +library(sf) +library(patchwork) + +# Generate the flowlines layer for the final cross_sections_.gpkg for each VPU +source("runners/cs_runner/config.R") +source("runners/cs_runner/utils.R") + +# cross section bucket prefix +S3_CS_ML_PTS_DIR <- paste0(LYNKER_SPATIAL_HF_S3_URI, VERSION, "/3D/cross-sections/") +# S3_CS_PTS_DIR <- paste0(LYNKER_SPATIAL_HF_S3_URI, "v20/3D/dem-cross-sections/") + +ML_OUTPUTS_PATH <- list.files(ML_OUTPUTS_DIR, full.names = TRUE) + +# paths to nextgen datasets +NEXTGEN_FILES <- list.files(NEXTGEN_DIR, full.names = FALSE) + +# paths to nextgen datasets +cs_files <- list.files(CS_PTS_DIR, full.names = FALSE) + +# ensure the files are in the same order and matched up by VPU +path_df <- align_files_by_vpu( + x = NEXTGEN_FILES, + y = cs_files, + base = BASE_DIR +) +# dplyr::left_join( +# ref_df, +# by = "vpu" +# ) + +# ML Outputs +ml_output <- 
arrow::read_parquet(ML_OUTPUTS_PATH) + +# loop over the nextgen and transect datasets (by VPU) and extract point elevations across points on each transect line, +# then classify the points, and create a parquet file with hy_id, cs_id, pt_id, X, Y, Z data. +# Save parquet locally and upload to specified S3 bucket +for (i in 1:nrow(path_df)) { + # i = 8 + start <- round(Sys.time()) + + # nextgen file and full path + nextgen_file <- path_df$x[i] + nextgen_path <- paste0(NEXTGEN_DIR, nextgen_file) + + # model attributes file and full path + cs_file <- path_df$y[i] + cs_pts_path <- paste0(CS_PTS_DIR, cs_file) + + # current VPU being processed + VPU = path_df$vpu[i] + + message("Augmenting DEM cross sections with ML estimated widths/depths: ", VPU, + " cross section points:", + "'\n - cross section points: '", cs_file, "'", + "'\n - ML estimated widths/depths: '", ML_OUTPUTS_S3_FILE, "'", + # "'\n - ML estimated widths/depths: '", ML_OUTPUTS_S3_URI, "'", + "\n - CONUS network file: '", CONUS_NETWORK_URI, "'", + "\n - flowpaths: '", nextgen_file, + # "\n - waterbodies: '", ref_file, "'", + "'\n - start time: '", start, "'" + ) + + # ---------------------------------------------------------------------------------------------------------------- + # ---- Read in data ----- + # ---------------------------------------------------------------------------------------------------------------- + message("Loading data...") + + # CONUS network parquet + net <- + CONUS_NETWORK_URI %>% + arrow::open_dataset() %>% + dplyr::filter(vpu == VPU) %>% + dplyr::collect() + + # Cross section points parquet + cs_pts <- arrow::read_parquet(cs_pts_path) + + # TODO: Not needed + # fline_net <- sf::read_sf(nextgen_path, layer = "flowpaths") + + # ---------------------------------------------------------------------------------------------------------------- + # ---- Extract the max hydroseq "hy_id" for each flowline in the CONUS network parquet ----- + # Use this to join "hy_id" to "hf_id" in ML outputs data + # ---------------------------------------------------------------------------------------------------------------- + + net_subset <- + net %>% + dplyr::select(id, hf_id, hf_hydroseq) %>% + # dplyr::filter(id %in% unique(cs$hy_id)) %>% + dplyr::filter(stringr::str_detect(id, "^wb-") & !is.na(id)) %>% + dplyr::group_by(id) %>% + dplyr::slice_max(hf_hydroseq, with_ties = FALSE) %>% + dplyr::ungroup() %>% + dplyr::select(hy_id = id, + hf_id, + hf_hydroseq + ) + + number_hyids_in_cs_pts <- length(unique(cs_pts$hy_id )) + number_hyids_in_conus_net <- length(unique(net_subset$hy_id)) + + message("VPU ", VPU, ": ", + "\n - Number of CONUS network hy_ids: '", number_hyids_in_conus_net, "'", + "\n - Number of cross section points hy_ids: '", number_hyids_in_cs_pts, "'" + ) + # ---------------------------------------------------------------------------------------------------------------- + # ---- Find the stream orders for the flowlines in the network ----- + # ---------------------------------------------------------------------------------------------------------------- + + # stream_order <- + # fline_net %>% + # sf::st_drop_geometry() %>% + # dplyr::select(hy_id = id, + # order, + # # hydroseq, + # tot_drainage_areasqkm + # ) %>% + # dplyr::filter(hy_id %in% net_subset$hy_id) + + # ---------------------------------------------------------------------------------------------------------------- + # ---- Subset ML data to specific VPU and add "hy_id" column to ML data ----- + # 
---------------------------------------------------------------------------------------------------------------- + + # Join hy_id onto the ML outputs and then remove the rows WITHOUT matches in hy_id + # this should give us a (nearly) one-to-one cross walk between "hy_id" in the cross section points + # and "hf_id" in the ML outputs dataset + ml_subset <- + ml_output %>% + dplyr::left_join( + net_subset, + by = "hf_id" + ) %>% + dplyr::filter(!is.na(hy_id)) %>% + dplyr::relocate(hy_id, hf_id) + + message(round(Sys.time()), " - Joining ML width/depths estimates to cross section points...") + + # join the ML outputs data to the cross section points + cs_pts <- + cs_pts %>% + dplyr::left_join( + dplyr::select(ml_subset, + hy_id, + hf_id, + owp_tw_inchan, + owp_y_inchan, + owp_tw_bf, + owp_y_bf, + owp_dingman_r), + by = "hy_id" + ) + + message(round(Sys.time()), " - Replacing any negative width/depth estimates with cross section bottom lengths...") + + cs_bottom_lengths <- hydrofabric3D::get_cs_bottom_length(cross_section_pts = cs_pts, crosswalk_id = "hy_id") + + # TODO: for now we replace any negative TW values with the length of the bottom of the cross section + # TODO: This method + the negative model output values both need to be looked into (04/05/2024) + cs_pts <- + cs_pts %>% + dplyr::left_join( + cs_bottom_lengths, + by = c("hy_id", "cs_id") + ) %>% + dplyr::mutate( + owp_tw_inchan = dplyr::case_when( + owp_tw_inchan <= 0 ~ bottom_length, + TRUE ~ owp_tw_inchan + ), + owp_tw_bf = dplyr::case_when( + owp_tw_bf <= 0 ~ bottom_length, + TRUE ~ owp_tw_bf + ) + ) %>% + dplyr::select(-bottom_length) + + # extract any cross sections that didn't get matched with a "hf_id" and (or?) no ML data + # TODO: look at this stuff with Arash (04/09/2024) + missing_cs <- + cs_pts %>% + dplyr::filter(is.na(hf_id) | + is.na(owp_tw_inchan) | is.na(owp_y_inchan) | + is.na(owp_tw_bf) | is.na(owp_y_bf) | + is.na(owp_dingman_r)) %>% + hydrofabric3D::add_tmp_id() + + # TODO: Delete this, but time being keeping this to inspect mismatch in between "hy_id" and "hf_id" + readr::write_csv( + dplyr::select(missing_cs, -tmp_id), + paste0(META_PATH, "nextgen_", path_df$vpu[i], "_cross_sections_missing_hf_ids.csv") + ) + + # Split the cross sections into 2 groups: + # - "Inchannel cs" group are points with BOTH valid banks AND relief --> These get the INCHANNEL TW and Y values from the ML model + # - "Bankful cs" group are points WITHOUT valid banks OR any relief --> These get the BANKFUL TW and Y values from the ML model + inchannel_cs <- + cs_pts %>% + hydrofabric3D::add_tmp_id() %>% + dplyr::filter(!tmp_id %in% unique(missing_cs$tmp_id)) %>% # NOTE: makes sure to remove any of the "missing" cross sections without data + dplyr::select(-tmp_id) %>% + dplyr::filter(valid_banks & has_relief) %>% + # NOTE: temporarily rename the top widths, depths, and dingman's R columns so they + # work nicely with the "hydrofabric3D::add_cs_bathymetry()" function which takes a dataframe of cross section points + # with "TW", "DEPTH", and "DINGMAN_R" columns for each cross section + dplyr::rename( + TW = owp_tw_inchan, + DEPTH = owp_y_inchan, + DINGMAN_R = owp_dingman_r + ) + + bankful_cs <- + cs_pts %>% + hydrofabric3D::add_tmp_id() %>% + dplyr::filter(!tmp_id %in% unique(missing_cs$tmp_id)) %>% + dplyr::select(-tmp_id) %>% + dplyr::filter(!valid_banks | !has_relief) %>% + dplyr::rename( + TW = owp_tw_bf, + DEPTH = owp_y_bf, + DINGMAN_R = owp_dingman_r + ) + + # bankful_cs %>% + # dplyr::slice(1000:1200) %>% + # 
hydrofabric3D::plot_cs_pts(color = "point_type") + + # sanity check that all rows are accounted for after splitting up data + split_kept_all_rows <- nrow(cs_pts) == nrow(bankful_cs) + nrow(inchannel_cs) + nrow(missing_cs) + # split_kept_all_rows <- nrow(cs_pts) == nrow(bankful_cs) + nrow(inchannel_cs) + + if (!split_kept_all_rows) { + warning(paste0("When splitting cross section points into 'bankful' and 'inchannel' groups,", + "\nsome points were not put in either group.", + "\nLikely due to 'valid_banks' and/or 'has_relief' columns have either missing ", + "values or contain values other than TRUE/FALSE") + ) + } + message(round(Sys.time()), " - Adding cross section bathymetry using inchannel widths/depths estimates...") + + # system.time({ + + # Add bathymetry using "inchannel" estimates + inchannel_cs <- hydrofabric3D::add_cs_bathymetry( + cross_section_pts = inchannel_cs, + crosswalk_id = "hy_id" + ) + + # }) + # arrow::write_parquet(inchannel_cs, "/Users/anguswatters/Desktop/test_ml_cs_pts_06.parquet") + # ml_subset %>% + # dplyr::filter(hy_id == "wb-1005207") %>% + # dplyr::select(owp_y_inchan, owp_tw_inchan) %>% + # .$owp_y_inchan + message(round(Sys.time()), " - Adding cross section bathymetry using bankful widths/depths estimates...") + + # system.time({ + # Add bathymetry using "bankful" estimates + bankful_cs <- hydrofabric3D::add_cs_bathymetry( + cross_section_pts = bankful_cs, + crosswalk_id = "hy_id" + ) + # }) + + # combine the inchannel and bankful cross section points back together, fill out missing values and reclassify the points + final_cs <- dplyr::bind_rows( + dplyr::select( + inchannel_cs, + -hf_id, -TW, -DEPTH, -DINGMAN_R, -dplyr::starts_with("owp"), -is_dem_point + ), + dplyr::select( + bankful_cs, + -hf_id, -TW, -DEPTH, -DINGMAN_R, -dplyr::starts_with("owp"), -is_dem_point + ), + dplyr::select( + missing_cs, + -hf_id, -dplyr::starts_with("owp"), -tmp_id + ) + ) %>% + dplyr::group_by(hy_id, cs_id) %>% + tidyr::fill( + c(cs_lengthm, Z_source) + ) %>% + dplyr::ungroup() %>% + dplyr::select( + -point_type, + -class, + -bottom, -left_bank, -right_bank, + -has_relief, -valid_banks + ) + + message(round(Sys.time()), " - Reclassifying cross section point types...") + + # system.time({ + # reclassify + final_cs <- hydrofabric3D::classify_points(cs_pts = final_cs, + crosswalk_id = "hy_id") + # }) + + # final_uids <- final_cs %>% hydrofabric3D::get_unique_tmp_ids() + # random_uids <- sample(x=final_uids, size=12) + # cs_subset <- dplyr::filter(hydrofabric3D::add_tmp_id(final_cs), + # tmp_id %in% random_uids) + # hydrofabric3D::classify_points(cs_subset) %>% hydrofabric3D::plot_cs_pts(color = "point_type") + + starting_uids <- hydrofabric3D::get_unique_tmp_ids(cs_pts, x = "hy_id") + ending_uids <- hydrofabric3D::get_unique_tmp_ids(final_cs, x = "hy_id") + + has_same_number_of_uids <- length(starting_uids) == length(ending_uids) + all_starting_uids_in_ending_uids <- all(starting_uids %in% ending_uids) + all_ending_uids_in_starting_uids <- all(ending_uids %in% starting_uids) + + # throw some warnings if: + # - the number of uids in the input is different from the output + # - there are missing hy_id/cs_id + if (!has_same_number_of_uids) { + warning(paste0("The number of unique hy_id/cs_id is different in the ", + "starting cross section points from the final cross section points:", + "\n > Starting number of unique hy_id/cs_id: ", length(starting_uids), + "\n > Ending number of unique hy_id/cs_id: ", length(ending_uids) + )) + } + + if (!all_starting_uids_in_ending_uids) { + 
number_uids_not_in_ending_uids <- length(starting_uids[!starting_uids %in% ending_uids]) + + # starting_uids %in% ending_uids + warning( + paste0("Missing hy_id/cs_id in output that are in the starting input cross section points: ", + "\n > Number of hy_id/cs_id missing: ", number_uids_not_in_ending_uids + ) + ) + + # warning(paste0(number_uids_not_in_ending_uids, " hy_id/cs_id from the input cross section points ", + # "is missing from the output cross section points")) + + } + + # ---------------------------------------------------------------------------------------------------------------- + # ---- Upload the cross section points parquet to S3 ---- + # ---------------------------------------------------------------------------------------------------------------- + + # name of file and path to save transects gpkg too + out_file <- paste0("nextgen_", path_df$vpu[i], "_cross_sections.parquet") + out_path <- paste0(CS_OUTPUT_DIR, out_file) + + message(round(Sys.time()), " - Saving ML augmented cross section points to:\n - filepath: '", out_path, "'") + + # save cross section points as a parquet to out_path (lynker-spatial/02_cs_pts/cs_pts_.parquet) + arrow::write_parquet(final_cs, out_path) + + s3_save_uri <- paste0(S3_CS_ML_PTS_DIR, out_file) + + # command to copy cross section points parquet to S3 + copy_cs_pts_to_s3 <- paste0("aws s3 cp ", + out_path, + " ", + s3_save_uri, + ifelse(is.null(AWS_PROFILE), "", paste0(" --profile ", AWS_PROFILE)) + ) + + message( + "Copy VPU ", path_df$vpu[i], " ML augmented cross sections to S3:\n - S3 copy command:\n'", + copy_cs_pts_to_s3, "'", + "'\n==========================" + ) + + system(copy_cs_pts_to_s3, intern = TRUE) + + end <- round(Sys.time()) + + message("Finished augmenting cross section points with ML for VPU ", VPU) + message("- Completed at: ", end) + message("==========================") + + rm(net, net_subset, + final_cs, cs_pts, + inchannel_cs, bankful_cs) + gc() + gc() + +} + + + + \ No newline at end of file diff --git a/runners/cs_runner/04_driver.R b/runners/cs_runner/04_driver.R new file mode 100644 index 0000000..212e1ac --- /dev/null +++ b/runners/cs_runner/04_driver.R @@ -0,0 +1,26 @@ +### Run this file to have all runner scripts run in order + +# downloads nextgen datasets +source("runners/cs_runner/config.R") + +# downloads datasets +# - Nextgen data +# - Reference features (for waterbody filtering) +# - ML outputs +# - FEMA 100 year floodplain polygons (FGBs) +source("runners/cs_runner/download_nextgen.R") +source("runners/cs_runner/download_ref_features.R") +source("runners/cs_runner/download_ml_outputs.R") +source("runners/cs_runner/download_fema100.R") + +# simplify, dissolve, FEMA polygons and partition FEMA polygons by VPU +source("runners/cs_runner/partition_fema_by_vpu.R") + +# generate and upload transects datasets +source("runners/cs_runner/01_transects.R") + +# generate and upload cross sections points datasets +source("runners/cs_runner/02_cs_pts.R") + +# Apply machine learning topwidths and depths estimates to DEM cross section points +source("runners/cs_runner/03_inject_ml.R") diff --git a/runners/cs_runner/bathy.R b/runners/cs_runner/bathy.R new file mode 100644 index 0000000..7a550f9 --- /dev/null +++ b/runners/cs_runner/bathy.R @@ -0,0 +1,337 @@ +# Generate the transects + cs_pts + cross sections layers for a single flowlines domain file and DEM file +source("runners/cs_runner/config.R") +source("runners/cs_runner/utils.R") + +# load libraries +# library(hydrofabric3D) +# library(dplyr) +# library(sf) + +# 
Unique Flowline ID column name
+CROSSWALK_ID <- "id"
+
+VPU_ML_BATHYMETRY_PATHS <- list.files(DOMAIN_WITH_FEMA_ML_DIR, full.names = T)
+
+ML_CROSSWALK_ID <- "id"
+
+# ml_outputs <- lapply(VPU_ML_BATHYMETRY_PATHS, function(prq) {
+#   vpu_id <- gsub(".*ml/([a-zA-Z0-9]+).*", "\\1", prq)
+#   arrow::read_parquet(prq) %>%
+#     dplyr::mutate(vpu_id = vpu_id)
+#   }
+# ) %>%
+#   dplyr::bind_rows() %>%
+#   dplyr::select(
+#     dplyr::any_of(ML_CROSSWALK_ID),
+#     vpu_id,
+#     owp_y_bf, owp_y_inchan,
+#     owp_tw_bf, owp_tw_inchan,
+#     owp_dingman_r
+#   )
+# 
+# # rename ML_CROSSWALK_ID (unique ID) to match the CROSSWALK_ID in CS PTS 
+# # TODO: This assumes the IDs do correspond with each other... (built from the same flowlines network)
+# names(ml_outputs)[names(ml_outputs) == ML_CROSSWALK_ID] = CROSSWALK_ID
+# 
+# # Keep only distinct ID rows 
+# ml_outputs <- 
+#   ml_outputs %>% 
+#   dplyr::distinct( 
+#     dplyr::across(dplyr::any_of(CROSSWALK_ID)), 
+#     vpu_id, 
+#     owp_y_bf, owp_y_inchan, 
+#     owp_tw_bf, owp_tw_inchan, 
+#     owp_dingman_r
+#   )
+
+# sf::st_layers(DOMAIN_WITH_FEMA_FLOWLINES_PATH)
+# rm(ml_outputs, ml)
+ml_outputs <- sf::read_sf(DOMAIN_WITH_FEMA_FLOWLINES_PATH, layer = "flowpath-attributes-ml")
+
+ml_outputs <- 
+  ml_outputs %>% 
+  dplyr::select(
+    dplyr::any_of(ML_CROSSWALK_ID),
+    vpu_id = vpuid,   # rename "vpuid" so the dplyr::distinct() call below can reference "vpu_id"
+    owp_y_bf = YCC,
+    owp_y_inchan = Y,  
+    owp_tw_bf = TopWdthCC, 
+    owp_tw_inchan = TopWdth, 
+    owp_dingman_r = dingman_r
+  )
+# 
+# # rename ML_CROSSWALK_ID (unique ID) to match the CROSSWALK_ID in CS PTS 
+# # TODO: This assumes the IDs do correspond with each other... (built from the same flowlines network)
+names(ml_outputs)[names(ml_outputs) == ML_CROSSWALK_ID] = CROSSWALK_ID
+# 
+# # Keep only distinct ID rows 
+ml_outputs <- 
+  ml_outputs %>% 
+  dplyr::distinct( 
+    dplyr::across(dplyr::any_of(CROSSWALK_ID)), 
+    vpu_id, 
+    owp_y_bf, owp_y_inchan, 
+    owp_tw_bf, owp_tw_inchan, 
+    owp_dingman_r
+  )
+
+# ---------------------------------------------------------------------------------
+# ---- Read in CS PTS data ----
+# ---------------------------------------------------------------------------------
+CS_PTS_OUTPUT_PATH <- paste0(DOMAIN_WITH_FEMA_OUTPUT_DIR, "/cs_pts.parquet")
+
+cs_pts <- arrow::read_parquet(CS_PTS_OUTPUT_PATH)
+
+# ---------------------------------------------------------------------------------
+# ---- Join CS PTS with ML data ---
+# ---------------------------------------------------------------------------------
+message(round(Sys.time()), " - Joining ML width/depths estimates to cross section points...")
+
+# ml_outputs %>% 
+#   dplyr::group_by(id) %>% 
+#   dplyr::count(id) %>% 
+#   dplyr::arrange(-n)
+
+# join the ML outputs data to the cross section points
+cs_pts <- 
+  cs_pts %>% 
+  
dplyr::left_join( + cs_bottom_lengths, + by = c(CROSSWALK_ID, "cs_id") + # by = c("hy_id", "cs_id") + ) %>% + dplyr::mutate( + owp_tw_inchan = dplyr::case_when( + owp_tw_inchan <= 0 ~ bottom_length, + TRUE ~ owp_tw_inchan + ), + owp_tw_bf = dplyr::case_when( + owp_tw_bf <= 0 ~ bottom_length, + TRUE ~ owp_tw_bf + ) + ) %>% + dplyr::select(-bottom_length) + +# extract any cross sections that didn't get matched with a "hf_id" and (or?) no ML data +# TODO: look at this stuff with Arash (04/09/2024) +missing_cs <- + cs_pts %>% + dplyr::filter( + is.na(.data[[CROSSWALK_ID]]) | + is.na(owp_tw_inchan) | is.na(owp_y_inchan) | + is.na(owp_tw_bf) | is.na(owp_y_bf) | + is.na(owp_dingman_r) + ) %>% + hydrofabric3D::add_tmp_id(x = CROSSWALK_ID) + +# TODO: Delete this, but time being keeping this to inspect mismatch in between "hy_id" and "hf_id" +# readr::write_csv( +# dplyr::select(missing_cs, -tmp_id), +# paste0(META_PATH, "nextgen_", path_df$vpu[i], "_cross_sections_missing_hf_ids.csv") +# ) + +# Split the cross sections into 2 groups: +# - "Inchannel cs" group are points with BOTH valid banks AND relief --> These get the INCHANNEL TW and Y values from the ML model +# - "Bankful cs" group are points WITHOUT valid banks OR any relief --> These get the BANKFUL TW and Y values from the ML model +inchannel_cs <- + cs_pts %>% + hydrofabric3D::add_tmp_id(x = CROSSWALK_ID) %>% + dplyr::filter(!tmp_id %in% unique(missing_cs$tmp_id)) %>% # NOTE: makes sure to remove any of the "missing" cross sections without data + dplyr::select(-tmp_id) %>% + dplyr::filter(valid_banks & has_relief) %>% + # NOTE: temporarily rename the top widths, depths, and dingman's R columns so they + # work nicely with the "hydrofabric3D::add_cs_bathymetry()" function which takes a dataframe of cross section points + # with "TW", "DEPTH", and "DINGMAN_R" columns for each cross section + dplyr::rename( + TW = owp_tw_inchan, + DEPTH = owp_y_inchan, + DINGMAN_R = owp_dingman_r + ) + # dplyr::slice(1:1000) + +bankful_cs <- + cs_pts %>% + hydrofabric3D::add_tmp_id(x = CROSSWALK_ID) %>% + dplyr::filter(!tmp_id %in% unique(missing_cs$tmp_id)) %>% + dplyr::select(-tmp_id) %>% + dplyr::filter(!valid_banks | !has_relief) %>% + dplyr::rename( + TW = owp_tw_bf, + DEPTH = owp_y_bf, + DINGMAN_R = owp_dingman_r + ) + # dplyr::slice(1:1000) + +# # Replace any topwidth values that are GREATER than the actual cross section length (meters) +# bankful_cs2 <- hydrofabric3D:::fix_oversized_topwidths( +# cross_section_pts = bankful_cs2, +# crosswalk_id = CROSSWALK_ID +# ) +# bankful_cs %>% +# dplyr::filter(TW >= cs_lengthm) %>% +# dplyr::select(TW) + +# sanity check that all rows are accounted for after splitting up data +split_kept_all_rows <- nrow(cs_pts) == (nrow(bankful_cs) + nrow(inchannel_cs) + nrow(missing_cs)) +# split_kept_all_rows <- nrow(cs_pts) == nrow(bankful_cs) + nrow(inchannel_cs) + +if (!split_kept_all_rows) { + warning(paste0("When splitting cross section points into 'bankful' and 'inchannel' groups,", + "\nsome points were not put in either group.", + "\nLikely due to 'valid_banks' and/or 'has_relief' columns have either missing ", + "values or contain values other than TRUE/FALSE") + ) +} + +message(round(Sys.time()), " - Adding cross section bathymetry using inchannel widths/depths estimates...") +# tmp <- +# inchannel_cs %>% +# dplyr::slice(1:10000) +# system.time({ + + # Add bathymetry using "inchannel" estimates + inchannel_cs <- hydrofabric3D::add_cs_bathymetry( + cross_section_pts = inchannel_cs, + # cross_section_pts = tmp, + 
crosswalk_id = CROSSWALK_ID + ) + +# }) + + gc() + + +# arrow::write_parquet(inchannel_cs, "/Users/anguswatters/Desktop/test_ml_cs_pts_06.parquet") +# ml_subset %>% +# dplyr::filter(hy_id == "wb-1005207") %>% +# dplyr::select(owp_y_inchan, owp_tw_inchan) %>% +# .$owp_y_inchan +message(round(Sys.time()), " - Adding cross section bathymetry using bankful widths/depths estimates...") +# system.time({ + + # Add bathymetry using "bankful" estimates + bankful_cs <- hydrofabric3D::add_cs_bathymetry( + cross_section_pts = bankful_cs, + # cross_section_pts = dplyr::slice(bankful_cs, 1:10000), + crosswalk_id = CROSSWALK_ID + ) + +# }) + +# combine the inchannel and bankful cross section points back together, fill out missing values and reclassify the points +final_cs <- dplyr::bind_rows( + dplyr::select( + inchannel_cs, + # inchannel_cs2, + # -hf_id, + -TW, -DEPTH, -DINGMAN_R, + # -is_dem_point, + -dplyr::starts_with("owp") + ), + dplyr::select( + bankful_cs, + # bankful_cs2, + # -hf_id, + -TW, -DEPTH, -DINGMAN_R, + # -is_dem_point, + -dplyr::starts_with("owp") + ), + dplyr::select( + dplyr::mutate( + missing_cs, + is_dem_point = FALSE + ), + # -hf_id, + # -is_dem_point, + -dplyr::starts_with("owp"), + -tmp_id + ) +) %>% + dplyr::group_by(dplyr::across(dplyr::any_of(c(CROSSWALK_ID, "cs_id")))) %>% + # dplyr::group_by(hy_id, cs_id) %>% + tidyr::fill( + c(cs_lengthm, Z_source) + ) %>% + dplyr::ungroup() %>% + dplyr::select( + -point_type, + -class, + -bottom, -left_bank, -right_bank, + -has_relief, -valid_banks + ) + +# arrow::write_parquet(final_cs, "/Users/anguswatters/Desktop/tmp.parquet") +# final_cs <- arrow::read_parquet("/Users/anguswatters/Desktop/tmp.parquet") + + +message(round(Sys.time()), " - Reclassifying cross section point types...") + +# reclassify +final_cs <- hydrofabric3D::classify_points(cs_pts = final_cs, + crosswalk_id = CROSSWALK_ID, + pct_of_length_for_relief = PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF +) + +starting_uids <- hydrofabric3D::get_unique_tmp_ids(cs_pts, x = CROSSWALK_ID) +ending_uids <- hydrofabric3D::get_unique_tmp_ids(final_cs, x = CROSSWALK_ID) + +has_same_number_of_uids <- length(starting_uids) == length(ending_uids) +all_starting_uids_in_ending_uids <- all(starting_uids %in% ending_uids) +all_ending_uids_in_starting_uids <- all(ending_uids %in% starting_uids) + +# throw some warnings if: +# - the number of uids in the input is different from the output +# - there are missing hy_id/cs_id +if (!has_same_number_of_uids) { + warning(paste0("The number of unique hy_id/cs_id is different in the ", + "starting cross section points from the final cross section points:", + "\n > Starting number of unique hy_id/cs_id: ", length(starting_uids), + "\n > Ending number of unique hy_id/cs_id: ", length(ending_uids) + )) +} + +if (!all_starting_uids_in_ending_uids) { + number_uids_not_in_ending_uids <- length(starting_uids[!starting_uids %in% ending_uids]) + + # starting_uids %in% ending_uids + warning( + paste0("Missing hy_id/cs_id in output that are in the starting input cross section points: ", + "\n > Number of hy_id/cs_id missing: ", number_uids_not_in_ending_uids + ) + ) + + # warning(paste0(number_uids_not_in_ending_uids, " hy_id/cs_id from the input cross section points ", + # "is missing from the output cross section points")) + +} +tmp_path <- paste0(DOMAIN_WITH_FEMA_OUTPUT_DIR, "/final_cs.parquet") +message("saving file", tmp_path) + +arrow::write_parquet( + final_cs, + tmp_path +) + diff --git a/runners/cs_runner/config.R b/runners/cs_runner/config.R index 
855cd9b..b70ef37 100644 --- a/runners/cs_runner/config.R +++ b/runners/cs_runner/config.R @@ -2,77 +2,284 @@ pacman::p_load( archive, hydrofabric, - terrainSliceR + hydrofabric3D ) +# # install.packages("devtools") +# devtools::install_github("anguswg-ucsb/hydrofabric3D") + # load root directory source("runners/cs_runner/config_vars.R") +source("runners/cs_runner/utils.R") sf::sf_use_s2(FALSE) -# name of bucket with nextgen data -nextgen_bucket <- "lynker-spatial" +### Cross section point + +# # ------------------------------------------------------------------------------------- +# # ----- S3 names ------ +# # ------------------------------------------------------------------------------------- + +# # AWS S3 bucket URI +# LYNKER_SPATIAL_HF_S3_URI <- "s3://lynker-spatial/" + +# # name of bucket with nextgen data +# LYNKER_SPATIAL_S3_BUCKET_NAME <- "lynker-spatial" + +# # the name of the folder in the S3 bucket with the nextgen data +# S3_BUCKET_NEXTGEN_DIR <- "v20.1/gpkg/" + +# # full URI to the S3 bucket folder with the nextgen data +# S3_BUCKET_NEXTGEN_DIR_URI <- paste0(LYNKER_SPATIAL_HF_S3_URI, S3_BUCKET_NEXTGEN_DIR) + +# # reference features S3 bucket prefix +# S3_BUCKET_REF_FEATURES_URI <- "s3://lynker-spatial/00_reference_features/gpkg/" + +# # S3 prefix/folder of version run +# VERSION <- "v20.1" + +# # ------------------------------------------------------------------------------------- + +# # ------------------------------------------------------------------------------------- +# # ----- Local directories ------ +# # ------------------------------------------------------------------------------------- -# directory to copy nextgen bucket data too -nextgen_dir <- paste0(base_dir, "/pre-release/") +# ### LOCAL DIRS -# model attributes directory -model_attr_dir <- paste0(base_dir, "/model_attributes/") +# # directory to copy nextgen bucket data too +# NEXTGEN_DIR <- paste0(BASE_DIR, "/", S3_BUCKET_NEXTGEN_DIR) +# # NEXTGEN_DIR <- paste0(BASE_DIR, "/pre-release/") -# cross-section data model data directories -transects_dir <- paste0(base_dir, "/01_transects/") -cs_pts_dir <- paste0(base_dir, "/02_cs_pts/") +# # model attributes directory +# MODEL_ATTR_DIR <- paste0(BASE_DIR, "/model_attributes/") -# final output directory with geopackages per VPU -final_dir <- paste0(base_dir, "/cross_sections/") +# # cross-section data model data directories +# TRANSECTS_DIR <- paste0(BASE_DIR, "/01_transects/") +# CS_PTS_DIR <- paste0(BASE_DIR, "/02_cs_pts/") +# # final output directory with geopackages per VPU +# CS_OUTPUT_DIR <- paste0(BASE_DIR, "/cross_sections/") + +# # directory to copy nextgen bucket data too +# REF_FEATURES_DIR <- paste0(BASE_DIR, "/00_reference_features/") + +# # make a directory for the ML outputs data +# ML_OUTPUTS_DIR <- paste0(BASE_DIR, "/ml-outputs/") + +# ------------------------------------------------------------------------------------- +# ----- Create local directories ------ +# ------------------------------------------------------------------------------------- # create directories -dir.create(transects_dir, showWarnings = FALSE) -dir.create(cs_pts_dir, showWarnings = FALSE) -dir.create(final_dir, showWarnings = FALSE) -# dir.create(model_attr_dir, showWarnings = FALSE) - - -##### UTILITY FUNCTION FOR MATCHING FILES BASED ON VPU STRING ###### -# Given 2 character vectors of filenames both including VPU strings after a "nextgen_" string, match them together to -# make sure they are aligned and in the same order -# x is a character vector of file paths with a VPU ID 
preceeded by a "nextgen_" string -# y is a character vector of file paths with a VPU ID preceeded by a "nextgen_" string -# base is a character vector of the base directory of the files. Defaults to NULL -# Returns a dataframe with VPU, x, and y columns -align_files_by_vpu <- function( - x, - y, - base = NULL -) { - - # Regular expression pattern to match numeric pattern after "nextgen_" and remove everything after the ending period - regex_pattern <- "nextgen_(\\d+[A-Za-z]?).*" +dir.create(TRANSECTS_DIR, showWarnings = FALSE) +dir.create(CS_PTS_DIR, showWarnings = FALSE) +dir.create(REF_FEATURES_DIR, showWarnings = FALSE) +dir.create(paste0(REF_FEATURES_DIR, "gpkg/"), showWarnings = FALSE) +dir.create(CS_OUTPUT_DIR, showWarnings = FALSE) +dir.create(ML_OUTPUTS_DIR, showWarnings = FALSE) + +# create the directory if it does NOT exist +if(!dir.exists(NEXTGEN_DIR)) { + message("Directory does not exist at: \n\t'", NEXTGEN_DIR, "'\nCreating directory at: \n\t'", NEXTGEN_DIR, "'") - # path dataframe for X filepaths - x_paths <- data.frame(x = x) + dir.create(NEXTGEN_DIR) +} + +# # create the directory if it does NOT exist +# if(!dir.exists(MODEL_ATTR_DIR)) { +# message("Directory does not exist at: \n\t'", MODEL_ATTR_DIR, "'\nCreating directory at: \n\t'", MODEL_ATTR_DIR, "'") +# dir.create(MODEL_ATTR_DIR) +# } +# dir.create(MODEL_ATTR_DIR, showWarnings = FALSE) + +# ------------------------------------------------------------------------------------- + +# ------------------------------------------------------------------------------------- +# ----- Get the paths / locations of reference_features data ------ +# ------------------------------------------------------------------------------------- + +## Go get a list of the reference features geopackages from S3 and create a save path using the S3 file names to save reference features to local directory + +# list objects in S3 bucket, and regular expression match to nextgen_.gpkg pattern +list_ref_features <- paste0('#!/bin/bash + # AWS S3 Bucket and Directory information + S3_BUCKET="', S3_BUCKET_REF_FEATURES_URI , '" + + # Regular expression pattern to match object keys + PATTERN="reference_features.gpkg" + + S3_OBJECTS=$(aws s3 ls "$S3_BUCKET" | awk \'{print $4}\' | grep -E "$PATTERN") + + echo "$S3_OBJECTS"' +) + +# -------------------------------------------------------------------------- +# ---- Get a list of reference features geopackages ---- +# -------------------------------------------------------------------------- + +# Run the script to get a list of the nextgen geopackages that matched the regular expression above +ref_features <- system(list_ref_features, intern = TRUE) + +# ref features datasets +ref_features_keys <- paste0(S3_BUCKET_REF_FEATURES_URI, ref_features) +ref_features_files <- paste0(REF_FEATURES_DIR, "gpkg/", ref_features) + +# -------------------------------------------------------------------------- +# ---- Create empty file structure for a "new_domain" ---- +# -------------------------------------------------------------------------- + +create_new_domain_dirs(BASE_DIR, NEW_DOMAIN_DIRNAME, with_output = TRUE) + +# -------------------------------------------------------------------------- +# ---- Create empty file structure for a "domain_with_fema" ---- +# -------------------------------------------------------------------------- + +create_new_domain_dirs(BASE_DIR, DOMAIN_WITH_FEMA_DIRNAME, with_output = TRUE) + +# -------------------------------------------------------------------------- +# ---- Get locations of ML 
parquet files in S3 --- +# -------------------------------------------------------------------------- + +VPU_ML_BATHYMETRY_URIS <- unlist( + lapply(VPU_ML_BATHYMETRY_S3_DIRS, function(s3_dir) { + s3_file <- list_s3_objects(s3_dir, ".parquet$", AWS_PROFILE) + paste0(s3_dir, s3_file) + }) +) + +# ------------------------------------------------------------------------------------- +# ---- Download ML parquets for DOMAIN_WITH_FEMA ---- +# ------------------------------------------------------------------------------------- + +# Parse the selected S3 objects keys from the FEMA100 bucket directory copy them to the local destination directory if the file does NOT exist yet +for (s3_uri in VPU_ML_BATHYMETRY_URIS) { + # message(s3_uri) + # s3_uri <- "s3://lynker-hydrofabric/hydrofabric/nextgen/bathymetry/multisource_river_attributes/vpuid=18/part-0.parquet" + # s3_uri <- "s3://lynker-hydrofabric/hydrofabric/nextgen/bathymetry/multisource_river_attributes/vpuid=03W/part-0.parquet" + # s3_uri <- "s3://lynker-hydrofabric/hydrofabric/nextgen/bathymetry/multisource_river_attributes/vpuid=21/" - # path dataframe for Y filepaths - y_paths <- data.frame(y = y) + is_parquet <- endsWith(s3_uri, ".parquet") + vpu_id <- gsub(".*vpuid=([a-zA-Z0-9]+).*", "\\1", s3_uri) - # generate VPU IDs based on file path regular expression matching with "regex_pattern" above - x_paths$vpu <- gsub(regex_pattern, "\\1", x_paths$x) - y_paths$vpu <- gsub(regex_pattern, "\\1", y_paths$y) + message("Checking S3 bucket for VPU ", vpu_id, " ML data...") - # match paths based on VPU column - matched_paths <- dplyr::left_join( - x_paths, - y_paths, - by = "vpu" - ) + if (is_parquet) { + s3_file <- basename(s3_uri) + # vpu_id <- gsub(".*vpuid=([a-zA-Z0-9]+).*", "\\1", s3_uri) + new_file_name <- paste0(vpu_id, "_ml.parquet") + # new_file_name <- gsub("-", "_", paste0(vpu_id, "_", s3_file)) + + local_save_path <- paste0(DOMAIN_WITH_FEMA_ML_DIR, "/", new_file_name) + + if(!file.exists(local_save_path)) { + copy_cmd <- paste0('aws s3 cp ', s3_uri, + " ", local_save_path, " --profile ", AWS_PROFILE) + + message("S3 object:\n > '", s3_uri, "'") + message("Downloading S3 object to:\n > '", local_save_path, "'") + # message("Copy command:\n > '", copy_cmd, "'") + + system(copy_cmd) + + message(" > '", new_file_name, "' download complete!") + message("----------------------------------") + } else { + message("File already exists at:\n > '", local_save_path, "'") + } + + } else { + message("No S3 bucket ML data for VPU ", vpu_id, "...") + } + +} + +# VPU_ML_BATHYMETRY_PATHS <- list.files(DOMAIN_WITH_FEMA_ML_DIR, full.names = T) +# +# ml_outputs <- lapply(VPU_ML_BATHYMETRY_PATHS, function(prq) { +# vpu_id <- gsub(".*ml/([a-zA-Z0-9]+).*", "\\1", prq) +# arrow::read_parquet(prq) %>% +# dplyr::mutate(vpu_id = vpu_id) +# } +# ) %>% +# dplyr::bind_rows() + +# -------------------------------------------------------------------------- +# ---- Get locations of diffusive domain DEM files in S3 ---- +# -------------------------------------------------------------------------- + +COASTAL_BATHY_DEM_S3_URIS <- paste0(COASTAL_BATHY_DEM_S3_DIR_URI, + list_s3_objects(COASTAL_BATHY_DEM_S3_DIR_URI, ".tif$", AWS_PROFILE) + ) + +# ------------------------------------------------------------------------------------- +# ---- Download diffusive domain DEM files from S3 for DOMAIN_WITH_FEMA ---- +# ------------------------------------------------------------------------------------- + +# Parse the selected S3 objects keys from the FEMA100 bucket directory copy them to 
the local destination directory if the file does NOT exist yet +for (s3_uri in COASTAL_BATHY_DEM_S3_URIS) { + message(s3_uri) - # reorder columns - matched_paths <- dplyr::relocate(matched_paths, vpu, x, y) + is_tif <- endsWith(s3_uri, ".tif") + # vpu_id <- gsub(".*vpuid=([a-zA-Z0-9]+).*", "\\1", s3_uri) - if(!is.null(base)) { - matched_paths$base_dir <- base - } + message("Checking S3 bucket for DEM data...") - return(matched_paths) + if (is_tif) { + + s3_file <- basename(s3_uri) + # vpu_id <- gsub(".*vpuid=([a-zA-Z0-9]+).*", "\\1", s3_uri) + # new_file_name <- paste0(vpu_id, "_ml.parquet") + # new_file_name <- gsub("-", "_", paste0(vpu_id, "_", s3_file)) + + local_save_path <- paste0(DOMAIN_WITH_FEMA_DEM_DIR, "/", s3_file) + + if (!file.exists(local_save_path)) { + copy_cmd <- paste0('aws s3 cp ', s3_uri, + " ", local_save_path, " --profile ", AWS_PROFILE) + + message("S3 object:\n > '", s3_uri, "'") + message("Downloading S3 object to:\n > '", local_save_path, "'") + # message("Copy command:\n > '", copy_cmd, "'") + + system(copy_cmd) + + message(" > '", s3_file, "' download complete!") + message("----------------------------------") + } else { + message("File already exists at:\n > '", local_save_path, "'") + } + + } else { + message("No S3 bucket DEM data... ") + } } + +COASTAL_BATHY_DEM_PATHS <- list.files(DOMAIN_WITH_FEMA_DEM_DIR, full.names = TRUE) + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/runners/cs_runner/config_vars.R b/runners/cs_runner/config_vars.R index 9738869..bee5b45 100644 --- a/runners/cs_runner/config_vars.R +++ b/runners/cs_runner/config_vars.R @@ -1,16 +1,293 @@ -### EDIT base_dir, aws_profile, and DEM_URL ### -base_dir <- '/Users/anguswatters/Desktop/lynker-spatial' +### EDIT BASE_DIR, AWS_PROFILE, and DEM_PATH ### + +# --------------------------------------------------------------------------------- +# ---- General paths and constants variables ---- +# - edit to match your local environment +# - BASE_DIR: base directory for local file storage +# - AWS_PROFILE: AWS profile to run CLI commands +# - VERSION: S3 prefix/folder of version to run / generate hydrofabric data for +# --------------------------------------------------------------------------------- +# Base directory for local file storage +BASE_DIR <- '/Users/anguswatters/Desktop/lynker-spatial' # AWS profile to run CLI commands -aws_profile <- "angus-lynker" +AWS_PROFILE <- "angus-lynker" + +# S3 prefix/folder of version run +VERSION <- "v20.1" + +# string to fill in "CS_SOURCE" column in output datasets +CS_SOURCE <- "hydrofabric3D" + +# name of bucket with nextgen data +LYNKER_SPATIAL_S3_BUCKET_NAME <- "lynker-spatial" +LYNKER_SPATIAL_HF_S3_PREFIX <- "hydrofabric" + +# AWS S3 bucket URI +LYNKER_SPATIAL_BASE_S3_URI <- paste0("s3://", LYNKER_SPATIAL_S3_BUCKET_NAME, "/") +LYNKER_SPATIAL_HF_S3_URI <- paste0(LYNKER_SPATIAL_BASE_S3_URI, LYNKER_SPATIAL_HF_S3_PREFIX, "/") +# LYNKER_SPATIAL_HF_S3_URI <- "s3://lynker-spatial/" + +# ------------------------------------------------------------------------------------- +# ---- S3 output directories ----- +# - transects +# - cross section points +# - ML cross section points +# ------------------------------------------------------------------------------------- + +# transect bucket prefix +S3_TRANSECTS_DIR <- paste0(LYNKER_SPATIAL_HF_S3_URI, VERSION, "/3D/transects/") + +# cross section bucket prefix +S3_CS_PTS_DIR <- paste0(LYNKER_SPATIAL_HF_S3_URI, VERSION, "/3D/dem-cross-sections/") + +# cross section bucket prefix +S3_CS_ML_PTS_DIR <- 
paste0(LYNKER_SPATIAL_HF_S3_URI, VERSION, "/3D/cross-sections/") + +# ------------------------------------------------------------------------------------- +# ---- S3 nextgen data paths / directories ----- +# ------------------------------------------------------------------------------------- + +# the name of the folder in the S3 bucket with the nextgen data +S3_BUCKET_NEXTGEN_DIR <- paste0(VERSION, "/gpkg/") +# S3_BUCKET_NEXTGEN_DIR <- "v20.1/gpkg/" + +# full URI to the S3 bucket folder with the nextgen data +S3_BUCKET_NEXTGEN_DIR_URI <- paste0(LYNKER_SPATIAL_HF_S3_URI, S3_BUCKET_NEXTGEN_DIR) + +# reference features S3 bucket prefix +S3_BUCKET_REF_FEATURES_URI <- paste0("s3://", LYNKER_SPATIAL_S3_BUCKET_NAME, "/00_reference_features/gpkg/") +# S3_BUCKET_REF_FEATURES_URI <- "s3://lynker-spatial/00_reference_features/gpkg/" + +# ---------------------------------------------------------------------------- +# ---- Machine learning data path variables ---- +# ---------------------------------------------------------------------------- + +ML_OUTPUTS_S3_FILE <- "channel_ml_outputs.parquet" + +# ML_OUTPUTS_S3_DIR <- paste0(VERSION, "/3D/ml-outputs/") +# ML_OUTPUTS_S3_DIR <- "v20.1/3D/ml-outputs/" + +ML_OUTPUTS_S3_URI <- paste0(LYNKER_SPATIAL_HF_S3_URI, VERSION, "/3D/ml-outputs/", ML_OUTPUTS_S3_FILE) +# ML_OUTPUTS_S3_URI <- paste0(LYNKER_SPATIAL_HF_S3_URI, ML_OUTPUTS_S3_DIR, ML_OUTPUTS_S3_FILE) + +ML_OUTPUTS_PATH <- paste0(BASE_DIR, "/ml-outputs/", ML_OUTPUTS_S3_FILE) + +# path to the remote CONUS net parquet file +CONUS_NETWORK_FILE <- "conus_net.parquet" +CONUS_NETWORK_URI <- paste0(LYNKER_SPATIAL_HF_S3_URI, VERSION, "/", CONUS_NETWORK_FILE) + +# ---------------------------------------------------------------------------- + + +# ------------------------------------------------------------------------------------- +# ---- Local directory / path variables ---- +# ------------------------------------------------------------------------------------- + +# directory to copy nextgen bucket data too +NEXTGEN_DIR <- paste0(BASE_DIR, "/", S3_BUCKET_NEXTGEN_DIR) +# NEXTGEN_DIR <- paste0(BASE_DIR, "/pre-release/") + +# # model attributes directory +# MODEL_ATTR_DIR <- paste0(BASE_DIR, "/model_attributes/") + +# cross-section data model data directories +TRANSECTS_DIR <- paste0(BASE_DIR, "/01_transects/") +CS_PTS_DIR <- paste0(BASE_DIR, "/02_cs_pts/") + +# final output directory with geopackages per VPU +CS_OUTPUT_DIR <- paste0(BASE_DIR, "/cross_sections/") + +# directory to copy nextgen bucket data too +REF_FEATURES_DIR <- paste0(BASE_DIR, "/00_reference_features/") +REF_FEATURES_GPKG_DIR <- paste0(REF_FEATURES_DIR, "gpkg/") + +# make a directory for the ML outputs data +ML_OUTPUTS_DIR <- paste0(BASE_DIR, "/ml-outputs/") + +DEM_DIR <- paste0(BASE_DIR, "/dem") +DEM_VRT_DIR <- paste0(DEM_DIR, "/vrt") +DEM_TIF_DIR <- paste0(DEM_DIR, "/tif") + +# ------------------------------------------------------------------------------------- +# ---- Create local directory / path variables (FEMA data) ---- +# ------------------------------------------------------------------------------------- + +# location of FEMA 100 year flood plain FGB files +FEMA_S3_BUCKET <- "s3://lynker-hydrofabric/" +LYNKER_HF_FEMA_S3_PREFIX <- "FEMA100/" +LYNKER_HF_FEMA_S3_URI <- paste0(FEMA_S3_BUCKET, LYNKER_HF_FEMA_S3_PREFIX) + +# FEMA100 year flood map FGB save location (temporary, will be deleted after processing) +FEMA_FGB_PATH <- paste0(BASE_DIR, "/FEMA100") +FEMA_GEOJSON_PATH <- paste0(BASE_DIR, "/FEMA100_geojson") +FEMA_CLEAN_PATH <- 
paste0(BASE_DIR, "/FEMA100_clean") +FEMA_GPKG_PATH <- paste0(BASE_DIR, "/FEMA100_gpkg") +FEMA_GPKG_BB_PATH <- paste0(BASE_DIR, "/FEMA100_bounding_box") # TODO: Probably can be deleted too, not sure yet + +FEMA_BY_VPU_PATH <- paste0(BASE_DIR, "/FEMA_BY_VPU") +VPU_IDS <- sf::st_drop_geometry(nhdplusTools::get_boundaries())$VPUID + +FEMA_VPU_SUBFOLDERS <- paste0(FEMA_BY_VPU_PATH, "/VPU_", VPU_IDS) +# FEMA_VPU_SUBFOLDERS <- paste0( +# FEMA_BY_VPU_PATH, "/VPU_", +# unlist( +# lapply(list.files(NEXTGEN_DIR, full.names = FALSE), function(vpu_file_names) { +# unlist(regmatches(vpu_file_names, gregexpr("\\d+[A-Za-z]*", vpu_file_names)))}) +# ) +# ) + +CS_EXTENSION_POLYGONS_DIR <- paste0(BASE_DIR, "/cs-extension-polygons") +CONUS_FEMA_GPKG_PATH <- file.path(CS_EXTENSION_POLYGONS_DIR, 'conus_fema.gpkg') + +# paste0(BASE_DIR/ +# ------------------------------------------------------------------------------------- +# ---- OVERWRITE_FEMA_FILES constant logicals---- +# ---- > if TRUE, processing steps will be run again +# and overwrite existing previously processed files +# TODO: Describe these variables +# ------------------------------------------------------------------------------------- + +# Default is TRUE (i.e. a fresh processing run is done from start to finish) +OVERWRITE_FEMA_FILES <- TRUE +DELETE_STAGING_GPKGS <- TRUE # remove intermediary files from the main output folder + +# ---------------------------------------------------------------------------- +# ---- Cross section point extraction constant variables ---- +# ---------------------------------------------------------------------------- # DEM URL -DEM_URL <- "/vsicurl/https://prd-tnm.s3.amazonaws.com/StagedProducts/Elevation/1/TIFF/USGS_Seamless_DEM_1.vrt" +DEM_PATH <- "/vsicurl/https://prd-tnm.s3.amazonaws.com/StagedProducts/Elevation/1/TIFF/USGS_Seamless_DEM_1.vrt" + +# scale argument for cross_section_pts() function. +# The percentage of the length of the transect line to try and extend a transect to see if viable Z values can be found by extending transect line +# Default setting is 50% of the original transect lines length (0.5) +EXTENSION_PCT <- 0.5 + +# percentage of the length each cross section that should be used as a threshold for classifying a cross section as having relief or not +# 1% of the cross sections length is the default value we are using +# (i.e. a 100m long cross section needs a minimum of 1 meter (1%) of relief in its cross section points to be classified as "having relief") +PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF <- 0.01 + +# Whether to collect meta data from runs to generate an output CSV (currently only being created in 02_cs_pts.R) +# TODO: Probably delete this +COLLECT_META <- TRUE + +# Where should meta data CSVs be saved to? 
+# Local path to save CSVs of cross section meta data during each iteration +# TODO: Probably delete this +META_PATH <- '/Users/anguswatters/Desktop/cs_meta/' +# META_PATH <- "/local/path/to/save/cross_section_meta_data/" + +# ------------------------------------------------------------------------------------- +# ---- (New single domain) Local directory / path variables ---- +# ------------------------------------------------------------------------------------- + +# directory for new domain data +NEW_DOMAIN_DIRNAME <- "new_domain" +NEW_DOMAIN_DIR <- paste0(BASE_DIR, "/", NEW_DOMAIN_DIRNAME) + +NEW_DOMAIN_FLOWLINES_DIRNAME <- "flowlines" +NEW_DOMAIN_FLOWLINES_DIR <- paste0(NEW_DOMAIN_DIR, "/", NEW_DOMAIN_FLOWLINES_DIRNAME) + +NEW_DOMAIN_DEM_DIRNAME <- "dem" +NEW_DOMAIN_DEM_DIR <- paste0(NEW_DOMAIN_DIR, "/", NEW_DOMAIN_DEM_DIRNAME) + +NEW_DOMAIN_FLOWLINES_FILE <- "AllDiffusiveCombined.gpkg" +NEW_DOMAIN_FLOWLINES_PATH <- paste0(NEW_DOMAIN_FLOWLINES_DIR, "/", NEW_DOMAIN_FLOWLINES_FILE) + +# # Local DEM file +# NEW_DOMAIN_DEM_FILE <- "hi_dem.tif" +# NEW_DOMAIN_DEM_PATH <- paste0(NEW_DOMAIN_DEM_DIR, "/", NEW_DOMAIN_DEM_FILE) + +# Remote DEM file +NEW_DOMAIN_DEM_PATH <- "/vsicurl/https://prd-tnm.s3.amazonaws.com/StagedProducts/Elevation/1/TIFF/USGS_Seamless_DEM_1.vrt" + +NEW_DOMAIN_TRANSECTS_DIRNAME <- "transects" +NEW_DOMAIN_CS_PTS_DIRNAME <- "cs_pts" +NEW_DOMAIN_CROSS_SECTIONS_DIRNAME <- "cross_sections" + +NEW_DOMAIN_TRANSECTS_DIR <- paste0(NEW_DOMAIN_DIR, "/", NEW_DOMAIN_TRANSECTS_DIRNAME) +NEW_DOMAIN_CS_PTS_DIR <- paste0(NEW_DOMAIN_DIR, "/", NEW_DOMAIN_CS_PTS_DIRNAME) +NEW_DOMAIN_CROSS_SECTIONS_DIR <- paste0(NEW_DOMAIN_DIR, "/", NEW_DOMAIN_CROSS_SECTIONS_DIRNAME) + +# ------------------------------------------------------------------------------------- +# ---- (New single domain) Local directory / path variables ---- +# ------------------------------------------------------------------------------------- + +# directory for new domain data +DOMAIN_WITH_FEMA_DIRNAME <- "domain_with_fema" +DOMAIN_WITH_FEMA_DIR <- paste0(BASE_DIR, "/", DOMAIN_WITH_FEMA_DIRNAME) + +DOMAIN_WITH_FEMA_FLOWLINES_DIRNAME <- "flowlines" +DOMAIN_WITH_FEMA_FLOWLINES_DIR <- paste0(DOMAIN_WITH_FEMA_DIR, "/", DOMAIN_WITH_FEMA_FLOWLINES_DIRNAME) + +DOMAIN_WITH_FEMA_SUBSET_DIRNAME <- "domain_subset" +DOMAIN_WITH_FEMA_SUBSET_DIR <- paste0(DOMAIN_WITH_FEMA_DIR, "/", DOMAIN_WITH_FEMA_SUBSET_DIRNAME) + +DOMAIN_WITH_FEMA_DEM_DIRNAME <- "dem" +DOMAIN_WITH_FEMA_DEM_DIR <- paste0(DOMAIN_WITH_FEMA_DIR, "/", DOMAIN_WITH_FEMA_DEM_DIRNAME) + +DOMAIN_WITH_FEMA_FLOWLINES_FILE <- "ls_conus.gpkg" +DOMAIN_WITH_FEMA_FLOWLINES_PATH <- paste0(DOMAIN_WITH_FEMA_FLOWLINES_DIR, "/", DOMAIN_WITH_FEMA_FLOWLINES_FILE) + +# Geopackage containing area to subset flowlines to before processing +DOMAIN_WITH_FEMA_SUBSET_FILE <- "AllDiffusiveCombined.gpkg" +DOMAIN_WITH_FEMA_SUBSET_PATH <- paste0(DOMAIN_WITH_FEMA_SUBSET_DIR, "/", DOMAIN_WITH_FEMA_SUBSET_FILE) + +# # Local DEM file +# DOMAIN_WITH_FEMA_DEM_FILE <- "hi_dem.tif" +# DOMAIN_WITH_FEMA_DEM_PATH <- paste0(DOMAIN_WITH_FEMA_DEM_DIR, "/", DOMAIN_WITH_FEMA_DEM_FILE) + +# Remote DEM file +DOMAIN_WITH_FEMA_DEM_PATH <- "/vsicurl/https://prd-tnm.s3.amazonaws.com/StagedProducts/Elevation/1/TIFF/USGS_Seamless_DEM_1.vrt" + +DOMAIN_WITH_FEMA_TRANSECTS_DIRNAME <- "transects" +DOMAIN_WITH_FEMA_CS_PTS_DIRNAME <- "cs_pts" +DOMAIN_WITH_FEMA_CROSS_SECTIONS_DIRNAME <- "cross_sections" +DOMAIN_WITH_FEMA_OUTPUT_DIRNAME <- "outputs" +DOMAIN_WITH_FEMA_VPU_SUBSETS_DIRNAME <- "vpu-subsets" +DOMAIN_WITH_FEMA_ML_DIRNAME <- 
"ml" +# aws s3 cp s3://prd-tnm/StagedProducts/Elevation/1/TIFF/USGS_Seamless_DEM_1.vrt /Users/anguswatters/Desktop/3DEP/3DEP.vrt +DOMAIN_WITH_FEMA_TRANSECTS_DIR <- paste0(DOMAIN_WITH_FEMA_DIR, "/", DOMAIN_WITH_FEMA_TRANSECTS_DIRNAME) +DOMAIN_WITH_FEMA_CS_PTS_DIR <- paste0(DOMAIN_WITH_FEMA_DIR, "/", DOMAIN_WITH_FEMA_CS_PTS_DIRNAME) +DOMAIN_WITH_FEMA_CROSS_SECTIONS_DIR <- paste0(DOMAIN_WITH_FEMA_DIR, "/", DOMAIN_WITH_FEMA_CROSS_SECTIONS_DIRNAME) +DOMAIN_WITH_FEMA_OUTPUT_DIR <- paste0(DOMAIN_WITH_FEMA_DIR, "/", DOMAIN_WITH_FEMA_OUTPUT_DIRNAME) +DOMAIN_WITH_FEMA_VPU_SUBSETS_DIR <- paste0(DOMAIN_WITH_FEMA_DIR, "/", DOMAIN_WITH_FEMA_VPU_SUBSETS_DIRNAME) +DOMAIN_WITH_FEMA_ML_DIR <- paste0(DOMAIN_WITH_FEMA_DIR, "/", DOMAIN_WITH_FEMA_ML_DIRNAME) + +ML_AUXILIARY_DATA_S3_URI <- paste0(LYNKER_SPATIAL_BASE_S3_URI, "bathymetry/ml_auxiliary_data") +# LYNKER_SPATIAL_BASE_S3_URI +LYNKER_HYDROFABRIC_S3_BUCKET_BASE_URI <- "s3://lynker-hydrofabric/" +ML_BATHYMETRY_S3_DATA_DIR <- "hydrofabric/nextgen/bathymetry/multisource_river_attributes/" +ML_BATHYMETRY_S3_URI <- paste0(LYNKER_HYDROFABRIC_S3_BUCKET_BASE_URI, ML_BATHYMETRY_S3_DATA_DIR) +VPU_ML_BATHYMETRY_S3_DIRS <- paste0(ML_BATHYMETRY_S3_URI, "vpuid=", nhdplusTools::vpu_boundaries$VPUID, "/") + +COASTAL_BATHY_DEM_S3_DIR <- "coastal_bathy/diffusive_domain/" +COASTAL_BATHY_DEM_S3_DIR_URI <- paste0(LYNKER_HYDROFABRIC_S3_BUCKET_BASE_URI, COASTAL_BATHY_DEM_S3_DIR) +# COASTAL_BATHY_DEM_S3_URIS <- paste0(COASTAL_BATHY_DEM_S3_DIR_URI, list_s3_objects(COASTAL_BATHY_DEM_S3_DIR_URI, ".tif$", AWS_PROFILE)) + + +# # # -------------------------------------------------------------------------- +# # # ---- Get locations of diffusive domain DEM files in S3 ---- +# # # -------------------------------------------------------------------------- +# COASTAL_BATHY_DEM_S3_URIS <- paste0(COASTAL_BATHY_DEM_S3_DIR_URI, list_s3_objects(COASTAL_BATHY_DEM_S3_DIR_URI, ".tif$", AWS_PROFILE)) +# +# # COASTAL_BATHY_DEM_S3_URIS <- paste0(COASTAL_BATHY_DEM_S3_DIR_URI, +# # list_s3_objects(COASTAL_BATHY_DEM_S3_DIR_URI, ".tif$", AWS_PROFILE) +# # ) + + + + + + + + + + + + + -# # create the directory if it does NOT exist -# if(!dir.exists(base_dir)) { -# message(glue::glue('Base directory does not exist...\nCreating directory: {base_dir}')) -# dir.create(base_dir) -# } -### EDIT ### diff --git a/runners/cs_runner/domain_with_fema.R b/runners/cs_runner/domain_with_fema.R new file mode 100644 index 0000000..f673ffb --- /dev/null +++ b/runners/cs_runner/domain_with_fema.R @@ -0,0 +1,1561 @@ +# Generate the transects + cs_pts + cross sections layers for a single flowlines domain file and DEM file +source("runners/cs_runner/config.R") +source("runners/cs_runner/utils.R") + +# load libraries +# library(hydrofabric3D) +# library(dplyr) +# library(sf) + +# Unique Flowline ID column name +CROSSWALK_ID <- "id" + +# --------------------------------------------------------------------- +# --- Read in flowlines +# --------------------------------------------------------------------- +# sf::st_layers(DOMAIN_WITH_FEMA_SUBSET_PATH) + +# Subsetting area +aoi <- sf::read_sf(DOMAIN_WITH_FEMA_SUBSET_PATH, layer = "divides") +aoi <- rmapshaper::ms_simplify(aoi, keep = 0.05) + +id_col <- "id" +# id_col <- "divide_id" + +# query the conus.gpkg for matching IDs +ids <- unique(aoi[[id_col]]) +query_ids <- paste0(paste0("'", ids, "'"), collapse= ", ") + +gpkg_layers <- sf::st_layers(DOMAIN_WITH_FEMA_FLOWLINES_PATH) +layer <- gpkg_layers$name[gpkg_layers$name == "flowpaths"] + +wkt <- + aoi %>% + rmapshaper::ms_dissolve() 
%>% + # rmapshaper::ms_explode() %>% + # sf::st_as_sfc() %>% + sf::st_sf() %>% + sf::st_geometry() %>% + sf::st_as_text() + +# wkt <- sf::st_as_text(sf::st_geometry(aoi)) +# wkt <- +# aoi %>% +# # sf::st_bbox() %>% +# sf::st_as_sfc() %>% +# sf::st_sf() %>% +# sf::st_geometry() %>% +# sf::st_as_text() + +# length(wkt) + +# read in flowlines based on IDs in AOI +flines <- sf::read_sf(DOMAIN_WITH_FEMA_FLOWLINES_PATH, layer = "flowpaths", + wkt_filter = wkt + # query = sprintf("SELECT * FROM \"%s\" WHERE %s IN (%s)", layer, id_col, query_ids), + # query = query + ) + +# bad_ids <- c("wb-14538", "wb-14686", "wb-14687") +# flines <- +# flines %>% +# dplyr::filter(id %in% bad_ids) +# --------------------------------------------------------------------- +# --- Split flowlines by VPU +# --------------------------------------------------------------------- + +# VPUs polygons +VPU_boundaries <- sf::st_transform(nhdplusTools::vpu_boundaries, sf::st_crs(flines)) + +# add a VPU ID column to each flowline +flines <- add_intersects_ids(x = flines, y = VPU_boundaries, id_col = "VPUID") + +# TODO: improve this, manual remap some VPUIDs so that there are +# TODO: less small subsets of flowlines because a small bit of a different VPU is intersected +flines <- + flines %>% + dplyr::mutate( + VPUID = dplyr::case_when( + VPUID == "12" ~ "11, 12, 13", + VPUID == "12, 13" ~ "11, 12, 13", + VPUID == "11, 12" ~ "11, 12, 13", + VPUID == "01" ~ "01, 02", + TRUE ~ VPUID + ) + ) +# rm(flines2) +# unnest_ids(flines2$VPUID2) + +# set of unique VPUs + +VPU_IDS <- unnest_ids(flines$VPUID) +# VPU_IDS <- unnest_ids(flines2$VPUID2) +VPU_IDS + +# all possible FEMA dirs +AOI_FEMA_DIRS <- FEMA_VPU_SUBFOLDERS[basename(FEMA_VPU_SUBFOLDERS) %in% paste0("VPU_", VPU_IDS) ] + +flines %>% + sf::st_drop_geometry() %>% + dplyr::group_by(VPUID) %>% + dplyr::count() +# unique(flines$VPUID) + +# calculate bankfull width +flines <- + flines %>% + hydrofabric3D::add_powerlaw_bankful_width("tot_drainage_areasqkm", 50) %>% + dplyr::select( + dplyr::any_of(CROSSWALK_ID), + VPUID, + # hy_id = id, + lengthkm, + tot_drainage_areasqkm, + bf_width, + mainstem + ) %>% + hydroloom::rename_geometry("geometry") + +transects <- hydrofabric3D::cut_cross_sections( + net = flines, # flowlines network + crosswalk_id = CROSSWALK_ID, # Unique feature ID + cs_widths = flines$bf_width, + # cs_widths = pmax(50, flowlines$bf_width * 11), # cross section width of each "id" linestring ("hy_id") + # cs_widths = pmax(50, flowlines$bf_width), # cross section width of each "id" linestring ("hy_id") + num = 10, # number of cross sections per "id" linestring ("hy_id") + smooth = TRUE, # smooth lines + densify = 3, # densify linestring points + rm_self_intersect = TRUE, # remove self intersecting transects + fix_braids = FALSE, # whether to fix braided flowlines or not + #### Arguments used for when fix_braids = TRUE # TODO: these methods need revision in hydrofabric3D to allow for more flexible processing for data that is NOT COMID based (i.e. 
hy_id) + # terminal_id = NULL, + # braid_threshold = NULL, + # version = 2, + # braid_method = "comid", + # precision = 1, + add = TRUE # whether to add back the original data +) + +sf::write_sf( + transects, + "/Users/anguswatters/Desktop/tmp.gpkg" +) + +`# flines <- +# flines %>% +# dplyr::mutate( +# bf_width = hydrofabric3D::calc_powerlaw_bankful_width(tot_drainage_areasqkm) +# # bf_width = exp(0.700 + 0.365* log(tot_drainage_areasqkm)) +# ) %>% +# # hydrofabric3D::add_powerlaw_bankful_width("tot_drainage_areasqkm", 50) %>% +# dplyr::select( +# dplyr::any_of(CROSSWALK_ID), +# VPUID, +# # hy_id = id, +# lengthkm, +# tot_drainage_areasqkm, +# bf_width, +# mainstem +# ) %>% +# hydroloom::rename_geometry("geometry") + + +transects <- + transects %>% + dplyr::left_join( + flines %>% + dplyr::select(id, VPUID, tot_drainage_areasqkm) %>% + sf::st_drop_geometry(), + by = "id" + ) + +# read each FEMA geopackage into a list +fema <- lapply(GROUP_FEMA_FILES, function(gpkg) sf::read_sf(gpkg)) +transects$VPUID %>% unique() + +GROUP_VPU_IDS <- unnest_ids(transects$VPUID) + +# all FEMA dirs for the current area +GROUP_FEMA_DIRS <- FEMA_VPU_SUBFOLDERS[basename(FEMA_VPU_SUBFOLDERS) %in% paste0("VPU_", GROUP_VPU_IDS) ] +GROUP_FEMA_FILES <- list.files(GROUP_FEMA_DIRS, full.names = T)[grepl("_output.gpkg", list.files(GROUP_FEMA_DIRS, full.names = T))] + +fema <- lapply(GROUP_FEMA_FILES, function(gpkg) sf::read_sf(gpkg)) + +fema <- + fema %>% + dplyr::bind_rows() %>% + dplyr::mutate( + fema_id = 1:dplyr::n() + ) + +message("Simplifying FEMA polygons...") +message(" - Number of geoms BEFORE simplifying: ", nrow(fema)) + +# TODO: this should be a function argument OR removed, shouldn't probably forcibly and silently simplify the input polygons without user knowing.. +# keep 1% of the original points for speed +fema <- rmapshaper::ms_simplify(fema, keep_shapes = T, keep = 0.01, sys = TRUE, sys_mem = 16) +# fema <- rmapshaper::ms_simplify(fema, keep_shapes = T, keep = 0.1, sys = TRUE, sys_mem = 16) + +message(" - Number of geoms AFTER simplifying: ", nrow(fema)) +message("Extending transects out to FEMA 100yr floodplain polygon boundaries - (", Sys.time(), ")") + +transects <- + transects %>% + dplyr::left_join( + dplyr::select(sf::st_drop_geometry(flines), + dplyr::any_of(CROSSWALK_ID), + mainstem + ), + by = CROSSWALK_ID + ) + +# TODO: make sure this 3000m extension distance is appropriate across VPUs +# TODO: also got to make sure that this will be feasible on memory on the larger VPUs... 
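# --- Editor's note: optional diagnostic sketch, not part of the original pipeline ------
# Before the memory-heavy extension call below, a cheap pre-check of how many transects
# actually touch a FEMA polygon can help judge whether the 3000 m cap (and available
# memory) is reasonable for this domain. Only `transects` and `fema` from above are
# assumed to exist; the sample size and the reprojection guard are arbitrary choices.
transect_sample <- dplyr::slice_sample(transects, n = min(5000, nrow(transects)))
fema_same_crs   <- sf::st_transform(fema, sf::st_crs(transect_sample))
touches_fema    <- lengths(sf::st_intersects(transect_sample, fema_same_crs)) > 0

message("Sampled transects touching a FEMA polygon: ", sum(touches_fema),
        " of ", length(touches_fema),
        " (", round(100 * mean(touches_fema), 1), "%)")
# ----------------------------------------------------------------------------------------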
+ext_transects <- hydrofabric3D::extend_transects_to_polygons( + transect_lines = transects, + polygons = fema, + flowlines = flines, + crosswalk_id = CROSSWALK_ID, + grouping_id = "mainstem", + max_extension_distance = 3000 +) + +sf::write_sf( + ext_transects, + "/Users/anguswatters/Desktop/tmp_ext.gpkg" +) + +ext_transects <- + ext_transects %>% + dplyr::select(id, cs_id, cs_lengthm, cs_measure, ds_distance, lengthm, sinuosity, geometry) + +# get cross section point elevations +cs_pts <- hydrofabric3D::cross_section_pts( + cs = ext_transects, + crosswalk_id = CROSSWALK_ID, + points_per_cs = NULL, + min_pts_per_cs = 10, + dem = DEM_PATH +) + +sf::write_sf( + cs_pts, + "/Users/anguswatters/Desktop/tmp_cs_pts.gpkg" +) +# ---------------------------------------------------------------------------------------------------------------- +# ---- STEP 2: Remove any cross section that has ANY missing (NA) Z values, and classify the points ---- +# ---------------------------------------------------------------------------------------------------------------- +# cs_pts2 %>% +# dplyr::slice(1:200) %>% +# dplyr::rename(hy_id = id) %>% +# hydrofabric3D::plot_cs_pts(x = "pt_id", color = "point_type") + +cs_pts <- + # cs_pts2 <- + cs_pts %>% + hydrofabric3D::drop_incomplete_cs_pts(CROSSWALK_ID) %>% + hydrofabric3D::classify_points( + crosswalk_id = CROSSWALK_ID, + pct_of_length_for_relief = PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF + ) + +# }) + +ids_original_cs_pts <- hydrofabric3D::add_tmp_id(cs_pts, x = CROSSWALK_ID)$tmp_id +# ids_original_cs_pts <- hydrofabric3D::add_tmp_id(cs_pts2)$tmp_id + +# sf::write_sf(cs_pts2, "/Users/anguswatters/Desktop/test_improve_cs_pts_classified_11.gpkg") +# sf::write_sf(cs_pts, "/Users/anguswatters/Desktop/test_improve_cs_pts_classified_11_2.gpkg") + + +# ---------------------------------------------------------------------------------------------------------------- +# ---- STEP 3: Try to rectify any no relief and invalid banks cross sections ---- +# ---------------------------------------------------------------------------------------------------------------- + +# system.time({ +fixed_pts <- hydrofabric3D::get_improved_cs_pts( + cs_pts = cs_pts, # cross section points generated from hydrofabric3D::cross_section_pts() + net = flines, # original flowline network + # net = flowlines, # original flowline network + transects = ext_transects, # original transect lines + crosswalk_id = CROSSWALK_ID, + points_per_cs = NULL, + min_pts_per_cs = 10, # number of points per cross sections + dem = DEM_PATH, # DEM to extract points from + scale = EXTENSION_PCT, # How far to extend transects if the points need to be rechecked + pct_of_length_for_relief = PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF, # percent of cross sections length to be needed in relief calculation to consider cross section to "have relief" + fix_ids = FALSE, + verbose = TRUE +) +# }) + +ids_after_fixed_pts <- hydrofabric3D::add_tmp_id(cs_pts, x = CROSSWALK_ID)$tmp_id + +# ---------------------------------------------------------------------------------------------------------------- +# ---- Update transects with extended transects (if exists) ---- +# ---------------------------------------------------------------------------------------------------------------- +ext_transects <- + ext_transects %>% + dplyr::mutate(cs_source = CS_SOURCE) + +out_transects <- match_transects_to_extended_cs_pts( + transect_lines = ext_transects, + fixed_cs_pts = fixed_pts, + crosswalk_id = CROSSWALK_ID, + extension_pct = EXTENSION_PCT +) + 
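# --- Editor's note: optional diagnostic sketch, not part of the original pipeline ------
# A rough answer to the TODO above about whether max_extension_distance = 3000 is
# appropriate: compare geometric transect lengths before and after the FEMA extension.
# Assumes the pre-extension `transects` and the post-extension `ext_transects` are still
# in memory and share "id" / "cs_id" keys.
pre_len           <- sf::st_drop_geometry(transects)[, c("id", "cs_id")]
pre_len$len_pre   <- as.numeric(sf::st_length(transects))

post_len          <- sf::st_drop_geometry(ext_transects)[, c("id", "cs_id")]
post_len$len_post <- as.numeric(sf::st_length(ext_transects))

extension_growth          <- dplyr::inner_join(pre_len, post_len, by = c("id", "cs_id"))
extension_growth$growth_m <- extension_growth$len_post - extension_growth$len_pre

message("Transect length growth from FEMA extension (m), min/median/95th/max: ",
        paste(round(stats::quantile(extension_growth$growth_m, c(0, 0.5, 0.95, 1)), 1),
              collapse = " / "))
# ----------------------------------------------------------------------------------------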
+sf::write_sf( + out_transects, + "/Users/anguswatters/Desktop/tmp_trans_improved.gpkg" +) + +trans_uids <- hydrofabric3D::get_unique_tmp_ids(out_transects, "id") +ext_trans_uids <- hydrofabric3D::get_unique_tmp_ids(ext_transects, "id") +fixed_pts_uids <- hydrofabric3D::get_unique_tmp_ids(fixed_pts, "id") + +all(trans_uids %in% ext_trans_uids) +all(ext_trans_uids %in% trans_uids) + +all(trans_uids %in% fixed_pts_uids) +all(fixed_pts_uids %in% trans_uids) + +all(ext_trans_uids %in% fixed_pts_uids) +all(fixed_pts_uids %in% ext_trans_uids) + +# ---------------------------------------------------------------------------------------------------------------- +# ---- Re enumerate the transects & cross section points "cs_id" ---- +# ---------------------------------------------------------------------------------------------------------------- + +# fixed_pts <- hydrofabric3D:::renumber_cs_ids(df = fixed_pts, crosswalk_id = "hy_id") +# out_transects <- hydrofabric3D:::renumber_cs_ids( +# df = dplyr::mutate(out_transects, pt_id = 1), +# crosswalk_id = "hy_id" +# ) %>% +# dplyr::select(-pt_id) + +fixed_pts2 <- hydrofabric3D:::renumber_cs_ids(df = fixed_pts, crosswalk_id = CROSSWALK_ID) +out_transects2 <- hydrofabric3D:::renumber_cs_ids(df = out_transects, crosswalk_id = CROSSWALK_ID) +sf::write_sf( + out_transects2, + "/Users/anguswatters/Desktop/tmp_trans_improved2.gpkg" +) +object.size(out_transects2) +# classify the cross section points +fixed_pts <- + fixed_pts %>% + dplyr::mutate( + X = sf::st_coordinates(.)[,1], + Y = sf::st_coordinates(.)[,2] + ) %>% + sf::st_drop_geometry() %>% + dplyr::select( + dplyr::any_of(CROSSWALK_ID), + cs_id, + pt_id, + cs_lengthm, + relative_distance, + X, Y, Z, + class, point_type, + bottom, left_bank, right_bank, valid_banks, has_relief # newly added columns (03/06/2024) + ) + +# add Z_source column for source of elevation data +fixed_pts <- + fixed_pts %>% + dplyr::mutate( + Z_source = CS_SOURCE + ) %>% + dplyr::relocate( + dplyr::any_of(CROSSWALK_ID), + cs_id, pt_id, cs_lengthm, relative_distance, X, Y, Z, Z_source, + class, point_type, + bottom, left_bank, right_bank, valid_banks, has_relief) + +ids_before_align <- hydrofabric3D::add_tmp_id(fixed_pts, x = CROSSWALK_ID)$tmp_id + +message("Aligning banks and smoothing bottoms...") +fixed_pts <- hydrofabric3D::align_banks_and_bottoms(cs_pts = fixed_pts, crosswalk_id = CROSSWALK_ID) + +ids_after_align <- hydrofabric3D::add_tmp_id(fixed_pts, x = CROSSWALK_ID)$tmp_id + +message("Reclassifying cross section points...") + +fixed_pts <- hydrofabric3D::classify_points( + cs_pts = fixed_pts, + crosswalk_id = CROSSWALK_ID, + pct_of_length_for_relief = PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF +) + +ids_after_reclassify <- hydrofabric3D::add_tmp_id(fixed_pts, x = CROSSWALK_ID)$tmp_id + +if(all(ids_original_cs_pts %in% ids_after_fixed_pts)) { + message("All hy_id/cs_ids in ORIGINAL DEM point extraction were found in the FIXED points") +} else { + message(" >>> Missing hy_id/cs_ids in ORIGINAL DEM point extraction compared to the FIXED points") +} + +if(all(ids_before_align %in% ids_after_align)) { + message("All hy_id/cs_ids are kept in tact after bank alignment and bottom smoothing") +} else { + message(" >>> Missing hy_id/cs_ids after bank alignment and bottom smoothing") +} + +if(all(ids_after_align %in% ids_after_reclassify)) { + message("All hy_id/cs_ids are kept in tact after RECLASSIFICATION") +} else { + message(" >>> Missing hy_id/cs_ids after RECLASSIFICATION") +} + +sf::write_sf( + out_transects, + 
paste0(DOMAIN_WITH_FEMA_VPU_SUBSETS_DIR, "/", VPU, "_transects.gpkg") +) + +arrow::write_parquet( + fixed_pts, + paste0(DOMAIN_WITH_FEMA_VPU_SUBSETS_DIR, "/", VPU, "_cs_pts.parquet") +) +# save the flowlines subset +DOMAIN_WITH_FEMA_FLOWLINE_SUBSET_PATH <- paste0(DOMAIN_WITH_FEMA_FLOWLINES_DIR, "/flowlines_subset.gpkg") + +sf::write_sf( + flines, + DOMAIN_WITH_FEMA_FLOWLINE_SUBSET_PATH +) + +# split the flowlines into groups by VPU +fline_groups <- dplyr::group_split(flines, VPUID) + +# loop through each VPU group of flowlines and save a local copy +for (i in seq_along(fline_groups)) { + + flowlines <- fline_groups[[i]] + VPU <- unique(flowlines$VPUID) + + # save the flowlines subset + vpu_flowlines_path <- paste0(DOMAIN_WITH_FEMA_VPU_SUBSETS_DIR, "/", gsub(", ", "_", VPU), "_flowlines.gpkg") + + sf::write_sf( + flowlines, + vpu_flowlines_path + ) +} + +for (i in seq_along(fline_groups)) { + + # i = 3 + flowlines <- fline_groups[[i]] + VPU <- unique(flowlines$VPUID) + + message("(", i, "/", length(fline_groups), ")\n", + " > Processing flowlines in VPU group: '", VPU, "'") + + # unique VPUs for group + GROUP_VPU_IDS <- unnest_ids(flowlines$VPUID) + + # all FEMA dirs for the current area + GROUP_FEMA_DIRS <- FEMA_VPU_SUBFOLDERS[basename(FEMA_VPU_SUBFOLDERS) %in% paste0("VPU_", GROUP_VPU_IDS) ] + GROUP_FEMA_FILES <- list.files(GROUP_FEMA_DIRS, full.names = T)[grepl("_output.gpkg", list.files(GROUP_FEMA_DIRS, full.names = T))] + # GROUP_FEMA_FILES <- list.files(GROUP_FEMA_DIRS, full.names = T) + # GROUP_FEMA_FILES <- GROUP_FEMA_FILES[grepl("_output.gpkg", GROUP_FEMA_FILES)] + + # ((flowlines$bf_width) / 11)[1] * 11 + + # create transect lines + transects <- hydrofabric3D::cut_cross_sections( + net = flowlines, # flowlines network + crosswalk_id = CROSSWALK_ID, # Unique feature ID + cs_widths = flowlines$bf_width, + # cs_widths = pmax(50, flowlines$bf_width * 11), # cross section width of each "id" linestring ("hy_id") + # cs_widths = pmax(50, flowlines$bf_width), # cross section width of each "id" linestring ("hy_id") + num = 10, # number of cross sections per "id" linestring ("hy_id") + smooth = TRUE, # smooth lines + densify = 3, # densify linestring points + rm_self_intersect = TRUE, # remove self intersecting transects + fix_braids = FALSE, # whether to fix braided flowlines or not + #### Arguments used for when fix_braids = TRUE # TODO: these methods need revision in hydrofabric3D to allow for more flexible processing for data that is NOT COMID based (i.e. 
hy_id) + # terminal_id = NULL, + # braid_threshold = NULL, + # version = 2, + # braid_method = "comid", + # precision = 1, + add = TRUE # whether to add back the original data + ) +# } + + # add cs_source column and rename cs_widths to cs_lengthm + transects <- + transects %>% + dplyr::mutate( + cs_source = CS_SOURCE + ) + + # --------------------------------------------------------------------- + # --- Extend transects out to FEMA 100yr floodplains + # --------------------------------------------------------------------- + message("Reading in FEMA polygons...") + + # unique(flowlines$VPUID) + + # fema polygons and transect lines + # fema <- sf::read_sf(vpu_fema_file) + + # read each FEMA geopackage into a list + fema <- lapply(GROUP_FEMA_FILES, function(gpkg) sf::read_sf(gpkg)) + + fema <- + fema %>% + dplyr::bind_rows() %>% + dplyr::mutate( + fema_id = 1:dplyr::n() + ) + + message("Simplifying FEMA polygons...") + message(" - Number of geoms BEFORE simplifying: ", nrow(fema)) + + # TODO: this should be a function argument OR removed, shouldn't probably forcibly and silently simplify the input polygons without user knowing.. + # keep 1% of the original points for speed + fema <- rmapshaper::ms_simplify(fema, keep_shapes = T, keep = 0.01, sys = TRUE, sys_mem = 16) + # fema <- rmapshaper::ms_simplify(fema, keep_shapes = T, keep = 0.1, sys = TRUE, sys_mem = 16) + + message(" - Number of geoms AFTER simplifying: ", nrow(fema)) + message("Extending transects out to FEMA 100yr floodplain polygon boundaries - (", Sys.time(), ")") + + transects <- + transects %>% + dplyr::left_join( + dplyr::select(sf::st_drop_geometry(flowlines), + dplyr::any_of(CROSSWALK_ID), + mainstem + ), + by = CROSSWALK_ID + ) + + # TODO: make sure this 3000m extension distance is appropriate across VPUs + # TODO: also got to make sure that this will be feasible on memory on the larger VPUs... 
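  # --- Editor's note: optional diagnostic, not part of the original pipeline ------------
  # The TODO above flags memory use on the larger VPUs. A cheap guard, in the spirit of
  # the object.size() checks used elsewhere in this script, is to log the in-memory size
  # of the extension inputs for this VPU group before the call below.
  message("VPU group '", VPU, "' extension inputs: transects = ",
          format(utils::object.size(transects), units = "MB"),
          ", FEMA polygons = ", format(utils::object.size(fema), units = "MB"))
  # ----------------------------------------------------------------------------------------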
+ ext_transects <- hydrofabric3D::extend_transects_to_polygons( + transect_lines = transects, + polygons = fema, + flowlines = flowlines, + crosswalk_id = CROSSWALK_ID, + grouping_id = "mainstem", + max_extension_distance = 3000 + ) + + # mapview::mapview(transects, color = "green") + + # mapview::mapview(transects2, color = "red") + + # transects <- + # transects %>% + ext_transects <- + ext_transects %>% + hydrofabric3D::add_tmp_id(x = CROSSWALK_ID) %>% + dplyr::mutate(is_extended = FALSE) %>% + dplyr::select( + dplyr::any_of(CROSSWALK_ID), + cs_id, + cs_lengthm, + cs_source, + cs_measure, + is_extended, + geometry + ) + + out_path <- paste0(DOMAIN_WITH_FEMA_VPU_SUBSETS_DIR, "/", + paste(GROUP_VPU_IDS, collapse = "_"), "_transects.gpkg" + ) + + # ext_transects %>% + # dplyr::filter(id == "wb-2414904") %>% + # .$geometry %>% + # mapview::mapview() + + # ext_transects %>% + # dplyr::slice(90000:100000) %>% + # .$geometry %>% + # mapview::mapview() + + # wb-2414904 + + # length_data <- + # ext_transects %>% + # dplyr::mutate( + # line_len = as.numeric(sf::st_length(geometry)) + # ) + + message("Writting transect lines for VPU group: '", VPU, "'", + "\n > '", out_path, "'") + + sf::write_sf(ext_transects, out_path) + + message("Finished writting transects!") + +} + +# --------------------------------------------------------------------- +# --- Bind together transects into single dataset +# --------------------------------------------------------------------- +vpu_subset_paths <- list.files(DOMAIN_WITH_FEMA_VPU_SUBSETS_DIR, full.names = T) + +transect_paths <- vpu_subset_paths[grepl("_transects.gpkg", vpu_subset_paths)] +flowline_paths <- vpu_subset_paths[grepl("_flowlines.gpkg", vpu_subset_paths)] + +# read each transect geopackages into a list +transects <- lapply(transect_paths, function(gpkg) sf::read_sf(gpkg)) +# transects[[5]] + +transects <- + transects %>% + dplyr::bind_rows() + +paths_df <- data.frame( + t = transect_paths, + f = flowline_paths +) %>% + dplyr::mutate( + vpu = gsub("_transects.gpkg", "", basename(t)) + ) + +for (i in 1:nrow(paths_df)) { + # i =3 + # i = 2 + VPU <- paths_df$vpu[i] + t_path <- paths_df$t[i] + f_path <- paths_df$f[i] + + message("Creating VPU ", VPU, " cross section points:", + "\n - flowpaths: '", f_path, "'", + "\n - transects: '", t_path, "'" + ) + + ################### + message("Reading in transects...\n > ", t_path) + # read in transects data + transects <- sf::read_sf(t_path) + + message("Reading in flowlines... 
\n > ", f_path) + # read in nextgen data + flowlines <- sf::read_sf(f_path) + + message("Extracting cross section points...") + # ---------------------------------------------------------------------------------------------------------------- + # ---- STEP 1: Extract cs points from DEM ---- + # ---------------------------------------------------------------------------------------------------------------- + # system.time({ + + # get cross section point elevations + cs_pts <- hydrofabric3D::cross_section_pts( + cs = transects, + crosswalk_id = CROSSWALK_ID, + points_per_cs = NULL, + min_pts_per_cs = 10, + dem = DEM_PATH + ) + + # ---------------------------------------------------------------------------------------------------------------- + # ---- STEP 2: Remove any cross section that has ANY missing (NA) Z values, and classify the points ---- + # ---------------------------------------------------------------------------------------------------------------- + # cs_pts2 %>% + # dplyr::slice(1:200) %>% + # dplyr::rename(hy_id = id) %>% + # hydrofabric3D::plot_cs_pts(x = "pt_id", color = "point_type") + + cs_pts <- + # cs_pts2 <- + cs_pts %>% + hydrofabric3D::drop_incomplete_cs_pts(CROSSWALK_ID) %>% + hydrofabric3D::classify_points( + crosswalk_id = CROSSWALK_ID, + pct_of_length_for_relief = PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF + ) + + # }) + + ids_original_cs_pts <- hydrofabric3D::add_tmp_id(cs_pts, x = CROSSWALK_ID)$tmp_id + # ids_original_cs_pts <- hydrofabric3D::add_tmp_id(cs_pts2)$tmp_id + + # sf::write_sf(cs_pts2, "/Users/anguswatters/Desktop/test_improve_cs_pts_classified_11.gpkg") + # sf::write_sf(cs_pts, "/Users/anguswatters/Desktop/test_improve_cs_pts_classified_11_2.gpkg") + + + # ---------------------------------------------------------------------------------------------------------------- + # ---- STEP 3: Try to rectify any no relief and invalid banks cross sections ---- + # ---------------------------------------------------------------------------------------------------------------- + + # system.time({ + fixed_pts <- hydrofabric3D::get_improved_cs_pts( + cs_pts = cs_pts, # cross section points generated from hydrofabric3D::cross_section_pts() + net = flowlines, # original flowline network + # net = flowlines, # original flowline network + transects = transects, # original transect lines + crosswalk_id = CROSSWALK_ID, + points_per_cs = NULL, + min_pts_per_cs = 10, # number of points per cross sections + dem = DEM_PATH, # DEM to extract points from + scale = EXTENSION_PCT, # How far to extend transects if the points need to be rechecked + pct_of_length_for_relief = PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF, # percent of cross sections length to be needed in relief calculation to consider cross section to "have relief" + fix_ids = FALSE, + verbose = TRUE + ) + # }) + + ids_after_fixed_pts <- hydrofabric3D::add_tmp_id(cs_pts, x = CROSSWALK_ID)$tmp_id + + # ---------------------------------------------------------------------------------------------------------------- + # ---- Update transects with extended transects (if exists) ---- + # ---------------------------------------------------------------------------------------------------------------- + + out_transects <- match_transects_to_extended_cs_pts( + transect_lines = transects, + fixed_cs_pts = fixed_pts, + crosswalk_id = CROSSWALK_ID, + extension_pct = EXTENSION_PCT + ) + + # ---------------------------------------------------------------------------------------------------------------- + # ---- Re enumerate the transects 
& cross section points "cs_id" ---- + # ---------------------------------------------------------------------------------------------------------------- + + # fixed_pts <- hydrofabric3D:::renumber_cs_ids(df = fixed_pts, crosswalk_id = "hy_id") + # out_transects <- hydrofabric3D:::renumber_cs_ids( + # df = dplyr::mutate(out_transects, pt_id = 1), + # crosswalk_id = "hy_id" + # ) %>% + # dplyr::select(-pt_id) + + fixed_pts <- hydrofabric3D:::renumber_cs_ids(df = fixed_pts, crosswalk_id = CROSSWALK_ID) + out_transects <- hydrofabric3D:::renumber_cs_ids(df = out_transects, crosswalk_id = CROSSWALK_ID) + + # out_transects %>% + # dplyr::filter(id == "wb-2425607") %>% .$geometry %>% plot() + # ---------------------------------------------------------------------------------------------------------------- + # ---- STEP 4: Update transects with extended transects (if exists) ---- + # ---------------------------------------------------------------------------------------------------------------- + + # classify the cross section points + fixed_pts <- + fixed_pts %>% + dplyr::mutate( + X = sf::st_coordinates(.)[,1], + Y = sf::st_coordinates(.)[,2] + ) %>% + sf::st_drop_geometry() %>% + dplyr::select( + dplyr::any_of(CROSSWALK_ID), + cs_id, + pt_id, + cs_lengthm, + relative_distance, + X, Y, Z, + class, point_type, + bottom, left_bank, right_bank, valid_banks, has_relief # newly added columns (03/06/2024) + ) + + # add Z_source column for source of elevation data + fixed_pts <- + fixed_pts %>% + dplyr::mutate( + Z_source = CS_SOURCE + ) %>% + dplyr::relocate( + dplyr::any_of(CROSSWALK_ID), + cs_id, pt_id, cs_lengthm, relative_distance, X, Y, Z, Z_source, + class, point_type, + bottom, left_bank, right_bank, valid_banks, has_relief) + + ids_before_align <- hydrofabric3D::add_tmp_id(fixed_pts, x = CROSSWALK_ID)$tmp_id + + message("Aligning banks and smoothing bottoms...") + fixed_pts <- hydrofabric3D::align_banks_and_bottoms(cs_pts = fixed_pts, crosswalk_id = CROSSWALK_ID) + + ids_after_align <- hydrofabric3D::add_tmp_id(fixed_pts, x = CROSSWALK_ID)$tmp_id + + message("Reclassifying cross section points...") + + fixed_pts <- hydrofabric3D::classify_points( + cs_pts = fixed_pts, + crosswalk_id = CROSSWALK_ID, + pct_of_length_for_relief = PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF + ) + + ids_after_reclassify <- hydrofabric3D::add_tmp_id(fixed_pts, x = CROSSWALK_ID)$tmp_id + + if(all(ids_original_cs_pts %in% ids_after_fixed_pts)) { + message("All hy_id/cs_ids in ORIGINAL DEM point extraction were found in the FIXED points") + } else { + message(" >>> Missing hy_id/cs_ids in ORIGINAL DEM point extraction compared to the FIXED points") + } + + if(all(ids_before_align %in% ids_after_align)) { + message("All hy_id/cs_ids are kept in tact after bank alignment and bottom smoothing") + } else { + message(" >>> Missing hy_id/cs_ids after bank alignment and bottom smoothing") + } + + if(all(ids_after_align %in% ids_after_reclassify)) { + message("All hy_id/cs_ids are kept in tact after RECLASSIFICATION") + } else { + message(" >>> Missing hy_id/cs_ids after RECLASSIFICATION") + } + + sf::write_sf( + out_transects, + paste0(DOMAIN_WITH_FEMA_VPU_SUBSETS_DIR, "/", VPU, "_transects.gpkg") + ) + + arrow::write_parquet( + fixed_pts, + paste0(DOMAIN_WITH_FEMA_VPU_SUBSETS_DIR, "/", VPU, "_cs_pts.parquet") + ) + +} + +# TODO: Save all cross section points + transects into single geopackages (transects.gpkg, cs_pts.gpkg) + +# --------------------------------------------------------------------------------- +# ---- Merge all the 
transects / cs points into single files +# --------------------------------------------------------------------------------- + +vpu_subset_paths <- list.files(DOMAIN_WITH_FEMA_VPU_SUBSETS_DIR, full.names = T) + +# --------------------------------------------------------------------------------- +# ---- Read all transects and save to single gpkg ---- +# --------------------------------------------------------------------------------- + +transect_paths <- vpu_subset_paths[grepl("_transects.gpkg", vpu_subset_paths)] + +transects <- lapply(transect_paths, function(gpkg) {sf::read_sf(gpkg) }) %>% + dplyr::bind_rows() %>% + hydroloom::rename_geometry("geometry") + +TRANSECTS_OUTPUT_PATH <- paste0(DOMAIN_WITH_FEMA_OUTPUT_DIR, "/transects.gpkg") + +sf::write_sf( + transects, + TRANSECTS_OUTPUT_PATH +) + +# --------------------------------------------------------------------------------- +# ---- Read all CS PTs and save to single parquet ---- +# --------------------------------------------------------------------------------- + +cs_pts_paths <- vpu_subset_paths[grepl("_cs_pts.parquet", vpu_subset_paths)] +cs_pts <- lapply(cs_pts_paths, function(prq) arrow::read_parquet(prq)) %>% + dplyr::bind_rows() + +CS_PTS_OUTPUT_PATH <- paste0(DOMAIN_WITH_FEMA_OUTPUT_DIR, "/cs_pts.parquet") + +arrow::write_parquet( + cs_pts, + CS_PTS_OUTPUT_PATH +) + +# --------------------------------------------------------------------------------- +# ---- Inject ML predicted top widths / Dingman's R ---- +# --------------------------------------------------------------------------------- + +# --------------------------------------------------------------------------------- +# ---- Read in ML data ---- +# --------------------------------------------------------------------------------- +VPU_ML_BATHYMETRY_PATHS <- list.files(DOMAIN_WITH_FEMA_ML_DIR, full.names = T) + +ML_CROSSWALK_ID <- "id" + +# ml_outputs <- lapply(VPU_ML_BATHYMETRY_PATHS, function(prq) { +# vpu_id <- gsub(".*ml/([a-zA-Z0-9]+).*", "\\1", prq) +# arrow::read_parquet(prq) %>% +# dplyr::mutate(vpu_id = vpu_id) +# } +# ) %>% +# dplyr::bind_rows() %>% +# dplyr::select( +# dplyr::any_of(ML_CROSSWALK_ID), +# vpu_id, +# owp_y_bf, owp_y_inchan, +# owp_tw_bf, owp_tw_inchan, +# owp_dingman_r +# ) +# +# # rename ML_CROSSWALK_ID (unique ID) to match the CROSSWALK_ID in CS PTS +# # TODO: This assumes the IDs do correspond with eachother... (built from same flowlines network) +# names(ml_outputs)[names(ml_outputs) == ML_CROSSWALK_ID] = CROSSWALK_ID +# +# # Keep only distinct ID rows +# ml_outputs <- + # ml_outputs %>% + # dplyr::distinct( + # dplyr::across(dplyr::any_of(CROSSWALK_ID)), + # vpu_id, + # owp_y_bf, owp_y_inchan, + # owp_tw_bf, owp_tw_inchan, + # owp_dingman_r + # ) + +# sf::st_layers(DOMAIN_WITH_FEMA_FLOWLINES_PATH) +# rm(ml_outputs, ml) +ml_outputs <- sf::read_sf(DOMAIN_WITH_FEMA_FLOWLINES_PATH, layer = "flowpath-attributes-ml") + +ml_outputs <- + ml_outputs %>% + dplyr::select( + dplyr::any_of(ML_CROSSWALK_ID), + vpuid, + owp_y_bf = YCC, + owp_y_inchan = Y, + owp_tw_bf = TopWdthCC, + owp_tw_inchan = TopWdth, + owp_dingman_r = dingman_r + ) +# +# # rename ML_CROSSWALK_ID (unique ID) to match the CROSSWALK_ID in CS PTS +# # TODO: This assumes the IDs do correspond with eachother... 
(built from same flowlines network) +names(ml_outputs)[names(ml_outputs) == ML_CROSSWALK_ID] = CROSSWALK_ID +# +# # Keep only distinct ID rows +ml_outputs <- + ml_outputs %>% + dplyr::distinct( + dplyr::across(dplyr::any_of(CROSSWALK_ID)), + vpu_id, + owp_y_bf, owp_y_inchan, + owp_tw_bf, owp_tw_inchan, + owp_dingman_r + ) + +# --------------------------------------------------------------------------------- +# ---- Read in CS PTS data ---- +# --------------------------------------------------------------------------------- +CS_PTS_OUTPUT_PATH <- paste0(DOMAIN_WITH_FEMA_OUTPUT_DIR, "/cs_pts.parquet") + +cs_pts <- arrow::read_parquet(CS_PTS_OUTPUT_PATH) + +# --------------------------------------------------------------------------------- +# ---- Join CS PTS with ML data --- +# --------------------------------------------------------------------------------- +message(round(Sys.time()), " - Joining ML width/depths estimates to cross section points...") + +# ml_outputs %>% +# dplyr::group_by(id) %>% +# dplyr::count(id) %>% +# dplyr::arrange(-n) + +# join the ML outputs data to the cross section points +cs_pts <- + cs_pts %>% + dplyr::left_join( + dplyr::select(ml_outputs, + dplyr::any_of(CROSSWALK_ID), + owp_tw_inchan, + owp_y_inchan, + owp_tw_bf, + owp_y_bf, + owp_dingman_r + ), + by = CROSSWALK_ID + ) + +# --------------------------------------------------------------------------------- +# ---- Fixing negative depths/widths estimates ---- +# --------------------------------------------------------------------------------- +message(round(Sys.time()), " - Replacing any negative width/depth estimates with cross section bottom lengths...") + +cs_bottom_lengths <- hydrofabric3D::get_cs_bottom_length(cross_section_pts = cs_pts, crosswalk_id = CROSSWALK_ID) + +# TODO: for now we replace any negative TW values with the length of the bottom of the cross section +# TODO: This method + the negative model output values both need to be looked into (04/05/2024) +cs_pts <- + cs_pts %>% + dplyr::left_join( + cs_bottom_lengths, + by = c(CROSSWALK_ID, "cs_id") + # by = c("hy_id", "cs_id") + ) %>% + dplyr::mutate( + owp_tw_inchan = dplyr::case_when( + owp_tw_inchan <= 0 ~ bottom_length, + TRUE ~ owp_tw_inchan + ), + owp_tw_bf = dplyr::case_when( + owp_tw_bf <= 0 ~ bottom_length, + TRUE ~ owp_tw_bf + ) + ) %>% + dplyr::select(-bottom_length) + +# extract any cross sections that didn't get matched with a "hf_id" and (or?) 
no ML data +# TODO: look at this stuff with Arash (04/09/2024) +missing_cs <- + cs_pts %>% + dplyr::filter( + is.na(.data[[CROSSWALK_ID]]) | + is.na(owp_tw_inchan) | is.na(owp_y_inchan) | + is.na(owp_tw_bf) | is.na(owp_y_bf) | + is.na(owp_dingman_r) + ) %>% + hydrofabric3D::add_tmp_id(x = CROSSWALK_ID) + +# TODO: Delete this, but time being keeping this to inspect mismatch in between "hy_id" and "hf_id" +# readr::write_csv( +# dplyr::select(missing_cs, -tmp_id), +# paste0(META_PATH, "nextgen_", path_df$vpu[i], "_cross_sections_missing_hf_ids.csv") +# ) + +# Split the cross sections into 2 groups: +# - "Inchannel cs" group are points with BOTH valid banks AND relief --> These get the INCHANNEL TW and Y values from the ML model +# - "Bankful cs" group are points WITHOUT valid banks OR any relief --> These get the BANKFUL TW and Y values from the ML model +inchannel_cs <- + cs_pts %>% + hydrofabric3D::add_tmp_id(x = CROSSWALK_ID) %>% + dplyr::filter(!tmp_id %in% unique(missing_cs$tmp_id)) %>% # NOTE: makes sure to remove any of the "missing" cross sections without data + dplyr::select(-tmp_id) %>% + dplyr::filter(valid_banks & has_relief) %>% + # NOTE: temporarily rename the top widths, depths, and dingman's R columns so they + # work nicely with the "hydrofabric3D::add_cs_bathymetry()" function which takes a dataframe of cross section points + # with "TW", "DEPTH", and "DINGMAN_R" columns for each cross section + dplyr::rename( + TW = owp_tw_inchan, + DEPTH = owp_y_inchan, + DINGMAN_R = owp_dingman_r + ) + +bankful_cs <- + cs_pts %>% + hydrofabric3D::add_tmp_id(x = CROSSWALK_ID) %>% + dplyr::filter(!tmp_id %in% unique(missing_cs$tmp_id)) %>% + dplyr::select(-tmp_id) %>% + dplyr::filter(!valid_banks | !has_relief) %>% + dplyr::rename( + TW = owp_tw_bf, + DEPTH = owp_y_bf, + DINGMAN_R = owp_dingman_r + ) + + +# sanity check that all rows are accounted for after splitting up data +split_kept_all_rows <- nrow(cs_pts) == (nrow(bankful_cs) + nrow(inchannel_cs) + nrow(missing_cs)) +# split_kept_all_rows <- nrow(cs_pts) == nrow(bankful_cs) + nrow(inchannel_cs) + +if (!split_kept_all_rows) { + warning(paste0("When splitting cross section points into 'bankful' and 'inchannel' groups,", + "\nsome points were not put in either group.", + "\nLikely due to 'valid_banks' and/or 'has_relief' columns have either missing ", + "values or contain values other than TRUE/FALSE") + ) +} + +message(round(Sys.time()), " - Adding cross section bathymetry using inchannel widths/depths estimates...") +# tmp <- +# inchannel_cs %>% +# dplyr::slice(1:10000) +system.time({ + +# Add bathymetry using "inchannel" estimates +inchannel_cs <- hydrofabric3D::add_cs_bathymetry( + cross_section_pts = inchannel_cs, + # cross_section_pts = tmp, + crosswalk_id = CROSSWALK_ID +) + +}) + + + +# arrow::write_parquet(inchannel_cs, "/Users/anguswatters/Desktop/test_ml_cs_pts_06.parquet") +# ml_subset %>% +# dplyr::filter(hy_id == "wb-1005207") %>% +# dplyr::select(owp_y_inchan, owp_tw_inchan) %>% +# .$owp_y_inchan +message(round(Sys.time()), " - Adding cross section bathymetry using bankful widths/depths estimates...") +system.time({ + +# Add bathymetry using "bankful" estimates +bankful_cs <- hydrofabric3D::add_cs_bathymetry( + cross_section_pts = bankful_cs, + # cross_section_pts = dplyr::slice(bankful_cs, 1:10000), + crosswalk_id = CROSSWALK_ID +) + +}) + +# combine the inchannel and bankful cross section points back together, fill out missing values and reclassify the points +final_cs <- dplyr::bind_rows( + dplyr::select( + 
inchannel_cs, + # inchannel_cs2, + # -hf_id, + -TW, -DEPTH, -DINGMAN_R, + # -is_dem_point, + -dplyr::starts_with("owp") + ), + dplyr::select( + bankful_cs, + # bankful_cs2, + # -hf_id, + -TW, -DEPTH, -DINGMAN_R, + # -is_dem_point, + -dplyr::starts_with("owp") + ), + dplyr::select( + dplyr::mutate( + missing_cs, + is_dem_point = FALSE + ), + # -hf_id, + # -is_dem_point, + -dplyr::starts_with("owp"), + -tmp_id + ) + ) %>% + dplyr::group_by(dplyr::across(dplyr::any_of(c(CROSSWALK_ID, "cs_id")))) %>% + # dplyr::group_by(hy_id, cs_id) %>% + tidyr::fill( + c(cs_lengthm, Z_source) + ) %>% + dplyr::ungroup() %>% + dplyr::select( + -point_type, + -class, + -bottom, -left_bank, -right_bank, + -has_relief, -valid_banks + ) + +# arrow::write_parquet(final_cs, "/Users/anguswatters/Desktop/tmp.parquet") +# final_cs <- arrow::read_parquet("/Users/anguswatters/Desktop/tmp.parquet") + + +message(round(Sys.time()), " - Reclassifying cross section point types...") + +# reclassify +final_cs <- hydrofabric3D::classify_points(cs_pts = final_cs, + crosswalk_id = CROSSWALK_ID, + pct_of_length_for_relief = PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF + ) + +starting_uids <- hydrofabric3D::get_unique_tmp_ids(cs_pts, x = CROSSWALK_ID) +ending_uids <- hydrofabric3D::get_unique_tmp_ids(final_cs, x = CROSSWALK_ID) + +has_same_number_of_uids <- length(starting_uids) == length(ending_uids) +all_starting_uids_in_ending_uids <- all(starting_uids %in% ending_uids) +all_ending_uids_in_starting_uids <- all(ending_uids %in% starting_uids) + +# throw some warnings if: +# - the number of uids in the input is different from the output +# - there are missing hy_id/cs_id +if (!has_same_number_of_uids) { + warning(paste0("The number of unique hy_id/cs_id is different in the ", + "starting cross section points from the final cross section points:", + "\n > Starting number of unique hy_id/cs_id: ", length(starting_uids), + "\n > Ending number of unique hy_id/cs_id: ", length(ending_uids) + )) +} + +if (!all_starting_uids_in_ending_uids) { + number_uids_not_in_ending_uids <- length(starting_uids[!starting_uids %in% ending_uids]) + + # starting_uids %in% ending_uids + warning( + paste0("Missing hy_id/cs_id in output that are in the starting input cross section points: ", + "\n > Number of hy_id/cs_id missing: ", number_uids_not_in_ending_uids + ) + ) + + # warning(paste0(number_uids_not_in_ending_uids, " hy_id/cs_id from the input cross section points ", + # "is missing from the output cross section points")) + +} + +# tmp_path <- paste0(DOMAIN_WITH_FEMA_OUTPUT_DIR, "/final_cs2.parquet") +# message("saving file", tmp_path) +# +# final_cs <- arrow::read_parquet(tmp_path) +# +# # save cross section points as a parquet to out_path (domain/outputs/cross_sections.parquet) +# arrow::write_parquet( +# dplyr::select(final_cs, +# -is_dem_point +# ), +# paste0(DOMAIN_WITH_FEMA_OUTPUT_DIR, "/final_cs2.parquet") +# ) + +# --------------------------------------------------------------------------------- +# ---- Write final cross section points data ---- +# --------------------------------------------------------------------------------- + +CROSS_SECTIONS_OUTPUT_PATH <- paste0(DOMAIN_WITH_FEMA_OUTPUT_DIR, "/cross_sections.parquet") +message(round(Sys.time()), " - Saving ML augmented cross section points to:\n - filepath: '", CROSS_SECTIONS_OUTPUT_PATH, "'") + +# save cross section points as a parquet to out_path (domain/outputs/cross_sections.parquet) +arrow::write_parquet( + dplyr::select(final_cs, + -is_dem_point + ), + CROSS_SECTIONS_OUTPUT_PATH +) + 
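+
+# Optional diagnostic (a hedged sketch, not part of the original pipeline): if either
+# warning above fires, base R set operations on the uid vectors computed earlier show
+# exactly which hy_id/cs_id combinations changed between the input and the final points.
+missing_uids <- setdiff(starting_uids, ending_uids)   # uids present in the input but not the output
+new_uids     <- setdiff(ending_uids, starting_uids)   # uids that only appear in the output
+message("Missing hy_id/cs_id: ", length(missing_uids), " | New hy_id/cs_id: ", length(new_uids))
+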
+INTERNAL_CROSS_SECTIONS_PATH <- paste0("/Users/anguswatters/Desktop/cross_sections_is_dem_point.parquet") +arrow::write_parquet( + final_cs, + INTERNAL_CROSS_SECTIONS_PATH +) + +# --------------------------------------------------------------------------------- +# ---- Substitue diffusive domain DEMs Z values ---- +# --------------------------------------------------------------------------------- +final_cs <- arrow::read_parquet(CROSS_SECTIONS_OUTPUT_PATH) +# INTERNAL_CROSS_SECTIONS_PATH <- paste0("/Users/anguswatters/Desktop/cross_sections2.parquet") + +bb_df <- lapply(COASTAL_BATHY_DEM_PATHS, function(path) { + r <- terra::rast(path) + extent <- terra::ext(r) + # r <- terra::project(r, "EPSG:5070") + # terra::set.crs(r, "EPSG:5070") + ext_df <- data.frame(lapply(extent, function(i) {i})) + ext_df$crs <- terra::crs(r) + ext_df$file <- basename(path) + ext_df$path <- path + + return(ext_df) + }) %>% + dplyr::bind_rows() +# final_cs + +# INTERNAL_CROSS_SECTIONS_PATH <- paste0("/Users/anguswatters/Desktop/cross_sections_is_dem_point.parquet") +# arrow::write_parquet( +# final_cs, +# INTERNAL_CROSS_SECTIONS_PATH +# ) + +# CROSS_SECTIONS_OUTPUT_PATH <- paste0(DOMAIN_WITH_FEMA_OUTPUT_DIR, "/cross_sections.parquet") +# final_cs <- arrow::read_parquet(CROSS_SECTIONS_OUTPUT_PATH) +# START_PATH <- paste0("/Users/anguswatters/Desktop/cross_sections2.parquet") +# +# arrow::write_parquet( +# final_cs, +# START_PATH +# ) + +# rm(i, cs, df, pts_subset, has_pts_in_bb, bb, updated_depths) + +for (i in 1:nrow(bb_df)) { + + # i = 1 + + message(i, " - Checking DEM '", bb_df$file[i], "' for CS PTS...") + + df <- bb_df[i, ] + + START_EPSG_CODE <- 5070 + + cs <- arrow::read_parquet(START_PATH) + cs <- + cs %>% + # dplyr::slice(1:10000) %>% + sf::st_as_sf(coords = c("X", "Y"), crs = START_EPSG_CODE) + + # convert to bounding box CRS + cs <- + cs %>% + sf::st_transform(df$crs[1]) + + # create bounding box shape + bb <- sf::st_as_sf( + sf::st_as_sfc( + sf::st_bbox( + c(xmin = df$xmin, + xmax = df$xmax, + ymax = df$ymax, + ymin = df$ymin + ), + crs = sf::st_crs(df$crs) + ) + ) + ) + + # get pts that are in the bounding box + pts_subset <- sf::st_filter( + cs, + bb + ) + + has_pts_in_bb <- nrow(pts_subset) > 0 + + message(nrow(pts_subset), " cs points found within '", df$file, "' DEMs bounding box") + + if(!has_pts_in_bb) { + message(" > No points to update!") + next + } + + if(has_pts_in_bb) { + + message(" > Loading Raster") + + # load DEM + dem <- terra::rast(df$path) + + message(" > Extracting new cross section depth values from DEM...") + + updated_depths <- + pts_subset %>% + dplyr::mutate( + Z2 = hydrofabric3D:::extract_pt_val(dem, .), + Z_source2 = df$file + # Z = extract_pt_val(terra::rast(dem), .) 
+ ) %>% + sf::st_drop_geometry() %>% + dplyr::select( + dplyr::any_of(CROSSWALK_ID), + cs_id, pt_id, + # Z, + Z2, + # Z_source, + Z_source2 + ) + + message(" > Replacing old depth values with new depth values...") + cs <- + cs %>% + dplyr::left_join( + updated_depths, + by = c(CROSSWALK_ID, "cs_id", "pt_id") + ) %>% + dplyr::mutate( + Z = dplyr::case_when( + !is.na(Z2) ~ Z2, + TRUE ~ Z + ), + Z_source = dplyr::case_when( + !is.na(Z_source2) ~ Z_source2, + TRUE ~ Z_source + ) + ) %>% + dplyr::select(-Z2, -Z_source2) + + message(" > Projecting CS PTs back to starting CRS (", START_EPSG_CODE, ") ...") + cs <- + cs %>% + sf::st_transform(START_EPSG_CODE) + + message(" > Dropping point geometries and preserving X / Y coordinates...") + + cs <- + cs %>% + dplyr::mutate( + X = sf::st_coordinates(.)[,1], + Y = sf::st_coordinates(.)[,2] + ) %>% + sf::st_drop_geometry() %>% + dplyr::select( + # hy_id, + dplyr::any_of(CROSSWALK_ID), + cs_id, pt_id, + cs_lengthm, + relative_distance, + X, Y, + Z, + class, point_type, + Z_source, + bottom, left_bank, right_bank, valid_banks, + has_relief + + # newly added columns (03/06/2024) + ) + + message(" > Overwritting original cross section points parquet with updated depth values ...") + message(" > '", START_PATH, "'") + + arrow::write_parquet(cs, START_PATH) + + } + + } + +cross_sections <- arrow::read_parquet(START_PATH) + +cross_sections <- + cross_sections %>% + dplyr::select( + -point_type, + -class, + -bottom, -left_bank, -right_bank, + -has_relief, -valid_banks + ) + +# reclassify +# system.time({ +cross_sections <- hydrofabric3D::classify_points(cs_pts = cross_sections, + crosswalk_id = CROSSWALK_ID, + pct_of_length_for_relief = PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF + ) +# }) + +# final_cross_sections %>% +# dplyr::filter(id == "wb-1000") %>% +# dplyr::rename(hy_id = id) %>% +# hydrofabric3D::plot_cs_pts(color = "point_type") + +# --------------------------------------------------------------------------------- +# ---- Write final cross section points data ---- +# ---- Diffusive Domain DEM + FEMA + ML +# --------------------------------------------------------------------------------- + +CROSS_SECTIONS_ML_OUTPUT_PATH <- paste0(DOMAIN_WITH_FEMA_OUTPUT_DIR, "/cross_sections.parquet") + +message(round(Sys.time()), " - Saving Diffusive DEM + FEMA + ML augmented cross section points to:\n - filepath: '", CROSS_SECTIONS_ML_OUTPUT_PATH, "'") + +# sum(is.na(final_cross_sections$id)) +# sum(is.na(final_cross_sections$cs_id)) +# sum(is.na(final_cross_sections$pt_id)) +# sum(is.na(final_cross_sections$X)) + +# save cross section points as a parquet to out_path (domain/outputs/cross_sections.parquet) +arrow::write_parquet( + # dplyr::select(final_cs, + # -is_dem_point + # ), + cross_sections, + CROSS_SECTIONS_ML_OUTPUT_PATH +) + +# cross_sections %>% +# dplyr::select(id, Z_source) %>% +# dplyr::group_by(Z_source) %>% +# dplyr::count(Z_source) %>% +# dplyr::arrange(-n) + +# CROSS_SECTIONS_ML_OUTPUT_PATH +# +# CROSS_SECTIONS_OUTPUT_PATH +# +# CROSS_SECTIONS_ML_OUTPUT_PATH <- paste0(DOMAIN_WITH_FEMA_OUTPUT_DIR, "/cross_sections.parquet") +# +# CROSS_SECTIONS_PATH <- paste0(DOMAIN_WITH_FEMA_OUTPUT_DIR, "/cross_sections.parquet") +# DEM_CROSS_SECTIONS_PATH <- paste0(DOMAIN_WITH_FEMA_OUTPUT_DIR, "/dem_cross_sections.parquet") +# +# cs <- arrow::read_parquet(CROSS_SECTIONS_PATH) %>% +# dplyr::rename(hy_id = id) +# +# dem_cs <- arrow::read_parquet(DEM_CROSS_SECTIONS_PATH) %>% +# dplyr::rename(hy_id = id) +# +# z_ids <- +# cs %>% +# dplyr::group_by(Z_source) %>% +# 
dplyr::slice(1:4) %>% +# dplyr::pull(hy_id) +# +# # subset_ids <- z_ids[1:1] +# # z_ids <- z_ids[1:2] +# # for (i in seq_along(z_ids)) { +# for (i in seq_along(z_ids)) { +# message(i) +# # i = 1 +# subset_id <- z_ids[i] +# +# new_cs <- +# cs %>% +# dplyr::filter(hy_id %in% subset_id) +# +# old_cs <- +# dem_cs %>% +# dplyr::filter(hy_id %in% subset_id) +# +# title_str <- paste0( +# unique(new_cs$Z_source)[1], " vs. ", +# unique(old_cs$Z_source)[1] +# ) +# +# save_path = paste0( +# "/Users/anguswatters/Desktop/z_source_plots/", +# gsub(".tif", "", unique(new_cs$Z_source)[1]), +# "_vs_", +# unique(old_cs$Z_source)[1], "_", i, ".png" +# ) +# +# message("making plot at \n > '", save_path, "'") +# +# comp_plot <- dplyr::bind_rows( +# new_cs, +# old_cs +# ) %>% +# ggplot2::ggplot() + +# ggplot2::geom_point(ggplot2::aes(x = pt_id, +# y = Z, +# color = Z_source +# ), +# size = 3, +# alpha = 0.9 +# ) + +# ggplot2::labs(title = title_str) + +# ggplot2::facet_wrap(hy_id~cs_id) + +# ggplot2::theme_bw() + +# ggplot2::theme( +# plot.title = ggplot2::element_text(size = 14), +# legend.text = ggplot2::element_text(size = 14), +# legend.title = ggplot2::element_text(size = 14, face = "bold") +# ) +# +# ggplot2::ggsave(plot = comp_plot, +# filename = save_path, +# scale = 1 +# ) +# +# } +# +# +# dplyr::bind_rows( +# cs %>% +# dplyr::filter(hy_id %in% subset_ids), +# dem_cs %>% +# dplyr::filter(hy_id %in% subset_ids) +# +# ) %>% +# ggplot2::ggplot() + +# ggplot2::geom_point(ggplot2::aes(x = pt_id, +# y = Z, +# color = Z_source +# +# )) + +# ggplot2::labs(title = "al_nwfl_ncei_1 vs hydrofabric3D") + +# ggplot2::facet_wrap(hy_id~cs_id) +# +# cs %>% +# dplyr::filter(hy_id %in% subset_ids) %>% +# ggplot2::ggplot() + +# ggplot2::geom_point(ggplot2::aes(x = pt_id, +# y = Z, +# color = point_type +# +# )) + +# ggplot2::facet_wrap(hy_id~cs_id) +# +# +# dem_cs %>% +# dplyr::filter(hy_id %in% subset_ids) %>% +# ggplot2::ggplot() + +# ggplot2::geom_point(ggplot2::aes(x = pt_id, +# y = Z, +# color = point_type +# +# )) + +# ggplot2::facet_wrap(hy_id~cs_id) +# +# +# +# +# +# + + + + + + + + + + + + + diff --git a/runners/cs_runner/domain_with_fema2.R b/runners/cs_runner/domain_with_fema2.R new file mode 100644 index 0000000..3d574e0 --- /dev/null +++ b/runners/cs_runner/domain_with_fema2.R @@ -0,0 +1,2245 @@ +# Generate the transects + cs_pts + cross sections layers for a single flowlines domain file and DEM file +source("runners/cs_runner/config.R") +source("runners/cs_runner/utils.R") + +# load libraries +# library(hydrofabric3D) +# library(dplyr) +# library(sf) + +# Unique Flowline ID column name +CROSSWALK_ID <- "id" + +# --------------------------------------------------------------------- +# --- Read in flowlines +# --------------------------------------------------------------------- + +# Subsetting area +aoi <- sf::read_sf(DOMAIN_WITH_FEMA_SUBSET_PATH, layer = "divides") +aoi <- rmapshaper::ms_simplify(aoi, keep = 0.05) + +id_col <- "id" +# id_col <- "divide_id" + +# query the conus.gpkg for matching IDs +ids <- unique(aoi[[id_col]]) +query_ids <- paste0(paste0("'", ids, "'"), collapse= ", ") + +gpkg_layers <- sf::st_layers(DOMAIN_WITH_FEMA_FLOWLINES_PATH) +layer <- gpkg_layers$name[gpkg_layers$name == "flowpaths"] + +wkt <- + aoi %>% + rmapshaper::ms_dissolve() %>% + # rmapshaper::ms_explode() %>% + # sf::st_as_sfc() %>% + sf::st_sf() %>% + sf::st_geometry() %>% + sf::st_as_text() + +# read in flowlines based on IDs in AOI +flines <- sf::read_sf(DOMAIN_WITH_FEMA_FLOWLINES_PATH, layer = "flowpaths", + wkt_filter = 
wkt + # query = sprintf("SELECT * FROM \"%s\" WHERE %s IN (%s)", layer, id_col, query_ids), + # query = query + ) + +# flines <- +# flines %>% +# dplyr::slice(1:1000) + +# bad_ids <- c("wb-14538", "wb-14686", "wb-14687") +# flines <- +# flines %>% +# dplyr::filter(id %in% bad_ids) +# --------------------------------------------------------------------- +# --- Split flowlines by VPU +# --------------------------------------------------------------------- + +# VPUs polygons +VPU_boundaries <- sf::st_transform(nhdplusTools::vpu_boundaries, sf::st_crs(flines)) + +# add a VPU ID column to each flowline +flines <- add_intersects_ids(x = flines, y = VPU_boundaries, id_col = "VPUID") + +# TODO: improve this, manual remap some VPUIDs so that there are +# TODO: less small subsets of flowlines because a small bit of a different VPU is intersected +flines <- + flines %>% + dplyr::mutate( + VPUID = dplyr::case_when( + VPUID == "12" ~ "11, 12, 13", + VPUID == "12, 13" ~ "11, 12, 13", + VPUID == "11, 12" ~ "11, 12, 13", + VPUID == "01" ~ "01, 02", + TRUE ~ VPUID + ) + ) + +# rm(flines2) +# unnest_ids(flines2$VPUID2) + +# set of unique VPUs + +VPU_IDS <- unnest_ids(flines$VPUID) +# VPU_IDS <- unnest_ids(flines2$VPUID2) +VPU_IDS + +# all possible FEMA dirs +AOI_FEMA_DIRS <- FEMA_VPU_SUBFOLDERS[basename(FEMA_VPU_SUBFOLDERS) %in% paste0("VPU_", VPU_IDS) ] + +flines %>% + sf::st_drop_geometry() %>% + dplyr::group_by(VPUID) %>% + dplyr::count() + +flines %>% + sf::st_drop_geometry() %>% + dplyr::group_by(order) %>% + dplyr::count() +# unique(flines$VPUID) + +# calculate bankfull width +flines <- + flines %>% + hydrofabric3D::add_powerlaw_bankful_width("tot_drainage_areasqkm", 50) %>% + dplyr::select( + dplyr::any_of(CROSSWALK_ID), + VPUID, + # hy_id = id, + lengthkm, + tot_drainage_areasqkm, + bf_width, + order, + mainstem + ) %>% + hydroloom::rename_geometry("geometry") + + +sf::write_sf( + flines, + paste0(DOMAIN_WITH_FEMA_FLOWLINES_DIR, "/flowlines_subset.gpkg") +) + +transects <- hydrofabric3D::cut_cross_sections( + net = flines, # flowlines network + crosswalk_id = CROSSWALK_ID, # Unique feature ID + cs_widths = flines$bf_width, + # cs_widths = pmax(50, flowlines$bf_width * 11), # cross section width of each "id" linestring ("hy_id") + # cs_widths = pmax(50, flowlines$bf_width), # cross section width of each "id" linestring ("hy_id") + num = 10, # number of cross sections per "id" linestring ("hy_id") + smooth = TRUE, # smooth lines + densify = 3, # densify linestring points + rm_self_intersect = TRUE, # remove self intersecting transects + fix_braids = FALSE, # whether to fix braided flowlines or not + #### Arguments used for when fix_braids = TRUE # TODO: these methods need revision in hydrofabric3D to allow for more flexible processing for data that is NOT COMID based (i.e. 
hy_id) + # terminal_id = NULL, + # braid_threshold = NULL, + # version = 2, + # braid_method = "comid", + # precision = 1, + add = TRUE # whether to add back the original data +) + +transects <- + transects %>% + dplyr::mutate(cs_source = CS_SOURCE) %>% + dplyr::select(id, + cs_source, + cs_id, + cs_measure, + ds_distance, + cs_lengthm, + sinuosity, + geometry + ) + +# paste0(DOMAIN_WITH_FEMA_TRANSECTS_DIR, "/pre_extension_transects.gpkg") + +sf::write_sf( + transects, + paste0(DOMAIN_WITH_FEMA_TRANSECTS_DIR, "/pre-extension-transects.gpkg") +) + +# [1] "wb-2416492" "wb-2414869" "wb-2415765" "wb-2416657" "wb-2415479" "wb-2420307" "wb-2425952" "wb-2421269" "wb-2425750" +# [10] "wb-2422028" "wb-2419212" "wb-2426092" "wb-2414971" "wb-2416120" "wb-2414960" "wb-2433259" "wb-14381" "wb-18022" +# [19] "wb-15173" "wb-15160" "wb-571" "wb-2413739" "wb-508015" "wb-507770" "wb-2424752" "wb-2424545" "wb-2422849" +# [28] "wb-2417939" "wb-2425090" "wb-2422212" "wb-2422714" "wb-2419650" "wb-2419650" "wb-2416492" "wb-2414869" "wb-2415765" +# [37] "wb-2416657" "wb-2415479" "wb-2420307" "wb-2419694" "wb-2419694" "wb-2419974" "wb-2419974" "wb-2420065" "wb-2420065" +# [46] "wb-2425952" "wb-2420678" "wb-2420678" "wb-2421269" "wb-2419695" "wb-2419695" "wb-2425750" "wb-2422028" "wb-2419212" +# [55] "wb-2426091" "wb-2414971" "wb-2407999" "wb-2407999" "wb-2426131" "wb-2426131" "wb-2416120" "wb-2433525" "wb-2433525" +# [64] "wb-2433259" "wb-14381" "wb-14576" "wb-14576" "wb-18022" "wb-15173" "wb-15160" "wb-11097" "wb-11097" +# [73] "wb-14804" "wb-14804" "wb-571" "wb-2408123" "wb-2408123" "wb-2413739" "wb-508015" "wb-507770" "wb-2424752" +# [82] "wb-2435791" "wb-2435791" "wb-2435798" "wb-2435798" "wb-2435779" "wb-2435779" "wb-2435781" "wb-2435781" "wb-2424545" +# [91] "wb-2422849" "wb-2417939" "wb-2425090" "wb-2422212" "wb-2422714" + +# transects <- sf::read_sf(paste0(DOMAIN_WITH_FEMA_TRANSECTS_DIR, "/pre-extension-transects.gpkg")) + +# out_transects +# out_transects2 + +transects <- + transects %>% + dplyr::left_join( + flines %>% + dplyr::select(id, VPUID, mainstem, tot_drainage_areasqkm) %>% + sf::st_drop_geometry(), + by = "id" + ) + +# sf::write_sf( +# transects, +# paste0("/Users/anguswatters/Desktop/tmp_trans.gpkg") +# ) + +GROUP_VPU_IDS <- unnest_ids(transects$VPUID) + +# all FEMA dirs for the current area +GROUP_FEMA_DIRS <- FEMA_VPU_SUBFOLDERS[basename(FEMA_VPU_SUBFOLDERS) %in% paste0("VPU_", GROUP_VPU_IDS) ] +GROUP_FEMA_FILES <- list.files(GROUP_FEMA_DIRS, full.names = T)[grepl("_output.gpkg", list.files(GROUP_FEMA_DIRS, full.names = T))] + + +# read each FEMA geopackage into a list +fema <- lapply(GROUP_FEMA_FILES, function(gpkg) sf::read_sf(gpkg)) +# transects$VPUID %>% unique() + +fema <- + fema %>% + dplyr::bind_rows() %>% + dplyr::mutate( + fema_id = 1:dplyr::n() + ) + +message("Simplifying FEMA polygons...") +message(" - Number of geoms BEFORE simplifying: ", nrow(fema)) + +# TODO: this should be a function argument OR removed, shouldn't probably forcibly and silently simplify the input polygons without user knowing.. 
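+# (For reference, assuming current rmapshaper behaviour: `keep` is the proportion of
+# vertices retained per geometry, `keep_shapes = TRUE` prevents small polygons from being
+# dropped entirely, and `sys = TRUE` shells out to a system-installed mapshaper, with
+# `sys_mem` setting how much memory that external process may use.)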
+# keep 1% of the original points for speed +fema <- rmapshaper::ms_simplify(fema, keep_shapes = T, keep = 0.01, sys = TRUE, sys_mem = 16) +# fema <- rmapshaper::ms_simplify(fema, keep_shapes = T, keep = 0.1, sys = TRUE, sys_mem = 16) + +# sf::write_sf( +# fema, +# paste0("/Users/anguswatters/Desktop/tmp_fema.gpkg") +# ) + +message(" - Number of geoms AFTER simplifying: ", nrow(fema)) +message("Extending transects out to FEMA 100yr floodplain polygon boundaries - (", Sys.time(), ")") + +# transects <- +# transects %>% +# dplyr::left_join( +# dplyr::select(sf::st_drop_geometry(flines), +# dplyr::any_of(CROSSWALK_ID), +# mainstem +# ), +# by = CROSSWALK_ID +# ) + +# test_ids <- c("wb-2416492", "wb-2414869", "wb-2415765" , "wb-2416657" , "wb-2415479", +# "wb-2420307" , "wb-2425952" , "wb-2421269" , "wb-2425750", +# "wb-14804", "wb-14804", "wb-571", "wb-2408123", "wb-2408123" +# ) +# test_trans <- +# transects %>% +# dplyr::filter(id %in% test_ids) +# +# test_flines <- +# flines %>% +# dplyr::filter(id %in% test_ids) + +# test_fema +# test_trans$VPUID + +# TODO: make sure this 3000m extension distance is appropriate across VPUs +# TODO: also got to make sure that this will be feasible on memory on the larger VPUs... +# transects <- hydrofabric3D::extend_transects_to_polygons( +ext_transects <- hydrofabric3D::extend_transects_to_polygons( + transect_lines = transects, + polygons = fema, + flowlines = flines, + # transect_lines = test_trans, + # polygons = fema, + # flowlines = test_flines, + crosswalk_id = CROSSWALK_ID, + grouping_id = "mainstem", + max_extension_distance = 3000 +) + +# sf::write_sf( +# flines, +# paste0("/Users/anguswatters/Desktop/tmp_flines.gpkg") +# ) + +# flines <- sf::read_sf( paste0("/Users/anguswatters/Desktop/tmp_flines.gpkg")) +# transects <- sf::read_sf(paste0("/Users/anguswatters/Desktop/tmp_trans.gpkg")) +# fema <- sf::read_sf( paste0("/Users/anguswatters/Desktop/tmp_fema.gpkg")) +# +# transect_lines <- transects +# polygons <- fema +# flowlines <- flines +# bad_id <- "wb-10813" +# mainstem <- "1977479" +# crosswalk_id = "id" +# grouping_id = "mainstem" +# max_extension_distance = 3000 + + +# hydrofabric3D:::rm_self_intersections(ext_transects) +# transects <- +# transects %>% +ext_transects <- + ext_transects %>% + dplyr::mutate(cs_source = CS_SOURCE) %>% + dplyr::select(id, + cs_source, + cs_id, + cs_measure, + ds_distance, + cs_lengthm, + sinuosity, + geometry + ) + +# ext_transects <- +# ext_transects %>% +# dplyr::select(id, cs_id, cs_lengthm, cs_measure, ds_distance, sinuosity, +# # tot_drainage_areasqkm, +# geometry) + +sf::write_sf( + # transects, + ext_transects, + paste0(DOMAIN_WITH_FEMA_TRANSECTS_DIR, "/post-extension-transects.gpkg") +) + +# get cross section point elevations +cs_pts <- hydrofabric3D::cross_section_pts( + # cs = transects, + cs = ext_transects, + crosswalk_id = CROSSWALK_ID, + points_per_cs = NULL, + min_pts_per_cs = 10, + dem = DEM_PATH +) + +# mapview::mapview(cs_pts) + test_trans + +sf::write_sf( + cs_pts, + paste0(DOMAIN_WITH_FEMA_CS_PTS_DIR, "/raw-cs-pts.gpkg") +) + +# DOMAIN_WITH_FEMA_CS_PTS_DIR +# ---------------------------------------------------------------------------------------------------------------- +# ---- STEP 2: Remove any cross section that has ANY missing (NA) Z values, and classify the points ---- +# ---------------------------------------------------------------------------------------------------------------- +# cs_pts2 %>% +# dplyr::slice(1:200) %>% +# dplyr::rename(hy_id = id) %>% +# 
hydrofabric3D::plot_cs_pts(x = "pt_id", color = "point_type") + +cs_pts <- + # cs_pts2 <- + cs_pts %>% + hydrofabric3D::drop_incomplete_cs_pts(CROSSWALK_ID) %>% + hydrofabric3D::classify_points( + crosswalk_id = CROSSWALK_ID, + pct_of_length_for_relief = PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF + ) + +# cs_pts %>% +# dplyr::slice(150:250) %>% +# hydrofabric3D::plot_cs_pts("id", color = "point_type", size = 4) + +# }) + +ids_original_cs_pts <- hydrofabric3D::add_tmp_id(cs_pts, x = CROSSWALK_ID)$tmp_id +# ids_original_cs_pts <- hydrofabric3D::add_tmp_id(cs_pts2)$tmp_id + +# sf::write_sf(cs_pts2, "/Users/anguswatters/Desktop/test_improve_cs_pts_classified_11.gpkg") +# sf::write_sf(cs_pts, "/Users/anguswatters/Desktop/test_improve_cs_pts_classified_11_2.gpkg") + + +# ---------------------------------------------------------------------------------------------------------------- +# ---- STEP 3: Try to rectify any no relief and invalid banks cross sections ---- +# ---------------------------------------------------------------------------------------------------------------- + +system.time({ +fixed_pts <- hydrofabric3D::get_improved_cs_pts( + cs_pts = cs_pts, # cross section points generated from hydrofabric3D::cross_section_pts() + net = flines, # original flowline network + # net = test_flines, + # net = flowlines, # original flowline network + # transects = transects, # original transect lines + transects = ext_transects, # original transect lines + crosswalk_id = CROSSWALK_ID, + points_per_cs = NULL, + min_pts_per_cs = 10, # number of points per cross sections + dem = DEM_PATH, # DEM to extract points from + scale = EXTENSION_PCT, # How far to extend transects if the points need to be rechecked + pct_of_length_for_relief = PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF, # percent of cross sections length to be needed in relief calculation to consider cross section to "have relief" + fix_ids = FALSE, + verbose = TRUE +) +}) + +# sf::write_sf( +# test_flines, +# "/Users/anguswatters/Desktop/test_flines.gpkg" +# ) +# +# sf::write_sf( +# test_trans, +# "/Users/anguswatters/Desktop/test_start_trans.gpkg" +# ) +# +# sf::write_sf( +# ext_transects, +# "/Users/anguswatters/Desktop/test_ext_trans.gpkg" +# ) +# +# sf::write_sf( +# cs_pts, +# "/Users/anguswatters/Desktop/test_cs_pts.gpkg" +# ) + +# mapview::mapview(fixed_pts) + +# mapview::mapview(ext_transects, color = "green") + +# mapview::mapview(out_transects, color = "red") + +fixed_pts$Z %>% is.null() %>% any() +fixed_pts$Z %>% is.na() %>% any() + +ids_after_fixed_pts <- hydrofabric3D::add_tmp_id(fixed_pts, x = CROSSWALK_ID)$tmp_id + +# ---------------------------------------------------------------------------------------------------------------- +# ---- Update transects with extended transects (if exists) ---- +# ---------------------------------------------------------------------------------------------------------------- +# ext_transects <- +# ext_transects %>% +# dplyr::mutate(cs_source = CS_SOURCE) + +# end_pts <- +# fixed_pts %>% +# dplyr::filter(id == "wb-2435034", cs_id == 1) %>% +# # hydrofabric3D::plot_cs_pts("id") +# dplyr::slice( +# which.min(pt_id), +# which.max(pt_id) +# ) +# mapview::mapview(end_pts) + tt +# tt <- +# ext_transects %>% +# dplyr::filter(id == "wb-2435034", cs_id == 1) +# hydrofabric3D:::match_transects_to_extended_cs_pts() + +out_transects <- hydrofabric3D:::match_transects_to_extended_cs_pts( + # transect_lines = transects, + transect_lines = ext_transects, + fixed_cs_pts = fixed_pts, + crosswalk_id = CROSSWALK_ID + # 
extension_pct = EXTENSION_PCT +) + +# sf::write_sf( +# out_transects, +# paste0(DOMAIN_WITH_FEMA_TRANSECTS_DIR, "/cs-point-extension-transects.gpkg") +# # "/Users/anguswatters/Desktop/tmp_trans_improved.gpkg" +# ) + +# fixed_pts2 <- hydrofabric3D:::renumber_cs_ids(df = fixed_pts, crosswalk_id = CROSSWALK_ID) +# out_transects2 <- hydrofabric3D:::renumber_cs_ids(df = out_transects, crosswalk_id = CROSSWALK_ID) + +fixed_pts <- + fixed_pts %>% +# fixed_pts2 <- + # fixed_pts %>% + hydrofabric3D:::renumber_cs_ids(crosswalk_id = CROSSWALK_ID) %>% + dplyr::group_by(id) %>% + dplyr::arrange(cs_id, .by_group = TRUE) %>% + dplyr::ungroup() %>% + dplyr::group_by(id, cs_id) %>% + dplyr::arrange(pt_id, .by_group = TRUE) %>% + dplyr::ungroup() + + +out_transects <- + out_transects %>% +# out_transects2 <- +# out_transects %>% + hydrofabric3D:::renumber_cs_ids(crosswalk_id = CROSSWALK_ID) %>% + dplyr::group_by(id) %>% + dplyr::arrange(cs_id, .by_group = TRUE) %>% + dplyr::ungroup() %>% + dplyr::select( + -left_bank_count, + -right_bank_count, + -channel_count, + -bottom_count, + -bottom, + -left_bank, + -right_bank, + -valid_banks, + -has_relief, + -is_extended + ) + +sf::write_sf( + out_transects, + # out_transects2, + paste0(DOMAIN_WITH_FEMA_TRANSECTS_DIR, "/cs-point-extension-transects.gpkg") + # "/Users/anguswatters/Desktop/tmp_trans_improved.gpkg" +) + +# sf::write_sf( +# fixed_pts2, +# paste0(DOMAIN_WITH_FEMA_CS_PTS_DIR, "/fixed_cs_pts.gpkg") +# ) + +# transects %>% +# dplyr::group_by(id) %>% +# dplyr::arrange(cs_id, .by_group = TRUE) %>% +# dplyr::ungroup() +# +# out_transects2 %>% +# # dplyr::filter(id %in% c("wb-2435034", "wb-2435045")) %>% +# dplyr::group_by(id) %>% +# dplyr::arrange(cs_id, .by_group = TRUE) %>% +# dplyr::ungroup() + +# sf::write_sf( +# out_transects2, +# paste0(DOMAIN_WITH_FEMA_TRANSECTS_DIR, "/cs-point-extension-transects.gpkg") +# # "/Users/anguswatters/Desktop/tmp_trans_improved.gpkg" +# ) +# +# sf::write_sf( +# fixed_pts2, +# paste0(DOMAIN_WITH_FEMA_CS_PTS_DIR, "/fixed_cs_pts.gpkg") +# ) + +# trans_uids <- hydrofabric3D::get_unique_tmp_ids(out_transects, "id") +# ext_trans_uids <- hydrofabric3D::get_unique_tmp_ids(ext_transects, "id") +# fixed_pts_uids <- hydrofabric3D::get_unique_tmp_ids(fixed_pts, "id") + +# all(trans_uids %in% ext_trans_uids) +# all(ext_trans_uids %in% trans_uids) +# all(trans_uids %in% fixed_pts_uids) +# all(fixed_pts_uids %in% trans_uids) +# all(ext_trans_uids %in% fixed_pts_uids) +# all(fixed_pts_uids %in% ext_trans_uids) + +# ---------------------------------------------------------------------------------------------------------------- +# ---- Re enumerate the transects & cross section points "cs_id" ---- +# ---------------------------------------------------------------------------------------------------------------- + +# fixed_pts <- hydrofabric3D:::renumber_cs_ids(df = fixed_pts, crosswalk_id = "hy_id") +# out_transects <- hydrofabric3D:::renumber_cs_ids( +# df = dplyr::mutate(out_transects, pt_id = 1), +# crosswalk_id = "hy_id" +# ) %>% +# dplyr::select(-pt_id) +# length(out_transects_uids) +# length(fixed_pts_uids) +# +# out_transects_uids <- hydrofabric3D::get_unique_tmp_ids(out_transects, "id") +# fixed_pts_uids <- hydrofabric3D::get_unique_tmp_ids(fixed_pts, "id") +# +# length(out_transects_uids) == length(fixed_pts_uids) & all(out_transects_uids %in% fixed_pts_uids) & all(fixed_pts_uids %in% out_transects_uids) +# +# fixed_pts2 <- hydrofabric3D:::renumber_cs_ids(df = fixed_pts, crosswalk_id = CROSSWALK_ID) +# out_transects2 <- 
hydrofabric3D:::renumber_cs_ids(df = out_transects, crosswalk_id = CROSSWALK_ID) +# +# out_transects2_uids <- hydrofabric3D::get_unique_tmp_ids(out_transects2, "id") +# fixed_pts2_uids <- hydrofabric3D::get_unique_tmp_ids(fixed_pts2, "id") +# +# length(out_transects2_uids) == length(fixed_pts2_uids) & all(out_transects2_uids %in% fixed_pts2_uids) & all(fixed_pts2_uids %in% out_transects2_uids) + + # paste0(DOMAIN_WITH_FEMA_TRANSECTS_DIR, "/extended_transects.gpkg") + +# sf::write_sf( +# out_transects2, +# # "/Users/anguswatters/Desktop/tmp_trans_improved2.gpkg" +# paste0(DOMAIN_WITH_FEMA_TRANSECTS_DIR, "/final_transects.gpkg") +# ) + +# object.size(out_transects2) + +# classify the cross section points +fixed_pts <- + fixed_pts %>% +# fixed_pts2 <- + # fixed_pts2 %>% + dplyr::mutate( + X = sf::st_coordinates(.)[,1], + Y = sf::st_coordinates(.)[,2] + ) %>% + sf::st_drop_geometry() %>% + dplyr::select( + dplyr::any_of(CROSSWALK_ID), + cs_id, + pt_id, + cs_lengthm, + relative_distance, + X, Y, Z, + class, point_type, + bottom, left_bank, right_bank, valid_banks, has_relief # newly added columns (03/06/2024) + ) %>% + dplyr::mutate( + Z_source = CS_SOURCE + ) %>% + dplyr::relocate( + dplyr::any_of(CROSSWALK_ID), + cs_id, pt_id, cs_lengthm, relative_distance, X, Y, Z, Z_source, + class, point_type, + bottom, left_bank, right_bank, valid_banks, has_relief + ) + +# # add Z_source column for source of elevation data +# fixed_pts2 <- +# fixed_pts2 %>% +# dplyr::mutate( +# Z_source = CS_SOURCE +# ) %>% +# dplyr::relocate( +# dplyr::any_of(CROSSWALK_ID), +# cs_id, pt_id, cs_lengthm, relative_distance, X, Y, Z, Z_source +# # class, point_type, +# # bottom, left_bank, right_bank, valid_banks, has_relief +# ) + +ids_before_align <- hydrofabric3D::add_tmp_id(fixed_pts, x = CROSSWALK_ID)$tmp_id + +message("Aligning banks and smoothing bottoms...") +fixed_pts <- hydrofabric3D::align_banks_and_bottoms(cs_pts = fixed_pts, crosswalk_id = CROSSWALK_ID) + +ids_after_align <- hydrofabric3D::add_tmp_id(fixed_pts, x = CROSSWALK_ID)$tmp_id + +message("Reclassifying cross section points...") + +fixed_pts <- hydrofabric3D::classify_points( + cs_pts = fixed_pts, + crosswalk_id = CROSSWALK_ID, + pct_of_length_for_relief = PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF +) + +ids_after_reclassify <- hydrofabric3D::add_tmp_id(fixed_pts, x = CROSSWALK_ID)$tmp_id + +if(all(ids_original_cs_pts %in% ids_after_fixed_pts)) { + message("All hy_id/cs_ids in ORIGINAL DEM point extraction were found in the FIXED points") +} else { + message(" >>> Missing hy_id/cs_ids in ORIGINAL DEM point extraction compared to the FIXED points") +} + +if(all(ids_before_align %in% ids_after_align)) { + message("All hy_id/cs_ids are kept in tact after bank alignment and bottom smoothing") +} else { + message(" >>> Missing hy_id/cs_ids after bank alignment and bottom smoothing") +} + +if(all(ids_after_align %in% ids_after_reclassify)) { + message("All hy_id/cs_ids are kept in tact after RECLASSIFICATION") +} else { + message(" >>> Missing hy_id/cs_ids after RECLASSIFICATION") +} + +# sf::write_sf( +# out_transects, +# paste0(DOMAIN_WITH_FEMA_VPU_SUBSETS_DIR, "/", VPU, "_transects.gpkg") +# ) +# + +arrow::write_parquet( + fixed_pts, + paste0(DOMAIN_WITH_FEMA_CS_PTS_DIR, "/final-cs-pts.parquet") +) +# paste0(DOMAIN_WITH_FEMA_CS_PTS_DIR, "/final-cs-pts.parquet") + + +# --------------------------------------------------------------------------------- +# ---- Inject ML predicted top widths / Dingman's R ---- +# 
--------------------------------------------------------------------------------- + +# --------------------------------------------------------------------------------- +# ---- Read in ML data ---- +# --------------------------------------------------------------------------------- + +VPU_ML_BATHYMETRY_PATHS <- list.files(DOMAIN_WITH_FEMA_ML_DIR, full.names = T) + +ML_CROSSWALK_ID <- "id" + +# ml_outputs <- lapply(VPU_ML_BATHYMETRY_PATHS, function(prq) { +# vpu_id <- gsub(".*ml/([a-zA-Z0-9]+).*", "\\1", prq) +# arrow::read_parquet(prq) %>% +# dplyr::mutate(vpu_id = vpu_id) +# } +# ) %>% +# dplyr::bind_rows() %>% +# dplyr::select( +# dplyr::any_of(ML_CROSSWALK_ID), +# vpu_id, +# owp_y_bf, owp_y_inchan, +# owp_tw_bf, owp_tw_inchan, +# owp_dingman_r +# ) +# +# # rename ML_CROSSWALK_ID (unique ID) to match the CROSSWALK_ID in CS PTS +# # TODO: This assumes the IDs do correspond with eachother... (built from same flowlines network) +# names(ml_outputs)[names(ml_outputs) == ML_CROSSWALK_ID] = CROSSWALK_ID +# +# # Keep only distinct ID rows +# ml_outputs <- +# ml_outputs %>% +# dplyr::distinct( +# dplyr::across(dplyr::any_of(CROSSWALK_ID)), +# vpu_id, +# owp_y_bf, owp_y_inchan, +# owp_tw_bf, owp_tw_inchan, +# owp_dingman_r +# ) + +# sf::st_layers(DOMAIN_WITH_FEMA_FLOWLINES_PATH) +# rm(ml_outputs, ml) +ml_outputs <- sf::read_sf(DOMAIN_WITH_FEMA_FLOWLINES_PATH, layer = "flowpath-attributes-ml") + +ml_outputs <- + ml_outputs %>% + dplyr::select( + dplyr::any_of(ML_CROSSWALK_ID), + vpuid, + owp_y_bf = YCC, + owp_y_inchan = Y, + owp_tw_bf = TopWdthCC, + owp_tw_inchan = TopWdth, + owp_dingman_r = dingman_r + ) + + +# # rename ML_CROSSWALK_ID (unique ID) to match the CROSSWALK_ID in CS PTS +# # TODO: This assumes the IDs do correspond with eachother... (built from same flowlines network) +names(ml_outputs)[names(ml_outputs) == ML_CROSSWALK_ID] = CROSSWALK_ID +# +# # Keep only distinct ID rows +ml_outputs <- + ml_outputs %>% + dplyr::distinct( + dplyr::across(dplyr::any_of(CROSSWALK_ID)), + vpu_id, + owp_y_bf, owp_y_inchan, + owp_tw_bf, owp_tw_inchan, + owp_dingman_r + ) + +# --------------------------------------------------------------------------------- +# ---- Read in CS PTS data ---- +# --------------------------------------------------------------------------------- + +# CS_PTS_OUTPUT_PATH <- paste0(DOMAIN_WITH_FEMA_OUTPUT_DIR, "/cs_pts.parquet") +CS_PTS_OUTPUT_PATH <- paste0(DOMAIN_WITH_FEMA_CS_PTS_DIR, "/final-cs-pts.parquet") + +cs_pts <- arrow::read_parquet(CS_PTS_OUTPUT_PATH) + +# --------------------------------------------------------------------------------- +# ---- Join CS PTS with ML data --- +# --------------------------------------------------------------------------------- +message(round(Sys.time()), " - Joining ML width/depths estimates to cross section points...") + +# ml_outputs %>% +# dplyr::group_by(id) %>% +# dplyr::count(id) %>% +# dplyr::arrange(-n) + +# join the ML outputs data to the cross section points +cs_pts <- + cs_pts %>% + dplyr::left_join( + dplyr::select(ml_outputs, + dplyr::any_of(CROSSWALK_ID), + owp_tw_inchan, + owp_y_inchan, + owp_tw_bf, + owp_y_bf, + owp_dingman_r + ), + by = CROSSWALK_ID + ) + +# --------------------------------------------------------------------------------- +# ---- Fixing negative depths/widths estimates ---- +# --------------------------------------------------------------------------------- +message(round(Sys.time()), " - Replacing any negative width/depth estimates with cross section bottom lengths...") + +cs_bottom_lengths <- 
hydrofabric3D::get_cs_bottom_length(cross_section_pts = cs_pts, crosswalk_id = CROSSWALK_ID) + +# TODO: for now we replace any negative TW values with the length of the bottom of the cross section +# TODO: This method + the negative model output values both need to be looked into (04/05/2024) +cs_pts <- + cs_pts %>% + dplyr::left_join( + cs_bottom_lengths, + by = c(CROSSWALK_ID, "cs_id") + # by = c("hy_id", "cs_id") + ) %>% + dplyr::mutate( + owp_tw_inchan = dplyr::case_when( + owp_tw_inchan <= 0 ~ bottom_length, + TRUE ~ owp_tw_inchan + ), + owp_tw_bf = dplyr::case_when( + owp_tw_bf <= 0 ~ bottom_length, + TRUE ~ owp_tw_bf + ) + ) %>% + dplyr::select(-bottom_length) + +# extract any cross sections that didn't get matched with a "hf_id" and (or?) no ML data +# TODO: look at this stuff with Arash (04/09/2024) +missing_cs <- + cs_pts %>% + dplyr::filter( + is.na(.data[[CROSSWALK_ID]]) | + is.na(owp_tw_inchan) | is.na(owp_y_inchan) | + is.na(owp_tw_bf) | is.na(owp_y_bf) | + is.na(owp_dingman_r) + ) %>% + hydrofabric3D::add_tmp_id(x = CROSSWALK_ID) + +# TODO: Delete this, but time being keeping this to inspect mismatch in between "hy_id" and "hf_id" +# readr::write_csv( +# dplyr::select(missing_cs, -tmp_id), +# paste0(META_PATH, "nextgen_", path_df$vpu[i], "_cross_sections_missing_hf_ids.csv") +# ) + +# Split the cross sections into 2 groups: +# - "Inchannel cs" group are points with BOTH valid banks AND relief --> These get the INCHANNEL TW and Y values from the ML model +# - "Bankful cs" group are points WITHOUT valid banks OR any relief --> These get the BANKFUL TW and Y values from the ML model +inchannel_cs <- + cs_pts %>% + hydrofabric3D::add_tmp_id(x = CROSSWALK_ID) %>% + dplyr::filter(!tmp_id %in% unique(missing_cs$tmp_id)) %>% # NOTE: makes sure to remove any of the "missing" cross sections without data + dplyr::select(-tmp_id) %>% + dplyr::filter(valid_banks & has_relief) %>% + # NOTE: temporarily rename the top widths, depths, and dingman's R columns so they + # work nicely with the "hydrofabric3D::add_cs_bathymetry()" function which takes a dataframe of cross section points + # with "TW", "DEPTH", and "DINGMAN_R" columns for each cross section + dplyr::rename( + TW = owp_tw_inchan, + DEPTH = owp_y_inchan, + DINGMAN_R = owp_dingman_r + ) + +bankful_cs <- + cs_pts %>% + hydrofabric3D::add_tmp_id(x = CROSSWALK_ID) %>% + dplyr::filter(!tmp_id %in% unique(missing_cs$tmp_id)) %>% + dplyr::select(-tmp_id) %>% + dplyr::filter(!valid_banks | !has_relief) %>% + dplyr::rename( + TW = owp_tw_bf, + DEPTH = owp_y_bf, + DINGMAN_R = owp_dingman_r + ) + + +# sanity check that all rows are accounted for after splitting up data +split_kept_all_rows <- nrow(cs_pts) == (nrow(bankful_cs) + nrow(inchannel_cs) + nrow(missing_cs)) +# split_kept_all_rows <- nrow(cs_pts) == nrow(bankful_cs) + nrow(inchannel_cs) + +if (!split_kept_all_rows) { + warning(paste0("When splitting cross section points into 'bankful' and 'inchannel' groups,", + "\nsome points were not put in either group.", + "\nLikely due to 'valid_banks' and/or 'has_relief' columns have either missing ", + "values or contain values other than TRUE/FALSE") + ) +} + +message(round(Sys.time()), " - Adding cross section bathymetry using inchannel widths/depths estimates...") +# tmp <- +# inchannel_cs %>% +# dplyr::slice(1:10000) +system.time({ + + # Add bathymetry using "inchannel" estimates + inchannel_cs <- hydrofabric3D::add_cs_bathymetry( + cross_section_pts = inchannel_cs, + # cross_section_pts = tmp, + crosswalk_id = CROSSWALK_ID + ) + +}) + + + 
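+# Hedged sanity check (an illustration, not part of the original workflow): confirm the
+# bathymetry injection above did not introduce missing depths and report how many
+# cross sections remain; Z, cs_id and the CROSSWALK_ID column all exist in `inchannel_cs`.
+stopifnot(!any(is.na(inchannel_cs$Z)))
+message("inchannel cross sections with bathymetry: ",
+        dplyr::n_distinct(inchannel_cs[[CROSSWALK_ID]], inchannel_cs$cs_id))
+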
+# arrow::write_parquet(inchannel_cs, "/Users/anguswatters/Desktop/test_ml_cs_pts_06.parquet") +# ml_subset %>% +# dplyr::filter(hy_id == "wb-1005207") %>% +# dplyr::select(owp_y_inchan, owp_tw_inchan) %>% +# .$owp_y_inchan +message(round(Sys.time()), " - Adding cross section bathymetry using bankful widths/depths estimates...") + +system.time({ + + # Add bathymetry using "bankful" estimates + bankful_cs <- hydrofabric3D::add_cs_bathymetry( + cross_section_pts = bankful_cs, + # cross_section_pts = dplyr::slice(bankful_cs, 1:10000), + crosswalk_id = CROSSWALK_ID + ) + +}) + +# combine the inchannel and bankful cross section points back together, fill out missing values and reclassify the points +final_cs <- dplyr::bind_rows( + dplyr::select( + inchannel_cs, + # inchannel_cs2, + # -hf_id, + -TW, -DEPTH, -DINGMAN_R, + # -is_dem_point, + -dplyr::starts_with("owp") + ), + dplyr::select( + bankful_cs, + # bankful_cs2, + # -hf_id, + -TW, -DEPTH, -DINGMAN_R, + # -is_dem_point, + -dplyr::starts_with("owp") + ), + dplyr::select( + dplyr::mutate( + missing_cs, + is_dem_point = FALSE + ), + # -hf_id, + # -is_dem_point, + -dplyr::starts_with("owp"), + -tmp_id + ) + ) %>% + dplyr::group_by(dplyr::across(dplyr::any_of(c(CROSSWALK_ID, "cs_id")))) %>% + # dplyr::group_by(hy_id, cs_id) %>% + tidyr::fill( + c(cs_lengthm, Z_source) + ) %>% + dplyr::ungroup() %>% + dplyr::select( + -point_type, + -class, + -bottom, -left_bank, -right_bank, + -has_relief, -valid_banks + ) + +# arrow::write_parquet(final_cs, "/Users/anguswatters/Desktop/tmp.parquet") +# final_cs <- arrow::read_parquet("/Users/anguswatters/Desktop/tmp.parquet") + + +message(round(Sys.time()), " - Reclassifying cross section point types...") + +# reclassify +final_cs <- hydrofabric3D::classify_points(cs_pts = final_cs, + crosswalk_id = CROSSWALK_ID, + pct_of_length_for_relief = PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF +) + +starting_uids <- hydrofabric3D::get_unique_tmp_ids(cs_pts, x = CROSSWALK_ID) +ending_uids <- hydrofabric3D::get_unique_tmp_ids(final_cs, x = CROSSWALK_ID) + +has_same_number_of_uids <- length(starting_uids) == length(ending_uids) +all_starting_uids_in_ending_uids <- all(starting_uids %in% ending_uids) +all_ending_uids_in_starting_uids <- all(ending_uids %in% starting_uids) + +# throw some warnings if: +# - the number of uids in the input is different from the output +# - there are missing hy_id/cs_id +if (!has_same_number_of_uids) { + warning(paste0("The number of unique hy_id/cs_id is different in the ", + "starting cross section points from the final cross section points:", + "\n > Starting number of unique hy_id/cs_id: ", length(starting_uids), + "\n > Ending number of unique hy_id/cs_id: ", length(ending_uids) + )) +} + +if (!all_starting_uids_in_ending_uids) { + number_uids_not_in_ending_uids <- length(starting_uids[!starting_uids %in% ending_uids]) + + # starting_uids %in% ending_uids + warning( + paste0("Missing hy_id/cs_id in output that are in the starting input cross section points: ", + "\n > Number of hy_id/cs_id missing: ", number_uids_not_in_ending_uids + ) + ) + + # warning(paste0(number_uids_not_in_ending_uids, " hy_id/cs_id from the input cross section points ", + # "is missing from the output cross section points")) + +} + +# tmp_path <- paste0(DOMAIN_WITH_FEMA_OUTPUT_DIR, "/final_cs2.parquet") +# message("saving file", tmp_path) +# +# final_cs <- arrow::read_parquet(tmp_path) +# +# # save cross section points as a parquet to out_path (domain/outputs/cross-sections.parquet) +# arrow::write_parquet( +# 
dplyr::select(final_cs, +# -is_dem_point +# ), +# paste0(DOMAIN_WITH_FEMA_OUTPUT_DIR, "/final_cs2.parquet") +# ) + +# --------------------------------------------------------------------------------- +# ---- Write final cross section points data ---- +# --------------------------------------------------------------------------------- + +# CROSS_SECTIONS_OUTPUT_PATH <- paste0(DOMAIN_WITH_FEMA_OUTPUT_DIR, "/cross-sections.parquet") +CROSS_SECTIONS_OUTPUT_PATH <- paste0(DOMAIN_WITH_FEMA_CROSS_SECTIONS_DIR, "/cross-sections.parquet") +message(round(Sys.time()), " - Saving ML augmented cross section points to:\n - filepath: '", CROSS_SECTIONS_OUTPUT_PATH, "'") + +# save cross section points as a parquet to out_path (domain/outputs/cross-sections.parquet) +arrow::write_parquet( + dplyr::select(final_cs, + -is_dem_point + ), + CROSS_SECTIONS_OUTPUT_PATH +) + +INTERNAL_CROSS_SECTIONS_PATH <- paste0(DOMAIN_WITH_FEMA_CROSS_SECTIONS_DIR, "/cross-sections-is-dem-point.parquet") + +arrow::write_parquet( + final_cs, + INTERNAL_CROSS_SECTIONS_PATH +) + +# --------------------------------------------------------------------------------- +# ---- Substitue diffusive domain DEMs Z values ---- +# --------------------------------------------------------------------------------- + +final_cs <- arrow::read_parquet(CROSS_SECTIONS_OUTPUT_PATH) +# INTERNAL_CROSS_SECTIONS_PATH <- paste0("/Users/anguswatters/Desktop/cross_sections2.parquet") + +bb_df <- lapply(COASTAL_BATHY_DEM_PATHS, function(path) { + r <- terra::rast(path) + extent <- terra::ext(r) + # r <- terra::project(r, "EPSG:5070") + # terra::set.crs(r, "EPSG:5070") + ext_df <- data.frame(lapply(extent, function(i) {i})) + ext_df$crs <- terra::crs(r) + ext_df$file <- basename(path) + ext_df$path <- path + + return(ext_df) +}) %>% + dplyr::bind_rows() + +# final_cs + +# INTERNAL_CROSS_SECTIONS_PATH <- paste0("/Users/anguswatters/Desktop/cross-sections-is-dem-point.parquet") +# arrow::write_parquet( +# final_cs, +# INTERNAL_CROSS_SECTIONS_PATH +# ) + +# CROSS_SECTIONS_OUTPUT_PATH <- paste0(DOMAIN_WITH_FEMA_OUTPUT_DIR, "/cross-sections.parquet") +# final_cs <- arrow::read_parquet(CROSS_SECTIONS_OUTPUT_PATH) +# START_PATH <- paste0("/Users/anguswatters/Desktop/cross_sections2.parquet") +# +# arrow::write_parquet( +# final_cs, +# START_PATH +# ) + +# rm(i, count, cs, df, tmp, aoi, new_tmp, pts_subset, dem, has_pts_in_bb, bb, updated_depths) + + +# CROSS_SECTIONS_OUTPUT_PATH +# START_CROSS_SECTIONS_OUTPUT_PATH <- paste0(DOMAIN_WITH_FEMA_CROSS_SECTIONS_DIR, "/cross-sections.parquet") +START_PATH <- paste0(DOMAIN_WITH_FEMA_CROSS_SECTIONS_DIR, "/cross-sections.parquet") +UPDATED_PATH <- paste0(DOMAIN_WITH_FEMA_CROSS_SECTIONS_DIR, "/coastal-bathy_cross-sections.parquet") + +count <- 0 + +for (i in 1:nrow(bb_df)) { + + # i = 1 + + is_first_iter <- count == 0 + count <- count + 1 + CURR_PATH <- ifelse(is_first_iter, START_PATH, UPDATED_PATH) + + # CURR_PATH + + message(i, " - Checking DEM '", bb_df$file[i], "' for CS PTS...") + + df <- bb_df[i, ] + + START_EPSG_CODE <- 5070 + + cs <- arrow::read_parquet(CURR_PATH) + + cs <- + cs %>% + # dplyr::slice(1:10000) %>% + sf::st_as_sf(coords = c("X", "Y"), crs = START_EPSG_CODE) + + # convert to bounding box CRS + cs <- + cs %>% + sf::st_transform(df$crs[1]) + + # create bounding box shape + bb <- sf::st_as_sf( + sf::st_as_sfc( + sf::st_bbox( + c(xmin = df$xmin, + xmax = df$xmax, + ymax = df$ymax, + ymin = df$ymin + ), + crs = sf::st_crs(df$crs) + ) + ) + ) + + # get pts that are in the bounding box + pts_subset <- 
sf::st_filter( + cs, + bb + ) + + has_pts_in_bb <- nrow(pts_subset) > 0 + + message(nrow(pts_subset), " cs points found within '", df$file, "' DEMs bounding box") + + if(!has_pts_in_bb) { + + message(" > No points to update!") + message(" > Overwritting original cross section points parquet with updated depth values ...") + message(" > '", UPDATED_PATH, "'") + + cs <- + cs %>% + sf::st_transform(START_EPSG_CODE) %>% + dplyr::mutate( + X = sf::st_coordinates(.)[,1], + Y = sf::st_coordinates(.)[,2] + ) %>% + sf::st_drop_geometry() %>% + dplyr::select( + # hy_id, + dplyr::any_of(CROSSWALK_ID), + cs_id, pt_id, + cs_lengthm, + relative_distance, + X, Y, + Z, + class, point_type, + Z_source, + bottom, left_bank, right_bank, valid_banks, + has_relief + ) + + arrow::write_parquet(cs, UPDATED_PATH) + + next + } + + if(has_pts_in_bb) { + + message(" > Loading Raster") + + # load DEM + dem <- terra::rast(df$path) + + message(" > Extracting new cross section depth values from DEM...") + + updated_depths <- + pts_subset %>% + dplyr::mutate( + Z2 = hydrofabric3D:::extract_pt_val(dem, .), + Z_source2 = df$file + # Z = extract_pt_val(terra::rast(dem), .) + ) %>% + sf::st_drop_geometry() %>% + dplyr::select( + dplyr::any_of(CROSSWALK_ID), + cs_id, pt_id, + # Z, + Z2, + # Z_source, + Z_source2 + ) + + message(" > Replacing old depth values with new depth values...") + cs <- + cs %>% + dplyr::left_join( + updated_depths, + by = c(CROSSWALK_ID, "cs_id", "pt_id") + ) %>% + dplyr::mutate( + Z = dplyr::case_when( + !is.na(Z2) ~ Z2, + TRUE ~ Z + ), + Z_source = dplyr::case_when( + !is.na(Z_source2) ~ Z_source2, + TRUE ~ Z_source + ) + ) %>% + dplyr::select(-Z2, -Z_source2) + + message(" > Projecting CS PTs back to starting CRS (", START_EPSG_CODE, ") ...") + cs <- + cs %>% + sf::st_transform(START_EPSG_CODE) + + message(" > Dropping point geometries and preserving X / Y coordinates...") + + cs <- + cs %>% + dplyr::mutate( + X = sf::st_coordinates(.)[,1], + Y = sf::st_coordinates(.)[,2] + ) %>% + sf::st_drop_geometry() %>% + dplyr::select( + # hy_id, + dplyr::any_of(CROSSWALK_ID), + cs_id, pt_id, + cs_lengthm, + relative_distance, + X, Y, + Z, + class, point_type, + Z_source, + bottom, left_bank, right_bank, valid_banks, + has_relief + + # newly added columns (03/06/2024) + ) + + + # cs %>% + # dplyr::slice(1:27) %>% + # hydrofabric3D::plot_cs_pts(crosswalk_id = "id", color = "point_type", size = 4) + # hydrofabric3D::classify_points(crosswalk_id = "id") + # hydrofabric3D::plot_cs_pts(crosswalk_id = "id", color = "point_type", size = 4) + + # cs <- + # cs %>% + # # dplyr::slice(1:27) %>% + # dplyr::select(-class, -point_type, + # -valid_banks, -has_relief, + # -bottom, -left_bank, -right_bank + # ) %>% + # hydrofabric3D::classify_points(crosswalk_id = "id") %>% + # dplyr::select( + # # hy_id, + # dplyr::any_of(CROSSWALK_ID), + # cs_id, pt_id, + # Z, + # relative_distance, + # cs_lengthm, + # class, + # point_type, + # X, Y, + # Z_source, + # bottom, left_bank, right_bank, + # valid_banks, has_relief + # # newly added columns (03/06/2024) + # ) + # hydrofabric3D::plot_cs_pts(crosswalk_id = "id", color = "point_type", size = 4) + + # tmp <- final_cs %>% dplyr::filter(id == "wb-507785", cs_id == 4) + # new_tmp <- cs %>% dplyr::filter(id == "wb-507785", cs_id == 4) + + # hydrofabric3D::plot_cs_pts(tmp, crosswalk_id = "id", color = "point_type", size = 4) + # hydrofabric3D::plot_cs_pts(new_tmp, crosswalk_id = "id", color = "point_type", size = 4) + + message(" > Overwritting original cross section points parquet with 
updated depth values ...") + message(" > '", UPDATED_PATH, "'") + + arrow::write_parquet(cs, UPDATED_PATH) + + } + +} + +cross_sections <- arrow::read_parquet(UPDATED_PATH) + +# cross_sections %>% +# dplyr::select(Z_source) %>% +# dplyr::group_by(Z_source) %>% +# dplyr::count() + +cross_sections <- + cross_sections %>% + dplyr::select( + -point_type, + -class, + -bottom, -left_bank, -right_bank, + -has_relief, -valid_banks + ) + +# reclassify +# system.time({ +cross_sections <- hydrofabric3D::classify_points(cs_pts = cross_sections, + crosswalk_id = CROSSWALK_ID, + pct_of_length_for_relief = PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF +) + +cross_sections <- + cross_sections %>% + dplyr::select( + # hy_id, + dplyr::any_of(CROSSWALK_ID), + cs_id, pt_id, + Z, + relative_distance, + cs_lengthm, + class, + point_type, + X, Y, + Z_source, + bottom, left_bank, right_bank, + valid_banks, has_relief + # newly added columns (03/06/2024) + ) +# }) + +# START_PATH <- paste0(DOMAIN_WITH_FEMA_CROSS_SECTIONS_DIR, "/cross-sections.parquet") +# UPDATED_PATH <- paste0(DOMAIN_WITH_FEMA_CROSS_SECTIONS_DIR, "/coastal-bathy_cross-sections.parquet") + +arrow::write_parquet( + cross_sections, + paste0(DOMAIN_WITH_FEMA_CROSS_SECTIONS_DIR, "/coastal-bathy_cross-sections.parquet") + ) + +# ------------------------------------------------- +# ---- Move final datasets to outputs/ ---- +# ------------------------------------------------- + +select_cs_pts <- function(cs_pts, crosswalk_id = NULL) { + + if(is.null(crosswalk_id)) { + # crosswalk_id <- 'hydrofabric_id' + stop("Please provide a valid 'crosswalk_id' which uniquely identifies each cross section in 'cs_pts'") + } + + cs_pts <- + cs_pts %>% + dplyr::select( + dplyr::any_of(c( + crosswalk_id, + "cs_id", + "pt_id", + "relative_distance", + "cs_lengthm", + "X", + "Y", + "Z", + "Z_source", + "class", + "point_type", + "valid_banks", + "has_relief" + ) + ) + ) + + return(cs_pts) +} + +select_transects <- function(transects, crosswalk_id = NULL) { + + if(is.null(crosswalk_id)) { + # crosswalk_id <- 'hydrofabric_id' + stop("Please provide a valid 'crosswalk_id' which uniquely identifies the flowline associated with each transect in 'transects'") + } + + transects <- hydroloom::rename_geometry(transects, "geometry") + + transects <- + transects %>% + dplyr::select( + dplyr::any_of(c( + crosswalk_id, + "cs_source", + "cs_id", + "cs_measure", + "cs_lengthm", + "geometry" + ) + ) + ) + + return(transects) +} + + +# arrow::read_parquet(paste0(DOMAIN_WITH_FEMA_CS_PTS_DIR, "/final-cs-pts.parquet")) %>% +# select_cs_pts("id") + +# DEM based cross section points parquet +# - FEMA extended +# - CS based extensions +arrow::write_parquet( + arrow::read_parquet(paste0(DOMAIN_WITH_FEMA_CS_PTS_DIR, "/final-cs-pts.parquet")) %>% + select_cs_pts("id"), + # arrow::read_parquet(paste0(DOMAIN_WITH_FEMA_CS_PTS_DIR, "/final-cs-pts.parquet")), + paste0(DOMAIN_WITH_FEMA_OUTPUT_DIR, "/dem_cross-sections.parquet") +) + +# DEM based transect geometry gpkg +# - FEMA extended +# - CS based extensions +sf::write_sf( + sf::read_sf(paste0(DOMAIN_WITH_FEMA_TRANSECTS_DIR, "/cs-point-extension-transects.gpkg")) %>% + select_transects("id"), + # dplyr::rename(geometry = geom), + paste0(DOMAIN_WITH_FEMA_OUTPUT_DIR, "/transects.gpkg") +) + + +# DEM + ML cross section points parquet +# - FEMA extended +# - CS based extensions +# - ML based Z value updates +arrow::write_parquet( + arrow::read_parquet(paste0(DOMAIN_WITH_FEMA_CROSS_SECTIONS_DIR, "/cross-sections.parquet")) %>% + select_cs_pts("id"), + 
paste0(DOMAIN_WITH_FEMA_OUTPUT_DIR, "/ml_cross-sections.parquet") +) + +# DEM + COASTAL BATHY cross section points parquet +# - FEMA extended +# - CS based extensions +# - ML based Z value updates +# - Coastal bathy DEM Z value updates (where applicable) +arrow::write_parquet( + arrow::read_parquet(paste0(DOMAIN_WITH_FEMA_CROSS_SECTIONS_DIR, "/coastal-bathy_cross-sections.parquet")) %>% + select_cs_pts("id"), + paste0(DOMAIN_WITH_FEMA_OUTPUT_DIR, "/coastal-bathy_cross-sections.parquet") +) + +# ------------------------------------------------- +# ---- Data validation ---- +# ------------------------------------------------- + +# paste0(DOMAIN_WITH_FEMA_TRANSECTS_DIR, "/cs-point-extension-transects.gpkg") +# new_trans_path <- paste0(DOMAIN_WITH_FEMA_OUTPUT_DIR, "/transects.gpkg") + +# sf::st_layers(paste0(DOMAIN_WITH_FEMA_TRANSECTS_DIR, "/cs-point-extension-transects.gpkg")) +# sf::st_layers(paste0(DOMAIN_WITH_FEMA_OUTPUT_DIR, "/transects.gpkg")) + +# og_transects <- sf::read_sf(paste0(DOMAIN_WITH_FEMA_TRANSECTS_DIR, "/cs-point-extension-transects.gpkg")) +# new_transects <- sf::read_sf(paste0(DOMAIN_WITH_FEMA_OUTPUT_DIR, "/transects.gpkg")) + +# DOMAIN_WITH_FEMA_CROSS_SECTIONS_DIR + +output_files <- list.files(DOMAIN_WITH_FEMA_OUTPUT_DIR, full.names = T) +# rm(transects, dropped_trans, cs_pts, flowlines, lengths_check, transect_lengths, rel_dist_check, wrong_lengths, mismatches, duplicate_ids, cs_pt_lengths, cs_pts_ids) + +tmp_ids_list <- lapply(output_files, function(i) { + is_gpkg <- endsWith(i, ".gpkg") + is_parquet <- endsWith(i, ".parquet") + + if(is_gpkg) { + x <- + i %>% + sf::read_sf() %>% + hydrofabric3D::get_unique_tmp_ids(x = "id") + return(x) + } + + if(is_parquet) { + x <- + i %>% + arrow::read_parquet() %>% + hydrofabric3D::get_unique_tmp_ids(x = "id") + return(x) + } + return(NULL) + + + }) + + +tmp_ids_list + +# tmp_ids_list[[1]] %in% tmp_ids_list[[2]] + +res <- list() +idxs <- seq_along(tmp_ids_list) + +for (i in seq_along(tmp_ids_list)) { + # idxs <- seq_along(tmp_ids_list) + # i = 1 + curr <- tmp_ids_list[[i]] + + other_idxs <- idxs[idxs != i] + + in_all_others <- lapply(tmp_ids_list[other_idxs], function(k) { + all(k %in% curr) & all(curr %in% k) + }) %>% + unlist() %>% + all() + res[[i]] <- in_all_others +} + +all_ids_are_matching <- + res %>% + unlist() %>% + all() + +# validate_transects(transects, "id") +if (!all_ids_are_matching) { + warning("Not all id/cs_id are included in all transects / cross section point datasets") +} else { + + message("All id / cs_id are correctly within all transects / cross section points datasets") +} + +trans_path <- paste0(DOMAIN_WITH_FEMA_OUTPUT_DIR, "/transects.gpkg") +transects <- sf::read_sf(trans_path) %>% + dplyr::rename(geometry = geom) + +is_validated_transects <- validate_transects(transects, "id") + +is_flowline_validated_transects <- validate_transects_against_flowlines(transects, flines, "id") + +if (!(is_validated_transects && is_flowline_validated_transects)) { + stop(trans_path, " transects failed validity check:", + "\n > is_validated_transects: ", is_validated_transects, + "\n > is_flowline_validated_transects: ", is_flowline_validated_transects + ) +} else { + message("Transects look good to go!") +} + +output_files <- list.files(DOMAIN_WITH_FEMA_OUTPUT_DIR, full.names = T) +cs_pts_files <- output_files[grepl(".parquet", output_files)] + +for (i in seq_along(cs_pts_files)) { + path <- cs_pts_files[i] + message(i, " - Checking cross section points validity for > '", basename(path), "'") + + cs_pts 
<-arrow::read_parquet(path) + + is_validated_cs_pts <- validate_cs_pts(cs_pts, crosswalk_id = "id") + is_transect_validated_cs_pts <- validate_cs_pts_against_transects(cs_pts, transects, crosswalk_id = "id") + + is_valid <- is_validated_cs_pts & is_transect_validated_cs_pts + + if(!is_valid) { + stop(path, " cs_pts failed validity check:", + "\n > is_validated_cs_pts: ", is_validated_cs_pts, + "\n > is_transect_validated_cs_pts: ", is_transect_validated_cs_pts + ) + } else { + message("'", basename(path), "' is valid and is good to go!") + } + + # message("'", basename(path), "' is validated") + + } + +# og_trans <- sf::read_sf(paste0(DOMAIN_WITH_FEMA_TRANSECTS_DIR, "/pre-extension-transects.gpkg")) +# post_trans <- sf::read_sf(paste0(DOMAIN_WITH_FEMA_TRANSECTS_DIR, "/post-extension-transects.gpkg")) +# dplyr::filter(transects, id == "wb-10813") %>% +# sf::st_buffer(2500) +# +# bb <- +# dplyr::filter(transects, id == "wb-10813") %>% +# sf::st_buffer(5000) %>% +# sf::st_bbox() %>% +# sf::st_as_sfc() %>% +# sf::st_sf() +# +# sf::st_filter(transects, bb) +# sf::st_filter(transects, bb) +# sf::st_filter(transects, bb) +# +# mapview::mapview(dplyr::filter(flines, id == "wb-10813")) + +# mapview::mapview( sf::st_filter(transects, bb) , color = "green") + +# mapview::mapview( sf::st_filter(og_trans, bb) , color = "red") + +# mapview::mapview( sf::st_filter(post_trans, bb) , color = "gold") +# +# transects %>% +# sf::st_filter( +# bb +# ) %>% mapview::mapview() +# +# mapview::mapview() + mapview::mapview(dplyr::filter(flines, id == "wb-10813")) + +# mapview::mapview( dplyr::filter(transects, id == "wb-10813"), color = "green") + +# mapview::mapview(dplyr::filter(og_trans, id == "wb-10813"), color = "red") +# +# +# dplyr::filter(og_trans, id == "wb-10813") +# dplyr::filter(post_trans, id == "wb-10813") +# dplyr::filter(transects, id == "wb-10813") +# +# mapview::mapview(dplyr::filter(flines, id == "wb-10813")) + +# mapview::mapview( dplyr::filter(transects, id == "wb-10813"), color = "green") + +# mapview::mapview(dplyr::filter(og_trans, id == "wb-10813"), color = "red") +# +# transects %>% +# dplyr::group_by(id) %>% +# dplyr::filter(cs_id == max(cs_id)) %>% +# dplyr::arrange(cs_id) +# +# # cs_pts_files <- +# cs_pts <-arrow::read_parquet(paste0(DOMAIN_WITH_FEMA_OUTPUT_DIR, "/dem_cross-sections.parquet")) +# +# is_validated_transects <- validate_transects(transects, "id") +# is_flowline_validated_transects <- validate_transects_against_flowlines(transects, flines, "id") +# +# is_validated_cs_pts <- validate_cs_pts(cs_pts, crosswalk_id = "id") +# is_transect_validated_cs_pts <- validate_cs_pts_against_transects(cs_pts, transects, crosswalk_id = "id") + +# sf::st_layers(paste0(DOMAIN_WITH_FEMA_OUTPUT_DIR, "/transects.gpkg"))_ + +validate_transects_self_intersections <- function(transects) { + return( + nrow(transects) == nrow(hydrofabric3D:::rm_self_intersections(transects)) + ) +} + +validate_transects_cs_id_enumeration <- function(transects, crosswalk_id = NULL) { + + # reenumerate the cs_ids for each transect based on cs_measure sorting, and make sure all cross sections are correctly numbered + mismatches <- + transects %>% + sf::st_drop_geometry() %>% + dplyr::group_by(dplyr::across(dplyr::any_of(c(crosswalk_id)))) %>% + dplyr::arrange(cs_measure, .by_group = TRUE) %>% + dplyr::mutate( + new_cs_id = 1:dplyr::n() + ) %>% + dplyr::ungroup() %>% + dplyr::filter(cs_id != new_cs_id) + + # FALSE if there are any transects with different cs_ids to the newly created cs_id + # Otherwise TRUE + 
has_valid_cs_ids <- !(nrow(mismatches) > 0) + + return( + has_valid_cs_ids + ) + +} + +validate_transects_cs_length <- function(transects, crosswalk_id = NULL) { + + # re calculate transect geometry length and compare to cs_lengthm column + wrong_lengths <- + transects %>% + dplyr::mutate( + new_cs_length = as.numeric(sf::st_length(.)) + ) %>% + dplyr::filter(cs_lengthm != new_cs_length) + + # FALSE if there are any transects with different cs_lengthm than the freshly calculated new_cs_length + has_correct_lengths <- !(nrow(wrong_lengths) > 0) + + return( + has_correct_lengths + ) + +} + +validate_transects_unique_ids <- function(transects, crosswalk_id = NULL) { + + duplicate_ids <- + transects %>% + sf::st_drop_geometry() %>% + hydrofabric3D::add_tmp_id(x = crosswalk_id) %>% + dplyr::select(tmp_id) %>% + dplyr::group_by(tmp_id) %>% + dplyr::count() %>% + dplyr::ungroup() %>% + dplyr::filter(n > 1) + + # FALSE if there are ANY rows in the duplicate_ids dataframe above + # (i.e. a count of greater than 1 for any tmp_id (_cs_id)) + has_unique_ids <- !(nrow(duplicate_ids) > 0) + + return( + has_unique_ids + ) + +} + +validate_transects_cs_measure <- function(transects) { + + min_cs_measure <- + transects %>% + sf::st_drop_geometry() %>% + dplyr::pull(cs_measure) %>% + min() + + max_cs_measure <- + transects %>% + sf::st_drop_geometry() %>% + dplyr::pull(cs_measure) %>% + max() + + # cs_measure should always be: + # greater than or equal to 0 AND + # less than or equal to 100 as its a percentage along a flowline + has_valid_cs_measure <- min_cs_measure >= 0 & max_cs_measure <= 100 + + return( + has_valid_cs_measure + ) + +} + +validate_transects_has_complete_geometries <- function(transects, crosswalk_id = NULL) { + + has_empty_geoms <- + transects %>% + sf::st_is_empty() %>% + any() + + return( + !has_empty_geoms + ) + +} + +validate_transects_has_crs <- function(transects) { + + missing_crs <- + transects %>% + sf::st_crs() %>% + is.na() + + return( + !missing_crs + ) + +} + +validate_transects <- function(transects, + crosswalk_id = NULL + ) { + + # # standardize geometry name + # transects <- hydroloom::rename_geometry(transects, "geometry") + + REQUIRED_COLS <- c(crosswalk_id, "cs_id", "cs_source", "cs_measure", "cs_lengthm", "geometry") + + # validate dataframe has all correct columns + has_all_valid_cols <- hydrofabric3D:::validate_df( + x = transects, + cols = REQUIRED_COLS, + obj_name = "transects" + ) + + # Validate every flowline (id) has a cs_id of 1:number of transects + has_valid_cs_ids <- validate_transects_cs_id_enumeration(transects, crosswalk_id = crosswalk_id) + + # Validate there are no self intersections + has_no_self_intersections <- validate_transects_self_intersections(transects) + + # validate the cs_lengthm column equals the actual transect geometry length + has_correct_lengths <- validate_transects_cs_length(transects, crosswalk_id = crosswalk_id) + + # validate no duplicate id / cs_id combos + has_unique_ids <- validate_transects_unique_ids(transects, crosswalk_id = crosswalk_id) + + # validate cs measure is never greater than 100 (i think) + has_valid_cs_measure <- validate_transects_cs_measure(transects) + + # make sure transects have no empty geometries + has_complete_geoemetries <- validate_transects_has_complete_geometries(transects) + + # make sure transects have a CRS + has_crs <- validate_transects_has_crs(transects) + + # if everything is TRUE, return true, otherwise return FALSE (or throw an error...?) 
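+  # (Optional debugging sketch, not part of the original checks: collecting the flags in a named
+  # vector makes it easier to report which specific transect check failed. The "checks" name is
+  # illustrative only and the snippet is left commented out.)
+  # checks <- c(
+  #   cols               = has_all_valid_cols,
+  #   cs_id_enumeration  = has_valid_cs_ids,
+  #   self_intersections = has_no_self_intersections,
+  #   lengths            = has_correct_lengths,
+  #   unique_ids         = has_unique_ids,
+  #   cs_measure         = has_valid_cs_measure,
+  #   geometries         = has_complete_geoemetries,
+  #   crs                = has_crs
+  # )
+  # if (!all(checks)) { message("Failed transect checks: ", paste(names(checks)[!checks], collapse = ", ")) }
+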
+ is_validated_transects <- all( + c( + has_all_valid_cols, + has_valid_cs_ids, + has_no_self_intersections, + has_correct_lengths, + has_unique_ids, + has_valid_cs_measure, + has_complete_geoemetries, + has_crs + ) + ) + + return(is_validated_transects) + +} + +# all ids in transects are in flowlines +validate_transects_ids_in_flowlines <- function(transects, flowlines, crosswalk_id = NULL) { + + # flowlines <- flines + + transect_ids <- + transects %>% + sf::st_drop_geometry() %>% + dplyr::select(dplyr::any_of(crosswalk_id)) %>% + dplyr::pull(dplyr::any_of(crosswalk_id)) %>% + unique() + + flowline_ids <- + flowlines %>% + sf::st_drop_geometry() %>% + dplyr::select(dplyr::any_of(crosswalk_id)) %>% + dplyr::pull(dplyr::any_of(crosswalk_id)) %>% + unique() + + all_transect_ids_in_flowline_ids <- all(transect_ids %in% flowline_ids) + + return( + all_transect_ids_in_flowline_ids + ) + +} + +# make sure no transect crosses more than a single flowline, a single time +validate_transects_flowline_intersections <- function(transects, flowlines) { + + # flowlines <- flines + return ( + nrow(transects) == nrow(hydrofabric3D:::rm_multiflowline_intersections(transects, flowlines)) + ) +} + +# validate 2 SF objects have the same CRS +validate_same_crs <- function(x, y) { + + return ( + sf::st_crs(x) == sf::st_crs(y) + ) + +} + +validate_transects_against_flowlines <- function(transects, + flowlines, + crosswalk_id = NULL + ) { + + # # standardize geometry name + # transects <- hydroloom::rename_geometry(transects, "geometry") + + REQUIRED_COLS <- c(crosswalk_id, "cs_id", "cs_source", "cs_measure", "cs_lengthm", "geometry") + + # validate dataframe has all correct columns + has_all_valid_cols <- hydrofabric3D:::validate_df( + x = transects, + cols = REQUIRED_COLS, + obj_name = "transects" + ) + + # all ids in transects are in flowlines + all_transect_ids_in_flowline_ids <- validate_transects_ids_in_flowlines(transects, flowlines, crosswalk_id = crosswalk_id) + + # transects only intersects a single flowline, a single time + has_valid_flowline_intersects <- validate_transects_flowline_intersections(transects, flowlines) + + # transects and flowlines have the same CRS + has_same_crs <- validate_same_crs(transects, flowlines) + + + # if everything is TRUE, return true, otherwise return FALSE (or throw an error...?) 
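+  # (Optional debugging sketch: if the ID check fails, it can help to list which transect ids have
+  # no matching flowline. "missing_ids" is illustrative only and the snippet is left commented out.)
+  # missing_ids <- setdiff(
+  #   unique(sf::st_drop_geometry(transects)[[crosswalk_id]]),
+  #   unique(sf::st_drop_geometry(flowlines)[[crosswalk_id]])
+  # )
+  # if (length(missing_ids) > 0) { message("Transect ids missing from flowlines: ", paste(missing_ids, collapse = ", ")) }
+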
+ is_flowline_validated_transects <- all( + c( + has_all_valid_cols, + all_transect_ids_in_flowline_ids, + has_valid_flowline_intersects, + has_same_crs + ) + ) + + return(is_flowline_validated_transects) + +} + +# validate_transects_against_cs_pts <- function( +# transects, +# cs_pts, +# crosswalk_id = NULL +# ) { +# +# # # standardize geometry name +# # transects <- hydroloom::rename_geometry(transects, "geometry") +# +# REQUIRED_COLS <- c(crosswalk_id, "cs_id", "cs_source", "cs_measure", "cs_lengthm", "geometry") +# +# # validate dataframe has all correct columns +# has_all_valid_cols <- hydrofabric3D:::validate_df( +# x = transects, +# cols = REQUIRED_COLS, +# obj_name = "transects" +# ) +# } + +validate_cs_pts_cs_id_enumeration <- function(cs_pts, crosswalk_id = NULL) { + + # reenumerate the cs_ids for each transect based on cs_measure sorting, and make sure all cross sections are correctly numbered + mismatches <- + cs_pts %>% + # dplyr::slice(1:150) %>% + sf::st_drop_geometry() %>% + dplyr::select(dplyr::any_of(crosswalk_id), cs_id) %>% + dplyr::group_by(dplyr::across(dplyr::any_of(c(crosswalk_id, "cs_id")))) %>% + dplyr::slice(1) %>% + dplyr::ungroup() %>% + dplyr::group_by(dplyr::across(dplyr::any_of(c(crosswalk_id)))) %>% + dplyr::arrange(cs_id, .by_group = TRUE) %>% + dplyr::mutate( + new_cs_id = 1:dplyr::n() + ) %>% + dplyr::ungroup() %>% + dplyr::filter(cs_id != new_cs_id) + + # FALSE if there are any transects with different cs_ids to the newly created cs_id + # Otherwise TRUE + has_valid_cs_ids <- !(nrow(mismatches) > 0) + + return( + has_valid_cs_ids + ) + +} + +validate_cs_pts_pt_id_enumeration <- function(cs_pts, crosswalk_id = NULL) { + + # reenumerate the pt_ids to make sure the pt_ids are valid values of 1:number of points in cross section + mismatches <- + cs_pts %>% + sf::st_drop_geometry() %>% + dplyr::select(dplyr::any_of(crosswalk_id), cs_id, pt_id) %>% + dplyr::group_by(dplyr::across(dplyr::any_of(c(crosswalk_id, "cs_id")))) %>% + dplyr::arrange(pt_id, .by_group = TRUE) %>% + dplyr::mutate( + new_pt_id = 1:dplyr::n() + ) %>% + dplyr::ungroup() %>% + dplyr::filter(pt_id != new_pt_id) + + # FALSE if there are any cs_pts with pt_ids different from the newly created new_pt_id + # Otherwise TRUE + has_valid_pt_ids <- !(nrow(mismatches) > 0) + + return( + has_valid_pt_ids + ) + +} + +validate_cs_pts_relative_distance <- function(cs_pts, crosswalk_id = NULL) { + + # make sure relative distance is greater than or equal to 0 + min_relative_distance <- + cs_pts %>% + # dplyr::slice(1:50) %>% + sf::st_drop_geometry() %>% + dplyr::pull(relative_distance) %>% + min() + + + # reenumerate the pt_ids to make sure the pt_ids are valid values of 1:number of points in cross section + rel_dist_check <- + cs_pts %>% + # dplyr::slice(1:50) %>% + sf::st_drop_geometry() %>% + dplyr::select(dplyr::any_of(crosswalk_id), cs_id, pt_id, relative_distance, cs_lengthm) %>% + dplyr::group_by(dplyr::across(dplyr::any_of(c(crosswalk_id, "cs_id")))) %>% + dplyr::summarise( + cs_lengthm = max(cs_lengthm), + # min_rel_dist = min(relative_distance), + max_rel_dist = max(relative_distance) + ) %>% + dplyr::ungroup() %>% + dplyr::mutate( + # TODO: as long as the lengths are within 1 meter, thats equal + is_valid_relative_dist = abs(cs_lengthm - max_rel_dist) <= 1 + # approx_equal_lengths = all.equal(cs_lengthm, cs_pts_lengthm, tolerance = 0.01) + ) + + # dplyr::filter(max_rel_dist > cs_lengthm) + + # relative distance is always greater than or equal to 0 and less than the cross sections length + 
has_valid_relative_dist_min <- min_relative_distance >= 0 + has_valid_relative_dist_maximums <- all(rel_dist_check$is_valid_relative_dist) + + return( + has_valid_relative_dist_min && has_valid_relative_dist_maximums + ) + +} + +validate_cs_pts_point_types <- function(cs_pts) { + + # make sure only "left_bank", "right_bank", "channel", and "bottom" values exist in cs_pts point_type column + valid_point_types <- c("left_bank", "right_bank", "channel", "bottom") + + # unique point types in cs_pts + unique_point_types <- unique(cs_pts$point_type) + + has_only_valid_point_types <- all(unique_point_types %in% valid_point_types) + + return( + has_only_valid_point_types + ) + +} + +validate_cs_pts <- function( + cs_pts, + crosswalk_id = NULL +) { + + # # standardize geometry name + # transects <- hydroloom::rename_geometry(transects, "geometry") + + REQUIRED_COLS <- c(crosswalk_id, "cs_id", "pt_id", + "relative_distance", "cs_lengthm", "X", "Y", "Z", "Z_source", + "class", "point_type", "valid_banks", "has_relief" + ) + + # validate dataframe has all correct columns + has_all_valid_cols <- hydrofabric3D:::validate_df( + x = cs_pts, + cols = REQUIRED_COLS, + obj_name = "cs_pts" + ) + + # make sure valid cs_ids + has_valid_cs_pts_cs_ids <- validate_cs_pts_cs_id_enumeration(cs_pts, crosswalk_id = crosswalk_id) + + # make sure valid pt_ids + has_valid_cs_pts_pt_ids <- validate_cs_pts_pt_id_enumeration(cs_pts, crosswalk_id = crosswalk_id) + + # check cs_pts have only valid relative_distance values + has_valid_relative_distances <- validate_cs_pts_relative_distance(cs_pts, crosswalk_id = crosswalk_id) + + has_valid_point_types <- validate_cs_pts_point_types(cs_pts) + + # if everything is TRUE, return true, otherwise return FALSE (or throw an error...?) + is_validated_cs_pts <- all( + c( + has_all_valid_cols, + has_valid_cs_pts_cs_ids, + has_valid_cs_pts_pt_ids, + has_valid_relative_distances, + has_valid_point_types + ) + ) + + return(is_validated_cs_pts) + +} + +# validate all cs_pts id/cs_ids are in the transects +validate_cs_pt_ids_in_transects <- function(cs_pts, transects, crosswalk_id = NULL) { + + # flowlines <- flines + cs_pts_ids <- + cs_pts %>% + sf::st_drop_geometry() %>% + hydrofabric3D::get_unique_tmp_ids(x = crosswalk_id) + + transect_ids <- + transects %>% + sf::st_drop_geometry() %>% + hydrofabric3D::get_unique_tmp_ids(x = crosswalk_id) + + + all_cs_pts_ids_in_transects <- all(cs_pts_ids %in% transect_ids) + all_transect_ids_in_cs_pts <- all(transect_ids %in% cs_pts_ids) + same_number_of_ids <- length(cs_pts_ids) == length(transect_ids) + + is_valid_cs_pts_ids <- all( + c( + all_cs_pts_ids_in_transects, + all_transect_ids_in_cs_pts, + same_number_of_ids + ) + ) + + return( + is_valid_cs_pts_ids + ) + +} + +validate_cs_pts_length_against_transects <- function(cs_pts, transects, crosswalk_id = NULL) { + + cs_pt_lengths <- + cs_pts %>% + sf::st_drop_geometry() %>% + dplyr::select(dplyr::any_of(crosswalk_id), cs_id, cs_lengthm) %>% + dplyr::group_by(dplyr::across(dplyr::any_of(c(crosswalk_id, "cs_id")))) %>% + dplyr::slice(1) %>% + dplyr::ungroup() %>% + dplyr::rename( + cs_pts_lengthm = cs_lengthm + ) + + transect_lengths <- + transects %>% + sf::st_drop_geometry() %>% + dplyr::select(dplyr::any_of(crosswalk_id), cs_id, cs_lengthm) %>% + dplyr::group_by(dplyr::across(dplyr::any_of(c(crosswalk_id, "cs_id")))) %>% + dplyr::slice(1) %>% + dplyr::ungroup() + + lengths_check <- + dplyr::left_join( + transect_lengths, + cs_pt_lengths, + by = c(crosswalk_id, "cs_id") + ) %>% + dplyr::mutate( + # 
TODO: as long as the lengths are within 1 meter, treat them as equal
+      approx_equal_lengths = abs(cs_lengthm - cs_pts_lengthm) <= 1
+      # approx_equal_lengths = all.equal(cs_lengthm, cs_pts_lengthm, tolerance = 0.01)
+    )
+
+  all_lengths_are_equal <- all(lengths_check$approx_equal_lengths)
+
+  return(
+    all_lengths_are_equal
+  )
+
+}
+
+validate_cs_pts_against_transects <- function(
+    cs_pts,
+    transects,
+    crosswalk_id = NULL
+) {
+
+  # # standardize geometry name
+  # transects <- hydroloom::rename_geometry(transects, "geometry")
+
+  # make sure all id/cs_id combos are in both transects and cs_pts
+  has_valid_cs_pts_ids <- validate_cs_pt_ids_in_transects(cs_pts, transects, crosswalk_id = crosswalk_id)
+
+  # make sure cs_lengthm matches from transects to cs_pts
+  has_matching_lengths <- validate_cs_pts_length_against_transects(cs_pts, transects, crosswalk_id = crosswalk_id)
+
+  # if everything is TRUE, return true, otherwise return FALSE (or throw an error...?)
+  is_transect_validated_cs_pts <- all(
+    c(
+      has_valid_cs_pts_ids,
+      has_matching_lengths
+    )
+  )
+
+  return(is_transect_validated_cs_pts)
+
+}
+
+# og_transects <- sf::read_sf(paste0(DOMAIN_WITH_FEMA_TRANSECTS_DIR, "/cs-point-extension-transects.gpkg"))
+# new_transects <- sf::read_sf(paste0(DOMAIN_WITH_FEMA_OUTPUT_DIR, "/transects.gpkg"))
+
+# new_transects %>%
+#   dplyr::arrange(cs_lengthm) %>%
+#   dplyr::slice(1:1000) %>%
+#   mapview::mapview()
+#
+# hydrofabric3D:::rm_self_intersections(new_transects)
+
+# --------------------------------------------------------------------------------------------------------------------------
+# ---- Scratch checks: compare transects before / after removing self intersections ----
+# --------------------------------------------------------------------------------------------------------------------------
+#
+og_trans <- sf::read_sf( "/Users/anguswatters/Desktop/lynker-spatial/domain_with_fema/transects/pre_extension_transects.gpkg")
+
+transects <- sf::read_sf(output_files[[4]])
+
+transects2 <- hydrofabric3D:::rm_self_intersections(transects)
+
+old_ids      <- transects %>% hydrofabric3D::get_unique_tmp_ids("id")
+new_ids      <- transects2 %>% hydrofabric3D::get_unique_tmp_ids("id")
+diff_tmp_ids <- old_ids[!old_ids %in% new_ids]
+
+diff_ids <- lapply(diff_tmp_ids, function(i) {
+  strsplit(i, "_")[[1]][1]
+}) %>% unlist()
+
+diff_trans <-
+  transects %>%
+  # hydrofabric3D::add_tmp_id(x = "id") %>%
+  dplyr::filter(id %in% diff_ids)
+  # sf::st_buffer(50) %>%
+  # sf::st_bbox() %>%
+  # sf::st_as_sfc() %>%
+  # sf::st_sf()
+
+og_diff_trans <-
+  og_trans %>%
+  # hydrofabric3D::add_tmp_id(x = "id") %>%
+  dplyr::filter(id %in% diff_ids)
+
+mapview::mapview(diff_trans, color = "green") +
+  mapview::mapview(og_diff_trans, color = "red")
+
+# bounding box around the differing transects (used to subset the original transects below)
+diff_bb <-
+  diff_trans %>%
+  sf::st_buffer(50) %>%
+  sf::st_bbox() %>%
+  sf::st_as_sfc() %>%
+  sf::st_sf()
+
+old_trans <- transects[lengths(sf::st_intersects(transects, diff_bb)) > 0, ]
+
+# final_cross_sections %>%
+#   dplyr::filter(id == "wb-1000") %>%
+#   dplyr::rename(hy_id = id) %>%
+#   hydrofabric3D::plot_cs_pts(color = "point_type")
+
+# ---------------------------------------------------------------------------------
+# ---- Write final cross section points data ----
+# ---- Diffusive Domain DEM + FEMA + ML
+# ---------------------------------------------------------------------------------
+
+CROSS_SECTIONS_ML_OUTPUT_PATH <- paste0(DOMAIN_WITH_FEMA_OUTPUT_DIR, "/cross-sections.parquet")
+
+message(round(Sys.time()), " - Saving Diffusive DEM + FEMA + ML augmented cross section points to:\n - filepath: '", 
CROSS_SECTIONS_ML_OUTPUT_PATH, "'") +# transects <- +# sum(is.na(final_cross_sections$id)) +# sum(is.na(final_cross_sections$cs_id)) +# sum(is.na(final_cross_sections$pt_id)) +# sum(is.na(final_cross_sections$X)) + +# save cross section points as a parquet to out_path (domain/outputs/cross-sections.parquet) +arrow::write_parquet( + # dplyr::select(final_cs, + # -is_dem_point + # ), + cross_sections, + CROSS_SECTIONS_ML_OUTPUT_PATH +) + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/runners/cs_runner/download_ml_outputs.R b/runners/cs_runner/download_ml_outputs.R new file mode 100644 index 0000000..a3751cc --- /dev/null +++ b/runners/cs_runner/download_ml_outputs.R @@ -0,0 +1,16 @@ +# download_ml_outputs.R +# This script pulls the ML outputs data from http://www.lynker-spatial.com/, saves them into a directory within "BASE_DIR" + +# load config variables +source("runners/cs_runner/config_vars.R") + +# --------------------------------------------------------------------------- +# ---- Get ML outputs data from S3 bucket ---- +# --------------------------------------------------------------------------- + +ml_copy_cmd <- paste0('aws s3 cp ', ML_OUTPUTS_S3_URI, ' ', paste0(ML_OUTPUTS_DIR, basename(ML_OUTPUTS_S3_URI))) + +message("Copying S3 object:\n", ML_OUTPUTS_S3_URI) +system(ml_copy_cmd) +message("Download '", paste0(ML_OUTPUTS_DIR, basename(ML_OUTPUTS_S3_URI)), "' complete!") +message("------------------") diff --git a/runners/cs_runner/download_nextgen.R b/runners/cs_runner/download_nextgen.R index a1aa30a..9c455ee 100644 --- a/runners/cs_runner/download_nextgen.R +++ b/runners/cs_runner/download_nextgen.R @@ -1,42 +1,28 @@ -# Running this script goes and pulls the desired NextGen geopackage datasets from http://www.lynker-spatial.com/, saves them into a directory within "base_dir" -# base_dir is defined within runners/workflow/root_dir.R +# Running this script goes and pulls the desired NextGen geopackage datasets from http://www.lynker-spatial.com/, saves them into a directory within "BASE_DIR" +# BASE_DIR is defined within runners/workflow/root_dir.R # load config variables source("runners/cs_runner/config_vars.R") -# name of S3 bucket -s3_bucket <- "s3://lynker-spatial/" - -# nextgen bucket name -prerelease_prefix <- "s3://lynker-spatial/pre-release/" - -# nextgen model attributes folder in S3 bucket with parquet files -model_attr_prefix <- paste0(s3_bucket, "v20/3D/model_attributes/") - # directory to copy nextgen bucket data too -nextgen_dir <- paste0(base_dir, "/pre-release/") +NEXTGEN_DIR <- paste0(BASE_DIR, "/", S3_BUCKET_NEXTGEN_DIR) # create the directory if it does NOT exist -if(!dir.exists(nextgen_dir)) { - message("Directory does not exist at: \n\t'", nextgen_dir, "'\nCreating directory at: \n\t'", nextgen_dir, "'") +if(!dir.exists(NEXTGEN_DIR)) { + message("Directory does not exist at: \n\t'", NEXTGEN_DIR, "'\nCreating directory at: \n\t'", NEXTGEN_DIR, "'") - dir.create(nextgen_dir) + dir.create(NEXTGEN_DIR) } -# model attributes directory -model_attr_dir <- paste0(base_dir, "/model_attributes/") - -# create the directory if it does NOT exist -if(!dir.exists(model_attr_dir)) { - message("Directory does not exist at: \n\t'", model_attr_dir, "'\nCreating directory at: \n\t'", model_attr_dir, "'") - dir.create(model_attr_dir) -} +# --------------------------------------------------------------------------- +# ---- List/Get the nextgen gpkgs from S3 bucket ---- +# --------------------------------------------------------------------------- # list objects in S3 
bucket, and regular expression match to nextgen_.gpkg pattern command <- paste0('#!/bin/bash # AWS S3 Bucket and Directory information - S3_BUCKET="', prerelease_prefix, '" - DESTINATION_DIR=', nextgen_dir, ' + S3_BUCKET="', S3_BUCKET_NEXTGEN_DIR_URI, '" + DESTINATION_DIR=', NEXTGEN_DIR, ' # Regular expression pattern to match object keys PATTERN="^nextgen_[0-9][0-9][A-Za-z]*\\.gpkg$" @@ -54,8 +40,8 @@ bucket_keys <- system(command, intern = TRUE) # Parse the selected S3 objects keys and copy them to the destination directory for (key in bucket_keys) { - copy_cmd <- paste0('aws s3 cp ', prerelease_prefix, key, " ", nextgen_dir, key) - message("Copying S3 object:\n", paste0(prerelease_prefix, key)) + copy_cmd <- paste0('aws s3 cp ', S3_BUCKET_NEXTGEN_DIR_URI, key, " ", NEXTGEN_DIR, key) + message("Copying S3 object:\n", paste0(S3_BUCKET_NEXTGEN_DIR_URI, key)) system(copy_cmd) @@ -63,37 +49,51 @@ for (key in bucket_keys) { message("------------------") } -# ---- Get nextgen model attributes parquets ---- -# aws s3 ls s3://lynker-spatial/v20/3D/model_attributes/ - -# list objects in S3 bucket, and regular expression match to nextgen_.gpkg pattern -list_model_attr_cmd <- paste0('#!/bin/bash - # AWS S3 Bucket and Directory information - S3_BUCKET="', model_attr_prefix, '" - - # Regular expression pattern to match object keys - PATTERN="^nextgen_[0-9][0-9][A-Za-z]*\\_model_attributes.parquet$" - - # AWS CLI command to list objects in the S3 bucket and use grep to filter them - S3_OBJECTS=$(aws s3 ls "$S3_BUCKET" | awk \'{print $4}\' | grep -E "$PATTERN") - - echo "$S3_OBJECTS"' -) - -# get a list of the model attributes objects in S3 -model_attr_keys <- system(list_model_attr_cmd, intern = TRUE) - -# Parse the selected S3 objects keys and copy them to the destination directory -for (key in model_attr_keys) { - - copy_cmd <- paste0('aws s3 cp ', model_attr_prefix, key, ' ', model_attr_dir, key) - - message("Copying S3 object:\n", paste0(model_attr_prefix, key)) - - system(copy_cmd) - - message("Download '", paste0(model_attr_prefix, key), "' complete!") - message("------------------") -} - - +# # --------------------------------------------------------------------------- +# # ---- List/Get reference features from S3 bucket ---- +# # --------------------------------------------------------------------------- +# +# ## Go get a list of the reference features geopackages from S3 and create a save path using the S3 file names to save reference features to local directory +# +# # list objects in S3 bucket, and regular expression match to nextgen_.gpkg pattern +# list_ref_features <- paste0('#!/bin/bash +# # AWS S3 Bucket and Directory information +# S3_BUCKET="', S3_BUCKET_REF_FEATURES_URI , '" +# +# # Regular expression pattern to match object keys +# PATTERN="reference_features.gpkg" +# +# S3_OBJECTS=$(aws s3 ls "$S3_BUCKET" | awk \'{print $4}\' | grep -E "$PATTERN") +# +# echo "$S3_OBJECTS"' +# ) +# +# # ---- Get a list of reference features geopackages geopackages ---- +# # Run the script to get a list of the nextgen geopackages that matched the regular expression above +# ref_features <- system(list_ref_features, intern = TRUE) +# +# ## Download reference features geopackages and save them to a local directory +# # Parse the selected S3 objects keys and copy them to the destination directory +# for (key in ref_features) { +# # paste0(REF_FEATURES_DIR, "gpkg/") +# copy_cmd <- paste0('aws s3 cp ', S3_BUCKET_REF_FEATURES_URI, key, ' ', paste0(REF_FEATURES_DIR, "gpkg/"), key) +# +# message("Copying S3 
object:\n", paste0(S3_BUCKET_REF_FEATURES_URI, key)) +# system(copy_cmd) +# +# message("Download '", paste0(REF_FEATURES_DIR, "gpkg/", key), "' complete!") +# message("------------------") +# } +# +# # --------------------------------------------------------------------------- +# # ---- Get ML outputs data from S3 bucket ---- +# # --------------------------------------------------------------------------- +# +# ml_copy_cmd <- paste0('aws s3 cp ', ML_OUTPUTS_S3_URI, ' ', paste0(ML_OUTPUTS_DIR, basename(ML_OUTPUTS_S3_URI))) +# +# message("Copying S3 object:\n", ML_OUTPUTS_S3_URI) +# system(ml_copy_cmd) +# +# message("Download '", paste0(ML_OUTPUTS_DIR, basename(ML_OUTPUTS_S3_URI)), "' complete!") +# message("------------------") +# diff --git a/runners/cs_runner/download_ref_features.R b/runners/cs_runner/download_ref_features.R new file mode 100644 index 0000000..fc10e80 --- /dev/null +++ b/runners/cs_runner/download_ref_features.R @@ -0,0 +1,35 @@ +# download_ref_features.R +# This script pulls the reference features geopackage datasets from http://www.lynker-spatial.com/, saves them into a directory within "BASE_DIR" + +# load config variables +source("runners/cs_runner/config_vars.R") + +# --------------------------------------------------------------------------- +# ---- List/Get reference features from S3 bucket ---- +# --------------------------------------------------------------------------- + +# list objects in S3 bucket, and regular expression match to reference_features.gpkg pattern +list_ref_features <- paste0('#!/bin/bash + # AWS S3 Bucket and Directory information + S3_BUCKET="', S3_BUCKET_REF_FEATURES_URI, '" + + # Regular expression pattern to match object keys + PATTERN="reference_features.gpkg" + + S3_OBJECTS=$(aws s3 ls "$S3_BUCKET" | awk \'{print $4}\' | grep -E "$PATTERN") + + echo "$S3_OBJECTS"' +) + +# ---- Get a list of reference features geopackages ---- +# Run the script to get a list of the reference features geopackages that matched the regular expression above +ref_features <- system(list_ref_features, intern = TRUE) + +# Parse the selected S3 objects keys and copy them to the destination directory +for (key in ref_features) { + copy_cmd <- paste0('aws s3 cp ', S3_BUCKET_REF_FEATURES_URI, key, ' ', paste0(REF_FEATURES_DIR, "gpkg/"), key) + message("Copying S3 object:\n", paste0(S3_BUCKET_REF_FEATURES_URI, key)) + system(copy_cmd) + message("Download '", paste0(REF_FEATURES_DIR, "gpkg/", key), "' complete!") + message("------------------") +} diff --git a/runners/cs_runner/new_domain.R b/runners/cs_runner/new_domain.R new file mode 100644 index 0000000..9dda641 --- /dev/null +++ b/runners/cs_runner/new_domain.R @@ -0,0 +1,293 @@ +# Generate the transects + cs_pts + cross sections layers for a single flowlines domain file and DEM file +source("runners/cs_runner/config.R") +source("runners/cs_runner/utils.R") + +# # # # load libraries +# library(hydrofabric3D) +# library(dplyr) +# library(sf) +# install.packages("devtools") + +# # transect bucket prefix +# S3_TRANSECTS_DIR <- paste0(LYNKER_SPATIAL_HF_S3_URI, VERSION, "/3D/transects/") + +# paths to NEW DOMAIN datasets +# NEW_DOMAIN_FILES <- list.files(NEW_DOMAIN_FLOWLINES_DIR, full.names = TRUE) + +# --------------------------------------------------------------------- +# --- Read in flowlines +# --------------------------------------------------------------------- +# read in nextgen data +flines <- sf::read_sf(NEW_DOMAIN_FLOWLINES_PATH, layer = "flowpaths") + +VPUS <- sf::st_transform(nhdplusTools::vpu_boundaries, 
sf::st_crs(flines)) +vpu_flines_int <- sf::st_intersects(VPUS, flines) +vpu_aoi <- VPUS[lengths(vpu_flines_int) > 0, ] + +# VPU IDs of interest +VPU_IDS <- vpu_aoi$VPUID + +# sf::write_sf(aoi, "all_diffusive_combined_vpus.gpkg") +# VPUS[lengths(vpu_fl_int) > 0, ] %>% mapview::mapview() +# nhdplusTools::vpu_boundaries + +# flines %>% +# dplyr::filter(order >= 3) %>% +# # flines %>% +# sf::st_transform(4326) %>% +# mapview::mapview() +# # ggplot2::ggplot() + +# # ggplot2::geom_sf() +# +# +# flines %>% +# sf::st_transform(4326) %>% +# sf::st_bbox() %>% +# sf::st_as_sfc() %>% +# sf::st_as_sf() %>% +# mapview::mapview() + +# --------------------------------------------------------------------- +# --- Add bankful width estimate +# --------------------------------------------------------------------- + +# calculate bankfull width +flines <- + flines %>% + sf::st_transform(26904) %>% + dplyr::mutate( + bf_width = exp(0.700 + 0.365* log(tot_drainage_areasqkm)) + ) %>% + dplyr::select( + hy_id = id, + lengthkm, + tot_drainage_areasqkm, + bf_width, + mainstem, + geometry = geom + ) + +# --------------------------------------------------------------------- +# --- Create transect lines +# --------------------------------------------------------------------- +system.time({ + +# create transect lines +transects <- hydrofabric3D::cut_cross_sections( + net = flines, # flowlines network + id = "hy_id", # Unique feature ID + cs_widths = pmax(50, flines$bf_width * 11), # cross section width of each "id" linestring ("hy_id") + # cs_widths = pmax(50, flines$bf_width), # cross section width of each "id" linestring ("hy_id") + num = 10, # number of cross sections per "id" linestring ("hy_id") + smooth = TRUE, # smooth lines + densify = 3, # densify linestring points + rm_self_intersect = TRUE, # remove self intersecting transects + fix_braids = FALSE, # whether to fix braided flowlines or not + #### Arguments used for when fix_braids = TRUE # TODO: these methods need revision in hydrofabric3D to allow for more flexible processing for data that is NOT COMID based (i.e. 
hy_id) + # terminal_id = NULL, + # braid_threshold = NULL, + # version = 2, + # braid_method = "comid", + # precision = 1, + add = TRUE # whether to add back the original data +) + +}) + +# add cs_source column and rename cs_widths to cs_lengthm +transects <- + transects %>% + dplyr::mutate( + cs_source = CS_SOURCE + ) + +# --------------------------------------------------------------------- +# --- Extract cross section points from DEM +# --------------------------------------------------------------------- +# dem <- terra::rast(NEW_DOMAIN_DEM_PATH) + +system.time({ + +# get cross section point elevations +cs_pts <- hydrofabric3D::cross_section_pts( + + cs = transects, + crosswalk_id = "hy_id", + points_per_cs = NULL, + min_pts_per_cs = 10, + dem = NEW_DOMAIN_DEM_PATH +) + +}) + + +# --------------------------------------------------------------------- +# --- Classify cross section points +# --------------------------------------------------------------------- +# STEP 2: Remove any cross section that has ANY missing (NA) Z values, and classify the points + +cs_pts <- +# cs_pts2 <- + cs_pts %>% + # dplyr::group_by(hy_id, cs_id) %>% + # dplyr::filter(!any(is.na(Z))) %>% + # dplyr::ungroup() %>% + hydrofabric3D::drop_incomplete_cs_pts("hy_id") %>% + hydrofabric3D::classify_points( + crosswalk_id = "hy_id", + pct_of_length_for_relief = PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF + ) + +# cs_pts3 <- +# cs_pts %>% +# # dplyr::group_by(hy_id, cs_id) %>% +# # dplyr::filter(!any(is.na(Z))) %>% +# # dplyr::ungroup() %>% +# hydrofabric3D::drop_incomplete_cs_pts("hy_id") %>% +# hydrofabric3D::classify_points2( +# crosswalk_id = "hy_id", +# pct_of_length_for_relief = PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF +# ) +# +# old_plot <- +# cs_pts2 %>% +# dplyr::slice(1:100) %>% +# hydrofabric3D::plot_cs_pts(color = 'point_type', size = 4)+ +# ggplot2::labs(title = "OLD CLASSES") +# +# new_plot <- +# cs_pts3 %>% +# dplyr::slice(1:100) %>% +# hydrofabric3D::plot_cs_pts(color = 'point_type', size = 4) + +# ggplot2::labs(title = "NEW CLASSES") +# +# library(patchwork) +# old_plot / new_plot +# }) + +ids_original_cs_pts <- hydrofabric3D::add_tmp_id(cs_pts)$tmp_id + +# ---------------------------------------------------------------------------------------------------------------- +# ---- Improve cross section points based on the bank validity & amount of relief in a cross section +# ---------------------------------------------------------------------------------------------------------------- + +# system.time({ +fixed_pts <- hydrofabric3D::get_improved_cs_pts( + cs_pts = cs_pts, # cross section points generated from hydrofabric3D::cross_section_pts() + net = flines, # original flowline network + transects = transects, # original transect lines + crosswalk_id = "hy_id", + points_per_cs = NULL, + min_pts_per_cs = 10, # number of points per cross sections + dem = NEW_DOMAIN_DEM_PATH, # DEM to extract points from + scale = EXTENSION_PCT, # How far to extend transects if the points need to be rechecked + pct_of_length_for_relief = PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF, # percent of cross sections length to be needed in relief calculation to consider cross section to "have relief" + fix_ids = FALSE, + verbose = TRUE +) +# }) + +# ---------------------------------------------------------------------------------------------------------------- +# ---- Update transects with extended transects (if exists) ---- +# ---------------------------------------------------------------------------------------------------------------- + 
+# capture hy_id/cs_ids present after the improvement step (used in the ID checks below)
+ids_after_fixed_pts <- hydrofabric3D::add_tmp_id(fixed_pts)$tmp_id
+
+out_transects <- match_transects_to_extended_cs_pts(
+  transect_lines = transects,
+  fixed_cs_pts   = fixed_pts,
+  crosswalk_id   = "hy_id"
+)
+
+# ----------------------------------------------------------------------------------------------------------------
+# ---- Re-enumerate the transects & cross section points "cs_id" ----
+# ----------------------------------------------------------------------------------------------------------------
+
+# fixed_pts <- hydrofabric3D:::renumber_cs_ids(df = fixed_pts, crosswalk_id = "hy_id")
+# out_transects <- hydrofabric3D:::renumber_cs_ids(
+#   df = dplyr::mutate(out_transects, pt_id = 1),
+#   crosswalk_id = "hy_id"
+# ) %>%
+#   dplyr::select(-pt_id)
+
+fixed_pts     <- hydrofabric3D:::renumber_cs_ids(df = fixed_pts, crosswalk_id = "hy_id")
+out_transects <- hydrofabric3D:::renumber_cs_ids(df = out_transects, crosswalk_id = "hy_id")
+
+# ----------------------------------------------------------------------------------------------------------------
+# ---- STEP 4: Update transects with extended transects (if exists) ----
+# ----------------------------------------------------------------------------------------------------------------
+
+# drop point geometries, keep X / Y coordinate columns, and order the output columns
+fixed_pts <-
+  fixed_pts %>%
+  dplyr::mutate(
+    X = sf::st_coordinates(.)[,1],
+    Y = sf::st_coordinates(.)[,2]
+  ) %>%
+  sf::st_drop_geometry() %>%
+  dplyr::select(
+    hy_id,
+    cs_id,
+    pt_id,
+    cs_lengthm,
+    relative_distance,
+    X, Y, Z,
+    class, point_type,
+    bottom, left_bank, right_bank, valid_banks, has_relief # newly added columns (03/06/2024)
+  )
+
+# add Z_source column for source of elevation data
+fixed_pts <-
+  fixed_pts %>%
+  dplyr::mutate(
+    Z_source = CS_SOURCE
+  ) %>%
+  dplyr::relocate(hy_id, cs_id, pt_id, cs_lengthm, relative_distance, X, Y, Z, Z_source,
+                  class, point_type,
+                  bottom, left_bank, right_bank, valid_banks, has_relief)
+
+ids_before_align <- hydrofabric3D::add_tmp_id(fixed_pts)$tmp_id
+
+message("Aligning banks and smoothing bottoms...")
+fixed_pts <- hydrofabric3D::align_banks_and_bottoms(fixed_pts)
+
+ids_after_align <- hydrofabric3D::add_tmp_id(fixed_pts)$tmp_id
+
+message("Reclassifying cross section points...")
+fixed_pts <- hydrofabric3D::classify_points(
+  cs_pts                   = fixed_pts,
+  crosswalk_id             = "hy_id",
+  pct_of_length_for_relief = PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF
+)
+
+ids_after_reclassify <- hydrofabric3D::add_tmp_id(fixed_pts)$tmp_id
+
+if(all(ids_original_cs_pts %in% ids_after_fixed_pts)) {
+  message("All hy_id/cs_ids in ORIGINAL DEM point extraction were found in the FIXED points")
+} else {
+  message(" >>> Missing hy_id/cs_ids in ORIGINAL DEM point extraction compared to the FIXED points")
+}
+
+if(all(ids_before_align %in% ids_after_align)) {
+  message("All hy_id/cs_ids are kept intact after bank alignment and bottom smoothing")
+} else {
+  message(" >>> Missing hy_id/cs_ids after bank alignment and bottom smoothing")
+}
+
+if(all(ids_after_align %in% ids_after_reclassify)) {
+  message("All hy_id/cs_ids are kept intact after RECLASSIFICATION")
+} else {
+  message(" >>> Missing hy_id/cs_ids after RECLASSIFICATION")
+}
+
+sf::write_sf(
+  out_transects,
+  paste0(NEW_DOMAIN_TRANSECTS_DIR, "/hi_transects.gpkg")
+)
+
+arrow::write_parquet(
+  fixed_pts,
+  paste0(NEW_DOMAIN_CS_PTS_DIR, "/hi_cs_pts.parquet")
+)
+
+# ----------------------------------------------------------------------------------------------------------------
+# ----------------------------------------------------------------------------------------------------------------
+# 
---------------------------------------------------------------------------------------------------------------- \ No newline at end of file diff --git a/runners/cs_runner/set_path_variables.R b/runners/cs_runner/set_path_variables.R new file mode 100644 index 0000000..b590024 --- /dev/null +++ b/runners/cs_runner/set_path_variables.R @@ -0,0 +1,10 @@ +# Generate the list of file paths for locally stored nextgen datasets: +# - NEXTGEN_FILES --> (list NEXTGEN_DIR files that were downloaded via download_nextgen.R) + + +# load config variables +source("runners/cs_runner/config_vars.R") + +# NOTE: SET VARIABLE FOR REST OF PROCESSING +# paths to nextgen datasets +NEXTGEN_FILES <- list.files(NEXTGEN_DIR, full.names = FALSE) diff --git a/runners/cs_runner/test_cs_pts.R b/runners/cs_runner/test_cs_pts.R new file mode 100644 index 0000000..7031bb6 --- /dev/null +++ b/runners/cs_runner/test_cs_pts.R @@ -0,0 +1,256 @@ +# Generate the flowlines layer for the final cross_sections_.gpkg for each VPU +source("runners/cs_runner/config.R") + +# # load libraries +library(hydrofabric3D) +library(dplyr) +library(sf) + +# paths to nextgen datasets +NEXTGEN_FILES <- list.files(NEXTGEN_DIR, full.names = FALSE) + +# paths to nextgen datasets +transect_files <- list.files(TRANSECTS_DIR, full.names = FALSE) +transect_files <- transect_files[!grepl("updated_", transect_files)] + +REF_FEATURES <- list.files(REF_FEATURES_GPKG_DIR, full.names = FALSE) + +# reference features dataframe +ref_df <- data.frame( + vpu = sapply(strsplit(REF_FEATURES, "_", fixed = TRUE), function(i) { i[1] }), + ref_file = REF_FEATURES +) + +# ensure the files are in the same order and matched up by VPU +path_df <- align_files_by_vpu( + x = NEXTGEN_FILES, + y = transect_files, + base = BASE_DIR +) %>% + dplyr::left_join( + ref_df, + by = "vpu" + ) + +# loop over the nextgen and transect datasets (by VPU) and extract point elevations across points on each transect line, +# then classify the points, and create a parquet file with hy_id, cs_id, pt_id, X, Y, Z data. +# Save parquet locally and upload to specified S3 bucket + +# output_path <- paste0(BASE_DIR, "/test_out/") + +for (i in 20:nrow(path_df)) { + + # i = 8 + + start <- Sys.time() + + # nextgen file and full path + nextgen_file <- path_df$x[i] + nextgen_path <- paste0(NEXTGEN_DIR, nextgen_file) + + # model attributes file and full path + transect_file <- path_df$y[i] + transect_path <- paste0(TRANSECTS_DIR, transect_file) + + # model attributes file and full path + ref_file <- path_df$ref_file[i] + ref_path <- paste0(REF_FEATURES_DIR, "gpkg/", ref_file) + + # current VPU being processed + VPU <- path_df$vpu[i] + + start <- Sys.time() + + message("Creating VPU ", VPU, + " cross section points:\n - flowpaths: '", nextgen_file, + "'\n - transects: '", transect_file, "'", + "\n - waterbodies: '", ref_file, "'", + "'\n - start time: '", start, "'" + ) + + ################### + message("Reading in transects...\n > ", transect_file) + # read in transects data + transects <- sf::read_sf(transect_path) + + message("Reading in flowlines... \n > ", nextgen_file) + # read in nextgen data + flines <- sf::read_sf(nextgen_path, layer = "flowpaths") + + message("Reading in waterbodies... 
\n > ", ref_file) + # read in waterbodies reference features layer + waterbodies <- sf::read_sf(ref_path, layer = "waterbodies") + + # Update flowlines and transects to remove flowlines and transects that intersect with reference_features waterbodies + feature_subsets <- wb_intersects(flines, transects, waterbodies) + + # replace flowlines and transects objects with updated versions in "updated_features" + flines <- flines[feature_subsets$valid_flowlines, ] + transects <- transects[feature_subsets$valid_transects, ] + + rm(waterbodies) + gc() + + start_cs_pts <- Sys.time() + + # # ------------------------------------------------------------------------ + # # ------ TESTING DATA ------- + # # ------------------------------------------------------------------------ + # flines <- + # flines %>% + # dplyr::slice(1:3500) + # + # transects <- + # transects %>% + # dplyr::filter(hy_id %in% flines$id) + + # ------------------------------------------------------------------------ + + message("Extracting cross section points (", start_cs_pts, ")") + # ---------------------------------------------------------------------------------------------------------------- + # ---- STEP 1: Extract cs points from DEM ---- + # ---------------------------------------------------------------------------------------------------------------- + # system.time({ + # tmp <- transects[1:10,] + # + # get cross section point elevations + cs_pts <- hydrofabric3D::cross_section_pts( + + cs = transects, + crosswalk_id = "hy_id", + points_per_cs = NULL, + min_pts_per_cs = 10, + dem = DEM_PATH + ) + + # }) + + # ---------------------------------------------------------------------------------------------------------------- + # ---- STEP 2: Remove any cross section that has ANY missing (NA) Z values, and classify the points ---- + # ---------------------------------------------------------------------------------------------------------------- + + # sf::write_sf(cs_pts, "/Users/anguswatters/Desktop/test_improve_cs_pts_11.gpkg") + # sf::write_sf(flines, "/Users/anguswatters/Desktop/test_improve_flines_11.gpkg") + # sf::write_sf(transects, "/Users/anguswatters/Desktop/test_improve_transects_11.gpkg") + + # sf::write_sf(flines, "/Users/anguswatters/Desktop/test_improve_flines_11_2.gpkg") + # sf::write_sf(transects, "/Users/anguswatters/Desktop/test_improve_transects_11_2.gpkg") + # cs_pts %>% + # dplyr::group_by(hy_id, cs_id) %>% + # dplyr::filter(!any(is.na(Z))) %>% + # dplyr::ungroup() + # + # cs_pts %>% + # hydrofabric3D::drop_incomplete_cs_pts("hy_id") + + # system.time({ + + # STEP 2: Remove any cross section that has ANY missing (NA) Z values, and classify the points + cs_pts <- + # cs_pts2 <- + cs_pts %>% + # dplyr::group_by(hy_id, cs_id) %>% + # dplyr::filter(!any(is.na(Z))) %>% + # dplyr::ungroup() %>% + hydrofabric3D::drop_incomplete_cs_pts("hy_id") %>% + hydrofabric3D::classify_points( + crosswalk_id = "hy_id", + pct_of_length_for_relief = PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF + ) + + # }) + + ids_original_cs_pts <- hydrofabric3D::add_tmp_id(cs_pts)$tmp_id + + # ids_original_cs_pts <- hydrofabric3D::add_tmp_id(cs_pts2)$tmp_id + + # output_path <- paste0(BASE_DIR, "/test_out/") + # ---------------------------------------------------------------------------------------------------------------- + # ---- Cross section points parquet to S3 ---- + # ---------------------------------------------------------------------------------------------------------------- + + # classify the cross section points + # fixed_pts <- + cs_pts 
<- + cs_pts %>% + hydrofabric3D:::pts_to_XY() %>% + dplyr::select( + hy_id, cs_id, pt_id, + cs_lengthm, + relative_distance, + X, Y, Z, + class, point_type, + bottom, left_bank, right_bank, valid_banks, has_relief # newly added columns (03/06/2024) + ) %>% + dplyr::mutate( + Z_source = CS_SOURCE + ) %>% + dplyr::relocate(hy_id, cs_id, pt_id, cs_lengthm, relative_distance, X, Y, Z, Z_source, + class, point_type, + bottom, left_bank, right_bank, valid_banks, has_relief) + + ids_before_align <- hydrofabric3D::add_tmp_id(cs_pts)$tmp_id + + message("Aligning banks and smoothing bottoms...") + cs_pts <- hydrofabric3D::align_banks_and_bottoms(cs_pts = cs_pts, crosswalk_id = "hy_id") + # fixed_pts <- hydrofabric3D::align_banks_and_bottoms(cs_pts = fixed_pts, crosswalk_id = "hy_id") + + ids_after_align <- hydrofabric3D::add_tmp_id(cs_pts)$tmp_id + + message("Reclassifying cross section points...") + cs_pts <- hydrofabric3D::classify_points( + cs_pts = cs_pts, + crosswalk_id = "hy_id", + pct_of_length_for_relief = PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF + ) + + ids_after_reclassify <- hydrofabric3D::add_tmp_id(cs_pts)$tmp_id + + # if(all(ids_original_cs_pts %in% ids_after_fixed_pts)) { + # message("All hy_id/cs_ids in ORIGINAL DEM point extraction were found in the FIXED points") + # } else { + # message(" >>> Missing hy_id/cs_ids in ORIGINAL DEM point extraction compared to the FIXED points") + # } + # + if(all(ids_before_align %in% ids_after_align)) { + message("All hy_id/cs_ids are kept in tact after bank alignment and bottom smoothing") + } else { + message(" >>> Missing hy_id/cs_ids after bank alignment and bottom smoothing") + } + + if(all(ids_after_align %in% ids_after_reclassify)) { + message("All hy_id/cs_ids are kept in tact after RECLASSIFICATION") + } else { + message(" >>> Missing hy_id/cs_ids after RECLASSIFICATION") + } + + # all(hydrofabric3D::add_tmp_id(fixed_pts2)$tmp_id %in% hydrofabric3D::add_tmp_id(fixed_pts)$tmp_id) + # all(hydrofabric3D::add_tmp_id(fixed_pts4)$tmp_id %in% hydrofabric3D::add_tmp_id(fixed_pts)$tmp_id) + + ############################################################################## + + # ---------------------------------------------------------------------------------------------------------------- + # ---- Upload the cross section points parquet to S3 ---- + # ---------------------------------------------------------------------------------------------------------------- + + # # name of file and path to save transects gpkg too + out_file <- paste0("nextgen_", path_df$vpu[i], "_cross_sections.parquet") + out_path <- paste0(BASE_DIR, "/test_out/", out_file) + + message("Saving cross section points to:\n - filepath: '", out_path, "'") + + # save cross section points as a parquet to out_path (lynker-spatial/02_cs_pts/cs_pts_.parquet) + arrow::write_parquet(cs_pts, out_path) + # sf::write_sf(cs_pts2, "/Users/anguswatters/Desktop/test_improve_cs_pts_classified_11.gpkg") + # sf::write_sf(cs_pts, "/Users/anguswatters/Desktop/test_improve_cs_pts_classified_11_2.gpkg") + + end <- Sys.time() + + message("Finished cross section point generation for VPU ", VPU) + message("- Completed at: ", end) + message("==========================") + + rm(cs_pts) + gc() + gc() +} diff --git a/runners/cs_runner/test_fix_cs_pts.R b/runners/cs_runner/test_fix_cs_pts.R new file mode 100644 index 0000000..cbfaae8 --- /dev/null +++ b/runners/cs_runner/test_fix_cs_pts.R @@ -0,0 +1,278 @@ +# Generate the flowlines layer for the final cross_sections_.gpkg for each VPU 
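+# test_fix_cs_pts.R
+# This script reads previously generated cross section points (from START_CS_PTS_DIR) along with the
+# matching transects, flowlines, and waterbody reference features for each VPU, drops features that
+# intersect waterbodies, and re-runs hydrofabric3D::get_improved_cs_pts() to improve cross sections
+# flagged with invalid banks or missing relief.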
+source("runners/cs_runner/config.R") + +# # load libraries +library(hydrofabric3D) +library(dplyr) +library(sf) + +# paths to nextgen datasets +NEXTGEN_FILES <- list.files(NEXTGEN_DIR, full.names = FALSE) + +# paths to nextgen datasets +transect_files <- list.files(TRANSECTS_DIR, full.names = FALSE) +transect_files <- transect_files[!grepl("updated_", transect_files)] + +REF_FEATURES <- list.files(REF_FEATURES_GPKG_DIR, full.names = FALSE) + +START_CS_PTS_DIR <- '/Users/anguswatters/Desktop/lynker-spatial/test_out' +start_cs_pts_paths <- list.files(START_CS_PTS_DIR, full.names = T) + +END_CS_PTS_DIR <- '/Users/anguswatters/Desktop/lynker-spatial/test_fix_out' + +# reference features dataframe +ref_df <- data.frame( + vpu = sapply(strsplit(REF_FEATURES, "_", fixed = TRUE), function(i) { i[1] }), + ref_file = REF_FEATURES +) + +# ensure the files are in the same order and matched up by VPU +path_df <- align_files_by_vpu( + x = NEXTGEN_FILES, + y = transect_files, + base = BASE_DIR +) %>% + dplyr::left_join( + ref_df, + by = "vpu" + ) +path_df$cs_pts_path <- start_cs_pts_paths + +# path_df + +# loop over the nextgen and transect datasets (by VPU) and extract point elevations across points on each transect line, +# then classify the points, and create a parquet file with hy_id, cs_id, pt_id, X, Y, Z data. +# Save parquet locally and upload to specified S3 bucket + +# output_path <- paste0(BASE_DIR, "/test_out/") + +for (i in 15:nrow(path_df)) { + # i = 8 + + start <- Sys.time() + + start_cs_pts_path <- path_df$cs_pts_path[i] + + # nextgen file and full path + nextgen_file <- path_df$x[i] + nextgen_path <- paste0(NEXTGEN_DIR, nextgen_file) + + # model attributes file and full path + transect_file <- path_df$y[i] + transect_path <- paste0(TRANSECTS_DIR, transect_file) + + # model attributes file and full path + ref_file <- path_df$ref_file[i] + ref_path <- paste0(REF_FEATURES_DIR, "gpkg/", ref_file) + + # current VPU being processed + VPU <- path_df$vpu[i] + + start <- Sys.time() + + message("Creating VPU ", VPU, + " cross section points:\n - flowpaths: '", nextgen_file, + "'\n - start cs pts: '", basename(start_cs_pts_path), "'", + "'\n - transects: '", transect_file, "'", + "\n - waterbodies: '", ref_file, "'", + "'\n - start time: '", start, "'" + ) + + ################### + message("Reading in starting CS points...\n > ", basename(start_cs_pts_path)) + cs_pts <- arrow::read_parquet(start_cs_pts_path) + + message("Reading in transects...\n > ", transect_file) + # read in transects data + transects <- sf::read_sf(transect_path) + + message("Reading in flowlines... \n > ", nextgen_file) + # read in nextgen data + flines <- sf::read_sf(nextgen_path, layer = "flowpaths") + + message("Reading in waterbodies... 
\n > ", ref_file) + # read in waterbodies reference features layer + waterbodies <- sf::read_sf(ref_path, layer = "waterbodies") + + # Update flowlines and transects to remove flowlines and transects that intersect with reference_features waterbodies + feature_subsets <- wb_intersects(flines, transects, waterbodies) + + # replace flowlines and transects objects with updated versions in "updated_features" + flines <- flines[feature_subsets$valid_flowlines, ] + transects <- transects[feature_subsets$valid_transects, ] + + rm(waterbodies) + gc() + + start_cs_pts <- Sys.time() + flines <- dplyr::rename(flines, hy_id = id) + + # test_ids <- + # flines %>% + # dplyr::slice(1:100) %>% + # .$hy_id %>% + # unique() + # flines <- + # flines %>% + # dplyr::filter( + # hy_id %in% test_ids + # ) + # transects <- + # transects %>% + # dplyr::filter( + # hy_id %in% test_ids + # ) + # cs_pts <- + # cs_pts %>% + # dplyr::filter( + # hy_id %in% test_ids + # ) + + ids_original_cs_pts <- hydrofabric3D::add_tmp_id(cs_pts)$tmp_id + + # # ------------------------------------------------------------------------ + # # ------ TESTING DATA ------- + # # ------------------------------------------------------------------------ + # system.time({ + fixed_pts <- hydrofabric3D::get_improved_cs_pts( + cs_pts = cs_pts, # cross section points generated from hydrofabric3D::cross_section_pts() + net = flines, # original flowline network + # net = flines, # original flowline network + transects = transects, # original transect lines + crosswalk_id = "hy_id", + points_per_cs = NULL, + min_pts_per_cs = 10, # number of points per cross sections + dem = DEM_PATH, # DEM to extract points from + scale = EXTENSION_PCT, # How far to extend transects if the points need to be rechecked + pct_of_length_for_relief = PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF, # percent of cross sections length to be needed in relief calculation to consider cross section to "have relief" + fix_ids = FALSE, + verbose = TRUE + ) + # }) + # fixed_pts + + # fixed_pts %>% + # # cs_pts %>% + # sf::st_drop_geometry() %>% + # # dplyr::left_join( + # # stream_order, + # # by = "hy_id" + # # ) %>% + # dplyr::select(dplyr::any_of("hy_id"), cs_id, + # # vpu, order, + # valid_banks, has_relief) %>% + # dplyr::group_by( + # dplyr::across(dplyr::any_of(c("hy_id", "cs_id"))) + # ) %>% + # dplyr::slice(1) %>% + # dplyr::ungroup() %>% + # # dplyr::group_by( + # # dplyr::across(dplyr::any_of(c("vpu", "order"))) + # # ) %>% + # dplyr::count(valid_banks, has_relief) + + + # transects %>% + # sf::st_drop_geometry() %>% + # dplyr::group_by(id, cs_id) + # fixed_pts2$is_extended %>% sum() + ids_after_fixed_pts <- hydrofabric3D::add_tmp_id(fixed_pts)$tmp_id + # ---------------------------------------------------------------------------------------------------------------- + # ---- Cross section points parquet to S3 ---- + # ---------------------------------------------------------------------------------------------------------------- + + # classify the cross section points + fixed_pts <- + fixed_pts %>% + # dplyr::mutate( + # X = sf::st_coordinates(.)[,1], + # Y = sf::st_coordinates(.)[,2] + # ) %>% + sf::st_drop_geometry() %>% + dplyr::select( + hy_id, cs_id, pt_id, + cs_lengthm, + relative_distance, + X, Y, Z, + class, point_type, + bottom, left_bank, right_bank, valid_banks, has_relief # newly added columns (03/06/2024) + ) + + # # Drop point geometries, leaving just X, Y, Z values + # fixed_pts <- sf::st_drop_geometry(fixed_pts) + + # add Z_source column for source of elevation 
data + fixed_pts <- + fixed_pts %>% + dplyr::mutate( + Z_source = CS_SOURCE + ) %>% + dplyr::relocate(hy_id, cs_id, pt_id, cs_lengthm, relative_distance, X, Y, Z, Z_source, + class, point_type, + bottom, left_bank, right_bank, valid_banks, has_relief) + + ids_before_align <- hydrofabric3D::add_tmp_id(fixed_pts)$tmp_id + + message("Aligning banks and smoothing bottoms...") + fixed_pts <- hydrofabric3D::align_banks_and_bottoms(cs_pts = fixed_pts, crosswalk_id = "hy_id") + # fixed_pts <- hydrofabric3D::align_banks_and_bottoms(cs_pts = fixed_pts, crosswalk_id = "hy_id") + + ids_after_align <- hydrofabric3D::add_tmp_id(fixed_pts)$tmp_id + + message("Reclassifying cross section points...") + fixed_pts <- hydrofabric3D::classify_points( + cs_pts = fixed_pts, + crosswalk_id = "hy_id", + pct_of_length_for_relief = PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF + ) + + ids_after_reclassify <- hydrofabric3D::add_tmp_id(fixed_pts)$tmp_id + + if(all(ids_original_cs_pts %in% ids_after_fixed_pts)) { + message("All hy_id/cs_ids in ORIGINAL DEM point extraction were found in the FIXED points") + } else { + message(" >>> Missing hy_id/cs_ids in ORIGINAL DEM point extraction compared to the FIXED points") + } + + if(all(ids_before_align %in% ids_after_align)) { + message("All hy_id/cs_ids are kept intact after bank alignment and bottom smoothing") + } else { + message(" >>> Missing hy_id/cs_ids after bank alignment and bottom smoothing") + } + + if(all(ids_after_align %in% ids_after_reclassify)) { + message("All hy_id/cs_ids are kept intact after RECLASSIFICATION") + } else { + message(" >>> Missing hy_id/cs_ids after RECLASSIFICATION") + } + + # all(hydrofabric3D::add_tmp_id(fixed_pts2)$tmp_id %in% hydrofabric3D::add_tmp_id(fixed_pts)$tmp_id) + # all(hydrofabric3D::add_tmp_id(fixed_pts4)$tmp_id %in% hydrofabric3D::add_tmp_id(fixed_pts)$tmp_id) + + ############################################################################## + + # ---------------------------------------------------------------------------------------------------------------- + # ---- Upload the cross section points parquet to S3 ---- + # ---------------------------------------------------------------------------------------------------------------- + + # # name of file and path to save the cross section points parquet to + out_file <- paste0("nextgen_fixed_", path_df$vpu[i], "_cross_sections.parquet") + out_path <- paste0(BASE_DIR, "/test_fix_out/", out_file) + + message("Saving cross section points to:\n - filepath: '", out_path, "'") + + # save cross section points as a parquet to out_path (lynker-spatial/02_cs_pts/cs_pts_.parquet) + arrow::write_parquet(fixed_pts, out_path) + # sf::write_sf(cs_pts2, "/Users/anguswatters/Desktop/test_improve_cs_pts_classified_11.gpkg") + # sf::write_sf(cs_pts, "/Users/anguswatters/Desktop/test_improve_cs_pts_classified_11_2.gpkg") + + end <- Sys.time() + + message("Finished cross section point generation for VPU ", VPU) + message("- Completed at: ", end) + message("==========================") + + rm(cs_pts, fixed_pts) + gc() + gc() +} diff --git a/runners/cs_runner/utils.R b/runners/cs_runner/utils.R new file mode 100644 index 0000000..47423c1 --- /dev/null +++ b/runners/cs_runner/utils.R @@ -0,0 +1,2547 @@ +# Create an empty file structure +# base_dir: character, top level directory path +# Directory tree: +# base_dir/ +# └── lynker-spatial/ +# ├── hydrofabric/ +# ├── dem/ +# ├── vrt/ +# ├── tif/ +# ├── cs-extension-polygons/ +create_local_hydrofabric_base_dirs <- function(base_dir) { + + + # build paths + hydrofabric_dir <-
paste0(base_dir, "/hydrofabric") + + # DEM dirs + dem_dir <- file.path(base_dir, "dem") + dem_vrt_dir <- file.path(dem_dir, "vrt") + dem_tif_dir <- file.path(dem_dir, "tif") + + # polygons for transect extensions + cs_extension_polygons_dir <- file.path(base_dir, "cs-extension-polygons") + + # FEMA data + fema_dir <- file.path(base_dir, "fema") + + fema_fgb_dir <- file.path(fema_dir, "fema-fgb") + fema_geojson_dir <- file.path(fema_dir, "fema-geojson") + fema_clean_dir <- file.path(fema_dir, "fema-clean") + fema_gpkg_dir <- file.path(fema_dir, "fema-gpkg") + + # BY VPU folders + VPU_IDS <- get_vpu_ids() + fema_by_vpu_dir <- file.path(fema_dir, "fema-by-vpu") + fema_by_vpu_subdirs <- paste0(fema_by_vpu_dir, "/vpu-", VPU_IDS) + + # create base directories + create_if_not_exists(base_dir) + create_if_not_exists(hydrofabric_dir) + + # DEM dirs + create_if_not_exists(dem_dir) + create_if_not_exists(dem_vrt_dir) + create_if_not_exists(dem_tif_dir) + + # extension polygons + create_if_not_exists(cs_extension_polygons_dir) + + + # Create FEMA folders + create_if_not_exists(fema_dir) + create_if_not_exists(fema_fgb_dir) + create_if_not_exists(fema_geojson_dir) + create_if_not_exists(fema_clean_dir) + create_if_not_exists(fema_gpkg_dir) + create_if_not_exists(fema_by_vpu_dir) + + for (path in fema_by_vpu_subdirs) { + create_if_not_exists(path) + } + +} + +get_vpu_ids <- function() { + VPU_IDS <- c('01', '02', '03N', '03S', '03W', '04', '05', '06', '07', '08', '09', + '10L', '10U', '11', '12', '13', '14', '15', '16', '17', '18', '20', '21') + # VPU_IDS <- sf::st_drop_geometry(nhdplusTools::get_boundaries())$VPUID + + return(VPU_IDS) + +} + +# # Base directory for local file storage +# BASE_DIR <- '/Volumes/T7SSD/lynker-spatial' +# base_dir <- BASE_DIR +# # FEMA100 year flood map FGB save location (temporary, will be deleted after processing) +# FEMA_FGB_PATH <- file.path(BASE_DIRS_LIST$fema_dir, "fema_fgb") +# FEMA_GEOJSON_PATH <- file.path(BASE_DIRS_LIST$fema_dir, "fema_geojson") +# FEMA_CLEAN_PATH <- file.path(BASE_DIRS_LIST$fema_dir, "fema_clean") +# FEMA_GPKG_PATH <- file.path(BASE_DIRS_LIST$fema_dir, "fema_gpkg") +# FEMA_BY_VPU_PATH <- file.path(BASE_DIRS_LIST$fema_dir, "FEMA_BY_VPU") +# +# +# VPU_IDS <- c('01', '02', '03N', '03S', '03W', '04', '05', '06', '07', '08', '09', +# '10L', '10U', '11', '12', '13', '14', '15', '16', '17', '18', '20', '21') +# # VPU_IDS <- sf::st_drop_geometry(nhdplusTools::get_boundaries())$VPUID +# +# paste0("'", sf::st_drop_geometry(nhdplusTools::get_boundaries())$VPUID, "'", collapse = ", ") +# FEMA_VPU_SUBFOLDERS <- paste0(FEMA_BY_VPU_PATH, "/VPU_", VPU_IDS) + +# Create an empty file structure for a new version within a specified base_dir +# base_dir: character, top level directory path +# Directory tree: +# base_dir/ +# └── lynker-spatial/ +# ├── hydrofabric/ + # ├── version_number/ + # ├── network/ + # ├── transects/ + # ├── cross-sections/ + # ├── dem/ + # ├── dem-ml/ + # ├── dem-coastal-bathy/ + # ├── dem-points/ +create_new_version_dirs <- function(base_dir, version, with_output = FALSE) { + # version = "v3.0" + # base_dir <- BASE_DIR + + # build paths + hydrofabric_dir <- paste0(base_dir, "/hydrofabric") + version_base_dir <- paste0(hydrofabric_dir, "/", version) + + # polygons for transect extensions + ml_dir <- paste0(version_base_dir, "/ml") + + # reference features + ref_features_dir <- paste0(version_base_dir, "/reference-features") + + # conus network gpkg + network_dir <- paste0(version_base_dir, "/network") + + # transects + transects_dir <- 
paste0(version_base_dir, "/transects") + transects_base_dir <- paste0(transects_dir, "/base") + transects_fema_extended_dir <- paste0(transects_dir, "/extended-by-fema") + transects_cs_extended_dir <- paste0(transects_dir, "/extended-by-cs-attributes") + transects_output_dir <- paste0(transects_dir, "/output") + + # cross sections dirs + cross_sections_dir <- paste0(version_base_dir, "/cross-sections") + cross_sections_dem_dir <- paste0(cross_sections_dir, "/dem") + cross_sections_ml_dir <- paste0(cross_sections_dir, "/dem-ml") + cross_sections_coastal_bathy_dir <- paste0(cross_sections_dir, "/dem-coastal-bathy") + cross_sections_dem_pts_dir <- paste0(cross_sections_dir, "/dem-points") + cross_sections_output_dir <- paste0(cross_sections_dir, "/output") + + + if(with_output) { + output_dir <- paste0(version_base_dir, "/outputs") + } + + # create version BASE dir + create_if_not_exists(version_base_dir) + + # CONUS dir + create_if_not_exists(network_dir) + + # ML data + create_if_not_exists(ml_dir) + + # reference features data + create_if_not_exists(ref_features_dir) + + # transects + create_if_not_exists(transects_dir) + create_if_not_exists(transects_base_dir) + create_if_not_exists(transects_fema_extended_dir) + create_if_not_exists(transects_cs_extended_dir) + create_if_not_exists(transects_output_dir) + + # CS pts + create_if_not_exists(cross_sections_dir) + create_if_not_exists(cross_sections_dem_dir) + create_if_not_exists(cross_sections_ml_dir) + create_if_not_exists(cross_sections_coastal_bathy_dir) + create_if_not_exists(cross_sections_dem_pts_dir) + create_if_not_exists(cross_sections_output_dir) + + + if(with_output) { + create_if_not_exists(output_dir) + } + +} + +create_if_not_exists <- function(dir_path) { + if (!dir.exists(dir_path)) { + dir.create(dir_path, recursive = TRUE) + message("Created directory: '", dir_path, "'\n") + } +} + +# get a list of top level directories for main directory +get_base_dir_paths <- function(base_dir) { + # base_dir = BASE_DIR + # version = "v3.0" + + hydrofabric_dir <- file.path(base_dir, "hydrofabric") + + dem_dir <- file.path(base_dir, "dem") + dem_vrt_dir <- file.path(base_dir, "dem", "vrt") + dem_tif_dir <- file.path(base_dir, "dem", "tif") + + cs_extension_polygons_dir <- file.path(base_dir, "cs-extension-polygons") + + # FEMA data + + fema_dir <- file.path(base_dir, "fema") + + fema_fgb_dir <- file.path(fema_dir, "fema-fgb") + fema_geojson_dir <- file.path(fema_dir, "fema-geojson") + fema_clean_dir <- file.path(fema_dir, "fema-clean") + fema_gpkg_dir <- file.path(fema_dir, "fema-gpkg") + + # BY VPU folders + VPU_IDS <- get_vpu_ids() + fema_by_vpu_dir <- file.path(fema_dir, "fema-by-vpu") + fema_by_vpu_subdirs <- paste0(fema_by_vpu_dir, "/vpu-", VPU_IDS) + + return( + list( + hydrofabric_dir = hydrofabric_dir, + dem_dir = dem_dir, + dem_vrt_dir = dem_vrt_dir, + dem_tif_dir = dem_tif_dir, + cs_extension_polygons_dir = cs_extension_polygons_dir, + fema_dir = fema_dir, + fema_fgb_dir = fema_fgb_dir, + fema_geojson_dir = fema_geojson_dir, + fema_clean_dir = fema_clean_dir, + fema_gpkg_dir = fema_gpkg_dir, + fema_by_vpu_dir = fema_by_vpu_dir, + fema_by_vpu_subdirs = fema_by_vpu_subdirs + ) + ) +} + +# get list of a specific directories in a version directory +get_version_base_dir_paths <- function(base_dir, version) { + # base_dir = BASE_DIR + # version = "v3.0" + + hydrofabric_dir <- file.path(base_dir, "hydrofabric") + + version_base_dir <- file.path(hydrofabric_dir, version) + + # polygons for transect extensions + ml_dir <- 
file.path(version_base_dir, "ml") + + # reference features + ref_features_dir <- file.path(version_base_dir, "reference-features") + + # conus network gpkg + network_dir <- file.path(version_base_dir, "network") + + # transects + transects_dir <- file.path(version_base_dir, "transects") + transects_base_dir <- paste0(transects_dir, "/base") + transects_fema_extended_dir <- paste0(transects_dir, "/extended-by-fema") + transects_cs_extended_dir <- paste0(transects_dir, "/extended-by-cs-attributes") + transects_output_dir <- paste0(transects_dir, "/output") + + # cross sections dirs + cross_sections_dir <- file.path(version_base_dir, "cross-sections") + cross_sections_dem_dir <- file.path(cross_sections_dir, "dem") + cross_sections_ml_dir <- file.path(cross_sections_dir, "dem-ml") + cross_sections_coastal_bathy_dir <- file.path(cross_sections_dir, "dem-coastal-bathy") + cross_sections_dem_pts_dir <- file.path(cross_sections_dir, "dem-points") + cross_sections_output_dir <- paste0(cross_sections_dir, "/output") + + return( + list( + hydrofabric_dir = hydrofabric_dir, + version_base_dir = version_base_dir, + + ref_features_dir = ref_features_dir, + + network_dir = network_dir, + + ml_dir = ml_dir, + + transects_dir = transects_dir, + transects_base_dir = transects_base_dir, + transects_fema_extended_dir = transects_fema_extended_dir, + transects_cs_extended_dir = transects_cs_extended_dir, + transects_output_dir = transects_output_dir, + + cross_sections_dir = cross_sections_dir, + cross_sections_dem_dir = cross_sections_dem_dir, + cross_sections_dem_pts_dir = cross_sections_dem_pts_dir, + cross_sections_ml_dir = cross_sections_ml_dir, + cross_sections_coastal_bathy_dir = cross_sections_coastal_bathy_dir, + cross_sections_output_dir = cross_sections_output_dir + + ) + ) +} + +list_s3_objects <- function(s3_bucket, pattern = NULL, aws_profile = NULL) { + + profile_option <- if (!is.null(aws_profile)) paste0("--profile ", aws_profile) else "" + + if (is.null(pattern) || pattern == "") { + grep_command <- "" # no filtering if empty or NULL + } else { + grep_command <- paste0(" | grep -E \"", pattern, "\"") # grep if a pattern is given + } + + cmd <- paste0( + '#!/bin/bash\n', + 'S3_BUCKET="', s3_bucket, '"\n', + 'PATTERN="', pattern, '"\n', + 'S3_OBJECTS=$(aws s3 ls "$S3_BUCKET" ', profile_option, ' | awk \'{print $4}\' | grep -E "$PATTERN")\n', + 'echo "$S3_OBJECTS"' + ) + # cmd <- paste0( + # '#!/bin/bash\n', + # 'S3_BUCKET="', s3_bucket, '"\n', + # 'S3_OBJECTS=$(aws s3 ls "$S3_BUCKET" ', profile_option, ' | awk \'{print $4}\'', grep_command, ')\n', + # 'echo "$S3_OBJECTS"' + # ) + ls_output <- system(cmd, intern = TRUE) + return(ls_output) +} + +download_tiles <- function(tile_paths, output_dir) { + # output_dir <- DEM_TIF_DIR + # tile_paths + # error_tiles <- + tif_save_paths <- paste0(output_dir, "/", basename(tile_paths)) + + error_tiles <- data.frame( + tile = basename(tile_paths), + status = TRUE + ) + + for (i in seq_along(tile_paths)) { + # i = 1 + tile_path <- tile_paths[i] + tif_save_path <- tif_save_paths[i] + + message("[", i, "]", + "\n > Tile: ", basename(tile_path), + "\n > Output path: ", tif_save_path + ) + + download_tif_cmd <- paste0("curl -o ", tif_save_path, " ", tile_path) + + tryCatch({ + + tif_download_output <- system(download_tif_cmd, intern = TRUE) + message(" > Successfully downloaded tile: ", basename(tile_path)) + + }, error = function(e) { + + message("Error downloading tile: ", basename(tile_path)) + message("ERROR below: \n ", e) + + # mark this tile as failed ("<<-" so the status update reaches the data.frame outside the error handler) + error_tiles$status[error_tiles$tile == basename(tile_path)] <<- FALSE
+ + }) + + } + + return(error_tiles) + +} + +# Given 2 character vectors of filenames both including VPU strings after a "nextgen_" string, match them together to +# make sure they are aligned and in the same order +# x is a character vector of file paths with a VPU ID preceded by a "nextgen_" string +# y is a character vector of file paths with a VPU ID preceded by a "nextgen_" string +# base is a character vector of the base directory of the files. Defaults to NULL +# Returns a dataframe with VPU, x, and y columns +align_files_by_vpu <- function( + x, + y, + base = NULL +) { + + # Regular expression pattern to match numeric pattern after "nextgen_" and remove everything after the ending period + regex_pattern <- "nextgen_(\\d+[A-Za-z]?).*" + + # path dataframe for X filepaths + x_paths <- data.frame(x = x) + + # path dataframe for Y filepaths + y_paths <- data.frame(y = y) + + # generate VPU IDs based on file path regular expression matching with "regex_pattern" above + x_paths$vpu <- gsub(regex_pattern, "\\1", x_paths$x) + y_paths$vpu <- gsub(regex_pattern, "\\1", y_paths$y) + + # match paths based on VPU column + matched_paths <- dplyr::left_join( + x_paths, + y_paths, + by = "vpu" + ) + + # reorder columns + matched_paths <- dplyr::relocate(matched_paths, vpu, x, y) + + if(!is.null(base)) { + matched_paths$base_dir <- base + } + + return(matched_paths) + +} + +# Update flowlines and transects to remove flowlines and transects that intersect with reference_features waterbodies +# flowlines: flowlines linestring sf object +# trans: transects linestring sf object +# waterbodies: waterbodies polygon sf object +# Returns a list of length 2 with logical vectors that subset the "flowlines" and "transects" sf objects to remove flowlines and transects that intersect waterbodies +### Returns a list of length 2 with updated "flowlines" and "transects" sf objects +wb_intersects <- function(flowlines, trans, waterbodies) { + + ######## ######## ######## ######## ######## ######## + + flowlines_geos <- geos::as_geos_geometry(flowlines) + wbs_geos <- geos::as_geos_geometry(waterbodies) + + # temporary ID for transects that is the "hy_id", underscore, "cs_id", used for subsetting in future steps + trans$tmp_id <- paste0(trans$hy_id, "_", trans$cs_id) + + message("Checking flowlines against waterbodies...") + + # create an index between flowlines and waterbodies + wb_index <- geos::geos_intersects_matrix(flowlines_geos, wbs_geos) + + # remove any flowlines that cross 1 or more waterbodies + to_keep <- flowlines[lengths(wb_index) == 0, ] + to_check <- flowlines[lengths(wb_index) != 0, ] + + # subset transects to the hy_ids in "to_check" set of flowlines + trans_check <- trans[trans$hy_id %in% unique(to_check$id), ] + # trans_check <- trans_geos[trans$hy_id %in% unique(to_check$id)] + + # check where the transect linestrings intersect with the waterbodies + trans_geos_check <- geos::as_geos_geometry(trans_check) + + message("Checking transects against waterbodies (v2) ...") + wb_trans_index <- geos::geos_intersects_matrix(trans_geos_check, wbs_geos) # (NEW METHOD) + # wb_trans_index <- geos::geos_intersects_any(trans_geos_check, wbs_geos[unlist(wb_index)]) # (OLD METHOD) + + # sum(lengths(wb_trans_index) == 0) + # length(wb_trans_index) + + # within the transect lines that are on a flowline that crosses a waterbody, + # check if any of these transect lines DO NOT CROSS A WATERBODY AT ALL + trans_keep <- trans_check[lengths(wb_trans_index) == 0, ] # (NEW METHOD) + 
# trans_keep <- trans_check[!wb_trans_index, ] # (OLD METHOD) + + # preserve any flowlines that CROSS A WATERBODY BUT ALSO HAVE A TRANSECT LINE that does NOT cross any waterbodies + to_check <- to_check[to_check$id %in% unique(trans_keep$hy_id), ] + + # update flowlines to keep with flowlines that intersect a waterbody BUT STILL, + # have transects that are NOT in the waterbody + to_keep <- dplyr::bind_rows(to_keep, to_check) + + # 'tmp_ids' of transects that are being checked and also the transects within trans_check + # that were determined to be valid (are being kept) + check_ids <- unique(trans_check$tmp_id) + keep_ids <- unique(trans_keep$tmp_id) + + # logical vectors of which flowlines/transects to keep (KEEP == TRUE) + # - Remove any transects that are on flowlines that cross a waterbody AND the transect crosses the waterbody too. + # - Keep original transects that are not on flowlines that intersect waterbodies AND + # also the transects that do NOT intersect waterbodies but are on a flowline that DOES intersect a waterbody + valid_flowlines <- flowlines$id %in% to_keep$id + valid_transects <- trans$tmp_id %in% dplyr::filter(trans, + !tmp_id %in% check_ids[!check_ids %in% keep_ids])$tmp_id + + # return alist of updated flowlines and transects + return( + list( + "valid_flowlines" = valid_flowlines, + "valid_transects" = valid_transects + ) + ) + + # # within the transects lines that are on a flowline that crosses a waterbody, + # # check if any of these transects line DO NOT CROSS A WATERBODY AT ALL + # trans_keep <- trans_check[!trans_wb_index, ] + # # trans_keep <- trans_check[lengths(trans_wb_index2) == 0, ] + # + # # preserve any flowlines that CROSS A WATERBODY BUT ALSO HAVE A TRANSECT LINE that does NOT cross any waterbodies + # to_check <- to_check[to_check$id %in% unique(trans_keep$hy_id), ] + # + # # update flowlines to keep with flowlines that intersect a waterbody BUT STILL, + # # have transects that are NOT in the waterbody + # to_keep <- dplyr::bind_rows(to_keep, to_check) + # + # # 'tmp_ids' of transects that are being checked and also the transects within trans_check + # # that were determined to be valid (are being kept) + # check_ids <- unique(trans_check$tmp_id) + # keep_ids <- unique(trans_keep$tmp_id) + # + # # logical vectors of which flowlines/transects to keep (KEEP == TRUE) + # # - Remove any transects that are on flowlines that cross a waterbody AND the transect crosses the waterbody too. 
+ # # - Keep original transects that are not on flowlines that intersect waterbodies AND + # # also the transects that do NOT intersect waterbodies but are on a flowline that DOES intersect a waterbody + # valid_flowlines <- flowlines$id %in% to_keep$id + # valid_transects <- trans$tmp_id %in% dplyr::filter(trans, + # !tmp_id %in% check_ids[!check_ids %in% keep_ids])$tmp_id + # + # # return alist of updated flowlines and transects + # return( + # list( + # "valid_flowlines" = valid_flowlines, + # "valid_transects" = valid_transects + # ) + # ) +} + +add_intersects_ids <- function(x, y, id_col) { + # make sure the crs are tjhe same + y <- sf::st_transform(y, sf::st_crs(x)) + + # Perform the intersection + intersections <- sf::st_intersects(x, y) + + # add the intersected values to the first dataframe + x[[id_col]] <- unlist(lapply(intersections, function(idx) { + if (length(idx) > 0) { + paste0(unlist(y[[id_col]][idx]), collapse = ", ") + } else { + NA + } + })) + + return(x) +} + +unnest_ids <- function(ids) { + return( + unique(unlist(strsplit(unique(ids), ", "))) + ) +} + +#' Get the polygons that interesect with any of the linestring geometries +#' This is just a wrapper around geos::geos_intersects_matrix. Takes in sf dataframes, uses geos, then outputs sf dataframes +#' @param polygons polygon sf object. Default is NULL +#' @param lines linestring sf object. Default is NULL. +#' +#' @return sf dataframe of polygons that intersect with the linestrings +polygons_with_line_intersects <- function(polygons = NULL, lines = NULL) { + + if (is.null(polygons)) { + stop("NULL 'polygons' argument, provide an sf dataframe of POLYGON or MULTIPOLYGON geometries") + } + + if (is.null(lines)) { + stop("NULL 'lines' argument, provide an sf dataframe of LINESTRING or MULTILINESTRING geometries") + } + + # Convert the SF geometries to geos geometries + polygons_geos <- geos::as_geos_geometry(polygons) + lines_geos <- geos::as_geos_geometry(lines) + + # create an index between the polygons and linestrings + lines_index <- geos::geos_intersects_matrix(polygons_geos, lines_geos) + + # get the polygons that have atleast 1 intersection with the 'lines' + polygons_with_lines <- polygons[lengths(lines_index) != 0, ] + + return(polygons_with_lines) +} + +add_predicate_group_id <- function(polys, predicate) { + # GROUP BY SPATIAL PREDICATES + # ----------------------------------------- + # predicate = sf::st_touches + # polys <- sf_df + # ----------------------------------------- + + + relations <- predicate(polys) + + relations <- lapply(seq_along(relations), function(i) { as.character(sort(unique(c(relations[i][[1]], i)))) }) + + group_ids_map <- fastmap::fastmap() + ids_to_groups <- fastmap::fastmap() + + group_id <- 0 + + for (i in seq_along(relations)) { + + predicate_ids <- relations[i][[1]] + + # message("(", i, ") - ", predicate_ids) + # message("Start Group ID: ", group_id) + + id_group_check <- ids_to_groups$has(predicate_ids) + + if(any(id_group_check)) { + + known_groups <- ids_to_groups$mget(predicate_ids) + known_group <- known_groups[unname(sapply(known_groups , function(kg) { + !is.null(kg) + }))][[1]] + + # message("IDs part of past group ID > '", known_group, "'") + + past_group_ids <- group_ids_map$get(known_group)[[1]] + updated_group_ids <- as.character( + sort(as.numeric(unique(c(past_group_ids, predicate_ids)))) + ) + + group_ids_map$set(known_group, list(updated_group_ids)) + + new_ids <- predicate_ids[!predicate_ids %in% past_group_ids] + + # message("Adding ", new_ids, " to seen 
set...") + + # add any newly added IDs to the seen map + for (seen_id in new_ids) { + # message(seen_id) + ids_to_groups$set(as.character(seen_id), as.character(group_id)) + } + + } else { + # get a new group ID number + group_id <- group_id + 1 + # message("IDs form NEW group > '", group_id, "'") + + # create a new key in the map with the predicate IDs list as the value + group_ids_map$set(as.character(group_id), list(predicate_ids)) + + # message("Adding ", predicate_ids, " to seen set...") + + # add each predicate ID to the map storing the seen indexes and their respecitve group IDs + for (seen_id in predicate_ids) { + # message(seen_id) + ids_to_groups$set(as.character(seen_id), as.character(group_id)) + } + } + # message("End group ID: ", group_id, "\n") + } + + group_ids <- group_ids_map$as_list() + + grouping_df <- lapply(seq_along(group_ids), function(i) { + # i = 2 + grouping <- group_ids[i] + group_id <- names(grouping) + indices <- grouping[[1]][[1]] + + data.frame( + index = as.numeric(indices), + group_id = rep(group_id, length(indices)) + ) + + }) %>% + dplyr::bind_rows() %>% + dplyr::arrange(i) + + # count up the number of IDs for each group, well use this to determine which group + # to put any indices that had MULTIPLE groups they were apart of (use the group with the most other members) + group_id_counts <- + grouping_df %>% + dplyr::group_by(group_id) %>% + dplyr::count() %>% + # dplyr::arrange(-n) %>% + dplyr::ungroup() + + # select the IDs with the most other members + grouping_df <- + grouping_df %>% + dplyr::left_join( + group_id_counts, + by = 'group_id' + ) %>% + dplyr::group_by(index) %>% + dplyr::slice_max(n, with_ties = FALSE) %>% + dplyr::ungroup() %>% + dplyr::select(-n) %>% + dplyr::arrange(-index) + + polys$group_id <- grouping_df$group_id + + return(polys) + +} + +# TODO: DElete these NEW DOMAIN functions... 
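The directory helpers defined above (create_if_not_exists(), create_local_hydrofabric_base_dirs(), create_new_version_dirs(), get_base_dir_paths(), get_version_base_dir_paths()) are intended to be run once against a local storage root before the runners are used. The snippet below is a minimal usage sketch only: the scratch path is illustrative (the real runners point BASE_DIR at dedicated local storage) and it assumes this utils.R file has already been sourced.

# illustrative scratch root, not the project's actual BASE_DIR
base_dir <- file.path(tempdir(), "lynker-spatial")

# build the top-level tree (dem/, fema/, cs-extension-polygons/, ...)
create_local_hydrofabric_base_dirs(base_dir = base_dir)

# build a versioned hydrofabric tree underneath it
create_new_version_dirs(base_dir = base_dir, version = "v3.0", with_output = TRUE)

# look paths up later instead of rebuilding strings by hand
base_paths    <- get_base_dir_paths(base_dir)
version_paths <- get_version_base_dir_paths(base_dir, "v3.0")
base_paths$fema_gpkg_dir
version_paths$cross_sections_dem_dir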
+# Create an empty file structure +# base_dir: character, top level directory path +# domain_dirname: character, name of the intended new domain directory, if folder exists, then the required subdirectories are created (if they DO NOT exist) + +# Directory tree: +# base_dir/ +# └── domain_dirname/ +# ├── flowlines/ +# ├── dem/ +# ├── transects/ +# ├── cross_sections/ +# └── cs_pts/ +create_new_domain_dirs <- function(base_dir, domain_dirname, with_output = FALSE) { + + # build paths + domain_dir <- paste0(base_dir, "/", domain_dirname) + flowlines_dir <- paste0(domain_dir, "/flowlines") + domain_subset_dir <- paste0(domain_dir, "/domain_subset") + dem_dir <- paste0(domain_dir, "/dem") + transects_dir <- paste0(domain_dir, "/transects") + cross_sections_dir <- paste0(domain_dir, "/cross_sections") + cs_pts_dir <- paste0(domain_dir, "/cs_pts") + vpu_subsets_dir <- paste0(domain_dir, "/vpu-subsets") + + if(with_output) { + output_dir <- paste0(domain_dir, "/outputs") + } + + # create directories + create_if_not_exists(domain_dir) + create_if_not_exists(flowlines_dir) + create_if_not_exists(domain_subset_dir) + create_if_not_exists(dem_dir) + create_if_not_exists(transects_dir) + create_if_not_exists(cross_sections_dir) + create_if_not_exists(cs_pts_dir) + create_if_not_exists(vpu_subsets_dir) + + if(with_output) { + create_if_not_exists(output_dir) + } + +} + +# get path strings for a domain dir (based of a base dir and domain dirname) +# NOTE: this does NOT guarentee that these folders exist, +# NOTE: it just gets the paths if they were created by create_new_domain_dirs() +get_new_domain_paths <- function(base_dir, domain_dirname, with_output = FALSE) { + + # build paths + domain_dir <- paste0(base_dir, "/", domain_dirname) + flowlines_dir <- paste0(domain_dir, "/flowlines") + domain_subset_dir <- paste0(domain_dir, "/domain_subset") + dem_dir <- paste0(domain_dir, "/dem") + transects_dir <- paste0(domain_dir, "/transects") + cross_sections_dir <- paste0(domain_dir, "/cross_sections") + cs_pts_dir <- paste0(domain_dir, "/cs_pts") + vpu_subsets_dir <- paste0(domain_dir, "/vpu-subsets") + output_dir <- ifelse(with_output, paste0(domain_dir, "/outputs"), NA) + + + # named list of file paths + return( + list( + base_dir = base_dir, + domain_dir = domain_dir, + flowlines_dir = flowlines_dir, + domain_subset_dir = domain_subset_dir, + dem_dir = dem_dir, + transects_dir = transects_dir, + cross_sections_dir = cross_sections_dir, + cs_pts_dir = cs_pts_dir, + vpu_subsets_dir = vpu_subsets_dir, + output_dir = output_dir + ) + ) + +} + +download_3dep_vrt <- function(base_dir) { + + ## Cmd+A/Cmd+C from: http://prd-tnm.s3.amazonaws.com/index.html?prefix=StagedProducts/Elevation/13/TIFF/current/ + ### paste w/ `datapasta::vector_paste_vertical()` + ### Some manual cleaning of header and footer mess... + ### Reason? Un-scrapable page, and no index.gpkg... 
+ + t <- c( + "0 n06e162/", + "0 n06e163/", + "0 n07e134/", + "0 n07e151/", + "0 n07e152/", + "0 n07e158/", + "0 n08e134/", + "0 n08e151/", + "0 n08e152/", + "0 n08e158/", + "0 n09e134/", + "0 n10e138/", + "0 n14e144/", + "0 n15e145/", + "0 n16e145/", + "0 n18w065/", + "0 n18w066/", + "0 n18w067/", + "0 n18w068/", + "0 n19w065/", + "0 n19w066/", + "0 n19w067/", + "0 n19w068/", + "0 n19w156/", + "0 n20w155/", + "0 n20w156/", + "0 n20w157/", + "0 n21w156/", + "0 n21w157/", + "0 n21w158/", + "0 n22w157/", + "0 n22w158/", + "0 n22w159/", + "0 n22w160/", + "0 n22w161/", + "0 n23w160/", + "0 n23w161/", + "0 n25w081/", + "0 n25w082/", + "0 n25w083/", + "0 n26w081/", + "0 n26w082/", + "0 n26w098/", + "0 n26w099/", + "0 n27w081/", + "0 n27w082/", + "0 n27w083/", + "0 n27w098/", + "0 n27w099/", + "0 n27w100/", + "0 n28w081/", + "0 n28w082/", + "0 n28w083/", + "0 n28w097/", + "0 n28w098/", + "0 n28w099/", + "0 n28w100/", + "0 n28w101/", + "0 n29w081/", + "0 n29w082/", + "0 n29w083/", + "0 n29w090/", + "0 n29w096/", + "0 n29w097/", + "0 n29w098/", + "0 n29w099/", + "0 n29w100/", + "0 n29w101/", + "0 n29w104/", + "0 n30w081/", + "0 n30w082/", + "0 n30w083/", + "0 n30w084/", + "0 n30w085/", + "0 n30w086/", + "0 n30w089/", + "0 n30w090/", + "0 n30w091/", + "0 n30w092/", + "0 n30w093/", + "0 n30w094/", + "0 n30w095/", + "0 n30w096/", + "0 n30w097/", + "0 n30w098/", + "0 n30w099/", + "0 n30w100/", + "0 n30w101/", + "0 n30w102/", + "0 n30w103/", + "0 n30w104/", + "0 n30w105/", + "0 n31w082/", + "0 n31w083/", + "0 n31w084/", + "0 n31w085/", + "0 n31w086/", + "0 n31w087/", + "0 n31w088/", + "0 n31w089/", + "0 n31w090/", + "0 n31w091/", + "0 n31w092/", + "0 n31w093/", + "0 n31w094/", + "0 n31w095/", + "0 n31w096/", + "0 n31w097/", + "0 n31w098/", + "0 n31w099/", + "0 n31w100/", + "0 n31w101/", + "0 n31w102/", + "0 n31w103/", + "0 n31w104/", + "0 n31w105/", + "0 n31w106/", + "0 n31w107/", + "0 n32w081/", + "0 n32w082/", + "0 n32w083/", + "0 n32w084/", + "0 n32w085/", + "0 n32w086/", + "0 n32w087/", + "0 n32w088/", + "0 n32w089/", + "0 n32w090/", + "0 n32w091/", + "0 n32w092/", + "0 n32w093/", + "0 n32w094/", + "0 n32w095/", + "0 n32w096/", + "0 n32w097/", + "0 n32w098/", + "0 n32w099/", + "0 n32w100/", + "0 n32w101/", + "0 n32w102/", + "0 n32w103/", + "0 n32w104/", + "0 n32w105/", + "0 n32w106/", + "0 n32w107/", + "0 n32w108/", + "0 n32w109/", + "0 n32w110/", + "0 n32w111/", + "0 n32w112/", + "0 n32w113/", + "0 n32w114/", + "0 n33w080/", + "0 n33w081/", + "0 n33w082/", + "0 n33w083/", + "0 n33w084/", + "0 n33w085/", + "0 n33w086/", + "0 n33w087/", + "0 n33w088/", + "0 n33w089/", + "0 n33w090/", + "0 n33w091/", + "0 n33w092/", + "0 n33w093/", + "0 n33w094/", + "0 n33w095/", + "0 n33w096/", + "0 n33w097/", + "0 n33w098/", + "0 n33w099/", + "0 n33w100/", + "0 n33w101/", + "0 n33w102/", + "0 n33w103/", + "0 n33w104/", + "0 n33w105/", + "0 n33w106/", + "0 n33w107/", + "0 n33w108/", + "0 n33w109/", + "0 n33w110/", + "0 n33w111/", + "0 n33w112/", + "0 n33w113/", + "0 n33w114/", + "0 n33w115/", + "0 n33w116/", + "0 n33w117/", + "0 n33w118/", + "0 n33w119/", + "0 n34w078/", + "0 n34w079/", + "0 n34w080/", + "0 n34w081/", + "0 n34w082/", + "0 n34w083/", + "0 n34w084/", + "0 n34w085/", + "0 n34w086/", + "0 n34w087/", + "0 n34w088/", + "0 n34w089/", + "0 n34w090/", + "0 n34w091/", + "0 n34w092/", + "0 n34w093/", + "0 n34w094/", + "0 n34w095/", + "0 n34w096/", + "0 n34w097/", + "0 n34w098/", + "0 n34w099/", + "0 n34w100/", + "0 n34w101/", + "0 n34w102/", + "0 n34w103/", + "0 n34w104/", + "0 n34w105/", + "0 n34w106/", + "0 
n34w107/", + "0 n34w108/", + "0 n34w109/", + "0 n34w110/", + "0 n34w111/", + "0 n34w112/", + "0 n34w113/", + "0 n34w114/", + "0 n34w115/", + "0 n34w116/", + "0 n34w117/", + "0 n34w118/", + "0 n34w119/", + "0 n34w120/", + "0 n34w121/", + "0 n35w076/", + "0 n35w077/", + "0 n35w078/", + "0 n35w079/", + "0 n35w080/", + "0 n35w081/", + "0 n35w082/", + "0 n35w083/", + "0 n35w084/", + "0 n35w085/", + "0 n35w086/", + "0 n35w087/", + "0 n35w088/", + "0 n35w089/", + "0 n35w090/", + "0 n35w091/", + "0 n35w092/", + "0 n35w093/", + "0 n35w094/", + "0 n35w095/", + "0 n35w096/", + "0 n35w097/", + "0 n35w098/", + "0 n35w099/", + "0 n35w100/", + "0 n35w101/", + "0 n35w102/", + "0 n35w103/", + "0 n35w104/", + "0 n35w105/", + "0 n35w106/", + "0 n35w107/", + "0 n35w108/", + "0 n35w109/", + "0 n35w110/", + "0 n35w111/", + "0 n35w112/", + "0 n35w113/", + "0 n35w114/", + "0 n35w115/", + "0 n35w116/", + "0 n35w117/", + "0 n35w118/", + "0 n35w119/", + "0 n35w120/", + "0 n35w121/", + "0 n36w076/", + "0 n36w077/", + "0 n36w078/", + "0 n36w079/", + "0 n36w080/", + "0 n36w081/", + "0 n36w082/", + "0 n36w083/", + "0 n36w084/", + "0 n36w085/", + "0 n36w086/", + "0 n36w087/", + "0 n36w088/", + "0 n36w089/", + "0 n36w090/", + "0 n36w091/", + "0 n36w092/", + "0 n36w093/", + "0 n36w094/", + "0 n36w095/", + "0 n36w096/", + "0 n36w097/", + "0 n36w098/", + "0 n36w099/", + "0 n36w100/", + "0 n36w101/", + "0 n36w102/", + "0 n36w103/", + "0 n36w104/", + "0 n36w105/", + "0 n36w106/", + "0 n36w107/", + "0 n36w108/", + "0 n36w109/", + "0 n36w110/", + "0 n36w111/", + "0 n36w112/", + "0 n36w113/", + "0 n36w114/", + "0 n36w115/", + "0 n36w116/", + "0 n36w117/", + "0 n36w118/", + "0 n36w119/", + "0 n36w120/", + "0 n36w121/", + "0 n36w122/", + "0 n37w076/", + "0 n37w077/", + "0 n37w078/", + "0 n37w079/", + "0 n37w080/", + "0 n37w081/", + "0 n37w082/", + "0 n37w083/", + "0 n37w084/", + "0 n37w085/", + "0 n37w086/", + "0 n37w087/", + "0 n37w088/", + "0 n37w089/", + "0 n37w090/", + "0 n37w091/", + "0 n37w092/", + "0 n37w093/", + "0 n37w094/", + "0 n37w095/", + "0 n37w096/", + "0 n37w097/", + "0 n37w098/", + "0 n37w099/", + "0 n37w100/", + "0 n37w101/", + "0 n37w102/", + "0 n37w103/", + "0 n37w104/", + "0 n37w105/", + "0 n37w106/", + "0 n37w107/", + "0 n37w108/", + "0 n37w109/", + "0 n37w110/", + "0 n37w111/", + "0 n37w112/", + "0 n37w113/", + "0 n37w114/", + "0 n37w115/", + "0 n37w116/", + "0 n37w117/", + "0 n37w118/", + "0 n37w119/", + "0 n37w120/", + "0 n37w121/", + "0 n37w122/", + "0 n37w123/", + "0 n38w076/", + "0 n38w077/", + "0 n38w078/", + "0 n38w079/", + "0 n38w080/", + "0 n38w081/", + "0 n38w082/", + "0 n38w083/", + "0 n38w084/", + "0 n38w085/", + "0 n38w086/", + "0 n38w087/", + "0 n38w088/", + "0 n38w089/", + "0 n38w090/", + "0 n38w091/", + "0 n38w092/", + "0 n38w093/", + "0 n38w094/", + "0 n38w095/", + "0 n38w096/", + "0 n38w097/", + "0 n38w098/", + "0 n38w099/", + "0 n38w100/", + "0 n38w101/", + "0 n38w102/", + "0 n38w103/", + "0 n38w104/", + "0 n38w105/", + "0 n38w106/", + "0 n38w107/", + "0 n38w108/", + "0 n38w109/", + "0 n38w110/", + "0 n38w111/", + "0 n38w112/", + "0 n38w113/", + "0 n38w114/", + "0 n38w115/", + "0 n38w116/", + "0 n38w117/", + "0 n38w118/", + "0 n38w119/", + "0 n38w120/", + "0 n38w121/", + "0 n38w122/", + "0 n38w123/", + "0 n38w124/", + "0 n39w075/", + "0 n39w076/", + "0 n39w077/", + "0 n39w078/", + "0 n39w079/", + "0 n39w080/", + "0 n39w081/", + "0 n39w082/", + "0 n39w083/", + "0 n39w084/", + "0 n39w085/", + "0 n39w086/", + "0 n39w087/", + "0 n39w088/", + "0 n39w089/", + "0 n39w090/", + "0 n39w091/", + "0 
n39w092/", + "0 n39w093/", + "0 n39w094/", + "0 n39w095/", + "0 n39w096/", + "0 n39w097/", + "0 n39w098/", + "0 n39w099/", + "0 n39w100/", + "0 n39w101/", + "0 n39w102/", + "0 n39w103/", + "0 n39w104/", + "0 n39w105/", + "0 n39w106/", + "0 n39w107/", + "0 n39w108/", + "0 n39w109/", + "0 n39w110/", + "0 n39w111/", + "0 n39w112/", + "0 n39w113/", + "0 n39w114/", + "0 n39w115/", + "0 n39w116/", + "0 n39w117/", + "0 n39w118/", + "0 n39w119/", + "0 n39w120/", + "0 n39w121/", + "0 n39w122/", + "0 n39w123/", + "0 n39w124/", + "0 n40w075/", + "0 n40w076/", + "0 n40w077/", + "0 n40w078/", + "0 n40w079/", + "0 n40w080/", + "0 n40w081/", + "0 n40w082/", + "0 n40w083/", + "0 n40w084/", + "0 n40w085/", + "0 n40w086/", + "0 n40w087/", + "0 n40w088/", + "0 n40w089/", + "0 n40w090/", + "0 n40w091/", + "0 n40w092/", + "0 n40w093/", + "0 n40w094/", + "0 n40w095/", + "0 n40w096/", + "0 n40w097/", + "0 n40w098/", + "0 n40w099/", + "0 n40w100/", + "0 n40w101/", + "0 n40w102/", + "0 n40w103/", + "0 n40w104/", + "0 n40w105/", + "0 n40w106/", + "0 n40w107/", + "0 n40w108/", + "0 n40w109/", + "0 n40w110/", + "0 n40w111/", + "0 n40w112/", + "0 n40w113/", + "0 n40w114/", + "0 n40w115/", + "0 n40w116/", + "0 n40w117/", + "0 n40w118/", + "0 n40w119/", + "0 n40w120/", + "0 n40w121/", + "0 n40w122/", + "0 n40w123/", + "0 n40w124/", + "0 n40w125/", + "0 n41w073/", + "0 n41w074/", + "0 n41w075/", + "0 n41w076/", + "0 n41w077/", + "0 n41w078/", + "0 n41w079/", + "0 n41w080/", + "0 n41w081/", + "0 n41w082/", + "0 n41w083/", + "0 n41w084/", + "0 n41w085/", + "0 n41w086/", + "0 n41w087/", + "0 n41w088/", + "0 n41w089/", + "0 n41w090/", + "0 n41w091/", + "0 n41w092/", + "0 n41w093/", + "0 n41w094/", + "0 n41w095/", + "0 n41w096/", + "0 n41w097/", + "0 n41w098/", + "0 n41w099/", + "0 n41w100/", + "0 n41w101/", + "0 n41w102/", + "0 n41w103/", + "0 n41w104/", + "0 n41w105/", + "0 n41w106/", + "0 n41w107/", + "0 n41w108/", + "0 n41w109/", + "0 n41w110/", + "0 n41w111/", + "0 n41w112/", + "0 n41w113/", + "0 n41w114/", + "0 n41w115/", + "0 n41w116/", + "0 n41w117/", + "0 n41w118/", + "0 n41w119/", + "0 n41w120/", + "0 n41w121/", + "0 n41w122/", + "0 n41w123/", + "0 n41w124/", + "0 n41w125/", + "0 n42w070/", + "0 n42w071/", + "0 n42w072/", + "0 n42w073/", + "0 n42w074/", + "0 n42w075/", + "0 n42w076/", + "0 n42w077/", + "0 n42w078/", + "0 n42w079/", + "0 n42w080/", + "0 n42w081/", + "0 n42w082/", + "0 n42w083/", + "0 n42w084/", + "0 n42w085/", + "0 n42w086/", + "0 n42w087/", + "0 n42w088/", + "0 n42w089/", + "0 n42w090/", + "0 n42w091/", + "0 n42w092/", + "0 n42w093/", + "0 n42w094/", + "0 n42w095/", + "0 n42w096/", + "0 n42w097/", + "0 n42w098/", + "0 n42w099/", + "0 n42w100/", + "0 n42w101/", + "0 n42w102/", + "0 n42w103/", + "0 n42w104/", + "0 n42w105/", + "0 n42w106/", + "0 n42w107/", + "0 n42w108/", + "0 n42w109/", + "0 n42w110/", + "0 n42w111/", + "0 n42w112/", + "0 n42w113/", + "0 n42w114/", + "0 n42w115/", + "0 n42w116/", + "0 n42w117/", + "0 n42w118/", + "0 n42w119/", + "0 n42w120/", + "0 n42w121/", + "0 n42w122/", + "0 n42w123/", + "0 n42w124/", + "0 n42w125/", + "0 n43w071/", + "0 n43w072/", + "0 n43w073/", + "0 n43w074/", + "0 n43w075/", + "0 n43w076/", + "0 n43w077/", + "0 n43w078/", + "0 n43w079/", + "0 n43w080/", + "0 n43w081/", + "0 n43w082/", + "0 n43w083/", + "0 n43w084/", + "0 n43w085/", + "0 n43w086/", + "0 n43w087/", + "0 n43w088/", + "0 n43w089/", + "0 n43w090/", + "0 n43w091/", + "0 n43w092/", + "0 n43w093/", + "0 n43w094/", + "0 n43w095/", + "0 n43w096/", + "0 n43w097/", + "0 n43w098/", + "0 n43w099/", + "0 
n43w100/", + "0 n43w101/", + "0 n43w102/", + "0 n43w103/", + "0 n43w104/", + "0 n43w105/", + "0 n43w106/", + "0 n43w107/", + "0 n43w108/", + "0 n43w109/", + "0 n43w110/", + "0 n43w111/", + "0 n43w112/", + "0 n43w113/", + "0 n43w114/", + "0 n43w115/", + "0 n43w116/", + "0 n43w117/", + "0 n43w118/", + "0 n43w119/", + "0 n43w120/", + "0 n43w121/", + "0 n43w122/", + "0 n43w123/", + "0 n43w124/", + "0 n43w125/", + "0 n44w069/", + "0 n44w070/", + "0 n44w071/", + "0 n44w072/", + "0 n44w073/", + "0 n44w074/", + "0 n44w075/", + "0 n44w076/", + "0 n44w077/", + "0 n44w078/", + "0 n44w079/", + "0 n44w080/", + "0 n44w081/", + "0 n44w083/", + "0 n44w084/", + "0 n44w085/", + "0 n44w086/", + "0 n44w087/", + "0 n44w088/", + "0 n44w089/", + "0 n44w090/", + "0 n44w091/", + "0 n44w092/", + "0 n44w093/", + "0 n44w094/", + "0 n44w095/", + "0 n44w096/", + "0 n44w097/", + "0 n44w098/", + "0 n44w099/", + "0 n44w100/", + "0 n44w101/", + "0 n44w102/", + "0 n44w103/", + "0 n44w104/", + "0 n44w105/", + "0 n44w106/", + "0 n44w107/", + "0 n44w108/", + "0 n44w109/", + "0 n44w110/", + "0 n44w111/", + "0 n44w112/", + "0 n44w113/", + "0 n44w114/", + "0 n44w115/", + "0 n44w116/", + "0 n44w117/", + "0 n44w118/", + "0 n44w119/", + "0 n44w120/", + "0 n44w121/", + "0 n44w122/", + "0 n44w123/", + "0 n44w124/", + "0 n44w125/", + "0 n45w067/", + "0 n45w068/", + "0 n45w069/", + "0 n45w070/", + "0 n45w071/", + "0 n45w072/", + "0 n45w073/", + "0 n45w074/", + "0 n45w075/", + "0 n45w076/", + "0 n45w077/", + "0 n45w083/", + "0 n45w084/", + "0 n45w085/", + "0 n45w086/", + "0 n45w087/", + "0 n45w088/", + "0 n45w089/", + "0 n45w090/", + "0 n45w091/", + "0 n45w092/", + "0 n45w093/", + "0 n45w094/", + "0 n45w095/", + "0 n45w096/", + "0 n45w097/", + "0 n45w098/", + "0 n45w099/", + "0 n45w100/", + "0 n45w101/", + "0 n45w102/", + "0 n45w103/", + "0 n45w104/", + "0 n45w105/", + "0 n45w106/", + "0 n45w107/", + "0 n45w108/", + "0 n45w109/", + "0 n45w110/", + "0 n45w111/", + "0 n45w112/", + "0 n45w113/", + "0 n45w114/", + "0 n45w115/", + "0 n45w116/", + "0 n45w117/", + "0 n45w118/", + "0 n45w119/", + "0 n45w120/", + "0 n45w121/", + "0 n45w122/", + "0 n45w123/", + "0 n45w124/", + "0 n45w125/", + "0 n46w068/", + "0 n46w069/", + "0 n46w070/", + "0 n46w071/", + "0 n46w072/", + "0 n46w073/", + "0 n46w074/", + "0 n46w075/", + "0 n46w084/", + "0 n46w085/", + "0 n46w086/", + "0 n46w087/", + "0 n46w088/", + "0 n46w089/", + "0 n46w090/", + "0 n46w091/", + "0 n46w092/", + "0 n46w093/", + "0 n46w094/", + "0 n46w095/", + "0 n46w096/", + "0 n46w097/", + "0 n46w098/", + "0 n46w099/", + "0 n46w100/", + "0 n46w101/", + "0 n46w102/", + "0 n46w103/", + "0 n46w104/", + "0 n46w105/", + "0 n46w106/", + "0 n46w107/", + "0 n46w108/", + "0 n46w109/", + "0 n46w110/", + "0 n46w111/", + "0 n46w112/", + "0 n46w113/", + "0 n46w114/", + "0 n46w115/", + "0 n46w116/", + "0 n46w117/", + "0 n46w118/", + "0 n46w119/", + "0 n46w120/", + "0 n46w121/", + "0 n46w122/", + "0 n46w123/", + "0 n46w124/", + "0 n46w125/", + "0 n47w068/", + "0 n47w069/", + "0 n47w070/", + "0 n47w071/", + "0 n47w084/", + "0 n47w085/", + "0 n47w086/", + "0 n47w087/", + "0 n47w088/", + "0 n47w089/", + "0 n47w090/", + "0 n47w091/", + "0 n47w092/", + "0 n47w093/", + "0 n47w094/", + "0 n47w095/", + "0 n47w096/", + "0 n47w097/", + "0 n47w098/", + "0 n47w099/", + "0 n47w100/", + "0 n47w101/", + "0 n47w102/", + "0 n47w103/", + "0 n47w104/", + "0 n47w105/", + "0 n47w106/", + "0 n47w107/", + "0 n47w108/", + "0 n47w109/", + "0 n47w110/", + "0 n47w111/", + "0 n47w112/", + "0 n47w113/", + "0 n47w114/", + "0 n47w115/", + "0 
n47w116/", + "0 n47w117/", + "0 n47w118/", + "0 n47w119/", + "0 n47w120/", + "0 n47w121/", + "0 n47w122/", + "0 n47w123/", + "0 n47w124/", + "0 n47w125/", + "0 n48w068/", + "0 n48w069/", + "0 n48w070/", + "0 n48w087/", + "0 n48w088/", + "0 n48w089/", + "0 n48w090/", + "0 n48w091/", + "0 n48w092/", + "0 n48w093/", + "0 n48w094/", + "0 n48w095/", + "0 n48w096/", + "0 n48w097/", + "0 n48w098/", + "0 n48w099/", + "0 n48w100/", + "0 n48w101/", + "0 n48w102/", + "0 n48w103/", + "0 n48w104/", + "0 n48w105/", + "0 n48w106/", + "0 n48w107/", + "0 n48w108/", + "0 n48w109/", + "0 n48w110/", + "0 n48w111/", + "0 n48w112/", + "0 n48w113/", + "0 n48w114/", + "0 n48w115/", + "0 n48w116/", + "0 n48w117/", + "0 n48w118/", + "0 n48w119/", + "0 n48w120/", + "0 n48w121/", + "0 n48w122/", + "0 n48w123/", + "0 n48w124/", + "0 n48w125/", + "0 n49w089/", + "0 n49w090/", + "0 n49w091/", + "0 n49w092/", + "0 n49w093/", + "0 n49w094/", + "0 n49w095/", + "0 n49w096/", + "0 n49w097/", + "0 n49w098/", + "0 n49w099/", + "0 n49w100/", + "0 n49w101/", + "0 n49w102/", + "0 n49w103/", + "0 n49w104/", + "0 n49w105/", + "0 n49w106/", + "0 n49w107/", + "0 n49w108/", + "0 n49w109/", + "0 n49w110/", + "0 n49w111/", + "0 n49w112/", + "0 n49w113/", + "0 n49w114/", + "0 n49w115/", + "0 n49w116/", + "0 n49w117/", + "0 n49w118/", + "0 n49w119/", + "0 n49w120/", + "0 n49w121/", + "0 n49w122/", + "0 n49w123/", + "0 n49w124/", + "0 n49w125/", + "0 n50w095/", + "0 n50w096/", + "0 n50w097/", + "0 n50w098/", + "0 n50w099/", + "0 n50w100/", + "0 n50w101/", + "0 n50w107/", + "0 n50w108/", + "0 n50w122/", + "0 n50w123/", + "0 n50w124/", + "0 n52e177/", + "0 n52e178/", + "0 n52e179/", + "0 n52w174/", + "0 n52w176/", + "0 n52w177/", + "0 n52w178/", + "0 n52w179/", + "0 n52w180/", + "0 n53e172/", + "0 n53e173/", + "0 n53e174/", + "0 n53e175/", + "0 n53e177/", + "0 n53e178/", + "0 n53e179/", + "0 n53w169/", + "0 n53w170/", + "0 n53w171/", + "0 n53w172/", + "0 n53w173/", + "0 n53w174/", + "0 n53w175/", + "0 n53w176/", + "0 n53w177/", + "0 n54e172/", + "0 n54e173/", + "0 n54w167/", + "0 n54w168/", + "0 n54w169/", + "0 n54w170/", + "0 n55w131/", + "0 n55w132/", + "0 n55w133/", + "0 n55w134/", + "0 n55w160/", + "0 n55w161/", + "0 n55w162/", + "0 n55w163/", + "0 n55w164/", + "0 n55w165/", + "0 n55w166/", + "0 n55w167/", + "0 n56w130/", + "0 n56w131/", + "0 n56w132/", + "0 n56w133/", + "0 n56w134/", + "0 n56w135/", + "0 n56w156/", + "0 n56w157/", + "0 n56w159/", + "0 n56w160/", + "0 n56w161/", + "0 n56w162/", + "0 n56w163/", + "0 n56w164/", + "0 n57w131/", + "0 n57w132/", + "0 n57w133/", + "0 n57w134/", + "0 n57w135/", + "0 n57w136/", + "0 n57w153/", + "0 n57w154/", + "0 n57w155/", + "0 n57w156/", + "0 n57w157/", + "0 n57w158/", + "0 n57w159/", + "0 n57w160/", + "0 n57w161/", + "0 n57w162/", + "0 n57w170/", + "0 n57w171/", + "0 n58w133/", + "0 n58w134/", + "0 n58w135/", + "0 n58w136/", + "0 n58w137/", + "0 n58w153/", + "0 n58w154/", + "0 n58w155/", + "0 n58w156/", + "0 n58w157/", + "0 n58w158/", + "0 n58w159/", + "0 n58w170/", + "0 n58w171/", + "0 n59w134/", + "0 n59w135/", + "0 n59w136/", + "0 n59w137/", + "0 n59w138/", + "0 n59w139/", + "0 n59w152/", + "0 n59w153/", + "0 n59w154/", + "0 n59w155/", + "0 n59w156/", + "0 n59w157/", + "0 n59w158/", + "0 n59w159/", + "0 n59w160/", + "0 n59w161/", + "0 n59w162/", + "0 n59w163/", + "0 n60w135/", + "0 n60w136/", + "0 n60w137/", + "0 n60w138/", + "0 n60w139/", + "0 n60w140/", + "0 n60w141/", + "0 n60w142/", + "0 n60w143/", + "0 n60w144/", + "0 n60w145/", + "0 n60w146/", + "0 n60w147/", + "0 n60w148/", + "0 
n60w149/", + "0 n60w150/", + "0 n60w151/", + "0 n60w152/", + "0 n60w153/", + "0 n60w154/", + "0 n60w155/", + "0 n60w156/", + "0 n60w157/", + "0 n60w158/", + "0 n60w159/", + "0 n60w160/", + "0 n60w161/", + "0 n60w162/", + "0 n60w163/", + "0 n60w164/", + "0 n60w165/", + "0 n60w166/", + "0 n60w167/", + "0 n60w168/", + "0 n61w140/", + "0 n61w141/", + "0 n61w142/", + "0 n61w143/", + "0 n61w144/", + "0 n61w145/", + "0 n61w146/", + "0 n61w147/", + "0 n61w148/", + "0 n61w149/", + "0 n61w150/", + "0 n61w151/", + "0 n61w152/", + "0 n61w153/", + "0 n61w154/", + "0 n61w155/", + "0 n61w156/", + "0 n61w157/", + "0 n61w158/", + "0 n61w159/", + "0 n61w160/", + "0 n61w161/", + "0 n61w162/", + "0 n61w163/", + "0 n61w164/", + "0 n61w165/", + "0 n61w166/", + "0 n61w167/", + "0 n61w168/", + "0 n61w173/", + "0 n61w174/", + "0 n62w142/", + "0 n62w143/", + "0 n62w144/", + "0 n62w145/", + "0 n62w146/", + "0 n62w147/", + "0 n62w148/", + "0 n62w149/", + "0 n62w150/", + "0 n62w151/", + "0 n62w152/", + "0 n62w153/", + "0 n62w154/", + "0 n62w155/", + "0 n62w156/", + "0 n62w157/", + "0 n62w158/", + "0 n62w159/", + "0 n62w160/", + "0 n62w161/", + "0 n62w162/", + "0 n62w163/", + "0 n62w164/", + "0 n62w165/", + "0 n62w166/", + "0 n62w167/", + "0 n63w142/", + "0 n63w143/", + "0 n63w144/", + "0 n63w145/", + "0 n63w146/", + "0 n63w147/", + "0 n63w148/", + "0 n63w149/", + "0 n63w150/", + "0 n63w151/", + "0 n63w152/", + "0 n63w153/", + "0 n63w154/", + "0 n63w155/", + "0 n63w156/", + "0 n63w157/", + "0 n63w158/", + "0 n63w159/", + "0 n63w160/", + "0 n63w161/", + "0 n63w162/", + "0 n63w163/", + "0 n63w164/", + "0 n63w165/", + "0 n63w166/", + "0 n63w167/", + "0 n63w170/", + "0 n64w141/", + "0 n64w142/", + "0 n64w143/", + "0 n64w144/", + "0 n64w145/", + "0 n64w146/", + "0 n64w147/", + "0 n64w148/", + "0 n64w149/", + "0 n64w150/", + "0 n64w151/", + "0 n64w152/", + "0 n64w153/", + "0 n64w154/", + "0 n64w155/", + "0 n64w156/", + "0 n64w157/", + "0 n64w158/", + "0 n64w159/", + "0 n64w160/", + "0 n64w161/", + "0 n64w162/", + "0 n64w163/", + "0 n64w164/", + "0 n64w165/", + "0 n64w169/", + "0 n64w170/", + "0 n64w171/", + "0 n64w172/", + "0 n65w141/", + "0 n65w142/", + "0 n65w143/", + "0 n65w144/", + "0 n65w145/", + "0 n65w146/", + "0 n65w147/", + "0 n65w148/", + "0 n65w149/", + "0 n65w150/", + "0 n65w151/", + "0 n65w152/", + "0 n65w153/", + "0 n65w154/", + "0 n65w155/", + "0 n65w156/", + "0 n65w157/", + "0 n65w158/", + "0 n65w159/", + "0 n65w160/", + "0 n65w161/", + "0 n65w162/", + "0 n65w163/", + "0 n65w164/", + "0 n65w165/", + "0 n65w166/", + "0 n65w167/", + "0 n66w141/", + "0 n66w142/", + "0 n66w143/", + "0 n66w144/", + "0 n66w145/", + "0 n66w146/", + "0 n66w147/", + "0 n66w148/", + "0 n66w149/", + "0 n66w150/", + "0 n66w151/", + "0 n66w152/", + "0 n66w153/", + "0 n66w154/", + "0 n66w155/", + "0 n66w156/", + "0 n66w157/", + "0 n66w158/", + "0 n66w159/", + "0 n66w160/", + "0 n66w161/", + "0 n66w162/", + "0 n66w163/", + "0 n66w164/", + "0 n66w165/", + "0 n66w166/", + "0 n66w167/", + "0 n66w168/", + "0 n66w169/", + "0 n67w141/", + "0 n67w142/", + "0 n67w143/", + "0 n67w144/", + "0 n67w145/", + "0 n67w146/", + "0 n67w147/", + "0 n67w148/", + "0 n67w149/", + "0 n67w150/", + "0 n67w151/", + "0 n67w152/", + "0 n67w153/", + "0 n67w154/", + "0 n67w155/", + "0 n67w156/", + "0 n67w157/", + "0 n67w158/", + "0 n67w159/", + "0 n67w160/", + "0 n67w161/", + "0 n67w162/", + "0 n67w163/", + "0 n67w164/", + "0 n67w165/", + "0 n67w166/", + "0 n67w167/", + "0 n67w168/", + "0 n68w141/", + "0 n68w142/", + "0 n68w143/", + "0 n68w144/", + "0 n68w145/", + "0 
n68w146/", + "0 n68w147/", + "0 n68w148/", + "0 n68w149/", + "0 n68w150/", + "0 n68w151/", + "0 n68w152/", + "0 n68w153/", + "0 n68w154/", + "0 n68w155/", + "0 n68w156/", + "0 n68w157/", + "0 n68w158/", + "0 n68w159/", + "0 n68w160/", + "0 n68w161/", + "0 n68w162/", + "0 n68w163/", + "0 n68w164/", + "0 n68w165/", + "0 n68w166/", + "0 n69w141/", + "0 n69w142/", + "0 n69w143/", + "0 n69w144/", + "0 n69w145/", + "0 n69w146/", + "0 n69w147/", + "0 n69w148/", + "0 n69w149/", + "0 n69w150/", + "0 n69w151/", + "0 n69w152/", + "0 n69w153/", + "0 n69w154/", + "0 n69w155/", + "0 n69w156/", + "0 n69w157/", + "0 n69w158/", + "0 n69w159/", + "0 n69w160/", + "0 n69w161/", + "0 n69w162/", + "0 n69w163/", + "0 n69w164/", + "0 n69w165/", + "0 n69w166/", + "0 n69w167/", + "0 n70w141/", + "0 n70w142/", + "0 n70w143/", + "0 n70w144/", + "0 n70w145/", + "0 n70w146/", + "0 n70w147/", + "0 n70w148/", + "0 n70w149/", + "0 n70w150/", + "0 n70w151/", + "0 n70w152/", + "0 n70w153/", + "0 n70w154/", + "0 n70w155/", + "0 n70w156/", + "0 n70w157/", + "0 n70w158/", + "0 n70w159/", + "0 n70w160/", + "0 n70w161/", + "0 n70w162/", + "0 n70w163/", + "0 n70w164/", + "0 n70w165/", + "0 n71w143/", + "0 n71w144/", + "0 n71w145/", + "0 n71w146/", + "0 n71w147/", + "0 n71w148/", + "0 n71w149/", + "0 n71w150/", + "0 n71w151/", + "0 n71w152/", + "0 n71w153/", + "0 n71w154/", + "0 n71w155/", + "0 n71w156/", + "0 n71w157/", + "0 n71w158/", + "0 n71w159/", + "0 n71w160/", + "0 n71w161/", + "0 n71w162/", + "0 n71w163/", + "0 n71w164/", + "0 n72w155/", + "0 n72w156/", + "0 n72w157/", + "0 n72w158/", + "0 s14w170/", + "0 s14w171/" + ) + + + # sub out HTML copy pattern with vsi URL + t2 <- gsub("0 ", + "/vsicurl/https://prd-tnm.s3.amazonaws.com/StagedProducts/Elevation/13/TIFF/current/", + t) + + # add file paths following NED scheme + t3 <- paste0(t2, + "USGS_13_", + basename(t2), + ".tif") + + # Write table to data-raw + write.table(t3, + paste0(base_dir, "/ned_list_USGS_13.txt"), + row.names = FALSE, + col.names = FALSE, + quote = FALSE) + + +} +# dem_base_dir <- "/Users/anguswatters/Desktop/transects_paper/data/dem" +# download_3dep_vrt(dem_base_dir) +# +# # Create meta data object of three NED resoruces +# ned <- data.frame(rbind( +# # c(id = "USGS_1", +# # URL = "/vsicurl/https://prd-tnm.s3.amazonaws.com/StagedProducts/Elevation/1", +# # varname = "30m elevation", +# # long_name = "30m (1 arcsec) National Elevation Dataset", +# # units = "m"), +# # +# # c(id = "USGS_2", +# # URL = "/vsicurl/https://prd-tnm.s3.amazonaws.com/StagedProducts/Elevation/2", +# # varname = "60m elevation", +# # long_name = "60m (2 arcsec) National Elevation Dataset Alaska", +# # units = "m"), +# +# c(id = "USGS_13", +# URL = "/vsicurl/https://prd-tnm.s3.amazonaws.com/StagedProducts/Elevation/13", +# varname = "10m elevation", +# long_name = "10m (1/3th arcsec) National Elevation Dataset", +# units = "m") +# )) +# +# +# +# +# # Loop over the three resolutions +# for (i in 1:length(ned)) { +# i = 1 +# +# # Define output text file path +# txt_file <- paste0(dem_base_dir, "/ned_list_", ned$id[i], "_2.txt") +# +# # Define output VRT path +# vrt_file <- paste0(paste0(dem_base_dir, "/ned_", ned$id[i], ".vrt")) +# +# # If VRT does NOT exist, build VRT +# if (!file.exists(vrt_file)) { +# +# # read the corresponding index.gpkg +# files <- sf::read_sf(ned$domain_url[i]) +# DEM_UR +# DEM_PATH <- "/vsicurl/https://prd-tnm.s3.amazonaws.com/StagedProducts/Elevation/1/TIFF/USGS_Seamless_DEM_1.vrt" +# # Build full HTTPS paths to "./current/" +# files <- 
c(file.path(ned$URL[i], "TIFF/current", gsub("[.]/", "", files$location))) +# +# files <- "/vsicurl/https://prd-tnm.s3.amazonaws.com/StagedProducts/Elevation/1/TIFF/USGS_Seamless_DEM_1.vrt" +# +# # write list of files to text file +# write.table(files, txt_file, row.names = FALSE, col.names = FALSE, quote = FALSE) +# +# # build VRT from text file input using GDAL system call ... +# system(paste("gdalbuildvrt -input_file_list", txt_file, vrt_file)) +# } +# +# logger::log_info("Finished ", ned$id[i], "...") +# } + +# +# match_transects_to_extended_cs_pts <- function(transect_lines, fixed_cs_pts, crosswalk_id, extension_pct = 0.5 ) { +# +# # transect_lines = transects +# # fixed_cs_pts = fixed_pts +# # crosswalk_id = CROSSWALK_ID +# +# fixed_cs_pts <- nhdplusTools::rename_geometry(fixed_cs_pts, "geometry") +# transect_lines <- nhdplusTools::rename_geometry(transect_lines, "geometry") +# +# # get the counts of each point type to add this data to the transect_lines dataset +# point_type_counts <- hydrofabric3D::get_point_type_counts(classified_pts = fixed_cs_pts, +# crosswalk_id = crosswalk_id) +# # Check the number of cross sections that were extended +# message("Subsetting cross section points generated after extending transect_lines...") +# +# # extract cross section points that have an "is_extended" value of TRUE +# extended_pts <- +# fixed_cs_pts %>% +# dplyr::filter(is_extended) %>% +# hydrofabric3D::add_tmp_id(x = crosswalk_id) +# +# # extended_pts %>% +# # get_unique_tmp_ids() %>% +# # length() +# +# # extract transect_lines that have a "crosswalk_id" in the "extended_pts" dataset +# update_transect_lines <- +# transect_lines %>% +# hydrofabric3D::add_tmp_id(x = crosswalk_id) %>% +# dplyr::filter(tmp_id %in% unique(extended_pts$tmp_id)) +# +# cs_pt_uids <- unique(hydrofabric3D::add_tmp_id(fixed_cs_pts, x = crosswalk_id)$tmp_id) +# +# # If any transect_lines were extended, update the transect_lines dataset, and overwrite local and S3 transect_lines geopackages +# if (nrow(update_transect_lines) > 0) { +# message("Updating ", nrow(update_transect_lines), " transect_lines") +# +# +# # update_transect_lines <- +# # update_transect_lines %>% +# # dplyr::rename(hy_id := !!sym(crosswalk_id)) +# # +# update_transect_lines <- +# update_transect_lines %>% +# # apply extend_by_percent function to each transect line: +# hydrofabric3D:::extend_by_percent( +# crosswalk_id = crosswalk_id, +# pct = extension_pct, +# length_col = "cs_lengthm" +# ) +# +# update_transect_lines <- hydroloom::rename_geometry(update_transect_lines, "geometry") +# +# # update_transect_lines <- +# # update_transect_lines %>% +# # dplyr::rename(!!sym(crosswalk_id) := hy_id) +# +# # cs_pt_uids <- unique(hydrofabric3D::add_tmp_id(fixed_cs_pts, x = get(crosswalk_id))$tmp_id) +# # transect_uids <- unique(hydrofabric3D::add_tmp_id(transect_lines, x = get(crosswalk_id))$tmp_id) +# +# # Filter down to ONLY points that were finalized and rectified from rectify_cs_pts() +# # Remove old transect_lines that have "tmp_id" in "extended_pts" (transect_lines that were unchanged and are "good_to_go") +# # and then replace with old transect_lines with the "update_transect_lines" +# out_transect_lines <- +# transect_lines %>% +# hydrofabric3D::add_tmp_id(x = crosswalk_id) %>% +# dplyr::filter(tmp_id %in% cs_pt_uids) %>% +# dplyr::filter(!tmp_id %in% unique(extended_pts$tmp_id)) %>% +# dplyr::bind_rows( +# dplyr::mutate(update_transect_lines, is_extended = TRUE) +# ) +# +# # transect_lines %>% +# # hydrofabric3D::add_tmp_id(x = "hy_id") %>% +# # 
# dplyr::filter(!tmp_id %in% unique(extended_pts$tmp_id)) %>% +# # dplyr::filter(tmp_id %in% unique(hydrofabric3D::add_tmp_id(fixed_pts, x = "hy_id")$tmp_id)) %>% # Subset down to the remaining tmp_ids in the fixed points +# # dplyr::filter(!tmp_id %in% unique(extended_pts$tmp_id)) %>% # remove the tmp_ids that we are going add back in with the extended versions of those tmp_ids +# # dplyr::bind_rows( # bring in the new updated extended transect_lines +# # dplyr::mutate( +# # update_transect_lines, +# # is_extended = TRUE +# # ) +# # ) +# } else { +# # If no transect_lines were extended +# out_transect_lines <- +# transect_lines %>% +# hydrofabric3D::add_tmp_id(x = crosswalk_id) %>% +# dplyr::filter(tmp_id %in% cs_pt_uids) %>% +# # dplyr::filter(tmp_id %in% unique(hydrofabric3D::add_tmp_id(fixed_cs_pts, x = get(crosswalk_id))$tmp_id)) %>% +# dplyr::filter(!tmp_id %in% unique(extended_pts$tmp_id)) +# } +# +# # Finalize new transect_lines +# out_transect_lines <- +# out_transect_lines %>% +# dplyr::left_join( +# point_type_counts, +# by = c(crosswalk_id, "cs_id") +# ) %>% +# dplyr::left_join( +# dplyr::ungroup( +# dplyr::slice( +# dplyr::group_by( +# dplyr::select(sf::st_drop_geometry(fixed_cs_pts), +# dplyr::any_of(crosswalk_id), +# cs_id, bottom, left_bank, right_bank, valid_banks, has_relief +# ), +# dplyr::across(dplyr::any_of(c(crosswalk_id, "cs_id"))) +# ), +# 1 +# ) +# ), +# by = c(crosswalk_id, "cs_id") +# ) %>% +# dplyr::select( +# dplyr::any_of(crosswalk_id), +# cs_source, cs_id, cs_measure, cs_lengthm, +# # sinuosity, +# is_extended, +# left_bank_count, right_bank_count, channel_count, bottom_count, +# bottom, left_bank, right_bank, valid_banks, has_relief, +# geometry +# ) %>% +# dplyr::mutate( +# is_extended = ifelse(is.na(is_extended), FALSE, is_extended) +# ) +# +# return(out_transect_lines) +# } + +# utility function for getting transects extended and +# matching cross section points that went through "get_improved_cs_pts()" and that were extended for improvement +# returns the extended version of the transects +# match_transects_to_extended_cs_pts <- function(transect_lines, fixed_cs_pts, crosswalk_id) { +# +# # transect_lines = transects +# # fixed_cs_pts = fixed_pts +# # crosswalk_id = CROSSWALK_ID +# +# fixed_cs_pts <- nhdplusTools::rename_geometry(fixed_cs_pts, "geometry") +# transect_lines <- nhdplusTools::rename_geometry(transect_lines, "geometry") +# +# # get the counts of each point type to add this data to the transect_lines dataset +# point_type_counts <- hydrofabric3D::get_point_type_counts(classified_pts = fixed_cs_pts, +# crosswalk_id = crosswalk_id) +# # Check the number of cross sections that were extended +# message("Subsetting cross section points generated after extending transect_lines...") +# +# # extract cross section points that have an "is_extended" value of TRUE +# extended_pts <- +# fixed_cs_pts %>% +# dplyr::filter(is_extended) %>% +# hydrofabric3D::add_tmp_id(x = crosswalk_id) +# +# # extended_pts %>% +# # get_unique_tmp_ids() %>% +# # length() +# +# # extract transect_lines that have a "crosswalk_id" in the "extended_pts" dataset +# update_transect_lines <- +# transect_lines %>% +# hydrofabric3D::add_tmp_id(x = crosswalk_id) %>% +# dplyr::filter(tmp_id %in% unique(extended_pts$tmp_id)) +# +# cs_pt_uids <- unique(hydrofabric3D::add_tmp_id(fixed_cs_pts, x = crosswalk_id)$tmp_id) +# +# # If any transect_lines were extended, update the transect_lines dataset, and overwrite local and S3 transect_lines geopackages +# if (nrow(update_transect_lines) > 0) 
{ +# message("Updating ", nrow(update_transect_lines), " transect_lines") +# +# +# update_transect_lines <- +# update_transect_lines %>% +# dplyr::rename(hy_id := !!sym(crosswalk_id)) +# +# update_transect_lines <- +# update_transect_lines %>% +# # apply extend_by_percent function to each transect line: +# hydrofabric3D:::extend_by_percent( +# pct = EXTENSION_PCT, +# length_col = "cs_lengthm" +# ) +# +# update_transect_lines <- hydroloom::rename_geometry(update_transect_lines, "geometry") +# +# update_transect_lines <- +# update_transect_lines %>% +# dplyr::rename(!!sym(crosswalk_id) := hy_id) +# +# # cs_pt_uids <- unique(hydrofabric3D::add_tmp_id(fixed_cs_pts, x = get(crosswalk_id))$tmp_id) +# # transect_uids <- unique(hydrofabric3D::add_tmp_id(transect_lines, x = get(crosswalk_id))$tmp_id) +# +# # Filter down to ONLY points that were finalized and rectified from rectify_cs_pts() +# # Remove old transect_lines that have "tmp_id" in "extended_pts" (transect_lines that were unchanged and are "good_to_go") +# # and then replace with old transect_lines with the "update_transect_lines" +# out_transect_lines <- +# transect_lines %>% +# hydrofabric3D::add_tmp_id(x = crosswalk_id) %>% +# dplyr::filter(tmp_id %in% cs_pt_uids) %>% +# dplyr::filter(!tmp_id %in% unique(extended_pts$tmp_id)) %>% +# dplyr::bind_rows( +# dplyr::mutate(update_transect_lines, is_extended = TRUE) +# ) +# +# # transect_lines %>% +# # hydrofabric3D::add_tmp_id(x = "hy_id") %>% +# # # dplyr::filter(!tmp_id %in% unique(extended_pts$tmp_id)) %>% +# # dplyr::filter(tmp_id %in% unique(hydrofabric3D::add_tmp_id(fixed_pts, x = "hy_id")$tmp_id)) %>% # Subset down to the remaining tmp_ids in the fixed points +# # dplyr::filter(!tmp_id %in% unique(extended_pts$tmp_id)) %>% # remove the tmp_ids that we are going add back in with the extended versions of those tmp_ids +# # dplyr::bind_rows( # bring in the new updated extended transect_lines +# # dplyr::mutate( +# # update_transect_lines, +# # is_extended = TRUE +# # ) +# # ) +# } else { +# # If no transect_lines were extended +# out_transect_lines <- +# transect_lines %>% +# hydrofabric3D::add_tmp_id(x = crosswalk_id) %>% +# dplyr::filter(tmp_id %in% cs_pt_uids) %>% +# # dplyr::filter(tmp_id %in% unique(hydrofabric3D::add_tmp_id(fixed_cs_pts, x = get(crosswalk_id))$tmp_id)) %>% +# dplyr::filter(!tmp_id %in% unique(extended_pts$tmp_id)) +# } +# +# # Finalize new transect_lines +# out_transect_lines <- +# out_transect_lines %>% +# dplyr::left_join( +# point_type_counts, +# by = c(crosswalk_id, "cs_id") +# ) %>% +# dplyr::left_join( +# dplyr::ungroup( +# dplyr::slice( +# dplyr::group_by( +# dplyr::select(sf::st_drop_geometry(fixed_cs_pts), +# dplyr::any_of(crosswalk_id), +# cs_id, bottom, left_bank, right_bank, valid_banks, has_relief +# ), +# dplyr::across(dplyr::any_of(c(crosswalk_id, "cs_id"))) +# ), +# 1 +# ) +# ), +# by = c(crosswalk_id, "cs_id") +# ) %>% +# dplyr::select( +# dplyr::any_of(crosswalk_id), +# cs_source, cs_id, cs_measure, cs_lengthm, +# # sinuosity, +# is_extended, +# left_bank_count, right_bank_count, channel_count, bottom_count, +# bottom, left_bank, right_bank, valid_banks, has_relief, +# geometry +# ) %>% +# dplyr::mutate( +# is_extended = ifelse(is.na(is_extended), FALSE, is_extended) +# ) +# +# return(out_transect_lines) +# } + + + diff --git a/runners/cs_runner2/00_driver.R b/runners/cs_runner2/00_driver.R new file mode 100644 index 0000000..9c6eaed --- /dev/null +++ b/runners/cs_runner2/00_driver.R @@ -0,0 +1,27 @@ +### Run this file to have all runner scripts 
run in order + +# downloads nextgen datasets +source("runners/cs_runner2/config_env.R") + +# downloads datasets +# - Nextgen data +# - Reference features (for waterbody filtering) +# - FEMA 100 year floodplain polygons (FGBs) +# - Process state level FEMA polygons to CONUS +# - ML outputs +# - 3DEP DEM VRT +source("runners/cs_runner2/download_conus_nextgen.R") +source("runners/cs_runner2/download_conus_ref_features.R") +source("runners/cs_runner2/download_fema.r") +source("runners/cs_runner2/process_fema.R") +source("runners/cs_runner2/download_ml_outputs.R") +source("runners/cs_runner2/download_dem_from_vrt.R") + +# generate and upload transects datasets +source("runners/cs_runner2/01_transects.R") + +# generate and upload cross sections points datasets +source("runners/cs_runner2/02_cs_pts.R") + +# Apply machine learning topwidths and depths estimates to DEM cross section points +source("runners/cs_runner2/03_inject_ml.R") diff --git a/runners/cs_runner2/01_transects.R b/runners/cs_runner2/01_transects.R new file mode 100644 index 0000000..e11f46c --- /dev/null +++ b/runners/cs_runner2/01_transects.R @@ -0,0 +1,190 @@ + +library(sf) +library(dplyr) + +source("runners/cs_runner2/base_variables.R") +# source("runners/cs_runner2/utils.R") + +VPU_IDS <- get_vpu_ids() + +for (i in seq_along(VPU_IDS)) { +# for (i in 21:length(VPU_IDS)) { + + # -------------------------------------------------------------- + # ---- Setup variables / paths ---- + # -------------------------------------------------------------- + # VERSION_DIRS_LIST + vpu <- VPU_IDS[i] + CROSSWALK_ID <- "id" + + transect_filenames <- get_transect_filenames(vpu, sep = "-") + base_output_path <- paste0(VERSION_DIRS_LIST$transects_base_dir, "/", transect_filenames$transects_base_path) + base_transect_file_exists <- file.exists(base_output_path) + + do_process_transects <- !base_transect_file_exists || REGENERATE_TRANSECTS + # do_not_reprocess_transects <- base_transect_file_exists && !REGENERATE_TRANSECTS + + if (do_process_transects) { + message("Creating VPU ", vpu, " transects:", + "\n - flowpaths: '", + basename(CONUS_NEXTGEN_GPKG_PATH), "'" + ) + } else { + message( + "VPU ", vpu, " transects file already exists at:\n - '", base_output_path, "'", + "\n\n >>> NOTE: To regenerate transects:\n\t\t- Set REGENERATE_TRANSECTS = TRUE in 'base_variables.R' \n\t\tOR\n\t\t- delete the '", + basename(base_output_path), "' file" + ) + next + + } + + # if we are going to create new transects and the transect file already exists, delete it as to not cause confusion or accidently append to an old dataset + if (do_process_transects && base_transect_file_exists) { + message("Deleting old transect file:\n > '", base_output_path, "'") + file.remove(base_output_path) + } + + # -------------------------------------------------------------- + # ---- Load flowpaths for VPU from conus_nextgen.gpkg ---- + # -------------------------------------------------------------- + + # read in nextgen flowlines + flines <- + CONUS_NEXTGEN_GPKG_PATH %>% + sf::read_sf(query = paste0("SELECT * FROM flowpaths WHERE vpuid = '", vpu, "'")) + + has_no_flowlines <- nrow(flines) == 0 + + if (has_no_flowlines) { + message("Skipping VPU ", vpu, " as no flowlines were found in '", basename(CONUS_NEXTGEN_GPKG_PATH), "'..." 
) + next + } + + + + # Add an estimate bankful width based on Total downsteam drainage area (sqkm, Power law equation) + flines <- + flines %>% + hydroloom::rename_geometry("geometry") %>% + hydrofabric3D::add_powerlaw_bankful_width( + total_drainage_area_sqkm_col = "tot_drainage_areasqkm", + min_bf_width = 50 + ) %>% + dplyr::group_by(order) %>% + dplyr::slice(1:10) %>% + dplyr::ungroup() %>% + dplyr::select( + id, + lengthkm, + tot_drainage_areasqkm, + bf_width, + mainstem, + geometry + ) + # dplyr::group_by(order) + + # -------------------------------------------------------------- + # ---- Generate transects ---- + # -------------------------------------------------------------- + # flines$geometry %>% sf::st_is_empty() %>% any() + + # sf::write_sf( + # flines, + # "/Users/anguswatters/Desktop/wrong_cs_ids_flines_error.gpkg" + # # "/Users/anguswatters/Desktop/empty_geom_flines_error.gpkg" + # ) + + # create transect lines + transects <- hydrofabric3D::cut_cross_sections( + net = flines, # flowlines network + crosswalk_id = CROSSWALK_ID, # Unique feature ID + cs_widths = flines$bf_width, # cross section width of each "id" linestring ("hy_id") + # cs_widths = 15, + num = 3, # number of cross sections per "id" linestring ("hy_id") + # smooth = FALSE, # smooth lines + # densify = NULL, + smooth = TRUE, # smooth lines + densify = 3, # densify linestring points + + rm_self_intersect = TRUE, # remove self intersecting transects + fix_braids = FALSE, # whether to fix braided flowlines or not + add = TRUE # whether to add back the original data + ) + # dplyr::mutate( + # cs_source = CS_SOURCE + # ) + # transects$cs_lengthm + # transects$geometry %>% sf::st_length() + + # transects %>% + # dplyr::mutate( + # new_cs_lengthm = as.numeric(sf::st_length(.)) + # ) %>% + # dplyr::filter(!dplyr::near(new_cs_lengthm, cs_lengthm, tol = 2)) + # .Machine$double.eps^1 + # crosswalk_id <- "id" + # # reenumerate the cs_ids for each transect based on cs_measure sorting, and make sure all cross sections are correctly numbered + # mismatches <- + # transects %>% + # sf::st_drop_geometry() %>% + # dplyr::group_by(dplyr::across(dplyr::any_of(c(crosswalk_id)))) %>% + # dplyr::arrange(cs_measure, .by_group = TRUE) %>% + # dplyr::mutate( + # new_cs_id = 1:dplyr::n() + # ) %>% + # dplyr::ungroup() %>% + # dplyr::filter(cs_id != new_cs_id) + # mismatches + # transects %>% + # dplyr::filter(id %in% c("wb-1023360", "wb-1023363")) + # + # transects %>% + # dplyr::filter(id %in% c("wb-1023360", "wb-1023363")) %>% + # mapview::mapview() + # + # # FALSE if there are any transects with different cs_ids to the newly created cs_id + # # Otherwise TRUE + # has_valid_cs_ids <- !(nrow(mismatches) > 0) + # + # # t2 <- transects %>% hydrofabric3D:::renumber_cs_ids("id") + # # hydrofabric3D::validate_transects(t2, "id") + # transects + is_valid_transects <- hydrofabric3D::validate_transects(transects, "id") + is_valid_transects_against_flowlines <- hydrofabric3D::validate_transects_against_flowlines(transects, flines, "id") + + # is_valid_transects <- hydrofabric3D::validate_transects(transects[c(1, 3, 55), ], "id") + # is_valid_transects_against_flowlines <- hydrofabric3D::validate_transects_against_flowlines(transects, flines[c(-1), ], "id") + # is_valid_transects_against_flowlines + # hydrofabric3D:::rm_multiflowline_intersections(transects, flines[c(-1), ]) + + # select relevent columns + transects <- + transects %>% + dplyr::mutate( + cs_source = CS_SOURCE + ) %>% + # hydrofabric3D:::select_transects(CROSSWALK_ID) + 
dplyr::select( + dplyr::any_of(CROSSWALK_ID), + cs_id, + cs_lengthm, + # cs_lengthm = new_cs_lengthm, + cs_measure, + sinuosity, + cs_source, + geometry + ) + + if (is_valid_transects && is_valid_transects_against_flowlines) { + message("Saving transects to:\n - filepath: '", base_output_path, "'") + sf::write_sf( + transects, + base_output_path + ) + } + + message(" --- VPU ", vpu, " transects generation complete! --- ") + message() + +} diff --git a/runners/cs_runner2/02_extend_transects_by_fema.R b/runners/cs_runner2/02_extend_transects_by_fema.R new file mode 100644 index 0000000..106dc44 --- /dev/null +++ b/runners/cs_runner2/02_extend_transects_by_fema.R @@ -0,0 +1,118 @@ + +library(sf) +library(dplyr) + +source("runners/cs_runner2/base_variables.R") +# source("runners/cs_runner2/utils.R") + +VPU_IDS <- get_vpu_ids() + +for (i in seq_along(VPU_IDS)) { + # for (i in 21:length(VPU_IDS)) { + + # -------------------------------------------------------------- + # ---- Setup variables / paths ---- + # -------------------------------------------------------------- + # VERSION_DIRS_LIST + # i = 8 + vpu <- VPU_IDS[i] + CROSSWALK_ID <- "id" + # cs_filesnames <- get_cross_section_filenames(vpu, sep = "-") + transect_filenames <- get_transect_filenames(vpu, sep = "-") + + # TODO: Add a catch for not allowing this all to run if the base_transects do NOT exist + base_transects_path <- paste0(VERSION_DIRS_LIST$transects_base_dir, "/", transect_filenames$transects_base_path) + fema_transects_output_path <- paste0(VERSION_DIRS_LIST$transects_fema_extended_dir, "/", transect_filenames$transects_fema_extended_path) + + base_transect_file_exists <- file.exists(base_transects_path) + fema_transects_file_exists <- file.exists(fema_transects_output_path) + + do_process_transects <- !fema_transects_file_exists || REGENERATE_TRANSECTS + + if (do_process_transects) { + message("Creating VPU ", vpu, " transects:", + "\n - flowpaths: '", + basename(CONUS_NEXTGEN_GPKG_PATH), "'", + "\n - base transects: '", + basename(base_transects_path), "'" + ) + } else { + message( + "VPU ", vpu, " transects file already exists at:\n - '", fema_transects_output_path, "'", + "\n\n >>> NOTE: To regenerate transects:\n\t\t- Set REGENERATE_TRANSECTS = TRUE in 'base_variables.R' \n\t\tOR\n\t\t- delete the '", + basename(fema_transects_output_path), "' file" + ) + next + + } + + if (do_process_transects && fema_transects_file_exists) { + message("Deleting old fema extended transect file:\n > '", fema_transects_output_path, "'") + file.remove(fema_transects_output_path) + } + + # read in nextgen flowlines + flines <- + CONUS_NEXTGEN_GPKG_PATH %>% + sf::read_sf(query = paste0("SELECT * FROM flowpaths WHERE vpuid = '", vpu, "'")) %>% + hydroloom::rename_geometry("geometry") + + # transects + transects <- sf::read_sf(base_transects_path) %>% + hydroloom::rename_geometry("geometry") + + # FEMA polygons + fema <- + CONUS_FEMA_GPKG_PATH %>% + sf::read_sf(layer = get_fema_conus_layer_name(vpu)) %>% + rmapshaper::ms_simplify(keep_shapes = T, keep = 0.01, sys = TRUE, sys_mem = 16) %>% + hydroloom::rename_geometry("geometry") + # fema <- rmapshaper::ms_simplify(fema, keep_shapes = T, keep = 0.1, sys = TRUE, sys_mem = 16) + + # # add mainstem to + # transects <- + # transects %>% + # dplyr::left_join( + # flines %>% + # sf::st_drop_geometry() %>% + # dplyr::select(dplyr::any_of(CROSSWALK_ID), mainstem), + # by = CROSSWALK_ID + # ) + + message("Extending transects out to FEMA 100yr floodplain polygon boundaries - (", Sys.time(), ")") + + # 
TODO: make sure this 3000m extension distance is appropriate across VPUs + # TODO: also got to make sure that this will be feasible on memory on the larger VPUs... + extended_transects <- hydrofabric3D::extend_transects_to_polygons( + transect_lines = transects, + polygons = fema, + flowlines = flines, + crosswalk_id = CROSSWALK_ID, + grouping_id = CROSSWALK_ID, + # grouping_id = "mainstem", + max_extension_distance = 3000, + reindex_cs_ids = TRUE + ) + + is_valid_transects <- hydrofabric3D::validate_transects(extended_transects, "id") + is_valid_transects_against_flowlines <- hydrofabric3D::validate_transects_against_flowlines(extended_transects, flines, "id") + + + + + +} + + + + + + + + + + + + + + \ No newline at end of file diff --git a/runners/cs_runner/.Rapp.history b/runners/cs_runner2/03_extract_initial_cs_pts.R similarity index 100% rename from runners/cs_runner/.Rapp.history rename to runners/cs_runner2/03_extract_initial_cs_pts.R diff --git a/runners/cs_runner2/04_extend_transects_by_cs_attributes.R b/runners/cs_runner2/04_extend_transects_by_cs_attributes.R new file mode 100644 index 0000000..e69de29 diff --git a/runners/cs_runner2/05_extract_output_cs_pts.R b/runners/cs_runner2/05_extract_output_cs_pts.R new file mode 100644 index 0000000..e69de29 diff --git a/runners/cs_runner2/06_inject_ml_bathymetry.R b/runners/cs_runner2/06_inject_ml_bathymetry.R new file mode 100644 index 0000000..e69de29 diff --git a/runners/cs_runner2/base_variables.R b/runners/cs_runner2/base_variables.R new file mode 100644 index 0000000..7588bb9 --- /dev/null +++ b/runners/cs_runner2/base_variables.R @@ -0,0 +1,435 @@ +### EDIT BASE_DIR, AWS_PROFILE, and DEM_PATH ### +# load utils +source("runners/cs_runner2/utils.R") + +# --------------------------------------------------------------------------------- +# ---- General paths and constants variables ---- +# - edit to match your local environment +# - BASE_DIR: base directory for local file storage +# - AWS_PROFILE: AWS profile to run CLI commands +# - VERSION: S3 prefix/folder of version to run / generate hydrofabric data for +# --------------------------------------------------------------------------------- +# Base directory for local file storage +BASE_DIR <- '/Volumes/T7SSD/lynker-spatial' +# BASE_DIR <- '/Users/anguswatters/Desktop/lynker-spatial' + +BASE_DIRS_LIST <- get_base_dir_paths(BASE_DIR) + +# AWS profile to run CLI commands +AWS_PROFILE <- "angus-lynker" + +# S3 prefix/folder of version run +VERSION <- "v3.0" +CONUS_VERSION <- "v2.2" + +VERSION_DIRS_LIST <- get_version_base_dir_paths(BASE_DIR, VERSION) + +# string to fill in "CS_SOURCE" column in output datasets +CS_SOURCE <- "hydrofabric3D" + +# ------------------------------------------------------------------------------------- +# ---- Processing flags (cache data) ---- +# ------------------------------------------------------------------------------------- + +# cache transects by setting REGENERATE_TRANSECTS to FALSE +REGENERATE_TRANSECTS <- TRUE + +# ------------------------------------------------------------------------------------- +# ---- S3 BUCKET NAMES ---- +# ------------------------------------------------------------------------------------- + +# name of bucket with nextgen data +LYNKER_SPATIAL_S3_BUCKET_NAME <- "lynker-spatial" +LYNKER_SPATIAL_HF_S3_PREFIX <- "hydrofabric" + +# AWS S3 bucket URI +LYNKER_SPATIAL_BASE_S3_URI <- paste0("s3://", LYNKER_SPATIAL_S3_BUCKET_NAME, "/") +LYNKER_SPATIAL_HF_S3_URI <- paste0(LYNKER_SPATIAL_BASE_S3_URI, LYNKER_SPATIAL_HF_S3_PREFIX, 
"/") + +# name of bucket with nextgen data +LYNKER_HF_S3_BUCKET_NAME <- "lynker-hydrofabric" + +# ------------------------------------------------------------------------------------- +# ---- VPU IDs ---- +# ------------------------------------------------------------------------------------- +VPU_IDS <- c('01', '02', '03N', '03S', '03W', + '04', '05', '06', '07', '08', '09', + '10L', '10U', '11', '12', '13', '14', + '15', '16', '17', '18', '20', '21') +# VPU_IDS <- sf::st_drop_geometry(nhdplusTools::get_boundaries())$VPUID + +# ------------------------------------------------------------------------------------- +# ---- CONUS NEXTGEN ---- +# ------------------------------------------------------------------------------------- + +CONUS_NEXTGEN_S3_URI <- paste0("s3://", LYNKER_SPATIAL_S3_BUCKET_NAME, "/", LYNKER_SPATIAL_HF_S3_PREFIX, "/", CONUS_VERSION, "/conus/conus_nextgen.gpkg") +CONUS_NEXTGEN_GPKG_PATH <- file.path(VERSION_DIRS_LIST$network_dir, "conus_nextgen.gpkg") + +# ------------------------------------------------------------------------------------- +# ---- CONUS REFEREMCE FEATURES ---- +# ------------------------------------------------------------------------------------- + +CONUS_REF_FEATURES_S3_URI <- paste0("s3://", LYNKER_SPATIAL_S3_BUCKET_NAME, "/", LYNKER_SPATIAL_HF_S3_PREFIX, "/", CONUS_VERSION, "/conus/conus_reference.gpkg") +CONUS_REF_FEATURES_GPKG_PATH <- file.path(VERSION_DIRS_LIST$ref_features_dir, "conus_reference.gpkg") + +# ------------------------------------------------------------------------------------- +# ---- CONUS ML DATA ---- +# ------------------------------------------------------------------------------------- + +CONUS_ML_S3_URI <- paste0("s3://", LYNKER_SPATIAL_S3_BUCKET_NAME, "/", LYNKER_SPATIAL_HF_S3_PREFIX, "/", CONUS_VERSION, "/conus/bathymetry/ml_auxiliary_data.parquet") +CONUS_ML_PARQUET_PATH <- file.path(VERSION_DIRS_LIST$ml_dir, "ml_auxiliary_data.parquet") + +# ------------------------------------------------------------------------------------- +# ---- FEMA 100 year flood plain data ---- +# ------------------------------------------------------------------------------------- + +LYNKER_HF_FEMA_S3_PREFIX <- "FEMA100" +LYNKER_HF_FEMA_S3_URI <- paste0("s3://", LYNKER_HF_S3_BUCKET_NAME, "/", LYNKER_HF_FEMA_S3_PREFIX, "/") + +FEMA_FGB_PATH = BASE_DIRS_LIST$fema_fgb_dir +FEMA_GEOJSON_PATH = BASE_DIRS_LIST$fema_geojson_dir +FEMA_CLEAN_PATH = BASE_DIRS_LIST$fema_clean_dir +FEMA_GPKG_PATH = BASE_DIRS_LIST$fema_gpkg_dir +FEMA_BY_VPU_PATH = BASE_DIRS_LIST$fema_by_vpu_dir +FEMA_VPU_SUBFOLDERS = BASE_DIRS_LIST$fema_by_vpu_subdirs + +# ------------------------------------------------------------------------------------- +# ---- CONUS CS extension polygons ---- +# Data derived from FEMA 100 year flood plain data, puts all FEMA VPU datasets into a +# single geopackage for CONUS +# ------------------------------------------------------------------------------------- + +CS_EXTENSION_POLYGONS_DIR <- BASE_DIRS_LIST$cs_extension_polygons_dir +CONUS_FEMA_GPKG_PATH <- file.path(CS_EXTENSION_POLYGONS_DIR, 'conus_fema.gpkg') + +# ------------------------------------------------------------------------------------- +# ---- CONUS CS extension polygons ---- +# ------------------------------------------------------------------------------------- + +# Meta data object of National Elevation Datasets (NED) +NED_META <- data.frame(rbind( + # c(id = "USGS_1", + # URL = "/vsicurl/https://prd-tnm.s3.amazonaws.com/StagedProducts/Elevation/1", + # varname = "30m elevation", + # 
long_name = "30m (1 arcsec) National Elevation Dataset", + # units = "m", + # s3_uri = "s3://prd-tnm/StagedProducts/Elevation/1/TIFF/", + # vrt_name = "USGS_Seamless_DEM_1.vrt" + # ) + # + # # c(id = "USGS_2", + # # URL = "/vsicurl/https://prd-tnm.s3.amazonaws.com/StagedProducts/Elevation/2", + # # varname = "60m elevation", + # # long_name = "60m (2 arcsec) National Elevation Dataset Alaska", + # # units = "m", + # # s3_uri = "s3://prd-tnm/StagedProducts/Elevation/2/TIFF/", + # # vrt_name = "USGS_Seamless_DEM_2.vrt" + # # ), + c(id = "USGS_13", + URL = "/vsicurl/https://prd-tnm.s3.amazonaws.com/StagedProducts/Elevation/13", + varname = "10m elevation", + long_name = "10m (1/3th arcsec) National Elevation Dataset", + units = "m", + s3_uri = "s3://prd-tnm/StagedProducts/Elevation/13/TIFF/", + vrt_name = "USGS_Seamless_DEM_13.vrt" + ) + ) +) + + +DEM_DIR <- BASE_DIRS_LIST$dem_dir +DEM_VRT_DIR <- BASE_DIRS_LIST$dem_vrt_dir +# DEM_TIF_DIR <- BASE_DIRS_LIST$dem_tif_dir + +# DEM URL +DEM_PATH <- "/vsicurl/https://prd-tnm.s3.amazonaws.com/StagedProducts/Elevation/1/TIFF/USGS_Seamless_DEM_1.vrt" +# DEM_PATH <- "/vsicurl/https://prd-tnm.s3.amazonaws.com/StagedProducts/Elevation/13/TIFF/USGS_Seamless_DEM_13.vrt" +# # /vsicurl/https://prd-tnm.s3.amazonaws.com/StagedProducts/Elevation/13/TIFF/current/n70w148/USGS_13_n70w148.tif +# ---------------------------------------------------------------------------- +# ---- Cross section point extraction constant variables ---- +# ---------------------------------------------------------------------------- + +# scale argument for cross_section_pts() function. +# The percentage of the length of the transect line to try and extend a transect to see if viable Z values can be found by extending transect line +# Default setting is 50% of the original transect lines length (0.5) +EXTENSION_PCT <- 0.5 + +# percentage of the length each cross section that should be used as a threshold for classifying a cross section as having relief or not +# 1% of the cross sections length is the default value we are using +# (i.e. 
a 100m long cross section needs a minimum of 1 meter (1%) of relief in its cross section points to be classified as "having relief") +PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF <- 0.01 + +# # ------------------------------------------------------------------------------------- +# # ---- Create local directory / path variables (FEMA data) ---- +# # ------------------------------------------------------------------------------------- + +# LYNKER_HF_FEMA_S3_PREFIX <- "FEMA100" +# LYNKER_HF_FEMA_S3_URI <- paste0("s3://", LYNKER_HF_S3_BUCKET_NAME, "/", LYNKER_HF_FEMA_S3_PREFIX, "/") + +# # location of FEMA 100 year flood plain FGB files +# FEMA_S3_BUCKET <- "s3://lynker-hydrofabric/" +# LYNKER_HF_FEMA_S3_PREFIX <- "FEMA100/" +# LYNKER_HF_FEMA_S3_URI <- paste0(FEMA_S3_BUCKET, LYNKER_HF_FEMA_S3_PREFIX) + +# # FEMA100 year flood map FGB save location (temporary, will be deleted after processing) +# FEMA_FGB_PATH <- file.path(BASE_DIRS_LIST$fema_dir, "fema_fgb") +# FEMA_GEOJSON_PATH <- file.path(BASE_DIRS_LIST$fema_dir, "fema_geojson") +# FEMA_CLEAN_PATH <- file.path(BASE_DIRS_LIST$fema_dir, "fema_clean") +# FEMA_GPKG_PATH <- file.path(BASE_DIRS_LIST$fema_dir, "fema_gpkg") +# FEMA_BY_VPU_PATH <- file.path(BASE_DIRS_LIST$fema_dir, "FEMA_BY_VPU") + +# VPU_IDS <- sf::st_drop_geometry(nhdplusTools::get_boundaries())$VPUID + +# FEMA_VPU_SUBFOLDERS <- paste0(FEMA_BY_VPU_PATH, "/VPU_", VPU_IDS) + +# CS_EXTENSION_POLYGONS_DIR <- paste0(BASE_DIR, "/cs-extension-polygons") +# CONUS_FEMA_GPKG_PATH <- file.path(CS_EXTENSION_POLYGONS_DIR, 'conus_fema.gpkg') + +# DEM_DIR <- BASE_DIRS_LIST$dem_dir +# DEM_VRT_DIR <- BASE_DIRS_LIST$dem_vrt_dir +# DEM_TIF_DIR <- BASE_DIRS_LIST$dem_tif_dir + +# # ---------------------------------------------------------------------------- +# # ---- Cross section point extraction constant variables ---- +# # ---------------------------------------------------------------------------- + +# # DEM URL +# DEM_PATH <- "/vsicurl/https://prd-tnm.s3.amazonaws.com/StagedProducts/Elevation/1/TIFF/USGS_Seamless_DEM_1.vrt" + +# # scale argument for cross_section_pts() function. +# # The percentage of the length of the transect line to try and extend a transect to see if viable Z values can be found by extending transect line +# # Default setting is 50% of the original transect lines length (0.5) +# EXTENSION_PCT <- 0.5 + +# # percentage of the length each cross section that should be used as a threshold for classifying a cross section as having relief or not +# # 1% of the cross sections length is the default value we are using +# # (i.e. 
a 100m long cross section needs a minimum of 1 meter (1%) of relief in its cross section points to be classified as "having relief") +# PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF <- 0.01 + + +# # ------------------------------------------------------------------------------------- +# # ---- S3 output directories ----- +# # - transects +# # - cross section points +# # - ML cross section points +# # ------------------------------------------------------------------------------------- +# +# # transect bucket prefix +# S3_TRANSECTS_DIR <- paste0(LYNKER_SPATIAL_HF_S3_URI, VERSION, "/3D/transects/") +# +# # cross section bucket prefix +# S3_CS_PTS_DIR <- paste0(LYNKER_SPATIAL_HF_S3_URI, VERSION, "/3D/dem-cross-sections/") +# +# # cross section bucket prefix +# S3_CS_ML_PTS_DIR <- paste0(LYNKER_SPATIAL_HF_S3_URI, VERSION, "/3D/cross-sections/") +# +# # ------------------------------------------------------------------------------------- +# # ---- S3 nextgen data paths / directories ----- +# # ------------------------------------------------------------------------------------- +# +# # the name of the folder in the S3 bucket with the nextgen data +# S3_BUCKET_NEXTGEN_DIR <- paste0(VERSION, "/gpkg/") +# # S3_BUCKET_NEXTGEN_DIR <- "v20.1/gpkg/" +# +# # full URI to the S3 bucket folder with the nextgen data +# S3_BUCKET_NEXTGEN_DIR_URI <- paste0(LYNKER_SPATIAL_HF_S3_URI, S3_BUCKET_NEXTGEN_DIR) +# +# # reference features S3 bucket prefix +# S3_BUCKET_REF_FEATURES_URI <- paste0("s3://", LYNKER_SPATIAL_S3_BUCKET_NAME, "/00_reference_features/gpkg/") +# # S3_BUCKET_REF_FEATURES_URI <- "s3://lynker-spatial/00_reference_features/gpkg/" +# +# # ---------------------------------------------------------------------------- +# # ---- Machine learning data path variables ---- +# # ---------------------------------------------------------------------------- +# +# ML_OUTPUTS_S3_FILE <- "channel_ml_outputs.parquet" +# +# # ML_OUTPUTS_S3_DIR <- paste0(VERSION, "/3D/ml-outputs/") +# # ML_OUTPUTS_S3_DIR <- "v20.1/3D/ml-outputs/" +# +# ML_OUTPUTS_S3_URI <- paste0(LYNKER_SPATIAL_HF_S3_URI, VERSION, "/3D/ml-outputs/", ML_OUTPUTS_S3_FILE) +# # ML_OUTPUTS_S3_URI <- paste0(LYNKER_SPATIAL_HF_S3_URI, ML_OUTPUTS_S3_DIR, ML_OUTPUTS_S3_FILE) +# +# ML_OUTPUTS_PATH <- paste0(BASE_DIR, "/ml-outputs/", ML_OUTPUTS_S3_FILE) +# +# # path to the remote CONUS net parquet file +# CONUS_NETWORK_FILE <- "conus_net.parquet" +# CONUS_NETWORK_URI <- paste0(LYNKER_SPATIAL_HF_S3_URI, VERSION, "/", CONUS_NETWORK_FILE) +# +# # ---------------------------------------------------------------------------- +# +# +# # ------------------------------------------------------------------------------------- +# # ---- Local directory / path variables ---- +# # ------------------------------------------------------------------------------------- +# +# # directory to copy nextgen bucket data too +# NEXTGEN_DIR <- paste0(BASE_DIR, "/", S3_BUCKET_NEXTGEN_DIR) +# # NEXTGEN_DIR <- paste0(BASE_DIR, "/pre-release/") +# +# # # model attributes directory +# # MODEL_ATTR_DIR <- paste0(BASE_DIR, "/model_attributes/") +# +# # cross-section data model data directories +# TRANSECTS_DIR <- paste0(BASE_DIR, "/01_transects/") +# CS_PTS_DIR <- paste0(BASE_DIR, "/02_cs_pts/") +# +# # final output directory with geopackages per VPU +# CS_OUTPUT_DIR <- paste0(BASE_DIR, "/cross_sections/") +# +# # directory to copy nextgen bucket data too +# REF_FEATURES_DIR <- paste0(BASE_DIR, "/00_reference_features/") +# REF_FEATURES_GPKG_DIR <- paste0(REF_FEATURES_DIR, "gpkg/") +# +# # make a 
directory for the ML outputs data +# ML_OUTPUTS_DIR <- paste0(BASE_DIR, "/ml-outputs/") +# +# DEM_DIR <- paste0(BASE_DIR, "/dem") +# DEM_VRT_DIR <- paste0(DEM_DIR, "/vrt") +# DEM_TIF_DIR <- paste0(DEM_DIR, "/tif") +# +# # ------------------------------------------------------------------------------------- +# # ---- Create local directory / path variables (FEMA data) ---- +# # ------------------------------------------------------------------------------------- +# +# # location of FEMA 100 year flood plain FGB files +# FEMA_S3_BUCKET <- "s3://lynker-hydrofabric/" +# LYNKER_HF_FEMA_S3_PREFIX <- "FEMA100/" +# LYNKER_HF_FEMA_S3_URI <- paste0(FEMA_S3_BUCKET, LYNKER_HF_FEMA_S3_PREFIX) +# +# # FEMA100 year flood map FGB save location (temporary, will be deleted after processing) +# FEMA_FGB_PATH <- paste0(BASE_DIR, "/FEMA100") +# FEMA_GEOJSON_PATH <- paste0(BASE_DIR, "/FEMA100_geojson") +# FEMA_CLEAN_PATH <- paste0(BASE_DIR, "/FEMA100_clean") +# FEMA_GPKG_PATH <- paste0(BASE_DIR, "/FEMA100_gpkg") +# FEMA_GPKG_BB_PATH <- paste0(BASE_DIR, "/FEMA100_bounding_box") # TODO: Probably can be deleted too, not sure yet +# +# FEMA_BY_VPU_PATH <- paste0(BASE_DIR, "/FEMA_BY_VPU") +# VPU_IDS <- sf::st_drop_geometry(nhdplusTools::get_boundaries())$VPUID +# +# FEMA_VPU_SUBFOLDERS <- paste0(FEMA_BY_VPU_PATH, "/VPU_", VPU_IDS) +# # FEMA_VPU_SUBFOLDERS <- paste0( +# # FEMA_BY_VPU_PATH, "/VPU_", +# # unlist( +# # lapply(list.files(NEXTGEN_DIR, full.names = FALSE), function(vpu_file_names) { +# # unlist(regmatches(vpu_file_names, gregexpr("\\d+[A-Za-z]*", vpu_file_names)))}) +# # ) +# # ) +# +# # ------------------------------------------------------------------------------------- +# # ---- OVERWRITE_FEMA_FILES constant logicals---- +# # ---- > if TRUE, processing steps will be run again +# # and overwrite existing previously processed files +# # TODO: Describe these variables +# # ------------------------------------------------------------------------------------- +# +# # Default is TRUE (i.e. 
a fresh processing run is done from start to finish) +# OVERWRITE_FEMA_FILES <- TRUE +# DELETE_STAGING_GPKGS <- TRUE # remove intermediary files from the main output folder +# + +# # ------------------------------------------------------------------------------------- +# # ---- (New single domain) Local directory / path variables ---- +# # ------------------------------------------------------------------------------------- +# +# # directory for new domain data +# NEW_DOMAIN_DIRNAME <- "new_domain" +# NEW_DOMAIN_DIR <- paste0(BASE_DIR, "/", NEW_DOMAIN_DIRNAME) +# +# NEW_DOMAIN_FLOWLINES_DIRNAME <- "flowlines" +# NEW_DOMAIN_FLOWLINES_DIR <- paste0(NEW_DOMAIN_DIR, "/", NEW_DOMAIN_FLOWLINES_DIRNAME) +# +# NEW_DOMAIN_DEM_DIRNAME <- "dem" +# NEW_DOMAIN_DEM_DIR <- paste0(NEW_DOMAIN_DIR, "/", NEW_DOMAIN_DEM_DIRNAME) +# +# NEW_DOMAIN_FLOWLINES_FILE <- "AllDiffusiveCombined.gpkg" +# NEW_DOMAIN_FLOWLINES_PATH <- paste0(NEW_DOMAIN_FLOWLINES_DIR, "/", NEW_DOMAIN_FLOWLINES_FILE) +# +# # # Local DEM file +# # NEW_DOMAIN_DEM_FILE <- "hi_dem.tif" +# # NEW_DOMAIN_DEM_PATH <- paste0(NEW_DOMAIN_DEM_DIR, "/", NEW_DOMAIN_DEM_FILE) +# +# # Remote DEM file +# NEW_DOMAIN_DEM_PATH <- "/vsicurl/https://prd-tnm.s3.amazonaws.com/StagedProducts/Elevation/1/TIFF/USGS_Seamless_DEM_1.vrt" +# +# NEW_DOMAIN_TRANSECTS_DIRNAME <- "transects" +# NEW_DOMAIN_CS_PTS_DIRNAME <- "cs_pts" +# NEW_DOMAIN_CROSS_SECTIONS_DIRNAME <- "cross_sections" +# +# NEW_DOMAIN_TRANSECTS_DIR <- paste0(NEW_DOMAIN_DIR, "/", NEW_DOMAIN_TRANSECTS_DIRNAME) +# NEW_DOMAIN_CS_PTS_DIR <- paste0(NEW_DOMAIN_DIR, "/", NEW_DOMAIN_CS_PTS_DIRNAME) +# NEW_DOMAIN_CROSS_SECTIONS_DIR <- paste0(NEW_DOMAIN_DIR, "/", NEW_DOMAIN_CROSS_SECTIONS_DIRNAME) +# +# # ------------------------------------------------------------------------------------- +# # ---- (New single domain) Local directory / path variables ---- +# # ------------------------------------------------------------------------------------- +# +# # directory for new domain data +# DOMAIN_WITH_FEMA_DIRNAME <- "domain_with_fema" +# DOMAIN_WITH_FEMA_DIR <- paste0(BASE_DIR, "/", DOMAIN_WITH_FEMA_DIRNAME) +# +# DOMAIN_WITH_FEMA_FLOWLINES_DIRNAME <- "flowlines" +# DOMAIN_WITH_FEMA_FLOWLINES_DIR <- paste0(DOMAIN_WITH_FEMA_DIR, "/", DOMAIN_WITH_FEMA_FLOWLINES_DIRNAME) +# +# DOMAIN_WITH_FEMA_SUBSET_DIRNAME <- "domain_subset" +# DOMAIN_WITH_FEMA_SUBSET_DIR <- paste0(DOMAIN_WITH_FEMA_DIR, "/", DOMAIN_WITH_FEMA_SUBSET_DIRNAME) +# +# DOMAIN_WITH_FEMA_DEM_DIRNAME <- "dem" +# DOMAIN_WITH_FEMA_DEM_DIR <- paste0(DOMAIN_WITH_FEMA_DIR, "/", DOMAIN_WITH_FEMA_DEM_DIRNAME) +# +# DOMAIN_WITH_FEMA_FLOWLINES_FILE <- "ls_conus.gpkg" +# DOMAIN_WITH_FEMA_FLOWLINES_PATH <- paste0(DOMAIN_WITH_FEMA_FLOWLINES_DIR, "/", DOMAIN_WITH_FEMA_FLOWLINES_FILE) +# +# # Geopackage containing area to subset flowlines to before processing +# DOMAIN_WITH_FEMA_SUBSET_FILE <- "AllDiffusiveCombined.gpkg" +# DOMAIN_WITH_FEMA_SUBSET_PATH <- paste0(DOMAIN_WITH_FEMA_SUBSET_DIR, "/", DOMAIN_WITH_FEMA_SUBSET_FILE) +# +# # # Local DEM file +# # DOMAIN_WITH_FEMA_DEM_FILE <- "hi_dem.tif" +# # DOMAIN_WITH_FEMA_DEM_PATH <- paste0(DOMAIN_WITH_FEMA_DEM_DIR, "/", DOMAIN_WITH_FEMA_DEM_FILE) +# +# # Remote DEM file +# DOMAIN_WITH_FEMA_DEM_PATH <- "/vsicurl/https://prd-tnm.s3.amazonaws.com/StagedProducts/Elevation/1/TIFF/USGS_Seamless_DEM_1.vrt" +# +# DOMAIN_WITH_FEMA_TRANSECTS_DIRNAME <- "transects" +# DOMAIN_WITH_FEMA_CS_PTS_DIRNAME <- "cs_pts" +# DOMAIN_WITH_FEMA_CROSS_SECTIONS_DIRNAME <- "cross_sections" +# DOMAIN_WITH_FEMA_OUTPUT_DIRNAME <- "outputs" +# 
DOMAIN_WITH_FEMA_VPU_SUBSETS_DIRNAME <- "vpu-subsets" +# DOMAIN_WITH_FEMA_ML_DIRNAME <- "ml" +# # aws s3 cp s3://prd-tnm/StagedProducts/Elevation/1/TIFF/USGS_Seamless_DEM_1.vrt /Users/anguswatters/Desktop/3DEP/3DEP.vrt +# DOMAIN_WITH_FEMA_TRANSECTS_DIR <- paste0(DOMAIN_WITH_FEMA_DIR, "/", DOMAIN_WITH_FEMA_TRANSECTS_DIRNAME) +# DOMAIN_WITH_FEMA_CS_PTS_DIR <- paste0(DOMAIN_WITH_FEMA_DIR, "/", DOMAIN_WITH_FEMA_CS_PTS_DIRNAME) +# DOMAIN_WITH_FEMA_CROSS_SECTIONS_DIR <- paste0(DOMAIN_WITH_FEMA_DIR, "/", DOMAIN_WITH_FEMA_CROSS_SECTIONS_DIRNAME) +# DOMAIN_WITH_FEMA_OUTPUT_DIR <- paste0(DOMAIN_WITH_FEMA_DIR, "/", DOMAIN_WITH_FEMA_OUTPUT_DIRNAME) +# DOMAIN_WITH_FEMA_VPU_SUBSETS_DIR <- paste0(DOMAIN_WITH_FEMA_DIR, "/", DOMAIN_WITH_FEMA_VPU_SUBSETS_DIRNAME) +# DOMAIN_WITH_FEMA_ML_DIR <- paste0(DOMAIN_WITH_FEMA_DIR, "/", DOMAIN_WITH_FEMA_ML_DIRNAME) +# +# ML_AUXILIARY_DATA_S3_URI <- paste0(LYNKER_SPATIAL_BASE_S3_URI, "bathymetry/ml_auxiliary_data") +# # LYNKER_SPATIAL_BASE_S3_URI +# LYNKER_HYDROFABRIC_S3_BUCKET_BASE_URI <- "s3://lynker-hydrofabric/" +# ML_BATHYMETRY_S3_DATA_DIR <- "hydrofabric/nextgen/bathymetry/multisource_river_attributes/" +# ML_BATHYMETRY_S3_URI <- paste0(LYNKER_HYDROFABRIC_S3_BUCKET_BASE_URI, ML_BATHYMETRY_S3_DATA_DIR) +# VPU_ML_BATHYMETRY_S3_DIRS <- paste0(ML_BATHYMETRY_S3_URI, "vpuid=", nhdplusTools::vpu_boundaries$VPUID, "/") +# +# COASTAL_BATHY_DEM_S3_DIR <- "coastal_bathy/diffusive_domain/" +# COASTAL_BATHY_DEM_S3_DIR_URI <- paste0(LYNKER_HYDROFABRIC_S3_BUCKET_BASE_URI, COASTAL_BATHY_DEM_S3_DIR) +# # COASTAL_BATHY_DEM_S3_URIS <- paste0(COASTAL_BATHY_DEM_S3_DIR_URI, list_s3_objects(COASTAL_BATHY_DEM_S3_DIR_URI, ".tif$", AWS_PROFILE)) +# +# +# # # # -------------------------------------------------------------------------- +# # # # ---- Get locations of diffusive domain DEM files in S3 ---- +# # # # -------------------------------------------------------------------------- +# # COASTAL_BATHY_DEM_S3_URIS <- paste0(COASTAL_BATHY_DEM_S3_DIR_URI, list_s3_objects(COASTAL_BATHY_DEM_S3_DIR_URI, ".tif$", AWS_PROFILE)) +# # +# # # COASTAL_BATHY_DEM_S3_URIS <- paste0(COASTAL_BATHY_DEM_S3_DIR_URI, +# # # list_s3_objects(COASTAL_BATHY_DEM_S3_DIR_URI, ".tif$", AWS_PROFILE) +# # # ) +# +# + + + + + + diff --git a/runners/cs_runner2/config_env.R b/runners/cs_runner2/config_env.R new file mode 100644 index 0000000..a9b0b17 --- /dev/null +++ b/runners/cs_runner2/config_env.R @@ -0,0 +1,26 @@ +# load required packages +pacman::p_load( + archive, + hydrofabric, + hydrofabric3D, + dplyr, + sf +) + +# # install.packages("devtools") +# devtools::install_github("anguswg-ucsb/hydrofabric3D") + +# load root directory +source("runners/cs_runner2/base_variables.R") +source("runners/cs_runner2/utils.R") + +sf::sf_use_s2(FALSE) + +# create empty base directories +create_local_hydrofabric_base_dirs(base_dir = BASE_DIR) + +# create a new version directory +create_new_version_dirs(base_dir = BASE_DIR, + version = VERSION, + with_output = TRUE) + diff --git a/runners/cs_runner2/download_conus_nextgen.R b/runners/cs_runner2/download_conus_nextgen.R new file mode 100644 index 0000000..80de910 --- /dev/null +++ b/runners/cs_runner2/download_conus_nextgen.R @@ -0,0 +1,28 @@ +# Running this script goes and pulls the desired NextGen geopackage datasets from http://www.lynker-spatial.com/, saves them into a directory within "BASE_DIR" +# BASE_DIR is defined within runners/workflow/root_dir.R + +# load config variables +source("runners/cs_runner2/base_variables.R") +# source("runners/cs_runner/config_vars.R") + +# 
--------------------------------------------------------------------------- +# ---- Download conus_nextgen.gpkg +# --------------------------------------------------------------------------- + +copy_cmd <- paste0('aws s3 cp ', CONUS_NEXTGEN_S3_URI, " ", CONUS_NEXTGEN_GPKG_PATH) +message("Copying S3 object:\n", CONUS_NEXTGEN_S3_URI) + +if (!file.exists(CONUS_NEXTGEN_GPKG_PATH)) { + tryCatch({ + system(copy_cmd) + message("Download '", basename(CONUS_NEXTGEN_GPKG_PATH), "' complete!") + message("------------------") + }, error = function(e) { + message("Error downloading conus_nextgen.gpkg") + message(e) + stop() + }) + +} else { + message("conus_nextgen.gpkg file already exists at\n > '", CONUS_NEXTGEN_GPKG_PATH, "'") +} diff --git a/runners/cs_runner2/download_conus_ref_features.R b/runners/cs_runner2/download_conus_ref_features.R new file mode 100644 index 0000000..b6f6a06 --- /dev/null +++ b/runners/cs_runner2/download_conus_ref_features.R @@ -0,0 +1,31 @@ +# Running this script goes and pulls the desired CONUS NextGen geopackage dataset from http://www.lynker-spatial.com/, saves them into a directory within "BASE_DIR" +# BASE_DIR is defined within runners/cs_runner/base_variables.R + +# load config variables +source("runners/cs_runner2/base_variables.R") +# source("runners/cs_runner/config_vars.R") + +# --------------------------------------------------------------------------- +# ---- Download conus_reference.gpkg +# --------------------------------------------------------------------------- + +copy_cmd <- paste0('aws s3 cp ', CONUS_REF_FEATURES_S3_URI, " ", CONUS_REF_FEATURES_GPKG_PATH) +message("Copying S3 object:\n", CONUS_REF_FEATURES_S3_URI) + +if (!file.exists(CONUS_REF_FEATURES_GPKG_PATH)) { + tryCatch({ + system(copy_cmd) + message("Download '", basename(CONUS_REF_FEATURES_GPKG_PATH), "' complete!") + message("------------------") + }, error = function(e) { + message("Error downloading conus_nextgen.gpkg") + message(e) + stop() + }) + +} else { + message("conus_nextgen.gpkg file already exists at\n > '", CONUS_REF_FEATURES_GPKG_PATH, "'") +} + + +# } \ No newline at end of file diff --git a/runners/cs_runner2/download_dem_from_vrt.R b/runners/cs_runner2/download_dem_from_vrt.R new file mode 100644 index 0000000..fc18f2c --- /dev/null +++ b/runners/cs_runner2/download_dem_from_vrt.R @@ -0,0 +1,143 @@ +library(terra) +library(dplyr) + +# get the main config variables / paths +# source("runners/cs_runner/config_vars.R") +source("runners/cs_runner2/base_variables.R") +source("runners/cs_runner2/utils.R") + +# specify where VRT should go +DEM_VRT_DIR <- BASE_DIRS_LIST$dem_vrt_dir +# # DEM_TIF_DIR <- base_dirs$dem_tif_dir +# DEM_PATH <- "/vsicurl/https://prd-tnm.s3.amazonaws.com/StagedProducts/Elevation/13/TIFF/USGS_Seamless_DEM_13.vrt" +# # /vsicurl/https://prd-tnm.s3.amazonaws.com/StagedProducts/Elevation/13/TIFF/current/n70w148/USGS_13_n70w148.tif +# message("Syncing 3DEP elevation VRTs...") +# s3_cp_cmd <- paste0("aws s3 cp ", "s3://prd-tnm/StagedProducts/Elevation/13/TIFF/USGS_Seamless_DEM_13.vrt", " ", "/Users/anguswatters/Desktop/USGS_Seamless_DEM_13.vrt", " --profile ", AWS_PROFILE, " --no-sign-request") +# s3_cp_cmd +# system(s3_cp_cmd) +# "s3://prd-tnm/StagedProducts/Elevation/13/TIFF/USGS_Seamless_DEM_13.vrt" +# +# VRT_file <- "/Users/anguswatters/Desktop/USGS_Seamless_DEM_13.vrt" +# vrt_tiles <- terra::vrt_tiles(VRT_file) +# +# dem <- terra::rast(vrt_tiles[999]) +# dem +# terra::writeRaster(dem, "/Users/anguswatters/Desktop/tile1_USGS_Seamless_DEM_13.tif", overwrite = T) 
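# The loop below builds an `aws s3 sync` command for each NED resource in NED_META,
# mirroring its "current/" S3 prefix into a matching local folder
# (DEM_VRT_DIR/<id>/current) so the "Build VRTs" step further down can point
# gdalbuildvrt at local tiles instead of /vsicurl/ URLs. As a rough illustration
# (local paths are placeholders and the command is not run here), the call built
# for the USGS_13 row expands to roughly:
#   aws s3 sync s3://prd-tnm/StagedProducts/Elevation/13/TIFF/current/ \
#     <DEM_VRT_DIR>/USGS_13/current --profile <AWS_PROFILE> --no-sign-request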
+# length(vrt_tiles) +# s3://lynker-nypa-inputs/ryan_test/ + + +for (i in seq_len(nrow(NED_META))) { + + ned_uri <- NED_META$s3_uri[i] + ned_current_uri <- paste0(ned_uri, "current/") + + output_dir <- file.path(DEM_VRT_DIR, NED_META$id[i]) + current_output_dir <- file.path(DEM_VRT_DIR, NED_META$id[i], "current") + + # create specific VRT directory if it doesn't already exist + create_if_not_exists(output_dir) + create_if_not_exists(current_output_dir) + + message("[", i, "] - ", "Syncing\n '", ned_current_uri, "' > '", current_output_dir, "'") + + s3_sync_cmd <- paste0("aws s3 sync ", ned_current_uri, " ", current_output_dir, " --profile ", AWS_PROFILE, " --no-sign-request") + # s3_sync_cmd <- paste0("aws s3 sync s3://prd-tnm/StagedProducts/Elevation/1/TIFF/ ", DEM_VRT_DIR, " --profile ", AWS_PROFILE, " --no-sign-request") + + tryCatch({ + + # system(s3_sync_cmd) + + }, error = function(e) { + + message("Error syncing ", ned_uri, " to local directory...") + message(e) + + }) + +} + +# vrt_file <- file.path(DEM_VRT_DIR, "USGS_Seamless_DEM_1.vrt") +# +# # TODO: use AWS S3 sync command like below +# s3_sync_cmd <- paste0("aws s3 sync s3://prd-tnm/StagedProducts/Elevation/1/TIFF/ ", DEM_VRT_DIR, " --profile ", AWS_PROFILE, " --no-sign-request") +# # s3_sync_cmd <- paste0("aws s3 sync s3://prd-tnm/StagedProducts/Elevation/1/TIFF/ ", DEM_VRT_DIR, " --profile ", AWS_PROFILE, " --no-sign-request --only-show-errors") +# +# system(s3_sync_cmd) + +# ------------------------------------ +# ---- Build VRTs ---- +# ------------------------------------ + +message("Building 3DEP elevation '.vrt' files...") + +for (i in seq_len(nrow(NED_META))) { + + output_dir <- file.path(DEM_VRT_DIR, NED_META$id[i]) + # output_dir + + # Define output text file path + txt_file <- file.path(output_dir, paste0("ned_list_", NED_META$id[i], ".txt")) + + # Define output VRT path + vrt_file <- file.path(output_dir, paste0("ned_", NED_META$id[i], ".vrt")) + # vrt_file <- paste0(paste0("data-raw/ned_", NED_META$id[i], ".vrt")) + + # If VRT does NOT exist, build VRT + if (!file.exists(vrt_file)) { + + # txt_file + + current_dir <- file.path(output_dir, "current") + + # all the current/ directories that have the TIFs + tile_dirs <- list.files(current_dir, full.names = TRUE) + # list.files(tile_dirs[1:2], full.names = T) + # index_gpkg <- sf::read_sf(list.files(tile_dirs[999], full.names = T, pattern = ".gpkg")) + + # get list of all the .tif files in the "current/" dir + tif_paths <- list.files(tile_dirs, pattern = ".tif", full.names = T) + + # write list of files to text file + write.table(tif_paths, txt_file, row.names = FALSE, col.names = FALSE, quote = FALSE) + + # build VRT from text file input using GDAL system call ... 
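# (illustrative only; paths assume the defaults defined above) with NED_META$id = "USGS_13",
# the system call below expands to roughly:
#   gdalbuildvrt -input_file_list <DEM_VRT_DIR>/USGS_13/ned_list_USGS_13.txt <DEM_VRT_DIR>/USGS_13/ned_USGS_13.vrt
# i.e. GDAL reads every TIFF path listed in the text file and stitches them into a single
# virtual mosaic (.vrt) without copying the underlying raster data.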
+ system(paste("gdalbuildvrt -input_file_list", txt_file, vrt_file)) + + } + +} + + +# file.path(DEM_TIF_DIR, "USGS_Seamless_DEM_1.vrt") +# length(list.files(file.path(DEM_TIF_DIR, "current"), full.names = T)) +# vrt_tiles <- terra::vrt_tiles(file.path(DEM_TIF_DIR, "USGS_Seamless_DEM_1.vrt")) +# vrt_tiles[1] +# # "aws s3 sync s3://prd-tnm/StagedProducts/Elevation/1/TIFF/ /Volumes/T7SSD/lynker-spatial/dem/tif/ --no-sign-request --only-show-errors" +# +# # Parse the VRT file +# vrt_file <- list.files(DEM_VRT_DIR, full.names = TRUE) +# vrt_file <- list.files(DEM_VRT_DIR, full.names = TRUE) +# file.path(DEM_TIF_DIR, "USGS_Seamless_DEM_1.vrt") +# vrt_tiles <- terra::vrt_tiles(vrt_file) +# +# # tile_path <- vrt_tiles[500] +# tile_paths <- gsub("/vsicurl/", "", vrt_tiles) +# +# # TODO: use AWS S3 sync command like below +# s3_sync_cmd <- paste0("aws s3 sync s3://prd-tnm/StagedProducts/Elevation/1/TIFF/ ", DEM_VRT_DIR, " --profile ", AWS_PROFILE, " --no-sign-request --only-show-errors") +# # aws s3 sync s3://prd-tnm/StagedProducts/Elevation/1/TIFF/ /Volumes/T7SSD/lynker-spatial/dem/tif/ --no-sign-request --only-show-errors +# # "aws s3 sync s3://prd-tnm/StagedProducts/Elevation/1/TIFF/ /Volumes/T7SSD/lynker-spatial/dem/tif/ --no-sign-request --only-show-errors" +# +# # TODO: Old method curl request each file individually.... super slow +# # error_tiles <- download_tiles(tile_paths, DEM_TIF_DIR) \ No newline at end of file diff --git a/runners/cs_runner2/download_fema.R b/runners/cs_runner2/download_fema.R new file mode 100644 index 0000000..86b7e6d --- /dev/null +++ b/runners/cs_runner2/download_fema.R @@ -0,0 +1,123 @@ +# Running this script goes and pulls the desired FEMA100 flood fgb datasets from the lynker-hydrofabric S3 bucket then saves them into a directory within "BASE_DIR" +# BASE_DIR is defined within runners/workflow/root_dir.R + +# NOTE: The lynker-hydrofabric S3 bucket is private at the moment + +# load config variables +# source("runners/cs_runner/config_vars.R") +source("runners/cs_runner2/base_variables.R") + +# ------------------------------------------------------------------------------------- +# ---- Create FEMA100/ directory and bounding box dir (if it does NOT exist) ---- +# ------------------------------------------------------------------------------------- + +# if (!dir.exists(FEMA_FGB_PATH)) { +# message(paste0("FEMA100/ directory does not exist...\nCreating directory:\n > '", FEMA_FGB_PATH, "'")) +# dir.create(FEMA_FGB_PATH) +# } +# +# # create geojsons directory (if not exists) +# if (!dir.exists(FEMA_GEOJSON_PATH)) { +# message(paste0(FEMA_GEOJSON_PATH, " directory does not exist...\nCreating directory:\n > '", FEMA_GEOJSON_PATH, "'")) +# dir.create(FEMA_GEOJSON_PATH) +# } +# +# # create directory for cleaned FEMA geometries (if not exists) +# if (!dir.exists(FEMA_CLEAN_PATH)) { +# message(paste0(FEMA_CLEAN_PATH, " directory does not exist...\nCreating directory:\n > '", FEMA_CLEAN_PATH, "'")) +# dir.create(FEMA_CLEAN_PATH) +# } +# +# # create directory for cleaned FEMA geometries as geopackages (if not exists) +# if (!dir.exists(FEMA_GPKG_PATH)) { +# message(paste0(FEMA_GPKG_PATH, " directory does not exist...\nCreating directory:\n > '", FEMA_GPKG_PATH, "'")) +# dir.create(FEMA_GPKG_PATH) +# } +# +# # create simplified geojsons directory (if not exists) +# if (!dir.exists(FEMA_SIMPLIFIED_PATH)) { +# message(paste0(FEMA_SIMPLIFIED_PATH, " directory does not exist...\nCreating directory:\n > '", FEMA_SIMPLIFIED_PATH, "'")) +# dir.create(FEMA_SIMPLIFIED_PATH) +# } +# 
+# # create simplified geojsons directory (if not exists) +# if (!dir.exists(FEMA_DISSOLVED_PATH)) { +# message(paste0(FEMA_DISSOLVED_PATH, " directory does not exist...\nCreating directory:\n > '", FEMA_DISSOLVED_PATH, "'")) +# dir.create(FEMA_DISSOLVED_PATH) +# } +# +# # create exploded geojsons directory (if not exists) +# if (!dir.exists(FEMA_EXPLODED_PATH)) { +# message(paste0(FEMA_EXPLODED_PATH, " directory does not exist...\nCreating directory:\n > '", FEMA_EXPLODED_PATH, "'")) +# dir.create(FEMA_EXPLODED_PATH) +# } +# +# # create FEMA GPKG Bounding Boxes directory (if not exists) +# if (!dir.exists(FEMA_GPKG_BB_PATH)) { +# message(paste0(FEMA_GPKG_BB_PATH, " directory does not exist...\nCreating directory:\n > '", FEMA_GPKG_BB_PATH, "'")) +# dir.create(FEMA_GPKG_BB_PATH) +# } +# +# if (!dir.exists(FEMA_FGB_BB_PATH)) { +# message(paste0(FEMA_FGB_BB_PATH, " directory does not exist...\nCreating directory:\n > '", FEMA_FGB_BB_PATH, "'")) +# dir.create(FEMA_FGB_BB_PATH) +# } + +# ------------------------------------------------------------------------------------- +# ---- Get list of FEMA FGB files in S3 bucket ---- +# ------------------------------------------------------------------------------------- + +# list objects in S3 bucket, and regular expression match to nextgen_.gpkg pattern +fema_list_command <- paste0('#!/bin/bash + # AWS S3 Bucket and Directory information + S3_BUCKET="', LYNKER_HF_FEMA_S3_URI, '" + + # Regular expression pattern to match object keys + PATTERN=".fgb$" + + # AWS CLI command to list objects in the S3 bucket and use grep to filter them + S3_OBJECTS=$(aws s3 ls "$S3_BUCKET" --profile ', AWS_PROFILE, ' | awk \'{print $4}\' | grep -E "$PATTERN") + + echo "$S3_OBJECTS"' +) + +# ------------------------------------------------------------------------------------- +# ---- Get the S3 buckets object keys for FEMA 100 FGB files ---- +# ------------------------------------------------------------------------------------- + +# Run the script to get a list of the nextgen geopackages that matched the regular expression above +FEMA_BUCKET_KEYS <- system(fema_list_command, intern = TRUE) + +# create bucket object URIs +# FEMA_BUCKET_OBJECTS <- paste0(FEMA_S3_BUCKET, LYNKER_HF_FEMA_S3_PREFIX, FEMA_BUCKET_KEYS) + +# ------------------------------------------------------------------------------------- +# ---- Download FEMA 100 year FGB files from S3 ---- +# ------------------------------------------------------------------------------------- + +# Parse the selected S3 objects keys from the FEMA100 bucket directory copy them to the local destination directory if the file does NOT exist yet +for (key in FEMA_BUCKET_KEYS) { + local_save_path <- paste0(FEMA_FGB_PATH, "/", key) + + if (!file.exists(local_save_path)) { + copy_cmd <- paste0('aws s3 cp ', LYNKER_HF_FEMA_S3_URI, key, " ", local_save_path, " --profile ", AWS_PROFILE) + + message("S3 object:\n > '", LYNKER_HF_FEMA_S3_URI, key, "'") + message("Downloading S3 object to:\n > '", local_save_path, "'") + + system(copy_cmd) + + message(" > '", key, "' download complete!") + message("----------------------------------") + } else { + message("File already exists at:\n > '", local_save_path, "'") + } +} + + + + + + + + diff --git a/runners/cs_runner2/download_ml_outputs.R b/runners/cs_runner2/download_ml_outputs.R new file mode 100644 index 0000000..4d14634 --- /dev/null +++ b/runners/cs_runner2/download_ml_outputs.R @@ -0,0 +1,28 @@ +# Running this script goes and pulls the desired NextGen geopackage datasets from 
http://www.lynker-spatial.com/ and saves it into a directory within "BASE_DIR"
+# BASE_DIR is defined within runners/workflow/root_dir.R
+
+# load config variables
+source("runners/cs_runner2/base_variables.R")
+# source("runners/cs_runner/config_vars.R")
+
+# ---------------------------------------------------------------------------
+# ---- Download the CONUS ML outputs parquet
+# ---------------------------------------------------------------------------
+
+copy_cmd <- paste0('aws s3 cp ', CONUS_ML_S3_URI, " ", CONUS_ML_PARQUET_PATH)
+message("Copying S3 object:\n", CONUS_ML_S3_URI)
+
+if (!file.exists(CONUS_ML_PARQUET_PATH)) {
+  tryCatch({
+    system(copy_cmd)
+    message("Download '", basename(CONUS_ML_PARQUET_PATH), "' complete!")
+    message("------------------")
+  }, error = function(e) {
+    message("Error downloading '", basename(CONUS_ML_PARQUET_PATH), "'")
+    message(e)
+    stop()
+  })
+
+} else {
+  message("'", basename(CONUS_ML_PARQUET_PATH), "' file already exists at\n > '", CONUS_ML_PARQUET_PATH, "'")
+}
diff --git a/runners/cs_runner2/finalize_outputs.R b/runners/cs_runner2/finalize_outputs.R
new file mode 100644
index 0000000..e69de29
diff --git a/runners/cs_runner2/process_fema.R b/runners/cs_runner2/process_fema.R
new file mode 100644
index 0000000..50b2106
--- /dev/null
+++ b/runners/cs_runner2/process_fema.R
@@ -0,0 +1,737 @@
+# Script should be run AFTER download_fema100.R as the FEMA 100 year flood plain data needs to first be downloaded from S3
+# This file will take a directory of FEMA 100 year FGB files (FEMA_FGB_PATH) and run the below processes to generate a cleaned, simple set of geopackages
+
+# Processing steps:
+# - Convert FGBs to GEOJSON (via ogr2ogr)
+# - Simplifies
+# - Dissolves
+# - Explodes
+# - Convert cleaned GEOJSON to cleaned GPKGs (via ogr2ogr)
+# - Apply hydrofab::clean_geometry()
+# - Partition FEMA 100 geometries by VPU # TODO still
+# - Get FEMA bounding box geometries (maybe)
+
+# load config variables
+# source("runners/cs_runner/config_vars.R")
+# source("runners/cs_runner/config.R")
+source("runners/cs_runner2/base_variables.R")
+source("runners/cs_runner2/utils.R")
+
+library(dplyr)
+library(sf)
+library(geos)
+library(fastmap)
+library(nngeo)
+
+# ONLY runs if the main CONUS FEMA gpkg hasn't been created yet
+if (!file.exists(CONUS_FEMA_GPKG_PATH)) {
+
+# -------------------------------------------------------------------------------------
+# ---- OVERWRITE_FEMA_FILES constant logical ----
+# ----  > if TRUE, processing steps will be run again
+#          and overwrite existing previously processed files
+# -------------------------------------------------------------------------------------
+
+# Default is TRUE (i.e. 
a fresh processing run is done from start to finish) +OVERWRITE_FEMA_FILES <- TRUE +DELETE_STAGING_GPKGS <- TRUE +Sys.setenv(OGR_GEOJSON_MAX_OBJ_SIZE=0) + +# ------------------------------------------------------------------------------------- +# ---- Get paths to downloaded FEMA 100 FGBs ---- +# ------------------------------------------------------------------------------------- + +FEMA_FILENAMES <- list.files(FEMA_FGB_PATH, full.names = FALSE) +FEMA_FILE_PATHS <- paste0(FEMA_FGB_PATH, "/", FEMA_FILENAMES) + +for (file in FEMA_FILENAMES) { + # message(file) + + STAGING_FILES_TO_DELETE <- c() + + # ------------------------------------------------------------------------------------------------------------------- + # ---- Step 1: Convert FGB to GeoJSON + # ------------------------------------------------------------------------------------------------------------------- + + local_fema_path <- paste0(FEMA_FGB_PATH, "/", file) + geojson_filename <- gsub(".fgb", ".geojson", file) + geojson_save_path <- paste0(FEMA_GPKG_PATH, "/", geojson_filename) + + message("FEMA filename: '", file, "'") + message("Converting \n > '", file, "' to geojson '", geojson_filename, "'") + + geojson_exists <- file.exists(geojson_save_path) + + message(" >>> '", geojson_filename, "' already exists? ", geojson_exists) + message(" >>> Overwrite? ", OVERWRITE_FEMA_FILES) + + # Step 1.1 Run FGDB to GeoJSON conversion + ogr2ogr_command <- paste0("ogr2ogr ", geojson_save_path, " ", local_fema_path) + + if (OVERWRITE_FEMA_FILES || !geojson_exists) { + system(ogr2ogr_command) + message("Writing '", geojson_filename, "' to: \n > '", geojson_save_path, "'") + + STAGING_FILES_TO_DELETE <- c(STAGING_FILES_TO_DELETE, geojson_save_path) + } + + # ------------------------------------------------------------------------------------------------------------------- + # ---- # Step 2: Clean GeoJSON + # ------------------------------------------------------------------------------------------------------------------- + + message("Simplify, dissolve, explode > '", geojson_filename, "'") + output_clean_filename <- gsub(".geojson", "_clean.geojson", geojson_filename) + output_clean_geojson_path <- paste0(FEMA_GPKG_PATH, "/", output_clean_filename) + + clean_geojson_exists <- file.exists(output_clean_geojson_path) + message(" >>> '", output_clean_filename, "' already exists? ", clean_geojson_exists) + message(" >>> Overwrite? 
", OVERWRITE_FEMA_FILES) + + mapshaper_command = paste0('node --max-old-space-size=16000 /opt/homebrew/bin/mapshaper ', geojson_save_path, + ' -dissolve2 FLD_AR_ID \\', + ' -simplify 0.1 visvalingam \\', + ' -snap \\', + ' -o ', output_clean_geojson_path) + + # Step 2.1 Run simplify, dissolve, explode on cleaned GeoJSON + if (OVERWRITE_FEMA_FILES || !clean_geojson_exists) { + message("Running mapshaper 'simplify', 'dissolve', and 'explode' via CLI...") + system(mapshaper_command) + message("Writing '", output_clean_filename, "' to: \n > '", output_clean_geojson_path, "'") + + STAGING_FILES_TO_DELETE <- c(STAGING_FILES_TO_DELETE, output_clean_geojson_path) + } + + # ------------------------------------------------------------------------------------------------------------------- + # ---- # Step 3: Convert cleaned GeoJSON to GeoPackage + # ------------------------------------------------------------------------------------------------------------------- + + message("Fema 100 year flood plain:\n > '", output_clean_filename, "'") + output_gpkg_filename <- gsub("_clean.geojson", "_clean.gpkg", output_clean_filename) + output_gpkg_path <- paste0(FEMA_GPKG_PATH, "/", output_gpkg_filename) + + message("Converting GEOJSON file to GPKG:\n > '", output_clean_filename, "' > '", output_gpkg_filename, "'") + + clean_gpkg_exists <- file.exists(output_gpkg_path) + message(" >>> '", output_gpkg_filename, "' already exists? ", clean_gpkg_exists) + message(" >>> Overwrite? ", OVERWRITE_FEMA_FILES) + + ogr2ogr_command <- paste0("ogr2ogr -nlt MULTIPOLYGON ", output_gpkg_path, " ", output_clean_geojson_path) + + if (OVERWRITE_FEMA_FILES || !clean_gpkg_exists) { + system(ogr2ogr_command) + message("Writing '", output_gpkg_filename, "' to: \n > '", output_gpkg_path, "'") + } + + # ------------------------------------------------------------------------------------------------------------------- + # ---- Step 4: Apply final dissolve/snap and removal of internal boundaries in FEMA geometries ---- + # ------------------------------------------------------------------------------------------------------------------- + + message("Resolving internal boundaries, islands, and topology issues:\n > '", basename(output_gpkg_path), "'") + + fema <- sf::read_sf(output_gpkg_path) + fema <- resolve_internal_fema_boundaries(fema, output_gpkg_path) + + message("End time: ", Sys.time()) + + if (OVERWRITE_FEMA_FILES) { + message("Writting '", basename(output_gpkg_path), "' to: \n > '", output_gpkg_path, "'") + sf::write_sf( + fema, + # fema_clean, + output_gpkg_path + ) + } + + + + # ------------------------------------------------------------------------------------------------------------------- + # ---- Step 5: Delete intermediary files + # ------------------------------------------------------------------------------------------------------------------- + + message("Deleting intermediary files\n") + for (delete_file in STAGING_FILES_TO_DELETE) { + if (file.exists(delete_file)) { + message("Deleting >>> '", delete_file, "'") + file.remove(delete_file) + } + + } + + rm(fema) + + message() + +} + +# ------------------------------------------------------------------------------------------------------------------- +# ---- Apply final dissolve/snap and removal of internal boundaries in FEMA geometries ---- +# ------------------------------------------------------------------------------------------------------------------- + +# # paths to FEMA 100 year flood plain files +# FEMA_gpkg_paths <- list.files(FEMA_GPKG_PATH, full.names = 
TRUE)
+
+# for (file_path in FEMA_gpkg_paths) {
+#   message("Resolving internal boundaries, islands, and topology issues:\n > '", basename(file_path), "'")
+
+#   fema <- sf::read_sf(file_path)
+
+#   fema <- resolve_internal_fema_boundaries(fema)
+
+#   # fema <-
+#   #   fema[!sf::st_is_empty(fema), ] %>%
+#   #   sf::st_transform(5070)
+
+#   # fema <-
+#   #   fema %>%
+#   #   # fema[!sf::st_is_empty(fema), ] %>%
+#   #   dplyr::select(geometry = geom) %>%
+#   #   add_predicate_group_id(sf::st_intersects) %>%
+#   #   sf::st_make_valid() %>%
+#   #   dplyr::group_by(group_id) %>%
+#   #   dplyr::summarise(
+#   #     geometry = sf::st_combine(sf::st_union(geometry))
+#   #   ) %>%
+#   #   dplyr::ungroup() %>%
+#   #   dplyr::select(-group_id) %>%
+#   #   add_predicate_group_id(sf::st_intersects) %>%
+#   #   rmapshaper::ms_dissolve(sys = TRUE, sys_mem = 16) %>%
+#   #   rmapshaper::ms_explode(sys = TRUE, sys_mem = 16) %>%
+#   #   dplyr::mutate(
+#   #     fema_id = as.character(1:dplyr::n())
+#   #   ) %>%
+#   #   dplyr::select(fema_id, geometry)
+
+#   # # mapview::mapview(fema, color = 'cyan', col.regions = "cyan")
+
+#   # # mapview::mapview(end_fema, color = 'red', col.regions = "white")
+
+#   # fema <-
+#   #   fema %>%
+#   #   dplyr::mutate(
+#   #     source = basename(file_path),
+#   #     state = gsub("-100yr-flood_valid_clean.gpkg", "", source)
+#   #   ) %>%
+#   #   dplyr::select(fema_id, source, state,
+#   #                 # areasqkm,
+#   #                 geometry)
+
+#   message("End time: ", Sys.time())
+
+#   if (OVERWRITE_FEMA_FILES) {
+#     message("Writing '", basename(file_path), "' to: \n > '", file_path, "'")
+#     sf::write_sf(
+#       # fema_clean,
+#       fema,
+#       file_path
+#     )
+#   }
+#   message()
+
+# }
+
+# -------------------------------------------------------------------------------------
+# ---- Partition parts of each FEMA GPKG to a Nextgen VPU ----
+# -------------------------------------------------------------------------------------
+
+# Clean FEMA GPKG files
+FEMA_CLEAN_GPKG_PATHS <- list.files(FEMA_GPKG_PATH, full.names = TRUE)
+
+# paths to nextgen datasets and model attribute parquet files
+# CONUS_NEXTGEN_GPKG_PATH
+
+# layer_info = sf::st_layers(CONUS_NEXTGEN_GPKG_PATH)
+# layer_info[layer_info$name == "flowpaths", "fields"]
+# layer_inf
+
+# flines = sf::read_sf(CONUS_NEXTGEN_GPKG_PATH, layer = "flowpaths")
+# flines %>% names()
+# (flines$vpuid %>% unique()) %in% VPU_IDS
+# VPU_IDS
+# VPU_IDS %in% (flines$vpuid %>% unique())
+
+# query the unique VPU_IDS from the "flowpaths" layer of CONUS_NEXTGEN_GPKG_PATH
+CONUS_VPU_IDS <-
+  CONUS_NEXTGEN_GPKG_PATH %>%
+  sf::read_sf(query = "SELECT DISTINCT vpuid FROM flowpaths") %>%
+  dplyr::pull()
+
+
+# NEXTGEN_FILENAMES <- list.files(NEXTGEN_DIR, full.names = FALSE)
+# NEXTGEN_FILE_PATHS <- paste0(NEXTGEN_DIR, NEXTGEN_FILENAMES)
+
+
+for (file_path in FEMA_CLEAN_GPKG_PATHS) {
+  # i = 35
+  # file_path = FEMA_CLEAN_GPKG_PATHS[i]
+
+  fema_file <- basename(file_path)
+
+  message("Partitioning FEMA polygons by VPU: \n > FEMA gpkg: '", fema_file, "'")
+
+  # read in fema polygons
+  fema <- sf::read_sf(file_path)
+
+  for (vpu in CONUS_VPU_IDS) {
+    # j = 8
+    # vpu = CONUS_VPU_IDS[j]
+
+    # nextgen_basename <- basename(nextgen_path)
+    # vpu <- unlist(regmatches(nextgen_basename, gregexpr("\\d+[A-Za-z]*", nextgen_basename)))
+
+    message("VPU: ", vpu)
+    # message("- nextgen gpkg:\n > '", nextgen_path, "'")
+    message(" > Checking if '", fema_file, "' intersects with CONUS flowpaths in VPU '", vpu, "'")
+
+    # read in nextgen flowlines for just this VPU
+    flines <-
+      CONUS_NEXTGEN_GPKG_PATH %>%
+      sf::read_sf(query = paste0("SELECT * FROM flowpaths WHERE vpuid = '", vpu, "'"))
"'")) + + # flines <- sf::read_sf(nextgen_path, layer = "flowpaths") + + # get the FEMA polygons that intersect with the nextgen flowlines + fema_intersect <- polygons_with_line_intersects(fema, flines) + + fema_in_nextgen <- nrow(fema_intersect) != 0 + + message("FEMA intersects with nextgen flowlines? ", fema_in_nextgen) + + if(fema_in_nextgen) { + + # create filepaths + vpu_subfolder <- paste0("vpu-", vpu) + # vpu_subfolder_path <- paste0(FEMA_BY_VPU_PATH, "/", vpu_subfolder, "/states") + vpu_subfolder_path <- paste0(FEMA_BY_VPU_PATH, "/", vpu_subfolder) + + # vpu_subfolder_path <- FEMA_VPU_SUBFOLDERS[grepl(vpu_subfolder, FEMA_VPU_SUBFOLDERS)] + + fema_intersect <- + fema_intersect %>% + dplyr::mutate( + vpu = vpu + ) %>% + dplyr::select(vpu, fema_id, source, state, geom) + + # state <- gsub("-100yr-flood_valid_clean.gpkg", "", fema_file) + + # BASE_DIRS_LIST$fema_by_vpu_subsets_dirs + + fema_vpu_filename <- gsub(".gpkg", paste0("_", vpu, ".gpkg"), fema_file) + fema_vpu_path <- paste0(vpu_subfolder_path, "/subsets/", fema_vpu_filename) + + + if (OVERWRITE_FEMA_FILES) { + message("Writting '", basename(fema_vpu_filename), "' to: \n > '", fema_vpu_path, "'") + + sf::write_sf( + fema_intersect, + fema_vpu_path + ) + } + + + } + message() + } + + message( + "--------------------------------------------------------------\n", + "Completed all VPU intersections for: \n > '", fema_file, "'", + "\n--------------------------------------------------------------\n" + ) + +} + +# for (i in seq_along(BASE_DIRS_LIST$fema_by_vpu_subdirs)) { +# vpu_subdir <- BASE_DIRS_LIST$fema_by_vpu_subdirs[i] +# +# message(i, " - ", vpu_subdir) +# +# files_to_move = list.files(vpu_subdir, pattern = ".gpkg", full.names = T) +# +# for (file in files_to_move) { +# new_path <- paste0(vpu_subdir, "/subsets/", basename(file)) +# message(" > ", file, "\n > ", new_path, "\n") +# fs::file_move( +# file, +# new_path +# ) +# } +# } + +# ------------------------------------------------------------------------------------- +# ---- Loop through each VPU subfolder and merge all of the Geopackages into one---- +# ------------------------------------------------------------------------------------- +# BASE_DIRS_LIST$fema_by_vpu_subdirs +# BASE_DIRS_LIST$fema_by_vpu_subsets_dirs +# for (vpu_dir in FEMA_VPU_SUBFOLDERS) { +BASE_DIRS_LIST$fema_by_vpu_subdirs + +for (i in seq_along(BASE_DIRS_LIST$fema_by_vpu_subdirs)) { + # for (i in 1:4) { + # i = 8 + + vpu_dir <- BASE_DIRS_LIST$fema_by_vpu_subdirs[i] + # vpu_dir <- FEMA_VPU_SUBFOLDERS[i] + + vpu_subset_dir <- BASE_DIRS_LIST$fema_by_vpu_subsets_dirs[i] + + message("Merging files in '", basename(vpu_dir), "' directory...") + vpu_subdirs <- list.files(vpu_subset_dir, full.names = TRUE) + # vpu_subdirs <- list.files(vpu_dir, full.names = TRUE) + + # message(paste0("\n > ", basename(vpu_subdirs), collapse = "")) + + # path to the merged directory where the final merged geopackge will end up + master_name <- paste0("fema-", gsub("VPU", "vpu", basename(vpu_dir))) + master_gpkg_name <- paste0(master_name, ".gpkg") + master_filepath <- paste0(vpu_dir, "/merged/", master_gpkg_name) + + # if the file already exists, remove it so we dont OVER append data to the "master file" + if (file.exists(master_filepath)) { + file.remove(master_filepath) + } + + # fema state geopackages partioned for the specific VPU + fema_state_gpkgs <- list.files(vpu_subset_dir, full.names = TRUE) + # master_output_filepath <- paste0(vpu_dir, "/", gsub(".gpkg", "_output.gpkg", master_gpkg_name)) + # + # # make sure to 
ignore the master file if it already exists + # fema_state_gpkgs <- fema_state_gpkgs[fema_state_gpkgs != master_filepath & fema_state_gpkgs != master_output_filepath] + + for(gpkg_file in fema_state_gpkgs) { + # message(" - Appending '", basename(gpkg_file), "' to master FEMA VPU gpkg:\n > '", + # basename(gpkg_file), " > ", basename(master_filepath), + # "'") + message(" > '", + basename(gpkg_file), " > ", basename(master_filepath), + "'") + + ogr2ogr_merge_command <- paste0("ogr2ogr -f 'gpkg' -append -nln ", master_name, " ", + master_filepath, + " ", gpkg_file + ) + + # if (OVERWRITE_FEMA_FILES) { + system(ogr2ogr_merge_command) + # } + + } + + # has_fema_state_gpkgs <- length(fema_state_gpkgs) > 0 + # + # if(DELETE_STAGING_GPKGS && has_fema_state_gpkgs) { + # message(" - Deleting individual gpkgs from '", vpu_dir, "' directory...") + # remove_gpkg_cmds <- paste0("rm ", fema_state_gpkgs) + # + # for (remove_cmd in remove_gpkg_cmds) { + # message(" > '", remove_cmd, "'") + # system(remove_cmd) + # } + # } + + # message() + message("Merge complete!") + message("Merged '", basename(vpu_dir), "' FEMA output geopackage:\n --> '", master_filepath, "'") + message() + +} +# ------------------------------------------------------------------------------------- +# ----Apply simplify, dissolve, explode on the MERGED polygons ---- +# ------------------------------------------------------------------------------------- + +# # NOTE: remove past runs for testing... +# for (i in list.files(FEMA_VPU_SUBFOLDERS, full.names = T)[grepl("_output.gpkg", list.files(FEMA_VPU_SUBFOLDERS, full.names = T))]) { +# file.remove(i) +# } +# seq_along(BASE_DIRS_LIST$fema_by_vpu_subdirs) +# rm(STAGING_FILES_TO_DELETE, i, VPU, vpu_dir) + +for (i in 1:length(FEMA_VPU_SUBFOLDERS)) { + + # i = 8 + # FEMA_VPU_SUBFOLDERS + STAGING_FILES_TO_DELETE <- c() + + vpu_dir <- FEMA_VPU_SUBFOLDERS[i] + VPU <- basename(vpu_dir) + + message(i, " - Attempting to union FEMA polygons for '", VPU, "'...") + + # path to the merged directory where the final merged geopackage will end up + master_name <- paste0("fema-", gsub("VPU", "vpu", basename(vpu_dir))) + master_gpkg_name <- paste0(master_name, ".gpkg") + master_filepath <- paste0(vpu_dir, "/merged/", master_gpkg_name) + + master_geojson_name <- paste0(master_name, ".geojson") + master_geojson_filepath <- paste0(vpu_dir, "/merged/", master_geojson_name) + + # updated_gpkg_name <- gsub(".gpkg", "-output.gpkg", master_gpkg_name) + updated_gpkg_name <- master_gpkg_name + updated_filepath <- paste0(vpu_dir, "/output/", master_gpkg_name) + + updated_gpkg_exists <- file.exists(updated_filepath) + + # remove output file if it already exists + if (updated_gpkg_exists) { + file.remove(updated_filepath) + } + + message("VPU Merged FEMA filename: '", master_gpkg_name, "'") + message("> Simplifying, dissolve, exploding VPU aggregated FEMA polygons... '", basename(master_filepath), "'") + + if(!file.exists(master_filepath)) { + message("No FEMA geometries in '", VPU, "'") + message() + next + } + + message("Converting \n > '", basename(master_filepath), "' to geojson '", master_geojson_name, "'") + + geojson_exists <- file.exists(master_geojson_filepath) + + # message(" >>> '", geojson_filename, "' already exists? ", geojson_exists) + # message(" >>> Overwrite? 
", OVERWRITE_FEMA_FILES) + gpkg_to_geojson_cmd <- paste0("ogr2ogr -s_srs EPSG:5070 -t_srs EPSG:5070 ", master_geojson_filepath, " ", master_filepath) + # gpkg_to_geojson_cmd <- paste0("ogr2ogr -f GEOJSON -s_srs EPSG:5070 -t_srs EPSG:5070 ", master_geojson_filepath, " ", master_filepath) + # gpkg_to_geojson_cmd <- paste0("ogr2ogr ", master_geojson_filepath, " ", master_filepath) + # file.remove(master_geojson_filepath) + + # if (OVERWRITE_FEMA_FILES || !geojson_exists) { + system(gpkg_to_geojson_cmd) + message("Writing '", master_geojson_name, "' to: \n > '", master_geojson_filepath, "'") + + STAGING_FILES_TO_DELETE <- c(STAGING_FILES_TO_DELETE, master_geojson_filepath) + # } + + # master_gj <- sf::read_sf(master_geojson_filepath) + # master_gpkg <- sf::read_sf(master_filepath) + + # Clean GeoJSON + message("Simplify, dissolve, explode > '", master_geojson_name, "'") + output_clean_filename <- gsub(".geojson", "_clean.geojson", master_geojson_name) + output_clean_geojson_path <- paste0(vpu_dir, "/merged/", output_clean_filename) + + clean_geojson_exists <- file.exists(output_clean_geojson_path) + message(" >>> '", output_clean_filename, "' already exists? ", clean_geojson_exists) + # message(" >>> Overwrite? ", OVERWRITE_FEMA_FILES) + + if (clean_geojson_exists) { + file.remove(output_clean_geojson_path) + } + + mapshaper_command = paste0('node --max-old-space-size=16000 /opt/homebrew/bin/mapshaper ', + master_geojson_filepath, + # ' -clean \\', + # ' -explode \\', + # ' -dissolve2 \\', + ' -simplify 0.3 visvalingam \\', + ' -snap \\', + ' -explode \\', + ' -clean \\', + # ' -proj EPSG:5070 \\', + ' -o ', output_clean_geojson_path + ) + + system(mapshaper_command) + # message("Writing '", master_geojson_name, "' to: \n > '", master_geojson_filepath, "'") + + STAGING_FILES_TO_DELETE <- c(STAGING_FILES_TO_DELETE, output_clean_geojson_path) + # output_clean_gpkg_filename <- gsub(".geojson", ".gpkg", master_geojson_name) + # output_clean_gpkg_path <- paste0(vpu_dir, "/merged/", output_clean_gpkg_filename) + + # fema_vpu <- sf::read_sf(master_filepath) + # geojson_to_gpkg_cmd <- paste0("ogr2ogr -f GPKG ", updated_filepath, " ", output_clean_geojson_path) + geojson_to_gpkg_cmd <- paste0("ogr2ogr -nlt MULTIPOLYGON -s_srs EPSG:5070 -t_srs EPSG:5070 ", + updated_filepath, " ", + output_clean_geojson_path) + # geojson_to_gpkg_cmd <- paste0("ogr2ogr ", updated_filepath, " ", output_clean_geojson_path) + + # updated_gpkg_exists <- file.exists(updated_filepath) + # if (updated_gpkg_exists) { + # file.remove(updated_filepath) + # } + + + # if (OVERWRITE_FEMA_FILES || !updated_gpkg_exists) { + system(geojson_to_gpkg_cmd) + message("Writing '", updated_gpkg_name, "' to: \n > '", updated_filepath, "'") + # } + + # sf::st_layers(updated_filepath) + + # mapview::npts(fema) + fema <- + sf::read_sf(updated_filepath) %>% + # sf::read_sf(output_clean_geojson_path) %>% + # rmapshaper::ms_explode(sys=TRUE, sys_mem = 16) %>% + dplyr::mutate( + vpu = gsub("vpu-", "", VPU), + fema_id = as.character(1:dplyr::n()) + ) %>% + dplyr::select( + vpu, + fema_id, + geom + ) + + # fema %>% + # rmapshaper::ms_simplify(keep = 0.5, keep_shapes = T) %>% + # dplyr::group_by(fema_id) %>% + # dplyr::mutate(pts = mapview::npts(geom)) %>% + # dplyr::arrange(-pts) + + # remove before writting updated version + file.remove(updated_filepath) + + sf::write_sf( + fema, + updated_filepath, + append = FALSE + ) + + message("Deleting intermediary files\n") + for (delete_file in STAGING_FILES_TO_DELETE) { + if (file.exists(delete_file)) { + 
message("Deleting >>> '", delete_file, "'") + file.remove(delete_file) + } + + } +} + + +# ------------------------------------------------------------------------------------- +# ---- Store all FEMA layers in a single conus_fema.gpkg +# ------------------------------------------------------------------------------------- + +FEMA_VPU_SUBFOLDERS + +all_fema_vpu_layers <- list.files(BASE_DIRS_LIST$fema_by_vpu_output_dirs, full.names = TRUE) + +# fema_vpu_layers <- list.files(FEMA_VPU_SUBFOLDERS, full.names = T)[grepl("_output.gpkg", list.files(FEMA_VPU_SUBFOLDERS))] + +combine_gpkg_files(all_fema_vpu_layers, CONUS_FEMA_GPKG_PATH) + + +# ------------------------------------------------------------------------------------- + +} else { + message("'", basename(CONUS_FEMA_GPKG_PATH), "'already exists at \n> ", CONUS_FEMA_GPKG_PATH) +} + +# ------------------------------------------------------------------------------------- +# ------------------------------------------------------------------------------------- + +# # ------------------------------------------------------------------------------------- +# # ---- Union each VPU geopackage (either on state or just touching predicate) ---- +# # ------------------------------------------------------------------------------------- +# +# for (i in 1:length(FEMA_VPU_SUBFOLDERS)) { +# +# vpu_dir <- FEMA_VPU_SUBFOLDERS[i] +# VPU <- basename(vpu_dir) +# +# message(i, " - Attempting to union FEMA polygons for '", VPU, "'...") +# +# # path to the merged directory where the final merged geopackage will end up +# master_name <- paste0("fema_", gsub("VPU", "vpu", basename(vpu_dir))) +# master_gpkg_name <- paste0(master_name, ".gpkg") +# master_filepath <- paste0(vpu_dir, "/", master_gpkg_name) +# +# updated_gpkg_name <- gsub(".gpkg", "_output.gpkg", master_gpkg_name) +# updated_filepath <- paste0(vpu_dir, "/", updated_gpkg_name) +# +# message("> Re-unioning and re-exploding geometries in '", basename(master_filepath), "'") +# +# if(!file.exists(master_filepath)) { +# message("No FEMA geometries in '", VPU, "'") +# message() +# next +# } +# +# +# fema_vpu <- sf::read_sf(master_filepath) +# +# # fema_vpu %>% sf::st_geometry_type() %>% unique() +# # fema_vpu %>% mapview::npts() +# # fema_vpu %>% sf::st_is_valid() %>% all() +# # fema_vpu %>% +# # sf::st_make_valid() %>% +# # sf::st_geometry_type() %>% +# # unique() +# +# geom_type_counts <- table(sf::st_geometry_type(fema_vpu)) +# +# message("Geometry counts before casting all geometries to MULTIPOLYGON:") +# for (g in seq_along(geom_type_counts)) { +# message(" > ", names(geom_type_counts[g]), ": ", geom_type_counts[g]) +# } +# +# # mapview::mapview(fema_vpu, color = 'red', col.regions = 'white') + +# # mapview::mapview(fema_union, color = 'green', col.regions = 'white') +# +# # fema_vpu %>% +# # sf::st_make_valid() %>% +# # dplyr::filter(sf::st_geometry_type(geom) %in% c("POLYGON", "MULTIPOLYGON")) %>% +# # sf::st_is_valid() %>% +# # all() +# +# tryCatch({ +# +# fema_vpu <- +# fema_vpu %>% +# nngeo::st_remove_holes(max_area = 200) %>% +# # sf::st_make_valid() %>% +# # dplyr::filter(sf::st_geometry_type(geom) %in% c("POLYGON", "MULTIPOLYGON")) %>% +# add_predicate_group_id(sf::st_intersects) %>% +# dplyr::group_by(group_id) %>% +# rmapshaper::ms_dissolve(sys = TRUE, sys_mem = 16) %>% +# rmapshaper::ms_explode(sys = TRUE, sys_mem = 16) %>% +# dplyr::ungroup() %>% +# nngeo::st_remove_holes(max_area = 200) %>% +# dplyr::mutate( +# vpu = gsub("VPU_", "", VPU), +# fema_id = as.character(1:dplyr::n()) +# ) %>% +# 
dplyr::select( +# vpu, fema_id, +# # state, +# geom = geometry +# ) +# +# }, error = function(e) { +# message(VPU, " threw into the following error \n ", e) +# message(" > Cleaning ", VPU, " using a backup cleaning strategy...") +# +# fema_vpu <- +# fema_vpu %>% +# sf::st_make_valid() %>% +# dplyr::mutate( +# vpu = gsub("VPU_", "", VPU), +# fema_id = as.character(1:dplyr::n()) +# ) %>% +# dplyr::select( +# vpu, fema_id, +# # state, +# geom +# ) +# +# }) +# +# if (OVERWRITE_FEMA_FILES) { +# +# message("> Overwritting '", basename(master_filepath), "' with final clean version...") +# +# # union_file_path <- gsub(".gpkg", "_union.gpkg", fema_vpu_file) +# # message("> writting '", basename(union_file_path), "' (unioned and exploded version)") +# +# sf::write_sf( +# fema_vpu, +# updated_filepath +# # master_filepath +# # union_file_path +# ) +# } +# message() +# } diff --git a/runners/cs_runner2/utils.R b/runners/cs_runner2/utils.R new file mode 100644 index 0000000..5413d5c --- /dev/null +++ b/runners/cs_runner2/utils.R @@ -0,0 +1,2680 @@ +# Create an empty file structure +# base_dir: character, top level directory path +# Directory tree: +# base_dir/ +# └── lynker-spatial/ +# ├── hydrofabric/ +# ├── dem/ +# ├── vrt/ +# ├── tif/ +# ├── cs-extension-polygons/ +create_local_hydrofabric_base_dirs <- function(base_dir) { + # base_dir <- BASE_DIR + + # build paths + hydrofabric_dir <- paste0(base_dir, "/hydrofabric") + + # DEM dirs + dem_dir <- file.path(base_dir, "dem") + dem_vrt_dir <- file.path(dem_dir, "vrt") + # dem_tif_dir <- file.path(dem_dir, "tif") + + # polygons for transect extensions + cs_extension_polygons_dir <- file.path(base_dir, "cs-extension-polygons") + + # FEMA data + fema_dir <- file.path(base_dir, "fema") + + fema_fgb_dir <- file.path(fema_dir, "fema-fgb") + fema_geojson_dir <- file.path(fema_dir, "fema-geojson") + fema_clean_dir <- file.path(fema_dir, "fema-clean") + fema_gpkg_dir <- file.path(fema_dir, "fema-gpkg") + + # BY VPU folders + VPU_IDS <- get_vpu_ids() + fema_by_vpu_dir <- file.path(fema_dir, "fema-by-vpu") + fema_by_vpu_subdirs <- paste0(fema_by_vpu_dir, "/vpu-", VPU_IDS) + + # create base directories + create_if_not_exists(base_dir) + create_if_not_exists(hydrofabric_dir) + + # DEM dirs + create_if_not_exists(dem_dir) + create_if_not_exists(dem_vrt_dir) + # create_if_not_exists(dem_tif_dir) + + # extension polygons + create_if_not_exists(cs_extension_polygons_dir) + + + # Create FEMA folders + create_if_not_exists(fema_dir) + create_if_not_exists(fema_fgb_dir) + create_if_not_exists(fema_geojson_dir) + create_if_not_exists(fema_clean_dir) + create_if_not_exists(fema_gpkg_dir) + create_if_not_exists(fema_by_vpu_dir) + + for (path in fema_by_vpu_subdirs) { + create_if_not_exists(path) + create_if_not_exists(file.path(path, "subsets")) + create_if_not_exists(file.path(path, "merged")) + create_if_not_exists(file.path(path, "output")) + } + +} + +get_vpu_ids <- function() { + VPU_IDS <- c('01', '02', '03N', '03S', '03W', '04', '05', '06', '07', '08', '09', + '10L', '10U', '11', '12', '13', '14', '15', '16', '17', '18', '20', '21') + # VPU_IDS <- sf::st_drop_geometry(nhdplusTools::get_boundaries())$VPUID + + return(VPU_IDS) + +} + +# # Base directory for local file storage +# BASE_DIR <- '/Volumes/T7SSD/lynker-spatial' +# base_dir <- BASE_DIR +# # FEMA100 year flood map FGB save location (temporary, will be deleted after processing) +# FEMA_FGB_PATH <- file.path(BASE_DIRS_LIST$fema_dir, "fema_fgb") +# FEMA_GEOJSON_PATH <- file.path(BASE_DIRS_LIST$fema_dir, 
"fema_geojson") +# FEMA_CLEAN_PATH <- file.path(BASE_DIRS_LIST$fema_dir, "fema_clean") +# FEMA_GPKG_PATH <- file.path(BASE_DIRS_LIST$fema_dir, "fema_gpkg") +# FEMA_BY_VPU_PATH <- file.path(BASE_DIRS_LIST$fema_dir, "FEMA_BY_VPU") +# +# +# VPU_IDS <- c('01', '02', '03N', '03S', '03W', '04', '05', '06', '07', '08', '09', +# '10L', '10U', '11', '12', '13', '14', '15', '16', '17', '18', '20', '21') +# # VPU_IDS <- sf::st_drop_geometry(nhdplusTools::get_boundaries())$VPUID +# +# paste0("'", sf::st_drop_geometry(nhdplusTools::get_boundaries())$VPUID, "'", collapse = ", ") +# FEMA_VPU_SUBFOLDERS <- paste0(FEMA_BY_VPU_PATH, "/VPU_", VPU_IDS) + +# Create an empty file structure for a new version within a specified base_dir +# base_dir: character, top level directory path +# Directory tree: +# base_dir/ +# └── lynker-spatial/ +# ├── hydrofabric/ +# ├── version_number/ +# ├── network/ +# ├── transects/ +# ├── cross-sections/ +# ├── dem/ +# ├── dem-ml/ +# ├── dem-coastal-bathy/ +# ├── dem-points/ +create_new_version_dirs <- function(base_dir, version, with_output = FALSE) { + # version = "v3.0" + # base_dir <- BASE_DIR + + # build paths + hydrofabric_dir <- paste0(base_dir, "/hydrofabric") + version_base_dir <- paste0(hydrofabric_dir, "/", version) + + # polygons for transect extensions + ml_dir <- paste0(version_base_dir, "/ml") + + # reference features + ref_features_dir <- paste0(version_base_dir, "/reference-features") + + # conus network gpkg + network_dir <- paste0(version_base_dir, "/network") + + # transects + transects_dir <- paste0(version_base_dir, "/transects") + transects_base_dir <- paste0(transects_dir, "/base") + transects_fema_extended_dir <- paste0(transects_dir, "/extended-by-fema") + transects_cs_extended_dir <- paste0(transects_dir, "/extended-by-cs-attributes") + transects_output_dir <- paste0(transects_dir, "/output") + + # cross sections dirs + cross_sections_dir <- paste0(version_base_dir, "/cross-sections") + cross_sections_dem_dir <- paste0(cross_sections_dir, "/dem") + cross_sections_ml_dir <- paste0(cross_sections_dir, "/dem-ml") + cross_sections_coastal_bathy_dir <- paste0(cross_sections_dir, "/dem-coastal-bathy") + cross_sections_dem_pts_dir <- paste0(cross_sections_dir, "/dem-points") + cross_sections_output_dir <- paste0(cross_sections_dir, "/output") + + + if(with_output) { + output_dir <- paste0(version_base_dir, "/outputs") + } + + # create version BASE dir + create_if_not_exists(version_base_dir) + + # CONUS dir + create_if_not_exists(network_dir) + + # ML data + create_if_not_exists(ml_dir) + + # reference features data + create_if_not_exists(ref_features_dir) + + # transects + create_if_not_exists(transects_dir) + create_if_not_exists(transects_base_dir) + create_if_not_exists(transects_fema_extended_dir) + create_if_not_exists(transects_cs_extended_dir) + create_if_not_exists(transects_output_dir) + + # CS pts + create_if_not_exists(cross_sections_dir) + create_if_not_exists(cross_sections_dem_dir) + create_if_not_exists(cross_sections_ml_dir) + create_if_not_exists(cross_sections_coastal_bathy_dir) + create_if_not_exists(cross_sections_dem_pts_dir) + create_if_not_exists(cross_sections_output_dir) + + + if(with_output) { + create_if_not_exists(output_dir) + } + +} + +create_if_not_exists <- function(dir_path) { + if (!dir.exists(dir_path)) { + dir.create(dir_path, recursive = TRUE) + message("Created directory: '", dir_path, "'\n") + } +} + +# get a list of top level directories for main directory +get_base_dir_paths <- function(base_dir) { + # base_dir = 
BASE_DIR + # version = "v3.0" + + hydrofabric_dir <- file.path(base_dir, "hydrofabric") + + dem_dir <- file.path(base_dir, "dem") + dem_vrt_dir <- file.path(base_dir, "dem", "vrt") + # dem_tif_dir <- file.path(base_dir, "dem", "tif") + + cs_extension_polygons_dir <- file.path(base_dir, "cs-extension-polygons") + + # FEMA data + + fema_dir <- file.path(base_dir, "fema") + + fema_fgb_dir <- file.path(fema_dir, "fema-fgb") + fema_geojson_dir <- file.path(fema_dir, "fema-geojson") + fema_clean_dir <- file.path(fema_dir, "fema-clean") + fema_gpkg_dir <- file.path(fema_dir, "fema-gpkg") + + # BY VPU folders + VPU_IDS <- get_vpu_ids() + fema_by_vpu_dir <- file.path(fema_dir, "fema-by-vpu") + fema_by_vpu_subdirs <- paste0(fema_by_vpu_dir, "/vpu-", VPU_IDS) + + fema_by_vpu_subsets_dirs <- file.path(fema_by_vpu_subdirs, "subsets") + fema_by_vpu_merged_dirs <- file.path(fema_by_vpu_subdirs, "merged") + fema_by_vpu_output_dirs <- file.path(fema_by_vpu_subdirs, "output") + + return( + list( + hydrofabric_dir = hydrofabric_dir, + dem_dir = dem_dir, + dem_vrt_dir = dem_vrt_dir, + # dem_tif_dir = dem_tif_dir, + cs_extension_polygons_dir = cs_extension_polygons_dir, + fema_dir = fema_dir, + fema_fgb_dir = fema_fgb_dir, + fema_geojson_dir = fema_geojson_dir, + fema_clean_dir = fema_clean_dir, + fema_gpkg_dir = fema_gpkg_dir, + fema_by_vpu_dir = fema_by_vpu_dir, + fema_by_vpu_subdirs = fema_by_vpu_subdirs, + fema_by_vpu_subsets_dirs = fema_by_vpu_subsets_dirs, + fema_by_vpu_merged_dirs = fema_by_vpu_merged_dirs, + fema_by_vpu_output_dirs = fema_by_vpu_output_dirs + ) + ) +} + +# get list of a specific directories in a version directory +get_version_base_dir_paths <- function(base_dir, version) { + # base_dir = BASE_DIR + # version = "v3.0" + + hydrofabric_dir <- file.path(base_dir, "hydrofabric") + + version_base_dir <- file.path(hydrofabric_dir, version) + + # polygons for transect extensions + ml_dir <- file.path(version_base_dir, "ml") + + # reference features + ref_features_dir <- file.path(version_base_dir, "reference-features") + + # conus network gpkg + network_dir <- file.path(version_base_dir, "network") + + # transects + transects_dir <- file.path(version_base_dir, "transects") + transects_base_dir <- paste0(transects_dir, "/base") + transects_fema_extended_dir <- paste0(transects_dir, "/extended-by-fema") + transects_cs_extended_dir <- paste0(transects_dir, "/extended-by-cs-attributes") + transects_output_dir <- paste0(transects_dir, "/output") + + # cross sections dirs + cross_sections_dir <- file.path(version_base_dir, "cross-sections") + cross_sections_dem_dir <- file.path(cross_sections_dir, "dem") + cross_sections_ml_dir <- file.path(cross_sections_dir, "dem-ml") + cross_sections_coastal_bathy_dir <- file.path(cross_sections_dir, "dem-coastal-bathy") + cross_sections_dem_pts_dir <- file.path(cross_sections_dir, "dem-points") + cross_sections_output_dir <- paste0(cross_sections_dir, "/output") + + return( + list( + hydrofabric_dir = hydrofabric_dir, + version_base_dir = version_base_dir, + + ref_features_dir = ref_features_dir, + + network_dir = network_dir, + + ml_dir = ml_dir, + + transects_dir = transects_dir, + transects_base_dir = transects_base_dir, + transects_fema_extended_dir = transects_fema_extended_dir, + transects_cs_extended_dir = transects_cs_extended_dir, + transects_output_dir = transects_output_dir, + + cross_sections_dir = cross_sections_dir, + cross_sections_dem_dir = cross_sections_dem_dir, + cross_sections_dem_pts_dir = cross_sections_dem_pts_dir, + cross_sections_ml_dir 
= cross_sections_ml_dir, + cross_sections_coastal_bathy_dir = cross_sections_coastal_bathy_dir, + cross_sections_output_dir = cross_sections_output_dir + + ) + ) +} + + +combine_gpkg_files <- function(gpkg_paths, output_gpkg) { + + layer_counter <- list() + + for (gpkg_path in gpkg_paths) { + + base_name <- tools::file_path_sans_ext(basename(gpkg_path)) + # base_name <- gsub("_output.gpkg", "", basename(gpkg_path)) + + if (base_name %in% names(layer_counter)) { + layer_counter[[base_name]] <- layer_counter[[base_name]] + 1 + layer_name <- paste0(base_name, "_", layer_counter[[base_name]]) + } else { + + layer_counter[[base_name]] <- 1 + layer_name <- base_name + } + + tryCatch({ + sf_layer <- st_read(gpkg_path, quiet = TRUE) + + sf::st_write(sf_layer, + dsn = output_gpkg, + layer = layer_name, + append = TRUE, + quiet = TRUE) + + message("Successfully added '", basename(gpkg_path), "' as layer: '", layer_name, "' to\n > '", output_gpkg, "'") + + }, error = function(e) { + warning("Error processing: ", basename(gpkg_path)) + warning(e) + }) + } +} + +get_transect_filenames <- function(vpu, sep = "-", ext = ".gpkg") { + # ext <- ".gpkg" + # sep = "_" + transects_base_path <- paste0(paste0(c(vpu, "transects", "base"), collapse = sep) , ext) + transects_fema_extended_path <- paste0(paste0(c(vpu, "transects", "extended", "by", "fema"), collapse = sep) , ext) + transects_cs_extended_path <- paste0(paste0(c(vpu, "transects", "extended", "by", "cs", "attributes"), collapse = sep) , ext) + transects_output_path <- paste0(paste0(c(vpu, "transects"), collapse = sep) , ext) + + return( + list( + transects_base_path = transects_base_path, + transects_fema_extended_path = transects_fema_extended_path, + transects_cs_extended_path = transects_cs_extended_path, + transects_output_path = transects_output_path + ) + ) +} + +get_cross_section_filenames <- function(vpu, sep = "-", ext = ".parquet") { + # ext <- ".parquet" + # sep = "_" + cs_pts_base_path <- paste0(paste0(c(vpu, "cross", "sections", "base"), collapse = sep) , ext) + cs_pts_fema_extended_path <- paste0(paste0(c(vpu, "cross", "sections", "extended", "by", "fema"), collapse = sep) , ext) + cs_pts_cs_extended_path <- paste0(paste0(c(vpu, "cross", "sections", "extended", "by", "cs", "attributes"), collapse = sep) , ext) + cs_pts_output_path <- paste0(paste0(c(vpu, "cross", "sections"), collapse = sep) , ext) + + return( + list( + cs_pts_base_path = cs_pts_base_path, + cs_pts_fema_extended_path = cs_pts_fema_extended_path, + cs_pts_cs_extended_path = cs_pts_cs_extended_path, + cs_pts_output_path = cs_pts_output_path + ) + ) +} + +# derive the FEMA VPU layer name from the conus_fema.gpkg +get_fema_conus_layer_name <- function(vpu) { + + return(paste0("fema-vpu-", vpu)) + +} + +list_s3_objects <- function(s3_bucket, pattern = NULL, aws_profile = NULL) { + + profile_option <- if (!is.null(aws_profile)) paste0("--profile ", aws_profile) else "" + + if (is.null(pattern) || pattern == "") { + grep_command <- "" # no filtering if empty or NULL + } else { + grep_command <- paste0(" | grep -E \"", pattern, "\"") # grep if a pattern is given + } + + cmd <- paste0( + '#!/bin/bash\n', + 'S3_BUCKET="', s3_bucket, '"\n', + 'PATTERN="', pattern, '"\n', + 'S3_OBJECTS=$(aws s3 ls "$S3_BUCKET" ', profile_option, ' | awk \'{print $4}\' | grep -E "$PATTERN")\n', + 'echo "$S3_OBJECTS"' + ) + # cmd <- paste0( + # '#!/bin/bash\n', + # 'S3_BUCKET="', s3_bucket, '"\n', + # 'S3_OBJECTS=$(aws s3 ls "$S3_BUCKET" ', profile_option, ' | awk \'{print $4}\'', grep_command, ')\n', 
+  #   'echo "$S3_OBJECTS"'
+  # )
+  ls_output <- system(cmd, intern = TRUE)
+  return(ls_output)
+}
+
+download_tiles <- function(tile_paths, output_dir) {
+  # output_dir <- DEM_TIF_DIR
+  # tile_paths
+  # error_tiles <-
+  tif_save_paths <- paste0(output_dir, "/", basename(tile_paths))
+
+  error_tiles <- data.frame(
+    tile = basename(tile_paths),
+    status = TRUE
+  )
+
+  for (i in seq_along(tile_paths)) {
+    # i = 1
+    tile_path <- tile_paths[i]
+    tif_save_path <- tif_save_paths[i]
+
+    message("[", i, "]",
+            "\n > Tile: ", basename(tile_path),
+            "\n > Output path: ", tif_save_path
+    )
+
+    download_tif_cmd <- paste0("curl -o ", tif_save_path, " ", tile_path)
+
+    tryCatch({
+
+      tif_download_output <- system(download_tif_cmd, intern = TRUE)
+      message(" > Successfully downloaded tile: ", basename(tile_path))
+
+    }, error = function(e) {
+
+      message("Error downloading tile: ", basename(tile_path))
+      message("Error below: \n ", e)
+
+      error_tiles[error_tiles$tile == basename(tile_path), ] <- FALSE
+
+    })
+
+  }
+
+  return(error_tiles)
+
+}
+
+# Given 2 character vectors of filenames that both include VPU strings after a "nextgen_" string, match them together to
+# make sure they are aligned and in the same order
+# x is a character vector of file paths with a VPU ID preceded by a "nextgen_" string
+# y is a character vector of file paths with a VPU ID preceded by a "nextgen_" string
+# base is a character vector of the base directory of the files. Defaults to NULL
+# Returns a dataframe with VPU, x, and y columns
+align_files_by_vpu <- function(
+    x,
+    y,
+    base = NULL
+) {
+
+  # Regular expression pattern to match numeric pattern after "nextgen_" and remove everything after the ending period
+  regex_pattern <- "nextgen_(\\d+[A-Za-z]?).*"
+
+  # path dataframe for X filepaths
+  x_paths <- data.frame(x = x)
+
+  # path dataframe for Y filepaths
+  y_paths <- data.frame(y = y)
+
+  # generate VPU IDs based on file path regular expression matching with "regex_pattern" above
+  x_paths$vpu <- gsub(regex_pattern, "\\1", x_paths$x)
+  y_paths$vpu <- gsub(regex_pattern, "\\1", y_paths$y)
+
+  # match paths based on VPU column
+  matched_paths <- dplyr::left_join(
+    x_paths,
+    y_paths,
+    by = "vpu"
+  )
+
+  # reorder columns
+  matched_paths <- dplyr::relocate(matched_paths, vpu, x, y)
+
+  if(!is.null(base)) {
+    matched_paths$base_dir <- base
+  }
+
+  return(matched_paths)
+
+}
+
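+# --- Illustrative usage sketch (not part of the original utils) ---
+# Minimal example of align_files_by_vpu(); the filenames below are hypothetical and only
+# show how rows get matched on the VPU ID that follows the "nextgen_" prefix.
+# x_files <- c("nextgen_01.gpkg", "nextgen_03W.gpkg")
+# y_files <- c("nextgen_03W_ml_outputs.parquet", "nextgen_01_ml_outputs.parquet")
+# align_files_by_vpu(x = x_files, y = y_files, base = "/tmp")
+# # Expected result: a dataframe with columns vpu, x, y (plus base_dir), where each row
+# # pairs the files for the same VPU ("01" with "01", "03W" with "03W") regardless of input order.
+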
gsub("-100yr-flood_valid_clean.gpkg", "", source) + ) %>% + dplyr::select(fema_id, source, state, geometry) + + return(fema) +} + +add_predicate_group_id <- function(polys, predicate) { + # GROUP BY SPATIAL PREDICATES + # ----------------------------------------- + # predicate = sf::st_touches + # polys <- sf_df + # ----------------------------------------- + + + relations <- predicate(polys) + + relations <- lapply(seq_along(relations), function(i) { as.character(sort(unique(c(relations[i][[1]], i)))) }) + + group_ids_map <- fastmap::fastmap() + ids_to_groups <- fastmap::fastmap() + + group_id <- 0 + + for (i in seq_along(relations)) { + + predicate_ids <- relations[i][[1]] + + # message("(", i, ") - ", predicate_ids) + # message("Start Group ID: ", group_id) + + id_group_check <- ids_to_groups$has(predicate_ids) + + if(any(id_group_check)) { + + known_groups <- ids_to_groups$mget(predicate_ids) + known_group <- known_groups[unname(sapply(known_groups , function(kg) { + !is.null(kg) + }))][[1]] + + # message("IDs part of past group ID > '", known_group, "'") + + past_group_ids <- group_ids_map$get(known_group)[[1]] + updated_group_ids <- as.character( + sort(as.numeric(unique(c(past_group_ids, predicate_ids)))) + ) + + group_ids_map$set(known_group, list(updated_group_ids)) + + new_ids <- predicate_ids[!predicate_ids %in% past_group_ids] + + # message("Adding ", new_ids, " to seen set...") + + # add any newly added IDs to the seen map + for (seen_id in new_ids) { + # message(seen_id) + ids_to_groups$set(as.character(seen_id), as.character(group_id)) + } + + } else { + # get a new group ID number + group_id <- group_id + 1 + # message("IDs form NEW group > '", group_id, "'") + + # create a new key in the map with the predicate IDs list as the value + group_ids_map$set(as.character(group_id), list(predicate_ids)) + + # message("Adding ", predicate_ids, " to seen set...") + + # add each predicate ID to the map storing the seen indexes and their respecitve group IDs + for (seen_id in predicate_ids) { + # message(seen_id) + ids_to_groups$set(as.character(seen_id), as.character(group_id)) + } + } + # message("End group ID: ", group_id, "\n") + } + + group_ids <- group_ids_map$as_list() + + grouping_df <- lapply(seq_along(group_ids), function(i) { + # i = 2 + grouping <- group_ids[i] + group_id <- names(grouping) + indices <- grouping[[1]][[1]] + + data.frame( + index = as.numeric(indices), + group_id = rep(group_id, length(indices)) + ) + + }) %>% + dplyr::bind_rows() %>% + dplyr::arrange(i) + + # count up the number of IDs for each group, well use this to determine which group + # to put any indices that had MULTIPLE groups they were apart of (use the group with the most other members) + group_id_counts <- + grouping_df %>% + dplyr::group_by(group_id) %>% + dplyr::count() %>% + # dplyr::arrange(-n) %>% + dplyr::ungroup() + + # select the IDs with the most other members + grouping_df <- + grouping_df %>% + dplyr::left_join( + group_id_counts, + by = 'group_id' + ) %>% + dplyr::group_by(index) %>% + dplyr::slice_max(n, with_ties = FALSE) %>% + dplyr::ungroup() %>% + dplyr::select(-n) %>% + dplyr::arrange(-index) + + polys$group_id <- grouping_df$group_id + + return(polys) + +} + +# Update flowlines and transects to remove flowlines and transects that intersect with reference_features waterbodies +# flowlines: flowlines linestring sf object +# trans: transects linestring sf object +# waterbodies: waterbodies polygon sf object +# Returns a list of length 2 with logical vectors that subsets the 
"flowlines" and "transects" sf objects to remove flowlines and transects that intersect waterbodies +### Returns a list of length 2 with updated "flowlines" and "transects" sf objects +wb_intersects <- function(flowlines, trans, waterbodies) { + + ######## ######## ######## ######## ######## ######## + + flowlines_geos <- geos::as_geos_geometry(flowlines) + wbs_geos <- geos::as_geos_geometry(waterbodies) + + # temporary ID for transects that is the "hy_id", underscore, "cs_id", used for subsetting in future steps + trans$tmp_id <- paste0(trans$hy_id, "_", trans$cs_id) + + message("Checking flowlines against waterbodies...") + + # create an index between flowlines and waterbodies + wb_index <- geos::geos_intersects_matrix(flowlines_geos, wbs_geos) + + # remove any flowlines that cross more than 1 waterbody + to_keep <- flowlines[lengths(wb_index) == 0, ] + to_check <- flowlines[lengths(wb_index) != 0, ] + + # subset transects to the hy_ids in "to_check" set of flowlines + trans_check <- trans[trans$hy_id %in% unique(to_check$id), ] + # trans_check <- trans_geos[trans$hy_id %in% unique(to_check$id)] + + # check where the transects linestrings intersect with the waterbodies + trans_geos_check <- geos::as_geos_geometry(trans_check) + + message("Checking transects against waterbodies (v2) ...") + wb_trans_index <- geos::geos_intersects_matrix(trans_geos_check, wbs_geos) # (NEW METHOD) + # wb_trans_index <- geos::geos_intersects_any(trans_geos_check, wbs_geos[unlist(wb_index)]) # (OLD METHOD) + + # sum(lengths(wb_trans_index) == 0) + # length(wb_trans_index) + + # within the transects lines that are on a flowline that crosses a waterbody, + # check if any of these transects line DO NOT CROSS A WATERBODY AT ALL + trans_keep <- trans_check[lengths(wb_trans_index) == 0, ] # (NEW METHOD) + # trans_keep <- trans_check[!wb_trans_index, ] # (OLD METHOD) + + # preserve any flowlines that CROSS A WATERBODY BUT ALSO HAVE A TRANSECT LINE that does NOT cross any waterbodies + to_check <- to_check[to_check$id %in% unique(trans_keep$hy_id), ] + + # update flowlines to keep with flowlines that intersect a waterbody BUT STILL, + # have transects that are NOT in the waterbody + to_keep <- dplyr::bind_rows(to_keep, to_check) + + # 'tmp_ids' of transects that are being checked and also the transects within trans_check + # that were determined to be valid (are being kept) + check_ids <- unique(trans_check$tmp_id) + keep_ids <- unique(trans_keep$tmp_id) + + # logical vectors of which flowlines/transects to keep (KEEP == TRUE) + # - Remove any transects that are on flowlines that cross a waterbody AND the transect crosses the waterbody too. 
+  #  - Keep original transects that are not on flowlines that intersect waterbodies AND
+  #    also the transects that do NOT intersect waterbodies but are on a flowline that DOES intersect a waterbody
+  valid_flowlines <- flowlines$id %in% to_keep$id
+  valid_transects <- trans$tmp_id %in% dplyr::filter(trans,
+                                                     !tmp_id %in% check_ids[!check_ids %in% keep_ids])$tmp_id
+
+  # return a list of updated flowlines and transects
+  return(
+    list(
+      "valid_flowlines" = valid_flowlines,
+      "valid_transects" = valid_transects
+    )
+  )
+
+  # # within the transect lines that are on a flowline that crosses a waterbody,
+  # # check if any of these transect lines DO NOT CROSS A WATERBODY AT ALL
+  # trans_keep <- trans_check[!trans_wb_index, ]
+  # # trans_keep <- trans_check[lengths(trans_wb_index2) == 0, ]
+  #
+  # # preserve any flowlines that CROSS A WATERBODY BUT ALSO HAVE A TRANSECT LINE that does NOT cross any waterbodies
+  # to_check <- to_check[to_check$id %in% unique(trans_keep$hy_id), ]
+  #
+  # # update flowlines to keep with flowlines that intersect a waterbody BUT STILL,
+  # # have transects that are NOT in the waterbody
+  # to_keep <- dplyr::bind_rows(to_keep, to_check)
+  #
+  # # 'tmp_ids' of transects that are being checked and also the transects within trans_check
+  # # that were determined to be valid (are being kept)
+  # check_ids <- unique(trans_check$tmp_id)
+  # keep_ids <- unique(trans_keep$tmp_id)
+  #
+  # # logical vectors of which flowlines/transects to keep (KEEP == TRUE)
+  # #  - Remove any transects that are on flowlines that cross a waterbody AND the transect crosses the waterbody too.
+  # #  - Keep original transects that are not on flowlines that intersect waterbodies AND
+  # #    also the transects that do NOT intersect waterbodies but are on a flowline that DOES intersect a waterbody
+  # valid_flowlines <- flowlines$id %in% to_keep$id
+  # valid_transects <- trans$tmp_id %in% dplyr::filter(trans,
+  #                                                    !tmp_id %in% check_ids[!check_ids %in% keep_ids])$tmp_id
+  #
+  # # return a list of updated flowlines and transects
+  # return(
+  #   list(
+  #     "valid_flowlines" = valid_flowlines,
+  #     "valid_transects" = valid_transects
+  #   )
+  # )
+}
+
+add_intersects_ids <- function(x, y, id_col) {
+  # make sure the crs are the same
+  y <- sf::st_transform(y, sf::st_crs(x))
+
+  # Perform the intersection
+  intersections <- sf::st_intersects(x, y)
+
+  # add the intersected values to the first dataframe
+  x[[id_col]] <- unlist(lapply(intersections, function(idx) {
+    if (length(idx) > 0) {
+      paste0(unlist(y[[id_col]][idx]), collapse = ", ")
+    } else {
+      NA
+    }
+  }))
+
+  return(x)
+}
+
+unnest_ids <- function(ids) {
+  return(
+    unique(unlist(strsplit(unique(ids), ", ")))
+  )
+}
+
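+# --- Illustrative usage sketch (not part of the original utils) ---
+# add_intersects_ids() stores the IDs of every 'y' feature that intersects each 'x' feature
+# as one comma-separated string, and unnest_ids() recovers the unique IDs from that column.
+# The 'transects' and 'fema_polygons' objects below are hypothetical.
+# transects_with_fema <- add_intersects_ids(x = transects, y = fema_polygons, id_col = "fema_id")
+# # a transect crossing two FEMA polygons would get fema_id = "101, 205"
+# unnest_ids(transects_with_fema$fema_id)
+# # -> c("101", "205", ...) the unique FEMA IDs intersected by any transect
+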
+#' Get the polygons that intersect with any of the linestring geometries
+#' This is just a wrapper around geos::geos_intersects_matrix. Takes in sf dataframes, uses geos, then outputs sf dataframes
+#' @param polygons polygon sf object. Default is NULL
+#' @param lines linestring sf object. Default is NULL.
+#'
+#' @return sf dataframe of polygons that intersect with the linestrings
+polygons_with_line_intersects <- function(polygons = NULL, lines = NULL) {
+
+  if (is.null(polygons)) {
+    stop("NULL 'polygons' argument, provide an sf dataframe of POLYGON or MULTIPOLYGON geometries")
+  }
+
+  if (is.null(lines)) {
+    stop("NULL 'lines' argument, provide an sf dataframe of LINESTRING or MULTILINESTRING geometries")
+  }
+
+  # Convert the SF geometries to geos geometries
+  polygons_geos <- geos::as_geos_geometry(polygons)
+  lines_geos <- geos::as_geos_geometry(lines)
+
+  # create an index between the polygons and linestrings
+  lines_index <- geos::geos_intersects_matrix(polygons_geos, lines_geos)
+
+  # get the polygons that have at least 1 intersection with the 'lines'
+  polygons_with_lines <- polygons[lengths(lines_index) != 0, ]
+
+  return(polygons_with_lines)
+}
+
+# TODO: Delete these NEW DOMAIN functions...
+# Create an empty file structure
+# base_dir: character, top level directory path
+# domain_dirname: character, name of the intended new domain directory, if folder exists, then the required subdirectories are created (if they DO NOT exist)
+
+# Directory tree:
+# base_dir/
+#   └── domain_dirname/
+#     ├── flowlines/
+#     ├── dem/
+#     ├── transects/
+#     ├── cross_sections/
+#     └── cs_pts/
+create_new_domain_dirs <- function(base_dir, domain_dirname, with_output = FALSE) {
+
+  # build paths
+  domain_dir <- paste0(base_dir, "/", domain_dirname)
+  flowlines_dir <- paste0(domain_dir, "/flowlines")
+  domain_subset_dir <- paste0(domain_dir, "/domain_subset")
+  dem_dir <- paste0(domain_dir, "/dem")
+  transects_dir <- paste0(domain_dir, "/transects")
+  cross_sections_dir <- paste0(domain_dir, "/cross_sections")
+  cs_pts_dir <- paste0(domain_dir, "/cs_pts")
+  vpu_subsets_dir <- paste0(domain_dir, "/vpu-subsets")
+
+  if(with_output) {
+    output_dir <- paste0(domain_dir, "/outputs")
+  }
+
+  # create directories
+  create_if_not_exists(domain_dir)
+  create_if_not_exists(flowlines_dir)
+  create_if_not_exists(domain_subset_dir)
+  create_if_not_exists(dem_dir)
+  create_if_not_exists(transects_dir)
+  create_if_not_exists(cross_sections_dir)
+  create_if_not_exists(cs_pts_dir)
+  create_if_not_exists(vpu_subsets_dir)
+
+  if(with_output) {
+    create_if_not_exists(output_dir)
+  }
+
+}
+
+# get path strings for a domain dir (based on a base dir and domain dirname)
+# NOTE: this does NOT guarantee that these folders exist,
+# NOTE: it just gets the paths if they were created by create_new_domain_dirs()
+get_new_domain_paths <- function(base_dir, domain_dirname, with_output = FALSE) {
+
+  # build paths
+  domain_dir <- paste0(base_dir, "/", domain_dirname)
+  flowlines_dir <- paste0(domain_dir, "/flowlines")
+  domain_subset_dir <- paste0(domain_dir, "/domain_subset")
+  dem_dir <- paste0(domain_dir, "/dem")
+  transects_dir <- paste0(domain_dir, "/transects")
+  cross_sections_dir <- paste0(domain_dir, "/cross_sections")
+  cs_pts_dir <- paste0(domain_dir, "/cs_pts")
+  vpu_subsets_dir <- paste0(domain_dir, "/vpu-subsets")
+  output_dir <- ifelse(with_output, paste0(domain_dir, "/outputs"), NA)
+
+
+  # named list of file paths
+  return(
+    list(
+      base_dir = base_dir,
+      domain_dir = domain_dir,
+      flowlines_dir = flowlines_dir,
+      domain_subset_dir = domain_subset_dir,
+      dem_dir = dem_dir,
+      transects_dir = transects_dir,
+      cross_sections_dir = cross_sections_dir,
+      cs_pts_dir = cs_pts_dir,
+      vpu_subsets_dir = vpu_subsets_dir,
+      output_dir = output_dir
+    )
+  )
+
+}
+
+download_3dep_vrt <- function(base_dir) {
+
+  ## Cmd+A/Cmd+C from: 
http://prd-tnm.s3.amazonaws.com/index.html?prefix=StagedProducts/Elevation/13/TIFF/current/ + ### paste w/ `datapasta::vector_paste_vertical()` + ### Some manual cleaning of header and footer mess... + ### Reason? Un-scrapable page, and no index.gpkg... + + t <- c( + "0 n06e162/", + "0 n06e163/", + "0 n07e134/", + "0 n07e151/", + "0 n07e152/", + "0 n07e158/", + "0 n08e134/", + "0 n08e151/", + "0 n08e152/", + "0 n08e158/", + "0 n09e134/", + "0 n10e138/", + "0 n14e144/", + "0 n15e145/", + "0 n16e145/", + "0 n18w065/", + "0 n18w066/", + "0 n18w067/", + "0 n18w068/", + "0 n19w065/", + "0 n19w066/", + "0 n19w067/", + "0 n19w068/", + "0 n19w156/", + "0 n20w155/", + "0 n20w156/", + "0 n20w157/", + "0 n21w156/", + "0 n21w157/", + "0 n21w158/", + "0 n22w157/", + "0 n22w158/", + "0 n22w159/", + "0 n22w160/", + "0 n22w161/", + "0 n23w160/", + "0 n23w161/", + "0 n25w081/", + "0 n25w082/", + "0 n25w083/", + "0 n26w081/", + "0 n26w082/", + "0 n26w098/", + "0 n26w099/", + "0 n27w081/", + "0 n27w082/", + "0 n27w083/", + "0 n27w098/", + "0 n27w099/", + "0 n27w100/", + "0 n28w081/", + "0 n28w082/", + "0 n28w083/", + "0 n28w097/", + "0 n28w098/", + "0 n28w099/", + "0 n28w100/", + "0 n28w101/", + "0 n29w081/", + "0 n29w082/", + "0 n29w083/", + "0 n29w090/", + "0 n29w096/", + "0 n29w097/", + "0 n29w098/", + "0 n29w099/", + "0 n29w100/", + "0 n29w101/", + "0 n29w104/", + "0 n30w081/", + "0 n30w082/", + "0 n30w083/", + "0 n30w084/", + "0 n30w085/", + "0 n30w086/", + "0 n30w089/", + "0 n30w090/", + "0 n30w091/", + "0 n30w092/", + "0 n30w093/", + "0 n30w094/", + "0 n30w095/", + "0 n30w096/", + "0 n30w097/", + "0 n30w098/", + "0 n30w099/", + "0 n30w100/", + "0 n30w101/", + "0 n30w102/", + "0 n30w103/", + "0 n30w104/", + "0 n30w105/", + "0 n31w082/", + "0 n31w083/", + "0 n31w084/", + "0 n31w085/", + "0 n31w086/", + "0 n31w087/", + "0 n31w088/", + "0 n31w089/", + "0 n31w090/", + "0 n31w091/", + "0 n31w092/", + "0 n31w093/", + "0 n31w094/", + "0 n31w095/", + "0 n31w096/", + "0 n31w097/", + "0 n31w098/", + "0 n31w099/", + "0 n31w100/", + "0 n31w101/", + "0 n31w102/", + "0 n31w103/", + "0 n31w104/", + "0 n31w105/", + "0 n31w106/", + "0 n31w107/", + "0 n32w081/", + "0 n32w082/", + "0 n32w083/", + "0 n32w084/", + "0 n32w085/", + "0 n32w086/", + "0 n32w087/", + "0 n32w088/", + "0 n32w089/", + "0 n32w090/", + "0 n32w091/", + "0 n32w092/", + "0 n32w093/", + "0 n32w094/", + "0 n32w095/", + "0 n32w096/", + "0 n32w097/", + "0 n32w098/", + "0 n32w099/", + "0 n32w100/", + "0 n32w101/", + "0 n32w102/", + "0 n32w103/", + "0 n32w104/", + "0 n32w105/", + "0 n32w106/", + "0 n32w107/", + "0 n32w108/", + "0 n32w109/", + "0 n32w110/", + "0 n32w111/", + "0 n32w112/", + "0 n32w113/", + "0 n32w114/", + "0 n33w080/", + "0 n33w081/", + "0 n33w082/", + "0 n33w083/", + "0 n33w084/", + "0 n33w085/", + "0 n33w086/", + "0 n33w087/", + "0 n33w088/", + "0 n33w089/", + "0 n33w090/", + "0 n33w091/", + "0 n33w092/", + "0 n33w093/", + "0 n33w094/", + "0 n33w095/", + "0 n33w096/", + "0 n33w097/", + "0 n33w098/", + "0 n33w099/", + "0 n33w100/", + "0 n33w101/", + "0 n33w102/", + "0 n33w103/", + "0 n33w104/", + "0 n33w105/", + "0 n33w106/", + "0 n33w107/", + "0 n33w108/", + "0 n33w109/", + "0 n33w110/", + "0 n33w111/", + "0 n33w112/", + "0 n33w113/", + "0 n33w114/", + "0 n33w115/", + "0 n33w116/", + "0 n33w117/", + "0 n33w118/", + "0 n33w119/", + "0 n34w078/", + "0 n34w079/", + "0 n34w080/", + "0 n34w081/", + "0 n34w082/", + "0 n34w083/", + "0 n34w084/", + "0 n34w085/", + "0 n34w086/", + "0 n34w087/", + "0 n34w088/", + "0 n34w089/", + "0 n34w090/", + "0 
n34w091/", + "0 n34w092/", + "0 n34w093/", + "0 n34w094/", + "0 n34w095/", + "0 n34w096/", + "0 n34w097/", + "0 n34w098/", + "0 n34w099/", + "0 n34w100/", + "0 n34w101/", + "0 n34w102/", + "0 n34w103/", + "0 n34w104/", + "0 n34w105/", + "0 n34w106/", + "0 n34w107/", + "0 n34w108/", + "0 n34w109/", + "0 n34w110/", + "0 n34w111/", + "0 n34w112/", + "0 n34w113/", + "0 n34w114/", + "0 n34w115/", + "0 n34w116/", + "0 n34w117/", + "0 n34w118/", + "0 n34w119/", + "0 n34w120/", + "0 n34w121/", + "0 n35w076/", + "0 n35w077/", + "0 n35w078/", + "0 n35w079/", + "0 n35w080/", + "0 n35w081/", + "0 n35w082/", + "0 n35w083/", + "0 n35w084/", + "0 n35w085/", + "0 n35w086/", + "0 n35w087/", + "0 n35w088/", + "0 n35w089/", + "0 n35w090/", + "0 n35w091/", + "0 n35w092/", + "0 n35w093/", + "0 n35w094/", + "0 n35w095/", + "0 n35w096/", + "0 n35w097/", + "0 n35w098/", + "0 n35w099/", + "0 n35w100/", + "0 n35w101/", + "0 n35w102/", + "0 n35w103/", + "0 n35w104/", + "0 n35w105/", + "0 n35w106/", + "0 n35w107/", + "0 n35w108/", + "0 n35w109/", + "0 n35w110/", + "0 n35w111/", + "0 n35w112/", + "0 n35w113/", + "0 n35w114/", + "0 n35w115/", + "0 n35w116/", + "0 n35w117/", + "0 n35w118/", + "0 n35w119/", + "0 n35w120/", + "0 n35w121/", + "0 n36w076/", + "0 n36w077/", + "0 n36w078/", + "0 n36w079/", + "0 n36w080/", + "0 n36w081/", + "0 n36w082/", + "0 n36w083/", + "0 n36w084/", + "0 n36w085/", + "0 n36w086/", + "0 n36w087/", + "0 n36w088/", + "0 n36w089/", + "0 n36w090/", + "0 n36w091/", + "0 n36w092/", + "0 n36w093/", + "0 n36w094/", + "0 n36w095/", + "0 n36w096/", + "0 n36w097/", + "0 n36w098/", + "0 n36w099/", + "0 n36w100/", + "0 n36w101/", + "0 n36w102/", + "0 n36w103/", + "0 n36w104/", + "0 n36w105/", + "0 n36w106/", + "0 n36w107/", + "0 n36w108/", + "0 n36w109/", + "0 n36w110/", + "0 n36w111/", + "0 n36w112/", + "0 n36w113/", + "0 n36w114/", + "0 n36w115/", + "0 n36w116/", + "0 n36w117/", + "0 n36w118/", + "0 n36w119/", + "0 n36w120/", + "0 n36w121/", + "0 n36w122/", + "0 n37w076/", + "0 n37w077/", + "0 n37w078/", + "0 n37w079/", + "0 n37w080/", + "0 n37w081/", + "0 n37w082/", + "0 n37w083/", + "0 n37w084/", + "0 n37w085/", + "0 n37w086/", + "0 n37w087/", + "0 n37w088/", + "0 n37w089/", + "0 n37w090/", + "0 n37w091/", + "0 n37w092/", + "0 n37w093/", + "0 n37w094/", + "0 n37w095/", + "0 n37w096/", + "0 n37w097/", + "0 n37w098/", + "0 n37w099/", + "0 n37w100/", + "0 n37w101/", + "0 n37w102/", + "0 n37w103/", + "0 n37w104/", + "0 n37w105/", + "0 n37w106/", + "0 n37w107/", + "0 n37w108/", + "0 n37w109/", + "0 n37w110/", + "0 n37w111/", + "0 n37w112/", + "0 n37w113/", + "0 n37w114/", + "0 n37w115/", + "0 n37w116/", + "0 n37w117/", + "0 n37w118/", + "0 n37w119/", + "0 n37w120/", + "0 n37w121/", + "0 n37w122/", + "0 n37w123/", + "0 n38w076/", + "0 n38w077/", + "0 n38w078/", + "0 n38w079/", + "0 n38w080/", + "0 n38w081/", + "0 n38w082/", + "0 n38w083/", + "0 n38w084/", + "0 n38w085/", + "0 n38w086/", + "0 n38w087/", + "0 n38w088/", + "0 n38w089/", + "0 n38w090/", + "0 n38w091/", + "0 n38w092/", + "0 n38w093/", + "0 n38w094/", + "0 n38w095/", + "0 n38w096/", + "0 n38w097/", + "0 n38w098/", + "0 n38w099/", + "0 n38w100/", + "0 n38w101/", + "0 n38w102/", + "0 n38w103/", + "0 n38w104/", + "0 n38w105/", + "0 n38w106/", + "0 n38w107/", + "0 n38w108/", + "0 n38w109/", + "0 n38w110/", + "0 n38w111/", + "0 n38w112/", + "0 n38w113/", + "0 n38w114/", + "0 n38w115/", + "0 n38w116/", + "0 n38w117/", + "0 n38w118/", + "0 n38w119/", + "0 n38w120/", + "0 n38w121/", + "0 n38w122/", + "0 n38w123/", + "0 n38w124/", + "0 n39w075/", + "0 
n39w076/", + "0 n39w077/", + "0 n39w078/", + "0 n39w079/", + "0 n39w080/", + "0 n39w081/", + "0 n39w082/", + "0 n39w083/", + "0 n39w084/", + "0 n39w085/", + "0 n39w086/", + "0 n39w087/", + "0 n39w088/", + "0 n39w089/", + "0 n39w090/", + "0 n39w091/", + "0 n39w092/", + "0 n39w093/", + "0 n39w094/", + "0 n39w095/", + "0 n39w096/", + "0 n39w097/", + "0 n39w098/", + "0 n39w099/", + "0 n39w100/", + "0 n39w101/", + "0 n39w102/", + "0 n39w103/", + "0 n39w104/", + "0 n39w105/", + "0 n39w106/", + "0 n39w107/", + "0 n39w108/", + "0 n39w109/", + "0 n39w110/", + "0 n39w111/", + "0 n39w112/", + "0 n39w113/", + "0 n39w114/", + "0 n39w115/", + "0 n39w116/", + "0 n39w117/", + "0 n39w118/", + "0 n39w119/", + "0 n39w120/", + "0 n39w121/", + "0 n39w122/", + "0 n39w123/", + "0 n39w124/", + "0 n40w075/", + "0 n40w076/", + "0 n40w077/", + "0 n40w078/", + "0 n40w079/", + "0 n40w080/", + "0 n40w081/", + "0 n40w082/", + "0 n40w083/", + "0 n40w084/", + "0 n40w085/", + "0 n40w086/", + "0 n40w087/", + "0 n40w088/", + "0 n40w089/", + "0 n40w090/", + "0 n40w091/", + "0 n40w092/", + "0 n40w093/", + "0 n40w094/", + "0 n40w095/", + "0 n40w096/", + "0 n40w097/", + "0 n40w098/", + "0 n40w099/", + "0 n40w100/", + "0 n40w101/", + "0 n40w102/", + "0 n40w103/", + "0 n40w104/", + "0 n40w105/", + "0 n40w106/", + "0 n40w107/", + "0 n40w108/", + "0 n40w109/", + "0 n40w110/", + "0 n40w111/", + "0 n40w112/", + "0 n40w113/", + "0 n40w114/", + "0 n40w115/", + "0 n40w116/", + "0 n40w117/", + "0 n40w118/", + "0 n40w119/", + "0 n40w120/", + "0 n40w121/", + "0 n40w122/", + "0 n40w123/", + "0 n40w124/", + "0 n40w125/", + "0 n41w073/", + "0 n41w074/", + "0 n41w075/", + "0 n41w076/", + "0 n41w077/", + "0 n41w078/", + "0 n41w079/", + "0 n41w080/", + "0 n41w081/", + "0 n41w082/", + "0 n41w083/", + "0 n41w084/", + "0 n41w085/", + "0 n41w086/", + "0 n41w087/", + "0 n41w088/", + "0 n41w089/", + "0 n41w090/", + "0 n41w091/", + "0 n41w092/", + "0 n41w093/", + "0 n41w094/", + "0 n41w095/", + "0 n41w096/", + "0 n41w097/", + "0 n41w098/", + "0 n41w099/", + "0 n41w100/", + "0 n41w101/", + "0 n41w102/", + "0 n41w103/", + "0 n41w104/", + "0 n41w105/", + "0 n41w106/", + "0 n41w107/", + "0 n41w108/", + "0 n41w109/", + "0 n41w110/", + "0 n41w111/", + "0 n41w112/", + "0 n41w113/", + "0 n41w114/", + "0 n41w115/", + "0 n41w116/", + "0 n41w117/", + "0 n41w118/", + "0 n41w119/", + "0 n41w120/", + "0 n41w121/", + "0 n41w122/", + "0 n41w123/", + "0 n41w124/", + "0 n41w125/", + "0 n42w070/", + "0 n42w071/", + "0 n42w072/", + "0 n42w073/", + "0 n42w074/", + "0 n42w075/", + "0 n42w076/", + "0 n42w077/", + "0 n42w078/", + "0 n42w079/", + "0 n42w080/", + "0 n42w081/", + "0 n42w082/", + "0 n42w083/", + "0 n42w084/", + "0 n42w085/", + "0 n42w086/", + "0 n42w087/", + "0 n42w088/", + "0 n42w089/", + "0 n42w090/", + "0 n42w091/", + "0 n42w092/", + "0 n42w093/", + "0 n42w094/", + "0 n42w095/", + "0 n42w096/", + "0 n42w097/", + "0 n42w098/", + "0 n42w099/", + "0 n42w100/", + "0 n42w101/", + "0 n42w102/", + "0 n42w103/", + "0 n42w104/", + "0 n42w105/", + "0 n42w106/", + "0 n42w107/", + "0 n42w108/", + "0 n42w109/", + "0 n42w110/", + "0 n42w111/", + "0 n42w112/", + "0 n42w113/", + "0 n42w114/", + "0 n42w115/", + "0 n42w116/", + "0 n42w117/", + "0 n42w118/", + "0 n42w119/", + "0 n42w120/", + "0 n42w121/", + "0 n42w122/", + "0 n42w123/", + "0 n42w124/", + "0 n42w125/", + "0 n43w071/", + "0 n43w072/", + "0 n43w073/", + "0 n43w074/", + "0 n43w075/", + "0 n43w076/", + "0 n43w077/", + "0 n43w078/", + "0 n43w079/", + "0 n43w080/", + "0 n43w081/", + "0 n43w082/", + "0 n43w083/", + "0 
n43w084/", + "0 n43w085/", + "0 n43w086/", + "0 n43w087/", + "0 n43w088/", + "0 n43w089/", + "0 n43w090/", + "0 n43w091/", + "0 n43w092/", + "0 n43w093/", + "0 n43w094/", + "0 n43w095/", + "0 n43w096/", + "0 n43w097/", + "0 n43w098/", + "0 n43w099/", + "0 n43w100/", + "0 n43w101/", + "0 n43w102/", + "0 n43w103/", + "0 n43w104/", + "0 n43w105/", + "0 n43w106/", + "0 n43w107/", + "0 n43w108/", + "0 n43w109/", + "0 n43w110/", + "0 n43w111/", + "0 n43w112/", + "0 n43w113/", + "0 n43w114/", + "0 n43w115/", + "0 n43w116/", + "0 n43w117/", + "0 n43w118/", + "0 n43w119/", + "0 n43w120/", + "0 n43w121/", + "0 n43w122/", + "0 n43w123/", + "0 n43w124/", + "0 n43w125/", + "0 n44w069/", + "0 n44w070/", + "0 n44w071/", + "0 n44w072/", + "0 n44w073/", + "0 n44w074/", + "0 n44w075/", + "0 n44w076/", + "0 n44w077/", + "0 n44w078/", + "0 n44w079/", + "0 n44w080/", + "0 n44w081/", + "0 n44w083/", + "0 n44w084/", + "0 n44w085/", + "0 n44w086/", + "0 n44w087/", + "0 n44w088/", + "0 n44w089/", + "0 n44w090/", + "0 n44w091/", + "0 n44w092/", + "0 n44w093/", + "0 n44w094/", + "0 n44w095/", + "0 n44w096/", + "0 n44w097/", + "0 n44w098/", + "0 n44w099/", + "0 n44w100/", + "0 n44w101/", + "0 n44w102/", + "0 n44w103/", + "0 n44w104/", + "0 n44w105/", + "0 n44w106/", + "0 n44w107/", + "0 n44w108/", + "0 n44w109/", + "0 n44w110/", + "0 n44w111/", + "0 n44w112/", + "0 n44w113/", + "0 n44w114/", + "0 n44w115/", + "0 n44w116/", + "0 n44w117/", + "0 n44w118/", + "0 n44w119/", + "0 n44w120/", + "0 n44w121/", + "0 n44w122/", + "0 n44w123/", + "0 n44w124/", + "0 n44w125/", + "0 n45w067/", + "0 n45w068/", + "0 n45w069/", + "0 n45w070/", + "0 n45w071/", + "0 n45w072/", + "0 n45w073/", + "0 n45w074/", + "0 n45w075/", + "0 n45w076/", + "0 n45w077/", + "0 n45w083/", + "0 n45w084/", + "0 n45w085/", + "0 n45w086/", + "0 n45w087/", + "0 n45w088/", + "0 n45w089/", + "0 n45w090/", + "0 n45w091/", + "0 n45w092/", + "0 n45w093/", + "0 n45w094/", + "0 n45w095/", + "0 n45w096/", + "0 n45w097/", + "0 n45w098/", + "0 n45w099/", + "0 n45w100/", + "0 n45w101/", + "0 n45w102/", + "0 n45w103/", + "0 n45w104/", + "0 n45w105/", + "0 n45w106/", + "0 n45w107/", + "0 n45w108/", + "0 n45w109/", + "0 n45w110/", + "0 n45w111/", + "0 n45w112/", + "0 n45w113/", + "0 n45w114/", + "0 n45w115/", + "0 n45w116/", + "0 n45w117/", + "0 n45w118/", + "0 n45w119/", + "0 n45w120/", + "0 n45w121/", + "0 n45w122/", + "0 n45w123/", + "0 n45w124/", + "0 n45w125/", + "0 n46w068/", + "0 n46w069/", + "0 n46w070/", + "0 n46w071/", + "0 n46w072/", + "0 n46w073/", + "0 n46w074/", + "0 n46w075/", + "0 n46w084/", + "0 n46w085/", + "0 n46w086/", + "0 n46w087/", + "0 n46w088/", + "0 n46w089/", + "0 n46w090/", + "0 n46w091/", + "0 n46w092/", + "0 n46w093/", + "0 n46w094/", + "0 n46w095/", + "0 n46w096/", + "0 n46w097/", + "0 n46w098/", + "0 n46w099/", + "0 n46w100/", + "0 n46w101/", + "0 n46w102/", + "0 n46w103/", + "0 n46w104/", + "0 n46w105/", + "0 n46w106/", + "0 n46w107/", + "0 n46w108/", + "0 n46w109/", + "0 n46w110/", + "0 n46w111/", + "0 n46w112/", + "0 n46w113/", + "0 n46w114/", + "0 n46w115/", + "0 n46w116/", + "0 n46w117/", + "0 n46w118/", + "0 n46w119/", + "0 n46w120/", + "0 n46w121/", + "0 n46w122/", + "0 n46w123/", + "0 n46w124/", + "0 n46w125/", + "0 n47w068/", + "0 n47w069/", + "0 n47w070/", + "0 n47w071/", + "0 n47w084/", + "0 n47w085/", + "0 n47w086/", + "0 n47w087/", + "0 n47w088/", + "0 n47w089/", + "0 n47w090/", + "0 n47w091/", + "0 n47w092/", + "0 n47w093/", + "0 n47w094/", + "0 n47w095/", + "0 n47w096/", + "0 n47w097/", + "0 n47w098/", + "0 n47w099/", + "0 
n47w100/", + "0 n47w101/", + "0 n47w102/", + "0 n47w103/", + "0 n47w104/", + "0 n47w105/", + "0 n47w106/", + "0 n47w107/", + "0 n47w108/", + "0 n47w109/", + "0 n47w110/", + "0 n47w111/", + "0 n47w112/", + "0 n47w113/", + "0 n47w114/", + "0 n47w115/", + "0 n47w116/", + "0 n47w117/", + "0 n47w118/", + "0 n47w119/", + "0 n47w120/", + "0 n47w121/", + "0 n47w122/", + "0 n47w123/", + "0 n47w124/", + "0 n47w125/", + "0 n48w068/", + "0 n48w069/", + "0 n48w070/", + "0 n48w087/", + "0 n48w088/", + "0 n48w089/", + "0 n48w090/", + "0 n48w091/", + "0 n48w092/", + "0 n48w093/", + "0 n48w094/", + "0 n48w095/", + "0 n48w096/", + "0 n48w097/", + "0 n48w098/", + "0 n48w099/", + "0 n48w100/", + "0 n48w101/", + "0 n48w102/", + "0 n48w103/", + "0 n48w104/", + "0 n48w105/", + "0 n48w106/", + "0 n48w107/", + "0 n48w108/", + "0 n48w109/", + "0 n48w110/", + "0 n48w111/", + "0 n48w112/", + "0 n48w113/", + "0 n48w114/", + "0 n48w115/", + "0 n48w116/", + "0 n48w117/", + "0 n48w118/", + "0 n48w119/", + "0 n48w120/", + "0 n48w121/", + "0 n48w122/", + "0 n48w123/", + "0 n48w124/", + "0 n48w125/", + "0 n49w089/", + "0 n49w090/", + "0 n49w091/", + "0 n49w092/", + "0 n49w093/", + "0 n49w094/", + "0 n49w095/", + "0 n49w096/", + "0 n49w097/", + "0 n49w098/", + "0 n49w099/", + "0 n49w100/", + "0 n49w101/", + "0 n49w102/", + "0 n49w103/", + "0 n49w104/", + "0 n49w105/", + "0 n49w106/", + "0 n49w107/", + "0 n49w108/", + "0 n49w109/", + "0 n49w110/", + "0 n49w111/", + "0 n49w112/", + "0 n49w113/", + "0 n49w114/", + "0 n49w115/", + "0 n49w116/", + "0 n49w117/", + "0 n49w118/", + "0 n49w119/", + "0 n49w120/", + "0 n49w121/", + "0 n49w122/", + "0 n49w123/", + "0 n49w124/", + "0 n49w125/", + "0 n50w095/", + "0 n50w096/", + "0 n50w097/", + "0 n50w098/", + "0 n50w099/", + "0 n50w100/", + "0 n50w101/", + "0 n50w107/", + "0 n50w108/", + "0 n50w122/", + "0 n50w123/", + "0 n50w124/", + "0 n52e177/", + "0 n52e178/", + "0 n52e179/", + "0 n52w174/", + "0 n52w176/", + "0 n52w177/", + "0 n52w178/", + "0 n52w179/", + "0 n52w180/", + "0 n53e172/", + "0 n53e173/", + "0 n53e174/", + "0 n53e175/", + "0 n53e177/", + "0 n53e178/", + "0 n53e179/", + "0 n53w169/", + "0 n53w170/", + "0 n53w171/", + "0 n53w172/", + "0 n53w173/", + "0 n53w174/", + "0 n53w175/", + "0 n53w176/", + "0 n53w177/", + "0 n54e172/", + "0 n54e173/", + "0 n54w167/", + "0 n54w168/", + "0 n54w169/", + "0 n54w170/", + "0 n55w131/", + "0 n55w132/", + "0 n55w133/", + "0 n55w134/", + "0 n55w160/", + "0 n55w161/", + "0 n55w162/", + "0 n55w163/", + "0 n55w164/", + "0 n55w165/", + "0 n55w166/", + "0 n55w167/", + "0 n56w130/", + "0 n56w131/", + "0 n56w132/", + "0 n56w133/", + "0 n56w134/", + "0 n56w135/", + "0 n56w156/", + "0 n56w157/", + "0 n56w159/", + "0 n56w160/", + "0 n56w161/", + "0 n56w162/", + "0 n56w163/", + "0 n56w164/", + "0 n57w131/", + "0 n57w132/", + "0 n57w133/", + "0 n57w134/", + "0 n57w135/", + "0 n57w136/", + "0 n57w153/", + "0 n57w154/", + "0 n57w155/", + "0 n57w156/", + "0 n57w157/", + "0 n57w158/", + "0 n57w159/", + "0 n57w160/", + "0 n57w161/", + "0 n57w162/", + "0 n57w170/", + "0 n57w171/", + "0 n58w133/", + "0 n58w134/", + "0 n58w135/", + "0 n58w136/", + "0 n58w137/", + "0 n58w153/", + "0 n58w154/", + "0 n58w155/", + "0 n58w156/", + "0 n58w157/", + "0 n58w158/", + "0 n58w159/", + "0 n58w170/", + "0 n58w171/", + "0 n59w134/", + "0 n59w135/", + "0 n59w136/", + "0 n59w137/", + "0 n59w138/", + "0 n59w139/", + "0 n59w152/", + "0 n59w153/", + "0 n59w154/", + "0 n59w155/", + "0 n59w156/", + "0 n59w157/", + "0 n59w158/", + "0 n59w159/", + "0 n59w160/", + "0 n59w161/", + "0 
n59w162/", + "0 n59w163/", + "0 n60w135/", + "0 n60w136/", + "0 n60w137/", + "0 n60w138/", + "0 n60w139/", + "0 n60w140/", + "0 n60w141/", + "0 n60w142/", + "0 n60w143/", + "0 n60w144/", + "0 n60w145/", + "0 n60w146/", + "0 n60w147/", + "0 n60w148/", + "0 n60w149/", + "0 n60w150/", + "0 n60w151/", + "0 n60w152/", + "0 n60w153/", + "0 n60w154/", + "0 n60w155/", + "0 n60w156/", + "0 n60w157/", + "0 n60w158/", + "0 n60w159/", + "0 n60w160/", + "0 n60w161/", + "0 n60w162/", + "0 n60w163/", + "0 n60w164/", + "0 n60w165/", + "0 n60w166/", + "0 n60w167/", + "0 n60w168/", + "0 n61w140/", + "0 n61w141/", + "0 n61w142/", + "0 n61w143/", + "0 n61w144/", + "0 n61w145/", + "0 n61w146/", + "0 n61w147/", + "0 n61w148/", + "0 n61w149/", + "0 n61w150/", + "0 n61w151/", + "0 n61w152/", + "0 n61w153/", + "0 n61w154/", + "0 n61w155/", + "0 n61w156/", + "0 n61w157/", + "0 n61w158/", + "0 n61w159/", + "0 n61w160/", + "0 n61w161/", + "0 n61w162/", + "0 n61w163/", + "0 n61w164/", + "0 n61w165/", + "0 n61w166/", + "0 n61w167/", + "0 n61w168/", + "0 n61w173/", + "0 n61w174/", + "0 n62w142/", + "0 n62w143/", + "0 n62w144/", + "0 n62w145/", + "0 n62w146/", + "0 n62w147/", + "0 n62w148/", + "0 n62w149/", + "0 n62w150/", + "0 n62w151/", + "0 n62w152/", + "0 n62w153/", + "0 n62w154/", + "0 n62w155/", + "0 n62w156/", + "0 n62w157/", + "0 n62w158/", + "0 n62w159/", + "0 n62w160/", + "0 n62w161/", + "0 n62w162/", + "0 n62w163/", + "0 n62w164/", + "0 n62w165/", + "0 n62w166/", + "0 n62w167/", + "0 n63w142/", + "0 n63w143/", + "0 n63w144/", + "0 n63w145/", + "0 n63w146/", + "0 n63w147/", + "0 n63w148/", + "0 n63w149/", + "0 n63w150/", + "0 n63w151/", + "0 n63w152/", + "0 n63w153/", + "0 n63w154/", + "0 n63w155/", + "0 n63w156/", + "0 n63w157/", + "0 n63w158/", + "0 n63w159/", + "0 n63w160/", + "0 n63w161/", + "0 n63w162/", + "0 n63w163/", + "0 n63w164/", + "0 n63w165/", + "0 n63w166/", + "0 n63w167/", + "0 n63w170/", + "0 n64w141/", + "0 n64w142/", + "0 n64w143/", + "0 n64w144/", + "0 n64w145/", + "0 n64w146/", + "0 n64w147/", + "0 n64w148/", + "0 n64w149/", + "0 n64w150/", + "0 n64w151/", + "0 n64w152/", + "0 n64w153/", + "0 n64w154/", + "0 n64w155/", + "0 n64w156/", + "0 n64w157/", + "0 n64w158/", + "0 n64w159/", + "0 n64w160/", + "0 n64w161/", + "0 n64w162/", + "0 n64w163/", + "0 n64w164/", + "0 n64w165/", + "0 n64w169/", + "0 n64w170/", + "0 n64w171/", + "0 n64w172/", + "0 n65w141/", + "0 n65w142/", + "0 n65w143/", + "0 n65w144/", + "0 n65w145/", + "0 n65w146/", + "0 n65w147/", + "0 n65w148/", + "0 n65w149/", + "0 n65w150/", + "0 n65w151/", + "0 n65w152/", + "0 n65w153/", + "0 n65w154/", + "0 n65w155/", + "0 n65w156/", + "0 n65w157/", + "0 n65w158/", + "0 n65w159/", + "0 n65w160/", + "0 n65w161/", + "0 n65w162/", + "0 n65w163/", + "0 n65w164/", + "0 n65w165/", + "0 n65w166/", + "0 n65w167/", + "0 n66w141/", + "0 n66w142/", + "0 n66w143/", + "0 n66w144/", + "0 n66w145/", + "0 n66w146/", + "0 n66w147/", + "0 n66w148/", + "0 n66w149/", + "0 n66w150/", + "0 n66w151/", + "0 n66w152/", + "0 n66w153/", + "0 n66w154/", + "0 n66w155/", + "0 n66w156/", + "0 n66w157/", + "0 n66w158/", + "0 n66w159/", + "0 n66w160/", + "0 n66w161/", + "0 n66w162/", + "0 n66w163/", + "0 n66w164/", + "0 n66w165/", + "0 n66w166/", + "0 n66w167/", + "0 n66w168/", + "0 n66w169/", + "0 n67w141/", + "0 n67w142/", + "0 n67w143/", + "0 n67w144/", + "0 n67w145/", + "0 n67w146/", + "0 n67w147/", + "0 n67w148/", + "0 n67w149/", + "0 n67w150/", + "0 n67w151/", + "0 n67w152/", + "0 n67w153/", + "0 n67w154/", + "0 n67w155/", + "0 n67w156/", + "0 n67w157/", + "0 
n67w158/", + "0 n67w159/", + "0 n67w160/", + "0 n67w161/", + "0 n67w162/", + "0 n67w163/", + "0 n67w164/", + "0 n67w165/", + "0 n67w166/", + "0 n67w167/", + "0 n67w168/", + "0 n68w141/", + "0 n68w142/", + "0 n68w143/", + "0 n68w144/", + "0 n68w145/", + "0 n68w146/", + "0 n68w147/", + "0 n68w148/", + "0 n68w149/", + "0 n68w150/", + "0 n68w151/", + "0 n68w152/", + "0 n68w153/", + "0 n68w154/", + "0 n68w155/", + "0 n68w156/", + "0 n68w157/", + "0 n68w158/", + "0 n68w159/", + "0 n68w160/", + "0 n68w161/", + "0 n68w162/", + "0 n68w163/", + "0 n68w164/", + "0 n68w165/", + "0 n68w166/", + "0 n69w141/", + "0 n69w142/", + "0 n69w143/", + "0 n69w144/", + "0 n69w145/", + "0 n69w146/", + "0 n69w147/", + "0 n69w148/", + "0 n69w149/", + "0 n69w150/", + "0 n69w151/", + "0 n69w152/", + "0 n69w153/", + "0 n69w154/", + "0 n69w155/", + "0 n69w156/", + "0 n69w157/", + "0 n69w158/", + "0 n69w159/", + "0 n69w160/", + "0 n69w161/", + "0 n69w162/", + "0 n69w163/", + "0 n69w164/", + "0 n69w165/", + "0 n69w166/", + "0 n69w167/", + "0 n70w141/", + "0 n70w142/", + "0 n70w143/", + "0 n70w144/", + "0 n70w145/", + "0 n70w146/", + "0 n70w147/", + "0 n70w148/", + "0 n70w149/", + "0 n70w150/", + "0 n70w151/", + "0 n70w152/", + "0 n70w153/", + "0 n70w154/", + "0 n70w155/", + "0 n70w156/", + "0 n70w157/", + "0 n70w158/", + "0 n70w159/", + "0 n70w160/", + "0 n70w161/", + "0 n70w162/", + "0 n70w163/", + "0 n70w164/", + "0 n70w165/", + "0 n71w143/", + "0 n71w144/", + "0 n71w145/", + "0 n71w146/", + "0 n71w147/", + "0 n71w148/", + "0 n71w149/", + "0 n71w150/", + "0 n71w151/", + "0 n71w152/", + "0 n71w153/", + "0 n71w154/", + "0 n71w155/", + "0 n71w156/", + "0 n71w157/", + "0 n71w158/", + "0 n71w159/", + "0 n71w160/", + "0 n71w161/", + "0 n71w162/", + "0 n71w163/", + "0 n71w164/", + "0 n72w155/", + "0 n72w156/", + "0 n72w157/", + "0 n72w158/", + "0 s14w170/", + "0 s14w171/" + ) + + + # sub out HTML copy pattern with vsi URL + t2 <- gsub("0 ", + "/vsicurl/https://prd-tnm.s3.amazonaws.com/StagedProducts/Elevation/13/TIFF/current/", + t) + + # add file paths following NED scheme + t3 <- paste0(t2, + "USGS_13_", + basename(t2), + ".tif") + + # Write table to data-raw + write.table(t3, + paste0(base_dir, "/ned_list_USGS_13.txt"), + row.names = FALSE, + col.names = FALSE, + quote = FALSE) + + +} +# dem_base_dir <- "/Users/anguswatters/Desktop/transects_paper/data/dem" +# download_3dep_vrt(dem_base_dir) +# +# # Create meta data object of three NED resoruces +# ned <- data.frame(rbind( +# # c(id = "USGS_1", +# # URL = "/vsicurl/https://prd-tnm.s3.amazonaws.com/StagedProducts/Elevation/1", +# # varname = "30m elevation", +# # long_name = "30m (1 arcsec) National Elevation Dataset", +# # units = "m"), +# # +# # c(id = "USGS_2", +# # URL = "/vsicurl/https://prd-tnm.s3.amazonaws.com/StagedProducts/Elevation/2", +# # varname = "60m elevation", +# # long_name = "60m (2 arcsec) National Elevation Dataset Alaska", +# # units = "m"), +# +# c(id = "USGS_13", +# URL = "/vsicurl/https://prd-tnm.s3.amazonaws.com/StagedProducts/Elevation/13", +# varname = "10m elevation", +# long_name = "10m (1/3th arcsec) National Elevation Dataset", +# units = "m") +# )) +# +# +# +# +# # Loop over the three resolutions +# for (i in 1:length(ned)) { +# i = 1 +# +# # Define output text file path +# txt_file <- paste0(dem_base_dir, "/ned_list_", ned$id[i], "_2.txt") +# +# # Define output VRT path +# vrt_file <- paste0(paste0(dem_base_dir, "/ned_", ned$id[i], ".vrt")) +# +# # If VRT does NOT exist, build VRT +# if (!file.exists(vrt_file)) { +# +# # read the 
corresponding index.gpkg +# files <- sf::read_sf(ned$domain_url[i]) +# DEM_UR +# DEM_PATH <- "/vsicurl/https://prd-tnm.s3.amazonaws.com/StagedProducts/Elevation/1/TIFF/USGS_Seamless_DEM_1.vrt" +# # Build full HTTPS paths to "./current/" +# files <- c(file.path(ned$URL[i], "TIFF/current", gsub("[.]/", "", files$location))) +# +# files <- "/vsicurl/https://prd-tnm.s3.amazonaws.com/StagedProducts/Elevation/1/TIFF/USGS_Seamless_DEM_1.vrt" +# +# # write list of files to text file +# write.table(files, txt_file, row.names = FALSE, col.names = FALSE, quote = FALSE) +# +# # build VRT from text file input using GDAL system call ... +# system(paste("gdalbuildvrt -input_file_list", txt_file, vrt_file)) +# } +# +# logger::log_info("Finished ", ned$id[i], "...") +# } + +# +# match_transects_to_extended_cs_pts <- function(transect_lines, fixed_cs_pts, crosswalk_id, extension_pct = 0.5 ) { +# +# # transect_lines = transects +# # fixed_cs_pts = fixed_pts +# # crosswalk_id = CROSSWALK_ID +# +# fixed_cs_pts <- nhdplusTools::rename_geometry(fixed_cs_pts, "geometry") +# transect_lines <- nhdplusTools::rename_geometry(transect_lines, "geometry") +# +# # get the counts of each point type to add this data to the transect_lines dataset +# point_type_counts <- hydrofabric3D::get_point_type_counts(classified_pts = fixed_cs_pts, +# crosswalk_id = crosswalk_id) +# # Check the number of cross sections that were extended +# message("Subsetting cross section points generated after extending transect_lines...") +# +# # extract cross section points that have an "is_extended" value of TRUE +# extended_pts <- +# fixed_cs_pts %>% +# dplyr::filter(is_extended) %>% +# hydrofabric3D::add_tmp_id(x = crosswalk_id) +# +# # extended_pts %>% +# # get_unique_tmp_ids() %>% +# # length() +# +# # extract transect_lines that have a "crosswalk_id" in the "extended_pts" dataset +# update_transect_lines <- +# transect_lines %>% +# hydrofabric3D::add_tmp_id(x = crosswalk_id) %>% +# dplyr::filter(tmp_id %in% unique(extended_pts$tmp_id)) +# +# cs_pt_uids <- unique(hydrofabric3D::add_tmp_id(fixed_cs_pts, x = crosswalk_id)$tmp_id) +# +# # If any transect_lines were extended, update the transect_lines dataset, and overwrite local and S3 transect_lines geopackages +# if (nrow(update_transect_lines) > 0) { +# message("Updating ", nrow(update_transect_lines), " transect_lines") +# +# +# # update_transect_lines <- +# # update_transect_lines %>% +# # dplyr::rename(hy_id := !!sym(crosswalk_id)) +# # +# update_transect_lines <- +# update_transect_lines %>% +# # apply extend_by_percent function to each transect line: +# hydrofabric3D:::extend_by_percent( +# crosswalk_id = crosswalk_id, +# pct = extension_pct, +# length_col = "cs_lengthm" +# ) +# +# update_transect_lines <- hydroloom::rename_geometry(update_transect_lines, "geometry") +# +# # update_transect_lines <- +# # update_transect_lines %>% +# # dplyr::rename(!!sym(crosswalk_id) := hy_id) +# +# # cs_pt_uids <- unique(hydrofabric3D::add_tmp_id(fixed_cs_pts, x = get(crosswalk_id))$tmp_id) +# # transect_uids <- unique(hydrofabric3D::add_tmp_id(transect_lines, x = get(crosswalk_id))$tmp_id) +# +# # Filter down to ONLY points that were finalized and rectified from rectify_cs_pts() +# # Remove old transect_lines that have "tmp_id" in "extended_pts" (transect_lines that were unchanged and are "good_to_go") +# # and then replace with old transect_lines with the "update_transect_lines" +# out_transect_lines <- +# transect_lines %>% +# hydrofabric3D::add_tmp_id(x = crosswalk_id) %>% +# dplyr::filter(tmp_id 
%in% cs_pt_uids) %>% +# dplyr::filter(!tmp_id %in% unique(extended_pts$tmp_id)) %>% +# dplyr::bind_rows( +# dplyr::mutate(update_transect_lines, is_extended = TRUE) +# ) +# +# # transect_lines %>% +# # hydrofabric3D::add_tmp_id(x = "hy_id") %>% +# # # dplyr::filter(!tmp_id %in% unique(extended_pts$tmp_id)) %>% +# # dplyr::filter(tmp_id %in% unique(hydrofabric3D::add_tmp_id(fixed_pts, x = "hy_id")$tmp_id)) %>% # Subset down to the remaining tmp_ids in the fixed points +# # dplyr::filter(!tmp_id %in% unique(extended_pts$tmp_id)) %>% # remove the tmp_ids that we are going add back in with the extended versions of those tmp_ids +# # dplyr::bind_rows( # bring in the new updated extended transect_lines +# # dplyr::mutate( +# # update_transect_lines, +# # is_extended = TRUE +# # ) +# # ) +# } else { +# # If no transect_lines were extended +# out_transect_lines <- +# transect_lines %>% +# hydrofabric3D::add_tmp_id(x = crosswalk_id) %>% +# dplyr::filter(tmp_id %in% cs_pt_uids) %>% +# # dplyr::filter(tmp_id %in% unique(hydrofabric3D::add_tmp_id(fixed_cs_pts, x = get(crosswalk_id))$tmp_id)) %>% +# dplyr::filter(!tmp_id %in% unique(extended_pts$tmp_id)) +# } +# +# # Finalize new transect_lines +# out_transect_lines <- +# out_transect_lines %>% +# dplyr::left_join( +# point_type_counts, +# by = c(crosswalk_id, "cs_id") +# ) %>% +# dplyr::left_join( +# dplyr::ungroup( +# dplyr::slice( +# dplyr::group_by( +# dplyr::select(sf::st_drop_geometry(fixed_cs_pts), +# dplyr::any_of(crosswalk_id), +# cs_id, bottom, left_bank, right_bank, valid_banks, has_relief +# ), +# dplyr::across(dplyr::any_of(c(crosswalk_id, "cs_id"))) +# ), +# 1 +# ) +# ), +# by = c(crosswalk_id, "cs_id") +# ) %>% +# dplyr::select( +# dplyr::any_of(crosswalk_id), +# cs_source, cs_id, cs_measure, cs_lengthm, +# # sinuosity, +# is_extended, +# left_bank_count, right_bank_count, channel_count, bottom_count, +# bottom, left_bank, right_bank, valid_banks, has_relief, +# geometry +# ) %>% +# dplyr::mutate( +# is_extended = ifelse(is.na(is_extended), FALSE, is_extended) +# ) +# +# return(out_transect_lines) +# } + +# utility function for getting transects extended and +# matching cross section points that went through "get_improved_cs_pts()" and that were extended for improvement +# returns the extended version of the transects +# match_transects_to_extended_cs_pts <- function(transect_lines, fixed_cs_pts, crosswalk_id) { +# +# # transect_lines = transects +# # fixed_cs_pts = fixed_pts +# # crosswalk_id = CROSSWALK_ID +# +# fixed_cs_pts <- nhdplusTools::rename_geometry(fixed_cs_pts, "geometry") +# transect_lines <- nhdplusTools::rename_geometry(transect_lines, "geometry") +# +# # get the counts of each point type to add this data to the transect_lines dataset +# point_type_counts <- hydrofabric3D::get_point_type_counts(classified_pts = fixed_cs_pts, +# crosswalk_id = crosswalk_id) +# # Check the number of cross sections that were extended +# message("Subsetting cross section points generated after extending transect_lines...") +# +# # extract cross section points that have an "is_extended" value of TRUE +# extended_pts <- +# fixed_cs_pts %>% +# dplyr::filter(is_extended) %>% +# hydrofabric3D::add_tmp_id(x = crosswalk_id) +# +# # extended_pts %>% +# # get_unique_tmp_ids() %>% +# # length() +# +# # extract transect_lines that have a "crosswalk_id" in the "extended_pts" dataset +# update_transect_lines <- +# transect_lines %>% +# hydrofabric3D::add_tmp_id(x = crosswalk_id) %>% +# dplyr::filter(tmp_id %in% unique(extended_pts$tmp_id)) +# +# cs_pt_uids 
<- unique(hydrofabric3D::add_tmp_id(fixed_cs_pts, x = crosswalk_id)$tmp_id) +# +# # If any transect_lines were extended, update the transect_lines dataset, and overwrite local and S3 transect_lines geopackages +# if (nrow(update_transect_lines) > 0) { +# message("Updating ", nrow(update_transect_lines), " transect_lines") +# +# +# update_transect_lines <- +# update_transect_lines %>% +# dplyr::rename(hy_id := !!sym(crosswalk_id)) +# +# update_transect_lines <- +# update_transect_lines %>% +# # apply extend_by_percent function to each transect line: +# hydrofabric3D:::extend_by_percent( +# pct = EXTENSION_PCT, +# length_col = "cs_lengthm" +# ) +# +# update_transect_lines <- hydroloom::rename_geometry(update_transect_lines, "geometry") +# +# update_transect_lines <- +# update_transect_lines %>% +# dplyr::rename(!!sym(crosswalk_id) := hy_id) +# +# # cs_pt_uids <- unique(hydrofabric3D::add_tmp_id(fixed_cs_pts, x = get(crosswalk_id))$tmp_id) +# # transect_uids <- unique(hydrofabric3D::add_tmp_id(transect_lines, x = get(crosswalk_id))$tmp_id) +# +# # Filter down to ONLY points that were finalized and rectified from rectify_cs_pts() +# # Remove old transect_lines that have "tmp_id" in "extended_pts" (transect_lines that were unchanged and are "good_to_go") +# # and then replace with old transect_lines with the "update_transect_lines" +# out_transect_lines <- +# transect_lines %>% +# hydrofabric3D::add_tmp_id(x = crosswalk_id) %>% +# dplyr::filter(tmp_id %in% cs_pt_uids) %>% +# dplyr::filter(!tmp_id %in% unique(extended_pts$tmp_id)) %>% +# dplyr::bind_rows( +# dplyr::mutate(update_transect_lines, is_extended = TRUE) +# ) +# +# # transect_lines %>% +# # hydrofabric3D::add_tmp_id(x = "hy_id") %>% +# # # dplyr::filter(!tmp_id %in% unique(extended_pts$tmp_id)) %>% +# # dplyr::filter(tmp_id %in% unique(hydrofabric3D::add_tmp_id(fixed_pts, x = "hy_id")$tmp_id)) %>% # Subset down to the remaining tmp_ids in the fixed points +# # dplyr::filter(!tmp_id %in% unique(extended_pts$tmp_id)) %>% # remove the tmp_ids that we are going add back in with the extended versions of those tmp_ids +# # dplyr::bind_rows( # bring in the new updated extended transect_lines +# # dplyr::mutate( +# # update_transect_lines, +# # is_extended = TRUE +# # ) +# # ) +# } else { +# # If no transect_lines were extended +# out_transect_lines <- +# transect_lines %>% +# hydrofabric3D::add_tmp_id(x = crosswalk_id) %>% +# dplyr::filter(tmp_id %in% cs_pt_uids) %>% +# # dplyr::filter(tmp_id %in% unique(hydrofabric3D::add_tmp_id(fixed_cs_pts, x = get(crosswalk_id))$tmp_id)) %>% +# dplyr::filter(!tmp_id %in% unique(extended_pts$tmp_id)) +# } +# +# # Finalize new transect_lines +# out_transect_lines <- +# out_transect_lines %>% +# dplyr::left_join( +# point_type_counts, +# by = c(crosswalk_id, "cs_id") +# ) %>% +# dplyr::left_join( +# dplyr::ungroup( +# dplyr::slice( +# dplyr::group_by( +# dplyr::select(sf::st_drop_geometry(fixed_cs_pts), +# dplyr::any_of(crosswalk_id), +# cs_id, bottom, left_bank, right_bank, valid_banks, has_relief +# ), +# dplyr::across(dplyr::any_of(c(crosswalk_id, "cs_id"))) +# ), +# 1 +# ) +# ), +# by = c(crosswalk_id, "cs_id") +# ) %>% +# dplyr::select( +# dplyr::any_of(crosswalk_id), +# cs_source, cs_id, cs_measure, cs_lengthm, +# # sinuosity, +# is_extended, +# left_bank_count, right_bank_count, channel_count, bottom_count, +# bottom, left_bank, right_bank, valid_banks, has_relief, +# geometry +# ) %>% +# dplyr::mutate( +# is_extended = ifelse(is.na(is_extended), FALSE, is_extended) +# ) +# +# 
return(out_transect_lines) +# } + + +
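For context, the following is a minimal sketch of how the new-domain helpers and the polygon/line intersection helper added above might be chained together. The base directory, domain name, and input geopackage paths are hypothetical placeholders, and `create_if_not_exists()` is assumed to be the helper defined elsewhere in runners/cs_runner/utils.R; this is not part of the runner itself.

# Sketch only: set up a hypothetical new domain and subset FEMA polygons
# to those crossed by at least one transect line.
base_dir       <- "/tmp/transects_data"   # hypothetical base directory
domain_dirname <- "example_domain"        # hypothetical domain name

# create the directory tree and get the matching paths back as a named list
create_new_domain_dirs(base_dir, domain_dirname, with_output = TRUE)
paths <- get_new_domain_paths(base_dir, domain_dirname, with_output = TRUE)

# hypothetical inputs: FEMA polygons and transect lines stored as geopackages
fema_polygons <- sf::read_sf(file.path(paths$domain_dir, "fema_polygons.gpkg"))
transects     <- sf::read_sf(file.path(paths$transects_dir, "transects.gpkg"))

# keep only the FEMA polygons that intersect a transect, then save the subset
fema_hits <- polygons_with_line_intersects(polygons = fema_polygons, lines = transects)
sf::write_sf(fema_hits, file.path(paths$domain_subset_dir, "fema_with_transects.gpkg"))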
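The commented-out runner code above hints at the intended next step after download_3dep_vrt() writes its tile list: building a single VRT with GDAL. A minimal sketch of that step follows, assuming GDAL's gdalbuildvrt is on the system PATH and that dem_base_dir (a placeholder path) is a writable local directory.

# Sketch only: build one VRT over the 10m (1/3 arc-second) tiles listed by download_3dep_vrt()
dem_base_dir <- "/tmp/dem"                 # hypothetical DEM directory

download_3dep_vrt(dem_base_dir)            # writes ned_list_USGS_13.txt to dem_base_dir

txt_file <- paste0(dem_base_dir, "/ned_list_USGS_13.txt")
vrt_file <- paste0(dem_base_dir, "/ned_USGS_13.vrt")

# only build the VRT if it does not already exist (mirrors the commented runner code)
if (!file.exists(vrt_file)) {
  system(paste("gdalbuildvrt -input_file_list", txt_file, vrt_file))
}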