Merge branch 'release/1.15.0'
maurizi committed Feb 28, 2022
2 parents d9e0332 + 2856414 commit a1a52d1
Showing 68 changed files with 7,173 additions and 1,092 deletions.
25 changes: 24 additions & 1 deletion CHANGELOG.md
@@ -13,6 +13,28 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

### Fixed

## [1.15.0] - 2022-02-28

### Added

- Add modal to allow organization admins to create templates from projects [#1107](https://github.com/PublicMapping/districtbuilder/pull/1107)

### Changed

- Stop loading archived topology [#1132](https://github.com/PublicMapping/districtbuilder/pull/1132)
- Change formatting of the PVI chart y-axis [#1136](https://github.com/PublicMapping/districtbuilder/pull/1136)
- Refactor backend to use workers and improve performance [#1149](https://github.com/PublicMapping/districtbuilder/pull/1149)

### Fixed

- Parameterize REINDEX rules so both staging and prod can exist [#1130](https://github.com/PublicMapping/districtbuilder/pull/1130)
- Duplicated maps keep reference layers [#1137](https://github.com/PublicMapping/districtbuilder/pull/1137)
- Fix bounds for AK mini-map [#1145](https://github.com/PublicMapping/districtbuilder/pull/1145)
- Fix TopologyService health check [#1148](https://github.com/PublicMapping/districtbuilder/pull/1148)
- Fix sidebar styling on firefox [#1155](https://github.com/PublicMapping/districtbuilder/pull/1155)
- Updated PlanScore integration to use multi-step workflow [#1152](https://github.com/PublicMapping/districtbuilder/pull/1152)
- Fix concurrent Jenkins builds breaking node dependencies [#1146](https://github.com/PublicMapping/districtbuilder/pull/1146)

## [1.14.0] - 2022-02-09

### Added
@@ -432,7 +454,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

- Initial release.

[unreleased]: https://github.com/publicmapping/districtbuilder/compare/1.14.0...HEAD
[unreleased]: https://github.com/publicmapping/districtbuilder/compare/1.15.0...HEAD
[1.15.0]: https://github.com/publicmapping/districtbuilder/compare/1.14.0...1.15.0
[1.14.0]: https://github.com/publicmapping/districtbuilder/compare/1.13.0...1.14.0
[1.13.0]: https://github.com/publicmapping/districtbuilder/compare/1.12.1...1.13.0
[1.12.1]: https://github.com/publicmapping/districtbuilder/compare/1.12.0...1.12.1
3 changes: 2 additions & 1 deletion deployment/README.md
@@ -93,8 +93,9 @@ This will attempt to apply the plan assembled in the previous step using Amazon's
- Select **Actions** -> **Run Task**
- Below the warning, click the blue link: "Switch to launch type"
- Select the following
- **Launch type**: `Fargate`
- **Cluster**: (See table below)
- **Launch type**: `Fargate`
- **Operating system family**: `Linux`
- **Cluster VPC**: (See table below)
- **Subnets**: Select any named `PrivateSubnet`
- **Select existing security group**: (See table below)
4 changes: 2 additions & 2 deletions deployment/terraform/scheduled_tasks.tf
@@ -1,11 +1,11 @@
resource "aws_cloudwatch_event_rule" "reindex_task" {
name = "reindex-scheduled-ecs-event-rule"
name = "reindex-scheduled-ecs-event-rule-${var.environment}"
// Run monthly at 5AM UTC (midnight EST)
schedule_expression = "cron(0 5 1 * ? *)"
}

resource "aws_cloudwatch_event_target" "reindex_task" {
target_id = "reindex-scheduled-ecs-event-rule"
target_id = "reindex-scheduled-ecs-event-rule-${var.environment}"
rule = aws_cloudwatch_event_rule.reindex_task.name
arn = aws_ecs_cluster.app.arn
role_arn = aws_iam_role.ecs_task_role.arn
68 changes: 68 additions & 0 deletions doc/arch/adr-05-server-concurrency-strategy.md
@@ -0,0 +1,68 @@
# Server concurrency strategy

## Context

CPU-intensive work on the main event thread is hurting our performance and causing our health checks to fail. Because Node is single-threaded by default, CPU-intensive work on the main thread blocks us from responding to other requests.

There is an exploratory branch, [`c214bac6`](https://github.com/PublicMapping/districtbuilder/commit/c214bac64111615e7a213972d512a7d765c68821), that does some initial work toward moving the TopoJSON merge operation off of the main thread - we'll likely want to move imports & exports too - but this is still unacceptably slow for large regions due to the need to `deserialize` the TopoJSON data for each merge operation.

The Node.js memory model prevents us from directly sharing the TopoJSON object as-is between threads or processes - for the most part Node uses an actor model, passing messages that copy data rather than sharing memory directly.

Node does have two exceptions to this: an `ArrayBuffer` can be _transferred_ between threads instead of copied, and a `SharedArrayBuffer` can be used on multiple threads at once - but both of these can only store typed numeric arrays, not generic objects.
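
A minimal sketch of those two mechanisms (the payloads and single-file layout are illustrative, not code from this branch):

```ts
import { Worker, isMainThread, parentPort } from "worker_threads";

if (isMainThread) {
  // A SharedArrayBuffer is passed by reference: both threads see one memory region.
  const shared = new SharedArrayBuffer(4);
  const view = new Int32Array(shared);
  const worker = new Worker(__filename); // assumes a CommonJS build of this file

  worker.postMessage(shared);

  // An ArrayBuffer can instead be *transferred* via the transfer list;
  // afterwards `owned` is detached and unusable on this thread.
  const owned = new ArrayBuffer(4);
  worker.postMessage(owned, [owned]);

  worker.on("message", () => {
    console.log(Atomics.load(view, 0)); // 42, written by the worker
    void worker.terminate();
  });
} else {
  parentPort?.on("message", data => {
    if (data instanceof SharedArrayBuffer) {
      Atomics.store(new Int32Array(data), 0, 42); // visible to the main thread
      parentPort?.postMessage("done");
    }
  });
}
```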

### Options considered

#### Further improve on-demand deserialization performance

The threading strategy pursued in [`c214bac6`](https://github.com/PublicMapping/districtbuilder/commit/c214bac64111615e7a213972d512a7d765c68821) involved keeping the binary serialized data in a `SharedArrayBuffer`, and deserializing it in the worker for each request.

I benchmarked the current `v8.deserialize` method against our largest regions, which showed that deserializing the PA LRC or TX regions (the two largest) took about 6-9 seconds on average.
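
For reference, the shape of that measurement - `shared` is a stand-in for however the serialized topology bytes are actually held:

```ts
import * as v8 from "v8";

function deserializeTopology(shared: SharedArrayBuffer): unknown {
  const buf = Buffer.from(shared); // wraps the memory without copying it
  const start = process.hrtime.bigint();
  const topology = v8.deserialize(buf);
  const elapsedMs = Number(process.hrtime.bigint() - start) / 1e6;
  console.log(`v8.deserialize took ${elapsedMs.toFixed(0)}ms`);
  return topology;
}
```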

I attempted to swap `v8` for Protobuf by using `geobuf` version `1.0.1`, which was the last version to support TopoJSON, but saw no significant difference in performance [`c90a760`](https://github.com/PublicMapping/districtbuilder/commit/c90a7602a440c1aea035b8ec676cdc71b15ef086). I _did_ however note a significant size reduction, so we should seriously consider switching to Protocol Buffers as part of #1117.

I also tried using [`mmap-object`](https://github.com/allenluce/mmap-object) but that caused a performance decline relative to just using `v8` serialization.

#### Fork the TopoJSON library / merge algorithm to store data in SharedArrayBuffer

I did some basic work towards this, but I think we would need to _radically_ rework how the TopoJSON merge operation works and how the format is stored for this to be worthwhile. My experiments involved swapping out the `arcs` array in `Topology` for an [`ndarray`](https://www.npmjs.com/package/ndarray) backed by a `SharedArrayBuffer`, which was *worse* performance-wise than the `v8` (de)serialize route, even after going further and doing the same to the `arcs` array of each feature.

My guess here is that transferring hundreds of thousands of `SharedArrayBuffer` objects is still pretty slow, so the only way to really push this further would be a radical restructuring of how we access the GeometryCollection, storing data as a struct-of-arrays, which would limit the number of `SharedArrayBuffer` objects to a fixed count determined by the number of fields.
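
For illustration, the experiment looked roughly like this (the size and names are hypothetical):

```ts
import ndarray from "ndarray";

// Back the flat coordinate data with a SharedArrayBuffer so workers can
// read it without copying. `numPoints` is a made-up stand-in for the
// total coordinate count of an arcs array.
const numPoints = 1_000_000;
const backing = new SharedArrayBuffer(numPoints * 2 * Float64Array.BYTES_PER_ELEMENT);
const coords = ndarray(new Float64Array(backing), [numPoints, 2]);

// Reads go through a strided view instead of nested JS arrays:
const x = coords.get(0, 0);
const y = coords.get(0, 1);
console.log(x, y); // 0 0 - the buffer is zero-initialized
```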

#### Rewrite the CPU intensive tasks in C++

I think this is a viable option. Node.js has good bindings for C++, and C++ is not subject to the constraints of the Node.js memory model - we could easily share the read-only Topology data between threads.

The downsides here are significant though - we'd have to rewrite not just the TopoJSON `mergeArcs` method, but also all of our own business logic that uses TopoJSON data: the district merge operation, and project import and export. This is a significant amount of code to rewrite, and while there is a working test harness, we don't have many tests - we'd need a much more substantial test suite, validated against the current codebase, to feel confident the rewrite was implemented correctly.

We also don't have any real C++ expertise on the team, so we would either need to use off-team resources to build and maintain this part of the codebase, or expect a significant amount of training and learning to be required beforehand.

#### Cache data load in worker threads

For this approach we keep the buffer data available on the main thread and deserialize it as needed, like in the first approach, but we also add an LRU cache in each worker thread. This allows us to skip the deserialization penalty after the first request, and setting the `maxSize` parameter on the LRU cache ensures that we don't exceed the memory limits of the system. I made a test branch here [`09df46a5`](https://github.com/PublicMapping/districtbuilder/commit/09df46a50f9ec54cc6e0af8a26fd3ce30302172c) that showed acceptable speeds, and I believe there is room for further improvement in data caching.

We can route requests to worker threads that already have the data loaded, or to another worker if those are busy, allowing for a degree of concurrency.
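
A sketch of what the per-worker cache could look like, assuming the `lru-cache` package's v7 API; `Topology` and `loadSerializedBuffer` are hypothetical stand-ins for the real types and plumbing:

```ts
import LRU from "lru-cache";
import * as v8 from "v8";

type Topology = object;
declare function loadSerializedBuffer(regionCode: string): SharedArrayBuffer;

// Bound the cache by approximate byte size so each worker stays within its
// share of the container's memory (the 4 GiB figure is illustrative).
const cache = new LRU<string, Topology>({ maxSize: 4 * 1024 ** 3 });

function getTopology(regionCode: string): Topology {
  const cached = cache.get(regionCode);
  if (cached !== undefined) {
    return cached; // hit: skip the multi-second deserialize entirely
  }
  const shared = loadSerializedBuffer(regionCode);
  const topology = v8.deserialize(Buffer.from(shared)) as Topology;
  // Use the serialized size as a proxy for the in-memory footprint.
  cache.set(regionCode, topology, { size: shared.byteLength });
  return topology;
}
```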

The main downside here is duplication - data kept in both serialized and deserialized forms, and potentially duplicated between threads - which could increase our memory requirements. We also go from having our largest datasets be mostly static to constantly serializing and deserializing as the per-worker caches churn, which has the potential to change the runtime behavior of the application in unexpected ways.

This option becomes much more attractive if we can reduce the time spent loading the buffer (see #1117) to under a few seconds: in that case we could load the buffer in the worker thread before caching the deserialized data, and avoid the memory cost of keeping the serialized buffers around on the main thread.

#### Load data in worker threads

A variation of the caching approach above: instead of loading the binary data on the main thread, we would load data directly in each worker thread, with each thread loading a subset of states.

This is conceptually simpler than the caching approach, ensures the memory footprint of our large datasets stays static and predictable, and avoids the potential for a cache miss to slow down a request - but it limits our concurrency potential; with only one thread capable of handling any given region, concurrent requests from other users would have to block.
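
A minimal sketch of that static assignment (`./topology-worker.js` and the hash scheme are illustrative):

```ts
import * as os from "os";
import { Worker } from "worker_threads";

// One worker per core (minus the main thread), each destined to own a
// subset of regions.
const workers: Worker[] = [];
const workerCount = Math.max(1, os.cpus().length - 1);
for (let i = 0; i < workerCount; i++) {
  workers.push(new Worker("./topology-worker.js"));
}

// A stable hash keeps each region pinned to the same worker, so its
// topology is loaded exactly once - but concurrent requests for the
// same region all queue behind that single thread.
function workerFor(regionCode: string): Worker {
  let hash = 0;
  for (const ch of regionCode) {
    hash = (hash * 31 + ch.charCodeAt(0)) | 0;
  }
  return workers[Math.abs(hash) % workers.length];
}
```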

## Decision

We'll use the caching approach outlined above as option four.

This should be relatively quick to implement, and allow for some degree of concurrent data processing for the same state.
We should evaluate our options for improving file loading performance, and consider whether we can skip the initialization step where we load and cache the binary data, but regardless of that we'll still use an LRU cache in each worker thread.

Switching to a PBF format as part of this would be prudent if we need to keep the pre-caching, as that seemed to reduce the binary size by about 50%. Doing this would require maintaining our own Protocol Buffers bindings for TopoJSON - we could fork `geobuf` version `1.0.1` for this, which was the last version that supported TopoJSON.
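
A sketch of what such a fork would expose, assuming the `geobuf` 1.0.1-era API (`encode(obj, pbf)` returning a `Uint8Array`, `decode(pbf)` returning an object):

```ts
import Pbf from "pbf";
import * as geobuf from "geobuf";

// Encode a TopoJSON topology to compact Protocol Buffers bytes.
function topologyToPbf(topology: object): Uint8Array {
  return geobuf.encode(topology, new Pbf());
}

// Decode those bytes back into a topology object.
function topologyFromPbf(bytes: Uint8Array): object {
  return geobuf.decode(new Pbf(bytes));
}
```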

## Consequences

We'll need to carefully monitor application performance after making this change, as it will likely affect our steady-state memory usage - consequently, we should hold off on reducing our instance size until after this change lands, deferring that reduction to a later release.

Long-term, the C++ approach might offer the best possible performance, but sticking to an entirely Node.js backend seems worthwhile in terms of long-term maintenance and support.
10 changes: 10 additions & 0 deletions docker-compose.ci.yml
@@ -2,9 +2,19 @@ version: "2.4"
services:
server:
image: "districtbuilder:${GIT_COMMIT:-latest}"
volumes:
- "/var/cache/district-builder-${EXECUTOR_NUMBER}-server-node-modules:/home/node/app/server/node_modules"

client:
image: node:16-slim
volumes:
- "/var/cache/district-builder-${EXECUTOR_NUMBER}-client-node-modules:/home/node/app/node_modules"

manage:
image: "districtbuilder-manage:${GIT_COMMIT:-latest}"
volumes:
- "/var/cache/district-builder-${EXECUTOR_NUMBER}-server-node-modules:/home/node/app/server/node_modules"
- "/var/cache/district-builder-${EXECUTOR_NUMBER}-manage-node-modules:/home/node/app/manage/node_modules"

shellcheck:
image: koalaman/shellcheck:stable
6 changes: 4 additions & 2 deletions docker-compose.yml
@@ -1,7 +1,7 @@
version: "2.4"
services:
database:
image: quay.io/azavea/postgis:3-postgres12.2-slim
image: quay.io/azavea/postgis:3-postgres12.4-slim
environment:
- POSTGRES_USER=districtbuilder
- POSTGRES_PASSWORD=districtbuilder
@@ -28,13 +28,15 @@ services:
- JWT_SECRET=insecure
- JWT_EXPIRATION_IN_MS=604800000 # 1 week
- CLIENT_URL=http://localhost:3003
- TOPOLOGY_CACHE_DIRECTORY=/opt/topology
build:
context: ./src/server
dockerfile: Dockerfile
volumes:
- ./src/server:/home/node/app/server
- ./src/shared:/home/node/app/shared
- /var/cache/district-builder-server-node-modules:/home/node/app/server/node_modules
- /var/cache/district-builder-server-topology:/opt/topology
- $HOME/.cache/district-builder:/usr/local/share/.cache/yarn
- $HOME/.aws:/root/.aws:ro
working_dir: /home/node/app/server
@@ -44,7 +46,7 @@
command: start:dev

client:
image: node:16-slim
image: node:16-bullseye-slim
environment:
- BASE_URL=${BASE_URL:-http://server:3005}
- PORT=3003
2 changes: 1 addition & 1 deletion scripts/cibuild
@@ -23,7 +23,7 @@ if [ "${BASH_SOURCE[0]}" = "${0}" ]; then
if [ "${1:-}" = "--help" ]; then
usage
else
./scripts/update
CI=1 ./scripts/update

# Build tagged container images
GIT_COMMIT="${GIT_COMMIT}" docker-compose \
22 changes: 14 additions & 8 deletions scripts/update
@@ -17,35 +17,41 @@ if [ "${BASH_SOURCE[0]}" = "${0}" ]; then
if [ "${1:-}" = "--help" ]; then
usage
else
if [[ -n "${CI}" ]]; then
CONFIG_ARGS=( "-f" "docker-compose.yml" "-f" "docker-compose.ci.yml" )
else
CONFIG_ARGS=( "-f" "docker-compose.yml" )
fi
# Ensure container images are current
docker-compose build --pull
docker-compose "${CONFIG_ARGS[@]}" build --pull

# Clean dist directory
docker-compose \
docker-compose "${CONFIG_ARGS[@]}" \
run --rm --no-deps server \
clean

# Update frontend, Yarn dependencies and build static asset bundle
docker-compose \
docker-compose "${CONFIG_ARGS[@]}" \
run --rm --no-deps client \
bash -c "yarn install && yarn compile && yarn build"

# Update backend, Yarn dependencies and build server
docker-compose \
docker-compose "${CONFIG_ARGS[@]}" \
run --rm --no-deps --entrypoint "bash -c" server \
"yarn install && npm rebuild && yarn build"

# Update manage, Yarn dependencies and build
docker-compose \
docker-compose "${CONFIG_ARGS[@]}" \
run --rm --no-deps manage \
bash -c "yarn install && yarn build"

# Bring up PostgreSQL and NestJS in a way that respects
# configured service health checks.
docker-compose \
-f docker-compose.yml \
docker-compose "${CONFIG_ARGS[@]}" \
up -d database server

./scripts/migration
docker-compose "${CONFIG_ARGS[@]}" \
run --rm server \
migration:run
fi
fi
28 changes: 22 additions & 6 deletions scripts/yarn
@@ -17,12 +17,28 @@ if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then
if [[ "${1:-}" == "--help" ]] || [[ ! "${1}" == "client" && ! "${1}" == "server" && ! "${1}" == "manage" ]]; then
usage
elif [[ "${1}" == "server" ]]; then
docker-compose \
run --rm "${1}" \
"${@:2}"
if [[ -n "${CI}" ]]; then
docker-compose \
-f docker-compose.yml \
-f docker-compose.ci.yml \
run --rm "${1}" \
"${@:2}"
else
docker-compose \
run --rm "${1}" \
"${@:2}"
fi
else
docker-compose \
run --rm --no-deps "${1}" \
bash -c "yarn ${*:2}"
if [[ -n "${CI}" ]]; then
docker-compose \
-f docker-compose.yml \
-f docker-compose.ci.yml \
run --rm --no-deps "${1}" \
bash -c "yarn ${*:2}"
else
docker-compose \
run --rm --no-deps "${1}" \
bash -c "yarn ${*:2}"
fi
fi
fi
1 change: 0 additions & 1 deletion src/client/actions/projectModals.ts
@@ -6,5 +6,4 @@ export const showAdvancedEditingModal = createAction(
"Show advanced editing warning modal"
)<boolean>();
export const showCopyMapModal = createAction("Show copy map modal")<boolean>();
export const showConvertMapModal = createAction("Show convert map modal")<boolean>();
export const setImportFlagsModal = createAction("Show import flags modal")<boolean>();
4 changes: 4 additions & 0 deletions src/client/actions/projects.ts
@@ -20,6 +20,10 @@ export const setDeleteProject = createAction("Set the id for the delete project
IProject | undefined
>();

export const setTemplateProject = createAction("Set the id for the template project modal")<
IProject | undefined
>();

export const projectArchive = createAction("Archive project")<ProjectId>();
export const projectArchiveSuccess = createAction("Archive project success")<IProject>();
export const projectArchiveFailure = createAction("Archive project failure")<string>();