From 2a83cde508a5e6bee2aa563ae8ca9b2d8d1de0fd Mon Sep 17 00:00:00 2001 From: Callum Dunster Date: Fri, 6 Sep 2024 17:20:49 +0200 Subject: [PATCH 01/22] chore(ci): add performance tests workflow with manual trigger --- .github/workflows/performance.yaml | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) create mode 100644 .github/workflows/performance.yaml diff --git a/.github/workflows/performance.yaml b/.github/workflows/performance.yaml new file mode 100644 index 00000000..216704ab --- /dev/null +++ b/.github/workflows/performance.yaml @@ -0,0 +1,20 @@ +name: "Performance Tests" + +on: + workflow_dispatch: + +jobs: + test: + runs-on: [self-hosted, wind-tunnel] + steps: + - uses: actions/checkout@v4 + + - name: Smoke test - zome_call_single_value + run: | + # Start a sandbox conductor and run it in the background + nix develop .#ci -c bash -c "hc s clean && echo "1234" | hc s --piped create && echo "1234" | hc s --piped -f 8888 run &" + + # Run the scenario for 5 seconds + RUST_LOG=info nix run .#zome_call_single_value -- --connection-string ws://localhost:8888 --duration 5 --no-progress + + pkill hc && pkill holochain && pkill lair-keystore From 330c4f93aaa6b62520e0902c4f0aedd71ab0f6cb Mon Sep 17 00:00:00 2001 From: Callum Dunster Date: Mon, 9 Sep 2024 17:17:03 +0200 Subject: [PATCH 02/22] chore(ci): set the tests reporter to be influx-file --- .github/workflows/performance.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/performance.yaml b/.github/workflows/performance.yaml index 216704ab..9b3d567d 100644 --- a/.github/workflows/performance.yaml +++ b/.github/workflows/performance.yaml @@ -15,6 +15,6 @@ jobs: nix develop .#ci -c bash -c "hc s clean && echo "1234" | hc s --piped create && echo "1234" | hc s --piped -f 8888 run &" # Run the scenario for 5 seconds - RUST_LOG=info nix run .#zome_call_single_value -- --connection-string ws://localhost:8888 --duration 5 --no-progress + RUST_LOG=info nix run .#zome_call_single_value -- --connection-string ws://localhost:8888 --duration 5 --no-progress --reporter influx-file pkill hc && pkill holochain && pkill lair-keystore From 602d90c7e85aa95169e3c56dc9c2445514f0fecf Mon Sep 17 00:00:00 2001 From: Callum Dunster Date: Tue, 10 Sep 2024 11:25:35 +0200 Subject: [PATCH 03/22] chore(ci): add more simple scenarios as steps --- .github/workflows/performance.yaml | 80 ++++++++++++++++++++++++++++++ 1 file changed, 80 insertions(+) diff --git a/.github/workflows/performance.yaml b/.github/workflows/performance.yaml index 9b3d567d..4665ce2a 100644 --- a/.github/workflows/performance.yaml +++ b/.github/workflows/performance.yaml @@ -18,3 +18,83 @@ jobs: RUST_LOG=info nix run .#zome_call_single_value -- --connection-string ws://localhost:8888 --duration 5 --no-progress --reporter influx-file pkill hc && pkill holochain && pkill lair-keystore + + - name: Smoke test - single_write_many_read + run: | + # Start a sandbox conductor and run it in the background + nix develop .#ci -c bash -c "hc s clean && echo "1234" | hc s --piped create && echo "1234" | hc s --piped -f 8888 run &" + + # Run the scenario for 5 seconds + RUST_LOG=info nix run .#single_write_many_read -- --connection-string ws://localhost:8888 --duration 5 --no-progress --reporter influx-file + + pkill hc && pkill holochain && pkill lair-keystore + + - name: Smoke test - dht_sync_lag + run: | + # Start a sandbox conductor and run it in the background + nix develop .#ci -c bash -c "hc s clean && echo "1234" | hc s --piped create && echo "1234" | hc s --piped -f 8888 run &" + + # Run the scenario for 5 seconds + RUST_LOG=info nix run .#dht_sync_lag -- --connection-string ws://localhost:8888 --agents 2 --behaviour write:1 --behaviour record_lag:1 --duration 5 --no-progress --reporter influx-file + + pkill hc && pkill holochain && pkill lair-keystore + + - name: Smoke test - app_install + run: | + # Start a sandbox conductor and run it in the background + nix develop .#ci -c bash -c "hc s clean && echo "1234" | hc s --piped create && echo "1234" | hc s --piped -f 8888 run &" + + # Run the scenario for 5 seconds + RUST_LOG=info nix run .#app_install -- --connection-string ws://localhost:8888 --agents 2 --behaviour minimal:1 --behaviour large:1 --duration 5 --no-progress --reporter influx-file + + pkill hc && pkill holochain && pkill lair-keystore + + - name: Smoke test - first_call + run: | + # Start a sandbox conductor and run it in the background + nix develop .#ci -c bash -c "hc s clean && echo "1234" | hc s --piped create && echo "1234" | hc s --piped -f 8888 run &" + + # Run the scenario for 5 seconds + RUST_LOG=info nix run .#first_call -- --connection-string ws://localhost:8888 --agents 1 --behaviour local:1 --duration 5 --no-progress --reporter influx-file + + pkill hc && pkill holochain && pkill lair-keystore + + - name: Smoke test - write_read + run: | + # Start a sandbox conductor and run it in the background + nix develop .#ci -c bash -c "hc s clean && echo "1234" | hc s --piped create && echo "1234" | hc s --piped -f 8888 run &" + + # Run the scenario for 5 seconds + RUST_LOG=info nix run .#write_read -- --connection-string ws://localhost:8888 --duration 5 --no-progress --reporter influx-file + + pkill hc && pkill holochain && pkill lair-keystore + + - name: Smoke test - write_query + run: | + # Start a sandbox conductor and run it in the background + nix develop .#ci -c bash -c "hc s clean && echo "1234" | hc s --piped create && echo "1234" | hc s --piped -f 8888 run &" + + # Run the scenario for 5 seconds + RUST_LOG=info nix run .#write_query -- --connection-string ws://localhost:8888 --duration 5 --no-progress --reporter influx-file + + pkill hc && pkill holochain && pkill lair-keystore + + - name: Smoke test - local_signals + run: | + # Start a sandbox conductor and run it in the background + nix develop .#ci -c bash -c "hc s clean && echo "1234" | hc s --piped create && echo "1234" | hc s --piped -f 8888 run &" + + # Run the scenario for 5 seconds + RUST_LOG=info nix run .#local_signals -- --connection-string ws://localhost:8888 --duration 5 --no-progress --reporter influx-file + + pkill hc && pkill holochain && pkill lair-keystore + + - name: Smoke test - write_validated + run: | + # Start a sandbox conductor and run it in the background + nix develop .#ci -c bash -c "hc s clean && echo "1234" | hc s --piped create && echo "1234" | hc s --piped -f 8888 run &" + + # Run the scenario for 5 seconds + RUST_LOG=info nix run .#write_validated -- --connection-string ws://localhost:8888 --duration 5 --no-progress --reporter influx-file + + pkill hc && pkill holochain && pkill lair-keystore From 9b34405cfc68783c92239770bf8c8b2e02c1883d Mon Sep 17 00:00:00 2001 From: Callum Dunster Date: Wed, 11 Sep 2024 13:38:14 +0200 Subject: [PATCH 04/22] chore(ci): add trycp_write_validated scenario --- .github/workflows/performance.yaml | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/.github/workflows/performance.yaml b/.github/workflows/performance.yaml index 4665ce2a..a9f12958 100644 --- a/.github/workflows/performance.yaml +++ b/.github/workflows/performance.yaml @@ -98,3 +98,20 @@ jobs: RUST_LOG=info nix run .#write_validated -- --connection-string ws://localhost:8888 --duration 5 --no-progress --reporter influx-file pkill hc && pkill holochain && pkill lair-keystore + + - name: Smoke test - trycp_write_validated + run: | + set -x + + # Start local network services + nix develop .#ci -c bash -c "hc-run-local-services --bootstrap-port 4422 --signal-port 4423 &" + # Start a TryCP instance + nix develop .#ci -c bash -c "source ./scripts/trycp.sh && start_trycp &" + + # Run the scenario for 10 seconds + RUST_LOG=warn CONDUCTOR_CONFIG="CI" MIN_PEERS=2 nix run .#trycp_write_validated -- --targets targets-ci.yaml --instances-per-target 2 --duration 10 --no-progress --reporter influx-file + + # Stop the TryCP instance + nix develop .#ci -c bash -c "source ./scripts/trycp.sh && stop_trycp" + # Stop local network services + pkill hc-run-local From e61032c1ecc01bd41304ab66de3a7c44573f902a Mon Sep 17 00:00:00 2001 From: Callum Dunster Date: Wed, 11 Sep 2024 14:45:08 +0200 Subject: [PATCH 05/22] chore(ci): update duration of performance tests to 120 seconds --- .github/workflows/performance.yaml | 30 ++++++++++-------------------- 1 file changed, 10 insertions(+), 20 deletions(-) diff --git a/.github/workflows/performance.yaml b/.github/workflows/performance.yaml index a9f12958..bc70371a 100644 --- a/.github/workflows/performance.yaml +++ b/.github/workflows/performance.yaml @@ -14,8 +14,7 @@ jobs: # Start a sandbox conductor and run it in the background nix develop .#ci -c bash -c "hc s clean && echo "1234" | hc s --piped create && echo "1234" | hc s --piped -f 8888 run &" - # Run the scenario for 5 seconds - RUST_LOG=info nix run .#zome_call_single_value -- --connection-string ws://localhost:8888 --duration 5 --no-progress --reporter influx-file + RUST_LOG=info nix run .#zome_call_single_value -- --connection-string ws://localhost:8888 --duration 120 --no-progress --reporter influx-file pkill hc && pkill holochain && pkill lair-keystore @@ -24,8 +23,7 @@ jobs: # Start a sandbox conductor and run it in the background nix develop .#ci -c bash -c "hc s clean && echo "1234" | hc s --piped create && echo "1234" | hc s --piped -f 8888 run &" - # Run the scenario for 5 seconds - RUST_LOG=info nix run .#single_write_many_read -- --connection-string ws://localhost:8888 --duration 5 --no-progress --reporter influx-file + RUST_LOG=info nix run .#single_write_many_read -- --connection-string ws://localhost:8888 --duration 120 --no-progress --reporter influx-file pkill hc && pkill holochain && pkill lair-keystore @@ -34,8 +32,7 @@ jobs: # Start a sandbox conductor and run it in the background nix develop .#ci -c bash -c "hc s clean && echo "1234" | hc s --piped create && echo "1234" | hc s --piped -f 8888 run &" - # Run the scenario for 5 seconds - RUST_LOG=info nix run .#dht_sync_lag -- --connection-string ws://localhost:8888 --agents 2 --behaviour write:1 --behaviour record_lag:1 --duration 5 --no-progress --reporter influx-file + RUST_LOG=info nix run .#dht_sync_lag -- --connection-string ws://localhost:8888 --agents 2 --behaviour write:1 --behaviour record_lag:1 --duration 120 --no-progress --reporter influx-file pkill hc && pkill holochain && pkill lair-keystore @@ -44,8 +41,7 @@ jobs: # Start a sandbox conductor and run it in the background nix develop .#ci -c bash -c "hc s clean && echo "1234" | hc s --piped create && echo "1234" | hc s --piped -f 8888 run &" - # Run the scenario for 5 seconds - RUST_LOG=info nix run .#app_install -- --connection-string ws://localhost:8888 --agents 2 --behaviour minimal:1 --behaviour large:1 --duration 5 --no-progress --reporter influx-file + RUST_LOG=info nix run .#app_install -- --connection-string ws://localhost:8888 --agents 2 --behaviour minimal:1 --behaviour large:1 --duration 120 --no-progress --reporter influx-file pkill hc && pkill holochain && pkill lair-keystore @@ -54,8 +50,7 @@ jobs: # Start a sandbox conductor and run it in the background nix develop .#ci -c bash -c "hc s clean && echo "1234" | hc s --piped create && echo "1234" | hc s --piped -f 8888 run &" - # Run the scenario for 5 seconds - RUST_LOG=info nix run .#first_call -- --connection-string ws://localhost:8888 --agents 1 --behaviour local:1 --duration 5 --no-progress --reporter influx-file + RUST_LOG=info nix run .#first_call -- --connection-string ws://localhost:8888 --agents 1 --behaviour local:1 --duration 120 --no-progress --reporter influx-file pkill hc && pkill holochain && pkill lair-keystore @@ -64,8 +59,7 @@ jobs: # Start a sandbox conductor and run it in the background nix develop .#ci -c bash -c "hc s clean && echo "1234" | hc s --piped create && echo "1234" | hc s --piped -f 8888 run &" - # Run the scenario for 5 seconds - RUST_LOG=info nix run .#write_read -- --connection-string ws://localhost:8888 --duration 5 --no-progress --reporter influx-file + RUST_LOG=info nix run .#write_read -- --connection-string ws://localhost:8888 --duration 120 --no-progress --reporter influx-file pkill hc && pkill holochain && pkill lair-keystore @@ -74,8 +68,7 @@ jobs: # Start a sandbox conductor and run it in the background nix develop .#ci -c bash -c "hc s clean && echo "1234" | hc s --piped create && echo "1234" | hc s --piped -f 8888 run &" - # Run the scenario for 5 seconds - RUST_LOG=info nix run .#write_query -- --connection-string ws://localhost:8888 --duration 5 --no-progress --reporter influx-file + RUST_LOG=info nix run .#write_query -- --connection-string ws://localhost:8888 --duration 120 --no-progress --reporter influx-file pkill hc && pkill holochain && pkill lair-keystore @@ -84,8 +77,7 @@ jobs: # Start a sandbox conductor and run it in the background nix develop .#ci -c bash -c "hc s clean && echo "1234" | hc s --piped create && echo "1234" | hc s --piped -f 8888 run &" - # Run the scenario for 5 seconds - RUST_LOG=info nix run .#local_signals -- --connection-string ws://localhost:8888 --duration 5 --no-progress --reporter influx-file + RUST_LOG=info nix run .#local_signals -- --connection-string ws://localhost:8888 --duration 120 --no-progress --reporter influx-file pkill hc && pkill holochain && pkill lair-keystore @@ -94,8 +86,7 @@ jobs: # Start a sandbox conductor and run it in the background nix develop .#ci -c bash -c "hc s clean && echo "1234" | hc s --piped create && echo "1234" | hc s --piped -f 8888 run &" - # Run the scenario for 5 seconds - RUST_LOG=info nix run .#write_validated -- --connection-string ws://localhost:8888 --duration 5 --no-progress --reporter influx-file + RUST_LOG=info nix run .#write_validated -- --connection-string ws://localhost:8888 --duration 120 --no-progress --reporter influx-file pkill hc && pkill holochain && pkill lair-keystore @@ -108,8 +99,7 @@ jobs: # Start a TryCP instance nix develop .#ci -c bash -c "source ./scripts/trycp.sh && start_trycp &" - # Run the scenario for 10 seconds - RUST_LOG=warn CONDUCTOR_CONFIG="CI" MIN_PEERS=2 nix run .#trycp_write_validated -- --targets targets-ci.yaml --instances-per-target 2 --duration 10 --no-progress --reporter influx-file + RUST_LOG=warn CONDUCTOR_CONFIG="CI" MIN_PEERS=2 nix run .#trycp_write_validated -- --targets targets-ci.yaml --instances-per-target 2 --duration 120 --no-progress --reporter influx-file # Stop the TryCP instance nix develop .#ci -c bash -c "source ./scripts/trycp.sh && stop_trycp" From 1c9dce3aff704eef0818d6babced87ecdc3498bf Mon Sep 17 00:00:00 2001 From: Callum Dunster Date: Thu, 12 Sep 2024 14:14:32 +0200 Subject: [PATCH 06/22] chore(ci): add final performance tests --- .github/workflows/performance.yaml | 46 ++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/.github/workflows/performance.yaml b/.github/workflows/performance.yaml index bc70371a..c08352c9 100644 --- a/.github/workflows/performance.yaml +++ b/.github/workflows/performance.yaml @@ -105,3 +105,49 @@ jobs: nix develop .#ci -c bash -c "source ./scripts/trycp.sh && stop_trycp" # Stop local network services pkill hc-run-local + + - name: Smoke test - remote_call_rate + run: | + set -x + + # Start local network services + nix develop .#ci -c bash -c "hc-run-local-services --bootstrap-port 4422 --signal-port 4423 &" + # Start a TryCP instance + nix develop .#ci -c bash -c "source ./scripts/trycp.sh && start_trycp &" + + RUST_LOG=warn CONDUCTOR_CONFIG="CI" MIN_PEERS=2 nix run .#remote_call_rate -- --targets targets-ci.yaml --instances-per-target 2 --duration 120 --no-progress --reporter influx-file + + # Stop the TryCP instance + nix develop .#ci -c bash -c "source ./scripts/trycp.sh && stop_trycp" + # Stop local network services + pkill hc-run-local + + - name: Smoke test - two_party_countersigning + run: | + # Start local network services + nix develop .#ci -c bash -c "hc-run-local-services --bootstrap-port 4422 --signal-port 4423 &" + # Start a TryCP instance + nix develop .#ci -c bash -c "source ./scripts/trycp.sh && start_trycp &" + + RUST_LOG=warn CONDUCTOR_CONFIG="CI" MIN_PEERS=2 nix run .#two_party_countersigning -- --targets targets-ci.yaml --behaviour initiate:1 --behaviour participate:1 --instances-per-target 2 --duration 120 --no-progress --reporter influx-file + + # Stop the TryCP instance + nix develop .#ci -c bash -c "source ./scripts/trycp.sh && stop_trycp" + # Stop local network services + pkill hc-run-local + + - name: Smoke test - validation_receipts + run: | + set -x + + # Start local network services + nix develop .#ci -c bash -c "hc-run-local-services --bootstrap-port 4422 --signal-port 4423 &" + # Start a TryCP instance + nix develop .#ci -c bash -c "source ./scripts/trycp.sh && start_trycp &" + + RUST_LOG=warn CONDUCTOR_CONFIG="CI" MIN_PEERS=2 nix run .#validation_receipts -- --targets targets-ci.yaml --instances-per-target 2 --duration 120 --no-progress --reporter influx-file + + # Stop the TryCP instance + nix develop .#ci -c bash -c "source ./scripts/trycp.sh && stop_trycp" + # Stop local network services + pkill hc-run-local From 0dc9b9bdf4a41a839b6e90c208d626f0874bb83f Mon Sep 17 00:00:00 2001 From: Callum Dunster Date: Thu, 12 Sep 2024 15:14:18 +0200 Subject: [PATCH 07/22] chore(ci): use a job matrix instead of a single job --- .github/workflows/performance.yaml | 147 ++------------------ telegraf/runner-telegraf.conf | 216 +++++++++++++++++++++++++++++ 2 files changed, 228 insertions(+), 135 deletions(-) create mode 100644 telegraf/runner-telegraf.conf diff --git a/.github/workflows/performance.yaml b/.github/workflows/performance.yaml index c08352c9..4556c179 100644 --- a/.github/workflows/performance.yaml +++ b/.github/workflows/performance.yaml @@ -3,151 +3,28 @@ name: "Performance Tests" on: workflow_dispatch: +env: + INFLUX_TOKEN: ${{ secrets.INFLUX_TOKEN }} + WT_METRICS_DIR: "${{ github.workspace }}/telegraf/metrics" + jobs: test: runs-on: [self-hosted, wind-tunnel] + strategy: + fail-fast: false + matrix: + scenario: [ zome_call_single_value ] steps: - uses: actions/checkout@v4 - - name: Smoke test - zome_call_single_value - run: | - # Start a sandbox conductor and run it in the background - nix develop .#ci -c bash -c "hc s clean && echo "1234" | hc s --piped create && echo "1234" | hc s --piped -f 8888 run &" - - RUST_LOG=info nix run .#zome_call_single_value -- --connection-string ws://localhost:8888 --duration 120 --no-progress --reporter influx-file - - pkill hc && pkill holochain && pkill lair-keystore - - - name: Smoke test - single_write_many_read - run: | - # Start a sandbox conductor and run it in the background - nix develop .#ci -c bash -c "hc s clean && echo "1234" | hc s --piped create && echo "1234" | hc s --piped -f 8888 run &" - - RUST_LOG=info nix run .#single_write_many_read -- --connection-string ws://localhost:8888 --duration 120 --no-progress --reporter influx-file - - pkill hc && pkill holochain && pkill lair-keystore - - - name: Smoke test - dht_sync_lag - run: | - # Start a sandbox conductor and run it in the background - nix develop .#ci -c bash -c "hc s clean && echo "1234" | hc s --piped create && echo "1234" | hc s --piped -f 8888 run &" - - RUST_LOG=info nix run .#dht_sync_lag -- --connection-string ws://localhost:8888 --agents 2 --behaviour write:1 --behaviour record_lag:1 --duration 120 --no-progress --reporter influx-file - - pkill hc && pkill holochain && pkill lair-keystore - - - name: Smoke test - app_install - run: | - # Start a sandbox conductor and run it in the background - nix develop .#ci -c bash -c "hc s clean && echo "1234" | hc s --piped create && echo "1234" | hc s --piped -f 8888 run &" - - RUST_LOG=info nix run .#app_install -- --connection-string ws://localhost:8888 --agents 2 --behaviour minimal:1 --behaviour large:1 --duration 120 --no-progress --reporter influx-file - - pkill hc && pkill holochain && pkill lair-keystore - - - name: Smoke test - first_call + - name: Smoke test - ${{ matrix.scenario }} run: | # Start a sandbox conductor and run it in the background nix develop .#ci -c bash -c "hc s clean && echo "1234" | hc s --piped create && echo "1234" | hc s --piped -f 8888 run &" - RUST_LOG=info nix run .#first_call -- --connection-string ws://localhost:8888 --agents 1 --behaviour local:1 --duration 120 --no-progress --reporter influx-file + RUST_LOG=info nix run .#${{ matrix.scenario }} -- --connection-string ws://localhost:8888 --duration 120 --no-progress --reporter influx-file pkill hc && pkill holochain && pkill lair-keystore - - name: Smoke test - write_read - run: | - # Start a sandbox conductor and run it in the background - nix develop .#ci -c bash -c "hc s clean && echo "1234" | hc s --piped create && echo "1234" | hc s --piped -f 8888 run &" - - RUST_LOG=info nix run .#write_read -- --connection-string ws://localhost:8888 --duration 120 --no-progress --reporter influx-file - - pkill hc && pkill holochain && pkill lair-keystore - - - name: Smoke test - write_query - run: | - # Start a sandbox conductor and run it in the background - nix develop .#ci -c bash -c "hc s clean && echo "1234" | hc s --piped create && echo "1234" | hc s --piped -f 8888 run &" - - RUST_LOG=info nix run .#write_query -- --connection-string ws://localhost:8888 --duration 120 --no-progress --reporter influx-file - - pkill hc && pkill holochain && pkill lair-keystore - - - name: Smoke test - local_signals - run: | - # Start a sandbox conductor and run it in the background - nix develop .#ci -c bash -c "hc s clean && echo "1234" | hc s --piped create && echo "1234" | hc s --piped -f 8888 run &" - - RUST_LOG=info nix run .#local_signals -- --connection-string ws://localhost:8888 --duration 120 --no-progress --reporter influx-file - - pkill hc && pkill holochain && pkill lair-keystore - - - name: Smoke test - write_validated - run: | - # Start a sandbox conductor and run it in the background - nix develop .#ci -c bash -c "hc s clean && echo "1234" | hc s --piped create && echo "1234" | hc s --piped -f 8888 run &" - - RUST_LOG=info nix run .#write_validated -- --connection-string ws://localhost:8888 --duration 120 --no-progress --reporter influx-file - - pkill hc && pkill holochain && pkill lair-keystore - - - name: Smoke test - trycp_write_validated - run: | - set -x - - # Start local network services - nix develop .#ci -c bash -c "hc-run-local-services --bootstrap-port 4422 --signal-port 4423 &" - # Start a TryCP instance - nix develop .#ci -c bash -c "source ./scripts/trycp.sh && start_trycp &" - - RUST_LOG=warn CONDUCTOR_CONFIG="CI" MIN_PEERS=2 nix run .#trycp_write_validated -- --targets targets-ci.yaml --instances-per-target 2 --duration 120 --no-progress --reporter influx-file - - # Stop the TryCP instance - nix develop .#ci -c bash -c "source ./scripts/trycp.sh && stop_trycp" - # Stop local network services - pkill hc-run-local - - - name: Smoke test - remote_call_rate - run: | - set -x - - # Start local network services - nix develop .#ci -c bash -c "hc-run-local-services --bootstrap-port 4422 --signal-port 4423 &" - # Start a TryCP instance - nix develop .#ci -c bash -c "source ./scripts/trycp.sh && start_trycp &" - - RUST_LOG=warn CONDUCTOR_CONFIG="CI" MIN_PEERS=2 nix run .#remote_call_rate -- --targets targets-ci.yaml --instances-per-target 2 --duration 120 --no-progress --reporter influx-file - - # Stop the TryCP instance - nix develop .#ci -c bash -c "source ./scripts/trycp.sh && stop_trycp" - # Stop local network services - pkill hc-run-local - - - name: Smoke test - two_party_countersigning - run: | - # Start local network services - nix develop .#ci -c bash -c "hc-run-local-services --bootstrap-port 4422 --signal-port 4423 &" - # Start a TryCP instance - nix develop .#ci -c bash -c "source ./scripts/trycp.sh && start_trycp &" - - RUST_LOG=warn CONDUCTOR_CONFIG="CI" MIN_PEERS=2 nix run .#two_party_countersigning -- --targets targets-ci.yaml --behaviour initiate:1 --behaviour participate:1 --instances-per-target 2 --duration 120 --no-progress --reporter influx-file - - # Stop the TryCP instance - nix develop .#ci -c bash -c "source ./scripts/trycp.sh && stop_trycp" - # Stop local network services - pkill hc-run-local - - - name: Smoke test - validation_receipts - run: | - set -x - - # Start local network services - nix develop .#ci -c bash -c "hc-run-local-services --bootstrap-port 4422 --signal-port 4423 &" - # Start a TryCP instance - nix develop .#ci -c bash -c "source ./scripts/trycp.sh && start_trycp &" - - RUST_LOG=warn CONDUCTOR_CONFIG="CI" MIN_PEERS=2 nix run .#validation_receipts -- --targets targets-ci.yaml --instances-per-target 2 --duration 120 --no-progress --reporter influx-file - - # Stop the TryCP instance - nix develop .#ci -c bash -c "source ./scripts/trycp.sh && stop_trycp" - # Stop local network services - pkill hc-run-local + - name: Run Telegraf to upload influx metrics + run: nix run nixpkgs#telegraf -- --config telegraf/runner-telegraf.conf --once diff --git a/telegraf/runner-telegraf.conf b/telegraf/runner-telegraf.conf new file mode 100644 index 00000000..7b6ff6a8 --- /dev/null +++ b/telegraf/runner-telegraf.conf @@ -0,0 +1,216 @@ +# Telegraf Configuration +# +# Telegraf is entirely plugin driven. All metrics are gathered from the +# declared inputs, and sent to the declared outputs. +# +# Plugins must be declared in here to be active. +# To deactivate a plugin, comment out the name and any variables. +# +# Use 'telegraf -config telegraf.conf -test' to see what metrics a config +# file would generate. +# +# Environment variables can be used anywhere in this config file, simply surround +# them with ${}. For strings the variable must be within quotes (ie, "${STR_VAR}"), +# for numbers and booleans they should be plain (ie, ${INT_VAR}, ${BOOL_VAR}) + + +# Configuration for telegraf agent +[agent] + ## Default data collection interval for all inputs + interval = "10s" + ## Rounds collection interval to 'interval' + ## ie, if interval="10s" then always collect on :00, :10, :20, etc. + round_interval = true + + ## Telegraf will send metrics to outputs in batches of at most + ## metric_batch_size metrics. + ## This controls the size of writes that Telegraf sends to output plugins. + metric_batch_size = 1000 + + ## Maximum number of unwritten metrics per output. Increasing this value + ## allows for longer periods of output downtime without dropping metrics at the + ## cost of higher maximum memory usage. + metric_buffer_limit = 1000000 + + ## Collection jitter is used to jitter the collection by a random amount. + ## Each plugin will sleep for a random time within jitter before collecting. + ## This can be used to avoid many plugins querying things like sysfs at the + ## same time, which can have a measurable effect on the system. + collection_jitter = "0s" + + ## Collection offset is used to shift the collection by the given amount. + ## This can be be used to avoid many plugins querying constraint devices + ## at the same time by manually scheduling them in time. + # collection_offset = "0s" + + ## Default flushing interval for all outputs. Maximum flush_interval will be + ## flush_interval + flush_jitter + flush_interval = "60s" + ## Jitter the flush interval by a random amount. This is primarily to avoid + ## large write spikes for users running a large number of telegraf instances. + ## ie, a jitter of 5s and interval 10s means flushes will happen every 10-15s + flush_jitter = "0s" + + ## Collected metrics are rounded to the precision specified. Precision is + ## specified as an interval with an integer + unit (e.g. 0s, 10ms, 2us, 4s). + ## Valid time units are "ns", "us" (or "µs"), "ms", "s". + ## + ## By default or when set to "0s", precision will be set to the same + ## timestamp order as the collection interval, with the maximum being 1s: + ## ie, when interval = "10s", precision will be "1s" + ## when interval = "250ms", precision will be "1ms" + ## + ## Precision will NOT be used for service inputs. It is up to each individual + ## service input to set the timestamp at the appropriate precision. + precision = "0s" + + ## Log at debug level. + # debug = false + ## Log only error level messages. + # quiet = false + + ## Log target controls the destination for logs and can be one of "file", + ## "stderr" or, on Windows, "eventlog". When set to "file", the output file + ## is determined by the "logfile" setting. + # logtarget = "file" + + ## Name of the file to be logged to when using the "file" logtarget. If set to + ## the empty string then logs are written to stderr. + # logfile = "" + + ## The logfile will be rotated after the time interval specified. When set + ## to 0 no time based rotation is performed. Logs are rotated only when + ## written to, if there is no log activity rotation may be delayed. + # logfile_rotation_interval = "0h" + + ## The logfile will be rotated when it becomes larger than the specified + ## size. When set to 0 no size based rotation is performed. + # logfile_rotation_max_size = "0MB" + + ## Maximum number of rotated archives to keep, any older logs are deleted. + ## If set to -1, no archives are removed. + # logfile_rotation_max_archives = 5 + + ## Pick a timezone to use when logging or type 'local' for local time. + ## Example: America/Chicago + # log_with_timezone = "" + + ## Override default hostname, if empty use os.Hostname() + hostname = "" + ## If set to true, do no set the "host" tag in the telegraf agent. + omit_hostname = false + + ## Method of translating SNMP objects. Can be "netsnmp" (deprecated) which + ## translates by calling external programs snmptranslate and snmptable, + ## or "gosmi" which translates using the built-in gosmi library. + # snmp_translator = "netsnmp" + + ## Name of the file to load the state of plugins from and store the state to. + ## If uncommented and not empty, this file will be used to save the state of + ## stateful plugins on termination of Telegraf. If the file exists on start, + ## the state in the file will be restored for the plugins. + # statefile = "" + + +############################################################################### +# OUTPUT PLUGINS # +############################################################################### + + +# Configuration for sending metrics to InfluxDB 2.0 +[[outputs.influxdb_v2]] + ## The URLs of the InfluxDB cluster nodes. + ## + ## Multiple URLs can be specified for a single cluster, only ONE of the + ## urls will be written to each interval. + ## ex: urls = ["https://us-west-2-1.aws.cloud2.influxdata.com"] + urls = ["https://ifdb.holochain.org"] + + ## Token for authentication. + token = "${INFLUX_TOKEN}" + + ## Organization is the name of the organization you wish to write to. + organization = "holo" + + ## Destination bucket to write into. + bucket = "windtunnel" + + ## The value of this tag will be used to determine the bucket. If this + ## tag is not set the 'bucket' option is used as the default. + # bucket_tag = "" + + ## If true, the bucket tag will not be added to the metric. + # exclude_bucket_tag = false + + ## Timeout for HTTP messages. + # timeout = "5s" + + ## Additional HTTP headers + # http_headers = {"X-Special-Header" = "Special-Value"} + + ## HTTP Proxy override, if unset values the standard proxy environment + ## variables are consulted to determine which proxy, if any, should be used. + # http_proxy = "http://corporate.proxy:3128" + + ## HTTP User-Agent + # user_agent = "telegraf" + + ## Content-Encoding for write request body, can be set to "gzip" to + ## compress body or "identity" to apply no encoding. + # content_encoding = "gzip" + + ## Enable or disable uint support for writing uints influxdb 2.0. + # influx_uint_support = false + + ## HTTP/2 Timeouts + ## The following values control the HTTP/2 client's timeouts. These settings + ## are generally not required unless a user is seeing issues with client + ## disconnects. If a user does see issues, then it is suggested to set these + ## values to "15s" for ping timeout and "30s" for read idle timeout and + ## retry. + ## + ## Note that the timer for read_idle_timeout begins at the end of the last + ## successful write and not at the beginning of the next write. + # ping_timeout = "0s" + # read_idle_timeout = "0s" + + ## Optional TLS Config for use on HTTP connections. + # tls_ca = "/etc/telegraf/ca.pem" + # tls_cert = "/etc/telegraf/cert.pem" + # tls_key = "/etc/telegraf/key.pem" + ## Use TLS but skip chain & host verification + # insecure_skip_verify = false + + +############################################################################### +# INPUT PLUGINS # +############################################################################### + + +[[inputs.file]] + ## Files to parse each interval. Accept standard unix glob matching rules, + ## as well as ** to match recursive files and directories. + files = ["${WT_METRICS_DIR}/*.influx"] + + ## Character encoding to use when interpreting the file contents. Invalid + ## characters are replaced using the unicode replacement character. When set + ## to the empty string the data is not decoded to text. + ## ex: character_encoding = "utf-8" + ## character_encoding = "utf-16le" + ## character_encoding = "utf-16be" + ## character_encoding = "" + character_encoding = "utf-8" + + ## Data format to consume. + ## Each data format has its own unique set of configuration options, read + ## more about them here: + ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md + data_format = "influx" + + + ## Name a tag containing the name of the file the data was parsed from. Leave empty + ## to disable. Cautious when file name variation is high, this can increase the cardinality + ## significantly. Read more about cardinality here: + ## https://docs.influxdata.com/influxdb/cloud/reference/glossary/#series-cardinality + # file_tag = "" + From 00b0d1525a3ff40da5a96f05385d00184ebfe85c Mon Sep 17 00:00:00 2001 From: Callum Dunster Date: Thu, 12 Sep 2024 15:50:32 +0200 Subject: [PATCH 08/22] chore(ci): add extra-args option for scenarios that need it --- .github/workflows/performance.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/performance.yaml b/.github/workflows/performance.yaml index 4556c179..fb62c213 100644 --- a/.github/workflows/performance.yaml +++ b/.github/workflows/performance.yaml @@ -22,7 +22,7 @@ jobs: # Start a sandbox conductor and run it in the background nix develop .#ci -c bash -c "hc s clean && echo "1234" | hc s --piped create && echo "1234" | hc s --piped -f 8888 run &" - RUST_LOG=info nix run .#${{ matrix.scenario }} -- --connection-string ws://localhost:8888 --duration 120 --no-progress --reporter influx-file + RUST_LOG=info nix run .#${{ matrix.scenario }} -- --connection-string ws://localhost:8888 --duration 120 --no-progress --reporter influx-file ${{ matrix.extra-args }} pkill hc && pkill holochain && pkill lair-keystore From 35f278d1d404a91db7770f19b79de3c21b199285 Mon Sep 17 00:00:00 2001 From: Callum Dunster Date: Thu, 12 Sep 2024 16:07:08 +0200 Subject: [PATCH 09/22] chore(ci): add missing local tests --- .github/workflows/performance.yaml | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/.github/workflows/performance.yaml b/.github/workflows/performance.yaml index fb62c213..a744d5db 100644 --- a/.github/workflows/performance.yaml +++ b/.github/workflows/performance.yaml @@ -13,7 +13,14 @@ jobs: strategy: fail-fast: false matrix: - scenario: [ zome_call_single_value ] + scenario: [ zome_call_single_value, single_write_many_read, write_read, write_query, local_signals, write_validated ] + include: + - scenario: dht_sync_lag + extra-args: "--agents 2 --behaviour write:1 --behaviour record_lag:1" + - scenario: app_install + extra-args: "--agents 2 --behaviour minimal:1 --behaviour large:1" + - scenario: first_call + extra-args: "--agents 1 --behaviour local:1" steps: - uses: actions/checkout@v4 From 46c033774423b331cc10fa6811f1ad3ca52d89f1 Mon Sep 17 00:00:00 2001 From: Callum Dunster Date: Thu, 12 Sep 2024 16:09:40 +0200 Subject: [PATCH 10/22] chore(ci): add TryCP performance tests --- .github/workflows/performance.yaml | 31 ++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/.github/workflows/performance.yaml b/.github/workflows/performance.yaml index a744d5db..33be3746 100644 --- a/.github/workflows/performance.yaml +++ b/.github/workflows/performance.yaml @@ -35,3 +35,34 @@ jobs: - name: Run Telegraf to upload influx metrics run: nix run nixpkgs#telegraf -- --config telegraf/runner-telegraf.conf --once + + trycp-test: + runs-on: [self-hosted, wind-tunnel] + strategy: + fail-fast: false + matrix: + scenario: [ trycp_write_validated, remote_call_rate, validation_receipts ] + include: + - scenario: two_party_countersigning + extra-args: "--behaviour initiate:1 --behaviour participate:1" + steps: + - uses: actions/checkout@v4 + + - name: Smoke test - ${{ matrix.scenario }} + run: | + set -x + + # Start local network services + nix develop .#ci -c bash -c "hc-run-local-services --bootstrap-port 4422 --signal-port 4423 &" + # Start a TryCP instance + nix develop .#ci -c bash -c "source ./scripts/trycp.sh && start_trycp &" + + RUST_LOG=warn CONDUCTOR_CONFIG="CI" MIN_PEERS=2 nix run .#${{ matrix.scenario }} -- --targets targets-ci.yaml --instances-per-target 2 --duration 120 --no-progress --reporter influx-file ${{ matrix.extra-args }} + + # Stop the TryCP instance + nix develop .#ci -c bash -c "source ./scripts/trycp.sh && stop_trycp" + # Stop local network services + pkill hc-run-local + + - name: Run Telegraf to upload influx metrics + run: nix run nixpkgs#telegraf -- --config telegraf/runner-telegraf.conf --once From 3d0e70286e772b211e0b8858c35867b80d3a1f2b Mon Sep 17 00:00:00 2001 From: Callum Dunster Date: Thu, 12 Sep 2024 17:20:53 +0200 Subject: [PATCH 11/22] chore(ci): add printing of telegraf errors and warnings --- .github/workflows/performance.yaml | 34 ++++++++++++++++++++++++++++-- 1 file changed, 32 insertions(+), 2 deletions(-) diff --git a/.github/workflows/performance.yaml b/.github/workflows/performance.yaml index 33be3746..ba1313d2 100644 --- a/.github/workflows/performance.yaml +++ b/.github/workflows/performance.yaml @@ -34,7 +34,22 @@ jobs: pkill hc && pkill holochain && pkill lair-keystore - name: Run Telegraf to upload influx metrics - run: nix run nixpkgs#telegraf -- --config telegraf/runner-telegraf.conf --once + run: | + if ! nix run nixpkgs#telegraf -- --config telegraf/runner-telegraf.conf --once > >(tee logs/telegraf-stdout.log) 2> >(tee logs/telegraf-stderr.log >&2) + then + echo "::group::Telegraf errors" + status=1 + # Print errors as such in GitHub logs. + grep "E!" logs/telegraf-stderr.log | xargs -l echo "::error ::" + echo "::endgroup::" + fi + + echo "::group::Telegraf warnings" + # Print warnings as such in GitHub logs. + grep "W!" logs/telegraf-stderr.log | xargs -l echo "::warning ::" + echo "::endgroup::" + + exit ${status-0} trycp-test: runs-on: [self-hosted, wind-tunnel] @@ -65,4 +80,19 @@ jobs: pkill hc-run-local - name: Run Telegraf to upload influx metrics - run: nix run nixpkgs#telegraf -- --config telegraf/runner-telegraf.conf --once + run: | + if ! nix run nixpkgs#telegraf -- --config telegraf/runner-telegraf.conf --once > >(tee logs/telegraf-stdout.log) 2> >(tee logs/telegraf-stderr.log >&2) + then + echo "::group::Telegraf errors" + status=1 + # Print errors as such in GitHub logs. + grep "E!" logs/telegraf-stderr.log | xargs -l echo "::error ::" + echo "::endgroup::" + fi + + echo "::group::Telegraf warnings" + # Print warnings as such in GitHub logs. + grep "W!" logs/telegraf-stderr.log | xargs -l echo "::warning ::" + echo "::endgroup::" + + exit ${status-0} From 0aaa1b9a093a86bd850c69b3cea2694bc10cdb7b Mon Sep 17 00:00:00 2001 From: Callum Dunster Date: Fri, 13 Sep 2024 10:56:26 +0200 Subject: [PATCH 12/22] chore(ci): get Run ID from logs and set as output and summary --- .github/workflows/performance.yaml | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/.github/workflows/performance.yaml b/.github/workflows/performance.yaml index ba1313d2..0c13c857 100644 --- a/.github/workflows/performance.yaml +++ b/.github/workflows/performance.yaml @@ -25,14 +25,19 @@ jobs: - uses: actions/checkout@v4 - name: Smoke test - ${{ matrix.scenario }} + id: run_test run: | # Start a sandbox conductor and run it in the background nix develop .#ci -c bash -c "hc s clean && echo "1234" | hc s --piped create && echo "1234" | hc s --piped -f 8888 run &" - RUST_LOG=info nix run .#${{ matrix.scenario }} -- --connection-string ws://localhost:8888 --duration 120 --no-progress --reporter influx-file ${{ matrix.extra-args }} + RUST_LOG=info nix run .#${{ matrix.scenario }} -- --connection-string ws://localhost:8888 --duration 120 --no-progress --reporter influx-file ${{ matrix.extra-args }} > >(tee logs/scenario-stdout.log) 2> >(tee logs/scenario-stderr.log >&2) pkill hc && pkill holochain && pkill lair-keystore + RUN_ID=$(grep -m1 "#RunId" logs/scenario-stdout.log | sed 's/#RunId: \[\(.\+\)\]/\1/') + echo "RUN_ID=$RUN_ID" >> "$GITHUB_OUTPUT" + echo "# Run ID: $RUN_ID" >> $GITHUB_STEP_SUMMARY + - name: Run Telegraf to upload influx metrics run: | if ! nix run nixpkgs#telegraf -- --config telegraf/runner-telegraf.conf --once > >(tee logs/telegraf-stdout.log) 2> >(tee logs/telegraf-stderr.log >&2) @@ -72,13 +77,17 @@ jobs: # Start a TryCP instance nix develop .#ci -c bash -c "source ./scripts/trycp.sh && start_trycp &" - RUST_LOG=warn CONDUCTOR_CONFIG="CI" MIN_PEERS=2 nix run .#${{ matrix.scenario }} -- --targets targets-ci.yaml --instances-per-target 2 --duration 120 --no-progress --reporter influx-file ${{ matrix.extra-args }} + RUST_LOG=warn CONDUCTOR_CONFIG="CI" MIN_PEERS=2 nix run .#${{ matrix.scenario }} -- --targets targets-ci.yaml --instances-per-target 2 --duration 120 --no-progress --reporter influx-file ${{ matrix.extra-args }} > >(tee logs/scenario-stdout.log) 2> >(tee logs/scenario-stderr.log >&2) # Stop the TryCP instance nix develop .#ci -c bash -c "source ./scripts/trycp.sh && stop_trycp" # Stop local network services pkill hc-run-local + RUN_ID=$(grep -m1 "#RunId" logs/scenario-stdout.log | sed 's/#RunId: \[\(.\+\)\]/\1/') + echo "RUN_ID=$RUN_ID" >> "$GITHUB_OUTPUT" + echo "# Run ID: $RUN_ID" >> $GITHUB_STEP_SUMMARY + - name: Run Telegraf to upload influx metrics run: | if ! nix run nixpkgs#telegraf -- --config telegraf/runner-telegraf.conf --once > >(tee logs/telegraf-stdout.log) 2> >(tee logs/telegraf-stderr.log >&2) From 7422a4288c5a7cc0e35ef6c8e6fd53b25330b97e Mon Sep 17 00:00:00 2001 From: Callum Dunster Date: Fri, 13 Sep 2024 11:15:36 +0200 Subject: [PATCH 13/22] chore(ci): add step to upload logs as artifacts --- .github/workflows/performance.yaml | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/.github/workflows/performance.yaml b/.github/workflows/performance.yaml index 0c13c857..f02b5bd6 100644 --- a/.github/workflows/performance.yaml +++ b/.github/workflows/performance.yaml @@ -56,6 +56,17 @@ jobs: exit ${status-0} + - name: Upload logs as artifacts + if: success() || failure() + uses: actions/upload-artifact@v4 + with: + name: "logs_${{ matrix.scenario }}" + path: | + logs/scenario-stdout.log + logs/scenario-stderr.log + logs/telegraf-stdout.log + logs/telegraf-stderr.log + trycp-test: runs-on: [self-hosted, wind-tunnel] strategy: @@ -69,6 +80,7 @@ jobs: - uses: actions/checkout@v4 - name: Smoke test - ${{ matrix.scenario }} + id: run_test run: | set -x @@ -105,3 +117,15 @@ jobs: echo "::endgroup::" exit ${status-0} + + - name: Upload logs as artifacts + if: success() || failure() + uses: actions/upload-artifact@v4 + with: + name: "logs_${{ matrix.scenario }}" + path: | + logs/scenario-stdout.log + logs/scenario-stderr.log + logs/telegraf-stdout.log + logs/telegraf-stderr.log + logs/${{ steps.run_test.outputs.RUN_ID }}/ From d92d1e1f56c0cc2302815e4f57059406c7653414 Mon Sep 17 00:00:00 2001 From: Callum Dunster Date: Fri, 13 Sep 2024 13:59:03 +0200 Subject: [PATCH 14/22] chore(ci): add link to logs in job summary --- .github/workflows/performance.yaml | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/.github/workflows/performance.yaml b/.github/workflows/performance.yaml index f02b5bd6..f0c112c2 100644 --- a/.github/workflows/performance.yaml +++ b/.github/workflows/performance.yaml @@ -58,6 +58,7 @@ jobs: - name: Upload logs as artifacts if: success() || failure() + id: upload-artifact uses: actions/upload-artifact@v4 with: name: "logs_${{ matrix.scenario }}" @@ -67,6 +68,10 @@ jobs: logs/telegraf-stdout.log logs/telegraf-stderr.log + - name: Output Path to logs in summary + run: | + echo "# Logs: [${{ steps.upload-artifact.outputs.artifact-id }}](${{ steps.upload-artifact.outputs.artifact-url }})" >> $GITHUB_STEP_SUMMARY + trycp-test: runs-on: [self-hosted, wind-tunnel] strategy: @@ -120,6 +125,7 @@ jobs: - name: Upload logs as artifacts if: success() || failure() + id: upload-artifact uses: actions/upload-artifact@v4 with: name: "logs_${{ matrix.scenario }}" @@ -129,3 +135,7 @@ jobs: logs/telegraf-stdout.log logs/telegraf-stderr.log logs/${{ steps.run_test.outputs.RUN_ID }}/ + + - name: Output Path to logs in summary + run: | + echo "# Logs: [${{ steps.upload-artifact.outputs.artifact-id }}](${{ steps.upload-artifact.outputs.artifact-url }})" >> $GITHUB_STEP_SUMMARY From eb59f4042f8a732472c21cf9b743d8534976ad94 Mon Sep 17 00:00:00 2001 From: Callum Dunster Date: Fri, 13 Sep 2024 15:26:12 +0200 Subject: [PATCH 15/22] docs(ci): add comments about how to add additional scenarios --- .github/workflows/performance.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/performance.yaml b/.github/workflows/performance.yaml index f0c112c2..057ccd8d 100644 --- a/.github/workflows/performance.yaml +++ b/.github/workflows/performance.yaml @@ -13,7 +13,9 @@ jobs: strategy: fail-fast: false matrix: + # To run a local test with default configuration, add the scenario name to this array. scenario: [ zome_call_single_value, single_write_many_read, write_read, write_query, local_signals, write_validated ] + # To run a local test with additional configuration, add the scenario name and `extra-args` as an `include` item. include: - scenario: dht_sync_lag extra-args: "--agents 2 --behaviour write:1 --behaviour record_lag:1" @@ -77,7 +79,9 @@ jobs: strategy: fail-fast: false matrix: + # To run a test with TryCP and default configuration, add the scenario name to this array. scenario: [ trycp_write_validated, remote_call_rate, validation_receipts ] + # To run a test with TryCP and additional configuration, add the scenario name and `extra-args` as an `include` item. include: - scenario: two_party_countersigning extra-args: "--behaviour initiate:1 --behaviour participate:1" From 5f376c0c3cdaaa554818b021f81fd58f9f57804b Mon Sep 17 00:00:00 2001 From: Callum Dunster Date: Fri, 13 Sep 2024 15:22:21 +0200 Subject: [PATCH 16/22] chore(ci): increase log level of TryCP tests to info --- .github/workflows/performance.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/performance.yaml b/.github/workflows/performance.yaml index 057ccd8d..48c350a1 100644 --- a/.github/workflows/performance.yaml +++ b/.github/workflows/performance.yaml @@ -98,7 +98,7 @@ jobs: # Start a TryCP instance nix develop .#ci -c bash -c "source ./scripts/trycp.sh && start_trycp &" - RUST_LOG=warn CONDUCTOR_CONFIG="CI" MIN_PEERS=2 nix run .#${{ matrix.scenario }} -- --targets targets-ci.yaml --instances-per-target 2 --duration 120 --no-progress --reporter influx-file ${{ matrix.extra-args }} > >(tee logs/scenario-stdout.log) 2> >(tee logs/scenario-stderr.log >&2) + RUST_LOG=info CONDUCTOR_CONFIG="CI" TRYCP_RUST_LOG="info" MIN_PEERS=2 nix run .#${{ matrix.scenario }} -- --targets targets-ci.yaml --instances-per-target 2 --duration 120 --no-progress --reporter influx-file ${{ matrix.extra-args }} > >(tee logs/scenario-stdout.log) 2> >(tee logs/scenario-stderr.log >&2) # Stop the TryCP instance nix develop .#ci -c bash -c "source ./scripts/trycp.sh && stop_trycp" From e53d598f6e71b036a9592c41159dc0226361d04b Mon Sep 17 00:00:00 2001 From: Callum Dunster Date: Mon, 16 Sep 2024 15:31:27 +0200 Subject: [PATCH 17/22] refactor(ci): rename `test` job to `local-test` Co-authored-by: ThetaSinner --- .github/workflows/performance.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/performance.yaml b/.github/workflows/performance.yaml index 48c350a1..0dfe1cce 100644 --- a/.github/workflows/performance.yaml +++ b/.github/workflows/performance.yaml @@ -8,7 +8,7 @@ env: WT_METRICS_DIR: "${{ github.workspace }}/telegraf/metrics" jobs: - test: + local-test: runs-on: [self-hosted, wind-tunnel] strategy: fail-fast: false From 899b26b1369897bf8dd38a54ca2b1db999474a05 Mon Sep 17 00:00:00 2001 From: Callum Dunster Date: Mon, 16 Sep 2024 15:35:06 +0200 Subject: [PATCH 18/22] refactor(ci): rename steps that run the scenarios Co-authored-by: ThetaSinner --- .github/workflows/performance.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/performance.yaml b/.github/workflows/performance.yaml index 0dfe1cce..a930e961 100644 --- a/.github/workflows/performance.yaml +++ b/.github/workflows/performance.yaml @@ -26,7 +26,7 @@ jobs: steps: - uses: actions/checkout@v4 - - name: Smoke test - ${{ matrix.scenario }} + - name: Run - ${{ matrix.scenario }} id: run_test run: | # Start a sandbox conductor and run it in the background @@ -88,7 +88,7 @@ jobs: steps: - uses: actions/checkout@v4 - - name: Smoke test - ${{ matrix.scenario }} + - name: Run - ${{ matrix.scenario }} id: run_test run: | set -x From acfee97e7bd4df28118062b4970cd0f4478d2ce2 Mon Sep 17 00:00:00 2001 From: Callum Dunster Date: Mon, 16 Sep 2024 16:54:21 +0200 Subject: [PATCH 19/22] chore(ci): add script to flake to call telegraf as expected in the CI This is so that we use the telegraf version from the lockfile. --- .github/workflows/performance.yaml | 4 ++-- flake.nix | 5 +++++ 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/.github/workflows/performance.yaml b/.github/workflows/performance.yaml index a930e961..4edb23cf 100644 --- a/.github/workflows/performance.yaml +++ b/.github/workflows/performance.yaml @@ -42,7 +42,7 @@ jobs: - name: Run Telegraf to upload influx metrics run: | - if ! nix run nixpkgs#telegraf -- --config telegraf/runner-telegraf.conf --once > >(tee logs/telegraf-stdout.log) 2> >(tee logs/telegraf-stderr.log >&2) + if ! nix run .#ci-telegraf then echo "::group::Telegraf errors" status=1 @@ -111,7 +111,7 @@ jobs: - name: Run Telegraf to upload influx metrics run: | - if ! nix run nixpkgs#telegraf -- --config telegraf/runner-telegraf.conf --once > >(tee logs/telegraf-stdout.log) 2> >(tee logs/telegraf-stderr.log >&2) + if ! nix run .#ci-telegraf then echo "::group::Telegraf errors" status=1 diff --git a/flake.nix b/flake.nix index d0d79503..ea139cff 100644 --- a/flake.nix +++ b/flake.nix @@ -111,6 +111,11 @@ packages = { default = config.workspace.workspace; inherit (config.workspace) workspace; + ci-telegraf = pkgs.writeShellApplication { + name = "ci-telegraf"; + runtimeInputs = [ pkgs.telegraf ]; + text = "telegraf --config telegraf/runner-telegraf.conf --once > >(tee logs/telegraf-stdout.log) 2> >(tee logs/telegraf-stderr.log >&2)"; + }; }; checks = { From e4d2501f9a2915f3273db5cd3a31934081003f44 Mon Sep 17 00:00:00 2001 From: Callum Dunster Date: Mon, 16 Sep 2024 17:12:59 +0200 Subject: [PATCH 20/22] docs(ci): remove out-dated TODO --- .github/workflows/test.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 9c804247..3ee858da 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -72,7 +72,6 @@ jobs: # Start a sandbox conductor and run it in the background nix develop .#ci -c bash -c "hc s clean && echo "1234" | hc s --piped create && echo "1234" | hc s --piped -f 8888 run &" - # TODO using `localhost` is resolving to an IPv6 address, but why is that giving a connection refused? # Run the scenario for 5 seconds RUST_LOG=info nix run .#zome_call_single_value -- --connection-string ws://localhost:8888 --duration 5 --no-progress From d8ccaebe8370ac3493a50b3a819c15657b0ad012 Mon Sep 17 00:00:00 2001 From: Callum Dunster Date: Mon, 16 Sep 2024 17:14:09 +0200 Subject: [PATCH 21/22] style(ci): add whitespace between scenarios in list for readability --- .github/workflows/performance.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/performance.yaml b/.github/workflows/performance.yaml index 4edb23cf..611b4f82 100644 --- a/.github/workflows/performance.yaml +++ b/.github/workflows/performance.yaml @@ -19,8 +19,10 @@ jobs: include: - scenario: dht_sync_lag extra-args: "--agents 2 --behaviour write:1 --behaviour record_lag:1" + - scenario: app_install extra-args: "--agents 2 --behaviour minimal:1 --behaviour large:1" + - scenario: first_call extra-args: "--agents 1 --behaviour local:1" steps: From 7411581d613a478c1e7af9072a07ff812dda481e Mon Sep 17 00:00:00 2001 From: Callum Dunster Date: Mon, 16 Sep 2024 18:13:00 +0200 Subject: [PATCH 22/22] chore(ci): split the app_install scenario in two This tests installing the minimally-small and large hApps separately. --- .github/workflows/performance.yaml | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/.github/workflows/performance.yaml b/.github/workflows/performance.yaml index 611b4f82..d9197d3e 100644 --- a/.github/workflows/performance.yaml +++ b/.github/workflows/performance.yaml @@ -20,8 +20,13 @@ jobs: - scenario: dht_sync_lag extra-args: "--agents 2 --behaviour write:1 --behaviour record_lag:1" + # Test how long it takes to install a minimally-small hApp. - scenario: app_install - extra-args: "--agents 2 --behaviour minimal:1 --behaviour large:1" + extra-args: "--behaviour minimal:1" + + # Test how long it takes to install a large hApp. + - scenario: app_install + extra-args: "--behaviour large:1" - scenario: first_call extra-args: "--agents 1 --behaviour local:1" @@ -65,7 +70,7 @@ jobs: id: upload-artifact uses: actions/upload-artifact@v4 with: - name: "logs_${{ matrix.scenario }}" + name: "logs_${{ matrix.scenario }}_${{ steps.run_test.outputs.RUN_ID }}" path: | logs/scenario-stdout.log logs/scenario-stderr.log @@ -134,7 +139,7 @@ jobs: id: upload-artifact uses: actions/upload-artifact@v4 with: - name: "logs_${{ matrix.scenario }}" + name: "logs_${{ matrix.scenario }}_${{ steps.run_test.outputs.RUN_ID }}" path: | logs/scenario-stdout.log logs/scenario-stderr.log