From 9f60c83e792d33e9c281cdda761ddf61b52d83f6 Mon Sep 17 00:00:00 2001 From: vthiery Date: Thu, 9 Jan 2025 15:32:45 +0100 Subject: [PATCH] chore: fix references --- .github/workflows/buildChaosBlog.yml | 2 +- .github/workflows/go-ci.yml | 2 +- README.md | 2 +- .../blog/2020-06-04-first-chaos-day/index.md | 2 +- .../blog/2020-06-11-high-cpu-gateway/index.md | 2 +- .../index.md | 4 ++-- .../2020-07-16-big-multi-instance/index.md | 4 ++-- .../index.md | 4 ++-- .../blog/2020-10-06-toxi-proxy/index.md | 2 +- .../index.md | 6 ++--- .../2020-10-20-non-graceful-shutdown/index.md | 8 +++---- .../index.md | 4 ++-- .../index.md | 6 ++--- .../index.md | 4 ++-- .../index.md | 6 ++--- .../2021-03-30-set-file-immutable/index.md | 2 +- .../index.md | 6 ++--- .../2021-04-29-Corrupted-Snapshot/index.md | 2 +- .../blog/2021-05-25-Reset-Clock/index.md | 2 +- chaos-days/blog/2021-06-08-Full-Disk/index.md | 6 ++--- .../blog/2021-09-23-Old-Clients/index.md | 4 ++-- .../index.md | 4 ++-- .../index.md | 2 +- .../index.md | 2 +- .../index.md | 10 ++++---- .../index.md | 24 +++++++++---------- .../index.md | 6 ++--- .../2023-04-06-gateway-termination/index.md | 6 ++--- .../index.md | 4 ++-- .../index.md | 8 +++---- .../index.md | 2 +- .../2023-11-30-Job-push-overloading/index.md | 2 +- .../index.md | 6 ++--- .../2024-08-16-Operate-load-handling/index.md | 2 +- .../index.md | 2 +- .../2024-10-24-Camunda-Exporter-MVP/index.md | 6 ++--- .../index.md | 6 ++--- chaos-days/docusaurus.config.js | 10 ++++---- go-chaos/README.md | 2 +- go-chaos/backend/clients.go | 2 +- go-chaos/backend/connection.go | 2 +- go-chaos/build.sh | 2 +- go-chaos/cmd/backup.go | 2 +- go-chaos/cmd/cluster.go | 2 +- go-chaos/cmd/connect.go | 2 +- go-chaos/cmd/dataloss_sim.go | 2 +- go-chaos/cmd/deploy.go | 4 ++-- go-chaos/cmd/disconnect.go | 2 +- go-chaos/cmd/exporting.go | 2 +- go-chaos/cmd/publish.go | 2 +- go-chaos/cmd/restart.go | 2 +- go-chaos/cmd/root.go | 2 +- go-chaos/cmd/stress.go | 2 +- go-chaos/cmd/terminate.go | 2 +- go-chaos/cmd/topology.go | 2 +- go-chaos/cmd/verify.go | 2 +- go-chaos/cmd/version.go | 2 +- go-chaos/cmd/worker.go | 4 ++-- go-chaos/cmd/zeebePods.go | 2 +- go-chaos/deploy/README.md | 6 ++--- go-chaos/go.mod | 2 +- .../cluster_cmd_integration_test.go | 2 +- go-chaos/integration/integration_test.go | 4 ++-- .../chaos-experiments/chaos_experiments.go | 2 +- go-chaos/main.go | 2 +- go-chaos/main_test.go | 2 +- go-chaos/worker/chaos_worker.go | 4 ++-- go-chaos/worker/chaos_worker_test.go | 2 +- 68 files changed, 127 insertions(+), 127 deletions(-) diff --git a/.github/workflows/buildChaosBlog.yml b/.github/workflows/buildChaosBlog.yml index c8fd6b1ed..ada2beda7 100644 --- a/.github/workflows/buildChaosBlog.yml +++ b/.github/workflows/buildChaosBlog.yml @@ -31,7 +31,7 @@ jobs: name: Auto-merge dependency PRs runs-on: ubuntu-latest needs: [ build ] - if: github.repository == 'zeebe-io/zeebe-chaos' && (github.actor == 'dependabot[bot]' || github.actor == 'renovate[bot]') + if: github.repository == 'camunda/zeebe-chaos' && (github.actor == 'dependabot[bot]' || github.actor == 'renovate[bot]') permissions: checks: read pull-requests: write diff --git a/.github/workflows/go-ci.yml b/.github/workflows/go-ci.yml index be476c65b..6333a20d2 100644 --- a/.github/workflows/go-ci.yml +++ b/.github/workflows/go-ci.yml @@ -44,7 +44,7 @@ jobs: name: Auto-merge dependabot PRs runs-on: ubuntu-latest needs: [ go-ci ] - if: github.repository == 'zeebe-io/zeebe-chaos' && (github.actor == 'dependabot[bot]' || github.actor == 'renovate[bot]') + if: github.repository == 'camunda/zeebe-chaos' && (github.actor == 'dependabot[bot]' || github.actor == 'renovate[bot]') permissions: checks: read pull-requests: write diff --git a/README.md b/README.md index d2e58defb..4711cc404 100644 --- a/README.md +++ b/README.md @@ -34,6 +34,6 @@ makes it easy to create chaos experiments and also automate them later on. All our current experiments are located under `chaos-days/blog/`, for more details please have a look at the [README](chaos-days/blog/README.md). -Alternatively all our chaos-days experiments can be found [here](https://zeebe-io.github.io/zeebe-chaos/) in blog +Alternatively all our chaos-days experiments can be found [here](https://camunda.github.io/zeebe-chaos/) in blog format. diff --git a/chaos-days/blog/2020-06-04-first-chaos-day/index.md b/chaos-days/blog/2020-06-04-first-chaos-day/index.md index f135a3306..3143bccb6 100644 --- a/chaos-days/blog/2020-06-04-first-chaos-day/index.md +++ b/chaos-days/blog/2020-06-04-first-chaos-day/index.md @@ -14,7 +14,7 @@ authors: zell * Documented failure cases for exporter (already some exist, it seemed) gave me a new idea for ZEP * Introduced Peter to our Chaos Repository, discussed a bit about the hypothesis backlog, reopened the Chaos Trello board where we will organize ourselves - * Run a chaos experiment, where we put high CPU load on the Leader [#6](https://github.com/zeebe-io/zeebe-chaos/issues/6) + * Run a chaos experiment, where we put high CPU load on the Leader [#6](https://github.com/camunda/zeebe-chaos/issues/6) diff --git a/chaos-days/blog/2020-06-11-high-cpu-gateway/index.md b/chaos-days/blog/2020-06-11-high-cpu-gateway/index.md index ca3aedddc..29e48ea38 100644 --- a/chaos-days/blog/2020-06-11-high-cpu-gateway/index.md +++ b/chaos-days/blog/2020-06-11-high-cpu-gateway/index.md @@ -13,7 +13,7 @@ authors: zell * Updated failure cases documentation for exporter based on review * Documented failure cases for ZeebeDB * Wrote an chaostoolkit experiment based on the last manual Chaos experiment - * Run a chaos experiment with @Deepthi, where we put high CPU load on the standalone gateway https://github.com/zeebe-io/zeebe-chaos/issues/28 + * Run a chaos experiment with @Deepthi, where we put high CPU load on the standalone gateway https://github.com/camunda/zeebe-chaos/issues/28 diff --git a/chaos-days/blog/2020-07-09-timer-and-huge-variables/index.md b/chaos-days/blog/2020-07-09-timer-and-huge-variables/index.md index 2c3309b0b..421762691 100644 --- a/chaos-days/blog/2020-07-09-timer-and-huge-variables/index.md +++ b/chaos-days/blog/2020-07-09-timer-and-huge-variables/index.md @@ -21,12 +21,12 @@ authors: zell ### A Lot of Timers -Based on the Hypothesis written here: [#31](https://github.com/zeebe-io/zeebe-chaos/issues/31) we run an experiment with a stable load of 10 simple workflow instances per second (only start and end event) and 10 workflow instances with +Based on the Hypothesis written here: [#31](https://github.com/camunda/zeebe-chaos/issues/31) we run an experiment with a stable load of 10 simple workflow instances per second (only start and end event) and 10 workflow instances with multiple timers. We wanted to explore what happens when we have a lot of timers running and especially what happens when the are triggered at once. We created the following workflow model, where timers are exponentially created. ![timerProcess](timerProcess.png) -The experiments is based on the hypotheses we wrote here [#31](https://github.com/zeebe-io/zeebe-chaos/issues/31). +The experiments is based on the hypotheses we wrote here [#31](https://github.com/camunda/zeebe-chaos/issues/31). #### Expectations diff --git a/chaos-days/blog/2020-07-16-big-multi-instance/index.md b/chaos-days/blog/2020-07-16-big-multi-instance/index.md index 28589eda7..fbbc3247b 100644 --- a/chaos-days/blog/2020-07-16-big-multi-instance/index.md +++ b/chaos-days/blog/2020-07-16-big-multi-instance/index.md @@ -10,7 +10,7 @@ authors: zell # Chaos Day Summary - * investigate and fix automated chaos experiments - works again with [88c404f](https://github.com/zeebe-io/zeebe-chaos/commit/88c404f97514d4a7a511ce9751085acdd1720cd9) and [cd8d685](https://github.com/zeebe-io/zeebe-chaos/commit/cd8d685b83eaa1ac9050ad3d16868389e1c0c36d) + * investigate and fix automated chaos experiments - works again with [88c404f](https://github.com/camunda/zeebe-chaos/commit/88c404f97514d4a7a511ce9751085acdd1720cd9) and [cd8d685](https://github.com/camunda/zeebe-chaos/commit/cd8d685b83eaa1ac9050ad3d16868389e1c0c36d) * Closed some issues in the backlog * Run a chaos experiment with bigger multi instance to reach `maxMessageSize` @@ -18,7 +18,7 @@ authors: zell ## Chaos Experiment - We wanted to run a chaos experiment, which covers [#33](https://github.com/zeebe-io/zeebe-chaos/issues/33). + We wanted to run a chaos experiment, which covers [#33](https://github.com/camunda/zeebe-chaos/issues/33). ### Expected diff --git a/chaos-days/blog/2020-07-30-experiment-without-exporters/index.md b/chaos-days/blog/2020-07-30-experiment-without-exporters/index.md index 688b8f41e..a2973802b 100644 --- a/chaos-days/blog/2020-07-30-experiment-without-exporters/index.md +++ b/chaos-days/blog/2020-07-30-experiment-without-exporters/index.md @@ -16,7 +16,7 @@ authors: zell ## Chaos Experiment - We wanted to run a chaos experiment, which covers [#20](https://github.com/zeebe-io/zeebe-chaos/issues/20). + We wanted to run a chaos experiment, which covers [#20](https://github.com/camunda/zeebe-chaos/issues/20). Furthermore, it was recently asked in the forum whether it makes a difference performance wise to run a broker without exporters, see [here](https://forum.zeebe.io/t/zeebe-low-performance/1356/17) ### Expected @@ -32,7 +32,7 @@ authors: zell * only with metrics exporter * without any exporter - These benchmarks run overnight without bigger issues. This means all of three where able to take snapshots and compact the log. This satisfy our hypothesis of https://github.com/zeebe-io/zeebe-chaos/issues/20 . + These benchmarks run overnight without bigger issues. This means all of three where able to take snapshots and compact the log. This satisfy our hypothesis of https://github.com/camunda/zeebe-chaos/issues/20 . | Default | Without exporters | |---|---| diff --git a/chaos-days/blog/2020-10-06-toxi-proxy/index.md b/chaos-days/blog/2020-10-06-toxi-proxy/index.md index a1c711a02..50215afd0 100644 --- a/chaos-days/blog/2020-10-06-toxi-proxy/index.md +++ b/chaos-days/blog/2020-10-06-toxi-proxy/index.md @@ -174,7 +174,7 @@ Actually I would expect here an error instead of just returning null. Peter volunteered for automating a new chaos experiment, where we put high load on a broker and expect that we have no leader change. This was previous an issue, since the leaders were not able to send heartbeats in time. Related issue #7. ### Time reset -I wanted to work on the clock reset [#3](https://github.com/zeebe-io/zeebe-chaos/issues/3). +I wanted to work on the clock reset [#3](https://github.com/camunda/zeebe-chaos/issues/3). This seems to be not easily possible in kubernetes or at least with our current images, since we need for that root privilges. ```sh diff --git a/chaos-days/blog/2020-10-13-multiple-leader-changes/index.md b/chaos-days/blog/2020-10-13-multiple-leader-changes/index.md index 6c4127b68..316c1f541 100644 --- a/chaos-days/blog/2020-10-13-multiple-leader-changes/index.md +++ b/chaos-days/blog/2020-10-13-multiple-leader-changes/index.md @@ -12,7 +12,7 @@ authors: zell Today I wanted to add new chaostoolkit experiment, which we can automate. We already have experiments like restarting followers and leaders for a partition, but in the past what also caused issues was multiple restarts/leader changes -in a short period of time. This is the reason why I created [#39](https://github.com/zeebe-io/zeebe-chaos/issues/39). +in a short period of time. This is the reason why I created [#39](https://github.com/camunda/zeebe-chaos/issues/39). @@ -35,7 +35,7 @@ We requesting the Topology, determine the leader for partition one restart that ### Result -The corresponding experiment was added via this [commit](https://github.com/zeebe-io/zeebe-chaos/commit/11c3a96fc87991f649fb1559363ba335b2bf42a1). +The corresponding experiment was added via this [commit](https://github.com/camunda/zeebe-chaos/commit/11c3a96fc87991f649fb1559363ba335b2bf42a1). We were able to prove that our hypothesis is true. we are able to handle multiple leader changes even in a short period of time. #### Metrics @@ -101,7 +101,7 @@ Put high load on the cluster for several minutes, via creating workflow instance ### Result -@pihme create a new PR to add the experiment [#41](https://github.com/zeebe-io/zeebe-chaos/pull/41) +@pihme create a new PR to add the experiment [#41](https://github.com/camunda/zeebe-chaos/pull/41) #### Metrics diff --git a/chaos-days/blog/2020-10-20-non-graceful-shutdown/index.md b/chaos-days/blog/2020-10-20-non-graceful-shutdown/index.md index 933074a9a..bf2897000 100644 --- a/chaos-days/blog/2020-10-20-non-graceful-shutdown/index.md +++ b/chaos-days/blog/2020-10-20-non-graceful-shutdown/index.md @@ -20,14 +20,14 @@ I did that on Wednesday (21-10-2020). ## PR Merge I tried again the new chaos experiment with a Production M cluster, before merging. It worked quite smooth. -PR is merged [#41](https://github.com/zeebe-io/zeebe-chaos/pull/41) :tada: +PR is merged [#41](https://github.com/camunda/zeebe-chaos/pull/41) :tada: ## Non-graceful shutdown Currently in our experiments we do a normal `kubectl delete pod`, which does an graceful shutdown. The application has time to stop it's services etc. It would be interesting how Zeebe handles non-graceful shutdowns. In order to achieve that we can use the option `--grace-period=0`. For more information you can read for example [this](https://kubernetes.io/docs/tasks/run-application/force-delete-stateful-set-pod/#force-deletion) I added additional experiments to our normal follower and leader restarts experiments, such that we have both graceful and non-graceful restarts. -Both seem to work without any issues. I was also able to fix some bash script error with the help of [shellcheck](https://github.com/koalaman/shellcheck). Related issue https://github.com/zeebe-io/zeebe-chaos/issues/42. +Both seem to work without any issues. I was also able to fix some bash script error with the help of [shellcheck](https://github.com/koalaman/shellcheck). Related issue https://github.com/camunda/zeebe-chaos/issues/42. Example output: @@ -56,8 +56,8 @@ Example output: Related commits: - * [Restart leader non-gracefully](https://github.com/zeebe-io/zeebe-chaos/commit/e6260cb8612a983c8ed74fd2a37a249987ad3d3d) - * [Restart follower non-gracefully](https://github.com/zeebe-io/zeebe-chaos/commit/63c481c0c7dd7026f03be4e51d61a918613b0140) + * [Restart leader non-gracefully](https://github.com/camunda/zeebe-chaos/commit/e6260cb8612a983c8ed74fd2a37a249987ad3d3d) + * [Restart follower non-gracefully](https://github.com/camunda/zeebe-chaos/commit/63c481c0c7dd7026f03be4e51d61a918613b0140) ## Participants diff --git a/chaos-days/blog/2020-11-03-investigate-failing-tests/index.md b/chaos-days/blog/2020-11-03-investigate-failing-tests/index.md index 114939b04..27c0158fa 100644 --- a/chaos-days/blog/2020-11-03-investigate-failing-tests/index.md +++ b/chaos-days/blog/2020-11-03-investigate-failing-tests/index.md @@ -40,7 +40,7 @@ To run all experiments in a loop I used in the `chaos-experiments/kubernetes` fo while [ $? -eq 0 ]; do for ex in */experiment.json; do chaos run $ex; done; done ``` -During running the experiments I found a bug in our chaos experiments, where it seems that some experiments are not executed correctly, see [#43](https://github.com/zeebe-io/zeebe-chaos/issues/43). +During running the experiments I found a bug in our chaos experiments, where it seems that some experiments are not executed correctly, see [#43](https://github.com/camunda/zeebe-chaos/issues/43). It took a while, but at some point the experiments start to fail. Interesting is that if you look at the pods all seem to be ready, but in the metrics we can see that one partition is unhealthy (Partition one this time). @@ -88,7 +88,7 @@ tar -xvf broker-2-data.tar.gz ## New Issues - * Gateway experiments are not executed [#43](https://github.com/zeebe-io/zeebe-chaos/issues/43) + * Gateway experiments are not executed [#43](https://github.com/camunda/zeebe-chaos/issues/43) * Deployment Reprocessing inconsistencies [#5753](https://github.com/zeebe-io/zeebe/issues/5753) ## Participants diff --git a/chaos-days/blog/2020-11-24-message-correlation-after-failover/index.md b/chaos-days/blog/2020-11-24-message-correlation-after-failover/index.md index 0bc8ecad6..e71d7eb15 100644 --- a/chaos-days/blog/2020-11-24-message-correlation-after-failover/index.md +++ b/chaos-days/blog/2020-11-24-message-correlation-after-failover/index.md @@ -13,7 +13,7 @@ authors: zell # Chaos Day Summary -Today I wanted to finally implement an experiment which I postponed for long time, see [#24](https://github.com/zeebe-io/zeebe-chaos/issues/24). +Today I wanted to finally implement an experiment which I postponed for long time, see [#24](https://github.com/camunda/zeebe-chaos/issues/24). The problem was that previous we were not able to determine on which partition the message was published, so we were not able to assert that it was published on the correct partition. With this [#4794](https://github.com/zeebe-io/zeebe/issues/4794) it is now possible, which was btw an community contribution. :tada: @@ -72,8 +72,8 @@ $ chaos run production-m/msg-correlation/experiment.json ``` Experiment added to all cluster plans: - * https://github.com/zeebe-io/zeebe-chaos/commit/adeab53915e12b4a76fd4d49bb359684619b117f - * https://github.com/zeebe-io/zeebe-chaos/commit/93daf11864fdd851267dae67fdfc31e0ea78b407 + * https://github.com/camunda/zeebe-chaos/commit/adeab53915e12b4a76fd4d49bb359684619b117f + * https://github.com/camunda/zeebe-chaos/commit/93daf11864fdd851267dae67fdfc31e0ea78b407 ## New Issues diff --git a/chaos-days/blog/2021-01-07-disconnect-leader-and-follower/index.md b/chaos-days/blog/2021-01-07-disconnect-leader-and-follower/index.md index 30f5bd6a0..b92802f28 100644 --- a/chaos-days/blog/2021-01-07-disconnect-leader-and-follower/index.md +++ b/chaos-days/blog/2021-01-07-disconnect-leader-and-follower/index.md @@ -15,7 +15,7 @@ authors: zell Happy new year everyone :tada: -This time I wanted to verify the following hypothesis `Disconnecting Leader and one Follower should not make cluster disruptive` ([#45](https://github.com/zeebe-io/zeebe-chaos/issues/45)). +This time I wanted to verify the following hypothesis `Disconnecting Leader and one Follower should not make cluster disruptive` ([#45](https://github.com/camunda/zeebe-chaos/issues/45)). But in order to do that we need to extract the Leader and Follower node for a partition from the Topology. Luckily in December we got an [external contribution](https://github.com/zeebe-io/zeebe/pull/5943) which allows us to print `zbctl status` as json. This gives us now more possibilities, since we can extract values much better out of it. @@ -224,7 +224,7 @@ function getIndexOfPodForPartitionInState() The previous function worked only with homogeneous clusters, which means where the partitions are equally distributed. This caused issues on experiments on Production L clusters, where partitions are heterogeneous distributed, see related issue [zeebe-io/zeebe-cluster-testbench#154](https://github.com/zeebe-io/zeebe-cluster-testbench/issues/154). With this new utility we can create some new experiments also for Production - L clusters. -I wrote a new script based on the [older disconnect/connect gateway scripts](https://github.com/zeebe-io/zeebe-chaos/blob/master/chaos-experiments/scripts/disconnect-standalone-gateway.sh), where we disconnect the gateway with the brokers. The new one disconnects an leader for an partition with the follower and vice-versa. +I wrote a new script based on the [older disconnect/connect gateway scripts](https://github.com/camunda/zeebe-chaos/blob/master/chaos-experiments/scripts/disconnect-standalone-gateway.sh), where we disconnect the gateway with the brokers. The new one disconnects an leader for an partition with the follower and vice-versa. Disconnect Leader-Follower: diff --git a/chaos-days/blog/2021-03-23-camunda-cloud-network-partition/index.md b/chaos-days/blog/2021-03-23-camunda-cloud-network-partition/index.md index 8567977fd..3e491d9f1 100644 --- a/chaos-days/blog/2021-03-23-camunda-cloud-network-partition/index.md +++ b/chaos-days/blog/2021-03-23-camunda-cloud-network-partition/index.md @@ -24,10 +24,10 @@ We were able to enhance the deployment distribution experiment and run it in the ## Chaos Experiment -We already had a [prepared chaos experiment](https://github.com/zeebe-io/zeebe-chaos/blob/master/chaos-experiments/helm/deployment-distribution/experiment.json), but we needed to enhance that. Deepthi was so kind to create [PR](https://github.com/zeebe-io/zeebe-chaos/pull/50) for that. +We already had a [prepared chaos experiment](https://github.com/camunda/zeebe-chaos/blob/master/chaos-experiments/helm/deployment-distribution/experiment.json), but we needed to enhance that. Deepthi was so kind to create [PR](https://github.com/camunda/zeebe-chaos/pull/50) for that. ### Enhancement -The changes contain a new step before creating the network partition on the deployment distribution experiment, see [here](https://github.com/zeebe-io/zeebe-chaos/blob/master/chaos-experiments/camunda-cloud/production-l/deployment-distribution/experiment.json#L25-L35). +The changes contain a new step before creating the network partition on the deployment distribution experiment, see [here](https://github.com/camunda/zeebe-chaos/blob/master/chaos-experiments/camunda-cloud/production-l/deployment-distribution/experiment.json#L25-L35). ```json { @@ -185,7 +185,7 @@ Thanks for participating [Deepthi](https://github.com/deepthidevaki). ##### Re-connecting might fail -We realized during testing the experiment that the re-connecting might fail, because the pod can be rescheduled and then a ip route can't be delete since it no longer exist. [This is now fixed](https://github.com/zeebe-io/zeebe-chaos/blob/master/chaos-experiments/scripts/connect-leaders.sh#L45-L48). We check for existence of the command `ip`, if this doesn't exist we know the pod was restarted and we ignore it. +We realized during testing the experiment that the re-connecting might fail, because the pod can be rescheduled and then a ip route can't be delete since it no longer exist. [This is now fixed](https://github.com/camunda/zeebe-chaos/blob/master/chaos-experiments/scripts/connect-leaders.sh#L45-L48). We check for existence of the command `ip`, if this doesn't exist we know the pod was restarted and we ignore it. *Before:* diff --git a/chaos-days/blog/2021-03-30-set-file-immutable/index.md b/chaos-days/blog/2021-03-30-set-file-immutable/index.md index e57c3ba3d..7d1d7c16b 100644 --- a/chaos-days/blog/2021-03-30-set-file-immutable/index.md +++ b/chaos-days/blog/2021-03-30-set-file-immutable/index.md @@ -17,7 +17,7 @@ Unfortunately I found out that our test chaos cluster was in a way broken, that Because of these circumstances I thought about different things to experiment with, and I remembered that in the [last chaos day](/2021-03-23-camunda-cloud-network-partition/index.md) we worked with patching running deployments, in order to add more capabilities. This allowed us to create ip routes and experiment with the zeebe deployment distribution. During this I have read the [capabilities list of linux](https://man7.org/linux/man-pages/man7/capabilities.7.html), and found out that we can mark files as immutable, which might be interesting for a chaos experiment. -In this chaos day I planned to find out how marking a file immutable affects our brokers and I made the hypothesis that: *If a leader has a write error, which is not recoverable, it will step down and another leader should take over.* I put this in our hypothesis backlog ([zeebe-chaos#52](https://github.com/zeebe-io/zeebe-chaos/issues/52)). +In this chaos day I planned to find out how marking a file immutable affects our brokers and I made the hypothesis that: *If a leader has a write error, which is not recoverable, it will step down and another leader should take over.* I put this in our hypothesis backlog ([zeebe-chaos#52](https://github.com/camunda/zeebe-chaos/issues/52)). In order to really run this kind of experiment I need to find out whether marking a file immutable will cause any problems and if not how I can cause write errors such that affects the broker. Unfortunately it turned out that immutable files will not cause issues on already opened file channels, but I found some other bugs/issues, which you can read below. diff --git a/chaos-days/blog/2021-04-03-bpmn-meets-chaos-engineering/index.md b/chaos-days/blog/2021-04-03-bpmn-meets-chaos-engineering/index.md index fff56dffd..3c0515508 100644 --- a/chaos-days/blog/2021-04-03-bpmn-meets-chaos-engineering/index.md +++ b/chaos-days/blog/2021-04-03-bpmn-meets-chaos-engineering/index.md @@ -88,7 +88,7 @@ The `chaosToolkit` has more features and extensions, but these are not used by u ### List of Chaos Experiments -The experiment above is just one experiment of our continuous growing collection of chaos experiments, which we have already defined. There exist chaos experiments for the helm charts, but also for camunda cloud, for each cluster plan separately. You can find them [here](https://github.com/zeebe-io/zeebe-chaos/tree/master/chaos-experiments). +The experiment above is just one experiment of our continuous growing collection of chaos experiments, which we have already defined. There exist chaos experiments for the helm charts, but also for camunda cloud, for each cluster plan separately. You can find them [here](https://github.com/camunda/zeebe-chaos/tree/master/chaos-experiments). ### Automated Chaos Experiments @@ -98,7 +98,7 @@ Chaos experiments need to be executed continously, not only once. For that we ha It is executed via a [zbctl chaos worker](https://github.com/zeebe-io/zeebe-cluster-testbench/tree/develop/core/chaos-workers), which is part of the `testbench`. The `chaos worker` polls for new jobs at the `testbench`. On new jobs it executes, based on the cluster plan, against the given/created Zeebe cluster the chaos experiments, via the `chaostoolkit`. -In general this was a good first solution, which is quite extensible since we just needed to add new experiments in the [zeebe-chaos](https://github.com/zeebe-io/zeebe-chaos) repository and on the next run the experiments are executed, without any further adjustments. +In general this was a good first solution, which is quite extensible since we just needed to add new experiments in the [zeebe-chaos](https://github.com/camunda/zeebe-chaos) repository and on the next run the experiments are executed, without any further adjustments. ### Challenges @@ -144,7 +144,7 @@ sequential multi instances, since we can have multiple probes/actions for the st As payload of the process instances we have the defined chaos experiment in JSON, which we have seen earlier. In this JSON we have all information we need to orchestrate this experiment. -We have implemented two Kotlin workers, one to read all experiment JSON files and one to execute the bash scripts, which are referenced in the chaos experiment descriptions. You can find the code [here](https://github.com/zeebe-io/zeebe-chaos/tree/master/chaos-model/chaos-worker), it is just 100 lines long. +We have implemented two Kotlin workers, one to read all experiment JSON files and one to execute the bash scripts, which are referenced in the chaos experiment descriptions. You can find the code [here](https://github.com/camunda/zeebe-chaos/tree/master/chaos-model/chaos-worker), it is just 100 lines long. ### Results diff --git a/chaos-days/blog/2021-04-29-Corrupted-Snapshot/index.md b/chaos-days/blog/2021-04-29-Corrupted-Snapshot/index.md index d87a2d0fc..834485839 100644 --- a/chaos-days/blog/2021-04-29-Corrupted-Snapshot/index.md +++ b/chaos-days/blog/2021-04-29-Corrupted-Snapshot/index.md @@ -13,7 +13,7 @@ authors: zell # Chaos Day Summary -A while ago we have written an experiment, which should verify that followers are not able to become leader, if they have a corrupted snapshot. You can find that specific experiment [here](https://github.com/zeebe-io/zeebe-chaos/tree/master/chaos-experiments/helm/snapshot-corruption). This experiment was executed regularly against Production-M and Production-S Camunda Cloud cluster plans. With the latest changes, in the upcoming 1.0 release, we changed some behavior in regard to detect snapshot corruption on followers. +A while ago we have written an experiment, which should verify that followers are not able to become leader, if they have a corrupted snapshot. You can find that specific experiment [here](https://github.com/camunda/zeebe-chaos/tree/master/chaos-experiments/helm/snapshot-corruption). This experiment was executed regularly against Production-M and Production-S Camunda Cloud cluster plans. With the latest changes, in the upcoming 1.0 release, we changed some behavior in regard to detect snapshot corruption on followers. **NEW** If a follower is restarted and has a corrupted snapshot it will detect it on bootstrap and will refuse to start related services and crash. This means the pod will end in a crash loop, until this is manually fixed. diff --git a/chaos-days/blog/2021-05-25-Reset-Clock/index.md b/chaos-days/blog/2021-05-25-Reset-Clock/index.md index 226585810..620a331c2 100644 --- a/chaos-days/blog/2021-05-25-Reset-Clock/index.md +++ b/chaos-days/blog/2021-05-25-Reset-Clock/index.md @@ -13,7 +13,7 @@ authors: zell # Chaos Day Summary -[Recently we run a Game day](https://confluence.camunda.com/display/ZEEBE/Game+Day+18.05.2021) where a lot of messages with high TTL have been stored in the state. This was based on an earlier incident, which we had seen in production. One suggested approach to resolve that incident was to increase the time, such that all messages are removed from the state. This and the fact that summer and winter time shifts can cause in other systems evil bugs, we wanted to find out how our system can handle time shifts. [Phil](https://github.com/saig0) joined me as participant and observer. There was a related issue which covers this topic as well, [zeebe-chaos#3](https://github.com/zeebe-io/zeebe-chaos/issues/3). +[Recently we run a Game day](https://confluence.camunda.com/display/ZEEBE/Game+Day+18.05.2021) where a lot of messages with high TTL have been stored in the state. This was based on an earlier incident, which we had seen in production. One suggested approach to resolve that incident was to increase the time, such that all messages are removed from the state. This and the fact that summer and winter time shifts can cause in other systems evil bugs, we wanted to find out how our system can handle time shifts. [Phil](https://github.com/saig0) joined me as participant and observer. There was a related issue which covers this topic as well, [zeebe-chaos#3](https://github.com/camunda/zeebe-chaos/issues/3). **TL;DR;** Zeebe is able to handle time shifts back and forth, without observable issues. Operate seems to dislike it. diff --git a/chaos-days/blog/2021-06-08-Full-Disk/index.md b/chaos-days/blog/2021-06-08-Full-Disk/index.md index 16ceb21df..3a5e5059e 100644 --- a/chaos-days/blog/2021-06-08-Full-Disk/index.md +++ b/chaos-days/blog/2021-06-08-Full-Disk/index.md @@ -13,7 +13,7 @@ authors: zell # Chaos Day Summary -On this chaos day we wanted to experiment with OOD recovery and ELS connection issues. This is related to the following issues from our hypothesis backlog: [zeebe-chaos#32](https://github.com/zeebe-io/zeebe-chaos/issues/32) and [zeebe-chaos#14](https://github.com/zeebe-io/zeebe-chaos/issues/14). This time [@Nico](https://github.com/korthout) joined me. +On this chaos day we wanted to experiment with OOD recovery and ELS connection issues. This is related to the following issues from our hypothesis backlog: [zeebe-chaos#32](https://github.com/camunda/zeebe-chaos/issues/32) and [zeebe-chaos#14](https://github.com/camunda/zeebe-chaos/issues/14). This time [@Nico](https://github.com/korthout) joined me. **TL;DR** The experiment was successful :muscle: and we found several things in the dashboard which we can improve :) @@ -38,7 +38,7 @@ We expect the following properties: #### Network disconnect to ELS -In order to disconnect the Brokers with ELS, we wanted to reuse one of our network disconnect scripts, e.g. [disconnect-leaders.sh](https://github.com/zeebe-io/zeebe-chaos/blob/master/chaos-experiments/scripts/disconnect-leaders.sh). This resolves the IP's of the brokers and creates an unreachable route via the `ip` tool at the given brokers. +In order to disconnect the Brokers with ELS, we wanted to reuse one of our network disconnect scripts, e.g. [disconnect-leaders.sh](https://github.com/camunda/zeebe-chaos/blob/master/chaos-experiments/scripts/disconnect-leaders.sh). This resolves the IP's of the brokers and creates an unreachable route via the `ip` tool at the given brokers. We copied that and adjusted it to our needs: @@ -210,7 +210,7 @@ If we take a look at the processing section we can see that the exporters lag wa ##### Connecting -Luckily we were able to reuse on of our already written reconnect scripts for this experiment, see [connect-leaders.sh](https://github.com/zeebe-io/zeebe-chaos/blob/master/chaos-experiments/scripts/connect-leaders.sh). +Luckily we were able to reuse on of our already written reconnect scripts for this experiment, see [connect-leaders.sh](https://github.com/camunda/zeebe-chaos/blob/master/chaos-experiments/scripts/connect-leaders.sh). After removing the ip route (connecting the Brokers with ELS again) we can see that it immediately starts to export again. diff --git a/chaos-days/blog/2021-09-23-Old-Clients/index.md b/chaos-days/blog/2021-09-23-Old-Clients/index.md index 95a08e825..02663f3a2 100644 --- a/chaos-days/blog/2021-09-23-Old-Clients/index.md +++ b/chaos-days/blog/2021-09-23-Old-Clients/index.md @@ -14,7 +14,7 @@ authors: zell It has been awhile since the last post, I'm happy to be back. -In today's chaos day we want to verify the hypothesis from [zeebe-chaos#34](https://github.com/zeebe-io/zeebe-chaos/issues/34) that old +In today's chaos day we want to verify the hypothesis from [zeebe-chaos#34](https://github.com/camunda/zeebe-chaos/issues/34) that old clients can't disrupt a running cluster. It might happen that after upgrading your Zeebe to the newest shiny version, you might forget to @@ -86,7 +86,7 @@ Furthermore, taking a look at the resource consumption, especially at the gatewa ### Result -We were able to confirm the hypothesis written in [zeebe-chaos#34](https://github.com/zeebe-io/zeebe-chaos/issues/34), that an old client can't disrupt a running cluster. +We were able to confirm the hypothesis written in [zeebe-chaos#34](https://github.com/camunda/zeebe-chaos/issues/34), that an old client can't disrupt a running cluster. ## Found Bugs diff --git a/chaos-days/blog/2021-10-29-Throughput-on-big-state/index.md b/chaos-days/blog/2021-10-29-Throughput-on-big-state/index.md index 219a0db3f..1a3c41cd6 100644 --- a/chaos-days/blog/2021-10-29-Throughput-on-big-state/index.md +++ b/chaos-days/blog/2021-10-29-Throughput-on-big-state/index.md @@ -13,7 +13,7 @@ authors: zell # Chaos Day Summary -In this chaos day we wanted to prove the hypothesis that the throughput should not significantly change even if we have bigger state, see [zeebe-chaos#64](https://github.com/zeebe-io/zeebe-chaos/issues/64) +In this chaos day we wanted to prove the hypothesis that the throughput should not significantly change even if we have bigger state, see [zeebe-chaos#64](https://github.com/camunda/zeebe-chaos/issues/64) This came up due observations from the [last chaos day](/2021-10-05-recovery-time/index.md). We already had a bigger investigation here [zeebe#7955](https://github.com/camunda-cloud/zeebe/issues/7955). @@ -135,4 +135,4 @@ As written above the throughput seem to break, after we reach a certain state si It might be just a trigger to get the system into stumblling, which means: after one thing takes a bit longer the processing queue gets longer and the processor is not able to catch up any more. This causes then backpressure to kick in etc. -I think we need to further investigate this. \ No newline at end of file +I think we need to further investigate this. diff --git a/chaos-days/blog/2022-02-01-High-Snapshot-Frequency/index.md b/chaos-days/blog/2022-02-01-High-Snapshot-Frequency/index.md index 42d89b976..11cf4d112 100644 --- a/chaos-days/blog/2022-02-01-High-Snapshot-Frequency/index.md +++ b/chaos-days/blog/2022-02-01-High-Snapshot-Frequency/index.md @@ -12,7 +12,7 @@ authors: zell # Chaos Day Summary -Today we wanted to experiment with the snapshot interval and verify that a high snapshot frequency will not impact our availability ([#21](https://github.com/zeebe-io/zeebe-chaos/issues/21)). +Today we wanted to experiment with the snapshot interval and verify that a high snapshot frequency will not impact our availability ([#21](https://github.com/camunda/zeebe-chaos/issues/21)). **TL;DR;** The chaos experiment succeeded :muscle: We were able to prove our hypothesis. diff --git a/chaos-days/blog/2022-02-15-Standalone-Gateway-in-CCSaaS/index.md b/chaos-days/blog/2022-02-15-Standalone-Gateway-in-CCSaaS/index.md index 37c314830..8d6da140a 100644 --- a/chaos-days/blog/2022-02-15-Standalone-Gateway-in-CCSaaS/index.md +++ b/chaos-days/blog/2022-02-15-Standalone-Gateway-in-CCSaaS/index.md @@ -15,7 +15,7 @@ authors: zell We recently introduced the Zeebe Standalone Gateway in CCSaaS. Today I wanted to do a first simple chaos experiment with the gateway, where we just restart one gateway. -Ideally in the future we could enable some gateway chaos experiments again, which we currently only support for [helm](https://github.com/zeebe-io/zeebe-chaos/tree/master/chaos-workers/chaos-experiments/helm). +Ideally in the future we could enable some gateway chaos experiments again, which we currently only support for [helm](https://github.com/camunda/zeebe-chaos/tree/master/chaos-workers/chaos-experiments/helm). **TL;DR;** Our Camunda Cloud clusters can handle gateway restarts without issues. diff --git a/chaos-days/blog/2022-08-02-deployment-distribution/index.md b/chaos-days/blog/2022-08-02-deployment-distribution/index.md index b4bff58be..a70a9ca30 100644 --- a/chaos-days/blog/2022-08-02-deployment-distribution/index.md +++ b/chaos-days/blog/2022-08-02-deployment-distribution/index.md @@ -17,11 +17,11 @@ authors: zell We encountered recently a severe bug [zeebe#9877](https://github.com/camunda/camunda/issues/9877) and I was wondering why we haven't spotted it earlier, since we have chaos experiments for it. I realized two things: 1. The experiments only check for parts of it (BPMN resource only). The production code has changed, and a new feature has been added (DMN) but the experiments/tests haven't been adjusted. - 2. More importantly we disabled the automated execution of the deployment distribution experiment because it was flaky due to a missing standalone gateway in Camunda Cloud SaaS [zeebe-io/zeebe-chaos#61](https://github.com/zeebe-io/zeebe-chaos/issues/61). This is no longer the case, see [Standalone Gateway in CCSaaS](../2022-02-15-Standalone-Gateway-in-CCSaaS/index.md) + 2. More importantly we disabled the automated execution of the deployment distribution experiment because it was flaky due to a missing standalone gateway in Camunda Cloud SaaS [camunda/zeebe-chaos#61](https://github.com/camunda/zeebe-chaos/issues/61). This is no longer the case, see [Standalone Gateway in CCSaaS](../2022-02-15-Standalone-Gateway-in-CCSaaS/index.md) On this chaos day I want to bring the automation of this chaos experiment back to life. If I have still time I want to enhance the experiment. -**TL;DR;** The experiment still worked, and our deployment distribution is still resilient against network partitions. It also works with DMN resources. I can enable the experiment again, and we can close [zeebe-io/zeebe-chaos#61](https://github.com/zeebe-io/zeebe-chaos/issues/61). Unfortunately, we were not able to reproduce [zeebe#9877](https://github.com/camunda/camunda/issues/9877) but we did some good preparation work for it. +**TL;DR;** The experiment still worked, and our deployment distribution is still resilient against network partitions. It also works with DMN resources. I can enable the experiment again, and we can close [camunda/zeebe-chaos#61](https://github.com/camunda/zeebe-chaos/issues/61). Unfortunately, we were not able to reproduce [zeebe#9877](https://github.com/camunda/camunda/issues/9877) but we did some good preparation work for it. @@ -61,9 +61,9 @@ chaos --version #### Executing chaos toolkit -As mentioned, the deployment distribution was not enabled for Production-S clusters, which is currently the only configuration we test via [Zeebe Testbench](https://github.com/zeebe-io/zeebe-cluster-testbench). We have to use the experiment that is defined under [production-l/deployment-distribution](https://github.com/zeebe-io/zeebe-chaos/tree/master/chaos-workers/chaos-experiments/camunda-cloud/production-l/deployment-distribution), which is the same*. +As mentioned, the deployment distribution was not enabled for Production-S clusters, which is currently the only configuration we test via [Zeebe Testbench](https://github.com/zeebe-io/zeebe-cluster-testbench). We have to use the experiment that is defined under [production-l/deployment-distribution](https://github.com/camunda/zeebe-chaos/tree/master/chaos-workers/chaos-experiments/camunda-cloud/production-l/deployment-distribution), which is the same*. -* That is not 100% true. During running the Production-l experiment I realized that it made some assumptions regarding the partition count which needs to be adjusted for the Production-S setup. +* That is not 100% true. During running the Production-l experiment I realized that it made some assumptions regarding the partition count which needs to be adjusted for the Production-S setup. ```sh chaos run production-l/deployment-distribution/experiment.json @@ -208,7 +208,7 @@ Should do the trick, but I was not yet able to reproduce the issue with 8.0.4. I ## Further Work -Based on today's outcome we can enable again the Deployment Distribution experiment for Production-S, such that is executed by Zeebe Testbench (our automation tooling). We can close [zeebe-io/zeebe-chaos#61](https://github.com/zeebe-io/zeebe-chaos/issues/61) +Based on today's outcome we can enable again the Deployment Distribution experiment for Production-S, such that is executed by Zeebe Testbench (our automation tooling). We can close [camunda/zeebe-chaos#61](https://github.com/camunda/zeebe-chaos/issues/61) We should adjust our Chaos Worker implementation such that we also deploy DMN resources as we did in today's Chaos Day, since the scripts we changed aren't used in the automation. diff --git a/chaos-days/blog/2022-08-31-Message-Correlation-after-Network-Partition/index.md b/chaos-days/blog/2022-08-31-Message-Correlation-after-Network-Partition/index.md index 41eccb4f0..64ee2f101 100644 --- a/chaos-days/blog/2022-08-31-Message-Correlation-after-Network-Partition/index.md +++ b/chaos-days/blog/2022-08-31-Message-Correlation-after-Network-Partition/index.md @@ -76,7 +76,7 @@ To make the experiment easier to reproduce and allow us to experiment in differe ##### Message Publish -I added a new feature ([PR #166](https://github.com/zeebe-io/zeebe-chaos/pull/166)) that allows us to publish a message to a specific partition: +I added a new feature ([PR #166](https://github.com/camunda/zeebe-chaos/pull/166)) that allows us to publish a message to a specific partition: ```sh $ ./zbchaos publish message -v --partitionId 3 @@ -90,7 +90,7 @@ Message was sent and returned key 6755399441055796, which corresponds to partiti For the steady-state verification, multiple enhancements have been added. -1. Previously the `zbchaos` didn't allow us to create instances of specific models, which is now added as new feature ([PR #167](https://github.com/zeebe-io/zeebe-chaos/pull/167)). +1. Previously the `zbchaos` didn't allow us to create instances of specific models, which is now added as new feature ([PR #167](https://github.com/camunda/zeebe-chaos/pull/167)). 2. In order to await the process instance completion a new flag was added `--awaitResult`, which allows us to await the PI completeness. 3. To make sure that our message can be correlated we have to set the right correlationKey/value. This means we need to create instances with certain variables, which is now possible as well (`--variables`). @@ -182,18 +182,18 @@ Encountered an error during process instance creation. Error: rpc error: code = panic: Expected to create process instance on partition 1, but timed out after 30s. goroutine 1 [running]: -github.com/zeebe-io/zeebe-chaos/go-chaos/cmd.glob..func10(0x247f740?, {0x1758c60?, 0x7?, 0x7?}) - /home/zell/goPath/src/github.com/zeebe-io/zeebe-chaos/go-chaos/cmd/verify.go:97 +0x1c5 +github.com/camunda/zeebe-chaos/go-chaos/cmd.glob..func10(0x247f740?, {0x1758c60?, 0x7?, 0x7?}) + /home/zell/goPath/src/github.com/camunda/zeebe-chaos/go-chaos/cmd/verify.go:97 +0x1c5 github.com/spf13/cobra.(*Command).execute(0x247f740, {0xc000426540, 0x7, 0x7}) /home/zell/goPath/pkg/mod/github.com/spf13/cobra@v1.5.0/command.go:876 +0x67b github.com/spf13/cobra.(*Command).ExecuteC(0x24808c0) /home/zell/goPath/pkg/mod/github.com/spf13/cobra@v1.5.0/command.go:990 +0x3bd github.com/spf13/cobra.(*Command).Execute(...) /home/zell/goPath/pkg/mod/github.com/spf13/cobra@v1.5.0/command.go:918 -github.com/zeebe-io/zeebe-chaos/go-chaos/cmd.Execute() - /home/zell/goPath/src/github.com/zeebe-io/zeebe-chaos/go-chaos/cmd/root.go:61 +0x25 +github.com/camunda/zeebe-chaos/go-chaos/cmd.Execute() + /home/zell/goPath/src/github.com/camunda/zeebe-chaos/go-chaos/cmd/root.go:61 +0x25 main.main() - /home/zell/goPath/src/github.com/zeebe-io/zeebe-chaos/go-chaos/main.go:8 +0x17 + /home/zell/goPath/src/github.com/camunda/zeebe-chaos/go-chaos/main.go:8 +0x17 ``` I retried it: ```shell @@ -211,18 +211,18 @@ Encountered an error during process instance creation. Error: rpc error: code = panic: Expected to create process instance on partition 1, but timed out after 30s. goroutine 1 [running]: -github.com/zeebe-io/zeebe-chaos/go-chaos/cmd.glob..func10(0x247f740?, {0x1758c60?, 0x8?, 0x8?}) - /home/zell/goPath/src/github.com/zeebe-io/zeebe-chaos/go-chaos/cmd/verify.go:97 +0x1c5 +github.com/camunda/zeebe-chaos/go-chaos/cmd.glob..func10(0x247f740?, {0x1758c60?, 0x8?, 0x8?}) + /home/zell/goPath/src/github.com/camunda/zeebe-chaos/go-chaos/cmd/verify.go:97 +0x1c5 github.com/spf13/cobra.(*Command).execute(0x247f740, {0xc00007e500, 0x8, 0x8}) /home/zell/goPath/pkg/mod/github.com/spf13/cobra@v1.5.0/command.go:876 +0x67b github.com/spf13/cobra.(*Command).ExecuteC(0x24808c0) /home/zell/goPath/pkg/mod/github.com/spf13/cobra@v1.5.0/command.go:990 +0x3bd github.com/spf13/cobra.(*Command).Execute(...) /home/zell/goPath/pkg/mod/github.com/spf13/cobra@v1.5.0/command.go:918 -github.com/zeebe-io/zeebe-chaos/go-chaos/cmd.Execute() - /home/zell/goPath/src/github.com/zeebe-io/zeebe-chaos/go-chaos/cmd/root.go:61 +0x25 +github.com/camunda/zeebe-chaos/go-chaos/cmd.Execute() + /home/zell/goPath/src/github.com/camunda/zeebe-chaos/go-chaos/cmd/root.go:61 +0x25 main.main() - /home/zell/goPath/src/github.com/zeebe-io/zeebe-chaos/go-chaos/main.go:8 +0x17 + /home/zell/goPath/src/github.com/camunda/zeebe-chaos/go-chaos/main.go:8 +0x17 ``` And got a similar exception. Taking a look at Operate we can see that process instances are created. It is likely that the await timed out since the message hasn't been correlated but the returned error is a bit unclear. Interesting is that on partition two the message is also not correlated. diff --git a/chaos-days/blog/2023-02-23-Recursive-call-activity/index.md b/chaos-days/blog/2023-02-23-Recursive-call-activity/index.md index 5d34cde33..b81b89086 100644 --- a/chaos-days/blog/2023-02-23-Recursive-call-activity/index.md +++ b/chaos-days/blog/2023-02-23-Recursive-call-activity/index.md @@ -35,7 +35,7 @@ We expect that our limit handling steps in during the execution and we can execu Before we can start with our experiment we need to start our benchmark Zeebe cluster. This has become easier now since I have written the last post. Previously we had to use the scripts and Makefile in the [zeebe/benchmark sub-directory](https://github.com/camunda/camunda/tree/main/benchmarks/setup). -We have now provided new [Benchmark Helm charts](https://github.com/zeebe-io/benchmark-helm), based on our Camunda Platform Helm charts. They allow us to deploy a new zeebe benchmark setup via: +We have now provided new [Benchmark Helm charts](https://github.com/camunda/zeebe-benchmark-helm), based on our Camunda Platform Helm charts. They allow us to deploy a new zeebe benchmark setup via: ```shell kubectl create namespace zell-chaos # create a new namespace @@ -48,7 +48,7 @@ helm install zell-chaos \ ``` -To deploy the model we can use [zbchaos v1.0.0](https://github.com/zeebe-io/zeebe-chaos/releases/tag/zbchaos-v1.0.0). +To deploy the model we can use [zbchaos v1.0.0](https://github.com/camunda/zeebe-chaos/releases/tag/zbchaos-v1.0.0). ```shell @@ -102,6 +102,6 @@ With this, I mark this chaos experiment as failed. We need to investigate this f ## Found Bugs -* [zbchaos logs debug message on normal usage](https://github.com/zeebe-io/zeebe-chaos/issues/323) +* [zbchaos logs debug message on normal usage](https://github.com/camunda/zeebe-chaos/issues/323) * [Every 2.5 seconds we send a topology request, which is shown in the metrics](https://github.com/camunda/camunda/issues/11799) * [Batch processing doesn't respect the limit](https://github.com/camunda/camunda/issues/11798) diff --git a/chaos-days/blog/2023-04-06-gateway-termination/index.md b/chaos-days/blog/2023-04-06-gateway-termination/index.md index 4a01d5e3b..7af8336bc 100644 --- a/chaos-days/blog/2023-04-06-gateway-termination/index.md +++ b/chaos-days/blog/2023-04-06-gateway-termination/index.md @@ -31,7 +31,7 @@ Furthermore, we have discovered a potential performance issue on lower load, whi ## Chaos Experiment -We will use our [Zeebe benchmark helm charts](https://github.com/zeebe-io/benchmark-helm) to set up the test cluster, and +We will use our [Zeebe benchmark helm charts](https://github.com/camunda/zeebe-benchmark-helm) to set up the test cluster, and our helper scripts [here](https://github.com/camunda/camunda/tree/main/benchmarks/setup). ### Setup: @@ -82,7 +82,7 @@ The performance drop is expected to be not significant, or at least should recov ### Actual -We will run the experiment in two ways, first via terminating the gateway (using [zbchaos](https://github.com/zeebe-io/zeebe-chaos/releases/tag/zbchaos-v1.0.0)) +We will run the experiment in two ways, first via terminating the gateway (using [zbchaos](https://github.com/camunda/zeebe-chaos/releases/tag/zbchaos-v1.0.0)) and later via scaling down the gateway deployment to one replica. We want to verify whether this makes any difference, since terminating will cause Kubernetes to recreate immediately the pod. @@ -273,7 +273,7 @@ The experiment itself succeeded :muscle: :white_check_marks: ### Zbchaos print verbose logs -I realized that we still have [the issue with zbchaos](https://github.com/zeebe-io/zeebe-chaos/issues/323) which is printing verbose logs: +I realized that we still have [the issue with zbchaos](https://github.com/camunda/zeebe-chaos/issues/323) which is printing verbose logs: ```shell $ zbchaos terminate gateway diff --git a/chaos-days/blog/2023-05-15-SST-Partitioning-toggle/index.md b/chaos-days/blog/2023-05-15-SST-Partitioning-toggle/index.md index 031e673ba..f26668eee 100644 --- a/chaos-days/blog/2023-05-15-SST-Partitioning-toggle/index.md +++ b/chaos-days/blog/2023-05-15-SST-Partitioning-toggle/index.md @@ -44,7 +44,7 @@ The experiment we want to do on this chaos day will look like the following: * Verify steady state: * verify the readiness of the cluster - * deploy a process model (which contains a [simple model](https://github.com/zeebe-io/zeebe-chaos/blob/main/go-chaos/internal/bpmn/one_task.bpmn)) + * deploy a process model (which contains a [simple model](https://github.com/camunda/zeebe-chaos/blob/main/go-chaos/internal/bpmn/one_task.bpmn)) * Chaos Action: * start a process instance (PI), with a service task * enable the SST partitioning @@ -73,7 +73,7 @@ When operating a cluster, I can enable the SST partitioning without an impact on As linked above I used again our [benchmark/setup](https://github.com/camunda/camunda/tree/main/benchmarks/setup) scripts to set up a cluster. #### First Part: Verify Steady state -To verify the readiness and run all actions I used the [zbchaos](https://github.com/zeebe-io/zeebe-chaos/tree/zbchaos-v1.0.0) tool. +To verify the readiness and run all actions I used the [zbchaos](https://github.com/camunda/zeebe-chaos/tree/zbchaos-v1.0.0) tool. Verifying readiness is fairly easy with zbchaos. diff --git a/chaos-days/blog/2023-05-19-Continuing-SST-Partitioning-toggle/index.md b/chaos-days/blog/2023-05-19-Continuing-SST-Partitioning-toggle/index.md index 9c83f52ab..3806b9ea4 100644 --- a/chaos-days/blog/2023-05-19-Continuing-SST-Partitioning-toggle/index.md +++ b/chaos-days/blog/2023-05-19-Continuing-SST-Partitioning-toggle/index.md @@ -33,7 +33,7 @@ The verification of the steady state will consist, of checking the readiness and In our first experiment, we will enable the SST partitioning. **First chaos action** - * Deploy a process model (which contains a [simple model](https://github.com/zeebe-io/zeebe-chaos/blob/main/go-chaos/internal/bpmn/one_task.bpmn)) + * Deploy a process model (which contains a [simple model](https://github.com/camunda/zeebe-chaos/blob/main/go-chaos/internal/bpmn/one_task.bpmn)) * Start 1000 process instances (PIs), with a service task * Enable the SST partitioning * Restart the cluster, and await readiness @@ -80,7 +80,7 @@ $ diff ../default/values.yaml values.yaml #### First Experiment: Verify Steady state -To verify the readiness and run all actions I used the [zbchaos](https://github.com/zeebe-io/zeebe-chaos/tree/zbchaos-v1.0.0) tool. +To verify the readiness and run all actions I used the [zbchaos](https://github.com/camunda/zeebe-chaos/tree/zbchaos-v1.0.0) tool. ```shell $ zbchaos verify readiness @@ -128,7 +128,7 @@ This is then as well visible in operate. ![operate-process](operate-process.png) As the next step, we will create 1000 process instances of our simple process model, with one service task. -For that, we can [use a new functionality](https://github.com/zeebe-io/zeebe-chaos/tree/zell-chaos-create-count-of-instances) of `zbchaos` I built for this chaos day. +For that, we can [use a new functionality](https://github.com/camunda/zeebe-chaos/tree/zell-chaos-create-count-of-instances) of `zbchaos` I built for this chaos day. On the first try, I had smaller issues, with timeouts etc. ```shell @@ -178,7 +178,7 @@ $ zbchaos verify readiness All Zeebe nodes are running. ``` -Now starting to complete the previously created jobs, we can use again a new feature in `zbchaos` ([which has been added during the chaos day](https://github.com/zeebe-io/zeebe-chaos/tree/zell-chaos-create-count-of-instances)) +Now starting to complete the previously created jobs, we can use again a new feature in `zbchaos` ([which has been added during the chaos day](https://github.com/camunda/zeebe-chaos/tree/zell-chaos-create-count-of-instances)) Unfortunately, I missed using the verbose flag. ```shell $ ./dist/zbchaos verify job-completion --jobCount 1001 --timeoutInSec 1200 diff --git a/chaos-days/blog/2023-06-02-Using-Large-Multi-Instance/index.md b/chaos-days/blog/2023-06-02-Using-Large-Multi-Instance/index.md index ce2fe1d1c..686aaf756 100644 --- a/chaos-days/blog/2023-06-02-Using-Large-Multi-Instance/index.md +++ b/chaos-days/blog/2023-06-02-Using-Large-Multi-Instance/index.md @@ -12,7 +12,7 @@ authors: zell # Chaos Day Summary -New day new chaos. :skull: In today's chaos day I want to pick up a topic, which had bothered people for long time. I created a [chaos day three years ago](https://zeebe-io.github.io/zeebe-chaos/2020/07/16/big-multi-instance/) around this topic as well. +New day new chaos. :skull: In today's chaos day I want to pick up a topic, which had bothered people for long time. I created a [chaos day three years ago](https://camunda.github.io/zeebe-chaos/2020/07/16/big-multi-instance/) around this topic as well. Today, we experiment with large multi-instances again. In the recent patch release [8.2.5](https://github.com/camunda/camunda/releases/tag/8.2.5) we fixed an issue with spawning larger multi instances. Previously if you have created a process instance with a large multi-instance it was likely that this caused to blacklist the process instance, since the multi-instance spawning ran into `maxMessageSize` limitations. diff --git a/chaos-days/blog/2023-11-30-Job-push-overloading/index.md b/chaos-days/blog/2023-11-30-Job-push-overloading/index.md index 874769123..8ed1e4097 100644 --- a/chaos-days/blog/2023-11-30-Job-push-overloading/index.md +++ b/chaos-days/blog/2023-11-30-Job-push-overloading/index.md @@ -34,7 +34,7 @@ We expect that if the workers are slowing down, the load is distributed to other We deployed a normal benchmark, with [default configurations](https://github.com/camunda/camunda/blob/main/benchmarks/setup/default/values.yaml). -We slowed the workers down, in the sense that we changed [the completionDelay to 1250 ms](https://github.com/zeebe-io/benchmark-helm/blob/main/charts/zeebe-benchmark/templates/worker.yaml#L30) +We slowed the workers down, in the sense that we changed [the completionDelay to 1250 ms](https://github.com/camunda/zeebe-benchmark-helm/blob/main/charts/zeebe-benchmark/templates/worker.yaml#L30) ![](exp1-general.png) diff --git a/chaos-days/blog/2024-01-19-Job-Activation-Latency/index.md b/chaos-days/blog/2024-01-19-Job-Activation-Latency/index.md index ade6a6ce0..ddef300b6 100644 --- a/chaos-days/blog/2024-01-19-Job-Activation-Latency/index.md +++ b/chaos-days/blog/2024-01-19-Job-Activation-Latency/index.md @@ -204,7 +204,7 @@ One of the downsides of switching to a push approach, unfortunately, is that the Thankfully, HTTP/2 and gRPC both have mechanisms to ensure flow control for server streaming RPCs. -[You can find our tests results in a separate blog post](https://zeebe-io.github.io/zeebe-chaos/2023/11/30/Job-push-overloading). +[You can find our tests results in a separate blog post](https://camunda.github.io/zeebe-chaos/2023/11/30/Job-push-overloading). ## Further reading @@ -217,6 +217,6 @@ You can read more about job push here: Additionally, we've already written two other blog posts: -- [Client backpressure resilience](https://zeebe-io.github.io/zeebe-chaos/2023/11/30/Job-push-overloading) -- [Job stream fault tolerance](https://zeebe-io.github.io/zeebe-chaos/2023/12/06/Job-Push-resiliency) +- [Client backpressure resilience](https://camunda.github.io/zeebe-chaos/2023/11/30/Job-push-overloading) +- [Job stream fault tolerance](https://camunda.github.io/zeebe-chaos/2023/12/06/Job-Push-resiliency) diff --git a/chaos-days/blog/2024-08-16-Operate-load-handling/index.md b/chaos-days/blog/2024-08-16-Operate-load-handling/index.md index 47841d206..b99e5d837 100644 --- a/chaos-days/blog/2024-08-16-Operate-load-handling/index.md +++ b/chaos-days/blog/2024-08-16-Operate-load-handling/index.md @@ -42,7 +42,7 @@ During building that dashboard I realized that we missed some detail metrics. Fo is currently not measured. Furthermore, we have operating limited metrics, thus allowing us only to see the average latency, not p99 nor p90. This needs to be enhanced in the future. -We will run three benchmarks (base, high load, and low load), and use again our [benchmark helm chart](https://github.com/zeebe-io/benchmark-helm) for such. +We will run three benchmarks (base, high load, and low load), and use again our [benchmark helm chart](https://github.com/camunda/zeebe-benchmark-helm) for such. All defaults from the helm charts are used, if not other specified. The most important ones, which are static over all benchmarks are listed below. | Config | Value | diff --git a/chaos-days/blog/2024-08-19-Operate-improve-import-latency/index.md b/chaos-days/blog/2024-08-19-Operate-improve-import-latency/index.md index a0ebeb937..3ff28da1d 100644 --- a/chaos-days/blog/2024-08-19-Operate-improve-import-latency/index.md +++ b/chaos-days/blog/2024-08-19-Operate-improve-import-latency/index.md @@ -129,7 +129,7 @@ exporters: delay: 1 ``` -This can be set in our [benchmark-helm](https://github.com/zeebe-io/benchmark-helm) directly via: `--set zeebe.config.zeebe.broker.exporters.elasticsearch.args.bulk.delay=1` +This can be set in our [benchmark-helm](https://github.com/camunda/zeebe-benchmark-helm) directly via: `--set zeebe.config.zeebe.broker.exporters.elasticsearch.args.bulk.delay=1`
Lower flush delay: Helm install command diff --git a/chaos-days/blog/2024-10-24-Camunda-Exporter-MVP/index.md b/chaos-days/blog/2024-10-24-Camunda-Exporter-MVP/index.md index ec7643b16..51b9affee 100644 --- a/chaos-days/blog/2024-10-24-Camunda-Exporter-MVP/index.md +++ b/chaos-days/blog/2024-10-24-Camunda-Exporter-MVP/index.md @@ -87,12 +87,12 @@ I can deploy the newest helm charts (alpha stage), by disabling Importer manuall ### Actual -As always we use our [benchmark-helm charts](https://github.com/zeebe-io/benchmark-helm) (that building on top of our [Camunda Platform Helm](https://github.com/camunda/camunda-platform-helm) charts). +As always we use our [benchmark-helm charts](https://github.com/camunda/zeebe-benchmark-helm) (that building on top of our [Camunda Platform Helm](https://github.com/camunda/camunda-platform-helm) charts). ### Installation -I had to adjust our benchmarks to [use the alpha snapshots ](https://github.com/zeebe-io/benchmark-helm/commit/db682a89788d6c511083ec743c6cf7d358155e3c) +I had to adjust our benchmarks to [use the alpha snapshots ](https://github.com/camunda/zeebe-benchmark-helm/commit/db682a89788d6c511083ec743c6cf7d358155e3c) ```yaml dependencies: @@ -103,7 +103,7 @@ dependencies: ``` -and [disable the Importer via ENV](https://github.com/zeebe-io/benchmark-helm/commit/aafac6e9ec78e9cfd2e59a5b6f30bf887a4fcbd0) +and [disable the Importer via ENV](https://github.com/camunda/zeebe-benchmark-helm/commit/aafac6e9ec78e9cfd2e59a5b6f30bf887a4fcbd0) ```yaml env: diff --git a/chaos-days/blog/2024-11-14-Impact-of-Camunda-Exporter-on-processing-performance/index.md b/chaos-days/blog/2024-11-14-Impact-of-Camunda-Exporter-on-processing-performance/index.md index 1baa279c2..3f206e35c 100644 --- a/chaos-days/blog/2024-11-14-Impact-of-Camunda-Exporter-on-processing-performance/index.md +++ b/chaos-days/blog/2024-11-14-Impact-of-Camunda-Exporter-on-processing-performance/index.md @@ -16,7 +16,7 @@ In our [last Chaos day](../2024-10-24-Camunda-Exporter-MVP/index.md) we experime ![](it2-migration.png) -Additionally, [some fixes and improvements](https://github.com/zeebe-io/benchmark-helm/pull/202) have been done to the realistic benchmarks that should allow us to better compare the general performance with a realistic good performing benchmark. +Additionally, [some fixes and improvements](https://github.com/camunda/zeebe-benchmark-helm/pull/202) have been done to the realistic benchmarks that should allow us to better compare the general performance with a realistic good performing benchmark. Actually, this is what we want to explore and experiment with today. @@ -31,9 +31,9 @@ Actually, this is what we want to explore and experiment with today. ## Benchmarks As in the [last Chaos day](../2024-10-24-Camunda-Exporter-MVP/index.md) we use the new realistic benchmarks, that contain a much more complex process model and workload. -We recently found some smaller issues in our benchmarks, related to [CPU throttling](https://github.com/zeebe-io/benchmark-helm/pull/204) and [undersized workers](https://github.com/zeebe-io/benchmark-helm/pull/202), these issues have been fixed. This allowed us to reach a much better workload/throughput on our weekly benchmarks, which we take here as a base for our comparison. +We recently found some smaller issues in our benchmarks, related to [CPU throttling](https://github.com/camunda/zeebe-benchmark-helm/pull/204) and [undersized workers](https://github.com/camunda/zeebe-benchmark-helm/pull/202), these issues have been fixed. This allowed us to reach a much better workload/throughput on our weekly benchmarks, which we take here as a base for our comparison. -The newest benchmark helm charts have been updated to the first [Camunda Platform alpha1](https://github.com/zeebe-io/benchmark-helm/releases/tag/zeebe-benchmark-0.3.8), which includes the Camunda Exporter. +The newest benchmark helm charts have been updated to the first [Camunda Platform alpha1](https://github.com/camunda/zeebe-benchmark-helm/releases/tag/zeebe-benchmark-0.3.8), which includes the Camunda Exporter. Today we run the following benchmarks diff --git a/chaos-days/docusaurus.config.js b/chaos-days/docusaurus.config.js index 98884c5a2..537025582 100644 --- a/chaos-days/docusaurus.config.js +++ b/chaos-days/docusaurus.config.js @@ -6,12 +6,12 @@ const darkCodeTheme = require('prism-react-renderer/themes/dracula'); (module.exports = { title: 'Zeebe Chaos', tagline: 'Chaos Day Summaries', - url: 'https://zeebe-io.github.io', + url: 'https://camunda.github.io', baseUrl: '/zeebe-chaos/', onBrokenLinks: 'throw', onBrokenMarkdownLinks: 'warn', favicon: 'img/zeebe-logo.png', - organizationName: 'zeebe-io', // Usually your GitHub org/user name. + organizationName: 'camunda', // Usually your GitHub org/user name. projectName: 'zeebe-chaos', // Usually your repo name. plugins: [ @@ -28,7 +28,7 @@ const darkCodeTheme = require('prism-react-renderer/themes/dracula'); showReadingTime: true, // Please change this to your repo. editUrl: - 'https://github.com/zeebe-io/zeebe-chaos/blob/master/chaos-days/', + 'https://github.com/camunda/zeebe-chaos/blob/master/chaos-days/', routeBasePath: '/', blogSidebarTitle: 'All posts', blogSidebarCount: 'ALL', @@ -53,7 +53,7 @@ const darkCodeTheme = require('prism-react-renderer/themes/dracula'); {to: '/', label: 'Chaos Summaries', position: 'left'}, { - href: 'https://github.com/zeebe-io/zeebe-chaos', + href: 'https://github.com/camunda/zeebe-chaos', label: 'GitHub', position: 'right', }, @@ -84,7 +84,7 @@ const darkCodeTheme = require('prism-react-renderer/themes/dracula'); items: [ { label: 'GitHub', - href: 'https://github.com/zeebe-io/zeebe-chaos/', + href: 'https://github.com/camunda/zeebe-chaos/', }, ], }, diff --git a/go-chaos/README.md b/go-chaos/README.md index 035a6d6c9..ba8a824da 100644 --- a/go-chaos/README.md +++ b/go-chaos/README.md @@ -26,4 +26,4 @@ make test The release is fully automated with the corresponding `./release.sh` script. -You can run it locally or you use the corresponding github action https://github.com/zeebe-io/zeebe-chaos/actions/workflows/release.yaml. +You can run it locally or you use the corresponding github action https://github.com/camunda/zeebe-chaos/actions/workflows/release.yaml. diff --git a/go-chaos/backend/clients.go b/go-chaos/backend/clients.go index 7c4fc926a..e25575b19 100644 --- a/go-chaos/backend/clients.go +++ b/go-chaos/backend/clients.go @@ -16,7 +16,7 @@ package backend import ( "github.com/camunda/zeebe/clients/go/v8/pkg/zbc" - "github.com/zeebe-io/zeebe-chaos/go-chaos/internal" + "github.com/camunda/zeebe-chaos/go-chaos/internal" ) func ConnectToZeebeCluster(k8Client internal.K8Client) (zbc.Client, func(), error) { diff --git a/go-chaos/backend/connection.go b/go-chaos/backend/connection.go index 3f789c95b..60a369da0 100644 --- a/go-chaos/backend/connection.go +++ b/go-chaos/backend/connection.go @@ -19,7 +19,7 @@ import ( "fmt" "github.com/camunda/zeebe/clients/go/v8/pkg/zbc" - "github.com/zeebe-io/zeebe-chaos/go-chaos/internal" + "github.com/camunda/zeebe-chaos/go-chaos/internal" v1 "k8s.io/api/core/v1" ) diff --git a/go-chaos/build.sh b/go-chaos/build.sh index 4ad011c3f..3a4a53196 100755 --- a/go-chaos/build.sh +++ b/go-chaos/build.sh @@ -19,7 +19,7 @@ rm -rf ${DIST_DIR}/* for i in "${!OS[@]}"; do if [ $# -eq 0 ] || [ ${OS[$i]} = $1 ]; then - CGO_ENABLED=0 GOOS="${OS[$i]}" GOARCH=amd64 go build -a -tags netgo -ldflags "-w -X github.com/zeebe-io/zeebe-chaos/go-chaos/cmd.Version=${VERSION} -X github.com/zeebe-io/zeebe-chaos/go-chaos/cmd.Commit=${COMMIT}" -o "${DIST_DIR}/${BINARY[$i]}" "${SRC_DIR}/main.go" # & + CGO_ENABLED=0 GOOS="${OS[$i]}" GOARCH=amd64 go build -a -tags netgo -ldflags "-w -X github.com/camunda/zeebe-chaos/go-chaos/cmd.Version=${VERSION} -X github.com/camunda/zeebe-chaos/go-chaos/cmd.Commit=${COMMIT}" -o "${DIST_DIR}/${BINARY[$i]}" "${SRC_DIR}/main.go" # & fi done diff --git a/go-chaos/cmd/backup.go b/go-chaos/cmd/backup.go index 1b5217fff..79ac95f25 100644 --- a/go-chaos/cmd/backup.go +++ b/go-chaos/cmd/backup.go @@ -31,7 +31,7 @@ import ( "k8s.io/apimachinery/pkg/labels" "github.com/spf13/cobra" - "github.com/zeebe-io/zeebe-chaos/go-chaos/internal" + "github.com/camunda/zeebe-chaos/go-chaos/internal" ) func AddBackupCommand(rootCmd *cobra.Command, flags *Flags) { diff --git a/go-chaos/cmd/cluster.go b/go-chaos/cmd/cluster.go index b56c4dad3..5873c1574 100644 --- a/go-chaos/cmd/cluster.go +++ b/go-chaos/cmd/cluster.go @@ -23,7 +23,7 @@ import ( "time" "github.com/spf13/cobra" - "github.com/zeebe-io/zeebe-chaos/go-chaos/internal" + "github.com/camunda/zeebe-chaos/go-chaos/internal" ) func AddClusterCommands(rootCmd *cobra.Command, flags *Flags) { diff --git a/go-chaos/cmd/connect.go b/go-chaos/cmd/connect.go index d89bc0a71..44bfbbe50 100644 --- a/go-chaos/cmd/connect.go +++ b/go-chaos/cmd/connect.go @@ -16,7 +16,7 @@ package cmd import ( "github.com/spf13/cobra" - "github.com/zeebe-io/zeebe-chaos/go-chaos/backend" + "github.com/camunda/zeebe-chaos/go-chaos/backend" ) func AddConnectCmd(rootCmd *cobra.Command, flags *Flags) { diff --git a/go-chaos/cmd/dataloss_sim.go b/go-chaos/cmd/dataloss_sim.go index decb3db6e..23b4501a9 100644 --- a/go-chaos/cmd/dataloss_sim.go +++ b/go-chaos/cmd/dataloss_sim.go @@ -18,7 +18,7 @@ import ( "time" "github.com/spf13/cobra" - "github.com/zeebe-io/zeebe-chaos/go-chaos/internal" + "github.com/camunda/zeebe-chaos/go-chaos/internal" ) func AddDatalossSimulationCmd(rootCmd *cobra.Command, flags *Flags) { diff --git a/go-chaos/cmd/deploy.go b/go-chaos/cmd/deploy.go index 23bf60f3a..31d6c1406 100644 --- a/go-chaos/cmd/deploy.go +++ b/go-chaos/cmd/deploy.go @@ -16,8 +16,8 @@ package cmd import ( "github.com/spf13/cobra" - "github.com/zeebe-io/zeebe-chaos/go-chaos/backend" - "github.com/zeebe-io/zeebe-chaos/go-chaos/internal" + "github.com/camunda/zeebe-chaos/go-chaos/backend" + "github.com/camunda/zeebe-chaos/go-chaos/internal" ) func AddDeployCmd(rootCmd *cobra.Command, flags *Flags) { diff --git a/go-chaos/cmd/disconnect.go b/go-chaos/cmd/disconnect.go index efb066b47..5ed5eedfd 100644 --- a/go-chaos/cmd/disconnect.go +++ b/go-chaos/cmd/disconnect.go @@ -16,7 +16,7 @@ package cmd import ( "github.com/spf13/cobra" - "github.com/zeebe-io/zeebe-chaos/go-chaos/backend" + "github.com/camunda/zeebe-chaos/go-chaos/backend" ) func ensureNoError(err error) { diff --git a/go-chaos/cmd/exporting.go b/go-chaos/cmd/exporting.go index 49c4863ef..092cae202 100644 --- a/go-chaos/cmd/exporting.go +++ b/go-chaos/cmd/exporting.go @@ -19,7 +19,7 @@ import ( "net/http" "github.com/spf13/cobra" - "github.com/zeebe-io/zeebe-chaos/go-chaos/internal" + "github.com/camunda/zeebe-chaos/go-chaos/internal" ) func AddExportingCmds(rootCmd *cobra.Command, flags *Flags) { diff --git a/go-chaos/cmd/publish.go b/go-chaos/cmd/publish.go index bf1e32167..27080b5d8 100644 --- a/go-chaos/cmd/publish.go +++ b/go-chaos/cmd/publish.go @@ -19,7 +19,7 @@ import ( "time" "github.com/spf13/cobra" - "github.com/zeebe-io/zeebe-chaos/go-chaos/internal" + "github.com/camunda/zeebe-chaos/go-chaos/internal" ) func AddPublishCmd(rootCmd *cobra.Command, flags *Flags) { diff --git a/go-chaos/cmd/restart.go b/go-chaos/cmd/restart.go index 08638ff6b..021872ef4 100644 --- a/go-chaos/cmd/restart.go +++ b/go-chaos/cmd/restart.go @@ -16,7 +16,7 @@ package cmd import ( "github.com/spf13/cobra" - "github.com/zeebe-io/zeebe-chaos/go-chaos/internal" + "github.com/camunda/zeebe-chaos/go-chaos/internal" ) func AddRestartCmd(rootCmd *cobra.Command, flags *Flags) { diff --git a/go-chaos/cmd/root.go b/go-chaos/cmd/root.go index d265abc2e..ccce554ed 100644 --- a/go-chaos/cmd/root.go +++ b/go-chaos/cmd/root.go @@ -23,7 +23,7 @@ import ( "github.com/rs/zerolog/log" "github.com/spf13/cobra" - "github.com/zeebe-io/zeebe-chaos/go-chaos/internal" + "github.com/camunda/zeebe-chaos/go-chaos/internal" ) func init() { diff --git a/go-chaos/cmd/stress.go b/go-chaos/cmd/stress.go index f855ef705..5d0e47e92 100644 --- a/go-chaos/cmd/stress.go +++ b/go-chaos/cmd/stress.go @@ -20,7 +20,7 @@ import ( "github.com/camunda/zeebe/clients/go/v8/pkg/zbc" "github.com/spf13/cobra" - "github.com/zeebe-io/zeebe-chaos/go-chaos/internal" + "github.com/camunda/zeebe-chaos/go-chaos/internal" v1 "k8s.io/api/core/v1" ) diff --git a/go-chaos/cmd/terminate.go b/go-chaos/cmd/terminate.go index 321c5c8c0..f441568a3 100644 --- a/go-chaos/cmd/terminate.go +++ b/go-chaos/cmd/terminate.go @@ -19,7 +19,7 @@ import ( "fmt" "github.com/spf13/cobra" - "github.com/zeebe-io/zeebe-chaos/go-chaos/internal" + "github.com/camunda/zeebe-chaos/go-chaos/internal" ) func AddTerminateCommand(rootCmd *cobra.Command, flags *Flags) { diff --git a/go-chaos/cmd/topology.go b/go-chaos/cmd/topology.go index 14cc6f73d..54b1f7db9 100644 --- a/go-chaos/cmd/topology.go +++ b/go-chaos/cmd/topology.go @@ -24,7 +24,7 @@ import ( "github.com/camunda/zeebe/clients/go/v8/pkg/pb" "github.com/spf13/cobra" - "github.com/zeebe-io/zeebe-chaos/go-chaos/internal" + "github.com/camunda/zeebe-chaos/go-chaos/internal" ) func AddTopologyCmd(rootCmd *cobra.Command, flags *Flags) { diff --git a/go-chaos/cmd/verify.go b/go-chaos/cmd/verify.go index 8c57642a8..615ba4840 100644 --- a/go-chaos/cmd/verify.go +++ b/go-chaos/cmd/verify.go @@ -18,7 +18,7 @@ import ( "time" "github.com/spf13/cobra" - "github.com/zeebe-io/zeebe-chaos/go-chaos/internal" + "github.com/camunda/zeebe-chaos/go-chaos/internal" ) func AddVerifyCommands(rootCmd *cobra.Command, flags *Flags) { diff --git a/go-chaos/cmd/version.go b/go-chaos/cmd/version.go index ced2f6f01..c65646146 100644 --- a/go-chaos/cmd/version.go +++ b/go-chaos/cmd/version.go @@ -19,7 +19,7 @@ import ( "math" "github.com/spf13/cobra" - "github.com/zeebe-io/zeebe-chaos/go-chaos/internal" + "github.com/camunda/zeebe-chaos/go-chaos/internal" ) func VersionString() string { diff --git a/go-chaos/cmd/worker.go b/go-chaos/cmd/worker.go index 82d4f9761..2d0180e8e 100644 --- a/go-chaos/cmd/worker.go +++ b/go-chaos/cmd/worker.go @@ -23,8 +23,8 @@ import ( zbworker "github.com/camunda/zeebe/clients/go/v8/pkg/worker" "github.com/camunda/zeebe/clients/go/v8/pkg/zbc" "github.com/spf13/cobra" - "github.com/zeebe-io/zeebe-chaos/go-chaos/internal" - worker "github.com/zeebe-io/zeebe-chaos/go-chaos/worker" + "github.com/camunda/zeebe-chaos/go-chaos/internal" + worker "github.com/camunda/zeebe-chaos/go-chaos/worker" "google.golang.org/grpc" ) diff --git a/go-chaos/cmd/zeebePods.go b/go-chaos/cmd/zeebePods.go index 90df3bd82..f5ef69179 100644 --- a/go-chaos/cmd/zeebePods.go +++ b/go-chaos/cmd/zeebePods.go @@ -16,7 +16,7 @@ package cmd import ( "github.com/spf13/cobra" - "github.com/zeebe-io/zeebe-chaos/go-chaos/internal" + "github.com/camunda/zeebe-chaos/go-chaos/internal" ) func AddBrokersCommand(rootCmd *cobra.Command, flags *Flags) { diff --git a/go-chaos/deploy/README.md b/go-chaos/deploy/README.md index d1a1a4fa6..9633ed843 100644 --- a/go-chaos/deploy/README.md +++ b/go-chaos/deploy/README.md @@ -2,16 +2,16 @@ The zbchaos worker is deployed via the [deployment.yaml](/go-chaos/deploy/deployment.yaml) file to the Zeebe Team GKE (zeebe-io). It will connect to the Testbench production environment (Zeebe cluster in SaaS) and poll for work. -If there is a new chaos experiment executed (and a target cluster created in a separate chaos GKE) zbchaos will connect to such cluster and run certain actions, like verifications or chaos injections. In order to connect to such chaos cluster zbchaos uses a separate kubeconfig, which is [injected via a secret](https://github.com/zeebe-io/zeebe-chaos/blob/main/go-chaos/deploy/deployment.yaml#L60). +If there is a new chaos experiment executed (and a target cluster created in a separate chaos GKE) zbchaos will connect to such cluster and run certain actions, like verifications or chaos injections. In order to connect to such chaos cluster zbchaos uses a separate kubeconfig, which is [injected via a secret](https://github.com/camunda/zeebe-chaos/blob/main/go-chaos/deploy/deployment.yaml#L60). -The secret is currently deployed **manually** but contains the content of the [kubernetes config file stored in this repository](https://github.com/zeebe-io/zeebe-chaos/blob/main/go-chaos/deploy/kubeconfig.yaml). +The secret is currently deployed **manually** but contains the content of the [kubernetes config file stored in this repository](https://github.com/camunda/zeebe-chaos/blob/main/go-chaos/deploy/kubeconfig.yaml). ## Kubernetes configuration file The Kubernetes configuration file is encrypted using [sops](https://github.com/mozilla/sops). -You need the right permissions to decrypt or encrypt the file. We have created in our [google cloud project a key ring set up](https://github.com/zeebe-io/zeebe-chaos/blob/main/go-chaos/deploy/kubeconfig.yaml). The Zeebe and SRE team should have access to they key-ring (and keys). +You need the right permissions to decrypt or encrypt the file. We have created in our [google cloud project a key ring set up](https://github.com/camunda/zeebe-chaos/blob/main/go-chaos/deploy/kubeconfig.yaml). The Zeebe and SRE team should have access to they key-ring (and keys). If there are any updates necessary on the config, SREs can easily update the config in this repository. We should make sure to reflect the changes in our testbench namespace. diff --git a/go-chaos/go.mod b/go-chaos/go.mod index 96c006dc0..c7378ed69 100644 --- a/go-chaos/go.mod +++ b/go-chaos/go.mod @@ -1,4 +1,4 @@ -module github.com/zeebe-io/zeebe-chaos/go-chaos +module github.com/camunda/zeebe-chaos/go-chaos go 1.23.0 diff --git a/go-chaos/integration/cluster_cmd_integration_test.go b/go-chaos/integration/cluster_cmd_integration_test.go index 1065f6ddd..3d182802b 100644 --- a/go-chaos/integration/cluster_cmd_integration_test.go +++ b/go-chaos/integration/cluster_cmd_integration_test.go @@ -22,7 +22,7 @@ import ( "github.com/stretchr/testify/require" "github.com/testcontainers/testcontainers-go" "github.com/testcontainers/testcontainers-go/wait" - "github.com/zeebe-io/zeebe-chaos/go-chaos/cmd" + "github.com/camunda/zeebe-chaos/go-chaos/cmd" ) func Test_ShouldBeAbleToQueryTopology(t *testing.T) { diff --git a/go-chaos/integration/integration_test.go b/go-chaos/integration/integration_test.go index 4ea1315d1..cc4a7d9b2 100644 --- a/go-chaos/integration/integration_test.go +++ b/go-chaos/integration/integration_test.go @@ -21,8 +21,8 @@ import ( "time" "github.com/stretchr/testify/require" - "github.com/zeebe-io/zeebe-chaos/go-chaos/cmd" - "github.com/zeebe-io/zeebe-chaos/go-chaos/internal" + "github.com/camunda/zeebe-chaos/go-chaos/cmd" + "github.com/camunda/zeebe-chaos/go-chaos/internal" "github.com/testcontainers/testcontainers-go" "github.com/testcontainers/testcontainers-go/wait" diff --git a/go-chaos/internal/chaos-experiments/chaos_experiments.go b/go-chaos/internal/chaos-experiments/chaos_experiments.go index fffa14a4b..246db8f25 100644 --- a/go-chaos/internal/chaos-experiments/chaos_experiments.go +++ b/go-chaos/internal/chaos-experiments/chaos_experiments.go @@ -6,7 +6,7 @@ import ( "encoding/json" "strings" - "github.com/zeebe-io/zeebe-chaos/go-chaos/internal" + "github.com/camunda/zeebe-chaos/go-chaos/internal" "golang.org/x/exp/slices" "golang.org/x/mod/semver" "k8s.io/apimachinery/pkg/util/yaml" diff --git a/go-chaos/main.go b/go-chaos/main.go index 6b143c80d..2a8e176c0 100644 --- a/go-chaos/main.go +++ b/go-chaos/main.go @@ -1,7 +1,7 @@ package main import ( - "github.com/zeebe-io/zeebe-chaos/go-chaos/cmd" + "github.com/camunda/zeebe-chaos/go-chaos/cmd" ) func main() { diff --git a/go-chaos/main_test.go b/go-chaos/main_test.go index 3978bc0f5..bde09960f 100644 --- a/go-chaos/main_test.go +++ b/go-chaos/main_test.go @@ -7,7 +7,7 @@ import ( "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" - "github.com/zeebe-io/zeebe-chaos/go-chaos/cmd" + "github.com/camunda/zeebe-chaos/go-chaos/cmd" ) func Test_ExecuteRootCmd(t *testing.T) { diff --git a/go-chaos/worker/chaos_worker.go b/go-chaos/worker/chaos_worker.go index 62f0ddea2..3f757bbfa 100644 --- a/go-chaos/worker/chaos_worker.go +++ b/go-chaos/worker/chaos_worker.go @@ -23,8 +23,8 @@ import ( "github.com/camunda/zeebe/clients/go/v8/pkg/entities" "github.com/camunda/zeebe/clients/go/v8/pkg/worker" - "github.com/zeebe-io/zeebe-chaos/go-chaos/internal" - chaos_experiments "github.com/zeebe-io/zeebe-chaos/go-chaos/internal/chaos-experiments" + "github.com/camunda/zeebe-chaos/go-chaos/internal" + chaos_experiments "github.com/camunda/zeebe-chaos/go-chaos/internal/chaos-experiments" ) type CommandRunner func([]string, context.Context) error diff --git a/go-chaos/worker/chaos_worker_test.go b/go-chaos/worker/chaos_worker_test.go index 07a586093..83ac2ce89 100644 --- a/go-chaos/worker/chaos_worker_test.go +++ b/go-chaos/worker/chaos_worker_test.go @@ -25,7 +25,7 @@ import ( "github.com/camunda/zeebe/clients/go/v8/pkg/pb" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" - chaos_experiments "github.com/zeebe-io/zeebe-chaos/go-chaos/internal/chaos-experiments" + chaos_experiments "github.com/camunda/zeebe-chaos/go-chaos/internal/chaos-experiments" ) func Test_ShouldFailToHandleJobWithoutPayload(t *testing.T) {