-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[infrastructure] add initial Locust load testing scripts (#4160)
- Loading branch information
Showing
10 changed files
with
932 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
# Performance | ||
|
||
## Load testing with Locust | ||
|
||
This directory contains Python scripts for load testing using [Locust](https://locust.io/) ([docs](https://docs.locust.io/en/stable/)). | ||
|
||
### Setup | ||
|
||
We use `uv` to manage dependencies for this project. If you aren't already familiar with this Python project manager, [get set up](https://docs.astral.sh/uv/). | ||
|
||
Then: | ||
- run `uv sync` (`pyproject.toml` and `uv.lock` together completely determine the setup) | ||
- run `source .venv/bin/activate` to [activate the virtual environment](https://docs.astral.sh/uv/pip/environments/#using-a-virtual-environment) | ||
|
||
### Usage | ||
|
||
The `run_locust.sh` script is intended to encode some sensible assumptions and do some of the heavy lifting to make it very easy to run load tests. | ||
|
||
Note that it assumes your machine has 8 cores, each of which can handle a workload of ~ 300 users. If you're on Linux you can check your core count with `lscpu` (on macOS, use `sysctl -n hw.ncpu`), and adjust the script accordingly. The latter will vary depending on the workload you're running, so feel free to play around with it (keep an eye on the 'Workers' tab in Locust to track CPU usage). | ||
|
||
As an example, the following command will simulate 500 users hitting PlanX staging (`editor.planx.dev`) with a series of requests to Hasura's GraphQL endpoint every 10 seconds (after a period of ramping up): | ||
|
||
```sh | ||
./run_locust.sh test_hasura.py 500 staging | ||
``` | ||
|
||
Then find the Locust GUI at `http://localhost:8089/`. | ||
|
||
### Development | ||
|
||
The `OpenWorkloadBase` class in `base_workload.py` provides a base class which all the `test_*.py` scripts inherit from. Any new workload should follow the same pattern. | ||
|
||
Also note that this project uses [ruff](https://docs.astral.sh/ruff/) for linting and formatting. So before pushing up changes (and with the venv activated), run the following: | ||
|
||
``` | ||
ruff check | ||
ruff format | ||
``` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
from locust import ( | ||
constant_pacing, | ||
FastHttpUser, | ||
stats, | ||
) | ||
|
||
|
||
# we want the double & triple nine percentiles to be reported in the chart and statistics (already in csv)
# NOTE: these are module-level settings on locust.stats, so they apply to every workload that imports this file
stats.PERCENTILES_TO_CHART = (0.5, 0.95, 0.99, 0.999)
stats.PERCENTILES_TO_STATISTICS = (0.95, 0.99, 0.999)

# by default, we attempt to have each user run a task every second (or as fast as possible if latency is greater)
# this means that user count will correspond roughly to request rate (assuming most tasks emit 1 request)
TASK_INVOCATION_RATE_SECONDS = 1
|
||
|
||
class OpenWorkloadBase(FastHttpUser):
    """Base class for all load-test workloads (the test_*.py scripts subclass this).

    Subclasses declare @task methods and may override ``host`` and ``wait_time``.
    """

    # this is a base class, intended to be subclassed for each test workload;
    # 'abstract' tells Locust not to instantiate it directly
    abstract = True
    # pace each simulated user at one task invocation per interval (or as fast
    # as possible if a task takes longer than the interval)
    wait_time = constant_pacing(TASK_INVOCATION_RATE_SECONDS)
    # browser-like headers sent with every request, to make simulated traffic
    # resemble real clients
    default_headers = {
        "accept": "*/*",
        "accept-encoding": "gzip, deflate, br, zstd",
        "accept-language": "en-GB,en;q=0.9",
        "cache-control": "no-cache",  # locust has no cache
        "pragma": "no-cache",
        "user-agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36",
    }

    def on_start(self) -> None:
        # per-user setup hook; intentionally a no-op, here for subclasses to override
        pass

    def on_stop(self) -> None:
        # per-user teardown hook; intentionally a no-op, here for subclasses to override
        pass
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
[project] | ||
name = "performance" | ||
version = "0.1.0" | ||
description = "Load testing scripts for PlanX infra" | ||
readme = "README.md" | ||
requires-python = ">=3.13" | ||
dependencies = [ | ||
"blinker==1.8.2", | ||
"brotli==1.1.0", | ||
"certifi==2024.8.30", | ||
"charset-normalizer==3.4.0", | ||
"click==8.1.7", | ||
"configargparse==1.7", | ||
"flask==3.0.3", | ||
"flask-cors==5.0.0", | ||
"flask-login==0.6.3", | ||
"gevent==24.10.3", | ||
"geventhttpclient==2.3.1", | ||
"greenlet==3.1.1", | ||
"har2locust==0.9.3", | ||
"idna==3.10", | ||
"itsdangerous==2.2.0", | ||
"jinja2==3.1.4", | ||
"locust==2.32.1", | ||
"locust-plugins==4.5.3", | ||
"markupsafe==3.0.2", | ||
"msgpack==1.1.0", | ||
"psutil==6.1.0", | ||
"pyzmq==26.2.0", | ||
"requests==2.32.3", | ||
"ruff==0.7.4", | ||
"setuptools==75.3.0", | ||
"typing-extensions==4.12.2", | ||
"urllib3==2.2.3", | ||
"werkzeug==3.1.2", | ||
"zope-event==5.0", | ||
"zope-interface==7.1.1", | ||
] | ||
|
||
[tool.ruff] | ||
indent-width=2 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
#!/usr/bin/env bash
# Run a Locust load test with sensible defaults for worker/process counts.
# Usage: ./run_locust.sh <locustfile> <users> [local|staging]
set -euo pipefail

# grab filename and number of users from command line args
LOCUSTFILE=${1:-}
USERS=${2:-}

# fail fast with a usage hint rather than launching locust with missing args
if [ -z "$LOCUSTFILE" ] || [ -z "$USERS" ]; then
  echo "Usage: $0 <locustfile> <users> [local|staging]" >&2
  exit 1
fi

# export env for scripts to reference (accepts local/staging, but not production)
if [ -z "${3:-}" ]; then
  echo "No environment passed in, assuming local"
  export TARGET_ENV=local
else
  echo "Setting target environment as: $3"
  export TARGET_ENV=$3
fi

# this script assumes your machine has 8 cores, each of which can handle ~ 300 users (will depend on workload)
# check core count with lscpu, test and adjust constants accordingly for your use case
LOCAL_CORES=8
USERS_PER_CORE=300

# get the ceiling of division, rather than floor (i.e. lean towards more workers)
PROCESSES=$(((USERS + USERS_PER_CORE - 1) / USERS_PER_CORE))
PROCESSES=$((PROCESSES > 0 ? PROCESSES : 1))
WORKERS=$PROCESSES
if [ "$WORKERS" -gt "$LOCAL_CORES" ]; then
  # cap workers at the physical core count; locust interprets --processes -1
  # as "spawn one worker per core"
  PROCESSES=-1
  WORKERS=$LOCAL_CORES
fi

# we keep spawn rate relatively low to avoid overwhelming CPUs during ramp up
SPAWN_RATE=5

# run load test for thrice as long as total ramp up time (or 5 minutes, whichever is higher)
RUN_TIME_SECONDS=$(((USERS / SPAWN_RATE) * 3))
RUN_TIME_SECONDS=$((RUN_TIME_SECONDS > 300 ? RUN_TIME_SECONDS : 300))

echo "Running $LOCUSTFILE load test across $WORKERS workers (i.e. CPUs)"
python -m locust \
  --locustfile "$LOCUSTFILE" \
  --users "$USERS" \
  --spawn-rate "$SPAWN_RATE" \
  --run-time "$RUN_TIME_SECONDS" \
  --processes "$PROCESSES" \
  --autostart
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,143 @@ | ||
import os | ||
import random | ||
import time | ||
|
||
from locust import ( | ||
constant_pacing, | ||
task, | ||
) | ||
|
||
from base_workload import OpenWorkloadBase | ||
from utils import ( | ||
get_nested_key, | ||
get_target_host, | ||
) | ||
|
||
|
||
# pace each user at one full task run every 10 seconds (overrides the base default of 1)
TASK_INVOCATION_RATE_SECONDS = 10
# target host per environment; the local default can be overridden via HASURA_GRAPHQL_URL
HOST_BY_ENV = {
    "local": os.getenv("HASURA_GRAPHQL_URL", "http://localhost:7100"),
    "staging": "https://hasura.editor.planx.dev",
}
# Hasura serves all GraphQL traffic from this single path
HASURA_GRAPHQL_ENDPOINT = "/v1/graphql"
|
||
|
||
class HasuraWorkload(OpenWorkloadBase):
    """Simulate an editor user browsing teams and flows via Hasura's GraphQL endpoint.

    The single task walks a realistic journey: list teams -> list a random
    team's flows -> fetch a random flow's metadata -> fetch its latest
    published version, with short sleeps to mimic user think-time.
    """

    # run the whole journey once per user per TASK_INVOCATION_RATE_SECONDS (10s here)
    wait_time = constant_pacing(TASK_INVOCATION_RATE_SECONDS)
    # resolved once at import time from TARGET_ENV (local/staging)
    host = get_target_host(HOST_BY_ENV)

    @task
    def get_random_flow_metadata(self) -> None:
        # first we simulate hitting the splash page (for a logged in user), where teams are listed
        teams = None
        with self.rest(
            "POST",
            HASURA_GRAPHQL_ENDPOINT,
            name="GetTeams",
            json={
                "operationName": "GetTeams",
                "query": """
                    query GetTeams {
                        teams(order_by: {name: asc}) {
                            id
                            name
                        }
                    }
                """,
            },
        ) as resp:
            teams = get_nested_key(resp.js, "data", "teams")
        # we choose a team at random to fetch flows for, as if to display the list of services
        team_id = random.choice(teams)["id"]
        # then we sleep for a bit to simulate the user choosing from the list of teams
        time.sleep(2)

        flows = None
        with self.rest(
            "POST",
            HASURA_GRAPHQL_ENDPOINT,
            name="GetFlows",
            json={
                "operationName": "GetFlows",
                "variables": {"team_id": team_id},
                "query": """
                    query GetFlows($team_id: Int!) {
                        flows(where: {team_id: {_eq: $team_id}}) {
                            id
                            name
                            slug
                            updated_at
                        }
                    }
                """,
            },
        ) as resp:
            flows = get_nested_key(resp.js, "data", "flows")
        # a team may have no flows at all, in which case this user's journey ends here
        if not flows:
            return
        # now we choose a random flow from that team to get more information about
        flow_slug = random.choice(flows)["slug"]
        time.sleep(2)

        flow_id, aggregate_count = None, None
        with self.rest(
            "POST",
            HASURA_GRAPHQL_ENDPOINT,
            name="GetFlowMetadata",
            json={
                "operationName": "GetFlowMetadata",
                "variables": {
                    "team_id": team_id,
                    "slug": flow_slug,
                },
                "query": """
                    query GetFlowMetadata($team_id: Int!, $slug: String!) {
                        flows(where: {team_id: {_eq: $team_id}, slug: {_eq: $slug}}) {
                            id
                            published_flows_aggregate {
                                aggregate {
                                    count
                                }
                            }
                        }
                    }
                """,
            },
        ) as resp:
            flows = get_nested_key(resp.js, "data", "flows")
            if flows:
                flow_id = flows[0]["id"]
                aggregate_count = get_nested_key(
                    flows[0], "published_flows_aggregate", "aggregate", "count"
                )
        # it may be that the flow is not published (in which case aggregate count will be 0)
        if not aggregate_count:
            return

        # this last request comes immediately after the last, and will likely be the heaviest
        # (because published flow data is one of the largest fetches we ever handle)
        # NOTE(review): ordering by `data: asc` before `created_at: desc` looks
        # unintended for fetching the *last* published flow — confirm the order_by
        with self.rest(
            "POST",
            HASURA_GRAPHQL_ENDPOINT,
            name="GetLastPublishedFlow",
            json={
                "operationName": "GetLastPublishedFlow",
                "variables": {
                    "id": flow_id,
                },
                "query": """
                    query GetLastPublishedFlow($id: uuid) {
                        flows(where: {id: {_eq: $id}}) {
                            published_flows(limit: 1, order_by: {data: asc, created_at: desc}) {
                                created_at
                                data
                            }
                        }
                    }
                """,
            },
        ) as resp:
            flows = get_nested_key(resp.js, "data", "flows")
            if flows:
                published_flows = flows[0]["published_flows"]
                # sanity check that a published flow with a timestamp came back
                assert published_flows[0]["created_at"] is not None
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
import os | ||
|
||
from locust import task | ||
|
||
from base_workload import OpenWorkloadBase | ||
from utils import get_target_host | ||
|
||
|
||
# target host per environment; the local default can be overridden via EDITOR_URL_EXT
HOST_BY_ENV = {
    "local": os.getenv("EDITOR_URL_EXT", "http://localhost:3000"),
    "staging": "https://editor.planx.dev",
}
|
||
|
||
class SplashWorkload(OpenWorkloadBase):
    """Minimal workload: unauthenticated users repeatedly hitting the splash page."""

    # resolved once at import time from TARGET_ENV (local/staging)
    host = get_target_host(HOST_BY_ENV)

    # simple test to simulate users hitting splash page (without auth)
    @task
    def get_splash(self) -> None:
        self.client.get("/")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
import os | ||
from typing import Any | ||
|
||
|
||
# environments these scripts may target; production is deliberately excluded
VALID_TARGET_ENVIRONMENTS = ("local", "staging")
|
||
|
||
def get_nested_key(dct: dict[Any, Any], *keys: str) -> Any:
    """Safely walk *keys* down a nested dict, returning None when the path is missing.

    Args:
        dct: the (possibly nested) dictionary to traverse.
        *keys: the sequence of keys identifying the nested value.

    Returns:
        The value at the nested path, or None when any key is absent or an
        intermediate value is not subscriptable (e.g. a GraphQL response where
        "data" came back as None).
    """
    current: Any = dct
    for key in keys:
        try:
            current = current[key]
        except (KeyError, TypeError):
            # KeyError: key missing at this level
            # TypeError: intermediate value is None or otherwise not a mapping
            return None
    return current
|
||
|
||
def get_target_host(host_by_env: dict[str, str]) -> str:
    """Resolve the host URL for the environment named by TARGET_ENV.

    Falls back to "local" when TARGET_ENV is unset; raises ValueError for
    any environment outside VALID_TARGET_ENVIRONMENTS.
    """
    env = os.getenv("TARGET_ENV", "local")
    if env in VALID_TARGET_ENVIRONMENTS:
        return host_by_env[env]
    raise ValueError(f"Invalid environment submitted (accepts local/staging): {env}")
Oops, something went wrong.