Skip to content

Commit

Permalink
PTFE-1438 Add labels from webhooks to runner backend (#539)
Browse files Browse the repository at this point in the history
  • Loading branch information
tcarmet authored Feb 13, 2024
1 parent 2c7c1c3 commit 60e0dd4
Show file tree
Hide file tree
Showing 9 changed files with 96 additions and 22 deletions.
1 change: 1 addition & 0 deletions pyrightconfig.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
"exclude": ["**/__pycache__", "**/node_modules"],
"typeCheckingMode": "basic",
"stubPath": "./typings",
"extraPaths": ["./.venv/lib/*/site-packages"],
"reportMissingTypeStubs": true,
"venvPath": ".",
"venv": ".venv"
Expand Down
9 changes: 6 additions & 3 deletions runner_manager/backend/aws.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
from typing import List, Literal
from typing import List, Literal, Optional

from boto3 import client
from botocore.exceptions import ClientError
from githubkit.webhooks.types import WorkflowJobEvent
from mypy_boto3_ec2 import EC2Client
from pydantic import Field
from redis_om import NotFoundError
Expand Down Expand Up @@ -88,7 +89,9 @@ def list(self) -> List[Runner]:
runners.append(runner)
return runners

def update(self, runner: Runner) -> Runner:
def update(
self, runner: Runner, webhook: Optional[WorkflowJobEvent] = None
) -> Runner:
"""Update a runner."""
if runner.instance_id:
try:
Expand All @@ -99,4 +102,4 @@ def update(self, runner: Runner) -> Runner:
except Exception as e:
log.error(e)
raise e
return super().update(runner)
return super().update(runner, webhook)
5 changes: 4 additions & 1 deletion runner_manager/backend/base.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from typing import List, Literal, Optional

from githubkit.webhooks.types import WorkflowJobEvent
from pydantic import BaseModel, Field
from redis_om import NotFoundError

Expand Down Expand Up @@ -47,7 +48,9 @@ def delete(self, runner: Runner) -> int:
"""
return Runner.delete(runner.pk)

def update(self, runner: Runner) -> Runner:
def update(
self, runner: Runner, webhook: Optional[WorkflowJobEvent] = None
) -> Runner:
"""Update a runner instance.
Args:
Expand Down
7 changes: 4 additions & 3 deletions runner_manager/backend/docker.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
import logging
from importlib.resources import files
from pathlib import Path
from typing import Dict, List, Literal
from typing import Dict, List, Literal, Optional

from docker import DockerClient
from docker.errors import APIError, NotFound
from docker.models.containers import Container
from githubkit.webhooks.types import WorkflowJobEvent
from pydantic import Field
from redis_om import NotFoundError

Expand Down Expand Up @@ -73,7 +74,7 @@ def create(self, runner: Runner):

return super().create(runner)

def update(self, runner: Runner):
def update(self, runner: Runner, webhook: Optional[WorkflowJobEvent] = None):
"""Update a runner instance.
We cannot update a container, so we just gonna ensure the runner
Expand All @@ -82,7 +83,7 @@ def update(self, runner: Runner):
container: Container = self.client.containers.get(runner.instance_id)
if container.status != "running":
raise Exception(f"Container {container.id} is not running.")
return super().update(runner)
return super().update(runner, webhook)

def delete(self, runner: Runner):
try:
Expand Down
45 changes: 36 additions & 9 deletions runner_manager/backend/gcloud.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
import logging
import math
import re
import time
from typing import List, Literal, MutableMapping
from typing import List, Literal, MutableMapping, Optional

from githubkit.webhooks.types import WorkflowJobEvent
from google.api_core.exceptions import BadRequest, NotFound
from google.api_core.extended_operation import ExtendedOperation
from google.cloud.compute import (
Expand Down Expand Up @@ -152,18 +154,41 @@ def configure_instance(self, runner: Runner) -> Instance:
],
)

def _sanitize_label_value(self, value: str) -> str:
value = value[:63]
value = value.lower()
value = re.sub(r"[^a-z0-9_-]", "-", value)
def _sanitize_label_value(self, value: str | int | float | None) -> str:
if value is None:
return ""
if isinstance(value, (int, float)):
if math.isnan(value):
return ""
value = str(int(value))
value = value[:63].lower()
value = re.sub(r"[^a-z0-9_-]+", "-", value)
value = re.sub(r"(^-+)|(-+$)", "", value)
return value

def setup_labels(self, runner: Runner) -> MutableMapping[str, str]:
def setup_labels(
self, runner: Runner, webhook: Optional[WorkflowJobEvent] = None
) -> MutableMapping[str, str]:
labels: MutableMapping[str, str] = self.instance_config.labels.copy()
if self.manager:
labels["runner-manager"] = self.manager
labels["status"] = self._sanitize_label_value(runner.status)
labels["busy"] = self._sanitize_label_value(str(runner.busy))
if webhook:
labels["repository"] = self._sanitize_label_value(webhook.repository.name)
labels["organization"] = self._sanitize_label_value(
webhook.repository.organization
if webhook.repository.organization
else ""
)
labels["workflow"] = self._sanitize_label_value(
webhook.workflow_job.workflow_name
)
labels["job"] = self._sanitize_label_value(webhook.workflow_job.name)
labels["run_id"] = self._sanitize_label_value(webhook.workflow_job.run_id)
labels["run_attempt"] = self._sanitize_label_value(
webhook.workflow_job.run_attempt
)
return labels

def create(self, runner: Runner):
Expand Down Expand Up @@ -240,14 +265,16 @@ def list(self) -> List[Runner]:
raise e
return runners

def update(self, runner: Runner) -> Runner:
def update(
self, runner: Runner, webhook: Optional[WorkflowJobEvent] = None
) -> Runner:
try:
instance: Instance = self.client.get(
project=self.config.project_id,
zone=self.config.zone,
instance=runner.instance_id or runner.name,
)
instance.labels = self.setup_labels(runner)
instance.labels = self.setup_labels(runner, webhook)

log.info(f"Updating {runner.name} labels to {instance.labels}")
self.client.update(
Expand All @@ -260,4 +287,4 @@ def update(self, runner: Runner) -> Runner:
except Exception as e:
super().update(runner)
raise e
return super().update(runner)
return super().update(runner, webhook)
10 changes: 5 additions & 5 deletions runner_manager/models/backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,8 @@ class InstanceConfig(BaseSettings):
"""Base class for backend instance configuration."""

startup_script: str = startup_sh.as_posix()
redhat_username: Optional[str]
redhat_password: Optional[str]
redhat_username: Optional[str] = None
redhat_password: Optional[str] = None

def runner_env(self, runner: Runner) -> RunnerEnv:
return RunnerEnv(
Expand All @@ -58,9 +58,9 @@ def runner_env(self, runner: Runner) -> RunnerEnv:
RUNNER_GROUP=runner.runner_group_name,
RUNNER_DOWNLOAD_URL=runner.download_url,
RUNNER_REDHAT_USERNAME=self.redhat_username,
RUNNER_REDHAT_PASSWORD=self.redhat_password
if self.redhat_password
else None,
RUNNER_REDHAT_PASSWORD=(
self.redhat_password if self.redhat_password else None
),
)

def template_startup(self, runner: Runner) -> str:
Expand Down
2 changes: 1 addition & 1 deletion runner_manager/models/runner_group.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,7 @@ def update_runner(self: Self, webhook: WorkflowJobInProgress) -> Runner:
runner.started_at = webhook.workflow_job.started_at
runner.busy = True
runner.save()
return self.backend.update(runner)
return self.backend.update(runner, webhook)

def delete_runner(self, runner: Runner, github: GitHub) -> int:
"""Delete a runner instance.
Expand Down
Empty file added tests/unit/backend/__init__.py
Empty file.
39 changes: 39 additions & 0 deletions tests/unit/backend/test_gcp.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
import os
from typing import List

from githubkit.webhooks.types import WorkflowJobEvent
from google.cloud.compute import Image, NetworkInterface
from hypothesis import given
from pytest import fixture, mark, raises
from redis_om import NotFoundError

Expand All @@ -10,6 +12,8 @@
from runner_manager.models.runner import Runner
from runner_manager.models.runner_group import RunnerGroup

from ...strategies import WorkflowJobInProgressStrategy


@fixture()
def gcp_group(settings, monkeypatch) -> RunnerGroup:
Expand Down Expand Up @@ -95,6 +99,32 @@ def test_gcp_setup_labels(runner: Runner, gcp_group: RunnerGroup):
assert labels["key"] == "value"


@given(webhook=WorkflowJobInProgressStrategy)
def test_gcp_setup_labels_with_webhook(webhook: WorkflowJobEvent):
runner: Runner = Runner(
name=webhook.workflow_job.runner_name,
id=webhook.workflow_job.runner_id,
busy=True,
runner_group_name=webhook.workflow_job.runner_group_name,
runner_group_id=webhook.workflow_job.runner_group_id,
status="online",
)
backend = GCPBackend(
config=GCPConfig(
zone="europe-west1-a",
project_id="project",
),
instance_config=GCPInstanceConfig(),
)
labels = backend.setup_labels(runner, webhook)
assert "workflow" in labels.keys()
assert "repository" in labels.keys()

# Test with no webhook
labels = backend.setup_labels(runner)
assert "workflow" not in labels.keys()


def test_gcp_spot_config(runner: Runner, gcp_group: RunnerGroup):
gcp_group.backend.instance_config.spot = True
scheduling = gcp_group.backend.scheduling
Expand Down Expand Up @@ -127,6 +157,15 @@ def test_gcp_instance(runner: Runner, gcp_group: RunnerGroup):
assert instance.name == runner.name


def test_sanitize_label(gcp_group: RunnerGroup):
assert "test" == gcp_group.backend._sanitize_label_value("test")
assert "42" == gcp_group.backend._sanitize_label_value(42)
assert "42" == gcp_group.backend._sanitize_label_value(42.0)
assert "" == gcp_group.backend._sanitize_label_value(None)
assert "test" == gcp_group.backend._sanitize_label_value("-test-")
assert "" == gcp_group.backend._sanitize_label_value(float("nan"))


@mark.skipif(
not os.getenv("GOOGLE_APPLICATION_CREDENTIALS"), reason="GCP credentials not found"
)
Expand Down

0 comments on commit 60e0dd4

Please sign in to comment.