Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Move one more operator to use DRE #1

Merged
merged 7 commits into from
Jun 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
63 changes: 63 additions & 0 deletions .github/workflows/containers.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
# Builds and pushes the project's container images to GHCR on pushes to main.
name: Containers
on:
  push:
    branches:
      - "main"

# One run per ref at a time; a newer push cancels the in-flight build.
# NOTE(review): the pull_request.number half of the group is always empty here
# (this workflow only triggers on push), so the group reduces to workflow+ref.
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true

jobs:
  # Builds the airflow-content-syncer image from the directory of the same name.
  airflow-content-syncer:
    runs-on: ubuntu-latest
    # NOTE(review): a previous comment claimed this image is based on
    # ubuntu:20.04; ubuntu-latest tracks newer Ubuntu LTS runner images —
    # confirm nothing here depends on 20.04 specifically.
    steps:
      - uses: actions/checkout@v4

      # Derive image tags and OCI labels from the git context.
      - uses: docker/metadata-action@v5
        id: metadata
        with:
          images: ghcr.io/${{ github.repository }}/airflow-content-syncer

      # Authenticate to GitHub Container Registry with the workflow's own token.
      - uses: docker/login-action@v3
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      # Build and push with kaniko (daemonless image builds).
      # NOTE(review): cache is false while cache-repository is set; the
      # repository value is unused while caching is disabled — confirm intent.
      - uses: int128/kaniko-action@v1
        name: Create airflow-content-syncer container
        with:
          push: true
          tags: ${{ steps.metadata.outputs.tags }}
          labels: ${{ steps.metadata.outputs.labels }}
          cache: false
          cache-repository: ghcr.io/${{ github.repository }}/cache
          context: airflow-content-syncer

  # Same pipeline for the airflow-customized image.
  airflow-customized:
    runs-on: ubuntu-latest
    # NOTE(review): see the note on the job above about the stale
    # ubuntu:20.04 comment.
    steps:
      - uses: actions/checkout@v4

      # Derive image tags and OCI labels from the git context.
      - uses: docker/metadata-action@v5
        id: metadata
        with:
          images: ghcr.io/${{ github.repository }}/airflow-customized

      # Authenticate to GitHub Container Registry with the workflow's own token.
      - uses: docker/login-action@v3
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      # Build and push with kaniko (daemonless image builds).
      # NOTE(review): cache is false while cache-repository is set; the
      # repository value is unused while caching is disabled — confirm intent.
      - uses: int128/kaniko-action@v1
        name: Create airflow-customized container
        with:
          push: true
          tags: ${{ steps.metadata.outputs.tags }}
          labels: ${{ steps.metadata.outputs.labels }}
          cache: false
          cache-repository: ghcr.io/${{ github.repository }}/cache
          context: airflow-customized
23 changes: 23 additions & 0 deletions .github/workflows/main.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Main CI entry point: runs the test battery on pushes to main and on all PRs.
name: Main
on:
  push:
    branches:
      - "main"
  pull_request: {}

# One run per PR (or ref) at a time; a newer push cancels the in-flight run.
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true

jobs:
  CI:
    runs-on: ubuntu-latest
    # NOTE(review): a previous comment claimed this image is based on
    # ubuntu:20.04; ubuntu-latest tracks newer Ubuntu LTS runner images —
    # confirm nothing here depends on 20.04 specifically.
    steps:
      - uses: actions/checkout@v4

      ########################################
      # Test
      ########################################
      # Delegates to the local composite action at .github/workflows/test.
      - name: "Test"
        uses: ./.github/workflows/test
53 changes: 53 additions & 0 deletions .github/workflows/manage-runner-post/action.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
# Composite action run at the end of a job: optimizes the bazel cache and,
# on main only, replaces the saved cache entry with the current one.
name: Teardown runner
description: Reusable action for tearing down the github runner

inputs:
  GITHUB_TOKEN:
    description: 'Automatically assigned GitHub Token'
    required: true

runs:
  using: composite
  steps:
    ########################################
    # Optimize bazel cache by hard-linking duplicate files
    ########################################
    - name: "🧹 Optimize bazel cache directory before uploading"
      if: ${{ github.ref == 'refs/heads/main' }}
      run: ./bin/optimize-bazel-cache.sh
      shell: bash

    ########################################
    # Save cache:
    #
    # Cache is saved on main only to avoid cache evictions due to github restrictions:
    # https://docs.github.com/en/actions/using-workflows/caching-dependencies-to-speed-up-workflows#restrictions-for-accessing-a-cache
    ########################################
    # actions/cache/save will not overwrite an existing key, so the stale
    # entry for this branch is deleted first.
    - name: First delete cache entry if it exists
      if: ${{ github.ref == 'refs/heads/main' }}
      run: |
        set -eExou pipefail
        gh extension install actions/gh-actions-cache
        REPO="${{ github.repository }}"
        BRANCH="${{ github.ref }}"
        echo "Fetching list of cache keys"
        cacheKeys=$(gh actions-cache list -R "$REPO" -B "$BRANCH" | cut -f 1)
        echo "Deleting caches..."
        for cacheKey in $cacheKeys
        do
          # '|| true': deleting cache keys is best-effort and must not fail
          # the workflow.
          gh actions-cache delete "$cacheKey" -R "$REPO" -B "$BRANCH" --confirm || true
        done
        echo "Done"
      env:
        GH_TOKEN: ${{ inputs.GITHUB_TOKEN }}
      shell: bash

    - name: "☁️ ⬆️ Saving cache on main only"
      if: ${{ github.ref == 'refs/heads/main' }}
      uses: actions/cache/save@v4
      with:
        path: "~/.cache/bazel"
        # Configure cache updates
        # https://github.com/actions/cache/blob/main/tips-and-workarounds.md#update-a-cache
        # https://github.com/actions/cache/blob/main/examples.md#---bazel
        key: ${{ runner.os }}-bazel-${{ hashFiles('.bazelversion', '.bazelrc', 'WORKSPACE.bazel', 'Cargo.Bazel.lock', 'requirements.txt') }}
66 changes: 66 additions & 0 deletions .github/workflows/manage-runner-pre/action.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
# Composite action run at the start of a job: fixes cache ownership on the
# custom self-hosted runners, frees disk on hosted runners, installs bazelisk
# and restores the bazel cache.
name: Setup runner
description: Reusable action for setting up the github runner

runs:
  using: composite
  steps:
    # Custom (self-hosted) runners may leave ~/.cache owned by a different
    # UID from a previous job; chown it so bazel can write to it.
    - name: "👮‍♀️ Setup runner"
      if: ${{ startsWith(runner.name, 'dre-runner-custom') }}
      shell: bash
      run: |
        set -exuo pipefail
        whoami

        path_to_check="/home/runner/.cache"

        current_owner=$(stat -c "%u" "$path_to_check")
        current_group=$(stat -c "%g" "$path_to_check")

        # NOTE(review): presumably 1001 is the runner user/group on the
        # custom runner image — confirm against that image's Dockerfile.
        desired_owner=1001
        desired_group=1001

        # Check if the current owner and group match the desired owner and group
        if [ "$current_owner" -ne "$desired_owner" ] || [ "$current_group" -ne "$desired_group" ]; then
            echo "Owner or group of cache does not match. Changing ownership..."
            # Change the owner and group recursively
            sudo chown -R $desired_owner:$desired_group "$path_to_check"
            echo "Ownership changed to $desired_owner:$desired_group for $path_to_check"
        else
            echo "Ownership is already set to $desired_owner:$desired_group for $path_to_check"
        fi

    ########################################
    # Setup
    ########################################
    # NOTE(review): the action reference below appears mangled by the page
    # scrape ("[email protected]" looks like an email-obfuscation artifact that
    # replaced the real "free-disk-space@<version>" ref) — restore the actual
    # pinned version from the repository before merging.
    - name: "🔧 Free Up Disk Space"
      uses: jlumbroso/[email protected]
      if: ${{ !startsWith(runner.name, 'dre-runner-custom') }}
      with:
        # this might remove tools that are actually needed,
        # when set to "true" but frees about 6 GB
        tool-cache: true
        large-packages: true # this is slow

    - uses: bazelbuild/setup-bazelisk@v2

    ########################################
    # Download and unpack cache
    ########################################
    # Hosted runners only; custom runners keep a persistent local cache.
    - name: "☁️ ⬇️ Restore bazel cache"
      uses: actions/cache/restore@v4
      if: ${{ !startsWith(runner.name, 'dre-runner-custom') }}
      with:
        path: "~/.cache/bazel"
        # Configure cache updates
        # https://github.com/actions/cache/blob/main/tips-and-workarounds.md#update-a-cache
        # https://github.com/actions/cache/blob/main/examples.md#---bazel
        key: ${{ runner.os }}-bazel-${{ hashFiles('.bazelversion', '.bazelrc', 'WORKSPACE.bazel', 'Cargo.Bazel.lock', 'requirements.txt') }}
        restore-keys: |
          ${{ runner.os }}-bazel-

    - name: "🧹 Clean bazel cache if we're preparing a new release"
      if: ${{ startsWith(github.ref, 'refs/tags/v') && !startsWith(runner.name, 'dre-runner-custom') }}
      # This is desirable to make sure bazel does not use stale pre-built binaries
      # Bazel actually keeps all intermediate objects so builds are still fast
      run: bazel clean
      shell: bash
35 changes: 35 additions & 0 deletions .github/workflows/test/action.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# Composite action running the full test battery: venv/tool preparation,
# ruff, mypy, then pytest.
name: Test
description: Run battery of tests

runs:
  using: composite
  steps:
    # Build the virtualenv and tooling up front, capturing output so the log
    # stays quiet on success.
    #
    # Bug fix: GitHub runs composite 'bash' steps with '-e -o pipefail', so
    # the previous  out=$(make ...); ret=$?  aborted on the assignment line
    # when make failed, and the captured output was never echoed.  The
    # '|| { ... }' list suppresses the -e exit so the handler can report the
    # output and propagate make's real exit code.
    - name: "Prepare"
      id: prep
      run: |
        out=$(make venv/bin venv/bin/mypy venv/bin/ruff venv/bin/pytest venv/lib/*/site-packages/mock 2>&1) || {
          ret=$?
          echo "$out" >&2
          exit $ret
        }
      shell: bash

    - name: "Ruff"
      id: ruff
      run: |
        make ruff
      shell: bash

    - name: "MyPy"
      id: mypy
      run: |
        make mypy
      shell: bash

    - name: "Unit tests"
      id: pytest
      run: |
        make pytest
      shell: bash
9 changes: 9 additions & 0 deletions .gitlab-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,17 @@ default:
- runner_system_failure

stages:
- test
- release

run-tests:
stage: test
tags:
- ubuntu
- dfinity-shared
script:
- make test

release-content:
stage: release
tags:
Expand Down
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -34,4 +34,4 @@ tests/airflow.db: $(VENV_BINDIR)
pytest: tests/airflow.db $(VENV_BINDIR)/pytest $(VENV_DIR)/lib/*/site-packages/mock
AIRFLOW__DATABASE__LOAD_DEFAULT_CONNECTIONS=False AIRFLOW__CORE__LOAD_EXAMPLES=False AIRFLOW__CORE__UNIT_TEST_MODE=True AIRFLOW__CORE__ALLOWED_DESERIALIZATION_CLASSES_REGEXP="(airflow|dfinity)[.].*" AIRFLOW_HOME=$(PWD)/tests PYTHONPATH=$(PWD)/plugins:$(PWD)/shared $(VENV_BINDIR)/pytest -vv tests

test: mypy ruff pytest
test: ruff mypy pytest
2 changes: 2 additions & 0 deletions airflow-content-syncer/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
FROM fedora:37

LABEL org.opencontainers.image.source="https://github.com/dfinity/dre-airflow"

RUN dnf install -y bash ca-certificates git rsync

COPY content-syncer /usr/local/bin/content-syncer
Expand Down
3 changes: 2 additions & 1 deletion bin/airflow
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@ then
pushd "$AIRFLOW_HOME"
ln -sfT ../dags dags
ln -sfT ../plugins plugins
sed -i 's/allowed_deserialization_classes.*/allowed_deserialization_classes = (airflow|dfinity)[.].*/' airflow.cfg
sed -i 's/allowed_deserialization_classes_regex.*/allowed_deserialization_classes_regex = (airflow|dfinity)[.].*/' airflow.cfg
sed -i 's/^allowed_deserialization_classes /# allowed_deserialization_classes.../' airflow.cfg
sed -i 's/reload_on_plugin_change.*/reload_on_plugin_change = True/' airflow.cfg
sed -i 's/load_examples.*/load_examples = False/' airflow.cfg
popd
Expand Down
4 changes: 2 additions & 2 deletions plugins/operators/ic_os_rollout.py
Original file line number Diff line number Diff line change
Expand Up @@ -269,7 +269,7 @@ def __init__(
**kwargs,
)

def execute(self, context: Context) -> None: # type:ignore
def execute(self, context: Context) -> None:
proposal_creation_result = context["task_instance"].xcom_pull(
task_ids=self.source_task_id,
map_indexes=context["task_instance"].map_index,
Expand All @@ -280,7 +280,7 @@ def execute(self, context: Context) -> None: # type:ignore
self.log.info("Proposal does not need vote. Not requesting vote.")
else:
self.log.info("Requesting vote on proposal with text: %s", self.text)
slack.SlackAPIPostOperator.execute(self, context=context) # type:ignore
slack.SlackAPIPostOperator.execute(self, context=context)


class NotifyAboutStalledSubnet(slack.SlackAPIPostOperator):
Expand Down
14 changes: 6 additions & 8 deletions plugins/sensors/ic_os_rollout.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
from typing import Any, TypedDict, cast

import dfinity.dre as dre
import dfinity.ic_admin as ic_admin
import dfinity.ic_types as ic_types
import dfinity.prom_api as prom
from dfinity.ic_os_rollout import (
Expand Down Expand Up @@ -115,11 +114,12 @@ def execute(self, context: Context, event: Any = None) -> None:
return

self.log.info(f"Waiting for revision {git_revision} to be elected.")
if not ic_admin.is_replica_version_blessed(
print("::group::DRE output")
blessed = dre.DRE(self.network, SubprocessHook()).is_replica_version_blessed(
git_revision,
self.network,
ic_admin_version=None if self.simulate_elected else self.git_revision,
):
)
print("::endgroup::")
if not blessed:
self.log.info("Revision is not yet elected. Waiting.")
self.defer(
trigger=TimeDeltaTrigger(datetime.timedelta(minutes=15)),
Expand Down Expand Up @@ -385,9 +385,7 @@ def send_notification_if_necessary(subnet_id: str) -> None:
NotifyAboutStalledSubnet(
task_id="notify_about_stalled_subnet",
subnet_id=subnet_id,
).execute(
context=context
) # type: ignore
).execute(context=context)
# send message here, then
context["task_instance"].xcom_push(
key="first_alert_check_timestamp",
Expand Down
13 changes: 13 additions & 0 deletions shared/dfinity/dre.py
Original file line number Diff line number Diff line change
Expand Up @@ -225,9 +225,22 @@ def get_subnet_list(self) -> list[str]:
raise AirflowException("dre exited with status code %d", r.exit_code)
return cast(list[str], json.loads(r.output))

def get_blessed_replica_versions(self) -> list[str]:
    """Return the IDs of the currently blessed (elected) replica versions.

    Runs ``dre get blessed-replica-versions --json`` and extracts the
    ``value.blessed_version_ids`` list from its JSON output.

    Returns:
        The list of blessed version ID strings reported by dre.

    Raises:
        AirflowException: if the dre subprocess exits nonzero.
    """
    r = self.run("get", "blessed-replica-versions", "--json", full_stdout=True)
    if r.exit_code != 0:
        # Bug fix: the message must be %-formatted eagerly — the exception
        # constructor does not interpolate extra arguments.
        raise AirflowException("dre exited with status code %d" % r.exit_code)
    return cast(list[str], json.loads(r.output)["value"]["blessed_version_ids"])

def is_replica_version_blessed(self, git_revision: str) -> bool:
    """Report whether ``git_revision`` is among the blessed replica versions.

    The comparison is case-insensitive on both sides.
    """
    wanted = git_revision.lower()
    return any(
        version.lower() == wanted
        for version in self.get_blessed_replica_versions()
    )


class AuthenticatedDRE(DRE):

network: ic_types.ICNetworkWithPrivateKey

def upgrade_unassigned_nodes(
self,
dry_run: bool = False,
Expand Down
Loading