From 5957ca471dcf90a09f82aa566adf7937e7371cb3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sa=C5=A1a=20Tomi=C4=87?= Date: Wed, 22 Jan 2025 18:17:22 +0100 Subject: [PATCH 1/4] feat: enhance Airflow setup script with 1Password and S3 logging ### Added - `set -eEuo pipefail` options for a more robust script. - Function `install_1pass_cli` to automate 1Password CLI installation for both Linux and macOS. - Function `login_1pass` to handle 1Password login processes. - Function `setup_airflow_variables_and_connections` to configure Airflow connections and variables using 1Password. - Interactive prompt to optionally set up Airflow variables and connections. - Logic to prevent multiple instances of Airflow standalone mode. ### Changed - Clarified in `README.md` that the S3 logging storage connection is only needed for production (Kubernetes deployments). ### Fixed - Consolidated the environment variable exports at the top of the script (removing the late exports just before `exec`) and restructured the script for clarity. --- README.md | 2 +- bin/airflow | 80 ++++++++++++++++++++++++++++++++++++++++++++++++----- 2 files changed, 74 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index 0640805..a62ee03 100644 --- a/README.md +++ b/README.md @@ -85,7 +85,7 @@ connections (unless specified otherwise): * * Connection ID: `airflow_logging` * Connection type: Amazon Web Services - * Description: Logging storage for Airflow. + * Description: S3 Logging storage for Airflow. 
Only needed for production (Kubernetes deployments) * AWS Access Key ID: the value of `AWS_ACCESS_KEY_ID` in K8s secret `airflow-logging` * AWS Secret Access Key: the value of `AWS_SECRET_ACCESS_KEY` in K8s secret `airflow-logging` * Extra: `{ "endpoint_url": "http://rook-ceph-rgw-ceph-store.rook-ceph.svc.cluster.local" }` diff --git a/bin/airflow b/bin/airflow index 07fc84a..28214f9 100755 --- a/bin/airflow +++ b/bin/airflow @@ -1,9 +1,55 @@ -#!/bin/bash -e +#!/bin/bash +set -eEuo pipefail SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) VENV_DIR=$( dirname "$SCRIPT_DIR" )/venv +VENV_BIN_DIR="$VENV_DIR"/bin export AIRFLOW_HOME=$( dirname "$SCRIPT_DIR" )/airflow export PYTHONPATH=$(dirname "$SCRIPT_DIR")/shared +AIRFLOW_BIN="$VENV_BIN_DIR"/airflow +ONEPASS_BIN="$VENV_BIN_DIR"/1pass + +export AIRFLOW__CORE__ALLOWED_DESERIALIZATION_CLASSES_REGEXP="(dfinity|airflow).*" +export AIRFLOW__WEBSERVER__ALLOW_RAW_HTML_DESCRIPTIONS=true +export PATH="$VENV_BIN_DIR:$PATH" + +function install_1pass_cli() { + if [ -x "$ONEPASS_BIN" ]; then + return 0 + fi + if [[ "$(uname)" == "Linux" ]]; then + ARCH=$(dpkg --print-architecture) && \ + wget "https://cache.agilebits.com/dist/1P/op2/pkg/v2.30.3/op_linux_${ARCH}_v2.30.3.zip" -O op.zip && \ + unzip -d op op.zip && \ + mv op/op "$VENV_BIN_DIR"/1pass && \ + rm -r op.zip op && \ + chmod 0755 "$VENV_BIN_DIR"/1pass && \ + ln -sf 1pass "$VENV_BIN_DIR"/op + elif [[ "$OSTYPE" == "darwin"* ]]; then + brew install 1password-cli + fi +} + +function login_1pass() { + "$ONEPASS_BIN" whoami || { + "$ONEPASS_BIN" account add --address dfinity.1password.com + eval $("$ONEPASS_BIN" signin) + } +} + +function setup_airflow_variables_and_connections() { + "$AIRFLOW_BIN" variables set "dfinity.ic_admin.mainnet.proposer_key_file" "$("$ONEPASS_BIN" read "op://DRE Team/DFX release-automation principal key/identity.pem")" + SLACK_CREDS="$("$ONEPASS_BIN" read "op://DRE Team/Slack token for Airflow connection 
slack.ic_os_rollout/credential")" + "$AIRFLOW_BIN" connections delete "slack.ic_os_rollout" || true + "$AIRFLOW_BIN" connections add "slack.ic_os_rollout" --conn-type slack --conn-password "$SLACK_CREDS" --conn-extra "{\"slack_token\": \"$SLACK_CREDS\"}" + GOOGLE_CREDS="$("$ONEPASS_BIN" read "op://DRE Team/Airflow Google Drive credentials/credential" | jq -c .)" + "$AIRFLOW_BIN" connections delete "google_cloud_default" || true + "$AIRFLOW_BIN" connections add "google_cloud_default" --conn-type google_cloud_platform --conn-extra "{ + \"extra__google_cloud_platform__project\": \"airflow-422113\", + \"extra__google_cloud_platform__scope\": \"https://www.googleapis.com/auth/cloud-platform,https://www.googleapis.com/auth/spreadsheets\", + \"extra__google_cloud_platform__keyfile_dict\": $GOOGLE_CREDS} + " +} if [ "$1" == "setup" ] then @@ -23,6 +69,15 @@ then sed -i 's/reload_on_plugin_change.*/reload_on_plugin_change = True/' airflow.cfg sed -i 's/load_examples.*/load_examples = False/' airflow.cfg popd + + read -p "Do you want to set up Airflow variables and connections (optional for most local runs)? (y/n): " setup_choice + if [[ "$setup_choice" == "y" || "$setup_choice" == "Y" ]]; then + install_1pass_cli + login_1pass + setup_airflow_variables_and_connections + else + echo "Skipping Airflow variables and connections setup." + fi fi test -x "$VENV_DIR"/bin/airflow || { @@ -50,16 +105,27 @@ then exit fi -if [ "$1" == "unlockdb" ] +if [ "$1" == "standalone" ] +then +if pgrep -f "venv/bin/airflow" > /dev/null +then + echo "Another instance of Airflow is already running. Please terminate it or kill airflow processes to avoid database corruption." >&2 + echo "Process IDs of running Airflow instances:" >&2 + pgrep -f "venv/bin/airflow" >&2 + exit 1 +fi +fi + +if [ "$1" == "unlockdb" ] || [ "$1" == "standalone" ] then cd "$AIRFLOW_HOME" echo .dump | sqlite3 airflow.db | sqlite3 airflow.db-new mv -f airflow.db-new airflow.db echo "Database is now unlocked." 
>&2 - exit + if [ "$1" == "unlockdb" ] + then + exit + fi fi -export AIRFLOW__CORE__ALLOWED_DESERIALIZATION_CLASSES_REGEXP="(dfinity|airflow).*" -export AIRFLOW__WEBSERVER__ALLOW_RAW_HTML_DESCRIPTIONS=true -export PATH="$VENV_DIR/bin:$PATH" -exec "$VENV_DIR"/bin/airflow "$@" +exec "$AIRFLOW_BIN" "$@" From f67d48c78db14233ed11b1611b8fd6b1806f5739 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sa=C5=A1a=20Tomi=C4=87?= Date: Thu, 23 Jan 2025 16:11:26 +0100 Subject: [PATCH 2/4] fix ci --- bin/airflow | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/bin/airflow b/bin/airflow index 28214f9..bd08cf4 100755 --- a/bin/airflow +++ b/bin/airflow @@ -70,7 +70,8 @@ then sed -i 's/load_examples.*/load_examples = False/' airflow.cfg popd - read -p "Do you want to set up Airflow variables and connections (optional for most local runs)? (y/n): " setup_choice + # If running in an interactive shell, ask the user if they want to set up Airflow variables and connections + read -p "Do you want to set up Airflow variables and connections (optional for most local runs)? 
(y/n): " setup_choice < /dev/tty || setup_choice="n" if [[ "$setup_choice" == "y" || "$setup_choice" == "Y" ]]; then install_1pass_cli login_1pass From 84d28457b69aa56550ee452168db9ef2c15e03f0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sa=C5=A1a=20Tomi=C4=87?= Date: Thu, 23 Jan 2025 16:33:54 +0100 Subject: [PATCH 3/4] remove dependency on dpkg --- bin/airflow | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/bin/airflow b/bin/airflow index bd08cf4..d321865 100755 --- a/bin/airflow +++ b/bin/airflow @@ -18,7 +18,13 @@ function install_1pass_cli() { return 0 fi if [[ "$(uname)" == "Linux" ]]; then - ARCH=$(dpkg --print-architecture) && \ + case "$(uname -m)" in + i386) ARCH="386" ;; + x86_64) ARCH="amd64" ;; + armv7l) ARCH="arm" ;; + aarch64) ARCH="arm64" ;; + *) echo "Unsupported architecture"; exit 1 ;; + esac && \ wget "https://cache.agilebits.com/dist/1P/op2/pkg/v2.30.3/op_linux_${ARCH}_v2.30.3.zip" -O op.zip && \ unzip -d op op.zip && \ mv op/op "$VENV_BIN_DIR"/1pass && \ From 0f92dae0c4717813e58b8213c4073ef3f58500c4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sa=C5=A1a=20Tomi=C4=87?= Date: Thu, 23 Jan 2025 16:37:19 +0100 Subject: [PATCH 4/4] update the macos install command --- bin/airflow | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/airflow b/bin/airflow index d321865..67308c3 100755 --- a/bin/airflow +++ b/bin/airflow @@ -32,7 +32,7 @@ function install_1pass_cli() { chmod 0755 "$VENV_BIN_DIR"/1pass && \ ln -sf 1pass "$VENV_BIN_DIR"/op elif [[ "$OSTYPE" == "darwin"* ]]; then - brew install 1password-cli + brew install --cask 1password-cli fi }