diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml new file mode 100644 index 0000000..40bcda0 --- /dev/null +++ b/.github/workflows/ci.yaml @@ -0,0 +1,30 @@ +name: Continuous Integration +on: push + +jobs: + syntax-checks: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Code formatting using Ruff + uses: chartboost/ruff-action@v1 # https://github.com/chartboost/ruff-action + with: + args: format --check + + - name: Code linting using Ruff + uses: chartboost/ruff-action@v1 + + unit-testing: + strategy: + matrix: + python-version: ["3.9", "3.10", "3.11"] + runs-on: ubuntu-latest + container: + image: python:${{ matrix.python-version }} + steps: + - uses: actions/checkout@v4 + - name: Install Python dependencies + run: pip install .[dev,test] + - name: Run tests using Pytest + run: python -m pytest tests/ diff --git a/.github/workflows/publish.yaml b/.github/workflows/publish.yaml new file mode 100644 index 0000000..3bdd45d --- /dev/null +++ b/.github/workflows/publish.yaml @@ -0,0 +1,102 @@ +# Inspired by https://packaging.python.org/en/latest/guides/publishing-package-distribution-releases-using-github-actions-ci-cd-workflows/#checking-out-the-project-and-building-distributions + +name: Publish package to TestPyPI +on: + push: + tags: + - '*' + +jobs: + build: + name: 📦 Build package + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: "3.x" + + - name: Install pypa/build + run: >- + python3 -m + pip install + build + --user + + - name: Build a binary wheel and a source tarball + run: python3 -m build + + - name: Store the distribution packages + uses: actions/upload-artifact@v3 + with: + name: python-package-distributions + path: dist/ + + publish-to-testpypi: + name: Publish Python 🐍 distribution 📦 to TestPyPI + needs: + - build + runs-on: ubuntu-latest + + environment: + name: testpypi + url: 
https://test.pypi.org/p/democorp_airflow + + permissions: + id-token: write # IMPORTANT: mandatory for trusted publishing + + steps: + - name: Download all the dists + uses: actions/download-artifact@v3 + with: + name: python-package-distributions + path: dist/ + - name: Publish distribution 📦 to TestPyPI + uses: pypa/gh-action-pypi-publish@release/v1 + with: + repository-url: https://test.pypi.org/legacy/ + + github-release: + name: >- + Sign the Python 🐍 distribution 📦 with Sigstore + and upload them to GitHub Release + needs: + - publish-to-testpypi + runs-on: ubuntu-latest + + permissions: + contents: write # IMPORTANT: mandatory for making GitHub Releases + id-token: write # IMPORTANT: mandatory for sigstore + + steps: + - name: Download all the dists + uses: actions/download-artifact@v3 + with: + name: python-package-distributions + path: dist/ + - name: Sign the dists with Sigstore + uses: sigstore/gh-action-sigstore-python@v1.2.3 + with: + inputs: >- + ./dist/*.tar.gz + ./dist/*.whl + - name: Create GitHub Release + env: + GITHUB_TOKEN: ${{ github.token }} + run: >- + gh release create + '${{ github.ref_name }}' + --repo '${{ github.repository }}' + --notes "" + - name: Upload artifact signatures to GitHub Release + env: + GITHUB_TOKEN: ${{ github.token }} + # Upload to GitHub Release using the `gh` CLI. + # `dist/` contains the built packages, and the + # sigstore-produced signatures and certificates. + run: >- + gh release upload + '${{ github.ref_name }}' dist/** + --repo '${{ github.repository }}' diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..20c322d --- /dev/null +++ b/.gitignore @@ -0,0 +1,7 @@ +*.egg-info/ +*.pyc +.DS_Store +.idea/ +.python-version +__pycache__/ +dist/ diff --git a/README.md b/README.md new file mode 100644 index 0000000..b249d44 --- /dev/null +++ b/README.md @@ -0,0 +1,143 @@ +# Custom package demo + +``` +THIS REPOSITORY SERVES AS A DEMO FOR HOW TO STRUCTURE A CUSTOM PACKAGE. 
+FEEL FREE TO COPY AND ADJUST TO YOUR OWN NEEDS. +``` + +This Python library hosts custom Airflow Operators, Sensors, Notifiers, etc. that are common across **[insert company name]** data teams. This guide walks you through how the library works and how to use it in your project, make changes, run tests, and contribute your changes. + +## Table of Contents + +- [Installing the library](#installing-the-library) +- [Using the library](#using-the-library) +- [Versioning](#versioning) +- [Continuous Integration/Continuous Deployment (CI/CD)](#continuous-integrationcontinuous-deployment-cicd) +- [Contributing](#contributing) + * [Getting started](#getting-started) + * [Making changes](#making-changes) + * [Testing](#testing) + * [Pull requests](#pull-requests) + * [Update your project dependency](#update-your-project-dependency) +- [Library structure](#library-structure) +- [How do I ...?](#how-do-i-) + +Table of contents generated using https://derlin.github.io/bitdowntoc. + +## Installing the library + +Since this repository is built only for example purposes, we publish the Python package only to [TestPyPI](https://test.pypi.org/project/democorp-airflow). Therefore, you'll need to add TestPyPI as an extra index URL, so that pip can still resolve dependencies such as Apache Airflow from PyPI. You can install the package using pip: + +```bash +pip install --extra-index-url https://test.pypi.org/simple/ democorp-airflow +``` + +Or define the package in a requirements.txt file and install using `pip install -r requirements.txt`: + +``` +--extra-index-url https://test.pypi.org/simple +democorp_airflow +``` + +## Using the library + +Import example: +```python +from democorp_airflow.operators.example import ExampleOperator +``` + +## Versioning + +We use `setuptools-scm` to automatically manage versioning based on git tags. When you create a new tag, the library version is updated accordingly, and a release is created for that tag. See the files in `.github/workflows` for inspiration.
+ +See the [setuptools_scm documentation](https://github.com/pypa/setuptools_scm/) and [this post on versioning with setuptools_scm](https://www.moritzkoerber.com/posts/versioning-with-setuptools_scm/) for more details. + +## Continuous Integration/Continuous Deployment (CI/CD) + +Our CI/CD pipeline is managed using GitHub Actions: + +- On every push, formatting/lint checks and unit tests are run to ensure code quality. See `.github/workflows/ci.yaml`. +- When a new tag is pushed, the library is built, published to TestPyPI, and a GitHub release is created. See `.github/workflows/publish.yaml`. + +## Contributing + +We welcome contributions from every team to improve this library! Here's how you can get started: + +### Getting started + +1. Clone the repository to your local machine. +1. Create a new branch for your changes: `git checkout -b my-new-feature`. + +### Making changes + +1. Make your desired changes to the library, following the structure and coding guidelines. +1. Write unit tests for your changes in the `tests/` directory. + +### Testing + +Before submitting your changes, ensure that all tests pass: + +```bash +pytest tests/ +``` + +### Pull requests + +1. Commit your changes and push them to your branch. +1. Create a pull request from your branch to the `main` branch. +1. The CI/CD pipeline will automatically run tests on your pull request. +1. Once the tests pass and your code is reviewed, your contribution will be merged into the main repository. + +### Update your project dependency + +Once a new version of the `democorp-airflow` library is released, you need to update your Airflow project to use it. + +1. Update the version constraint in your `requirements.txt` file. +1. After updating the package, it's crucial to test your project to ensure that the new version works as expected and doesn't introduce any compatibility issues or bugs. Run `astro dev start` to test changes locally. +1. Commit the updated `requirements.txt` file to Git so that other developers can easily reproduce your project environment.
# Expose the installed distribution's version as ``democorp_airflow.__version__``.
# The name passed to ``version()`` must be the distribution name declared as
# ``[project].name`` in pyproject.toml ("democorp_airflow"); looking up any other
# name always raises PackageNotFoundError and hides the real version.
try:
    __version__ = version("democorp_airflow")
except PackageNotFoundError:
    # The distribution metadata is not available (e.g. the package is imported
    # from a source checkout without being installed): fall back to a placeholder.
    __version__ = "unknown version"
def test_connection(self) -> tuple[bool, str]:
    """
    Verify that the configured Airflow connection can be resolved.

    Accessing ``self.conn`` fetches (and caches) the connection for the
    ``conn_id`` given at construction. Any exception raised while doing so is
    reported through the return value instead of being propagated.

    :return: ``(True, "Connection successful.")`` when the connection could be
        fetched, otherwise ``(False, <exception message>)``
    """
    try:
        # ... Implement your business logic to validate a connection here ...
        _ = self.conn
    except Exception as e:
        # The broad catch is deliberate: this method reports every failure via
        # its return value. Include the reason in the log so it is actionable.
        self.log.warning("Failed connection verification: %s", e)
        return False, str(e)
    return True, "Connection successful."
.+@@@= :@@* + :*@@%= :%@@@*. .*@@%= -@@+ + .*@@@= %@@@@#: .+@@@= +@@= + :*@@@= =@@#@@@#: +@@@= #@@: + .*@@%= .@@# =%@@#: +@@@= @@@ + .+@@@+ @@@ -%@@#: +@@@+ .@@% + .*@@@+:--: .@@@ -%@@#@@@+ =@@* + .#@@@@@@@#+. -@@# -%@@@. #@@- + .*@@@= .=#@@@#: *@@= @@@ @@@ + .*@@%= .=@@@#: .@@@ .%@@= =@@* + .*@@@= =@@@* #@@- -@@@= @@@. + .+@@@= .#@@%: -@@# -#@@%: +@@* + .*@@@*-------:. -@@@+ .@@@. .=%@@%- -@@% + +@@@@@@@@@@@@@@@@@%#+=: .%@@* #@@- .=%@@@#- .@@@: + +@@@@#+-:......:-=+*#@@@@@%*=-@@@%@@@%+%@@@%=. %@@= + =@@@*- :-+#%@@@@@#*%@@@@#=. :@@@= + .#@@#. *@@+ .@@@ +@@@: + :%@@= -+%@@@@#*@@@@@%*=-. .+@@@* + :@@@: -*@@@@*#@@@%@@@++#@@@@@%#+=-:. .:=+%@@@*. + %@@- .+@@@@*: .@@@. +@@@: .:=*#@@@@@@@@@@@@@@@@@@#. + *@@+ .*@@@*- %@@- =@@@= .:--=====-+@@@#: + -@@% .*@@@= +@@* .%@@#. -%@@#: + %@@: :@@@+ .@@@ *@@@= :#@@%- + -@@* :@@@: #@@- :#@@%= :#@@%- + #@@: #@@: :@@% :#@@@*- :#@@%- + .@@@ #@@%- +@@+ .+%@@@@@@@%. + =@@* =@@@%@@%- #@@: :===+@@@+. + *@@- =@@@*. :*@@%- #@@. +@@@+. + %@@. =%@@*. :*@@%= +@@: .+@@@*. +.@@@ -%@@*. .*@@@*@@+ =@@@*. +:@@# -%@@#: :#@@@@@. =@@@*. +=@@* :%@@#: :#@@@@: =%@@*: +*@@+ -%@@#: .*@@@+ =@@@#: +#@@= :#@@%: .+@@@*: =%@@#: +%@@- :%@@%- =%@@@*=. -%@@%- +%@@: :#@@%- .=#@@@@#+-: -%@@#- +@@@=#@@%- :=*%@@@@@%#++=-:.. -#@@%- +@@@@@%- .:=+*%@@@@@@@@@@@%%###****+++++%@@@# +=%@#= ..:--=++***###%%%%@@@@@@@@@= +""" + print(airflow_logo) diff --git a/democorp_airflow/sensors/__init__.py b/democorp_airflow/sensors/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..335a56d --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,54 @@ +[build-system] +requires = [ + "setuptools", + "setuptools-scm" +] +build-backend = "setuptools.build_meta" + +[project] +name = "democorp_airflow" +authors = [{ name = "John Doe", email = "johndoe@democorp.com" }] +description = "Project to demonstrate how to structure an Airflow library with common components." 
+requires-python = ">=3.9" +classifiers = [ + "Framework :: Apache Airflow", + "Programming Language :: Python :: 3", + "Intended Audience :: Developers", + "Operating System :: OS Independent", + ] +dependencies = [ + "apache-airflow >= 2.7.0", + 'importlib-metadata; python_version < "3.8"', + 'typing_extensions; python_version < "3.8"', +] +dynamic = ["version", "readme"] + +[project.urls] +Homepage = "https://github.com/astronomer/custom-package-demo" + +[project.optional-dependencies] +dev = ["black", "isort", "ruff"] +test = ["pytest", "pytest-cov"] +docs = ["sphinx"] + +[tool.setuptools.dynamic] +readme = { file = ["README.md"] } + +[tool.ruff] +# https://docs.astral.sh/ruff/configuration/#using-pyprojecttoml +line-length = 110 + +[tool.ruff.lint] +select = [ + "E", # pycodestyle + "F", # Pyflakes + "UP", # pyupgrade + "B", # flake8-bugbear + "SIM", # flake8-simplify + "I", # isort +] + +[tool.setuptools.packages] +find = {} + +[tool.setuptools_scm] diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..6068493 --- /dev/null +++ b/setup.py @@ -0,0 +1,3 @@ +from setuptools import setup + +setup() diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..d4ea765 --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,64 @@ +import os + +import pytest + +# These have to come before any Airflow imports +os.environ["AIRFLOW__CORE__UNIT_TEST_MODE"] = "True" + +from airflow.models import ( # noqa: E402 + DagModel, + DagRun, + DagTag, + TaskInstance, + TaskReschedule, + Trigger, + Variable, + XCom, +) +from airflow.utils import db # noqa: E402 +from airflow.utils.session import create_session # noqa: E402 +from airflow.utils.timezone import datetime # noqa: E402 + + +@pytest.fixture(scope="session", autouse=True) +def airflow_db(): + """ + Session-wide fixture that ensures the database is set up for tests. 
@mock.patch.dict(
    "os.environ",
    AIRFLOW_CONN_MYDB=json.dumps(
        {
            "conn_type": "my-conn-type",
            "login": "my-login",
            "password": "my-password",
            "host": "my-host",
            "port": 1234,
            "schema": "my-schema",
            "extra": {"param1": "val1", "param2": "val2"},
        }
    ),
)
def test_examplehook():
    """
    Verify that ExampleHook can fetch a connection defined via an environment variable.

    The connection is supplied as JSON in ``AIRFLOW_CONN_MYDB`` for the duration of the test.
    """
    test_hook = ExampleHook(conn_id="mydb")

    # test_connection() reports failures through its return value instead of raising,
    # so the result has to be asserted for this test to be able to fail.
    success, message = test_hook.test_connection()
    assert success, message
def test_exampleoperator():
    """Verify that ExampleOperator.execute() prints its ASCII-art logo and returns None."""
    # Function-scope imports keep this block self-contained.
    import contextlib
    import io

    test = ExampleOperator(task_id="test")

    # Capture stdout so the test checks what execute() printed rather than
    # only checking that it did not raise.
    captured = io.StringIO()
    with contextlib.redirect_stdout(captured):
        result = test.execute(context={})

    assert result is None
    # The logo is drawn largely with "@" characters.
    assert "@@@" in captured.getvalue()