Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: update Python connector base image to v4.0.0 with Python 3.11 #52663

Merged
merged 9 commits into from
Feb 9, 2025
113 changes: 59 additions & 54 deletions airbyte-ci/connectors/base_images/README.md

Large diffs are not rendered by default.

4 changes: 3 additions & 1 deletion airbyte-ci/connectors/base_images/base_images/commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,9 @@ async def _publish(

async def execute_async_command(command_fn: Callable, *args, **kwargs):
"""This is a helper function that will execute a command function in an async context, required by the use of Dagger."""
async with dagger.Connection(dagger.Config(log_output=sys.stderr)) as dagger_client:
# NOTE: Dagger logs using Rich now, and two rich apps don't play well with each other.
# Logging into a file makes the CLI experience tolerable.
async with dagger.Connection(dagger.Config(log_output=open("dagger.log", "w"))) as dagger_client:
natikgadzhi marked this conversation as resolved.
Show resolved Hide resolved
await command_fn(dagger_client, *args, **kwargs)


Expand Down
11 changes: 5 additions & 6 deletions airbyte-ci/connectors/base_images/base_images/python/bases.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from base_images import bases, published_image
from base_images import sanity_checks as base_sanity_checks
from base_images.python import sanity_checks as python_sanity_checks
from base_images.root_images import PYTHON_3_10_14
from base_images.root_images import PYTHON_3_11_11


class AirbyteManifestOnlyConnectorBaseImage(bases.AirbyteConnectorBaseImage):
Expand All @@ -20,7 +20,7 @@ class AirbyteManifestOnlyConnectorBaseImage(bases.AirbyteConnectorBaseImage):


class AirbytePythonConnectorBaseImage(bases.AirbyteConnectorBaseImage):
root_image: Final[published_image.PublishedImage] = PYTHON_3_10_14
root_image: Final[published_image.PublishedImage] = PYTHON_3_11_11
repository: Final[str] = "airbyte/python-connector-base"
pip_cache_name: Final[str] = "pip_cache"
nltk_data_path: Final[str] = "/usr/share/nltk_data"
Expand Down Expand Up @@ -107,7 +107,7 @@ def get_container(self, platform: dagger.Platform) -> dagger.Container:
.with_env_variable("POETRY_VIRTUALENVS_CREATE", "false")
.with_env_variable("POETRY_VIRTUALENVS_IN_PROJECT", "false")
.with_env_variable("POETRY_NO_INTERACTION", "1")
.with_exec(["pip", "install", "poetry==1.6.1"])
.with_exec(["pip", "install", "poetry==1.8.4"])
.with_exec(["sh", "-c", "apt-get update && apt-get upgrade -y && apt-get dist-upgrade -y && apt-get clean"])
.with_exec(["sh", "-c", "apt-get install -y socat=1.7.4.4-2"])
# Install CDK system dependencies
Expand All @@ -125,15 +125,14 @@ async def run_sanity_checks(self, platform: dagger.Platform):
container = self.get_container(platform)
await base_sanity_checks.check_timezone_is_utc(container)
await base_sanity_checks.check_a_command_is_available_using_version_option(container, "bash")
await python_sanity_checks.check_python_version(container, "3.10.14")
await python_sanity_checks.check_python_version(container, "3.11.11")
await python_sanity_checks.check_pip_version(container, "24.0")
await base_sanity_checks.check_user_exists(container, self.USER, expected_uid=self.USER_ID, expected_gid=self.USER_ID)
await base_sanity_checks.check_user_can_read_dir(container, self.USER, self.AIRBYTE_DIR_PATH)
await base_sanity_checks.check_user_can_read_dir(container, self.USER, self.nltk_data_path)
await base_sanity_checks.check_user_can_read_dir(container, self.USER, self.CACHE_DIR_PATH)
await base_sanity_checks.check_user_can_write_dir(container, self.USER, self.AIRBYTE_DIR_PATH)
await base_sanity_checks.check_user_cant_write_dir(container, self.USER, self.CACHE_DIR_PATH)
await python_sanity_checks.check_poetry_version(container, "1.6.1")
await python_sanity_checks.check_poetry_version(container, "1.8.4")
await python_sanity_checks.check_python_image_has_expected_env_vars(container)
await base_sanity_checks.check_a_command_is_available_using_version_option(container, "socat", "-V")
await base_sanity_checks.check_socat_version(container, "1.7.4.4")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -70,14 +70,10 @@ async def check_python_image_has_expected_env_vars(python_image_container: dagge
"""
expected_env_vars = {
"PYTHON_VERSION",
"PYTHON_PIP_VERSION",
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have no clue where they are set, and they don't seem to be used anywhere. Code for the sake of code must die.

"PYTHON_GET_PIP_SHA256",
"PYTHON_GET_PIP_URL",
"HOME",
"PATH",
"LANG",
"GPG_KEY",
"PYTHON_SETUPTOOLS_VERSION",
}
# It's not suboptimal to call printenv multiple times because the printenv output is cached.
for expected_env_var in expected_env_vars:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,13 @@
sha="2407c61b1a18067393fecd8a22cf6fceede893b6aaca817bf9fbfe65e33614a3",
)

PYTHON_3_11_11 = PublishedImage(
registry="docker.io",
repository="python",
tag="3.11.11-slim-bookworm",
sha="6ed5bff4d7d377e2a27d9285553b8c21cfccc4f00881de1b24c9bc8d90016e82",
)

AMAZON_CORRETTO_21_AL_2023 = PublishedImage(
registry="docker.io",
repository="amazoncorretto",
Expand Down
18 changes: 0 additions & 18 deletions airbyte-ci/connectors/base_images/base_images/sanity_checks.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,24 +146,6 @@ async def check_user_can_read_dir(container: dagger.Container, user: str, dir_pa
raise errors.SanityCheckError(f"{dir_path} is not readable by {user}.")


async def check_user_cant_write_dir(container: dagger.Container, user: str, dir_path: str):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Dagger started outputting exit code 1, but not throwing the exception as our code assumed. Removing the whole check because I don't see how this is useful.

"""Check that the given user can't write files to a given directory.

Args:
container (dagger.Container): The container on which the sanity checks should run.
user (str): The user to impersonate.
dir_path (str): The directory path to check.

Raises:
errors.SanityCheckError: Raised if the user could write a file in the given directory.
"""
try:
await container.with_user(user).with_exec(["touch", f"{dir_path}/foo.txt"])
except dagger.ExecError:
return
raise errors.SanityCheckError(f"{dir_path} is writable by {user}.")


async def check_user_can_write_dir(container: dagger.Container, user: str, dir_path: str):
"""Check that the given user has write permissions on files in a given directory.

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ However, we do artificially generate Dockerfiles for debugging and documentation

### `{{ registry.ConnectorBaseImageClass.repository }}`

| Version | Published | Docker Image Address | Changelog |
| Version | Published | Docker Image Address | Changelog |
|---------|-----------|--------------|-----------|
{%- for entry in registry.entries %}
| {{ entry.version }} | {{ "✅" if entry.published else "❌" }}| {{ entry.published_docker_image.address }} | {{ entry.changelog_entry.changelog_entry }} |
Expand All @@ -54,7 +54,7 @@ However, we do artificially generate Dockerfiles for debugging and documentation
It will:
- Prompt you to pick which base image you'd like to publish.
- Prompt you for a major/minor/patch/pre-release version bump.
- Prompt you for a changelog message.
- Prompt you for a changelog message.
- Run the sanity checks on the new version.
- Optional: Publish the new version to DockerHub.
- Regenerate the docs and the registry json file.
Expand All @@ -79,6 +79,9 @@ poetry run mypy base_images --check-untyped-defs

## CHANGELOG

### 1.6.0
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Funny, but it seems that the changelog for the package is in a template file. Pretty insane tbh.

- Add a Python base image 4.0.0 with Python 3.11.11.

### 1.4.0
- Declare a base image for our java connectors.

Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,9 @@
[
{
"version": "4.0.0",
"changelog_entry": "Python 3.11.11",
"dockerfile_example": "FROM docker.io/python:3.11.11-slim-bookworm@sha256:6ed5bff4d7d377e2a27d9285553b8c21cfccc4f00881de1b24c9bc8d90016e82\nRUN ln -snf /usr/share/zoneinfo/Etc/UTC /etc/localtime\nRUN adduser --uid 1000 --system --group --no-create-home airbyte\nRUN mkdir --mode 755 /custom_cache\nRUN mkdir --mode 755 /airbyte\nRUN chown airbyte:airbyte /airbyte\nENV PIP_CACHE_DIR=/custom_cache/pip\nRUN pip install --upgrade pip==24.0 setuptools==70.0.0\nENV POETRY_VIRTUALENVS_CREATE=false\nENV POETRY_VIRTUALENVS_IN_PROJECT=false\nENV POETRY_NO_INTERACTION=1\nRUN pip install poetry==1.8.4\nRUN sh -c apt-get update && apt-get upgrade -y && apt-get dist-upgrade -y && apt-get clean\nRUN sh -c apt-get install -y socat=1.7.4.4-2\nRUN sh -c apt-get update && apt-get install -y tesseract-ocr=5.3.0-2 poppler-utils=22.12.0-2+b1\nRUN mkdir -p 755 /usr/share/nltk_data"
},
{
"version": "3.0.0",
"changelog_entry": "Create airbyte user",
Expand Down
Loading
Loading