From 792898dd11db1493c9865f474948cb1ae2cb7ce8 Mon Sep 17 00:00:00 2001 From: peterdudfield Date: Tue, 4 Feb 2025 13:28:43 +0000 Subject: [PATCH 1/9] update docker --- Dockerfile | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/Dockerfile b/Dockerfile index e032536..e38048a 100644 --- a/Dockerfile +++ b/Dockerfile @@ -5,15 +5,16 @@ RUN apt-get update && \ apt-get install -y --no-install-recommends git && \ rm -rf /var/lib/apt/lists/* +# Copy the application files +COPY src /app/src +COPY pyproject.toml app/pyproject.toml +COPY README.md app/README.md + # Set the working directory in the container WORKDIR /app # Copy the requirements file and install -COPY pyproject.toml ./ RUN pip install --no-cache-dir . -# Copy the application files -COPY src /app/src - # Set the entrypoint command to run the app CMD ["python", "src/cloudcasting_app/app.py"] \ No newline at end of file From d411a192a21d8ccdab30f441e936f15ea74d0f00 Mon Sep 17 00:00:00 2001 From: peterdudfield Date: Tue, 4 Feb 2025 13:41:34 +0000 Subject: [PATCH 2/9] use pip install -e . --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index e38048a..f181a00 100644 --- a/Dockerfile +++ b/Dockerfile @@ -14,7 +14,7 @@ COPY README.md app/README.md WORKDIR /app # Copy the requirements file and install -RUN pip install --no-cache-dir . +RUN pip install -e . 
# Set the entrypoint command to run the app CMD ["python", "src/cloudcasting_app/app.py"] \ No newline at end of file From 37e5634cbfefec09e159ab70daf14557c66f0f6c Mon Sep 17 00:00:00 2001 From: James Fulton Date: Tue, 4 Feb 2025 14:40:35 +0000 Subject: [PATCH 3/9] save over old files + save multiple copies --- src/cloudcasting_app/app.py | 19 +++++++++++++++++-- src/cloudcasting_app/data.py | 2 +- tests/test_app.py | 12 +++++++++--- 3 files changed, 27 insertions(+), 6 deletions(-) diff --git a/src/cloudcasting_app/app.py b/src/cloudcasting_app/app.py index 5a08b58..468f367 100644 --- a/src/cloudcasting_app/app.py +++ b/src/cloudcasting_app/app.py @@ -3,7 +3,7 @@ This app expects these environmental variables to be available: SATELLITE_ZARR_PATH (str): The path of the input satellite data - OUTPUT_PREDICTION_ZARR_PATH (str): The path to save the predictions to + OUTPUT_PREDICTION_DIRECTORY (str): The path of the directory to save the predictions to """ from importlib.metadata import PackageNotFoundError, version @@ -12,6 +12,7 @@ import yaml import hydra import typer +import fsspec import pandas as pd import xarray as xr @@ -131,7 +132,21 @@ def app(t0=None): ds_y_hat = da_y_hat.to_dataset(name="sat_pred") ds_y_hat.sat_pred.attrs.update(ds.data.attrs) - ds_y_hat.to_zarr(os.environ["OUTPUT_PREDICTION_ZARR_PATH"]) + # Save predictions to latest path and to path with timestring + out_dir = os.environ["OUTPUT_PREDICTION_DIRECTORY"] + + latest_zarr_path = f"{out_dir}/latest.zarr" + t0_string_zarr_path = t0.strftime(f"{out_dir}/%Y-%m-%dT%H:%M.zarr") + + fs, _ = fsspec.core.url_to_fs(out_dir) + for path in [latest_zarr_path, t0_string_zarr_path]: + + # Remove the path if it exists already + if fs.exists(path): + logger.info(f"Removing path: {path}") + fs.rm(path, recursive=True) + + ds_y_hat.to_zarr(path) if __name__ == "__main__": diff --git a/src/cloudcasting_app/data.py b/src/cloudcasting_app/data.py index d890f4f..0164319 100644 --- a/src/cloudcasting_app/data.py +++ 
b/src/cloudcasting_app/data.py @@ -105,7 +105,7 @@ def download_all_sat_data() -> bool: # download 5 minute satellite data sat_5_dl_path = os.environ["SATELLITE_ZARR_PATH"] - fs = fsspec.open(sat_5_dl_path).fs + fs, _ = fsspec.core.url_to_fs(sat_5_dl_path) if fs.exists(sat_5_dl_path): sat_available = True logger.info(f"Downloading 5-minute satellite data") diff --git a/tests/test_app.py b/tests/test_app.py index fc10c02..5fad7ff 100644 --- a/tests/test_app.py +++ b/tests/test_app.py @@ -14,15 +14,21 @@ def test_app(sat_5_data, tmp_path, test_t0): # In production sat zarr is zipped os.environ["SATELLITE_ZARR_PATH"] = "temp_sat.zarr.zip" - os.environ["OUTPUT_PREDICTION_ZARR_PATH"] = "sat_prediction.zarr" + os.environ["OUTPUT_PREDICTION_DIRECTORY"] = f"{tmp_path}" with zarr.storage.ZipStore("temp_sat.zarr.zip", mode="x") as store: sat_5_data.to_zarr(store) app() - + + # Check the two output files have been created + latest_zarr_path = f"{tmp_path}/latest.zarr" + t0_string_zarr_path = test_t0.strftime(f"{tmp_path}/%Y-%m-%dT%H:%M.zarr") + assert os.path.exists(latest_zarr_path) + assert os.path.exists(t0_string_zarr_path) + # Load the predictions and check them - ds_y_hat = xr.open_zarr(os.environ["OUTPUT_PREDICTION_ZARR_PATH"]) + ds_y_hat = xr.open_zarr(latest_zarr_path) assert "sat_pred" in ds_y_hat assert ( From 2a01095e324ac2a4cb66843ddb4f062617264704 Mon Sep 17 00:00:00 2001 From: peterdudfield Date: Tue, 4 Feb 2025 15:00:03 +0000 Subject: [PATCH 4/9] install unzip --- Dockerfile | 1 + 1 file changed, 1 insertion(+) diff --git a/Dockerfile b/Dockerfile index f181a00..8b0eee2 100644 --- a/Dockerfile +++ b/Dockerfile @@ -4,6 +4,7 @@ FROM python:3.11-slim as base RUN apt-get update && \ apt-get install -y --no-install-recommends git && \ rm -rf /var/lib/apt/lists/* +RUN apt-get install unzip -y # Copy the application files COPY src /app/src From 6d60036b9b78e15f93efcb412e43cc9689e26929 Mon Sep 17 00:00:00 2001 From: peterdudfield Date: Tue, 4 Feb 2025 15:03:51 +0000 
Subject: [PATCH 5/9] update docker --- Dockerfile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Dockerfile b/Dockerfile index 8b0eee2..5d22a0f 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,10 +1,10 @@ # Use an official Python runtime as a parent image FROM python:3.11-slim as base -RUN apt-get update && \ - apt-get install -y --no-install-recommends git && \ - rm -rf /var/lib/apt/lists/* +RUN apt-get update RUN apt-get install unzip -y +RUN apt-get install -y --no-install-recommends git && \ + rm -rf /var/lib/apt/lists/* # Copy the application files COPY src /app/src From 8c27e7a1b57fe5632d432f5ca09c7618f9d75f4a Mon Sep 17 00:00:00 2001 From: peterdudfield Date: Tue, 4 Feb 2025 16:52:19 +0000 Subject: [PATCH 6/9] add s3fs --- pyproject.toml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 67f5a8d..17a0d11 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -21,7 +21,8 @@ classifiers = [ ] dependencies = [ "torch[cpu]", - "fsspec[s3]", + "fsspec", + "s3fs", "xarray", "zarr<3.0", "numpy", From 71322ff5918055135cca847e4aa110f750aa67b3 Mon Sep 17 00:00:00 2001 From: peterdudfield Date: Tue, 4 Feb 2025 16:53:15 +0000 Subject: [PATCH 7/9] add logging basic config --- src/cloudcasting_app/app.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/cloudcasting_app/app.py b/src/cloudcasting_app/app.py index 468f367..3a03973 100644 --- a/src/cloudcasting_app/app.py +++ b/src/cloudcasting_app/app.py @@ -32,6 +32,10 @@ # --------------------------------------------------------------------------- # GLOBAL SETTINGS +logging.basicConfig( + level=getattr(logging, os.getenv("LOGLEVEL", "INFO")), + format="[%(asctime)s] {%(pathname)s:%(lineno)d} %(levelname)s - %(message)s", +) # Create a logger logger = logging.getLogger(__name__) From cb4e0f24c379fae03a1e8ee3ef0f51ff489a8179 Mon Sep 17 00:00:00 2001 From: peterdudfield Date: Tue, 4 Feb 2025 16:58:33 +0000 Subject: [PATCH 8/9] tidy ---
src/cloudcasting_app/app.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cloudcasting_app/app.py b/src/cloudcasting_app/app.py index 3a03973..fffb302 100644 --- a/src/cloudcasting_app/app.py +++ b/src/cloudcasting_app/app.py @@ -142,7 +142,7 @@ def app(t0=None): latest_zarr_path = f"{out_dir}/latest.zarr" t0_string_zarr_path = t0.strftime(f"{out_dir}/%Y-%m-%dT%H:%M.zarr") - fs, _ = fsspec.core.url_to_fs(out_dir) + fs = fsspec.open(out_dir).fs for path in [latest_zarr_path, t0_string_zarr_path]: # Remove the path if it exists already From 14448e16b4223594046fd333dd4290f1c85c5bf5 Mon Sep 17 00:00:00 2001 From: peterdudfield Date: Tue, 4 Feb 2025 17:37:47 +0000 Subject: [PATCH 9/9] text --- src/cloudcasting_app/app.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cloudcasting_app/app.py b/src/cloudcasting_app/app.py index fffb302..2e0e0ac 100644 --- a/src/cloudcasting_app/app.py +++ b/src/cloudcasting_app/app.py @@ -136,7 +136,7 @@ def app(t0=None): ds_y_hat = da_y_hat.to_dataset(name="sat_pred") ds_y_hat.sat_pred.attrs.update(ds.data.attrs) - # Save predictions to latest path and to path with timestring + # Save predictions to the latest path and to path with timestring out_dir = os.environ["OUTPUT_PREDICTION_DIRECTORY"] latest_zarr_path = f"{out_dir}/latest.zarr"