From 2ec48a9974c8047dbc7f4fb51bb633ad66ac6b4d Mon Sep 17 00:00:00 2001 From: Jernej Sabadin Date: Tue, 7 Jan 2025 17:00:13 +0100 Subject: [PATCH] fix docs, address additional potential race conditions --- luxonis_ml/data/datasets/luxonis_dataset.py | 28 ++++++++++++++------- luxonis_ml/data/loaders/luxonis_loader.py | 1 + luxonis_ml/data/utils/__init__.py | 3 ++- luxonis_ml/data/utils/enums.py | 7 ++++++ 4 files changed, 29 insertions(+), 10 deletions(-) diff --git a/luxonis_ml/data/datasets/luxonis_dataset.py b/luxonis_ml/data/datasets/luxonis_dataset.py index 917ff06c..09bf8ad6 100644 --- a/luxonis_ml/data/datasets/luxonis_dataset.py +++ b/luxonis_ml/data/datasets/luxonis_dataset.py @@ -5,7 +5,6 @@ import tempfile from collections import defaultdict from contextlib import suppress -from enum import Enum from functools import cached_property from pathlib import Path from typing import ( @@ -36,6 +35,7 @@ BucketStorage, BucketType, ParquetFileManager, + UpdateMode, infer_task, warn_on_duplicates, ) @@ -69,11 +69,6 @@ class Metadata(TypedDict): skeletons: Dict[str, Skeletons] -class UpdateMode(Enum): - ALWAYS = "always" - IF_EMPTY = "if_empty" - - class LuxonisDataset(BaseDataset): def __init__( self, @@ -292,8 +287,14 @@ def _get_file_index( def _get_file_index( self, lazy: bool = False ) -> Optional[Union[pl.DataFrame, pl.LazyFrame]]: - path = get_file( - self.fs, "metadata/file_index.parquet", self.metadata_path + path = ( + self.base_path + / "data" + / self.team_id + / "datasets" + / self.dataset_name + / "metadata" + / "file_index.parquet" ) if path is not None and path.exists(): if not lazy: @@ -435,8 +436,17 @@ def get_tasks(self) -> List[str]: def sync_from_cloud( self, update_mode: UpdateMode = UpdateMode.IF_EMPTY ) -> None: - """Downloads data from a remote cloud bucket.""" + """Synchronizes the dataset from a remote cloud bucket to the + local directory. + This method performs the download only if local data is empty, or always downloads + depending on the provided update_mode. + + @type update_mode: UpdateMode + @param update_mode: Specifies the update behavior. + - UpdateMode.IF_EMPTY: Downloads data only if the local dataset is empty. + - UpdateMode.ALWAYS: Always downloads and overwrites the local dataset. + """ if not self.is_remote: logger.warning("This is a local dataset! Cannot sync from cloud.") return diff --git a/luxonis_ml/data/loaders/luxonis_loader.py b/luxonis_ml/data/loaders/luxonis_loader.py index 05321a32..628a38e5 100644 --- a/luxonis_ml/data/loaders/luxonis_loader.py +++ b/luxonis_ml/data/loaders/luxonis_loader.py @@ -85,6 +85,7 @@ def __init__( @type width: Optional[int] @param width: The width of the output images. Defaults to C{None}. + @type update_mode: UpdateMode @param update_mode: Enum that determines the sync mode: - UpdateMode.ALWAYS: Force a fresh download - UpdateMode.IF_EMPTY: Skip downloading if local data exists diff --git a/luxonis_ml/data/utils/__init__.py b/luxonis_ml/data/utils/__init__.py index f11079e1..39967645 100644 --- a/luxonis_ml/data/utils/__init__.py +++ b/luxonis_ml/data/utils/__init__.py @@ -1,5 +1,5 @@ from .data_utils import infer_task, rgb_to_bool_masks, warn_on_duplicates -from .enums import BucketStorage, BucketType, ImageType, MediaType +from .enums import BucketStorage, BucketType, ImageType, MediaType, UpdateMode from .parquet import ParquetDetection, ParquetFileManager, ParquetRecord from .task_utils import ( get_task_name, @@ -24,6 +24,7 @@ "ImageType", "BucketType", "BucketStorage", + "UpdateMode", "get_task_name", "task_type_iterator", "task_is_metadata", diff --git a/luxonis_ml/data/utils/enums.py b/luxonis_ml/data/utils/enums.py index 7ce4e669..17bbe273 100644 --- a/luxonis_ml/data/utils/enums.py +++ b/luxonis_ml/data/utils/enums.py @@ -31,3 +31,10 @@ class BucketStorage(Enum): S3 = "s3" GCS = "gcs" AZURE_BLOB = "azure" + + +class UpdateMode(Enum): + """Update mode for the dataset.""" + + ALWAYS = "always" + IF_EMPTY = "if_empty"