Skip to content

Commit

Permalink
fix docs, address additional potential race conditions
Browse files Browse the repository at this point in the history
  • Loading branch information
JSabadin committed Jan 7, 2025
1 parent 05f64c8 commit 2ec48a9
Show file tree
Hide file tree
Showing 4 changed files with 29 additions and 10 deletions.
28 changes: 19 additions & 9 deletions luxonis_ml/data/datasets/luxonis_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
import tempfile
from collections import defaultdict
from contextlib import suppress
from enum import Enum
from functools import cached_property
from pathlib import Path
from typing import (
Expand Down Expand Up @@ -36,6 +35,7 @@
BucketStorage,
BucketType,
ParquetFileManager,
UpdateMode,
infer_task,
warn_on_duplicates,
)
Expand Down Expand Up @@ -69,11 +69,6 @@ class Metadata(TypedDict):
skeletons: Dict[str, Skeletons]


class UpdateMode(Enum):
ALWAYS = "always"
IF_EMPTY = "if_empty"


class LuxonisDataset(BaseDataset):
def __init__(
self,
Expand Down Expand Up @@ -292,8 +287,14 @@ def _get_file_index(
def _get_file_index(
self, lazy: bool = False
) -> Optional[Union[pl.DataFrame, pl.LazyFrame]]:
path = get_file(
self.fs, "metadata/file_index.parquet", self.metadata_path
path = (
self.base_path
/ "data"
/ self.team_id
/ "datasets"
/ self.dataset_name
/ "metadata"
/ "file_index.parquet"
)
if path is not None and path.exists():
if not lazy:
Expand Down Expand Up @@ -435,8 +436,17 @@ def get_tasks(self) -> List[str]:
def sync_from_cloud(
self, update_mode: UpdateMode = UpdateMode.IF_EMPTY
) -> None:
"""Downloads data from a remote cloud bucket."""
"""Synchronizes the dataset from a remote cloud bucket to the
local directory.
Depending on the provided update_mode, the download is performed either
only when the local dataset is empty or unconditionally.
@type update_mode: UpdateMode
@param update_mode: Specifies the update behavior.
- UpdateMode.IF_EMPTY: Downloads data only if the local dataset is empty.
- UpdateMode.ALWAYS: Always downloads and overwrites the local dataset.
"""
if not self.is_remote:
logger.warning("This is a local dataset! Cannot sync from cloud.")
return
Expand Down
1 change: 1 addition & 0 deletions luxonis_ml/data/loaders/luxonis_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@ def __init__(
@type width: Optional[int]
@param width: The width of the output images. Defaults to
C{None}.
@type update_mode: UpdateMode
@param update_mode: Enum that determines the sync mode:
- UpdateMode.ALWAYS: Force a fresh download
- UpdateMode.IF_EMPTY: Skip downloading if local data exists
Expand Down
3 changes: 2 additions & 1 deletion luxonis_ml/data/utils/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from .data_utils import infer_task, rgb_to_bool_masks, warn_on_duplicates
from .enums import BucketStorage, BucketType, ImageType, MediaType
from .enums import BucketStorage, BucketType, ImageType, MediaType, UpdateMode
from .parquet import ParquetDetection, ParquetFileManager, ParquetRecord
from .task_utils import (
get_task_name,
Expand All @@ -24,6 +24,7 @@
"ImageType",
"BucketType",
"BucketStorage",
"UpdateMode",
"get_task_name",
"task_type_iterator",
"task_is_metadata",
Expand Down
7 changes: 7 additions & 0 deletions luxonis_ml/data/utils/enums.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,3 +31,10 @@ class BucketStorage(Enum):
S3 = "s3"
GCS = "gcs"
AZURE_BLOB = "azure"


class UpdateMode(Enum):
    """Update mode for the dataset.

    Selects how a remote dataset is synchronized to the local
    directory, e.g. via the C{update_mode} argument of
    C{LuxonisDataset.sync_from_cloud}.
    """

    # Always download and overwrite the local dataset (forces a fresh
    # download).
    ALWAYS = "always"
    # Download only if the local dataset is empty; skip the download
    # when local data already exists.
    IF_EMPTY = "if_empty"

0 comments on commit 2ec48a9

Please sign in to comment.