diff --git a/luxonis_ml/data/datasets/annotation.py b/luxonis_ml/data/datasets/annotation.py
index e69cbe16..2f85b2ed 100644
--- a/luxonis_ml/data/datasets/annotation.py
+++ b/luxonis_ml/data/datasets/annotation.py
@@ -498,7 +498,7 @@ def validate_path(cls, path: FilePath) -> FilePath:
             np.load(path)
         except Exception as e:
             raise ValueError(
-                f"Failed to load array annotation from {path}"
+                f"Failed to load array annotation from {path}."
             ) from e
         return path
 
@@ -506,7 +506,7 @@ def validate_path(cls, path: FilePath) -> FilePath:
 class DatasetRecord(BaseModelExtraForbid):
     files: Dict[str, FilePath]
     annotation: Optional[Detection] = None
-    task: str = "detection"
+    task: str = ""
 
     @property
     def file(self) -> FilePath:
@@ -564,7 +564,8 @@ def check_valid_identifier(name: str, *, label: str) -> None:
     Albumentations requires that the names of the targets passed
     as `additional_targets` are valid Python identifiers.
     """
-    if not name.replace("-", "_").isidentifier():
+    name = name.replace("-", "_")
+    if name and not name.isidentifier():
         raise ValueError(
             f"{label} can only contain alphanumeric characters, "
             "underscores, and dashes. Additionaly, the first character "
diff --git a/luxonis_ml/data/datasets/luxonis_dataset.py b/luxonis_ml/data/datasets/luxonis_dataset.py
index 9c7de0ac..5e9af4b1 100644
--- a/luxonis_ml/data/datasets/luxonis_dataset.py
+++ b/luxonis_ml/data/datasets/luxonis_dataset.py
@@ -728,10 +728,10 @@ def delete_dataset(self, *, delete_remote: bool = False) -> None:
         """
         if not self.is_remote:
             shutil.rmtree(self.path)
-            logger.info(f"Deleted dataset {self.dataset_name}")
+            logger.info(f"Deleted dataset '{self.dataset_name}'")
 
         if self.is_remote and delete_remote:
-            logger.info(f"Deleting dataset {self.dataset_name} from cloud")
+            logger.info(f"Deleting dataset '{self.dataset_name}' from cloud")
             assert self.path
             assert self.dataset_name
             assert self.local_path
@@ -828,6 +828,7 @@ def _add_process_batch(
     def add(
         self, generator: DatasetIterator, batch_size: int = 1_000_000
     ) -> Self:
+        logger.info(f"Adding data to dataset '{self.dataset_name}'...")
         index = self._get_file_index(sync_from_cloud=True)
         new_index = {"uuid": [], "file": [], "original_filepath": []}
         processed_uuids = set()
diff --git a/luxonis_ml/data/loaders/luxonis_loader.py b/luxonis_ml/data/loaders/luxonis_loader.py
index 628a38e5..1210cfac 100644
--- a/luxonis_ml/data/loaders/luxonis_loader.py
+++ b/luxonis_ml/data/loaders/luxonis_loader.py
@@ -143,8 +143,6 @@ def __init__(
 
         self.class_mappings: Dict[str, Dict[str, int]] = {}
         for task in self.df["task_name"].unique():
-            if not task:
-                continue
             class_mapping = {
                 class_: i
                 for i, class_ in enumerate(
diff --git a/luxonis_ml/data/parsers/base_parser.py b/luxonis_ml/data/parsers/base_parser.py
index 37bbd04e..636f90ab 100644
--- a/luxonis_ml/data/parsers/base_parser.py
+++ b/luxonis_ml/data/parsers/base_parser.py
@@ -1,3 +1,4 @@
+import os
 from abc import ABC, abstractmethod
 from dataclasses import dataclass
 from pathlib import Path
@@ -89,17 +90,20 @@ def _parse_split(self, **kwargs) -> List[Path]:
         @rtype: List[str]
         @return: List of added images.
         """
-        generator, skeletons, added_images = self.from_split(**kwargs)
-        self.dataset.add(self._add_task(generator))
-        if skeletons:
-            for skeleton in skeletons.values():
-                self.dataset.set_skeletons(
-                    skeleton.get("labels"),
-                    skeleton.get("edges"),
-                    self.dataset_type.value,
-                )
-
-        return added_images
+        old_cwd = os.getcwd()
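+        # `from_split` may change the working directory while reading
+        # annotations (the native parser does), so make sure it is
+        # restored once parsing is done.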
""" - generator, skeletons, added_images = self.from_split(**kwargs) - self.dataset.add(self._add_task(generator)) - if skeletons: - for skeleton in skeletons.values(): - self.dataset.set_skeletons( - skeleton.get("labels"), - skeleton.get("edges"), - self.dataset_type.value, - ) - - return added_images + old_cwd = os.getcwd() + try: + generator, skeletons, added_images = self.from_split(**kwargs) + self.dataset.add(self._add_task(generator)) + if skeletons: + for skeleton in skeletons.values(): + self.dataset.set_skeletons( + skeleton.get("labels"), + skeleton.get("edges"), + self.dataset_type.value, + ) + return added_images + finally: + os.chdir(old_cwd) def parse_split( self, @@ -240,10 +244,11 @@ def _add_task(self, generator: DatasetIterator) -> DatasetIterator: @return: Generator function with added task """ - task_name = self.task_name or self.dataset_type.value + task_name = self.task_name or "" for item in generator: if isinstance(item, dict): - item["task"] = task_name - else: + if "task" not in item: + item["task"] = task_name + elif not item.task: item.task = task_name yield item diff --git a/luxonis_ml/data/parsers/native_parser.py b/luxonis_ml/data/parsers/native_parser.py index b000c168..5660ed16 100644 --- a/luxonis_ml/data/parsers/native_parser.py +++ b/luxonis_ml/data/parsers/native_parser.py @@ -1,4 +1,5 @@ import json +import os from pathlib import Path from typing import Any, Dict, List, Optional, Tuple @@ -65,8 +66,11 @@ def from_split(self, annotation_path: Path) -> ParserOutput: dictionary for keypoints and list of added images. """ + data = json.loads(annotation_path.read_text()) + os.chdir(annotation_path.parent) + def generator() -> DatasetIterator: - yield from json.loads(annotation_path.read_text()) + yield from data added_images = self._get_added_images(generator()) diff --git a/luxonis_ml/data/utils/visualizations.py b/luxonis_ml/data/utils/visualizations.py index 23393c0c..06562f08 100644 --- a/luxonis_ml/data/utils/visualizations.py +++ b/luxonis_ml/data/utils/visualizations.py @@ -329,13 +329,14 @@ def create_mask( for task, arr in task_type_iterator(labels, "segmentation"): task_name = get_task_name(task) - images[task_name] = create_mask( + image_name = task_name if task_name and not blend_all else "labels" + images[image_name] = create_mask( image, arr, task_name, is_instance=False ) for task, arr in task_type_iterator(labels, "instance_segmentation"): task_name = get_task_name(task) - image_name = task_name if not blend_all else "labels" + image_name = task_name if task_name and not blend_all else "labels" curr_image = images.get(image_name, image.copy()) images[image_name] = create_mask( curr_image, arr, task_name, is_instance=True @@ -343,7 +344,7 @@ def create_mask( for task, arr in task_type_iterator(labels, "boundingbox"): task_name = get_task_name(task) - image_name = task_name if not blend_all else "labels" + image_name = task_name if task_name and not blend_all else "labels" curr_image = images.get(image_name, image.copy()) draw_function = cv2.rectangle @@ -374,7 +375,7 @@ def create_mask( for task, arr in task_type_iterator(labels, "keypoints"): task_name = get_task_name(task) - image_name = task_name if not blend_all else "labels" + image_name = task_name if task_name and not blend_all else "labels" curr_image = images.get(image_name, image.copy()) task_classes = class_names[task_name] diff --git a/tests/test_data/test_annotations.py b/tests/test_data/test_annotations.py index 00237c66..6e7d28c8 100644 --- 
+        os.chdir(annotation_path.parent)
+
         def generator() -> DatasetIterator:
-            yield from json.loads(annotation_path.read_text())
+            yield from data
 
         added_images = self._get_added_images(generator())
diff --git a/luxonis_ml/data/utils/visualizations.py b/luxonis_ml/data/utils/visualizations.py
index 23393c0c..06562f08 100644
--- a/luxonis_ml/data/utils/visualizations.py
+++ b/luxonis_ml/data/utils/visualizations.py
@@ -329,13 +329,14 @@ def create_mask(
 
     for task, arr in task_type_iterator(labels, "segmentation"):
         task_name = get_task_name(task)
-        images[task_name] = create_mask(
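+        # Tasks with an empty name are drawn onto the shared "labels"
+        # canvas instead of getting their own image.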
"Thermal_Dogs_and_People.v1-resize-416x416.yolov4pytorch.zip", - ["boundingbox", "classification"], + {"boundingbox", "classification"}, ), ( DatasetType.YOLOV6, "Thermal_Dogs_and_People.v1-resize-416x416.mt-yolov6.zip", - ["boundingbox", "classification"], + {"boundingbox", "classification"}, ), ( DatasetType.CREATEML, "Thermal_Dogs_and_People.v1-resize-416x416.createml.zip", - ["boundingbox", "classification"], + {"boundingbox", "classification"}, ), ( DatasetType.TFCSV, "Thermal_Dogs_and_People.v1-resize-416x416.tensorflow.zip", - ["boundingbox", "classification"], + {"boundingbox", "classification"}, ), ( DatasetType.SEGMASK, "D2_Tile.png-mask-semantic.zip", - ["segmentation", "classification"], + {"segmentation", "classification"}, ), ( DatasetType.CLSDIR, "Flowers_Classification.v2-raw.folder.zip", - ["classification"], + {"classification"}, ), ( DatasetType.SOLO, "D2_ParkingLot.zip", - ["boundingbox", "segmentation", "classification", "keypoints"], + {"boundingbox", "segmentation", "classification", "keypoints"}, ), ( DatasetType.COCO, "roboflow://team-roboflow/coco-128/2/coco", - ["boundingbox", "classification"], + {"boundingbox", "classification"}, + ), + ( + DatasetType.NATIVE, + "D2_ParkingLot_Native.zip", + { + "boundingbox", + "instance_segmentation", + "classification", + "keypoints", + "metadata/color", + "metadata/brand", + }, ), ], ) def test_dir_parser( dataset_type: DatasetType, url: str, - expected_task_types: List[str], - storage_url, + expected_task_types: Set[str], + storage_url: str, ): if not url.startswith("roboflow://"): url = f"{storage_url}/{url}" @@ -108,5 +120,5 @@ def test_dir_parser( loader = LuxonisLoader(dataset) _, ann = next(iter(loader)) task_types = {get_task_type(task) for task in ann} - assert task_types == set(expected_task_types) + assert task_types == expected_task_types dataset.delete_dataset() diff --git a/tests/test_data/test_task_ingestion.py b/tests/test_data/test_task_ingestion.py index 1944641e..6f030c80 100644 --- a/tests/test_data/test_task_ingestion.py +++ b/tests/test_data/test_task_ingestion.py @@ -193,6 +193,7 @@ def generator4(): path = make_image(i) yield { "file": str(path), + "task": "detection", "annotation": { "class": "bike", "boundingbox": {"x": 0.9, "y": 0.8, "w": 0.1, "h": 0.4}, diff --git a/tests/test_data/test_utils/test_visualizations.py b/tests/test_data/test_utils/test_visualizations.py index 565a404e..39bb85df 100644 --- a/tests/test_data/test_utils/test_visualizations.py +++ b/tests/test_data/test_utils/test_visualizations.py @@ -210,8 +210,7 @@ def test_visualize(): expected_semantic = np.stack([expected_semantic] * 3, axis=-1) expected_images = { "image": image.copy(), - "semantic": expected_semantic, - "labels": expected_labels, + "labels": expected_semantic + expected_labels, } class_names = { "task": ["class_name"],