Default empty task name (#222)
kozlov721 authored Jan 14, 2025
1 parent 539fde4 commit d2840b2
Showing 11 changed files with 85 additions and 57 deletions.
7 changes: 4 additions & 3 deletions luxonis_ml/data/datasets/annotation.py
@@ -498,15 +498,15 @@ def validate_path(cls, path: FilePath) -> FilePath:
             np.load(path)
         except Exception as e:
             raise ValueError(
-                f"Failed to load array annotation from {path}"
+                f"Failed to load array annotation from {path}."
             ) from e
         return path


 class DatasetRecord(BaseModelExtraForbid):
     files: Dict[str, FilePath]
     annotation: Optional[Detection] = None
-    task: str = "detection"
+    task: str = ""

     @property
     def file(self) -> FilePath:
@@ -564,7 +564,8 @@ def check_valid_identifier(name: str, *, label: str) -> None:
     Albumentations requires that the names of the targets
     passed as `additional_targets` are valid Python identifiers.
     """
-    if not name.replace("-", "_").isidentifier():
+    name = name.replace("-", "_")
+    if name and not name.isidentifier():
         raise ValueError(
             f"{label} can only contain alphanumeric characters, "
             "underscores, and dashes. Additionaly, the first character "
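In practice, records added without an explicit task now land under the empty task name instead of the previous "detection" default. A minimal sketch of the dict-based generator format used elsewhere in this repository (the file path is hypothetical and must point to an existing image; `dataset` is assumed to be an existing LuxonisDataset):

    def generator():
        # No "task" key: with this commit the record's task defaults to ""
        # rather than "detection".
        yield {
            "file": "path/to/image.jpg",  # hypothetical path
            "annotation": {
                "class": "person",
                "boundingbox": {"x": 0.1, "y": 0.1, "w": 0.5, "h": 0.5},
            },
        }

    dataset.add(generator())
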
5 changes: 3 additions & 2 deletions luxonis_ml/data/datasets/luxonis_dataset.py
Expand Up @@ -728,10 +728,10 @@ def delete_dataset(self, *, delete_remote: bool = False) -> None:
"""
if not self.is_remote:
shutil.rmtree(self.path)
logger.info(f"Deleted dataset {self.dataset_name}")
logger.info(f"Deleted dataset '{self.dataset_name}'")

if self.is_remote and delete_remote:
logger.info(f"Deleting dataset {self.dataset_name} from cloud")
logger.info(f"Deleting dataset '{self.dataset_name}' from cloud")
assert self.path
assert self.dataset_name
assert self.local_path
@@ -828,6 +828,7 @@ def _add_process_batch(
     def add(
         self, generator: DatasetIterator, batch_size: int = 1_000_000
     ) -> Self:
+        logger.info(f"Adding data to dataset '{self.dataset_name}'...")
         index = self._get_file_index(sync_from_cloud=True)
         new_index = {"uuid": [], "file": [], "original_filepath": []}
         processed_uuids = set()
2 changes: 0 additions & 2 deletions luxonis_ml/data/loaders/luxonis_loader.py
Expand Up @@ -143,8 +143,6 @@ def __init__(

self.class_mappings: Dict[str, Dict[str, int]] = {}
for task in self.df["task_name"].unique():
if not task:
continue
class_mapping = {
class_: i
for i, class_ in enumerate(
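With the guard removed, the loader now builds a class mapping for every task name in the dataframe, including the empty default. An illustrative stand-in for that step (the real loop iterates over self.df and its class lookup is truncated above; the grouping shown here is hypothetical):

    # Hypothetical classes grouped per task name; "" is the new default task.
    classes_by_task = {"": ["car", "person"], "driver": ["driver"]}

    class_mappings = {
        task: {class_: i for i, class_ in enumerate(sorted(classes))}
        for task, classes in classes_by_task.items()
    }
    assert "" in class_mappings  # previously skipped, now mapped like any other task
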
33 changes: 19 additions & 14 deletions luxonis_ml/data/parsers/base_parser.py
@@ -1,3 +1,4 @@
+import os
 from abc import ABC, abstractmethod
 from dataclasses import dataclass
 from pathlib import Path
@@ -89,17 +90,20 @@ def _parse_split(self, **kwargs) -> List[Path]:
         @rtype: List[str]
         @return: List of added images.
         """
-        generator, skeletons, added_images = self.from_split(**kwargs)
-        self.dataset.add(self._add_task(generator))
-        if skeletons:
-            for skeleton in skeletons.values():
-                self.dataset.set_skeletons(
-                    skeleton.get("labels"),
-                    skeleton.get("edges"),
-                    self.dataset_type.value,
-                )
-
-        return added_images
+        old_cwd = os.getcwd()
+        try:
+            generator, skeletons, added_images = self.from_split(**kwargs)
+            self.dataset.add(self._add_task(generator))
+            if skeletons:
+                for skeleton in skeletons.values():
+                    self.dataset.set_skeletons(
+                        skeleton.get("labels"),
+                        skeleton.get("edges"),
+                        self.dataset_type.value,
+                    )
+            return added_images
+        finally:
+            os.chdir(old_cwd)

     def parse_split(
         self,
@@ -240,10 +244,11 @@ def _add_task(self, generator: DatasetIterator) -> DatasetIterator:
         @return: Generator function with added task
         """

-        task_name = self.task_name or self.dataset_type.value
+        task_name = self.task_name or ""
         for item in generator:
             if isinstance(item, dict):
-                item["task"] = task_name
-            else:
+                if "task" not in item:
+                    item["task"] = task_name
+            elif not item.task:
                 item.task = task_name
             yield item
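A self-contained sketch of the precedence the new _add_task logic applies to dict items: an explicit task on the record wins, otherwise the parser's task_name is used, otherwise the empty default remains (the real method also handles DatasetRecord objects via the elif branch):

    from typing import Any, Dict, Iterator

    def add_task(
        items: Iterator[Dict[str, Any]], parser_task_name: str = ""
    ) -> Iterator[Dict[str, Any]]:
        task_name = parser_task_name or ""
        for item in items:
            # Only fill in the task when the record does not already carry one.
            if "task" not in item:
                item["task"] = task_name
            yield item

    records = [{"file": "a.jpg"}, {"file": "b.jpg", "task": "detection"}]
    out = list(add_task(iter(records), parser_task_name="coco"))
    assert out[0]["task"] == "coco"
    assert out[1]["task"] == "detection"
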
6 changes: 5 additions & 1 deletion luxonis_ml/data/parsers/native_parser.py
@@ -1,4 +1,5 @@
 import json
+import os
 from pathlib import Path
 from typing import Any, Dict, List, Optional, Tuple

@@ -65,8 +66,11 @@ def from_split(self, annotation_path: Path) -> ParserOutput:
         dictionary for keypoints and list of added images.
         """

+        data = json.loads(annotation_path.read_text())
+        os.chdir(annotation_path.parent)
+
         def generator() -> DatasetIterator:
-            yield from json.loads(annotation_path.read_text())
+            yield from data

         added_images = self._get_added_images(generator())

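Loading the JSON before calling os.chdir means relative "file" entries resolve against the annotation file's directory, while the base parser restores the caller's working directory afterwards. A self-contained sketch of the same pattern using a throwaway temp folder:

    import json
    import os
    import tempfile
    from pathlib import Path

    # Build a tiny dataset folder so the example runs on its own.
    root = Path(tempfile.mkdtemp())
    (root / "img.jpg").write_bytes(b"")
    annotation_path = root / "annotations.json"
    annotation_path.write_text(json.dumps([{"file": "img.jpg"}]))

    data = json.loads(annotation_path.read_text())  # load before changing directory

    old_cwd = os.getcwd()
    try:
        os.chdir(annotation_path.parent)
        # Relative "file" entries now resolve against the annotation file's folder.
        assert all(Path(record["file"]).exists() for record in data)
    finally:
        os.chdir(old_cwd)  # the caller's working directory is left untouched
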
9 changes: 5 additions & 4 deletions luxonis_ml/data/utils/visualizations.py
Expand Up @@ -329,21 +329,22 @@ def create_mask(

for task, arr in task_type_iterator(labels, "segmentation"):
task_name = get_task_name(task)
images[task_name] = create_mask(
image_name = task_name if task_name and not blend_all else "labels"
images[image_name] = create_mask(
image, arr, task_name, is_instance=False
)

for task, arr in task_type_iterator(labels, "instance_segmentation"):
task_name = get_task_name(task)
image_name = task_name if not blend_all else "labels"
image_name = task_name if task_name and not blend_all else "labels"
curr_image = images.get(image_name, image.copy())
images[image_name] = create_mask(
curr_image, arr, task_name, is_instance=True
)

for task, arr in task_type_iterator(labels, "boundingbox"):
task_name = get_task_name(task)
image_name = task_name if not blend_all else "labels"
image_name = task_name if task_name and not blend_all else "labels"
curr_image = images.get(image_name, image.copy())

draw_function = cv2.rectangle
@@ -374,7 +375,7 @@ def create_mask(

     for task, arr in task_type_iterator(labels, "keypoints"):
         task_name = get_task_name(task)
-        image_name = task_name if not blend_all else "labels"
+        image_name = task_name if task_name and not blend_all else "labels"
         curr_image = images.get(image_name, image.copy())

         task_classes = class_names[task_name]
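The repeated one-liner boils down to: key the output image by the task name only when the task name is non-empty and blending is off; otherwise everything is drawn onto a shared "labels" image. The selection logic in isolation:

    def select_image_name(task_name: str, blend_all: bool) -> str:
        # Empty task names (the new default) fall back to the shared "labels"
        # image, as does blend_all=True.
        return task_name if task_name and not blend_all else "labels"

    assert select_image_name("detection", blend_all=False) == "detection"
    assert select_image_name("", blend_all=False) == "labels"
    assert select_image_name("detection", blend_all=True) == "labels"
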
6 changes: 3 additions & 3 deletions tests/test_data/test_annotations.py
@@ -68,7 +68,7 @@ def compare_parquet_rows(
         {
             "file": Path("tests/data/tempdir/left.jpg"),
             "source_name": "image",
-            "task_name": "detection",
+            "task_name": "",
             "class_name": None,
             "instance_id": None,
             "task_type": None,
@@ -90,7 +90,7 @@ def compare_parquet_rows(
         {
             "file": Path("tests/data/tempdir/left.jpg"),
             "source_name": "image",
-            "task_name": "detection",
+            "task_name": "",
             "class_name": "person",
             "instance_id": -1,
             "task_type": "boundingbox",
@@ -99,7 +99,7 @@ def compare_parquet_rows(
         {
             "file": Path("tests/data/tempdir/left.jpg"),
             "source_name": "image",
-            "task_name": "detection",
+            "task_name": "",
             "class_name": "person",
             "instance_id": -1,
             "task_type": "classification",
26 changes: 16 additions & 10 deletions tests/test_data/test_dataset.py
@@ -53,6 +53,7 @@ def test_dataset(
         bucket_storage=bucket_storage,
         delete_existing=True,
         delete_remote=True,
+        task_name="coco",
     )
     parser.parse()
     dataset = LuxonisDataset(dataset_name, bucket_storage=bucket_storage)
@@ -173,6 +174,7 @@ def test_loader_iterator(storage_url: str, tempdir: Path):
         save_dir=tempdir,
         dataset_type=DatasetType.COCO,
         delete_existing=True,
+        task_name="coco",
     ).parse()
     loader = LuxonisLoader(dataset)

@@ -417,12 +419,12 @@ def generator():
     compare_loader_output(
         loader,
         {
-            "detection/classification",
-            "detection/boundingbox",
-            "detection/driver/boundingbox",
-            "detection/driver/keypoints",
-            "detection/license_plate/boundingbox",
-            "detection/license_plate/metadata/text",
+            "/classification",
+            "/boundingbox",
+            "/driver/boundingbox",
+            "/driver/keypoints",
+            "/license_plate/boundingbox",
+            "/license_plate/metadata/text",
         },
     )

@@ -482,10 +484,10 @@ def generator():
     compare_loader_output(
         loader,
         {
-            "detection/classification",
-            "detection/boundingbox",
-            "detection/keypoints",
-            "detection/segmentation",
+            "/classification",
+            "/boundingbox",
+            "/keypoints",
+            "/segmentation",
         },
     )

@@ -533,6 +535,8 @@ def generator1():

     df_cloned = cloned_dataset._load_df_offline()
     df_original = dataset._load_df_offline()
+    assert df_cloned is not None
+    assert df_original is not None
     assert df_cloned.equals(df_original)


@@ -620,4 +624,6 @@ def generator2():
     df_cloned_merged = dataset1.merge_with(
         dataset2, inplace=True
     )._load_df_offline()
+    assert df_merged is not None
+    assert df_cloned_merged is not None
     assert df_merged.equals(df_cloned_merged)
44 changes: 28 additions & 16 deletions tests/test_data/test_parsers.py
@@ -1,4 +1,4 @@
-from typing import Final, List
+from typing import Final, Set

 import pytest

@@ -26,70 +26,82 @@ def prepare_dir():
         (
             DatasetType.COCO,
             "COCO_people_subset.zip",
-            ["boundingbox", "keypoints", "segmentation", "classification"],
+            {"boundingbox", "keypoints", "segmentation", "classification"},
         ),
         (
             DatasetType.COCO,
             "Thermal_Dogs_and_People.v1-resize-416x416.coco.zip",
-            ["boundingbox", "classification"],
+            {"boundingbox", "classification"},
         ),
         (
             DatasetType.VOC,
             "Thermal_Dogs_and_People.v1-resize-416x416.voc.zip",
-            ["boundingbox", "classification"],
+            {"boundingbox", "classification"},
         ),
         (
             DatasetType.DARKNET,
             "Thermal_Dogs_and_People.v1-resize-416x416.darknet.zip",
-            ["boundingbox", "classification"],
+            {"boundingbox", "classification"},
         ),
         (
             DatasetType.YOLOV4,
             "Thermal_Dogs_and_People.v1-resize-416x416.yolov4pytorch.zip",
-            ["boundingbox", "classification"],
+            {"boundingbox", "classification"},
         ),
         (
             DatasetType.YOLOV6,
             "Thermal_Dogs_and_People.v1-resize-416x416.mt-yolov6.zip",
-            ["boundingbox", "classification"],
+            {"boundingbox", "classification"},
        ),
         (
             DatasetType.CREATEML,
             "Thermal_Dogs_and_People.v1-resize-416x416.createml.zip",
-            ["boundingbox", "classification"],
+            {"boundingbox", "classification"},
         ),
         (
             DatasetType.TFCSV,
             "Thermal_Dogs_and_People.v1-resize-416x416.tensorflow.zip",
-            ["boundingbox", "classification"],
+            {"boundingbox", "classification"},
         ),
         (
             DatasetType.SEGMASK,
             "D2_Tile.png-mask-semantic.zip",
-            ["segmentation", "classification"],
+            {"segmentation", "classification"},
         ),
         (
             DatasetType.CLSDIR,
             "Flowers_Classification.v2-raw.folder.zip",
-            ["classification"],
+            {"classification"},
         ),
         (
             DatasetType.SOLO,
             "D2_ParkingLot.zip",
-            ["boundingbox", "segmentation", "classification", "keypoints"],
+            {"boundingbox", "segmentation", "classification", "keypoints"},
         ),
         (
             DatasetType.COCO,
             "roboflow://team-roboflow/coco-128/2/coco",
-            ["boundingbox", "classification"],
+            {"boundingbox", "classification"},
         ),
+        (
+            DatasetType.NATIVE,
+            "D2_ParkingLot_Native.zip",
+            {
+                "boundingbox",
+                "instance_segmentation",
+                "classification",
+                "keypoints",
+                "metadata/color",
+                "metadata/brand",
+            },
+        ),
     ],
 )
 def test_dir_parser(
     dataset_type: DatasetType,
     url: str,
-    expected_task_types: List[str],
-    storage_url,
+    expected_task_types: Set[str],
+    storage_url: str,
 ):
     if not url.startswith("roboflow://"):
         url = f"{storage_url}/{url}"
@@ -108,5 +120,5 @@ def test_dir_parser(
     loader = LuxonisLoader(dataset)
     _, ann = next(iter(loader))
     task_types = {get_task_type(task) for task in ann}
-    assert task_types == set(expected_task_types)
+    assert task_types == expected_task_types
     dataset.delete_dataset()
1 change: 1 addition & 0 deletions tests/test_data/test_task_ingestion.py
@@ -193,6 +193,7 @@ def generator4():
         path = make_image(i)
         yield {
             "file": str(path),
+            "task": "detection",
             "annotation": {
                 "class": "bike",
                 "boundingbox": {"x": 0.9, "y": 0.8, "w": 0.1, "h": 0.4},
3 changes: 1 addition & 2 deletions tests/test_data/test_utils/test_visualizations.py
@@ -210,8 +210,7 @@ def test_visualize():
     expected_semantic = np.stack([expected_semantic] * 3, axis=-1)
     expected_images = {
         "image": image.copy(),
-        "semantic": expected_semantic,
-        "labels": expected_labels,
+        "labels": expected_semantic + expected_labels,
     }
     class_names = {
         "task": ["class_name"],
