
Default empty task name #222

Merged · 13 commits · Jan 14, 2025
7 changes: 4 additions & 3 deletions luxonis_ml/data/datasets/annotation.py
@@ -498,15 +498,15 @@ def validate_path(cls, path: FilePath) -> FilePath:
             np.load(path)
         except Exception as e:
             raise ValueError(
-                f"Failed to load array annotation from {path}"
+                f"Failed to load array annotation from {path}."
             ) from e
         return path


 class DatasetRecord(BaseModelExtraForbid):
     files: Dict[str, FilePath]
     annotation: Optional[Detection] = None
-    task: str = "detection"
+    task: str = ""

     @property
     def file(self) -> FilePath:
@@ -564,7 +564,8 @@ def check_valid_identifier(name: str, *, label: str) -> None:
     Albumentations requires that the names of the targets
     passed as `additional_targets` are valid Python identifiers.
     """
-    if not name.replace("-", "_").isidentifier():
+    name = name.replace("-", "_")
+    if name and not name.isidentifier():
         raise ValueError(
             f"{label} can only contain alphanumeric characters, "
             "underscores, and dashes. Additionaly, the first character "
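The net effect of these two changes: a DatasetRecord built without an explicit task now carries an empty task name, and the relaxed check accepts it. A minimal sketch, assuming the model and helper above (and an existing img.jpg, since files holds validated paths):

    record = DatasetRecord(files={"image": "img.jpg"})
    assert record.task == ""  # previously defaulted to "detection"

    # Empty names now pass; dashes are still mapped to underscores first.
    check_valid_identifier("", label="Task name")         # no error
    check_valid_identifier("my-task", label="Task name")  # no error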
5 changes: 3 additions & 2 deletions luxonis_ml/data/datasets/luxonis_dataset.py
@@ -728,10 +728,10 @@ def delete_dataset(self, *, delete_remote: bool = False) -> None:
"""
if not self.is_remote:
shutil.rmtree(self.path)
logger.info(f"Deleted dataset {self.dataset_name}")
logger.info(f"Deleted dataset '{self.dataset_name}'")

if self.is_remote and delete_remote:
logger.info(f"Deleting dataset {self.dataset_name} from cloud")
logger.info(f"Deleting dataset '{self.dataset_name}' from cloud")
assert self.path
assert self.dataset_name
assert self.local_path
@@ -828,6 +828,7 @@ def _add_process_batch(
     def add(
         self, generator: DatasetIterator, batch_size: int = 1_000_000
     ) -> Self:
+        logger.info(f"Adding data to dataset '{self.dataset_name}'...")
         index = self._get_file_index(sync_from_cloud=True)
         new_index = {"uuid": [], "file": [], "original_filepath": []}
         processed_uuids = set()
2 changes: 0 additions & 2 deletions luxonis_ml/data/loaders/luxonis_loader.py
@@ -143,8 +143,6 @@ def __init__(

         self.class_mappings: Dict[str, Dict[str, int]] = {}
         for task in self.df["task_name"].unique():
-            if not task:
-                continue
             class_mapping = {
                 class_: i
                 for i, class_ in enumerate(
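With the guard removed, the loader builds a class mapping for every task name in the dataframe, the new empty default included. A sketch of the resulting structure (class names and their ordering are illustrative, not the library's actual sort):

    class_mappings = {}
    for task in ["", "driver"]:       # unique task names; "" is the new default
        classes = ["car", "person"]   # stand-in for this task's class list
        class_mappings[task] = {c: i for i, c in enumerate(classes)}

    assert class_mappings[""] == {"car": 0, "person": 1}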
33 changes: 19 additions & 14 deletions luxonis_ml/data/parsers/base_parser.py
@@ -1,3 +1,4 @@
+import os
 from abc import ABC, abstractmethod
 from dataclasses import dataclass
 from pathlib import Path
@@ -89,17 +90,20 @@
         @rtype: List[str]
         @return: List of added images.
         """
-        generator, skeletons, added_images = self.from_split(**kwargs)
-        self.dataset.add(self._add_task(generator))
-        if skeletons:
-            for skeleton in skeletons.values():
-                self.dataset.set_skeletons(
-                    skeleton.get("labels"),
-                    skeleton.get("edges"),
-                    self.dataset_type.value,
-                )
-
-        return added_images
+        old_cwd = os.getcwd()
+        try:
+            generator, skeletons, added_images = self.from_split(**kwargs)
+            self.dataset.add(self._add_task(generator))
+            if skeletons:
+                for skeleton in skeletons.values():
+                    self.dataset.set_skeletons(
+                        skeleton.get("labels"),
+                        skeleton.get("edges"),
+                        self.dataset_type.value,
+                    )
+            return added_images
+        finally:
+            os.chdir(old_cwd)

     def parse_split(
         self,
@@ -240,10 +244,11 @@
         @return: Generator function with added task
         """

-        task_name = self.task_name or self.dataset_type.value
+        task_name = self.task_name or ""
         for item in generator:
             if isinstance(item, dict):
-                item["task"] = task_name
-            else:
+                if "task" not in item:
+                    item["task"] = task_name
+            elif not item.task:
                 item.task = task_name
             yield item
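The try/finally guard is there because from_split may change the process working directory (the native parser below does exactly that), and parse should hand the caller back their original cwd even when parsing fails. The same pattern in isolation, as a generic sketch rather than library API:

    import os

    def call_with_restored_cwd(fn, *args, **kwargs):
        old_cwd = os.getcwd()
        try:
            return fn(*args, **kwargs)  # fn may os.chdir() internally
        finally:
            os.chdir(old_cwd)           # restored even if fn raises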
6 changes: 5 additions & 1 deletion luxonis_ml/data/parsers/native_parser.py
@@ -1,4 +1,5 @@
 import json
+import os
 from pathlib import Path
 from typing import Any, Dict, List, Optional, Tuple

@@ -65,8 +66,11 @@ def from_split(self, annotation_path: Path) -> ParserOutput:
             dictionary for keypoints and list of added images.
         """

+        data = json.loads(annotation_path.read_text())
+        os.chdir(annotation_path.parent)
+
         def generator() -> DatasetIterator:
-            yield from json.loads(annotation_path.read_text())
+            yield from data

         added_images = self._get_added_images(generator())

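Note the ordering: the JSON is parsed while annotation_path (which may be relative) is still resolvable, and only then does the parser chdir so that relative file paths inside the annotation resolve against its directory. The two steps in isolation, with a hypothetical path:

    import json
    import os
    from pathlib import Path

    annotation_path = Path("dataset/annotations.json")  # hypothetical
    data = json.loads(annotation_path.read_text())      # read first...
    os.chdir(annotation_path.parent)                    # ...then change cwd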
9 changes: 5 additions & 4 deletions luxonis_ml/data/utils/visualizations.py
@@ -329,21 +329,22 @@ def create_mask(

     for task, arr in task_type_iterator(labels, "segmentation"):
         task_name = get_task_name(task)
-        images[task_name] = create_mask(
+        image_name = task_name if task_name and not blend_all else "labels"
+        images[image_name] = create_mask(
             image, arr, task_name, is_instance=False
         )

     for task, arr in task_type_iterator(labels, "instance_segmentation"):
         task_name = get_task_name(task)
-        image_name = task_name if not blend_all else "labels"
+        image_name = task_name if task_name and not blend_all else "labels"
         curr_image = images.get(image_name, image.copy())
         images[image_name] = create_mask(
             curr_image, arr, task_name, is_instance=True
         )

     for task, arr in task_type_iterator(labels, "boundingbox"):
         task_name = get_task_name(task)
-        image_name = task_name if not blend_all else "labels"
+        image_name = task_name if task_name and not blend_all else "labels"
         curr_image = images.get(image_name, image.copy())

         draw_function = cv2.rectangle
@@ -374,7 +375,7 @@ def create_mask(

     for task, arr in task_type_iterator(labels, "keypoints"):
         task_name = get_task_name(task)
-        image_name = task_name if not blend_all else "labels"
+        image_name = task_name if task_name and not blend_all else "labels"
         curr_image = images.get(image_name, image.copy())

         task_classes = class_names[task_name]
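All four loops now share one naming rule: an empty task name, like blend_all=True, collapses onto the shared "labels" canvas instead of keying an image under the empty string. The rule in isolation:

    def target_image_name(task_name: str, blend_all: bool) -> str:
        # Empty task names and blend_all both fall back to "labels".
        return task_name if task_name and not blend_all else "labels"

    assert target_image_name("", blend_all=False) == "labels"
    assert target_image_name("semantic", blend_all=False) == "semantic"
    assert target_image_name("semantic", blend_all=True) == "labels"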
6 changes: 3 additions & 3 deletions tests/test_data/test_annotations.py
@@ -68,7 +68,7 @@ def compare_parquet_rows(
         {
             "file": Path("tests/data/tempdir/left.jpg"),
             "source_name": "image",
-            "task_name": "detection",
+            "task_name": "",
             "class_name": None,
             "instance_id": None,
             "task_type": None,
@@ -90,7 +90,7 @@
         {
             "file": Path("tests/data/tempdir/left.jpg"),
             "source_name": "image",
-            "task_name": "detection",
+            "task_name": "",
             "class_name": "person",
             "instance_id": -1,
             "task_type": "boundingbox",
@@ -99,7 +99,7 @@
         {
             "file": Path("tests/data/tempdir/left.jpg"),
             "source_name": "image",
-            "task_name": "detection",
+            "task_name": "",
             "class_name": "person",
             "instance_id": -1,
             "task_type": "classification",
26 changes: 16 additions & 10 deletions tests/test_data/test_dataset.py
@@ -53,6 +53,7 @@ def test_dataset(
         bucket_storage=bucket_storage,
         delete_existing=True,
         delete_remote=True,
+        task_name="coco",
     )
     parser.parse()
     dataset = LuxonisDataset(dataset_name, bucket_storage=bucket_storage)
@@ -173,6 +174,7 @@ def test_loader_iterator(storage_url: str, tempdir: Path):
         save_dir=tempdir,
         dataset_type=DatasetType.COCO,
         delete_existing=True,
+        task_name="coco",
     ).parse()
     loader = LuxonisLoader(dataset)

@@ -417,12 +419,12 @@ def generator():
     compare_loader_output(
         loader,
         {
-            "detection/classification",
-            "detection/boundingbox",
-            "detection/driver/boundingbox",
-            "detection/driver/keypoints",
-            "detection/license_plate/boundingbox",
-            "detection/license_plate/metadata/text",
+            "/classification",
+            "/boundingbox",
+            "/driver/boundingbox",
+            "/driver/keypoints",
+            "/license_plate/boundingbox",
+            "/license_plate/metadata/text",
         },
     )

@@ -482,10 +484,10 @@ def generator():
     compare_loader_output(
         loader,
         {
-            "detection/classification",
-            "detection/boundingbox",
-            "detection/keypoints",
-            "detection/segmentation",
+            "/classification",
+            "/boundingbox",
+            "/keypoints",
+            "/segmentation",
         },
     )

@@ -533,6 +535,8 @@ def generator1():

     df_cloned = cloned_dataset._load_df_offline()
     df_original = dataset._load_df_offline()
+    assert df_cloned is not None
+    assert df_original is not None
     assert df_cloned.equals(df_original)


@@ -620,4 +624,6 @@ def generator2():
     df_cloned_merged = dataset1.merge_with(
         dataset2, inplace=True
     )._load_df_offline()
+    assert df_merged is not None
+    assert df_cloned_merged is not None
     assert df_merged.equals(df_cloned_merged)
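The updated expectations follow from how the loader appears to compose label keys: roughly the task name, any sub-detection name, and the task type joined by slashes, so the empty default task name leaves a bare leading slash. A sketch of that assumed composition:

    def label_key(task_name: str, task_type: str) -> str:
        # Assumed format; with the empty default task name the keys
        # degenerate to "/boundingbox", "/classification", and so on.
        return f"{task_name}/{task_type}"

    assert label_key("", "boundingbox") == "/boundingbox"
    assert label_key("coco", "keypoints") == "coco/keypoints"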
44 changes: 28 additions & 16 deletions tests/test_data/test_parsers.py
@@ -1,4 +1,4 @@
-from typing import Final, List
+from typing import Final, Set

 import pytest

@@ -26,70 +26,82 @@ def prepare_dir():
     (
         DatasetType.COCO,
         "COCO_people_subset.zip",
-        ["boundingbox", "keypoints", "segmentation", "classification"],
+        {"boundingbox", "keypoints", "segmentation", "classification"},
     ),
     (
         DatasetType.COCO,
         "Thermal_Dogs_and_People.v1-resize-416x416.coco.zip",
-        ["boundingbox", "classification"],
+        {"boundingbox", "classification"},
     ),
     (
         DatasetType.VOC,
         "Thermal_Dogs_and_People.v1-resize-416x416.voc.zip",
-        ["boundingbox", "classification"],
+        {"boundingbox", "classification"},
     ),
     (
         DatasetType.DARKNET,
         "Thermal_Dogs_and_People.v1-resize-416x416.darknet.zip",
-        ["boundingbox", "classification"],
+        {"boundingbox", "classification"},
     ),
     (
         DatasetType.YOLOV4,
         "Thermal_Dogs_and_People.v1-resize-416x416.yolov4pytorch.zip",
-        ["boundingbox", "classification"],
+        {"boundingbox", "classification"},
     ),
     (
         DatasetType.YOLOV6,
         "Thermal_Dogs_and_People.v1-resize-416x416.mt-yolov6.zip",
-        ["boundingbox", "classification"],
+        {"boundingbox", "classification"},
     ),
     (
         DatasetType.CREATEML,
         "Thermal_Dogs_and_People.v1-resize-416x416.createml.zip",
-        ["boundingbox", "classification"],
+        {"boundingbox", "classification"},
     ),
     (
         DatasetType.TFCSV,
         "Thermal_Dogs_and_People.v1-resize-416x416.tensorflow.zip",
-        ["boundingbox", "classification"],
+        {"boundingbox", "classification"},
     ),
     (
         DatasetType.SEGMASK,
         "D2_Tile.png-mask-semantic.zip",
-        ["segmentation", "classification"],
+        {"segmentation", "classification"},
     ),
     (
         DatasetType.CLSDIR,
         "Flowers_Classification.v2-raw.folder.zip",
-        ["classification"],
+        {"classification"},
     ),
     (
         DatasetType.SOLO,
         "D2_ParkingLot.zip",
-        ["boundingbox", "segmentation", "classification", "keypoints"],
+        {"boundingbox", "segmentation", "classification", "keypoints"},
     ),
     (
         DatasetType.COCO,
         "roboflow://team-roboflow/coco-128/2/coco",
-        ["boundingbox", "classification"],
+        {"boundingbox", "classification"},
     ),
+    (
+        DatasetType.NATIVE,
+        "D2_ParkingLot_Native.zip",
+        {
+            "boundingbox",
+            "instance_segmentation",
+            "classification",
+            "keypoints",
+            "metadata/color",
+            "metadata/brand",
+        },
+    ),
 ],
 )
 def test_dir_parser(
     dataset_type: DatasetType,
     url: str,
-    expected_task_types: List[str],
-    storage_url,
+    expected_task_types: Set[str],
+    storage_url: str,
 ):
     if not url.startswith("roboflow://"):
         url = f"{storage_url}/{url}"
@@ -108,5 +120,5 @@ def test_dir_parser(
     loader = LuxonisLoader(dataset)
     _, ann = next(iter(loader))
     task_types = {get_task_type(task) for task in ann}
-    assert task_types == set(expected_task_types)
+    assert task_types == expected_task_types
     dataset.delete_dataset()
1 change: 1 addition & 0 deletions tests/test_data/test_task_ingestion.py
@@ -193,6 +193,7 @@ def generator4():
         path = make_image(i)
         yield {
             "file": str(path),
+            "task": "detection",
             "annotation": {
                 "class": "bike",
                 "boundingbox": {"x": 0.9, "y": 0.8, "w": 0.1, "h": 0.4},
3 changes: 1 addition & 2 deletions tests/test_data/test_utils/test_visualizations.py
@@ -210,8 +210,7 @@ def test_visualize():
     expected_semantic = np.stack([expected_semantic] * 3, axis=-1)
     expected_images = {
         "image": image.copy(),
-        "semantic": expected_semantic,
-        "labels": expected_labels,
+        "labels": expected_semantic + expected_labels,
     }
     class_names = {
         "task": ["class_name"],