Skip to content

Commit

Permalink
Native LDF Parser (#218)
Browse files Browse the repository at this point in the history
  • Loading branch information
kozlov721 authored Jan 6, 2025
1 parent 2a64197 commit 1cbc795
Show file tree
Hide file tree
Showing 14 changed files with 107 additions and 36 deletions.
11 changes: 5 additions & 6 deletions luxonis_ml/data/parsers/base_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,8 @@

from luxonis_ml.data import BaseDataset, DatasetIterator
from luxonis_ml.enums.enums import DatasetType
from luxonis_ml.typing import PathType

ParserOutput = Tuple[DatasetIterator, List[str], Dict[str, Dict], List[str]]
ParserOutput = Tuple[DatasetIterator, Dict[str, Dict], List[Path]]
"""Type alias for parser output.
Contains a function to create the annotation generator, list of classes
Expand Down Expand Up @@ -81,7 +80,7 @@ def from_split(self, **kwargs) -> ParserOutput:
"""
...

def _parse_split(self, **kwargs) -> List[str]:
def _parse_split(self, **kwargs) -> List[Path]:
"""Parses data in a split subdirectory.
@type kwargs: Dict[str, Any]
Expand All @@ -90,7 +89,7 @@ def _parse_split(self, **kwargs) -> List[str]:
@rtype: List[Path]
@return: List of added images.
"""
generator, _, skeletons, added_images = self.from_split(**kwargs)
generator, skeletons, added_images = self.from_split(**kwargs)
self.dataset.add(self._add_task(generator))
if skeletons:
for skeleton in skeletons.values():
Expand Down Expand Up @@ -151,7 +150,7 @@ def parse_dir(self, dataset_dir: Path, **kwargs) -> BaseDataset:
return self.dataset

@staticmethod
def _get_added_images(generator: DatasetIterator) -> List[PathType]:
def _get_added_images(generator: DatasetIterator) -> List[Path]:
"""Returns list of unique images added by the generator
function.
Expand All @@ -162,7 +161,7 @@ def _get_added_images(generator: DatasetIterator) -> List[PathType]:
"""
return list(
set(
item["file"] if isinstance(item, dict) else item.file
Path(item["file"] if isinstance(item, dict) else item.file)
for item in generator
)
)
Expand Down
4 changes: 2 additions & 2 deletions luxonis_ml/data/parsers/classification_directory_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ def validate(dataset_dir: Path) -> bool:

def from_dir(
self, dataset_dir: Path
) -> Tuple[List[str], List[str], List[str]]:
) -> Tuple[List[Path], List[Path], List[Path]]:
added_train_imgs = self._parse_split(class_dir=dataset_dir / "train")
added_val_imgs = self._parse_split(class_dir=dataset_dir / "valid")
added_test_imgs = self._parse_split(class_dir=dataset_dir / "test")
Expand Down Expand Up @@ -83,4 +83,4 @@ def generator() -> DatasetIterator:

added_images = self._get_added_images(generator())

return generator(), class_names, {}, added_images
return generator(), {}, added_images
5 changes: 2 additions & 3 deletions luxonis_ml/data/parsers/coco_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ def from_dir(
use_keypoint_ann: bool = False,
keypoint_ann_paths: Optional[Dict[str, str]] = None,
split_val_to_test: bool = True,
) -> Tuple[List[str], List[str], List[str]]:
) -> Tuple[List[Path], List[Path], List[Path]]:
dir_format, splits = COCOParser._detect_dataset_dir_format(dataset_dir)
if dir_format is None:
raise ValueError("Dataset is not in any expected format.")
Expand Down Expand Up @@ -208,7 +208,6 @@ def from_split(
coco_categories = annotation_data["categories"]
categories = {cat["id"]: cat["name"] for cat in coco_categories}

class_names = list(categories.values())
skeletons = {}
for cat in coco_categories:
if "keypoints" in cat.keys() and "skeleton" in cat.keys():
Expand Down Expand Up @@ -311,7 +310,7 @@ def generator() -> DatasetIterator:

added_images = self._get_added_images(generator())

return generator(), class_names, skeletons, added_images
return generator(), skeletons, added_images


def clean_annotations(annotation_path: Path) -> Path:
Expand Down
6 changes: 2 additions & 4 deletions luxonis_ml/data/parsers/create_ml_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ def validate(dataset_dir: Path) -> bool:

def from_dir(
self, dataset_dir: Path
) -> Tuple[List[str], List[str], List[str]]:
) -> Tuple[List[Path], List[Path], List[Path]]:
added_train_imgs = self._parse_split(
image_dir=dataset_dir / "train",
annotation_path=dataset_dir
Expand Down Expand Up @@ -89,7 +89,6 @@ def from_split(
with open(annotation_path) as f:
annotations_data = json.load(f)

class_names = set()
images_annotations = []
for annotations in annotations_data:
path = image_dir.absolute().resolve() / annotations["image"]
Expand All @@ -103,7 +102,6 @@ def from_split(
for curr_ann in annotations["annotations"]:
class_name = curr_ann["label"]
curr_annotations["classes"].append(class_name)
class_names.add(class_name)

bbox_ann = curr_ann["coordinates"]
bbox_xywh = [
Expand Down Expand Up @@ -134,4 +132,4 @@ def generator() -> DatasetIterator:

added_images = self._get_added_images(generator())

return generator(), list(class_names), {}, added_images
return generator(), {}, added_images
4 changes: 2 additions & 2 deletions luxonis_ml/data/parsers/darknet_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ def validate(dataset_dir: Path) -> bool:

def from_dir(
self, dataset_dir: Path
) -> Tuple[List[str], List[str], List[str]]:
) -> Tuple[List[Path], List[Path], List[Path]]:
added_train_imgs = self._parse_split(
image_dir=dataset_dir / "train",
classes_path=dataset_dir / "train" / "_darknet.labels",
Expand Down Expand Up @@ -109,4 +109,4 @@ def generator() -> DatasetIterator:

added_images = self._get_added_images(generator())

return generator(), list(class_names.values()), {}, added_images
return generator(), {}, added_images
5 changes: 4 additions & 1 deletion luxonis_ml/data/parsers/luxonis_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
from .coco_parser import COCOParser
from .create_ml_parser import CreateMLParser
from .darknet_parser import DarknetParser
from .native_parser import NativeParser
from .segmentation_mask_directory_parser import SegmentationMaskDirectoryParser
from .solo_parser import SOLOParser
from .tensorflow_csv_parser import TensorflowCSVParser
Expand Down Expand Up @@ -54,6 +55,7 @@ class LuxonisParser(Generic[T]):
DatasetType.CLSDIR: ClassificationDirectoryParser,
DatasetType.SEGMASK: SegmentationMaskDirectoryParser,
DatasetType.SOLO: SOLOParser,
DatasetType.NATIVE: NativeParser,
}

def __init__(
Expand Down Expand Up @@ -142,7 +144,8 @@ def __init__(
dataset_name = dataset_name or name.replace(" ", "_").split(".")[0]

self.dataset = self.dataset_constructor(
dataset_name=dataset_name, **kwargs
dataset_name=dataset_name, # type: ignore
**kwargs,
)
self.parser = self.parsers[self.dataset_type](
self.dataset, self.dataset_type, task_name
Expand Down
73 changes: 73 additions & 0 deletions luxonis_ml/data/parsers/native_parser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
import json
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple

from luxonis_ml.data import DatasetIterator

from .base_parser import BaseParser, ParserOutput


class NativeParser(BaseParser):
    """Parses a directory with native LDF annotations.

    Expected format::

        dataset_dir/
        ├── train/
        │   └── annotations.json
        ├── valid/
        └── test/

    The annotations are stored in a single JSON file as a list of
    dictionaries in the same format as the output of the generator
    function used in the L{BaseDataset.add} method.
    """

    @staticmethod
    def validate_split(split_path: Path) -> Optional[Dict[str, Any]]:
        """Checks whether a split directory contains an
        C{annotations.json} file.

        @type split_path: Path
        @param split_path: Path to the split directory.
        @rtype: Optional[Dict[str, Any]]
        @return: Keyword arguments for L{from_split} if the split is
            valid, otherwise C{None}.
        """
        annotation_path = split_path / "annotations.json"
        if not annotation_path.exists():
            return None
        return {"annotation_path": annotation_path}

    @staticmethod
    def validate(dataset_dir: Path) -> bool:
        """Checks that all three expected splits are present and valid.

        @type dataset_dir: Path
        @param dataset_dir: Path to the dataset directory.
        @rtype: bool
        @return: C{True} iff C{train}, C{valid} and C{test} subdirectories
            each contain an C{annotations.json} file.
        """
        return all(
            NativeParser.validate_split(dataset_dir / split) is not None
            for split in ("train", "valid", "test")
        )

    def from_dir(
        self, dataset_dir: Path
    ) -> Tuple[List[Path], List[Path], List[Path]]:
        """Parses all splits of the dataset directory.

        @type dataset_dir: Path
        @param dataset_dir: Path to the dataset directory.
        @rtype: Tuple[List[Path], List[Path], List[Path]]
        @return: Tuple with added images for train, valid and test splits.
        """
        # BUG FIX: `BaseParser._parse_split` forwards its kwargs directly
        # to `from_split`, which accepts only `annotation_path`. Passing
        # `image_dir`/`annotation_dir` raised a TypeError; build the
        # annotation path here instead (matching what `validate_split`
        # returns for a valid split).
        added_train_imgs = self._parse_split(
            annotation_path=dataset_dir / "train" / "annotations.json"
        )
        added_val_imgs = self._parse_split(
            annotation_path=dataset_dir / "valid" / "annotations.json"
        )
        added_test_imgs = self._parse_split(
            annotation_path=dataset_dir / "test" / "annotations.json"
        )
        return added_train_imgs, added_val_imgs, added_test_imgs

    def from_split(self, annotation_path: Path) -> ParserOutput:
        """Parses annotations from the native LDF format.

        @type annotation_path: Path
        @param annotation_path: Path to the JSON file with annotations.
        @rtype: L{ParserOutput}
        @return: Annotation generator, skeleton dictionary for keypoints
            and list of added images.
        """

        def generator() -> DatasetIterator:
            # The JSON file holds a list of records in the same shape the
            # dataset `add` generator expects; re-read on each call so the
            # returned generator is fresh.
            yield from json.loads(annotation_path.read_text())

        added_images = self._get_added_images(generator())

        return generator(), {}, added_images
7 changes: 4 additions & 3 deletions luxonis_ml/data/parsers/segmentation_mask_directory_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ def validate(dataset_dir: Path) -> bool:

def from_dir(
self, dataset_dir: Path
) -> Tuple[List[str], List[str], List[str]]:
) -> Tuple[List[Path], List[Path], List[Path]]:
added_train_imgs = self._parse_split(
image_dir=dataset_dir / "train",
seg_dir=dataset_dir / "train",
Expand Down Expand Up @@ -96,7 +96,8 @@ def from_split(
dictionary for keypoints and list of added images
"""

idx_class = " Class" # NOTE: space prefix included
# NOTE: space prefix included
idx_class = " Class"

df = pl.read_csv(classes_path).filter(pl.col(idx_class).is_not_null())
class_names = df[idx_class].to_list()
Expand All @@ -122,4 +123,4 @@ def generator() -> DatasetIterator:
}

added_images = self._get_added_images(generator())
return generator(), class_names, {}, added_images
return generator(), {}, added_images
8 changes: 4 additions & 4 deletions luxonis_ml/data/parsers/solo_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,12 +87,12 @@ def validate(dataset_dir: Path) -> bool:

def from_dir(
self, dataset_dir: Path
) -> Tuple[List[str], List[str], List[str]]:
) -> Tuple[List[Path], List[Path], List[Path]]:
"""Parses all present data to L{LuxonisDataset} format.
@type dataset_dir: str
@param dataset_dir: Path to source dataset directory.
@rtype: Tuple[List[str], List[str], List[str]]
@rtype: Tuple[List[Path], List[Path], List[Path]]
@return: Tuple with added images for train, valid and test
splits.
"""
Expand Down Expand Up @@ -138,7 +138,7 @@ def from_split(self, split_path: Path) -> ParserOutput:
)
# TODO: We make an assumption here that bbox class_names are also valid for all other annotation types in the dataset. Is this OK?
# TODO: Can we imagine a case where classes between annotation types are different? Which class names to return in this case?
if class_names == []:
if not class_names:
raise Exception("No class_names identified. ")

keypoint_labels = self._get_solo_keypoint_names(
Expand Down Expand Up @@ -256,7 +256,7 @@ def generator() -> DatasetIterator:

added_images = self._get_added_images(generator())

return generator(), class_names, skeletons, added_images
return generator(), skeletons, added_images

def _get_solo_annotation_types(
self, annotation_definitions_dict: dict
Expand Down
5 changes: 2 additions & 3 deletions luxonis_ml/data/parsers/tensorflow_csv_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ def validate(dataset_dir: Path) -> bool:

def from_dir(
self, dataset_dir: Path
) -> Tuple[List[str], List[str], List[str]]:
) -> Tuple[List[Path], List[Path], List[Path]]:
added_train_imgs = self._parse_split(
image_dir=dataset_dir / "train",
annotation_path=dataset_dir / "train" / "_annotations.csv",
Expand Down Expand Up @@ -84,7 +84,6 @@ def from_split(
)
images_annotations = {}

class_names = set(df["class"])
for row in df.rows(named=True):
path = str(image_dir / str(row["filename"]))
if path not in images_annotations:
Expand Down Expand Up @@ -129,4 +128,4 @@ def generator() -> DatasetIterator:

added_images = self._get_added_images(generator())

return generator(), list(class_names), {}, added_images
return generator(), {}, added_images
6 changes: 2 additions & 4 deletions luxonis_ml/data/parsers/voc_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ def validate(dataset_dir: Path) -> bool:

def from_dir(
self, dataset_dir: Path
) -> Tuple[List[str], List[str], List[str]]:
) -> Tuple[List[Path], List[Path], List[Path]]:
added_train_imgs = self._parse_split(
image_dir=dataset_dir / "train",
annotation_dir=dataset_dir / "train",
Expand Down Expand Up @@ -77,7 +77,6 @@ def from_split(
dictionary for keypoints and list of added images.
"""

class_names = set()
images_annotations = []
for anno_xml in annotation_dir.glob("*.xml"):
annotation_data = ET.parse(anno_xml)
Expand All @@ -98,7 +97,6 @@ def from_split(
for object_item in root.findall("object"):
class_name = self._xml_find(object_item, "name")
curr_annotations["classes"].append(class_name)
class_names.add(class_name)

bbox_info = object_item.find("bndbox")
if bbox_info is not None:
Expand Down Expand Up @@ -137,7 +135,7 @@ def generator() -> DatasetIterator:

added_images = self._get_added_images(generator())

return generator(), list(class_names), {}, added_images
return generator(), {}, added_images

@staticmethod
def _xml_find(root: ET.Element, tag: str) -> str:
Expand Down
4 changes: 2 additions & 2 deletions luxonis_ml/data/parsers/yolov4_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ def validate(dataset_dir: Path) -> bool:

def from_dir(
self, dataset_dir: Path
) -> Tuple[List[str], List[str], List[str]]:
) -> Tuple[List[Path], List[Path], List[Path]]:
added_train_imgs = self._parse_split(
image_dir=dataset_dir / "train",
annotation_path=dataset_dir / "train" / "_annotations.txt",
Expand Down Expand Up @@ -127,4 +127,4 @@ def generator() -> DatasetIterator:

added_images = self._get_added_images(generator())

return generator(), list(class_names.values()), {}, added_images
return generator(), {}, added_images
4 changes: 2 additions & 2 deletions luxonis_ml/data/parsers/yolov6_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ def validate(dataset_dir: Path) -> bool:

def from_dir(
self, dataset_dir: Path
) -> Tuple[Optional[List[str]], Optional[List[str]], Optional[List[str]]]:
) -> Tuple[List[Path], List[Path], List[Path]]:
classes_path = dataset_dir / "data.yaml"
added_train_imgs = self._parse_split(
image_dir=dataset_dir / "images" / "train",
Expand Down Expand Up @@ -138,4 +138,4 @@ def generator() -> DatasetIterator:

added_images = self._get_added_images(generator())

return generator(), list(class_names.values()), {}, added_images
return generator(), {}, added_images
1 change: 1 addition & 0 deletions luxonis_ml/enums/enums.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,4 @@ class DatasetType(str, Enum):
CLSDIR = "clsdir"
SEGMASK = "segmask"
SOLO = "solo"
NATIVE = "native"

0 comments on commit 1cbc795

Please sign in to comment.