Skip to content

Commit

Permalink
hotfix: adding file names to be ignored by the COCO parser (#199)
Browse files Browse the repository at this point in the history
  • Loading branch information
JSabadin authored Nov 6, 2024
1 parent 7702187 commit 41f35fe
Showing 1 changed file with 61 additions and 1 deletion.
62 changes: 61 additions & 1 deletion luxonis_ml/data/parsers/coco_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,9 +136,10 @@ def from_dir(
and dir_format == Format.FIFTYONE
else train_paths["annotation_path"]
)
cleaned_annotation_path = clean_annotations(train_ann_path)
added_train_imgs = self._parse_split(
image_dir=train_paths["image_dir"],
annotation_path=train_ann_path,
annotation_path=cleaned_annotation_path,
)

val_paths = COCOParser.validate_split(dataset_dir / splits[1])
Expand Down Expand Up @@ -199,6 +200,7 @@ def from_split(
@return: Annotation generator, list of classes names, skeleton
dictionary for keypoints and list of added images.
"""

with open(annotation_path) as f:
annotation_data = json.load(f)

Expand Down Expand Up @@ -236,6 +238,8 @@ def generator() -> DatasetIterator:
img_w = img["width"]

for i, ann in enumerate(img_anns):
if ann.get("iscrowd", True):
continue
class_name = categories[ann["category_id"]]
yield {
"file": path,
Expand Down Expand Up @@ -316,3 +320,59 @@ def generator() -> DatasetIterator:
added_images = self._get_added_images(generator())

return generator(), class_names, skeletons, added_images


def clean_annotations(annotation_path: Path) -> Path:
    """Cleans annotations by removing images that are known to cause
    issues.

    Every image whose C{file_name} is on the hard-coded deny-list is
    dropped, together with all annotations that reference it. When at
    least one image is removed, the result is written to a new file next
    to the input; the input file itself is never modified.

    @type annotation_path: Path
    @param annotation_path: Path to the annotation JSON file.
    @rtype: Path
    @return: C{annotation_path} unchanged if no listed image is present,
        otherwise the path to the cleaned annotation JSON file
        ("labels_fixed.json") located in the same directory.
    """

    # A frozenset gives O(1) membership checks; annotation files can list
    # a very large number of images, so avoid a linear scan per image.
    files_to_avoid = frozenset(
        {
            "000000341448.jpg",
            "000000279522.jpg",
            "000000090169.jpg",
            "000000321238.jpg",
            "000000242807.jpg",
            "000000297126.jpg",
            "000000411274.jpg",
            "000000407259.jpg",
            "000000446141.jpg",
            "000000373199.jpg",
            "000000410810.jpg",
            "000000397819.jpg",
            "000000578492.jpg",
            "000000531721.jpg",
        }
    )

    # JSON is UTF-8 by specification; do not depend on the locale default.
    with open(annotation_path, "r", encoding="utf-8") as f:
        annotation_data = json.load(f)

    images = annotation_data["images"]
    filtered_images = [
        img for img in images if img["file_name"] not in files_to_avoid
    ]

    # Nothing was removed: reuse the original file instead of writing a copy.
    if len(filtered_images) == len(images):
        return annotation_path

    annotation_data["images"] = filtered_images

    # Image-only annotation files carry no "annotations" section; only
    # filter it when present so such files do not raise a KeyError.
    if "annotations" in annotation_data:
        filtered_image_ids = {img["id"] for img in filtered_images}
        annotation_data["annotations"] = [
            ann
            for ann in annotation_data["annotations"]
            if ann["image_id"] in filtered_image_ids
        ]

    cleaned_annotation_path = annotation_path.with_name("labels_fixed.json")
    with open(cleaned_annotation_path, "w", encoding="utf-8") as f:
        json.dump(annotation_data, f)

    return cleaned_annotation_path

0 comments on commit 41f35fe

Please sign in to comment.