hotfix: adding file names to be ignored by the COCO parser (#199)

luxonis · Nov 6, 2024 · 41f35fe · 41f35fe
1 parent 7702187
commit 41f35fe
Showing 1 changed file with 61 additions and 1 deletion.
diff --git a/luxonis_ml/data/parsers/coco_parser.py b/luxonis_ml/data/parsers/coco_parser.py
@@ -136,9 +136,10 @@ def from_dir(
             and dir_format == Format.FIFTYONE
             else train_paths["annotation_path"]
         )
+        cleaned_annotation_path = clean_annotations(train_ann_path)
         added_train_imgs = self._parse_split(
             image_dir=train_paths["image_dir"],
-            annotation_path=train_ann_path,
+            annotation_path=cleaned_annotation_path,
         )
 
         val_paths = COCOParser.validate_split(dataset_dir / splits[1])
@@ -199,6 +200,7 @@ def from_split(
         @return: Annotation generator, list of classes names, skeleton
             dictionary for keypoints and list of added images.
         """
+
         with open(annotation_path) as f:
             annotation_data = json.load(f)
 
@@ -236,6 +238,8 @@ def generator() -> DatasetIterator:
                 img_w = img["width"]
 
                 for i, ann in enumerate(img_anns):
+                    if ann.get("iscrowd", True):
+                        continue
                     class_name = categories[ann["category_id"]]
                     yield {
                         "file": path,
@@ -316,3 +320,59 @@ def generator() -> DatasetIterator:
         added_images = self._get_added_images(generator())
 
         return generator(), class_names, skeletons, added_images
+
+
+def clean_annotations(annotation_path: Path) -> Path:
+    """Drops known-problematic images from a COCO annotation file.
+
+    Images whose "file_name" is on the built-in deny list are removed
+    together with every annotation that references them.
+
+    @type annotation_path: Path
+    @param annotation_path: Path to the annotation JSON file.
+    @rtype: Path
+    @return: The input path itself when no image had to be removed,
+        otherwise the path of a sibling "labels_fixed.json" file
+        holding the filtered data.
+    """
+    files_to_avoid = {
+        "000000341448.jpg",
+        "000000279522.jpg",
+        "000000090169.jpg",
+        "000000321238.jpg",
+        "000000242807.jpg",
+        "000000297126.jpg",
+        "000000411274.jpg",
+        "000000407259.jpg",
+        "000000446141.jpg",
+        "000000373199.jpg",
+        "000000410810.jpg",
+        "000000397819.jpg",
+        "000000578492.jpg",
+        "000000531721.jpg",
+    }
+    # JSON is UTF-8 by specification; do not rely on the locale default.
+    with open(annotation_path, encoding="utf-8") as f:
+        annotation_data = json.load(f)
+
+    images = annotation_data["images"]
+    kept_images = [
+        img for img in images if img["file_name"] not in files_to_avoid
+    ]
+    if len(kept_images) == len(images):
+        return annotation_path
+
+    kept_ids = {img["id"] for img in kept_images}
+    annotation_data["images"] = kept_images
+    # Files without an "annotations" section are treated as having none.
+    annotation_data["annotations"] = [
+        ann
+        for ann in annotation_data.get("annotations", [])
+        if ann["image_id"] in kept_ids
+    ]
+
+    cleaned_annotation_path = annotation_path.with_name("labels_fixed.json")
+    with open(cleaned_annotation_path, "w", encoding="utf-8") as f:
+        json.dump(annotation_data, f)
+
+    return cleaned_annotation_path