# File names of images that are known to cause issues. Compared against the
# "file_name" field of every entry in the annotation file's "images" list.
# Kept as a frozenset so it is built once and membership tests are O(1).
_FILES_TO_AVOID = frozenset(
    {
        "000000341448.jpg",
        "000000279522.jpg",
        "000000090169.jpg",
        "000000321238.jpg",
        "000000242807.jpg",
        "000000297126.jpg",
        "000000411274.jpg",
        "000000407259.jpg",
        "000000446141.jpg",
        "000000373199.jpg",
        "000000410810.jpg",
        "000000397819.jpg",
        "000000578492.jpg",
        "000000531721.jpg",
    }
)


def clean_annotations(annotation_path: Path) -> Path:
    """Cleans annotations by removing images that are known to cause
    issues.

    Loads the annotation JSON, drops every image whose C{file_name} is
    on the block list and, when the file has an C{"annotations"} list,
    drops the annotations that reference a removed image. All other
    top-level keys are written back unchanged.

    The input file is never modified. If nothing had to be removed, no
    file is written and the input path is returned. Otherwise the
    result is written to C{"labels_fixed.json"} in the same directory
    as the input, overwriting any existing file of that name.

    In the change that introduced this helper, C{COCOParser.from_dir}
    passes the train annotation file through it before parsing the
    split.

    @type annotation_path: Path
    @param annotation_path: Path to the annotation JSON file. A plain
        string path is accepted as well.
    @rtype: Path
    @return: Path to the cleaned annotation JSON file
        ("labels_fixed.json"), or C{annotation_path} itself when no
        image had to be removed.
    @raise KeyError: If the JSON has no C{"images"} list, or an image
        entry lacks C{"file_name"} / C{"id"}.
    """
    # Normalise so that `with_name` below also works for string paths.
    annotation_path = Path(annotation_path)

    # JSON is UTF-8 by specification; do not depend on the locale default.
    with open(annotation_path, encoding="utf-8") as f:
        annotation_data = json.load(f)

    images = annotation_data["images"]
    filtered_images = [
        img for img in images if img["file_name"] not in _FILES_TO_AVOID
    ]

    # Nothing was filtered out: keep using the original file as-is.
    if len(filtered_images) == len(images):
        return annotation_path

    annotation_data["images"] = filtered_images

    # Image-info-only files have no "annotations" list; only filter the
    # annotations when they are actually present.
    if "annotations" in annotation_data:
        kept_image_ids = {img["id"] for img in filtered_images}
        annotation_data["annotations"] = [
            ann
            for ann in annotation_data["annotations"]
            if ann["image_id"] in kept_image_ids
        ]

    cleaned_annotation_path = annotation_path.with_name("labels_fixed.json")
    with open(cleaned_annotation_path, "w", encoding="utf-8") as f:
        json.dump(annotation_data, f)

    return cleaned_annotation_path