Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

hotfix: adding file names to be ignored by the COCO parser #199

Merged
merged 4 commits into from
Nov 6, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
62 changes: 61 additions & 1 deletion luxonis_ml/data/parsers/coco_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,9 +136,10 @@
and dir_format == Format.FIFTYONE
else train_paths["annotation_path"]
)
cleaned_annotation_path = clean_annotations(train_ann_path)
added_train_imgs = self._parse_split(
image_dir=train_paths["image_dir"],
annotation_path=train_ann_path,
annotation_path=cleaned_annotation_path,
)

val_paths = COCOParser.validate_split(dataset_dir / splits[1])
Expand Down Expand Up @@ -199,6 +200,7 @@
@return: Annotation generator, list of classes names, skeleton
dictionary for keypoints and list of added images.
"""

with open(annotation_path) as f:
annotation_data = json.load(f)

Expand Down Expand Up @@ -236,6 +238,8 @@
img_w = img["width"]

for i, ann in enumerate(img_anns):
if ann.get("iscrowd", True):
continue
class_name = categories[ann["category_id"]]
yield {
"file": path,
Expand Down Expand Up @@ -316,3 +320,59 @@
added_images = self._get_added_images(generator())

return generator(), class_names, skeletons, added_images


def clean_annotations(annotation_path: Path) -> Path:
    """Cleans annotations by removing images that are known to cause
    issues.

    Images whose C{file_name} is on the internal blocklist are dropped
    from the C{"images"} list, and every annotation that references one
    of the dropped images is dropped from C{"annotations"} (when that
    key is present). If nothing has to be removed, the input file is
    left untouched and no new file is written.

    @type annotation_path: Path
    @param annotation_path: Path to the annotation JSON file.
    @rtype: Path
    @return: Path to the cleaned annotation JSON file
        ("labels_fixed.json", written next to the input file), or the
        input path itself when no blocklisted image was found.
    """
    # A frozenset gives O(1) membership checks; the annotation file can
    # list a very large number of images.
    files_to_avoid = frozenset(
        {
            "000000341448.jpg",
            "000000279522.jpg",
            "000000090169.jpg",
            "000000321238.jpg",
            "000000242807.jpg",
            "000000297126.jpg",
            "000000411274.jpg",
            "000000407259.jpg",
            "000000446141.jpg",
            "000000373199.jpg",
            "000000410810.jpg",
            "000000397819.jpg",
            "000000578492.jpg",
            "000000531721.jpg",
        }
    )

    # Normalise so that plain string paths work with ``with_name`` below.
    annotation_path = Path(annotation_path)

    with open(annotation_path, encoding="utf-8") as f:
        annotation_data = json.load(f)

    images = annotation_data["images"]
    filtered_images = [
        img for img in images if img["file_name"] not in files_to_avoid
    ]

    # Nothing was filtered out: reuse the original file as-is.
    if len(filtered_images) == len(images):
        return annotation_path

    filtered_image_ids = {img["id"] for img in filtered_images}
    annotation_data["images"] = filtered_images

    # Image-only annotation files have no "annotations" entry; only
    # filter the list when it actually exists.
    if "annotations" in annotation_data:
        annotation_data["annotations"] = [
            ann
            for ann in annotation_data["annotations"]
            if ann["image_id"] in filtered_image_ids
        ]

    cleaned_annotation_path = annotation_path.with_name("labels_fixed.json")
    with open(cleaned_annotation_path, "w", encoding="utf-8") as f:
        json.dump(annotation_data, f)

    return cleaned_annotation_path

Check warning on line 378 in luxonis_ml/data/parsers/coco_parser.py

View check run for this annotation

Codecov / codecov/patch

luxonis_ml/data/parsers/coco_parser.py#L378

Added line #L378 was not covered by tests
Loading