-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
MCG
committed
Nov 15, 2024
1 parent
fabc98e
commit 196d148
Showing
1 changed file
with
319 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,319 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 7, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"import os\n", | ||
"import json\n", | ||
"import cv2\n", | ||
"\n", | ||
"# Define directories\n", | ||
"label_dir = \"/data/ephemeral/home/MCG/train/outputs_json\"\n", | ||
"input_dir = \"/data/ephemeral/home/MCG/train/DCM\"\n", | ||
"output_dir = \"/data/ephemeral/home/MCG/train/annotated_images\"\n", | ||
"\n", | ||
"# Ensure the output directory exists\n", | ||
"os.makedirs(output_dir, exist_ok=True)\n", | ||
"\n", | ||
"# Calculate bounding box for a set of points\n", | ||
"def calculate_bounding_box(points):\n", | ||
" if not points:\n", | ||
" return None\n", | ||
" x_values = [p[0] for p in points]\n", | ||
" y_values = [p[1] for p in points]\n", | ||
" return min(x_values), min(y_values), max(x_values), max(y_values)\n", | ||
"\n", | ||
"# Process each patient's images\n", | ||
"for patient_id in os.listdir(label_dir):\n", | ||
" patient_label_path = os.path.join(label_dir, patient_id)\n", | ||
" patient_image_path = os.path.join(input_dir, patient_id)\n", | ||
" \n", | ||
" if not os.path.isdir(patient_label_path):\n", | ||
" continue\n", | ||
"\n", | ||
" for json_file in os.listdir(patient_label_path):\n", | ||
" if json_file.endswith(\".json\"):\n", | ||
" # Load JSON data\n", | ||
" json_path = os.path.join(patient_label_path, json_file)\n", | ||
" with open(json_path, \"r\") as f:\n", | ||
" data = json.load(f)\n", | ||
"\n", | ||
" # Prepare image path and output path\n", | ||
" image_name = json_file.replace(\".json\", \".png\")\n", | ||
" image_path = os.path.join(patient_image_path, image_name)\n", | ||
" output_path = os.path.join(output_dir, f\"{patient_id}_{image_name}\")\n", | ||
" \n", | ||
" # Skip if the corresponding image does not exist\n", | ||
" if not os.path.isfile(image_path):\n", | ||
" continue\n", | ||
"\n", | ||
" # Read the image using OpenCV\n", | ||
" image = cv2.imread(image_path)\n", | ||
" if image is None:\n", | ||
" continue\n", | ||
"\n", | ||
" # Initialize groups\n", | ||
" finger_points = []\n", | ||
" radius_ulna_points = []\n", | ||
" others_points = []\n", | ||
"\n", | ||
" # Categorize points based on labels\n", | ||
" for annotation in data['annotations']:\n", | ||
" label = annotation['label']\n", | ||
" points = annotation['points']\n", | ||
" \n", | ||
" if 'finger' in label.lower():\n", | ||
" finger_points.extend(points)\n", | ||
" elif label in ['Radius', 'Ulna']:\n", | ||
" radius_ulna_points.extend(points)\n", | ||
" else:\n", | ||
" others_points.extend(points)\n", | ||
"\n", | ||
" # Calculate bounding boxes for each group\n", | ||
" finger_box = calculate_bounding_box(finger_points)\n", | ||
" radius_ulna_box = calculate_bounding_box(radius_ulna_points)\n", | ||
" others_box = calculate_bounding_box(others_points)\n", | ||
"\n", | ||
" # Draw bounding boxes on the image\n", | ||
" if finger_box is not None:\n", | ||
" x_min, y_min, x_max, y_max = finger_box\n", | ||
" cv2.rectangle(image, (x_min, y_min), (x_max, y_max), (255, 0, 0), 2) # Blue for Fingers\n", | ||
" if radius_ulna_box is not None:\n", | ||
" x_min, y_min, x_max, y_max = radius_ulna_box\n", | ||
" cv2.rectangle(image, (x_min, y_min), (x_max, y_max), (0, 255, 0), 2) # Green for Radius & Ulna\n", | ||
" if others_box is not None:\n", | ||
" x_min, y_min, x_max, y_max = others_box\n", | ||
" cv2.rectangle(image, (x_min, y_min), (x_max, y_max), (0, 0, 255), 2) # Red for Others\n", | ||
"\n", | ||
" # Save the annotated image\n", | ||
" cv2.imwrite(output_path, image)\n" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 11, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"Annotations and images have been successfully processed and saved in YOLO format.\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"import os\n", | ||
"import json\n", | ||
"import cv2\n", | ||
"import shutil\n", | ||
"\n", | ||
"# Define directories\n", | ||
"label_dir = \"/data/ephemeral/home/MCG/train/outputs_json\"\n", | ||
"input_dir = \"/data/ephemeral/home/MCG/train/DCM\"\n", | ||
"output_dir = \"/data/ephemeral/home/MCG/train/yolo_annotations\"\n", | ||
"\n", | ||
"# Ensure the output directory exists\n", | ||
"os.makedirs(output_dir, exist_ok=True)\n", | ||
"\n", | ||
"# Calculate bounding box for a set of points\n", | ||
"def calculate_bounding_box(points):\n", | ||
" if not points:\n", | ||
" return None\n", | ||
" x_values = [p[0] for p in points]\n", | ||
" y_values = [p[1] for p in points]\n", | ||
" return min(x_values), min(y_values), max(x_values), max(y_values)\n", | ||
"\n", | ||
"# Normalize bounding box to YOLO format\n", | ||
"def normalize_bbox(image_shape, bbox):\n", | ||
" if bbox is None:\n", | ||
" return None\n", | ||
" height, width = image_shape[:2]\n", | ||
" x_min, y_min, x_max, y_max = bbox\n", | ||
" x_center = (x_min + x_max) / 2 / width\n", | ||
" y_center = (y_min + y_max) / 2 / height\n", | ||
" box_width = (x_max - x_min) / width\n", | ||
" box_height = (y_max - y_min) / height\n", | ||
" return x_center, y_center, box_width, box_height\n", | ||
"\n", | ||
"# Define class mappings\n", | ||
"class_map = {\n", | ||
" \"finger\": 0,\n", | ||
" \"radius_ulna\": 1,\n", | ||
" \"others\": 2\n", | ||
"}\n", | ||
"\n", | ||
"# Process each patient's images\n", | ||
"for patient_id in os.listdir(label_dir):\n", | ||
" patient_label_path = os.path.join(label_dir, patient_id)\n", | ||
" patient_image_path = os.path.join(input_dir, patient_id)\n", | ||
" \n", | ||
" if not os.path.isdir(patient_label_path):\n", | ||
" continue\n", | ||
"\n", | ||
" for json_file in os.listdir(patient_label_path):\n", | ||
" if json_file.endswith(\".json\"):\n", | ||
" # Load JSON data\n", | ||
" json_path = os.path.join(patient_label_path, json_file)\n", | ||
" with open(json_path, \"r\") as f:\n", | ||
" data = json.load(f)\n", | ||
"\n", | ||
" # Prepare image path\n", | ||
" image_name = json_file.replace(\".json\", \".png\")\n", | ||
" image_path = os.path.join(patient_image_path, image_name)\n", | ||
" \n", | ||
" # Skip if the corresponding image does not exist\n", | ||
" if not os.path.isfile(image_path):\n", | ||
" continue\n", | ||
"\n", | ||
" # Read the image to get dimensions\n", | ||
" image = cv2.imread(image_path)\n", | ||
" if image is None:\n", | ||
" continue\n", | ||
"\n", | ||
" # Initialize groups\n", | ||
" finger_points = []\n", | ||
" radius_ulna_points = []\n", | ||
" others_points = []\n", | ||
"\n", | ||
" # Categorize points based on labels\n", | ||
" for annotation in data['annotations']:\n", | ||
" label = annotation['label']\n", | ||
" points = annotation['points']\n", | ||
" \n", | ||
" if 'finger' in label.lower():\n", | ||
" finger_points.extend(points)\n", | ||
" elif label in ['Radius', 'Ulna']:\n", | ||
" radius_ulna_points.extend(points)\n", | ||
" else:\n", | ||
" others_points.extend(points)\n", | ||
"\n", | ||
" # Calculate bounding boxes for each group\n", | ||
" finger_box = calculate_bounding_box(finger_points)\n", | ||
" radius_ulna_box = calculate_bounding_box(radius_ulna_points)\n", | ||
" others_box = calculate_bounding_box(others_points)\n", | ||
"\n", | ||
" # Normalize bounding boxes\n", | ||
" yolo_annotations = []\n", | ||
" if finger_box is not None:\n", | ||
" yolo_annotations.append((class_map[\"finger\"], *normalize_bbox(image.shape, finger_box)))\n", | ||
" if radius_ulna_box is not None:\n", | ||
" yolo_annotations.append((class_map[\"radius_ulna\"], *normalize_bbox(image.shape, radius_ulna_box)))\n", | ||
" if others_box is not None:\n", | ||
" yolo_annotations.append((class_map[\"others\"], *normalize_bbox(image.shape, others_box)))\n", | ||
"\n", | ||
" # Save YOLO annotation to a text file\n", | ||
" annotation_file = os.path.join(output_dir, f\"{patient_id}_{image_name.replace('.png', '.txt')}\")\n", | ||
" with open(annotation_file, \"w\") as f:\n", | ||
" for annotation in yolo_annotations:\n", | ||
" class_id, x_center, y_center, box_width, box_height = annotation\n", | ||
" f.write(f\"{class_id} {x_center:.6f} {y_center:.6f} {box_width:.6f} {box_height:.6f}\\n\")\n", | ||
" \n", | ||
" # Copy the image to the output directory\n", | ||
" output_image_path = os.path.join(output_dir, f\"{patient_id}_{image_name}\")\n", | ||
" shutil.copy(image_path, output_image_path)\n", | ||
"\n", | ||
"print(\"Annotations and images have been successfully processed and saved in YOLO format.\")\n" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 13, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"Train and validation datasets created.\n", | ||
"Train files: 680, Validation files: 120\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"import os\n", | ||
"import shutil\n", | ||
"import random\n", | ||
"\n", | ||
"# Directories\n", | ||
"yolo_annotations_dir = \"/data/ephemeral/home/MCG/train/yolo_annotations\"\n", | ||
"train_dir = \"/data/ephemeral/home/MCG/train/yolo_dataset/train\"\n", | ||
"valid_dir = \"/data/ephemeral/home/MCG/train/yolo_dataset/valid\"\n", | ||
"\n", | ||
"# Create directories for train and validation sets\n", | ||
"os.makedirs(train_dir, exist_ok=True)\n", | ||
"os.makedirs(valid_dir, exist_ok=True)\n", | ||
"\n", | ||
"# Group annotation files by patient ID\n", | ||
"annotation_files = [\n", | ||
" os.path.join(yolo_annotations_dir, file)\n", | ||
" for file in os.listdir(yolo_annotations_dir)\n", | ||
" if file.endswith(\".txt\")\n", | ||
"]\n", | ||
"\n", | ||
"# Group files by patient ID\n", | ||
"patient_groups = {}\n", | ||
"for file in annotation_files:\n", | ||
" patient_id = os.path.basename(file).split('_')[0] # Extract patient ID (e.g., \"ID001\")\n", | ||
" if patient_id not in patient_groups:\n", | ||
" patient_groups[patient_id] = []\n", | ||
" patient_groups[patient_id].append(file)\n", | ||
"\n", | ||
"# Shuffle patient IDs\n", | ||
"patient_ids = list(patient_groups.keys())\n", | ||
"random.shuffle(patient_ids)\n", | ||
"\n", | ||
"# Split patient IDs into train (85%) and valid (15%)\n", | ||
"split_index = int(len(patient_ids) * 0.85)\n", | ||
"train_patient_ids = patient_ids[:split_index]\n", | ||
"valid_patient_ids = patient_ids[split_index:]\n", | ||
"\n", | ||
"# Collect files for train and valid\n", | ||
"train_files = [file for pid in train_patient_ids for file in patient_groups[pid]]\n", | ||
"valid_files = [file for pid in valid_patient_ids for file in patient_groups[pid]]\n", | ||
"\n", | ||
"# Helper function to move files\n", | ||
"def move_files(file_list, target_dir):\n", | ||
" for file in file_list:\n", | ||
" # Move annotation file\n", | ||
" shutil.copy(file, os.path.join(target_dir, os.path.basename(file)))\n", | ||
" \n", | ||
" # Move the corresponding image file\n", | ||
" image_file = file.replace(\".txt\", \".png\")\n", | ||
" if os.path.isfile(image_file):\n", | ||
" shutil.copy(image_file, os.path.join(target_dir, os.path.basename(image_file)))\n", | ||
"\n", | ||
"# Move files to respective directories\n", | ||
"move_files(train_files, train_dir)\n", | ||
"move_files(valid_files, valid_dir)\n", | ||
"\n", | ||
"print(f\"Train and validation datasets created.\")\n", | ||
"print(f\"Train files: {len(train_files)}, Validation files: {len(valid_files)}\")\n" | ||
] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "base", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.10.13" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 2 | ||
} |