-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsplit_data.py
49 lines (38 loc) · 2.18 KB
/
split_data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
import os
import shutil
import random
def split_dataset(source_folder, masks_folder, train_folder, val_folder, test_folder, masks_train_folder, masks_val_folder, masks_test_folder, train_size=500, test_size=55, val_size=145):
    """Copy a random train/test/validation split of image/mask pairs.

    The first ``train_size + test_size + val_size`` entries (in sorted
    filename order) of ``source_folder`` and ``masks_folder`` are paired by
    position, shuffled, and copied into the destination folders. The source
    files are left in place.

    Images and masks are matched purely by their position after sorting, so
    both folders must sort into the same order for the pairs to be correct.
    If one folder has fewer entries than the other, the extra entries of the
    longer one are ignored.

    Args:
        source_folder: Directory containing the input images.
        masks_folder: Directory containing the input masks.
        train_folder: Destination for training images.
        val_folder: Destination for validation images.
        test_folder: Destination for test images.
        masks_train_folder: Destination for training masks.
        masks_val_folder: Destination for validation masks.
        masks_test_folder: Destination for test masks.
        train_size: Maximum number of pairs in the training subset.
        test_size: Maximum number of pairs in the test subset.
        val_size: Maximum number of pairs in the validation subset.

    Raises:
        OSError: If an input directory cannot be listed or a file cannot be
            copied (propagated from ``os.listdir`` / ``shutil.copy``).
    """
    # Create every destination up front; existing folders are reused.
    for folder in (
        train_folder,
        val_folder,
        test_folder,
        masks_train_folder,
        masks_val_folder,
        masks_test_folder,
    ):
        os.makedirs(folder, exist_ok=True)

    # Only as many files as the three subsets need are considered
    # (700 with the default sizes).
    total = train_size + test_size + val_size
    image_files = sorted(os.listdir(source_folder))[:total]
    mask_files = sorted(os.listdir(masks_folder))[:total]

    # Shuffle images and masks together so each image keeps its mask.
    pairs = list(zip(image_files, mask_files))
    random.shuffle(pairs)

    # Slice the shuffled pairs directly instead of unzipping them: unzipping
    # an empty list fails, and slicing keeps each subset within its size.
    test_start = train_size
    val_start = test_start + test_size
    val_end = val_start + val_size
    train_pairs = pairs[:test_start]
    test_pairs = pairs[test_start:val_start]
    val_pairs = pairs[val_start:val_end]

    def copy_pairs(subset, dest_img_folder, dest_mask_folder):
        """Copy each (image, mask) pair of ``subset`` into its destinations."""
        for img, mask in subset:
            shutil.copy(os.path.join(source_folder, img), os.path.join(dest_img_folder, img))
            shutil.copy(os.path.join(masks_folder, mask), os.path.join(dest_mask_folder, mask))

    copy_pairs(train_pairs, train_folder, masks_train_folder)
    copy_pairs(test_pairs, test_folder, masks_test_folder)
    copy_pairs(val_pairs, val_folder, masks_val_folder)

    # Report what was actually copied, which may be less than requested
    # when the input folders hold fewer files.
    print(f"Split completed! \nTrain: {len(train_pairs)}, Test: {len(test_pairs)}, Val: {len(val_pairs)}")
# Input directories: the images and the masks that belong to them.
source_folder = "data/train/train"
masks_folder = "data/train_masks/train_masks/"

# Output directories for the image subsets.
train_folder = "data/split/train_images/"
val_folder = "data/split/val_images/"
test_folder = "data/split/test_images/"

# Output directories for the corresponding mask subsets.
masks_train_folder = "data/split/train_masks/"
masks_val_folder = "data/split/val_masks/"
masks_test_folder = "data/split/test_masks/"

# Run the split; no sizes are passed, so the function's defaults apply.
split_dataset(
    source_folder=source_folder,
    masks_folder=masks_folder,
    train_folder=train_folder,
    val_folder=val_folder,
    test_folder=test_folder,
    masks_train_folder=masks_train_folder,
    masks_val_folder=masks_val_folder,
    masks_test_folder=masks_test_folder,
)