# convert_to_csv.py
# dependencies
# OS modules
import os
# Pandas
import pandas as pd
# In-built time module
import time
# tqdm for progress bars
from tqdm import tqdm
# Pillow Image Library
from PIL import Image
# Numpy module
import numpy as np
# Edge length, in pixels, that every image is resized to before flattening.
# Replace with your own size; the CSV gets IMAGE_SIDE * IMAGE_SIDE pixel columns
# (28 x 28 = 784 here).
IMAGE_SIDE = 28
# Number of class folders to read; they must be named "0" .. str(NUM_FOLDERS - 1)
# and are resolved relative to the current working directory.
NUM_FOLDERS = 58
# CSV file that receives the converted dataset.
OUTPUT_CSV = "train_converted.csv"


def build_column_names(num_pixels: int) -> list:
    """Return the CSV header: 'label' followed by '0' .. str(num_pixels - 1)."""
    return ['label'] + [str(index) for index in range(num_pixels)]


def flatten_pixels(imgdata) -> list:
    """Flatten a 2-D pixel grid into a flat list of plain Python numbers.

    The output order matches the original nested loops
    (``for y: for x: imgdata[x][y]``): the second index is the outer loop and
    the first index is the inner loop, i.e. the grid is read column by column.

    NOTE(review): this is the transpose of the usual row-major flattening and
    may have been an x/y mix-up; it is kept so the produced CSV stays
    compatible with anything already consuming it - confirm before changing.

    Args:
        imgdata: 2-D array-like of pixel values.

    Returns:
        A list with one entry per pixel.

    Raises:
        ValueError: if ``imgdata`` is not 2-D (for example an RGB image, which
            has a third channel axis and cannot fit one-value-per-pixel columns).
    """
    pixels = np.asarray(imgdata)
    if pixels.ndim != 2:
        raise ValueError(
            "expected a single-channel 2-D image, got array of shape "
            + str(pixels.shape)
        )
    # Transposing first and then flattening row-major yields column-major order.
    return pixels.T.reshape(-1).tolist()


def load_image_pixels(path, side: int = IMAGE_SIDE):
    """Open the image at ``path``, resize it to ``side`` x ``side`` and return
    its pixels as an int32 numpy array."""
    # The context manager closes the underlying file as soon as the resized
    # copy exists, instead of leaving the handle open until garbage collection.
    with Image.open(path) as img:
        resized = img.resize((side, side), Image.NEAREST)
    return np.asarray(resized, dtype="int32")


def convert_dataset(num_folders: int = NUM_FOLDERS, side: int = IMAGE_SIDE,
                    root: str = ""):
    """Read every image in the class folders and return them as a DataFrame.

    Each row holds the folder name as the (string) label followed by the
    flattened pixel values of one image.

    Args:
        num_folders: number of class folders, named "0" .. str(num_folders - 1).
        side: edge length every image is resized to.
        root: directory containing the class folders; "" means the current
            working directory.

    Returns:
        A pandas DataFrame with ``1 + side * side`` columns.

    Raises:
        ValueError: if an image is not single-channel after loading.
    """
    # Rows are collected in a plain list and turned into a DataFrame once at
    # the end; growing a DataFrame row by row with .loc re-allocates on every
    # insert and becomes quadratic in the number of images.
    rows = []
    # iterate through every folder of the dataset
    for label in range(num_folders):
        print("Iterating: " + str(label) + " folder")
        folder = os.path.join(root, str(label))
        # iterate through every image in the folder; tqdm shows a progress bar
        for file in tqdm(os.listdir(folder)):
            path = os.path.join(folder, file)
            try:
                row_pixels = flatten_pixels(load_image_pixels(path, side))
            except ValueError as err:
                # Re-raise with the offending file so the user can find it.
                raise ValueError("{}: {}".format(path, err)) from err
            rows.append([str(label)] + row_pixels)
    return pd.DataFrame(rows, columns=build_column_names(side * side))


def main():
    """Convert the dataset and write it to OUTPUT_CSV (without the index)."""
    train_data = convert_dataset()
    train_data.to_csv(OUTPUT_CSV, index=False)


if __name__ == "__main__":
    main()