Skip to content

Commit

Permalink
Merge branch 'master' into bugfix/dynamic-num-letters
Browse files Browse the repository at this point in the history
# Conflicts:
#	src/create_train_data.py
  • Loading branch information
fsarachu committed Oct 29, 2019
2 parents fcb04ed + efed6ce commit 96f23d8
Show file tree
Hide file tree
Showing 3 changed files with 27 additions and 20 deletions.
14 changes: 5 additions & 9 deletions Readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,16 +23,12 @@ $ npm i
```
|_data
|_captcha
|_ captcha_1.jpg
|_ captcha_2.jpg
```
* Create mapping file `data/captcha.json` to map your train image with corresponding label
```json
{
"captcha_1.jpg": "HEYMEN",
"captcha_2.jpg": "XINCHA"
}
|_ xss7.jpg
|_ tvu4.jpg
```
**IMPORTANT:** Note that each image file is named after its own solution.

That means that if an image is named `A1bD3.jpg`, it corresponds to a captcha whose solution is `A1bD3`.

#### Build train data for model
Running `src/create_train_data.py` will save your training data as the compressed file `data/captcha.npz`.
Expand Down
6 changes: 3 additions & 3 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ mock==2.0.0
networkx==2.1
numpy==1.14.5
pbr==4.0.4
Pillow==5.1.0
Pillow==6.2.0
protobuf==3.6.0
pyparsing==2.2.0
python-dateutil==2.7.3
Expand All @@ -31,7 +31,7 @@ scipy==1.1.0
six==1.11.0
subprocess32==3.5.2
tensorboard==1.8.0
tensorflow==1.8.0
tensorflow==1.12.2
termcolor==1.1.0
toolz==0.9.0
Werkzeug==0.14.1
Werkzeug==0.15.3
27 changes: 19 additions & 8 deletions src/create_train_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,22 @@
import os
import json
from skimage import io
from skimage.color import rgb2gray
from img import split_letters
import numpy as np


# Helper methods
def strip_extension(filename):
    """Return ``filename`` without its final extension.

    Only the last extension is removed, so ``'a.b.png'`` becomes ``'a.b'``.
    A name that has no extension is returned unchanged instead of raising
    ``ValueError``, and dots that belong to a directory component
    (``'dir.v2/file'``) are left alone.  Per ``os.path.splitext``, a leading
    dot (``'.hidden'``) is treated as part of the name, not as an extension.

    :param filename: file name or path as a string.
    :return: ``filename`` with the trailing ``.ext`` removed, if any.
    """
    # splitext only looks at the last path component, unlike a raw rindex('.')
    root, _extension = os.path.splitext(filename)
    return root


def build_data_map(data_path):
    """Build the ``{file name: label}`` map for the images in ``data_path``.

    Each image is expected to be named after its own solution, so the label
    is simply the file name with its extension stripped (``'xss7.jpg'`` maps
    to ``'xss7'``).

    Sub-directories and hidden entries (names starting with ``'.'``, such as
    ``.DS_Store``) are skipped, because ``os.listdir`` returns them alongside
    the image files and they cannot be loaded as captchas.

    :param data_path: directory that contains the captcha image files.
    :return: dict mapping each image file name to its label string.
    """
    data_map = {}
    # os.listdir gives no ordering guarantee; sort for a stable traversal
    for entry in sorted(os.listdir(data_path)):
        if entry.startswith('.'):
            continue
        if not os.path.isfile(os.path.join(data_path, entry)):
            continue
        data_map[entry] = strip_extension(entry)
    return data_map


DATA_DIR = 'data'
DATA_MAP = os.path.join(DATA_DIR, 'captcha.json')
DATA_FULL_DIR = os.path.join(DATA_DIR, 'captcha')
DATA_TRAIN_DIR = os.path.join(DATA_DIR, 'train')
DATA_TRAIN_FILE = os.path.join(DATA_DIR, 'captcha')
Expand All @@ -15,21 +26,21 @@
data_x = []
data_y = []

# load image content json file
with open(DATA_MAP) as f:
image_contents = json.load(f)
# build image contents map
image_contents = build_data_map(DATA_FULL_DIR)

# load image and save letters
counter = 0

for fname, contents in image_contents.iteritems():
counter += 1
print(counter, fname, contents)
image = io.imread(os.path.join(DATA_FULL_DIR, fname))
original_image = io.imread(os.path.join(DATA_FULL_DIR, fname))
grayscale_image = rgb2gray(original_image)

# split image
letters = split_letters(image, num_letters=len(contents), debug=True)
letters = split_letters(grayscale_image, num_letters=len(contents), debug=True)
if letters != None:
fname = fname.replace('.jpg', '.png')
for i, letter in enumerate(letters):
content = contents[i]
# add to dataset
Expand All @@ -40,7 +51,7 @@
fpath = os.path.join(DATA_TRAIN_DIR, content)
if not os.path.exists(fpath):
os.makedirs(fpath)
letter_fname = os.path.join(fpath, str(i+1) + '-' + fname)
letter_fname = os.path.join(fpath, str(i+1) + '-' + strip_extension(fname) + '.png')
io.imsave(letter_fname, 255 - letter) # invert black <> white color
else:
print('Letters is not valid')
Expand Down

0 comments on commit 96f23d8

Please sign in to comment.