Skip to content

Commit

Permalink
Build captcha solution map from image filename
Browse files Browse the repository at this point in the history
  • Loading branch information
fsarachu committed Oct 27, 2019
1 parent 9ac2c1c commit 1ba08a4
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 15 deletions.
14 changes: 5 additions & 9 deletions Readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,16 +23,12 @@ $ npm i
```
|_data
|_captcha
|_ captcha_1.jpg
|_ captcha_2.jpg
```
* Create mapping file `data/captcha.json` to map your train image with corresponding label
```json
{
"captcha_1.jpg": "HEYMEN",
"captcha_2.jpg": "XINCHA"
}
|_ xss7.jpg
|_ tvu4.jpg
```
**IMPORTANT:** Note that each image file is named after its own solution.

That means that if an image is named `A1bD3.jpg`, it corresponds to a captcha whose solution is `A1bD3`.

#### Build train data for model
Running `src/create_train_data.py` will save your train data as the compressed file `data/captcha.npz`.
Expand Down
21 changes: 15 additions & 6 deletions src/create_train_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,18 @@
from img import split_letters
import numpy as np


# Helper methods
def strip_extension(filename):
    """Return `filename` with its last dot-separated suffix removed.

    Only the final extension is stripped, so 'a.b.c' becomes 'a.b'.
    A name that contains no dot at all is returned unchanged instead of
    raising ValueError, so a stray extension-less entry cannot abort the
    caller.

    >>> strip_extension('xss7.jpg')
    'xss7'
    >>> strip_extension('noext')
    'noext'
    """
    # rpartition splits on the LAST '.', and yields an empty separator when
    # there is no '.' in the string at all.
    stem, dot, _suffix = filename.rpartition('.')
    return stem if dot else filename


def build_data_map(data_path):
    """Map every entry name in `data_path` to that name minus its extension.

    The directory is listed with os.listdir and no filtering is applied:
    each listed name becomes a key, and its value is whatever
    strip_extension returns for it (the captcha solution, given the
    convention that images are named after their solution).
    """
    solutions = {}
    for entry in os.listdir(data_path):
        solutions[entry] = strip_extension(entry)
    return solutions


DATA_DIR = 'data'
DATA_MAP = os.path.join(DATA_DIR, 'captcha.json')
DATA_FULL_DIR = os.path.join(DATA_DIR, 'captcha')
DATA_TRAIN_DIR = os.path.join(DATA_DIR, 'train')
DATA_TRAIN_FILE = os.path.join(DATA_DIR, 'captcha')
Expand All @@ -15,12 +25,12 @@
data_x = []
data_y = []

# load image content json file
with open(DATA_MAP) as f:
image_contents = json.load(f)
# build image contents map
image_contents = build_data_map(DATA_FULL_DIR)

# load image and save letters
counter = 0

for fname, contents in image_contents.iteritems():
counter += 1
print(counter, fname, contents)
Expand All @@ -39,8 +49,7 @@
fpath = os.path.join(DATA_TRAIN_DIR, content)
if not os.path.exists(fpath):
os.makedirs(fpath)
fname_no_ext = fname[:fname.rindex('.')]
letter_fname = os.path.join(fpath, str(i+1) + '-' + fname_no_ext + '.png')
letter_fname = os.path.join(fpath, str(i+1) + '-' + strip_extension(fname) + '.png')
io.imsave(letter_fname, 255 - letter) # invert black <> white color
else:
print('Letters is not valid')
Expand Down

0 comments on commit 1ba08a4

Please sign in to comment.