-
Notifications
You must be signed in to change notification settings - Fork 38
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit 6937f9a
Showing
12 changed files
with
677 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,115 @@ | ||
# Byte-compiled / optimized / DLL files | ||
__pycache__/ | ||
*.py[cod] | ||
*$py.class | ||
|
||
# C extensions | ||
*.so | ||
|
||
# Distribution / packaging | ||
.Python | ||
build/ | ||
develop-eggs/ | ||
dist/ | ||
downloads/ | ||
eggs/ | ||
.eggs/ | ||
lib/ | ||
lib64/ | ||
parts/ | ||
sdist/ | ||
var/ | ||
wheels/ | ||
*.egg-info/ | ||
.installed.cfg | ||
*.egg | ||
MANIFEST | ||
|
||
# PyInstaller | ||
# Usually these files are written by a python script from a template | ||
# before PyInstaller builds the exe, so as to inject date/other infos into it. | ||
*.manifest | ||
*.spec | ||
|
||
# Installer logs | ||
pip-log.txt | ||
pip-delete-this-directory.txt | ||
|
||
# Unit test / coverage reports | ||
htmlcov/ | ||
.tox/ | ||
.coverage | ||
.coverage.* | ||
.cache | ||
nosetests.xml | ||
coverage.xml | ||
*.cover | ||
.hypothesis/ | ||
.pytest_cache/ | ||
|
||
# Translations | ||
*.mo | ||
*.pot | ||
|
||
# Django stuff: | ||
*.log | ||
local_settings.py | ||
db.sqlite3 | ||
|
||
# Flask stuff: | ||
instance/ | ||
.webassets-cache | ||
|
||
# Scrapy stuff: | ||
.scrapy | ||
|
||
# Sphinx documentation | ||
docs/_build/ | ||
|
||
# PyBuilder | ||
target/ | ||
|
||
# Jupyter Notebook | ||
.ipynb_checkpoints | ||
|
||
# pyenv | ||
.python-version | ||
|
||
# celery beat schedule file | ||
celerybeat-schedule | ||
|
||
# SageMath parsed files | ||
*.sage.py | ||
|
||
# Environments | ||
.env | ||
.venv | ||
env/ | ||
venv/ | ||
ENV/ | ||
env.bak/ | ||
venv.bak/ | ||
|
||
# Spyder project settings | ||
.spyderproject | ||
.spyproject | ||
|
||
# Rope project settings | ||
.ropeproject | ||
|
||
# mkdocs documentation | ||
/site | ||
|
||
# mypy | ||
.mypy_cache/ | ||
|
||
# train data | ||
data/captcha | ||
data/train | ||
|
||
# model log | ||
log | ||
.DS_Store | ||
|
||
# node.js | ||
node_modules/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
The MIT License (MIT) | ||
|
||
Copyright (c) 2018 Do Minh Hai | ||
|
||
Permission is hereby granted, free of charge, to any person obtaining a copy | ||
of this software and associated documentation files (the "Software"), to deal | ||
in the Software without restriction, including without limitation the rights | ||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | ||
copies of the Software, and to permit persons to whom the Software is | ||
furnished to do so, subject to the following conditions: | ||
|
||
The above copyright notice and this permission notice shall be included in all | ||
copies or substantial portions of the Software. | ||
|
||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | ||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
SOFTWARE. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,95 @@ | ||
# Captcha Breaker | ||
Built with TensorFlow (ConvNets) and Node.js. Perfect test score (100%) on Amazon Captcha :muscle::muscle::muscle: | ||
|
||
# Installation | ||
#### Python packages | ||
``` | ||
$ pip install -r requirements.txt | ||
``` | ||
|
||
#### Node.js packages (Node.js user only) | ||
``` | ||
$ npm i | ||
``` | ||
|
||
# Usage | ||
## 1. Create train data | ||
#### Prepare your training dataset | ||
* Copy your captcha images into the `data/captcha` folder | ||
``` | ||
|_data | ||
|_captcha | ||
|_ captcha_1.jpg | ||
|_ captcha_2.jpg | ||
``` | ||
* Create a mapping file `data/captcha.json` that maps each training image to its corresponding label | ||
```json | ||
{ | ||
"captcha_1.jpg": "HEYMEN", | ||
"captcha_2.jpg": "XINCHA" | ||
} | ||
``` | ||
|
||
#### Build train data for model | ||
Running `src/create_train_data.py` saves your training data as the compressed file `data/captcha.npz`. | ||
``` | ||
$ python src/create_train_data.py | ||
``` | ||
|
||
The compressed `data/captcha.npz` includes: | ||
* Train Data ( `x_train`, `y_train` ): `80%` | ||
* Test Data ( `x_test`, `y_test` ): `20%` | ||
|
||
## 2. Train | ||
Run `src/train.py` to train the model with your own dataset. | ||
``` | ||
$ python src/train.py | ||
``` | ||
|
||
Take :coffee: or :tea: while waiting! | ||
|
||
## 3. Attack | ||
Now, enjoy your war :fire::fire::fire: :stuck_out_tongue_winking_eye::stuck_out_tongue_winking_eye::stuck_out_tongue_winking_eye: | ||
|
||
#### Python | ||
``` | ||
$ python src/predict.py --fname YOUR_IMAGE_PATH_or_URL | ||
``` | ||
|
||
Sample output: | ||
``` | ||
loading image: data/captcha/captcha_2.jpg | ||
load captcha classifier | ||
predict for 1 char: `X` with probability: 99.956% | ||
predict for 2 char: `I` with probability: 99.909% | ||
predict for 3 char: `N` with probability: 99.556% | ||
predict for 4 char: `C` with probability: 99.853% | ||
predict for 5 char: `H` with probability: 99.949% | ||
predict for 6 char: `A` with probability: 98.889% | ||
Captcha: `XINCHA` with confident: `99.686%` | ||
XINCHA | ||
``` | ||
|
||
#### Node.js | ||
```js | ||
const captchaPredict = require('src/predict') | ||
|
||
captchaPredict(YOUR_IMAGE_PATH_or_URL) | ||
.then(console.log) | ||
.catch(console.error) | ||
``` | ||
Sample output: | ||
``` | ||
[ | ||
"loading image: data/captcha/captcha_2.jpg", | ||
"load captcha classifier", | ||
"predict for 1 char: `X` with probability: 99.956%", | ||
"predict for 2 char: `I` with probability: 99.909%", | ||
"predict for 3 char: `N` with probability: 99.556%", | ||
"predict for 4 char: `C` with probability: 99.853%", | ||
"predict for 5 char: `H` with probability: 99.949%", | ||
"predict for 6 char: `A` with probability: 98.889%", | ||
"Captcha: `XINCHA` with confident: `99.686%`", | ||
"XINCHA" | ||
] | ||
``` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
backend: TkAgg |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
{ | ||
"name": "captcha-breaker", | ||
"version": "1.0.0", | ||
"description": "Breaking Captcha with Tensorflow", | ||
"main": "src/predict.js", | ||
"dependencies": { | ||
"python-shell": "^0.5.0" | ||
}, | ||
"devDependencies": {}, | ||
"scripts": { | ||
"test": "echo \"Error: no test specified\" && exit 1" | ||
}, | ||
"repository": { | ||
"type": "git", | ||
"url": "git+https://github.com/dominhhai/captcha-breaker.git" | ||
}, | ||
"author": "Do Minh Hai", | ||
"license": "MIT", | ||
"bugs": { | ||
"url": "https://github.com/dominhhai/captcha-breaker/issues" | ||
}, | ||
"homepage": "https://github.com/dominhhai/captcha-breaker#README" | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
absl-py==0.2.2 | ||
astor==0.6.2 | ||
backports.functools-lru-cache==1.5 | ||
backports.weakref==1.0.post1 | ||
bleach==1.5.0 | ||
cloudpickle==0.5.3 | ||
cycler==0.10.0 | ||
dask==0.18.0 | ||
decorator==4.3.0 | ||
enum34==1.1.6 | ||
funcsigs==1.0.2 | ||
futures==3.2.0 | ||
gast==0.2.0 | ||
grpcio==1.12.1 | ||
html5lib==0.9999999 | ||
kiwisolver==1.0.1 | ||
Markdown==2.6.11 | ||
matplotlib==2.2.2 | ||
mock==2.0.0 | ||
networkx==2.1 | ||
numpy==1.14.5 | ||
pbr==4.0.4 | ||
Pillow==5.1.0 | ||
protobuf==3.6.0 | ||
pyparsing==2.2.0 | ||
python-dateutil==2.7.3 | ||
pytz==2018.4 | ||
PyWavelets==0.5.2 | ||
scikit-image==0.14.0 | ||
scipy==1.1.0 | ||
six==1.11.0 | ||
subprocess32==3.5.2 | ||
tensorboard==1.8.0 | ||
tensorflow==1.8.0 | ||
termcolor==1.1.0 | ||
toolz==0.9.0 | ||
Werkzeug==0.14.1 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,56 @@ | ||
from __future__ import print_function, absolute_import, division | ||
import os | ||
import json | ||
from skimage import io | ||
from img import split_letters | ||
import numpy as np | ||
|
||
# Build the training dataset: read every captcha image listed in the JSON
# mapping, split it into letters, save each letter as its own image file and
# store all letters with their labels in one compressed .npz archive.

DATA_DIR = 'data'
# JSON file mapping each captcha image file name to its label text
DATA_MAP = os.path.join(DATA_DIR, 'captcha.json')
# folder holding the full captcha images
DATA_FULL_DIR = os.path.join(DATA_DIR, 'captcha')
# folder receiving the single-letter images, one sub-folder per character
DATA_TRAIN_DIR = os.path.join(DATA_DIR, 'train')
# archive path without extension: np.savez_compressed appends '.npz' itself
DATA_TRAIN_FILE = os.path.join(DATA_DIR, 'captcha')

# parallel lists: data_x[k] is a letter image, data_y[k] is its label
data_x = []
data_y = []

# load image content json file
with open(DATA_MAP) as f:
    image_contents = json.load(f)

# load image and save letters
counter = 0
# .items() rather than the Python 2-only .iteritems(): works on Python 2 and 3
for fname, contents in image_contents.items():
    counter += 1
    print(counter, fname, contents)
    image = io.imread(os.path.join(DATA_FULL_DIR, fname))

    # split image
    letters = split_letters(image, debug=True)
    # Identity test on purpose: `!= None` would compare element-wise if
    # split_letters ever returned a NumPy array.
    if letters is not None:
        fname = fname.replace('.jpg', '.png')
        for i, letter in enumerate(letters):
            # i-th character of the label belongs to the i-th letter image
            content = contents[i]
            # add to dataset
            data_x.append(letter)
            # label is the offset from 'A' (ASCII 65), so 'A' -> 0, 'B' -> 1, ...
            data_y.append(np.uint8(ord(content) - 65))

            # save letter into train folder
            fpath = os.path.join(DATA_TRAIN_DIR, content)
            if not os.path.exists(fpath):
                os.makedirs(fpath)
            letter_fname = os.path.join(fpath, str(i + 1) + '-' + fname)
            io.imsave(letter_fname, 255 - letter)  # invert black <> white color
    else:
        print('Letters is not valid')
        # NOTE(review): processing stops at the first image that cannot be
        # split; the archive saved below then holds only the earlier images.
        break

# split into train and test data set
train_num = int(len(data_y) * 0.8)  # 80%

# save train data
print('saving dataset')
np.savez_compressed(DATA_TRAIN_FILE,
                    x_train=data_x[:train_num], y_train=data_y[:train_num],
                    x_test=data_x[train_num:], y_test=data_y[train_num:])
Oops, something went wrong.