diff --git a/.DS_Store b/.DS_Store index e8dba5d6..da5f0bd5 100644 Binary files a/.DS_Store and b/.DS_Store differ diff --git "a/1.\351\252\214\350\257\201\347\240\201/num/3.jpeg" "b/1.\351\252\214\350\257\201\347\240\201/PIL_EXAMPLES/30.png" similarity index 100% rename from "1.\351\252\214\350\257\201\347\240\201/num/3.jpeg" rename to "1.\351\252\214\350\257\201\347\240\201/PIL_EXAMPLES/30.png" diff --git "a/1.\351\252\214\350\257\201\347\240\201/PIL_EXAMPLES/31.png" "b/1.\351\252\214\350\257\201\347\240\201/PIL_EXAMPLES/31.png" new file mode 100644 index 00000000..02cfab26 Binary files /dev/null and "b/1.\351\252\214\350\257\201\347\240\201/PIL_EXAMPLES/31.png" differ diff --git "a/1.\351\252\214\350\257\201\347\240\201/PIL_EXAMPLES/6_L.png" "b/1.\351\252\214\350\257\201\347\240\201/PIL_EXAMPLES/6_L.png" deleted file mode 100644 index b7054dbe..00000000 Binary files "a/1.\351\252\214\350\257\201\347\240\201/PIL_EXAMPLES/6_L.png" and /dev/null differ diff --git "a/1.\351\252\214\350\257\201\347\240\201/PIL_EXAMPLES/rotate.py" "b/1.\351\252\214\350\257\201\347\240\201/PIL_EXAMPLES/rotate.py" index d2691571..443960b5 100644 --- "a/1.\351\252\214\350\257\201\347\240\201/PIL_EXAMPLES/rotate.py" +++ "b/1.\351\252\214\350\257\201\347\240\201/PIL_EXAMPLES/rotate.py" @@ -1,29 +1,31 @@ # coding:utf-8 from __future__ import print_function -from PIL import Image -import glob, os +#from PIL import Image +import Image +#import glob, os -size = 128,128 +#size = 128,128 # 打开图片 -im = Image.open("1.png") +#im = Image.open("11.png") # 旋转45度 并且 显示 # im.rotate(45).show() # 遍历该目录下的所有PNG文件,打开并且创建略缩图 -for infile in glob.glob("*.png"): - file,ext = os.path.splitext(infile) -# print file,ext - im = Image.open(infile) - im.thumbnail(size,Image.ANTIALIAS) - #im.save(file+".thumbnail","JPEG") +# for infile in glob.glob("*.png"): +# file,ext = os.path.splitext(infile) +# # print file,ext +# im = Image.open(infile) +# im.thumbnail(size,Image.ANTIALIAS) +# #im.save(file+".thumbnail","JPEG") # 创建新图片,并保存 -new_img = Image.new("RGB",(512,512),"white") +# 101 31 +#new_img = Image.new("RGB",(101,31),"white") #new_img.save("NEW.png") -im_2 = Image.open("1.1.png") +# im_2 = Image.open("1.1.png") #im_3 = Image.composite(im.copy(),im_2.copy(),'L') # 将im与im_2 根据透明度进行合并 @@ -37,17 +39,14 @@ #输出im的最大最小rgb #print im_2.getextrema() -im = Image.open('6.png') -im = im.convert('L') -im = im.resize((32,32),Image.ANTIALIAS) -im.save('6_L.png') - -#print im.getpixel((0,0))#灰度图没有rgb直接是统一值 -#print im.size +im = Image.open('11.png') +#im = im.convert('L') +#im = im.resize((32,32),Image.ANTIALIAS) +#im.save('11_l.png') for i in xrange(im.size[0]): for j in xrange(im.size[1]): - #print im.getpixel((i,j)) + print im.getpixel((i,j)) l = im.getpixel((j,i)) if( l > 220): print ('0',end='') diff --git "a/1.\351\252\214\350\257\201\347\240\201/fuhao.png" "b/1.\351\252\214\350\257\201\347\240\201/fuhao.png" new file mode 100644 index 00000000..2ab13ee5 Binary files /dev/null and "b/1.\351\252\214\350\257\201\347\240\201/fuhao.png" differ diff --git "a/1.\351\252\214\350\257\201\347\240\201/num/3.png" "b/1.\351\252\214\350\257\201\347\240\201/num/3.png" new file mode 100644 index 00000000..02cfab26 Binary files /dev/null and "b/1.\351\252\214\350\257\201\347\240\201/num/3.png" differ diff --git "a/1.\351\252\214\350\257\201\347\240\201/num/recognise.py" "b/1.\351\252\214\350\257\201\347\240\201/num/recognise.py" index a14b4e30..dde40457 100644 --- "a/1.\351\252\214\350\257\201\347\240\201/num/recognise.py" +++ "b/1.\351\252\214\350\257\201\347\240\201/num/recognise.py" @@ -3,7 +3,7 @@ import os import requests from PIL import Image -import math +import math,time def imagesget(): os.mkdir('images') @@ -24,7 +24,7 @@ def convert_image(image): pix=image.getpixel((x,y)) if pix<120:#灰度低于120 设置为 0 image2.putpixel((x,y),0) - image2.save('L.jpeg')#将灰度图存储下来看效果 + image2.save('L.png')#将灰度图存储下来看效果 return image2 def cut_image(image): @@ -86,12 +86,14 @@ def magnitude(self,concordance): #计算矢量之间的 cos 值 def relation(self,concordance1, concordance2): - + relevance = 0 topvalue = 0 for word, count in concordance1.items(): if word in concordance2: + print type(topvalue),topvalue,count,concordance2[word] topvalue += count * concordance2[word] + time.sleep(10) return topvalue / (self.magnitude(concordance1) * self.magnitude(concordance2)) def recognise(self,image): @@ -119,7 +121,8 @@ def recognise(self,image): if __name__=='__main__': imageRecognize=CaptchaRecognize() # 设置图片路径 - image=Image.open('3.jpeg') + image=Image.open('3.png') + print image.mode result=imageRecognize.recognise(image) diff --git "a/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/L.png" "b/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/L.png" new file mode 100644 index 00000000..e8126edf Binary files /dev/null and "b/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/L.png" differ diff --git "a/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/README.md" "b/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/README.md" new file mode 100644 index 00000000..d3735a7c --- /dev/null +++ "b/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/README.md" @@ -0,0 +1,12 @@ +本部分测试了简单的图像验证码的处理 + +主要代码为:recognise.py + +在main中可以替换为你需要解析的图片路径与名称, + +产生L.png是进行了灰度以及二值转换之后的图片在本目录下 + +还会产生字符切割的效果 + +用户可以将切割出来的字符人工标记后加入到icon中成为新字符集合. + diff --git "a/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/cat/1484061493.png" "b/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/cat/1484061493.png" new file mode 100644 index 00000000..dcdb50b8 Binary files /dev/null and "b/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/cat/1484061493.png" differ diff --git "a/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/cat/1484061517.png" "b/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/cat/1484061517.png" new file mode 100644 index 00000000..dcdb50b8 Binary files /dev/null and "b/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/cat/1484061517.png" differ diff --git "a/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/cat/1484061602.png" "b/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/cat/1484061602.png" new file mode 100644 index 00000000..dcdb50b8 Binary files /dev/null and "b/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/cat/1484061602.png" differ diff --git "a/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/cat/1484061618.png" "b/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/cat/1484061618.png" new file mode 100644 index 00000000..dcdb50b8 Binary files /dev/null and "b/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/cat/1484061618.png" differ diff --git "a/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/cat/1484061689.png" "b/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/cat/1484061689.png" new file mode 100644 index 00000000..dcdb50b8 Binary files /dev/null and "b/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/cat/1484061689.png" differ diff --git "a/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/cat/1484061759.png" "b/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/cat/1484061759.png" new file mode 100644 index 00000000..dcdb50b8 Binary files /dev/null and "b/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/cat/1484061759.png" differ diff --git "a/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/cat/1484061798.png" "b/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/cat/1484061798.png" new file mode 100644 index 00000000..dcdb50b8 Binary files /dev/null and "b/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/cat/1484061798.png" differ diff --git "a/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/cat/1484061803.png" "b/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/cat/1484061803.png" new file mode 100644 index 00000000..dcdb50b8 Binary files /dev/null and "b/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/cat/1484061803.png" differ diff --git "a/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/cat/1484061815.png" "b/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/cat/1484061815.png" new file mode 100644 index 00000000..dcdb50b8 Binary files /dev/null and "b/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/cat/1484061815.png" differ diff --git "a/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/cat/1484061821.png" "b/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/cat/1484061821.png" new file mode 100644 index 00000000..dcdb50b8 Binary files /dev/null and "b/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/cat/1484061821.png" differ diff --git "a/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/cat/1484061833.png" "b/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/cat/1484061833.png" new file mode 100644 index 00000000..dcdb50b8 Binary files /dev/null and "b/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/cat/1484061833.png" differ diff --git "a/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/cat/1484062418.png" "b/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/cat/1484062418.png" new file mode 100644 index 00000000..dcdb50b8 Binary files /dev/null and "b/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/cat/1484062418.png" differ diff --git "a/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/cat/1484062521.png" "b/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/cat/1484062521.png" new file mode 100644 index 00000000..dcdb50b8 Binary files /dev/null and "b/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/cat/1484062521.png" differ diff --git "a/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/cat/1484062530.png" "b/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/cat/1484062530.png" new file mode 100644 index 00000000..dcdb50b8 Binary files /dev/null and "b/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/cat/1484062530.png" differ diff --git "a/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/cat/1484062547.png" "b/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/cat/1484062547.png" new file mode 100644 index 00000000..dcdb50b8 Binary files /dev/null and "b/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/cat/1484062547.png" differ diff --git "a/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/cat/1484062555.png" "b/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/cat/1484062555.png" new file mode 100644 index 00000000..dcdb50b8 Binary files /dev/null and "b/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/cat/1484062555.png" differ diff --git "a/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/cat/1484062628.png" "b/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/cat/1484062628.png" new file mode 100644 index 00000000..dcdb50b8 Binary files /dev/null and "b/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/cat/1484062628.png" differ diff --git "a/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/cat/1484062646.png" "b/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/cat/1484062646.png" new file mode 100644 index 00000000..dcdb50b8 Binary files /dev/null and "b/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/cat/1484062646.png" differ diff --git "a/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/cat/1484127810.png" "b/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/cat/1484127810.png" new file mode 100644 index 00000000..dcdb50b8 Binary files /dev/null and "b/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/cat/1484127810.png" differ diff --git "a/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/cat/1484128107.png" "b/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/cat/1484128107.png" new file mode 100644 index 00000000..dcdb50b8 Binary files /dev/null and "b/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/cat/1484128107.png" differ diff --git "a/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/cat/1484128325.png" "b/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/cat/1484128325.png" new file mode 100644 index 00000000..dcdb50b8 Binary files /dev/null and "b/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/cat/1484128325.png" differ diff --git "a/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/cat/1484128363.png" "b/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/cat/1484128363.png" new file mode 100644 index 00000000..dcdb50b8 Binary files /dev/null and "b/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/cat/1484128363.png" differ diff --git "a/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/cat/1484128401.png" "b/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/cat/1484128401.png" new file mode 100644 index 00000000..dcdb50b8 Binary files /dev/null and "b/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/cat/1484128401.png" differ diff --git "a/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/cat/1484128417.png" "b/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/cat/1484128417.png" new file mode 100644 index 00000000..dcdb50b8 Binary files /dev/null and "b/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/cat/1484128417.png" differ diff --git "a/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/cat/1484128519.png" "b/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/cat/1484128519.png" new file mode 100644 index 00000000..dcdb50b8 Binary files /dev/null and "b/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/cat/1484128519.png" differ diff --git "a/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/cat/1484128649.png" "b/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/cat/1484128649.png" new file mode 100644 index 00000000..dcdb50b8 Binary files /dev/null and "b/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/cat/1484128649.png" differ diff --git "a/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/cat/1484128765.png" "b/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/cat/1484128765.png" new file mode 100644 index 00000000..dcdb50b8 Binary files /dev/null and "b/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/cat/1484128765.png" differ diff --git "a/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/cat/1484128777.png" "b/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/cat/1484128777.png" new file mode 100644 index 00000000..d725a44c Binary files /dev/null and "b/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/cat/1484128777.png" differ diff --git "a/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/cat/1484128843.png" "b/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/cat/1484128843.png" new file mode 100644 index 00000000..dcdb50b8 Binary files /dev/null and "b/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/cat/1484128843.png" differ diff --git "a/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/cat/1484128879.png" "b/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/cat/1484128879.png" new file mode 100644 index 00000000..dcdb50b8 Binary files /dev/null and "b/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/cat/1484128879.png" differ diff --git "a/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/cat/1484128904.png" "b/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/cat/1484128904.png" new file mode 100644 index 00000000..dcdb50b8 Binary files /dev/null and "b/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/cat/1484128904.png" differ diff --git "a/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/cat/1484128984.png" "b/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/cat/1484128984.png" new file mode 100644 index 00000000..dcdb50b8 Binary files /dev/null and "b/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/cat/1484128984.png" differ diff --git "a/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/login.py" "b/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/login.py" new file mode 100644 index 00000000..0b34c51a --- /dev/null +++ "b/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/login.py" @@ -0,0 +1,49 @@ +#coding:utf-8 + +import requests +from recognise import * +from PIL import Image +import base64 +import getpass + + +def login(username,passwd): + session=requests.session() + session.get('http://wsxk.hust.edu.cn/login.jsp').text + img=session.get('http://wsxk.hust.edu.cn/randomImage.action').content + with open('captcha.jpeg','wb') as imgfile: + imgfile.write(img) + imageRecognize=CaptchaRecognize() + image=Image.open('captcha.jpeg') + result=imageRecognize.recognise(image) + string='' + for item in result: + string+=item[1] + print(string) + data={ + 'usertype':"xs", + 'username':username, + 'password':passwd, + 'rand':string, + 'sm1':"", + 'ln':"app610.dc.hust.edu.cn" + } + headers = { + 'Host':"wsxk.hust.edu.cn", + "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", + "Accept-Encoding": "gzip, deflate", + "Accept-Language": "en-US,en;q=0.5", + "Connection": "keep-alive", + 'Referer':"http://wsxk.hust.edu.cn/login.jsp", + "User-Agent": "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:39.0) Gecko/20100101 Firefox/39.0"} + session.post('http://wsxk.hust.edu.cn/hublogin.action',data=data,headers=headers) + html=session.get('http://wsxk.hust.edu.cn/select.jsp',headers=headers).text + print(html) + return session + +def main(): + username=input('username:') + passwd=base64.b64encode(getpass.getpass('Passwd:').encode()).decode() + login(username,passwd) + +main() diff --git "a/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/recognise.py" "b/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/recognise.py" new file mode 100644 index 00000000..cee1d8ed --- /dev/null +++ "b/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/recognise.py" @@ -0,0 +1,144 @@ +#coding:utf-8 + +import os +import requests +from PIL import Image +import math,time + +def convert_image(image): + image=image.convert('L')#灰度 + image2=Image.new('L',image.size,255) + for x in range(image.size[0]): + for y in range(image.size[1]): + pix=image.getpixel((x,y)) + if pix<120:#灰度低于120 设置为 0 + image2.putpixel((x,y),0) + image2.save('L.png')#将灰度图存储下来看效果 + return image2 + +def cut_image(image): + ''' 字符切割,根据黑色的连续性,当某一列出现黑色为标志,当黑色消失为结束点''' + inletter=False + foundletter=False + letters=[] + start=0 + end=0 + for x in range(image.size[0]): + for y in range(image.size[1]): + pix=image.getpixel((x,y)) + if(pix==0): + inletter=True + if foundletter==False and inletter ==True: + foundletter=True + start=x + if foundletter==True and inletter==False: + end=x + letters.append((start,end)) + foundletter=False + inletter=False + images=[] + for letter in letters: + img=image.crop((letter[0],0,letter[1],image.size[1])) + #img.save(str(letter[0])+'.png')#展示切割效果 + img.save("./cat/"+str(int(time.time()))+'.png')#展示切割效果 + images.append(img) + return images + +def buildvector(image): + ''' 图片转换成矢量,将二维的图片转为一维''' + result={} + count=0 + for i in image.getdata(): + result[count]=i + count+=1 + #print result + return result + + +class CaptchaRecognize: + def __init__(self): + self.letters=['0','1','2','3','4','5','6','7','8','9'] + self.loadSet() + + def loadSet(self): + self.imgset=[] + for letter in self.letters: + temp=[] + for img in os.listdir('./icon/%s'%(letter)): + temp.append(buildvector(Image.open('./icon/%s/%s'%(letter,img)))) + self.imgset.append({letter:temp}) + + #计算矢量大小 + def magnitude(self,concordance): + total = 0 + for word,count in concordance.items(): + try: + if(type(count) == type(())): + total += count[0] ** 2 + #print type(total),total,type(count),count ** 2 + else: + total += count ** 2 + except Exception,e: + print type(total),total,type(count),count + print e + return math.sqrt(total) + + #计算矢量之间的 cos 值 + def relation(self,concordance1, concordance2): + relevance = 0 + topvalue = 0 + for word, count in concordance1.items(): + if word in concordance2: + print type(topvalue),topvalue,count,concordance2[word] + time.sleep(1) + topvalue += count * concordance2[word][0] + #time.sleep(10) + return topvalue / (self.magnitude(concordance1) * self.magnitude(concordance2)) + + def recognise(self,image): + image=convert_image(image)#二值化 + images=cut_image(image)#字符单独切割出来 + vectors=[] + for img in images: + vectors.append(buildvector(img)) + result=[] + for vector in vectors: + guess=[] + for image in self.imgset: + for letter,temp in image.items(): + relevance=0 + num=0 + for img in temp: + relevance+=self.relation(vector,img) + num+=1 + relevance=relevance/num + guess.append((relevance,letter)) + guess.sort(reverse=True) + result.append(guess[0]) + return result + +if __name__=='__main__': + import os + dir="./temp" + name_list = [] + for root,dirs,files in os.walk(dir): + for file in files: + #name_list.append(file) + name = os.path.join(root,file) + name_list.append(name) + + print name_list + for i in name_list: + #name = '11' + name = i + print name + + imageRecognize=CaptchaRecognize() + # 设置图片路径 + image=Image.open(name) + #image=Image.open('./temp/2.png') + print image.mode + result=imageRecognize.recognise(image) + string=[''.join(item[1]) for item in result] + print(string) + break diff --git "a/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/temp/1.png" "b/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/temp/1.png" new file mode 100644 index 00000000..701cec1e Binary files /dev/null and "b/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/temp/1.png" differ diff --git "a/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/temp/2.png" "b/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/temp/2.png" new file mode 100644 index 00000000..4a4d2d1e Binary files /dev/null and "b/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/temp/2.png" differ diff --git "a/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/temp/21.png" "b/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/temp/21.png" new file mode 100644 index 00000000..cad24bc4 Binary files /dev/null and "b/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/temp/21.png" differ diff --git "a/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/temp/3.png" "b/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/temp/3.png" new file mode 100644 index 00000000..6f7c8a3c Binary files /dev/null and "b/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/temp/3.png" differ diff --git "a/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/temp/31.png" "b/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/temp/31.png" new file mode 100644 index 00000000..38626132 Binary files /dev/null and "b/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/temp/31.png" differ diff --git "a/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/temp/32.png" "b/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/temp/32.png" new file mode 100644 index 00000000..6f7c8a3c Binary files /dev/null and "b/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/temp/32.png" differ diff --git "a/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/temp/4.png" "b/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/temp/4.png" new file mode 100644 index 00000000..2bf56984 Binary files /dev/null and "b/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/temp/4.png" differ diff --git "a/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/temp/41.png" "b/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/temp/41.png" new file mode 100644 index 00000000..2bf56984 Binary files /dev/null and "b/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/temp/41.png" differ diff --git "a/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/temp/42.png" "b/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/temp/42.png" new file mode 100644 index 00000000..6beed4ae Binary files /dev/null and "b/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/temp/42.png" differ diff --git "a/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/temp/5.png" "b/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/temp/5.png" new file mode 100644 index 00000000..2e5b2b0c Binary files /dev/null and "b/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/temp/5.png" differ diff --git "a/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/temp/6.png" "b/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/temp/6.png" new file mode 100644 index 00000000..37780df5 Binary files /dev/null and "b/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/temp/6.png" differ diff --git "a/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/temp/61.png" "b/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/temp/61.png" new file mode 100644 index 00000000..37780df5 Binary files /dev/null and "b/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/temp/61.png" differ diff --git "a/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/temp/7.png" "b/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/temp/7.png" new file mode 100644 index 00000000..cf882209 Binary files /dev/null and "b/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/temp/7.png" differ diff --git "a/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/temp/71.png" "b/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/temp/71.png" new file mode 100644 index 00000000..cf882209 Binary files /dev/null and "b/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/temp/71.png" differ diff --git "a/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/temp/8.png" "b/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/temp/8.png" new file mode 100644 index 00000000..aedeef05 Binary files /dev/null and "b/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/temp/8.png" differ diff --git "a/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/temp/81.png" "b/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/temp/81.png" new file mode 100644 index 00000000..aedeef05 Binary files /dev/null and "b/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/temp/81.png" differ diff --git "a/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/temp/9.png" "b/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/temp/9.png" new file mode 100644 index 00000000..9f36fad3 Binary files /dev/null and "b/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/temp/9.png" differ diff --git "a/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/temp/91.png" "b/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/temp/91.png" new file mode 100644 index 00000000..d36666c1 Binary files /dev/null and "b/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/temp/91.png" differ diff --git "a/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/temp/92.png" "b/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/temp/92.png" new file mode 100644 index 00000000..9f36fad3 Binary files /dev/null and "b/1.\351\252\214\350\257\201\347\240\201/num\345\211\257\346\234\254/temp/92.png" differ diff --git "a/1.\351\252\214\350\257\201\347\240\201/temp.py" "b/1.\351\252\214\350\257\201\347\240\201/temp.py" new file mode 100644 index 00000000..fc3356e1 --- /dev/null +++ "b/1.\351\252\214\350\257\201\347\240\201/temp.py" @@ -0,0 +1,121 @@ +#coding:utf-8 +import Image + +def Noise(img,i,j): + #导入图像和ij,测试该点是否是噪点 + num = 0 + + for temp_i in [-1,0,1]: + for temp_j in [-1,0,1]: + try: + if img.getpixel((i+temp_j,j+temp_j)) != img.getpixel((i,j)): + num += 1 + # print img.getpixel((i,j)) + except: + #print '11' + pass + if num == 6: + return 1 #如果周围8个点均与该点不同则该点为噪点 + else : + return 0 + +def cat_img(im,name): + + box = (0,0,101,31) + im = im.crop(box)#切割 + #im.show() + box = (0,0,26,30) + im1 = im.crop(box)#.convert('L')#切割 + #im1.show() #第一个数字 + box = (26,0,41,30) + im2 = im.crop(box)#.convert('L')#切割 + #im2.show()# 运算符号 + box = (41,0,65,30) + im3 = im.crop(box)#.convert('L')#切割 + #im3.show()# 第二个数字 + #region.show() + #(101, 31) {'0-0-255': 16, '0-255-0': 58, '255-0-0': 95, '68-146-137': 2899, '255-255-255': 63}                                       + print im.size +# rgb_dic = {} + + # 二值化第一个数字 + min_j_1 = 30 + max_j_1 = 0 + for i in range(im1.size[0]): + for j in range(im1.size[1]): + r,g,b = im1.getpixel((i,j)) + if r == 255 and b != 255: + if min_j_1 > j: + min_j_1 = j + max_j_1 = j + im1.putpixel((i,j), (0,0,0)) + else: + im1.putpixel((i,j),(255,255,255)) + cat_num_box_1 = (0,min_j_1,26,min_j_1+10)# 未知错误用固定字高10替代 + im1 = im1.crop(cat_num_box_1) + print min_j_1,max_j_1,cat_num_box_1 + #im1.show() + im1_1 = im1.crop((5,0,13,10)) + im1_1.save("./temp/"+name+'_1_1.png') + im1_2 = im1.crop((14,0,22,10)) + im1_2.save("./temp/"+name+'_1_2.png') + + # 二值化运算符号 + min_j_2 = 30 + max_j_2 = 0 + #im2.show() + try: + for i in range(im2.size[0]): + for j in range(im2.size[1]-4): + r,g,b = im2.getpixel((i,j)) + if(Noise(im2,i,j)): + continue + + if r == 255 and b == 255: + if min_j_2 > j: + min_j_2 = j + max_j_2 = j + im2.putpixel((i,j), (0,0,0)) + else: + im2.putpixel((i,j),(255,255,255)) + #im2.show() + print min_j_2,max_j_2 + cat_num_box_2 = (0,min_j_2,14,min_j_2+10)#固定高度 + im2 = im2.crop(cat_num_box_2) + #im2.show() + im2.save("./temp/"+name+'_fuhao.png') + except Exception,e: + print e + + # # 二值化第二个数字 + min_j_3 = 30 + max_j_3 = 0 + #im3.show() + for i in range(im3.size[0]): + for j in range(im3.size[1]): + r,g,b = im3.getpixel((i,j)) + if g == 255 and b != 255: + if min_j_3 > j: + min_j_3 = j + max_j_3 = j + im3.putpixel((i,j), (0,0,0)) + else: + im3.putpixel((i,j),(255,255,255)) + cat_num_box_3 = (0,min_j_3,24,min_j_3+10)#固定高度 + im3 = im3.crop(cat_num_box_3) + #im3.show() + #im3.show() + print min_j_3,max_j_3,cat_num_box_3 + im3.save("./temp/"+name+'_2.png') + # im3_1 = im3.crop((5,0,13,10)) + # im3_1.save("./temp/"+name+'_3_1.png') + # im3_2 = im3.crop((14,0,22,10)) + # im3_2.save("./temp/"+name+'_3_2.png') + + +if __name__ == '__main__': + for i in ['11','12','13','14','15']: + #name = '11' + name = i + im = Image.open("./trins/"+name+'.png').convert('RGB')#重点要切换图片模式 + cat_img(im,name) diff --git "a/1.\351\252\214\350\257\201\347\240\201/temp1/1.png" "b/1.\351\252\214\350\257\201\347\240\201/temp1/1.png" new file mode 100644 index 00000000..701cec1e Binary files /dev/null and "b/1.\351\252\214\350\257\201\347\240\201/temp1/1.png" differ diff --git "a/1.\351\252\214\350\257\201\347\240\201/temp1/2.png" "b/1.\351\252\214\350\257\201\347\240\201/temp1/2.png" new file mode 100644 index 00000000..4a4d2d1e Binary files /dev/null and "b/1.\351\252\214\350\257\201\347\240\201/temp1/2.png" differ diff --git "a/1.\351\252\214\350\257\201\347\240\201/temp1/21.png" "b/1.\351\252\214\350\257\201\347\240\201/temp1/21.png" new file mode 100644 index 00000000..cad24bc4 Binary files /dev/null and "b/1.\351\252\214\350\257\201\347\240\201/temp1/21.png" differ diff --git "a/1.\351\252\214\350\257\201\347\240\201/temp1/3.png" "b/1.\351\252\214\350\257\201\347\240\201/temp1/3.png" new file mode 100644 index 00000000..6f7c8a3c Binary files /dev/null and "b/1.\351\252\214\350\257\201\347\240\201/temp1/3.png" differ diff --git "a/1.\351\252\214\350\257\201\347\240\201/temp1/31.png" "b/1.\351\252\214\350\257\201\347\240\201/temp1/31.png" new file mode 100644 index 00000000..38626132 Binary files /dev/null and "b/1.\351\252\214\350\257\201\347\240\201/temp1/31.png" differ diff --git "a/1.\351\252\214\350\257\201\347\240\201/temp1/32.png" "b/1.\351\252\214\350\257\201\347\240\201/temp1/32.png" new file mode 100644 index 00000000..6f7c8a3c Binary files /dev/null and "b/1.\351\252\214\350\257\201\347\240\201/temp1/32.png" differ diff --git "a/1.\351\252\214\350\257\201\347\240\201/temp1/4.png" "b/1.\351\252\214\350\257\201\347\240\201/temp1/4.png" new file mode 100644 index 00000000..2bf56984 Binary files /dev/null and "b/1.\351\252\214\350\257\201\347\240\201/temp1/4.png" differ diff --git "a/1.\351\252\214\350\257\201\347\240\201/temp1/41.png" "b/1.\351\252\214\350\257\201\347\240\201/temp1/41.png" new file mode 100644 index 00000000..2bf56984 Binary files /dev/null and "b/1.\351\252\214\350\257\201\347\240\201/temp1/41.png" differ diff --git "a/1.\351\252\214\350\257\201\347\240\201/temp1/42.png" "b/1.\351\252\214\350\257\201\347\240\201/temp1/42.png" new file mode 100644 index 00000000..6beed4ae Binary files /dev/null and "b/1.\351\252\214\350\257\201\347\240\201/temp1/42.png" differ diff --git "a/1.\351\252\214\350\257\201\347\240\201/temp1/5.png" "b/1.\351\252\214\350\257\201\347\240\201/temp1/5.png" new file mode 100644 index 00000000..2e5b2b0c Binary files /dev/null and "b/1.\351\252\214\350\257\201\347\240\201/temp1/5.png" differ diff --git "a/1.\351\252\214\350\257\201\347\240\201/temp1/6.png" "b/1.\351\252\214\350\257\201\347\240\201/temp1/6.png" new file mode 100644 index 00000000..37780df5 Binary files /dev/null and "b/1.\351\252\214\350\257\201\347\240\201/temp1/6.png" differ diff --git "a/1.\351\252\214\350\257\201\347\240\201/temp1/61.png" "b/1.\351\252\214\350\257\201\347\240\201/temp1/61.png" new file mode 100644 index 00000000..37780df5 Binary files /dev/null and "b/1.\351\252\214\350\257\201\347\240\201/temp1/61.png" differ diff --git "a/1.\351\252\214\350\257\201\347\240\201/temp1/7.png" "b/1.\351\252\214\350\257\201\347\240\201/temp1/7.png" new file mode 100644 index 00000000..cf882209 Binary files /dev/null and "b/1.\351\252\214\350\257\201\347\240\201/temp1/7.png" differ diff --git "a/1.\351\252\214\350\257\201\347\240\201/temp1/71.png" "b/1.\351\252\214\350\257\201\347\240\201/temp1/71.png" new file mode 100644 index 00000000..cf882209 Binary files /dev/null and "b/1.\351\252\214\350\257\201\347\240\201/temp1/71.png" differ diff --git "a/1.\351\252\214\350\257\201\347\240\201/temp1/8.png" "b/1.\351\252\214\350\257\201\347\240\201/temp1/8.png" new file mode 100644 index 00000000..aedeef05 Binary files /dev/null and "b/1.\351\252\214\350\257\201\347\240\201/temp1/8.png" differ diff --git "a/1.\351\252\214\350\257\201\347\240\201/temp1/81.png" "b/1.\351\252\214\350\257\201\347\240\201/temp1/81.png" new file mode 100644 index 00000000..aedeef05 Binary files /dev/null and "b/1.\351\252\214\350\257\201\347\240\201/temp1/81.png" differ diff --git "a/1.\351\252\214\350\257\201\347\240\201/temp1/9.png" "b/1.\351\252\214\350\257\201\347\240\201/temp1/9.png" new file mode 100644 index 00000000..9f36fad3 Binary files /dev/null and "b/1.\351\252\214\350\257\201\347\240\201/temp1/9.png" differ diff --git "a/1.\351\252\214\350\257\201\347\240\201/temp1/91.png" "b/1.\351\252\214\350\257\201\347\240\201/temp1/91.png" new file mode 100644 index 00000000..d36666c1 Binary files /dev/null and "b/1.\351\252\214\350\257\201\347\240\201/temp1/91.png" differ diff --git "a/1.\351\252\214\350\257\201\347\240\201/temp1/92.png" "b/1.\351\252\214\350\257\201\347\240\201/temp1/92.png" new file mode 100644 index 00000000..9f36fad3 Binary files /dev/null and "b/1.\351\252\214\350\257\201\347\240\201/temp1/92.png" differ diff --git "a/1.\351\252\214\350\257\201\347\240\201/trins/11.png" "b/1.\351\252\214\350\257\201\347\240\201/trins/11.png" new file mode 100644 index 00000000..8174e0d2 Binary files /dev/null and "b/1.\351\252\214\350\257\201\347\240\201/trins/11.png" differ diff --git "a/1.\351\252\214\350\257\201\347\240\201/trins/12.png" "b/1.\351\252\214\350\257\201\347\240\201/trins/12.png" new file mode 100644 index 00000000..3f764260 Binary files /dev/null and "b/1.\351\252\214\350\257\201\347\240\201/trins/12.png" differ diff --git "a/1.\351\252\214\350\257\201\347\240\201/trins/13.png" "b/1.\351\252\214\350\257\201\347\240\201/trins/13.png" new file mode 100644 index 00000000..f109da33 Binary files /dev/null and "b/1.\351\252\214\350\257\201\347\240\201/trins/13.png" differ diff --git "a/1.\351\252\214\350\257\201\347\240\201/trins/14.png" "b/1.\351\252\214\350\257\201\347\240\201/trins/14.png" new file mode 100644 index 00000000..d63dee5a Binary files /dev/null and "b/1.\351\252\214\350\257\201\347\240\201/trins/14.png" differ diff --git "a/1.\351\252\214\350\257\201\347\240\201/trins/15.png" "b/1.\351\252\214\350\257\201\347\240\201/trins/15.png" new file mode 100644 index 00000000..ebbc04a3 Binary files /dev/null and "b/1.\351\252\214\350\257\201\347\240\201/trins/15.png" differ diff --git "a/2.\344\273\243\347\220\206/xicidaili.py" "b/2.\344\273\243\347\220\206/xicidaili.py" index b9faf30f..2a09ed2b 100644 --- "a/2.\344\273\243\347\220\206/xicidaili.py" +++ "b/2.\344\273\243\347\220\206/xicidaili.py" @@ -94,7 +94,7 @@ def re_html_code(html_code,proxy_list_json): Remarks = 'ly' # `id`, `proxy_ip`, `proxy_port`, `proxy_country`, `proxy_type`, `addtime`, `Last_test_time`, `proxy_status`, `Remarks` list_i = [PROXY_IP,PROXY_PORT,PROXY_COUNTRY,PROXY_TYPE,addtime,Last_test_time,proxy_status,Remarks] - + #print list_i proxy_list_json.append(list_i) print proxy_list_json