Merge pull request #93 from 838239178/develop

(feat): 新增 azure ocr
saddit · Jul 28, 2022 · 5a5efff · 5a5efff
2 parents 2948fa0 + 49adcc1
commit 5a5efff
Show file tree

Hide file tree

Showing 5 changed files with 60 additions and 3 deletions.
diff --git a/README.md b/README.md
@@ -109,7 +109,8 @@ GithubAction（可选）添加新的secrets:ocr_type来指定识别类型
 | 可选值      | 使用方法                                                     | 支持版本 |
 | ----------- | ------------------------------------------------------------ | -------- |
 | baidu_image | [默认方法,需要到百度AI中申请](https://blog.pressed.top/2021/02/14/signUpBaiduOcr/) | 1.3.0    |
-| tesseract   | [本地ocr识别,Windows推荐,Linux需要安装配置](./doc/ocr_help/tesseract.md)                   | 1.3.1    |
+| tesseract   | [本地ocr识别,Windows推荐,Linux需要安装配置](./doc/ocr_help/tesseract.md) | 1.3.1    |
+| azure       | [需要微软账号申请](./doc/ocr_help/azure.md)                  | 1.3.2    |
 
 ## 可选消息推送
 

diff --git a/api_module/main_api.py b/api_module/main_api.py
@@ -28,6 +28,7 @@
     "Referer": "https://m.fjcyl.com/login"
 })
 
+
 def init_proxy():
     logging.info("正在尝试使用代理IP")
     module = importlib.import_module("proxy_module.proxy_fetcher")
@@ -55,7 +56,7 @@ def get_validate_code() -> str:
         try:
             # noinspection PyUnresolvedReferences
             res = ocrutil.img_ocr(base64.b64encode(resp.content))
-            logging.info('获取验证码成功')
+            logging.info(f'获取验证码成功: {res}')
             return res
         except Exception as e:
             logging.warning(f'获取验证码失败，原因:{e}')
@@ -96,7 +97,7 @@ def post_login(username: str, pwd: str, pub_key: str, code: str):
     }
 
     resp = sess.post(url="https://m.fjcyl.com/mobileNologin",
-                     data=post_dict)
+                     data=post_dict, timeout=5)
 
     if resp.status_code == requests.codes['ok']:
         if resp.json().get('success'):

diff --git a/doc/ocr_help/azure.md b/doc/ocr_help/azure.md
@@ -0,0 +1,21 @@
+# 微软Azure计算机视觉OCR
+
+## 依赖需求
+
+由于验证码图片的尺寸不在Azure支持的范围内，因此需要使用Pillow缩放图片，请在你的`requirements.txt`中加入`Pillow`
+
+## 步骤
+
+ 1. 需要注册微软账号
+ 2. 登录 https://azure.microsoft.com/
+ 3. 在免费服务中开通**计算机视觉**服务
+ 4. 填写表单，注意选择的区域，建议选择EastAsia
+ 5. 创建完成后进入该服务>资源管理>密钥和终结点，复制其中一个密钥和终结点，终结点去掉`https://`前缀
+ 6. 填写config.json, ocr.ak 为终结点, ocr.sk 为其中一个密钥, 例:
+    ```json
+    "ocr": {
+        "sk": "182391820319283019",
+        "ak": "hksight.cognitiveservices.azure.com",
+        "type": "azure"
+    },
+    ```
diff --git a/ocr_module/azure/__init__.py b/ocr_module/azure/__init__.py
diff --git a/ocr_module/azure/azure_ocr.py b/ocr_module/azure/azure_ocr.py
@@ -0,0 +1,34 @@
+import base64
+from io import BytesIO
+import requests
+from PIL import Image
+
+# Host name of the Azure endpoint (no "https://" prefix; get_result() adds it).
+# Empty until set_keys() is called.
+_ENDPOINT = ''
+# Subscription key sent in the "Ocp-Apim-Subscription-Key" request header.
+# Empty until set_keys() is called.
+_SECRET_KEY = ''
+
+
+def set_keys(api_key, secret_key):
+    global _ENDPOINT, _SECRET_KEY
+    _ENDPOINT = api_key
+    _SECRET_KEY = secret_key
+
+
+def is_need_keys() -> bool:
+    return True
+
+
+def get_result(img: bytes) -> str:
+    img = base64.standard_b64decode(img)
+    buf = BytesIO()
+    Image.open(BytesIO(img)) \
+        .resize((157,52)) \
+        .save(buf, format="JPEG")
+    url = f"https://{_ENDPOINT}/vision/v3.2/ocr?language=en&detectOrientation=false"
+    resp = requests.post(url, headers={
+        "Ocp-Apim-Subscription-Key": _SECRET_KEY,
+        "Content-type": "application/octet-stream"
+    }, data=buf.getvalue(), timeout=10)
+    body = resp.json()
+    if resp.status_code != 200:
+        raise Exception(f"识别失败: {body['error']['message']}")
+    return body["regions"][0]["lines"][0]["words"][0]["text"]