From 49adcc1f9c78a30c96322a7bb98f14c755fff3a1 Mon Sep 17 00:00:00 2001 From: sjh Date: Thu, 28 Jul 2022 13:05:42 +0800 Subject: [PATCH] (feat): azure ocr --- README.md | 3 ++- api_module/main_api.py | 5 +++-- doc/ocr_help/azure.md | 21 +++++++++++++++++++++ ocr_module/azure/__init__.py | 0 ocr_module/azure/azure_ocr.py | 34 ++++++++++++++++++++++++++++++++++ 5 files changed, 60 insertions(+), 3 deletions(-) create mode 100644 doc/ocr_help/azure.md create mode 100644 ocr_module/azure/__init__.py create mode 100644 ocr_module/azure/azure_ocr.py diff --git a/README.md b/README.md index 2d7d839..9d93c66 100644 --- a/README.md +++ b/README.md @@ -109,7 +109,8 @@ GithubAction(可选)添加新的secrets:ocr_type来指定识别类型 | 可选值 | 使用方法 | 支持版本 | | ----------- | ------------------------------------------------------------ | -------- | | baidu_image | [默认方法,需要到百度AI中申请](https://blog.pressed.top/2021/02/14/signUpBaiduOcr/) | 1.3.0 | -| tesseract | [本地ocr识别,Windows推荐,Linux需要安装配置](./doc/ocr_help/tesseract.md) | 1.3.1 | +| tesseract | [本地ocr识别,Windows推荐,Linux需要安装配置](./doc/ocr_help/tesseract.md) | 1.3.1 | +| azure | [需要微软账号申请](./doc/ocr_help/azure.md) | 1.3.2 | ## 可选消息推送 diff --git a/api_module/main_api.py b/api_module/main_api.py index dde80d7..b3f43a3 100644 --- a/api_module/main_api.py +++ b/api_module/main_api.py @@ -28,6 +28,7 @@ "Referer": "https://m.fjcyl.com/login" }) + def init_proxy(): logging.info("正在尝试使用代理IP") module = importlib.import_module("proxy_module.proxy_fetcher") @@ -55,7 +56,7 @@ def get_validate_code() -> str: try: # noinspection PyUnresolvedReferences res = ocrutil.img_ocr(base64.b64encode(resp.content)) - logging.info('获取验证码成功') + logging.info(f'获取验证码成功: {res}') return res except Exception as e: logging.warning(f'获取验证码失败,原因:{e}') @@ -96,7 +97,7 @@ def post_login(username: str, pwd: str, pub_key: str, code: str): } resp = sess.post(url="https://m.fjcyl.com/mobileNologin", - data=post_dict) + data=post_dict, timeout=5) if resp.status_code == requests.codes['ok']: if 
resp.json().get('success'): diff --git a/doc/ocr_help/azure.md b/doc/ocr_help/azure.md new file mode 100644 index 0000000..2ce3c1e --- /dev/null +++ b/doc/ocr_help/azure.md @@ -0,0 +1,21 @@ +# 微软Azure计算机视觉OCR + +## 依赖需求 + +由于Azure支持的图片大小不在范围内,因此需要Pillow缩放图片大小,需要在你的`requirements.txt`中加入`Pillow` + +## 步骤 + + 1. 需要注册微软账号 + 2. 登陆 https://azure.microsoft.com/ + 3. 在免费服务中开通**计算机视觉**服务 + 4. 填写表单,注意选择的区域,建议选择EastAsia + 5. 创建完成后进入该服务>资源管理>密钥和终结点,复制其中一个密钥和终结点,终结点去掉`https://`前缀 + 6. 填写config.json, ocr.ak 为终结点, ocr.sk 为其中一个密钥, 例: + ```json + "ocr": { + "ak": "hksight.cognitiveservices.azure.com", + "sk": "182391820319283019", + "type": "azure" + }, + ``` \ No newline at end of file diff --git a/ocr_module/azure/__init__.py b/ocr_module/azure/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/ocr_module/azure/azure_ocr.py b/ocr_module/azure/azure_ocr.py new file mode 100644 index 0000000..431bea2 --- /dev/null +++ b/ocr_module/azure/azure_ocr.py @@ -0,0 +1,34 @@ +import base64 +from io import BytesIO +import requests +from PIL import Image + +_ENDPOINT = '' +_SECRET_KEY = '' + + +def set_keys(api_key, secret_key): + global _ENDPOINT, _SECRET_KEY + _ENDPOINT = api_key + _SECRET_KEY = secret_key + + +def is_need_keys() -> bool: + return True + + +def get_result(img: bytes) -> str: + img = base64.standard_b64decode(img) + buf = BytesIO() + Image.open(BytesIO(img)) \ + .resize((157,52)) \ + .save(buf, format="JPEG") + url = f"https://{_ENDPOINT}/vision/v3.2/ocr?language=en&detectOrientation=false" + resp = requests.post(url, headers={ + "Ocp-Apim-Subscription-Key": _SECRET_KEY, + "Content-type": "application/octet-stream" + }, data=buf.getvalue(), timeout=10) + body = resp.json() + if resp.status_code != 200: + raise Exception(f"识别失败: {body['error']['message']}") + return body["regions"][0]["lines"][0]["words"][0]["text"]