-
Notifications
You must be signed in to change notification settings - Fork 7
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
7 changed files
with
204 additions
and
5 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -36,3 +36,4 @@ lerna-debug.log* | |
|
||
/.env | ||
/secrets | ||
/.envrc |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
import { AzureOCRService } from './azure-ocr.service' | ||
import { readFile } from 'fs/promises' | ||
|
||
// Integration test for AzureOCRService: it talks to the real Azure endpoint
// configured through the AZURE_ENDPOINT / AZURE_CREDENTIALS environment
// variables and recognizes a sample image shipped in the repository.
let azureOCR: AzureOCRService
let image: Buffer

// recognize() may poll the service up to 30 times with a 500 ms pause between
// polls (15 s of waiting plus network latency), which is longer than the test
// runner's usual default per-test timeout (5 s in Jest). Give the live call an
// explicit, generous budget so slow responses do not produce flaky failures.
const OCR_TEST_TIMEOUT_MS = 60000

beforeEach(async () => {
  // A fresh service instance and image buffer for every test case.
  azureOCR = new AzureOCRService({
    enable: true,
    driver: 'azure',
    endpoint: process.env.AZURE_ENDPOINT,
    credentials: process.env.AZURE_CREDENTIALS,
  })
  image = await readFile('docs/assets/search-ui.jpg')
})

test(
  'simple ocr',
  async () => {
    const result = await azureOCR.recognize(image)
    // Join every recognized line so the assertions do not depend on how the
    // service splits the text into lines.
    const texts = result.map((x) => x.text).join('\n')
    expect(texts).toContain('搜索界面')
    expect(texts).toContain('Telegram')
    expect(texts).toContain('Archive')
    expect(texts).toContain('Server')
    expect(texts).toContain('宣传图')
  },
  OCR_TEST_TIMEOUT_MS,
)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,79 @@ | ||
import type { OCRService, OCRResponse } from './ocr.service' | ||
import { Inject, Injectable } from '@nestjs/common' | ||
import ocrConfig from '../config/ocr.config' | ||
import { ConfigType } from '@nestjs/config' | ||
import Debug from 'debug' | ||
import { ComputerVisionClient } from '@azure/cognitiveservices-computervision' | ||
import { CognitiveServicesCredentials } from '@azure/ms-rest-azure-js' | ||
|
||
const debug = Debug('app:ocr:azure') | ||
|
||
/**
 * OCR service implementation that delegates text recognition to the Azure
 * Computer Vision client (`readInStream` + `getReadResult`).
 */
@Injectable()
export class AzureOCRService implements OCRService {
  private client: ComputerVisionClient

  /**
   * Builds the Computer Vision client from the injected OCR configuration.
   *
   * @param ocrCfg OCR configuration; `credentials` and `endpoint` are used to
   *   construct the Azure client and are asserted to be present.
   */
  public constructor(
    @Inject(ocrConfig.KEY) ocrCfg: ConfigType<typeof ocrConfig>,
  ) {
    const credentials = new CognitiveServicesCredentials(ocrCfg.credentials!)
    const client = new ComputerVisionClient(credentials, ocrCfg.endpoint!)
    this.client = client

    debug('init azure vision')
  }

  /**
   * Uploads an image to Azure, waits for the asynchronous read operation to
   * finish and converts every recognized line into an OCR response entry.
   *
   * @param image Raw image bytes; wrapped in a `Buffer` when it is not one already.
   * @returns One entry per recognized line, with its text and vertices.
   * @throws Error `'failed to recognize'` when the operation ends in a
   *   non-success status, `'task timeout'` when it does not finish within the
   *   polling budget.
   */
  public async recognize(image: Uint8Array): Promise<OCRResponse> {
    const imgBuffer = image instanceof Buffer ? image : Buffer.from(image)

    debug('uploading file to azure vision')
    const request = await this.client.readInStream(imgBuffer, {
      readingOrder: 'natural',
    })

    // The operation id does not change between polls, so read it once.
    // NOTE(review): the id is taken from the `apim-request-id` response header,
    // as before; the SDK samples derive it from `operationLocation` — confirm
    // the two always match.
    const operationId = request._response.parsedHeaders['apim-request-id']
    const results = await this.pollReadResults(operationId)

    const textParts: OCRResponse = []

    if (results) {
      for (const page of results) {
        for (const line of page.lines) {
          textParts.push({
            text: line.text,
            // boundingBox is split into pairs and each pair becomes one
            // { x, y } vertex (presumably a flat x1, y1, x2, y2, ... list).
            vertices: chunk(line.boundingBox, 2).map(([x, y]) => ({ x, y })),
          })
        }
      }
    }

    return textParts
  }

  /**
   * Polls the read operation until it succeeds, fails, or the polling budget
   * (30 attempts, 500 ms apart) is exhausted.
   *
   * @param operationId Identifier of the read operation to poll.
   * @returns The per-page read results, or `undefined` when the succeeded
   *   response carries no `analyzeResult`.
   */
  private async pollReadResults(operationId: string) {
    const totalTimes = 30
    for (let i = 0; i < totalTimes; i++) {
      await new Promise((r) => setTimeout(r, 500))

      const result = await this.client.getReadResult(operationId)
      const body = result._response.parsedBody

      // Both 'notStarted' and 'running' mean the operation is still pending;
      // treating 'notStarted' as a failure would abort a perfectly healthy
      // request that simply has not been picked up yet.
      if (body.status === 'notStarted' || body.status === 'running') {
        debug(`task ${body.status} ${i + 1}/${totalTimes}...`)
        continue
      }

      if (body.status === 'succeeded') {
        debug('task success')
        return body.analyzeResult?.readResults
      }

      // Any other status is terminal and unsuccessful.
      throw new Error('failed to recognize')
    }
    throw new Error('task timeout')
  }
}
|
||
/**
 * Splits `array` into consecutive slices of at most `size` elements, keeping
 * the original order. The last slice is shorter when the length is not a
 * multiple of `size`; an empty input yields an empty result.
 *
 * @param array Elements to split; not mutated.
 * @param size Number of elements per slice; must be a positive integer.
 * @returns The slices, in order.
 * @throws RangeError when `size` is not a positive integer (a zero or negative
 *   size would never advance the index and loop forever).
 */
function chunk<T>(array: readonly T[], size: number): T[][] {
  if (!Number.isInteger(size) || size <= 0) {
    throw new RangeError(`chunk size must be a positive integer, got ${size}`)
  }

  const chunkedArray: T[][] = []
  for (let i = 0; i < array.length; i += size) {
    chunkedArray.push(array.slice(i, i + size))
  }
  return chunkedArray
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -56,6 +56,61 @@ | |
ora "5.4.1" | ||
rxjs "6.6.7" | ||
|
||
"@azure/abort-controller@^2.0.0": | ||
version "2.1.1" | ||
resolved "https://registry.yarnpkg.com/@azure/abort-controller/-/abort-controller-2.1.1.tgz#ad4a964ce50a1eaed70ed2d2ef77c8de5708d10b" | ||
integrity sha512-NhzeNm5zu2fPlwGXPUjzsRCRuPx5demaZyNcyNYJDqpa/Sbxzvo/RYt9IwUaAOnDW5+r7J9UOE6f22TQnb9nhQ== | ||
dependencies: | ||
tslib "^2.6.2" | ||
|
||
"@azure/cognitiveservices-computervision@^8.2.0": | ||
version "8.2.0" | ||
resolved "https://registry.yarnpkg.com/@azure/cognitiveservices-computervision/-/cognitiveservices-computervision-8.2.0.tgz#1ee5fb516dfd3c65edd4632faf47dd2ca8a59e3f" | ||
integrity sha512-wcl9vbZrenrMStRvSCY6cA2ZkRoQpDwJNLOnRtQyNJKZ4uNrfpDfBTp+fPS+BdPn7PHA+kMckvXTABqU8OoS/w== | ||
dependencies: | ||
"@azure/ms-rest-js" "^2.0.4" | ||
tslib "^1.10.0" | ||
|
||
"@azure/core-auth@^1.1.4": | ||
version "1.7.1" | ||
resolved "https://registry.yarnpkg.com/@azure/core-auth/-/core-auth-1.7.1.tgz#ca75bc663b6463602fb10471db60f09368a1a3d2" | ||
integrity sha512-dyeQwvgthqs/SlPVQbZQetpslXceHd4i5a7M/7z/lGEAVwnSluabnQOjF2/dk/hhWgMISusv1Ytp4mQ8JNy62A== | ||
dependencies: | ||
"@azure/abort-controller" "^2.0.0" | ||
"@azure/core-util" "^1.1.0" | ||
tslib "^2.6.2" | ||
|
||
"@azure/core-util@^1.1.0": | ||
version "1.8.1" | ||
resolved "https://registry.yarnpkg.com/@azure/core-util/-/core-util-1.8.1.tgz#4a14ddb338dc1acf2ea7628b5b1cccdb5b6fbfbf" | ||
integrity sha512-L3voj0StUdJ+YKomvwnTv7gHzguJO+a6h30pmmZdRprJCM+RJlGMPxzuh4R7lhQu1jNmEtaHX5wvTgWLDAmbGQ== | ||
dependencies: | ||
"@azure/abort-controller" "^2.0.0" | ||
tslib "^2.6.2" | ||
|
||
"@azure/ms-rest-azure-js@^2.1.0": | ||
version "2.1.0" | ||
resolved "https://registry.yarnpkg.com/@azure/ms-rest-azure-js/-/ms-rest-azure-js-2.1.0.tgz#8c90b31468aeca3146b06c7144b386fd4827f64c" | ||
integrity sha512-CjZjB8apvXl5h97Ck6SbeeCmU0sk56YPozPtTyGudPp1RGoHXNjFNtoOvwOG76EdpmMpxbK10DqcygI16Lu60Q== | ||
dependencies: | ||
"@azure/core-auth" "^1.1.4" | ||
"@azure/ms-rest-js" "^2.2.0" | ||
tslib "^1.10.0" | ||
|
||
"@azure/ms-rest-js@^2.0.4", "@azure/ms-rest-js@^2.2.0": | ||
version "2.7.0" | ||
resolved "https://registry.yarnpkg.com/@azure/ms-rest-js/-/ms-rest-js-2.7.0.tgz#8639065577ffdf4946951e1d246334ebfd72d537" | ||
integrity sha512-ngbzWbqF+NmztDOpLBVDxYM+XLcUj7nKhxGbSU9WtIsXfRB//cf2ZbAG5HkOrhU9/wd/ORRB6lM/d69RKVjiyA== | ||
dependencies: | ||
"@azure/core-auth" "^1.1.4" | ||
abort-controller "^3.0.0" | ||
form-data "^2.5.0" | ||
node-fetch "^2.6.7" | ||
tslib "^1.10.0" | ||
tunnel "0.0.6" | ||
uuid "^8.3.2" | ||
xml2js "^0.5.0" | ||
|
||
"@babel/[email protected]": | ||
version "7.12.11" | ||
resolved "https://registry.yarnpkg.com/@babel/code-frame/-/code-frame-7.12.11.tgz#f4ad435aa263db935b8f10f2c552d23fb716a63f" | ||
|
@@ -1978,7 +2033,7 @@ colors@^1.1.2: | |
resolved "https://registry.yarnpkg.com/colors/-/colors-1.4.0.tgz#c50491479d4c1bdaed2c9ced32cf7c7dc2360f78" | ||
integrity sha512-a+UqTh4kgZg/SlGvfbzDHpgRu7AAQOmmqRHJnxhRZICKFUT91brVhNNt58CMWU9PsBbv3PDCZUHbVxuDiH2mtA== | ||
|
||
combined-stream@^1.0.8: | ||
combined-stream@^1.0.6, combined-stream@^1.0.8: | ||
version "1.0.8" | ||
resolved "https://registry.yarnpkg.com/combined-stream/-/combined-stream-1.0.8.tgz#c3d45a8b34fd730631a110a8a2520682b31d5a7f" | ||
integrity sha512-FQN4MRfuJeHf7cBbBMJFXhKSDq+2kAArBlmRBvcvFE5BB1HZKXtSFASDhdlz9zOYwxh8lDdnvmMOe/+5cdoEdg== | ||
|
@@ -2932,6 +2987,15 @@ [email protected]: | |
semver "^7.3.2" | ||
tapable "^1.0.0" | ||
|
||
form-data@^2.5.0: | ||
version "2.5.1" | ||
resolved "https://registry.yarnpkg.com/form-data/-/form-data-2.5.1.tgz#f2cbec57b5e59e23716e128fe44d4e5dd23895f4" | ||
integrity sha512-m21N3WOmEEURgk6B9GLOE4RuWOFf28Lhh9qGYeNlGq4VDXUlJy2th2slBNU8Gp8EzloYZOibZJ7t5ecIrFSjVA== | ||
dependencies: | ||
asynckit "^0.4.0" | ||
combined-stream "^1.0.6" | ||
mime-types "^2.1.12" | ||
|
||
form-data@^3.0.0: | ||
version "3.0.1" | ||
resolved "https://registry.yarnpkg.com/form-data/-/form-data-3.0.1.tgz#ebd53791b78356a99af9a300d4282c4d5eb9755f" | ||
|
@@ -4660,7 +4724,7 @@ node-fetch@2, node-fetch@^2.6.1: | |
dependencies: | ||
whatwg-url "^5.0.0" | ||
|
||
node-fetch@^2.6.12: | ||
node-fetch@^2.6.12, node-fetch@^2.6.7: | ||
version "2.7.0" | ||
resolved "https://registry.yarnpkg.com/node-fetch/-/node-fetch-2.7.0.tgz#d0f0fa6e3e2dc1d27efcd8ad99d550bda94d187d" | ||
integrity sha512-c4FRfUm/dbcWZ7U+1Wq0AwCyFL+3nt2bEw05wfxSz+DWpWsitgmSgYmy2dQdWyKC1694ELPqMs/YzUSNozLt8A== | ||
|
@@ -5336,6 +5400,11 @@ safe-regex2@^2.0.0: | |
resolved "https://registry.yarnpkg.com/safer-buffer/-/safer-buffer-2.1.2.tgz#44fa161b0187b9549dd84bb91802f9bd8385cd6a" | ||
integrity sha512-YZo3K82SD7Riyi0E1EQPojLz7kpepnSQI9IyPbHHg1XXXevb5dJI7tpyN2ADxGcQbHG7vcyRHk0cbwqcQriUtg== | ||
|
||
sax@>=0.6.0: | ||
version "1.3.0" | ||
resolved "https://registry.yarnpkg.com/sax/-/sax-1.3.0.tgz#a5dbe77db3be05c9d1ee7785dbd3ea9de51593d0" | ||
integrity sha512-0s+oAmw9zLl1V1cS9BtZN7JAd0cW5e0QH4W3LWEK6a4LaLEA2OTpGYWDY+6XasBLtz6wkm3u1xRw95mRuJ59WA== | ||
|
||
saxes@^5.0.1: | ||
version "5.0.1" | ||
resolved "https://registry.yarnpkg.com/saxes/-/saxes-5.0.1.tgz#eebab953fa3b7608dbe94e5dadb15c888fa6696d" | ||
|
@@ -5925,11 +5994,16 @@ [email protected]: | |
resolved "https://registry.yarnpkg.com/tslib/-/tslib-2.3.1.tgz#e8a335add5ceae51aa261d32a490158ef042ef01" | ||
integrity sha512-77EbyPPpMz+FRFRuAFlWMtmgUWGe9UOG2Z25NqCwiIjRhOf5iKGuzSe5P2w1laq+FkRy4p+PCuVkJSGkzTEKVw== | ||
|
||
tslib@^1.14.1, tslib@^1.8.1, tslib@^1.9.0: | ||
tslib@^1.10.0, tslib@^1.14.1, tslib@^1.8.1, tslib@^1.9.0: | ||
version "1.14.1" | ||
resolved "https://registry.yarnpkg.com/tslib/-/tslib-1.14.1.tgz#cf2d38bdc34a134bcaf1091c41f6619e2f672d00" | ||
integrity sha512-Xni35NKzjgMrwevysHTCArtLDpPvye8zV/0E4EyYn43P7/7qvQwPh9BGkHewbMulVntbigmcT7rdX3BNo9wRJg== | ||
|
||
tslib@^2.6.2: | ||
version "2.6.2" | ||
resolved "https://registry.yarnpkg.com/tslib/-/tslib-2.6.2.tgz#703ac29425e7b37cd6fd456e92404d46d1f3e4ae" | ||
integrity sha512-AEYxH93jGFPn/a2iVAwW87VuUIkR1FVUKB77NwMF7nBTDkDrrT/Hpt/IrCJ0QXhW27jTBDcf5ZY7w6RiqTMw2Q== | ||
|
||
tslib@~2.1.0: | ||
version "2.1.0" | ||
resolved "https://registry.yarnpkg.com/tslib/-/tslib-2.1.0.tgz#da60860f1c2ecaa5703ab7d39bc05b6bf988b97a" | ||
|
@@ -5942,6 +6016,11 @@ tsutils@^3.21.0: | |
dependencies: | ||
tslib "^1.8.1" | ||
|
||
[email protected]: | ||
version "0.0.6" | ||
resolved "https://registry.yarnpkg.com/tunnel/-/tunnel-0.0.6.tgz#72f1314b34a5b192db012324df2cc587ca47f92c" | ||
integrity sha512-1h/Lnq9yajKY2PEbBadPXj3VxsDDu844OnaAo52UVmIzIvwwtBPIuNvkjuzBlTWpfJyUbG3ez0KSBibQkj4ojg== | ||
|
||
type-check@^0.4.0, type-check@~0.4.0: | ||
version "0.4.0" | ||
resolved "https://registry.yarnpkg.com/type-check/-/type-check-0.4.0.tgz#07b8203bfa7056c0657050e3ccd2c37730bab8f1" | ||
|
@@ -6275,6 +6354,19 @@ xml-name-validator@^3.0.0: | |
resolved "https://registry.yarnpkg.com/xml-name-validator/-/xml-name-validator-3.0.0.tgz#6ae73e06de4d8c6e47f9fb181f78d648ad457c6a" | ||
integrity sha512-A5CUptxDsvxKJEU3yO6DuWBSJz/qizqzJKOMIfUJHETbBw/sFaDxgd6fxm1ewUaM0jZ444Fc5vC5ROYurg/4Pw== | ||
|
||
xml2js@^0.5.0: | ||
version "0.5.0" | ||
resolved "https://registry.yarnpkg.com/xml2js/-/xml2js-0.5.0.tgz#d9440631fbb2ed800203fad106f2724f62c493b7" | ||
integrity sha512-drPFnkQJik/O+uPKpqSgr22mpuFHqKdbS835iAQrUC73L2F5WkboIRd63ai/2Yg6I1jzifPFKH2NTK+cfglkIA== | ||
dependencies: | ||
sax ">=0.6.0" | ||
xmlbuilder "~11.0.0" | ||
|
||
xmlbuilder@~11.0.0: | ||
version "11.0.1" | ||
resolved "https://registry.yarnpkg.com/xmlbuilder/-/xmlbuilder-11.0.1.tgz#be9bae1c8a046e76b31127726347d0ad7002beb3" | ||
integrity sha512-fDlsI/kFEx7gLvbecc0/ohLG50fugQp8ryHzMTuW9vSa1GJ0XYWKnhsUx7oie3G98+r56aTQIUB4kht42R3JvA== | ||
|
||
xmlchars@^2.2.0: | ||
version "2.2.0" | ||
resolved "https://registry.yarnpkg.com/xmlchars/-/xmlchars-2.2.0.tgz#060fe1bcb7f9c76fe2a17db86a9bc3ab894210cb" | ||
|