feat(WIP): basic ocr

oott123 · Oct 23, 2021 · d750cf7
1 parent a01cf7e
commit d750cf7
Show file tree

Hide file tree

Showing 13 changed files with 371 additions and 6 deletions.
diff --git a/.dockerignore b/.dockerignore
@@ -2,3 +2,4 @@
 /node_modules
 /dist
 /Dockerfile
+/secrets
diff --git a/.gitignore b/.gitignore
@@ -35,3 +35,4 @@ lerna-debug.log*
 !.vscode/extensions.json
 
 /.env
+/secrets
diff --git a/ROADMAP.md b/ROADMAP.md
@@ -6,3 +6,4 @@
 - [ ] 支持多条记录合并上下文搜索，应对说话喜欢换行的人
 - [ ] 为没有头像的人生成基于名字的默认头像
 - [ ] 配置消息队列分批大小和超时
+- [ ] 抓取链接归档，并进行索引（可能要新增一个搜索字段）
diff --git a/package.json b/package.json
@@ -21,6 +21,7 @@
     "test:e2e": "jest --config ./test/jest-e2e.json"
   },
   "dependencies": {
+    "@google-cloud/vision": "^2.4.0",
     "@nestjs/common": "^8.0.0",
     "@nestjs/config": "^1.0.2",
     "@nestjs/core": "^8.0.0",

diff --git a/src/app.module.ts b/src/app.module.ts
@@ -13,6 +13,8 @@ import httpConfig from './config/http.config'
 import authConfig from './config/auth.config'
 import { ServeStaticModule } from '@nestjs/serve-static'
 import { join } from 'path'
+import { OcrModule } from './ocr/ocr.module'
+import { QueueModule } from './queue/queue.module'
 import cacheConfig from './config/cache.config'
 import redisStore = require('cache-manager-ioredis')
 
@@ -47,6 +49,8 @@ import redisStore = require('cache-manager-ioredis')
     BotModule,
     UserModule,
     TokenModule,
+    OcrModule,
+    QueueModule,
   ],
   controllers: [AppController],
   providers: [AppService],

diff --git a/src/config/ocr.config.ts b/src/config/ocr.config.ts
@@ -0,0 +1,22 @@
+import { registerAs } from '@nestjs/config'
+
+/**
+ * Configuration registered under the `ocr` namespace, read from environment
+ * variables each time the factory runs.
+ *
+ * - `enable`: true only when `OCR_ENABLE` is exactly the string `'true'`.
+ * - `driver`: `OCR_DRIVER`, or `'google'` when it is unset or empty.
+ * - `endpoint` / `credentials`: `OCR_ENDPOINT` / `OCR_CREDENTIALS` passed
+ *   through unchanged (`undefined` when unset).
+ */
+export default registerAs('ocr', () => {
+  const env = process.env
+  const enable = env.OCR_ENABLE === 'true'
+  const driver = env.OCR_DRIVER || 'google'
+  return {
+    enable,
+    driver,
+    endpoint: env.OCR_ENDPOINT,
+    credentials: env.OCR_CREDENTIALS,
+  }
+})
diff --git a/src/config/queue.config.ts b/src/config/queue.config.ts
@@ -0,0 +1,16 @@
+import { registerAs } from '@nestjs/config'
+
+/**
+ * Configuration registered under the `queue` namespace.
+ *
+ * - `enable`: true only when `QUEUE_ENABLE` is exactly the string `'true'`.
+ * - `amqpUrl`: `QUEUE_AMQP_URL`, or `'amqp://guest@localhost'` when it is
+ *   unset or empty.
+ */
+export default registerAs('queue', () => {
+  const { QUEUE_ENABLE, QUEUE_AMQP_URL } = process.env
+  return {
+    enable: QUEUE_ENABLE === 'true',
+    amqpUrl: QUEUE_AMQP_URL || 'amqp://guest@localhost',
+  }
+})
diff --git a/src/ocr/google-ocr.driver.spec.ts b/src/ocr/google-ocr.driver.spec.ts
@@ -0,0 +1,25 @@
+import { GoogleOCRDriver } from './google-ocr.driver'
+import { readFile } from 'fs/promises'
+
+// Strings expected somewhere in the text recognized from the sample image.
+const expectedTexts = ['搜索界面', 'Telegram', 'Archive', 'Server', '宣传图']
+
+let googleOcr: GoogleOCRDriver
+let image: Buffer
+
+// A fresh driver and image buffer are prepared before every test.
+// NOTE(review): nothing is mocked here, so this presumably calls the real
+// Vision endpoint and needs working credentials — confirm before running in CI.
+beforeEach(async () => {
+  googleOcr = new GoogleOCRDriver()
+  await googleOcr.config('eu-vision.googleapis.com')
+  image = await readFile('docs/assets/search-ui.jpg')
+})
+
+test('simple ocr', async () => {
+  const blocks = await googleOcr.recognize(image)
+  const texts = blocks.map((block) => block.text).join('\n')
+  for (const expected of expectedTexts) {
+    expect(texts).toContain(expected)
+  }
+})
diff --git a/src/ocr/google-ocr.driver.ts b/src/ocr/google-ocr.driver.ts
@@ -0,0 +1,45 @@
+import { OCRDriverInterface, OCRResponse } from './ocr-driver.interface'
+import { ImageAnnotatorClient } from '@google-cloud/vision'
+
+/**
+ * OCR driver that delegates text detection to Google Cloud Vision through
+ * `ImageAnnotatorClient`.
+ *
+ * `config()` must complete before `recognize()` is called.
+ */
+export class GoogleOCRDriver implements OCRDriverInterface {
+  // Stays undefined until config() runs; recognize() checks for that.
+  private client?: ImageAnnotatorClient
+
+  /**
+   * Creates the Vision client.
+   *
+   * NOTE(review): `OCRDriverInterface.config` also declares a `credentials`
+   * argument; this driver does not accept or use it yet — confirm how
+   * credentials are meant to reach the client.
+   *
+   * @param endpoint - Forwarded to the client as its `apiEndpoint` option.
+   */
+  async config(endpoint: string): Promise<void> {
+    this.client = new ImageAnnotatorClient({ apiEndpoint: endpoint })
+  }
+
+  /**
+   * Runs text detection on an image.
+   *
+   * @param image - Image bytes; anything that is not already a `Buffer` is
+   *   copied into one first.
+   * @returns A one-element array holding the full detected text, or an empty
+   *   array when the response carries no text.
+   * @throws Error if called before `config()`.
+   */
+  public async recognize(image: Uint8Array): Promise<OCRResponse> {
+    if (!this.client) {
+      throw new Error('GoogleOCRDriver.recognize() called before config()')
+    }
+    const imgBuffer = image instanceof Buffer ? image : Buffer.from(image)
+    const [annotation] = await this.client.textDetection(imgBuffer)
+    const text = annotation.fullTextAnnotation?.text
+    return text ? [{ text }] : []
+  }
+}
diff --git a/src/ocr/ocr-driver.interface.ts b/src/ocr/ocr-driver.interface.ts
@@ -0,0 +1,23 @@
+/** Contract for OCR backends: configure the driver, then recognize images. */
+export interface OCRDriverInterface {
+  /** Prepares the driver with an API endpoint and credentials. */
+  config(endpoint: string, credentials: string): Promise<void>
+
+  /** Extracts text from the given image bytes. */
+  recognize(image: Uint8Array): Promise<OCRResponse>
+}
+
+/**
+ * Result of one recognition call: a list of text entries. Only `text` is
+ * required on each entry.
+ *
+ * NOTE(review): the meaning and units of `vertices`, `confidence` and
+ * `rotation` are not established anywhere in this change — document them
+ * once a driver populates them.
+ */
+export type OCRResponse = {
+  text: string
+  vertices?: { x: number; y: number }[]
+  confidence?: number
+  rotation?: number
+}[]
diff --git a/src/ocr/ocr.module.ts b/src/ocr/ocr.module.ts
@@ -0,0 +1,5 @@
+import { Module } from '@nestjs/common'
+
+/** Nest module for OCR; it registers no providers yet. */
+@Module({ providers: [] })
+export class OcrModule {}
diff --git a/src/queue/queue.module.ts b/src/queue/queue.module.ts
@@ -0,0 +1,5 @@
+import { Module } from '@nestjs/common'
+
+/** Nest module for the queue feature; its metadata is still empty. */
+@Module({})
+export class QueueModule {}
diff --git a/.gitignore b/.gitignore
@@ -35,3 +35,4 @@ lerna-debug.log*
 !.vscode/extensions.json
 
 /.env
+/secrets