Skip to content

Commit

Permalink
feat(WIP): basic ocr
Browse files Browse the repository at this point in the history
  • Loading branch information
oott123 committed Oct 23, 2021
1 parent a01cf7e commit d750cf7
Show file tree
Hide file tree
Showing 13 changed files with 371 additions and 6 deletions.
1 change: 1 addition & 0 deletions .dockerignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,4 @@
/node_modules
/dist
/Dockerfile
/secrets
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -35,3 +35,4 @@ lerna-debug.log*
!.vscode/extensions.json

/.env
/secrets
1 change: 1 addition & 0 deletions ROADMAP.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,4 @@
- [ ] 支持多条记录合并上下文搜索,应对说话喜欢换行的人
- [ ] 为没有头像的人生成基于名字的默认头像
- [ ] 配置消息队列分批大小和超时
- [ ] 抓取链接归档,并进行索引(可能要新增一个搜索字段)
1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
"test:e2e": "jest --config ./test/jest-e2e.json"
},
"dependencies": {
"@google-cloud/vision": "^2.4.0",
"@nestjs/common": "^8.0.0",
"@nestjs/config": "^1.0.2",
"@nestjs/core": "^8.0.0",
Expand Down
4 changes: 4 additions & 0 deletions src/app.module.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ import httpConfig from './config/http.config'
import authConfig from './config/auth.config'
import { ServeStaticModule } from '@nestjs/serve-static'
import { join } from 'path'
import { OcrModule } from './ocr/ocr.module';
import { QueueModule } from './queue/queue.module';
import cacheConfig from './config/cache.config'
import redisStore = require('cache-manager-ioredis')

Expand Down Expand Up @@ -47,6 +49,8 @@ import redisStore = require('cache-manager-ioredis')
BotModule,
UserModule,
TokenModule,
OcrModule,
QueueModule,
],
controllers: [AppController],
providers: [AppService],
Expand Down
8 changes: 8 additions & 0 deletions src/config/ocr.config.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
import { registerAs } from '@nestjs/config'

/**
 * OCR settings registered under the `ocr` config namespace.
 *
 * Values come from environment variables:
 * - `OCR_ENABLE`: OCR is enabled only when this is exactly the string `'true'`.
 * - `OCR_DRIVER`: driver name; falls back to `'google'` when unset or empty.
 * - `OCR_ENDPOINT` / `OCR_CREDENTIALS`: passed through unchanged (may be undefined).
 */
export default registerAs('ocr', () => {
  const { OCR_ENABLE, OCR_DRIVER, OCR_ENDPOINT, OCR_CREDENTIALS } = process.env

  return {
    enable: OCR_ENABLE === 'true',
    driver: OCR_DRIVER || 'google',
    endpoint: OCR_ENDPOINT,
    credentials: OCR_CREDENTIALS,
  }
})
6 changes: 6 additions & 0 deletions src/config/queue.config.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
import { registerAs } from '@nestjs/config'

/**
 * Queue settings registered under the `queue` config namespace.
 *
 * Values come from environment variables:
 * - `QUEUE_ENABLE`: the queue is enabled only when this is exactly the string `'true'`.
 * - `QUEUE_AMQP_URL`: AMQP connection URL; falls back to `'amqp://guest@localhost'`
 *   when unset or empty.
 */
export default registerAs('queue', () => {
  const { QUEUE_ENABLE, QUEUE_AMQP_URL } = process.env

  return {
    enable: QUEUE_ENABLE === 'true',
    amqpUrl: QUEUE_AMQP_URL || 'amqp://guest@localhost',
  }
})
21 changes: 21 additions & 0 deletions src/ocr/google-ocr.driver.spec.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
import { GoogleOCRDriver } from './google-ocr.driver'
import { readFile } from 'fs/promises'

// Shared fixtures, rebuilt before every test.
let googleOcr: GoogleOCRDriver
let image: Buffer

// Create a fresh driver configured for the EU Vision endpoint and load the sample image.
beforeEach(async () => {
  googleOcr = new GoogleOCRDriver()
  await googleOcr.config('eu-vision.googleapis.com')
  image = await readFile('docs/assets/search-ui.jpg')
})

// Runs recognition on the sample image and checks that the joined text
// contains each of the expected strings.
test('simple ocr', async () => {
  const blocks = await googleOcr.recognize(image)
  const texts = blocks.map(({ text }) => text).join('\n')

  const expectedFragments = ['搜索界面', 'Telegram', 'Archive', 'Server', '宣传图']
  for (const fragment of expectedFragments) {
    expect(texts).toContain(fragment)
  }
})
20 changes: 20 additions & 0 deletions src/ocr/google-ocr.driver.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
import { OCRDriverInterface, OCRResponse } from './ocr-driver.interface'
import { ImageAnnotatorClient } from '@google-cloud/vision'

/**
 * OCR driver that delegates text recognition to the Google Cloud Vision
 * `ImageAnnotatorClient`.
 *
 * `config` must be awaited before `recognize` is called; otherwise
 * `recognize` rejects with an explicit error.
 */
export class GoogleOCRDriver implements OCRDriverInterface {
  // Created by `config`; undefined until the driver has been configured.
  private client?: ImageAnnotatorClient

  /**
   * Creates the Vision client for the given API endpoint.
   *
   * NOTE(review): `OCRDriverInterface.config` also passes `credentials`, which
   * this driver does not use. Presumably authentication is left to the client
   * library's defaults; confirm.
   *
   * @param endpoint API endpoint host handed to the client as `apiEndpoint`.
   */
  async config(endpoint: string): Promise<void> {
    this.client = new ImageAnnotatorClient({ apiEndpoint: endpoint })
  }

  /**
   * Runs text detection on an image.
   *
   * @param image Raw image bytes; copied into a `Buffer` unless it already is one.
   * @returns A single entry holding the full detected text, or an empty array
   *   when the response carries no text.
   * @throws Error if `config` has not been called yet.
   */
  public async recognize(image: Uint8Array): Promise<OCRResponse> {
    const client = this.client
    if (!client) {
      // Fail with a clear message instead of a TypeError on an undefined client.
      throw new Error(
        'GoogleOCRDriver is not configured: call config() before recognize()',
      )
    }

    const imgBuffer = image instanceof Buffer ? image : Buffer.from(image)
    const [annotation] = await client.textDetection(imgBuffer)
    const text = annotation.fullTextAnnotation?.text
    if (!text) {
      return []
    }
    return [{ text }]
  }
}
11 changes: 11 additions & 0 deletions src/ocr/ocr-driver.interface.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
/**
 * Contract that OCR drivers implement.
 */
export interface OCRDriverInterface {
/**
 * Prepares the driver with a service endpoint and credentials.
 * NOTE(review): the format of `credentials` (file path vs. inline secret) is
 * not specified here; confirm with the callers.
 */
config(endpoint: string, credentials: string): Promise<void>
/** Runs recognition on raw image bytes and resolves with the recognized entries. */
recognize(image: Uint8Array): Promise<OCRResponse>
}

/**
 * Result of an OCR run: a list of recognized text entries.
 * Only `text` is required on each entry; the remaining fields are optional.
 */
export type OCRResponse = {
  /** The recognized text. */
  text: string
  /** Points with x/y coordinates; presumably the outline of the text region (confirm). */
  vertices?: { x: number; y: number }[]
  /** Recognition confidence; scale not specified here (confirm with the driver). */
  confidence?: number
  /** Rotation of the text; unit not specified here (confirm with the driver). */
  rotation?: number
}[]
6 changes: 6 additions & 0 deletions src/ocr/ocr.module.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
import { Module } from '@nestjs/common'

/**
 * Nest module for the OCR feature.
 * It currently registers no providers (empty `providers` list).
 */
@Module({
providers: [],
})
export class OcrModule {}
4 changes: 4 additions & 0 deletions src/queue/queue.module.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
import { Module } from '@nestjs/common';

/**
 * Nest module for the queue feature.
 * It is declared with empty metadata, so it currently wires up nothing.
 */
@Module({})
export class QueueModule {}
Loading

0 comments on commit d750cf7

Please sign in to comment.