feat: paddle ocr web; upgrade nodejs

oott123 · Sep 9, 2023 · eb3bba0 · eb3bba0
1 parent 00bd130
commit eb3bba0
Show file tree

Hide file tree

Showing 8 changed files with 75 additions and 7 deletions.
diff --git a/.env.example b/.env.example
@@ -68,17 +68,17 @@ CACHE_REDIS_KEY_PREFIX=tas_
 # Enable OCR, if OCR is not enabled then texts in image will not be searchable
 OCR_ENABLE=false
 
-# OCR Driver, 'google' | 'custom' | string, string is the driver module installed from npm
+# OCR Driver, 'google' | 'paddle-ocr-web'
 OCR_DRIVER=google
 
-# OCR Enpoint
+# OCR Endpoint
 # for google, use 'eu-vision.googleapis.com' or 'us-vision.googleapis.com'
-# for custom, use full base url of your endpoint, like 'https://www.example.com/api/v1/ocr'
+# for paddle-ocr-web, use the full url of its api, like 'http://127.0.0.1:8980/api'
 OCR_ENDPOINT=eu-vision.googleapis.com
 
 # OCR Credentials
 # for google, this will be ignored, you should set GOOGLE_APPLICATION_CREDENTIALS below
-# for custom, use the bearer token of your endpoint
+# for paddle-ocr-web, this will be ignored
 OCR_CREDENTIALS=
 
 ## Queue Config ##

diff --git a/.nvmrc b/.nvmrc
@@ -0,0 +1 @@
+18
diff --git a/Dockerfile b/Dockerfile
@@ -1,11 +1,11 @@
-FROM node:14 AS builder
+FROM node:18 AS builder
 WORKDIR /app
 COPY package.json yarn.lock /app/
 RUN yarn
 COPY . /app
 RUN yarn build && yarn --production
 
-FROM gcr.io/distroless/nodejs:14
+FROM gcr.io/distroless/nodejs:18
 WORKDIR /app
 COPY --from=builder /app/dist /app/dist
 COPY --from=builder /app/node_modules /app/node_modules

diff --git a/README.md b/README.md
@@ -66,7 +66,7 @@ docker run -d --restart=always --env-file=.env quay.io/oott123/telegram-archive-
 如果没有 Docker 或者不想用 Docker，也可以从源码编译部署。此时你还需要：
 
 - git
-- node 14
+- node 18
 
 ```bash
 git clone https://github.com/oott123/telegram-archive-server.git
@@ -144,6 +144,15 @@ OCR_ENDPOINT=eu-vision.googleapis.com # 或者 us-vision.googleapis.com ，决
 GOOGLE_APPLICATION_CREDENTIALS=/path/to/google/credentials.json # 从 GCP 后台下载的 json 鉴权文件
 ```
 
+##### PaddleOCR
+
+你需要一个 [paddleocr-web](https://github.com/lilydjwg/paddleocr-web) 实例。配置如下：
+
+```bash
+OCR_DRIVER=paddle-ocr-web
+OCR_ENDPOINT=http://127.0.0.1:8980/api
+```
+
 #### 启动不同角色
 
 ```bash

diff --git a/src/ocr/ocr.module.ts b/src/ocr/ocr.module.ts
@@ -4,6 +4,7 @@ import { ModuleRef } from '@nestjs/core'
 import ocrConfig from 'src/config/ocr.config'
 import { GoogleOCRService } from './google-ocr.service'
 import { OCRService } from './ocr.service'
+import { PaddleOCRWebService } from './paddle-ocr-web.service'
 
 @Module({
   providers: [
@@ -18,6 +19,8 @@ import { OCRService } from './ocr.service'
         }
         if (ocrCfg.driver === 'google') {
           return moduleRef.create(GoogleOCRService)
+        } else if (ocrCfg.driver === 'paddle-ocr-web') {
+          return moduleRef.create(PaddleOCRWebService)
         }
 
         try {

diff --git a/src/ocr/paddle-ocr-web.service.ts b/src/ocr/paddle-ocr-web.service.ts
@@ -0,0 +1,89 @@
+import { OCRService, OCRResponse } from './ocr.service'
+import { Inject, Injectable } from '@nestjs/common'
+import ocrConfig from 'src/config/ocr.config'
+import { ConfigType } from '@nestjs/config'
+import Debug from 'debug'
+
+const debug = Debug('app:ocr:paddle-ocr-web')
+
+/**
+ * One recognized item as consumed from the paddle-ocr-web response:
+ * `[points, [text, confidence]]`, each point being `[x, y]`.
+ */
+type PaddleOCRItem = [[number, number][], [string, number]]
+
+/**
+ * OCR driver that uploads images to a paddle-ocr-web HTTP endpoint and maps
+ * the `result` array of its JSON response into an `OCRResponse`.
+ */
+@Injectable()
+export class PaddleOCRWebService implements OCRService {
+  private readonly endpoint: string
+
+  /**
+   * @param ocrCfg OCR configuration; its `endpoint` is used as the URL the
+   *   images are POSTed to.
+   * @throws Error when no endpoint is configured.
+   */
+  public constructor(
+    @Inject(ocrConfig.KEY) ocrCfg: ConfigType<typeof ocrConfig>,
+  ) {
+    // Fail at startup rather than on the first upload with an opaque URL error.
+    if (!ocrCfg.endpoint) {
+      throw new Error('OCR_ENDPOINT is required for the paddle-ocr-web driver')
+    }
+    this.endpoint = ocrCfg.endpoint
+    debug('init paddle-ocr-web with endpoint', this.endpoint)
+  }
+
+  /**
+   * Sends the image to the endpoint as a multipart form (`lang` + `file`)
+   * and returns the recognized text parts.
+   *
+   * @param image Raw image bytes.
+   * @returns One entry per item of the response's `result` array; empty when
+   *   the response carries no `result` array.
+   * @throws Error when the endpoint answers with a non-2xx status.
+   */
+  public async recognize(image: Uint8Array): Promise<OCRResponse> {
+    const imgBuffer = image instanceof Buffer ? image : Buffer.from(image)
+    const imgBlob = new Blob([imgBuffer])
+
+    const form = new FormData()
+    form.append('lang', 'zh-Hans')
+    form.append('file', imgBlob)
+
+    debug('uploading file to paddle-ocr-web')
+    const response = await fetch(this.endpoint, {
+      method: 'POST',
+      body: form,
+    })
+
+    // fetch() resolves on HTTP errors too; without this check an error page
+    // would either break JSON parsing or be treated as "no text found".
+    if (!response.ok) {
+      throw new Error(
+        `paddle-ocr-web request failed: ${response.status} ${response.statusText}`,
+      )
+    }
+
+    const res = await response.json()
+    const result: unknown = res?.result
+    debug('paddle-ocr-web response', result)
+
+    const textParts: OCRResponse = []
+    if (!Array.isArray(result)) {
+      return textParts
+    }
+
+    for (const [box, [text, confidence]] of result as PaddleOCRItem[]) {
+      textParts.push({
+        text,
+        vertices: box.map(([x, y]) => ({ x, y })),
+        confidence,
+      })
+    }
+
+    return textParts
+  }
+}
diff --git a/src/search/index.service.ts b/src/search/index.service.ts
@@ -62,7 +62,7 @@ export class IndexService implements OnModuleDestroy {
   }
 
   public queueMessage(message: MessageIndex) {
-    debug('adding message to queue')
+    debug('adding message to queue', message)
     this.messagesQueue.push(message)
 
     this.writeToCache().catch(console.error)

diff --git a/src/search/meili-search.service.ts b/src/search/meili-search.service.ts
@@ -92,6 +92,7 @@ export class MeiliSearchService {
   }
 
   public async importMessages(messages: MessageIndex[]): Promise<void> {
+    debug('importing messages', messages)
     await this.messagesIndex.addDocuments(messages)
   }
-Original file line number
+Diff line change
@@ Expand Up / @@ -92,6 +92,7 @@ export class MeiliSearchService { @@
       }
       public async importMessages(messages: MessageIndex[]): Promise<void> {
+        debug('importing messages', messages)
         await this.messagesIndex.addDocuments(messages)
       }
@@ Expand Down @@