From ae5d88479ee82a0bc54a7032e41fd3f934f645db Mon Sep 17 00:00:00 2001
From: oltodosel
Date: Thu, 2 May 2019 03:56:51 +0300
Subject: [PATCH] inverting images for tesseract 4

---
 src/vobsub2srt.c++ | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/src/vobsub2srt.c++ b/src/vobsub2srt.c++
index 2f9e1a0..b9c131c 100644
--- a/src/vobsub2srt.c++
+++ b/src/vobsub2srt.c++
@@ -265,6 +265,29 @@ int main(int argc, char **argv) {
              << start_pts << ")\n";
       }
 
+      // Tesseract 3.05 and older handle inverted images (light text on a dark
+      // background) without problems, but Tesseract 4.x expects dark text on a
+      // light background, so every pixel value v is replaced by 255 - v.
+      // https://github.com/tesseract-ocr/tesseract/wiki/ImproveQuality#inverting-images
+      bool inverting_images = true;
+
+      if (inverting_images) {
+        // Copy exactly image_size bytes: the calls below address the buffer
+        // via stride and image_size, so a width * height copy would be too
+        // small whenever stride > width.
+        // NOTE(review): presumably image_size is the byte length of image
+        // (dump_pgm below receives both) -- confirm where image_size is set.
+        unsigned char *image_rev = new unsigned char[image_size];
+        for (size_t i = 0; i < image_size; ++i) {
+          image_rev[i] = static_cast<unsigned char>(255 - image[i]);
+        }
+
+        // NOTE(review): image_rev is never released; add delete[] after the
+        // last use of image (outside this hunk) to avoid leaking one buffer
+        // per subtitle.
+        image = image_rev;
+      }
+
       if(dump_images) {
         dump_pgm(subname, sub_counter, width, height, stride, image, image_size);
       }