-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtext_extractor.py
34 lines (28 loc) · 1.03 KB
/
text_extractor.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
from PIL import Image
import PIL.Image
from scan import scanner
import cv2
import numpy as np
import argparse
from pytesseract import image_to_string
import pytesseract
# Parse the command line: the path of the image to scan is mandatory.
ap = argparse.ArgumentParser()
ap.add_argument("-i", "--image", required=True,
                help="Path to the image to be scanned")
args = vars(ap.parse_args())

# Run scan.scanner on the input image (presumably it returns the scanned
# page as an OpenCV image -- confirm against scan.py) and resize the result
# to a fixed 1350x1150 (width x height) size before OCR.
warped = scanner(args["image"])
imS = cv2.resize(warped, (1350, 1150))

# Persist the resized scan; it is reloaded below through PIL for OCR.
# NOTE: there is deliberately no cv2.waitKey() here -- no window is shown,
# so there is no key press to wait for.
cv2.imwrite('Output Image.PNG', imS)

# Extract the text with Tesseract through pytesseract.
image = PIL.Image.open('Output Image.PNG').convert("RGB")
# pytesseract appends `config` verbatim to the tesseract command line, where
# a bare path would be read as a config-file name. The tessdata directory
# therefore has to be passed through the --tessdata-dir option.
tessdata_dir_config = '--tessdata-dir "/usr/share/tesseract-ocr/4.00/tessdata"'
output = pytesseract.image_to_string(image, lang='eng', config=tessdata_dir_config)

# OCR output may contain non-ASCII characters, so write it as UTF-8; the
# context manager closes the file even if the write fails.
with open("output.txt", "w", encoding="utf-8") as text_file:
    text_file.write(output)