-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathcommon.py
131 lines (102 loc) · 4.6 KB
/
common.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
import cv2
import numpy as np
import imutils
import matplotlib.pyplot as plt
def imgShow(image):
    """Display ``image`` with matplotlib in a 15x2 inch figure using the gray colormap."""
    figure_size = (15, 2)
    plt.figure(figsize=figure_size)
    plt.imshow(image, cmap="gray")
    plt.show()
def grayImage(image):
    """Return ``image`` converted to a single channel with ``cv2.COLOR_BGR2GRAY``."""
    return cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
def autoResize(image, size = (800, 100)):
    """Convert ``image`` to grayscale and fit it onto a canvas of ``size``.

    The image is scaled to the target height while keeping its aspect ratio,
    then padded on the right up to the target width. If the scaled image
    would be wider than the canvas, it is scaled to the target width instead
    and padded at the bottom, so the padding amounts are never negative.

    Args:
        image: image array; either multi-channel (converted through
            ``grayImage``) or an already single-channel 2-D array.
        size: target canvas as ``(width, height)`` in pixels.

    Returns:
        2-D array of shape ``(size[1], size[0])``.
    """
    target_width, target_height = size[0], size[1]

    # COLOR_BGR2GRAY rejects 2-D input, so pass grayscale images through as-is.
    gray = grayImage(image) if image.ndim > 2 else image
    height, width = gray.shape

    new_height = target_height
    new_width = int(target_height / height * width)
    if new_width > target_width:
        # Too wide for the canvas: fit to the width and let the height shrink.
        new_width = target_width
        new_height = max(1, int(target_width / width * height))
    # Extremely tall, narrow inputs can truncate to 0, which cv2.resize rejects.
    new_width = max(1, new_width)

    gray = cv2.resize(gray, (new_width, new_height))

    pad_bottom = target_height - gray.shape[0]
    pad_right = target_width - gray.shape[1]
    # 'maximum' fills the padding with the maximum value found along each axis.
    return np.pad(gray, ((0, pad_bottom), (0, pad_right)), 'maximum')
def orcPreprocess(image, default_fixed_size = (800, 100), default_filter_size = (10, 15), debug = False):
    """Extract a binary image of the character strokes from ``image``.

    Steps: resize/pad onto a fixed canvas, Gaussian blur, adaptive threshold,
    inverted Otsu threshold, distance transform followed by another Otsu
    threshold, morphological opening, and finally masking with the dilated
    convex hull of all contours that are large enough.

    Args:
        image: input image, forwarded to ``autoResize``.
        default_fixed_size: ``(width, height)`` of the working canvas.
        default_filter_size: minimum ``(width, height)`` a contour's bounding
            box must have for the contour to be kept as a character.
        debug: when true, intermediate images are displayed with ``imgShow``.

    Returns:
        The opened binary image restricted to the dilated convex hull of the
        kept contours. When no contour passes the size filter, a
        0-dimensional ``uint8`` zero array is returned instead.
    """
    img = autoResize(image, default_fixed_size)

    # Safeguard: collapse to one channel if the resized image still has several.
    if img.ndim > 2:
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    else:
        gray = img.copy()
    if debug:
        imgShow(gray)

    blur = cv2.GaussianBlur(gray, (5, 5), 0)
    th1 = cv2.adaptiveThreshold(
        blur, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11, 2
    )
    # Invert with Otsu's method so the distance transform below measures
    # each non-zero pixel's distance to the closest zero pixel.
    th2 = cv2.threshold(th1, 0, 255, cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)[1]

    dist = cv2.distanceTransform(th2, cv2.DIST_L2, 5)
    # Normalize distances to [0, 1], then rescale to 8-bit [0, 255].
    dist = cv2.normalize(dist, dist, 0, 1.0, cv2.NORM_MINMAX)
    dist = (dist * 255).astype("uint8")
    # Binarize the distance map with Otsu's method.
    dist = cv2.threshold(dist, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1]
    if debug:
        imgShow(dist)

    # An "opening" disconnects components that are joined by thin bridges.
    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 2))
    opening = cv2.morphologyEx(dist, cv2.MORPH_OPEN, kernel)
    if debug:
        imgShow(opening)

    cnts = cv2.findContours(opening.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    cnts = imutils.grab_contours(cnts)

    # Keep only contours whose bounding box is at least
    # default_filter_size[0] wide and default_filter_size[1] tall.
    min_width, min_height = default_filter_size[0], default_filter_size[1]
    chars = []
    for c in cnts:
        (x, y, w, h) = cv2.boundingRect(c)
        if w >= min_width and h >= min_height:
            chars.append(c)

    if not chars:
        # NOTE: this is a 0-d array, not an image-shaped one; callers must
        # handle it explicitly.
        return np.zeros((), dtype = "uint8")

    # Convex hull around the points of every kept contour.
    hull = cv2.convexHull(np.vstack(chars))

    # Draw the filled hull on an empty mask and enlarge it via dilation.
    mask = np.zeros(img.shape[:2], dtype="uint8")
    cv2.drawContours(mask, [hull], -1, 255, -1)
    mask = cv2.dilate(mask, None, iterations=2)

    # Keep only the part of the opened image that lies inside the mask.
    final = cv2.bitwise_and(opening, opening, mask=mask)
    if debug:
        imgShow(final)
    return final
# Recognizer alphabet: "-", the ASCII letters, and accented Latin letters
# (the set appears to match Vietnamese orthography), one character per entry,
# ordered by code point. A character's position in this list is its label index.
CHAR_LIST = list("-ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzÀÁÂÃÈÉÊÌÍÒÓÔÕÙÚÝàáâãèéêìíòóôõùúýĂăĐđĨĩŨũƠơƯưẠạẢảẤấẦầẨẩẪẫẬậẮắẰằẲẳẴẵẶặẸẹẺẻẼẽẾếỀềỂểỄễỆệỈỉỊịỌọỎỏỐốỒồỔổỖỗỘộỚớỜờỞởỠỡỢợỤụỦủỨứỪừỬửỮữỰựỲỳỴỵỶỷỸỹ")
CHAR_LIST.sort()
# Convert text to a list of indices into CHAR_LIST.
def encodeText(txt):
    """Encode ``txt`` as the list of CHAR_LIST indices of its characters.

    Characters that are not in CHAR_LIST are reported on stdout and skipped,
    so the result can be shorter than ``txt``.

    Args:
        txt: iterable of single characters (typically a ``str``).

    Returns:
        List of integer indices, in input order.
    """
    dig_lst = []
    for char in txt:
        try:
            dig_lst.append(CHAR_LIST.index(char))
        except ValueError:
            # list.index raises ValueError for a missing item; catching only
            # that keeps interrupts and unrelated errors visible.
            print("No found in char_list :", char)
    return dig_lst
def decodeText(arr):
    """Decode a sequence of CHAR_LIST indices back into a string.

    Each element is converted with ``int``; negative values are skipped and
    every other value is looked up in CHAR_LIST.
    """
    indices = (int(p) for p in arr)
    return "".join(CHAR_LIST[i] for i in indices if i >= 0)
# import padding library
from tensorflow.keras.preprocessing.sequence import pad_sequences
def pad_listints(txt, max_label_len = 20):
    """Pad a batch of integer sequences to ``max_label_len`` with -1.

    Delegates to Keras ``pad_sequences`` with the filler appended after each
    sequence (``padding='post'``). Truncation of over-long sequences is left
    at the library default.
    """
    padded = pad_sequences(
        txt,
        maxlen=max_label_len,
        padding='post',
        value=-1,
    )
    return padded
def pad_listint(txt, max_label_len = 20):
    """Pad a single integer sequence by running it through ``pad_listints`` as a batch of one."""
    batch = pad_listints([txt], max_label_len)
    return batch[0]
from Levenshtein import distance as lev
def calCER(inp1, inp2):
    """Return the Levenshtein distance between ``inp1`` and ``inp2``.

    NOTE(review): despite the name, the value is the raw edit distance and is
    not divided by any length here; confirm whether callers normalize it.
    """
    edit_distance = lev(inp1, inp2)
    return edit_distance