-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathperspective_transform_by_qr.py
132 lines (109 loc) · 4.29 KB
/
perspective_transform_by_qr.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
from pathlib import Path
from itertools import permutations
import numpy as np
from pyzbar.pyzbar import decode
from PIL import Image, ImageDraw, ImageFont
from constants import DEBUG, DEBUG_IMAGE_DIR
def find_coeffs(pa, pb):
    '''
    Find coefficients for perspective transformation.

    From http://stackoverflow.com/a/14178717/4414003.

    Computes the eight coefficients (a, b, c, d, e, f, g, h) of the
    projective mapping that sends each point (x, y) of `pa` to the
    corresponding point (u, v) of `pb`:

        u = (a*x + b*y + c) / (g*x + h*y + 1)
        v = (d*x + e*y + f) / (g*x + h*y + 1)

    Args:
        pa: sequence of at least four (x, y) points.
        pb: sequence of the same length with the matching (u, v) points.

    Returns:
        1-D numpy float array of length 8. With exactly four point pairs the
        mapping is exact; with more it is the least-squares fit.

    Raises:
        ValueError: if the inputs are not equally sized sequences of at
            least four 2-D points.
        numpy.linalg.LinAlgError: if the points are degenerate (for example
            collinear) so the coefficients are not uniquely determined.
    '''
    src = np.asarray(pa, dtype=float)
    dst = np.asarray(pb, dtype=float)
    if src.ndim != 2 or src.shape[1] != 2 or src.shape != dst.shape:
        raise ValueError('pa and pb must be equally sized sequences of 2-D points')
    count = len(src)
    if count < 4:
        raise ValueError('at least four point pairs are required')

    x, y = src[:, 0], src[:, 1]
    u, v = dst[:, 0], dst[:, 1]

    # Each correspondence contributes two rows: one for the u equation (even
    # rows) and one for the v equation (odd rows).
    system = np.zeros((2 * count, 8))
    system[0::2, 0] = x
    system[0::2, 1] = y
    system[0::2, 2] = 1
    system[0::2, 6] = -u * x
    system[0::2, 7] = -u * y
    system[1::2, 3] = x
    system[1::2, 4] = y
    system[1::2, 5] = 1
    system[1::2, 6] = -v * x
    system[1::2, 7] = -v * y

    # Flattening row by row yields [u0, v0, u1, v1, ...], matching the
    # interleaved row order of `system`.
    target = dst.reshape(-1)

    # lstsq avoids explicitly inverting A^T A (which squares the condition
    # number) and handles the over-determined case when count > 4.
    coeffs, _residuals, rank, _singular = np.linalg.lstsq(
        system, target, rcond=None)
    if rank < 8:
        raise np.linalg.LinAlgError(
            'degenerate point configuration: perspective coefficients are '
            'not uniquely determined')
    return coeffs
def transform(startpoints, endpoints, im, scale=0.5):
    '''
    Perform a perspective transformation on an image where startpoints are moved
    to endpoints, and the image is stretched accordingly.

    To try to keep the receipt inside the image during the perspective
    transform, the output image is larger than the input by a factor of
    (1 + scale) in both dimensions, and the target points are shifted by half
    of the added size so they stay centred. This is effectively the same as
    making the input image bigger from its center.

    Args:
        startpoints: four (x, y) points in the input image.
        endpoints: the four (x, y) positions those points should end up at,
            expressed in input-image coordinates (before the centring shift).
        im: PIL image to transform.
        scale: fraction by which the output canvas grows relative to the
            input; defaults to 0.5 (output is 1.5x the input size).

    Returns:
        A new, transformed PIL image of size
        (int(width * (1 + scale)), int(height * (1 + scale))).

    Raises:
        ValueError: if scale is negative.
    '''
    if scale < 0:
        raise ValueError('scale must be non-negative')

    width, height = im.size
    out_size = (int(width * (1 + scale)), int(height * (1 + scale)))

    # np.array copies, so the caller's endpoints are never mutated.
    endpoints = np.array(endpoints)
    endpoints[:, 0] += int(width * scale / 2)
    endpoints[:, 1] += int(height * scale / 2)

    # PIL's perspective transform wants the mapping from output pixel
    # coordinates back to input pixel coordinates, hence endpoints first.
    coeffs = find_coeffs(endpoints, startpoints)

    return im.transform(
        out_size,
        Image.PERSPECTIVE,
        coeffs,
        Image.BICUBIC,
    )
def _debug_image_path(filename: Path, tag: str) -> Path:
    """Return the debug-output path for `filename` with `tag` before its suffix."""
    # Built from stem + suffix rather than str.replace('.jpg', ...) so that
    # inputs such as 'IMG.JPG' or 'scan.png' still get distinct debug files
    # instead of all three debug images overwriting one another.
    return DEBUG_IMAGE_DIR / f'{filename.stem}{tag}{filename.suffix}'


def _draw_qr_overlay(image, code, polygon):
    """Return a copy of `image` annotated with the detected QR geometry."""
    debug_image = image.copy()
    draw = ImageDraw.Draw(debug_image)
    left, top, width, height = code.rect
    # Axis-aligned bounding box reported by the decoder.
    draw.rectangle(
        ((left, top), (left + width, top + height)),
        outline='#0080ff',
        width=10,
    )
    # Actual (skewed) outline of the QR code.
    draw.polygon(code.polygon, outline='#e945ff', width=10)
    # Number the corners in the order they were matched to the bounding box.
    font = ImageFont.truetype('Ubuntu-M.ttf', 200, encoding='unic')
    for index, point in enumerate(polygon):
        draw.text(point, str(index), fill='#a00000', font=font)
    return debug_image


def perspective_transform_by_qr(filename: Path):
    """
    Straighten input image by the QR code detected in the image. This has two
    huge advantages:

    1. This helps to find the normal vector of the table instead of relying on
       the four corners of the receipt. The latter is super unreliable since
       receipts can be crumpled, not lying flat on the table, or have their
       corners torn when ripping it off of the printer (or some of someone
       else's receipt that got torn in this way)
    2. Instead of developing a website or app to take a picture and sending it
       to the server, I can simply take a photo with my normal camera app. These
       photos are already automatically sent to my Nextcloud, and downloaded to
       my computer. By running this software on my computer and listening for
       new files in this folder, I can automatically analyze any new pictures
       and abort if they don't contain the special "I'm a receipt, analyze me"
       QR code.

    Landscape images are rotated by -90 degrees before detection. When DEBUG
    is set, intermediate images are written to DEBUG_IMAGE_DIR.

    Args:
        filename: path of the photo to straighten.

    Returns:
        The perspective-corrected PIL image.

    Raises:
        StopIteration: if no QR code with the payload b'ingest me\\n' is found
            in the image.
    """
    print('perspective_transform_by_qr', filename)

    # Keep the file open only while we work on it; everything returned or
    # saved below is a separate image object.
    with Image.open(filename) as opened:
        image = opened
        if image.width > image.height:
            image = image.rotate(-90, Image.NEAREST, expand=True)
        if DEBUG:
            image.save(_debug_image_path(filename, '_01_rotated'))

        code = next(
            candidate for candidate in decode(image)
            if candidate.data == b'ingest me\n'
        )

        left, top, width, height = code.rect
        rect = (
            (left, top),
            (left, top + height),
            (left + width, top),
            (left + width, top + height),
        )

        # The decoder gives no guarantee about which polygon vertex is which
        # corner, so pick the ordering whose vertices lie closest to the
        # corresponding bounding-box corners.
        polygon = min(permutations(code.polygon), key=lambda p: sum(
            np.hypot(a[0] - b[0], a[1] - b[1]) for a, b in zip(p, rect)
        ))

        # The overlay (and its font lookup) is only needed for debug output,
        # so a missing font cannot break a non-debug run.
        debug_image = None
        if DEBUG:
            debug_image = _draw_qr_overlay(image, code, polygon)
            debug_image.save(_debug_image_path(filename, '_02_qr'))

        transformed = transform(
            startpoints=polygon,
            endpoints=rect,
            im=image,
        )

        if DEBUG:
            transform(
                startpoints=polygon,
                endpoints=rect,
                im=debug_image,
            ).save(_debug_image_path(filename, '_03_transformed'))

    return transformed