diff --git a/process_image.py b/process_image.py index 545b84c..18dba63 100644 --- a/process_image.py +++ b/process_image.py @@ -45,7 +45,7 @@ def get_total_advanced(data, text, ocrimg, filename: Path): def get_total(text: str, *args, **kwargs): for total in re.finditer( r'\b(?:tota[li]|kreditkarte|total amount|[mh]astercard)' - r'[\sa-z]*(?:ca[ds]{1,2}|)[\s\$\'§=:]*([0-9\., :]+)', + r'[\sa-z«]*(?:\d[ .]{3,}|)(?:ca[ds]{1,2}|)[\s\$\'§=:]*([0-9\., :]+)', text, re.IGNORECASE, ): @@ -198,7 +198,7 @@ def get_paid_by(text: str): last_four_digits: str = env('CARD_LAST_FOUR_DIGITS') # Eights sometimes come across as B in OCR last_four_digits = re.sub('[38]', '[38B]', last_four_digits) - if re.search(last_four_digits, text): + if re.search(f'card number.*{last_four_digits}', text, re.IGNORECASE): return 'Marcel' return 'Federica' @@ -232,6 +232,7 @@ def process_image(filename: Path): # %% # TODO assert str(filename).endswith('.jpg') + print('process_image', filename) # TODO: It's not really orig anylonger orig = perspective_transform_by_qr(filename) @@ -280,7 +281,6 @@ def process_image(filename: Path): # output, # ) - receiptCnt = None receipt = None # loop over the contours @@ -288,7 +288,7 @@ def process_image(filename: Path): # approximate the contour # https://docs.opencv.org/3.4/dd/d49/tutorial_py_contour_features.html peri = cv2.arcLength(c, True) - approx = cv2.approxPolyDP(c, 0.003 * peri, True) + approx = cv2.approxPolyDP(c, 0.004 * peri, True) # if our approximated contour has four points, then we can # assume we have found the outline of the receipt if len(approx) > 6: @@ -394,6 +394,7 @@ def length(line): paid_by = get_paid_by(text=text) store = get_store(text=text) + print() print('Store:', store) print('Paid by:', paid_by) print('Date:', date) @@ -429,24 +430,12 @@ def length(line): headers={ 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:97.0) ' 'Gecko/20100101 Firefox/97.0', - 'Accept': 'application/json, text/javascript, */*; q=0.01', - 'Accept-Language': 'en-US,en;q=0.5', - 'Accept-Encoding': 'gzip, deflate, br', + # 'Accept': 'application/json, text/javascript, */*; q=0.01', + # 'Accept-Language': 'en-US,en;q=0.5', + # 'Accept-Encoding': 'gzip, deflate, br', 'Referer': 'https://secure.splitwise.com/', 'X-CSRF-Token': env('CSRF_TOKEN'), - # 'Content-Type': 'application/x-www-form-urlencoded', - - # 'Content-Type': 'multipart/form-data', - # 'X-Requested-With': 'XMLHttpRequest', - # 'Origin': 'https://secure.splitwise.com', - # 'DNT': '1', - # 'Connection': 'keep-alive', 'Cookie': env('COOKIE'), - # 'Sec-Fetch-Dest': 'empty', - # 'Sec-Fetch-Mode': 'cors', - # 'Sec-Fetch-Site': 'same-origin', - # 'Pragma': 'no-cache', - # 'Cache-Control': 'no-cache', }, data=data, files={'receipt':