diff --git a/src/invoice2data/extract/templates/com/com.eur.aliexpress.json b/src/invoice2data/extract/templates/com/com.eur.aliexpress.json new file mode 100644 index 00000000..9e20b486 --- /dev/null +++ b/src/invoice2data/extract/templates/com/com.eur.aliexpress.json @@ -0,0 +1,60 @@ +{ + "issuer": "Alibaba", + "fields": { + "amount": "Amount paid\\s+\\d{1,4}.\\d{2}\\s+(\\d{1,4}.\\d{2})", + "amount_untaxed": "\\s{4}Total\\s+(\\d+.\\d{2})", + "date": { + "parser": "regex", + "regex": "Invoice Date . (\\d{4}-\\d.-\\d.)", + "type": "date" + }, + "invoice_number": "Invoice No.? . ([A-Z]{2}\\d+)", + "static_vat": "IM5280002556" + }, + "lines": { + "start": "Amount.In.+[)]", + "end": "Grant Total", + "first_line": [ + "(?P(\\w+(?:\\S|[ ]\\w\\w+|\\n)*))\\s+(?P\\S)\\s+(?P\\d+.\\d{2})\\s+(?P\\d+.\\d{2})\\s+(?P\\d{2}).\\s+(?P\\d+.\\d{2})\\s+\\s+(?P\\d+.\\d{2})\\s+(?P\\d+.\\d{2})", + "(?POrder Number.\\s+(\\d+))" + ], + "line": "^(?P\\w+(?:\\S|[ ]\\w\\w+|\\n)*)$", + "types": { + "qty": "float", + "price_unit": "float", + "discount": "float", + "line_tax_percent": "float", + "line_tax_amount": "float", + "amounttxcurrency": "float", + "amountcurrency": "float" + } + }, + "keywords": [ + "Alibaba.com Singapore E-Commerce Private Limited" + ], + "options": { + "currency": "EUR", + "languages": [ + "en" + ], + "decimal_separator": ".", + "replace": [ + [ + "\\s-\\s", + "1" + ], + [ + "/", + "_" + ], + [ + "\\n\\n", + "\\n" + ], + [ + "\\n\\s\\s\\s", + "" + ] + ] + } +} \ No newline at end of file diff --git a/src/invoice2data/extract/templates/nl/nl.accor.rhine.opco hotels.json b/src/invoice2data/extract/templates/nl/nl.accor.rhine.opco hotels.json new file mode 100644 index 00000000..1bcfd916 --- /dev/null +++ b/src/invoice2data/extract/templates/nl/nl.accor.rhine.opco hotels.json @@ -0,0 +1,179 @@ +{ + "issuer": "Rhine Opco Hotel", + "fields": { + "amount": { + "parser": "regex", + "regex": [ + "\\s{37}(?:\\s+\\w+){0,4}\\s\\d*[%]?\\s+\\d*[.,]?\\d+[,.]\\d+\\s{11,13}\\d*[.,]?\\d+[,.]\\d+?\\s{2,30}([.,]?\\d+[,.]\\d+)" + ], + "type": "float", + "group": "sum" + }, + "amount_tax": { + "parser": "regex", + "regex": [ + "\\s{37}BTW verrekenbaar(?:\\s+\\w+){0,4}\\s\\d*[%]?\\s+\\d*[.,]?\\d+[,.]\\d+\\s{11,13}(\\d*[.,]?\\d*[,.]?\\d*)\\s{2,30}[.,]?\\d+[,.]\\d+" + ], + "type": "float", + "group": "sum" + }, + "amount_untaxed": { + "parser": "regex", + "regex": [ + "\\s{37}(?:\\s+\\w+){0,4}\\s\\d*[%]?\\s+(\\d*[.,]?\\d+[,.]\\d+)\\s{11,13}\\d*[.,]?\\d+[,.]\\d+?\\s{2,30}[.,]?\\d+[,.]\\d+" + ], + "type": "float", + "group": "sum" + }, + "date": { + "parser": "regex", + "regex": ",\\s(\\d{1,2}[-]\\S{1,4}[-]\\d{2}\\s\\d{2}[:]\\d{2})\\n", + "type": "date", + "group": "first" + }, + "invoice_number": { + "parser": "regex", + "regex": "REKENING\\s+[:]\\s+(.*)" + }, + "iban": { + "parser": "regex", + "regex": "[A-Z]{2}\\d{2}?\\s?\\w{4}?\\d?\\s?\\d{3,4}\\s?\\d{4}?\\s\\d{0,2}" + }, + "bic": { + "parser": "regex", + "regex": "(?i)BIC[:]\\s+(\\w{8,11})", + "group": "first" + }, + "vat": { + "parser": "regex", + "regex": "\\s(NL\\d{4}[.]?\\d{2}[.]?\\d{3}[.]?B[.]?\\d{2})\\s" + }, + "partner_website": { + "parser": "static", + "value": "all.accor.com" + }, + "partner_name": { + "parser": "regex", + "regex": "(Rhine.+Opco\\sBV)\\s" + }, + "country_code": { + "parser": "regex", + "area": { + "f": 1, + "l": 1, + "x": 0, + "y": 0, + "r": 100, + "W": 270, + "H": 240 + }, + "regex": "nl" + }, + "partner_zip": { + "parser": "regex", + "area": { + "f": 1, + "l": 1, + "x": 0, + "y": 0, + "r": 100, + "W": 270, + "H": 240 + }, + "regex": "\\s(\\d{4}\\s?[A-Z]{2})\\s" + }, + "partner_city": { + "parser": "regex", + "area": { + "f": 1, + "l": 1, + "x": 0, + "y": 0, + "r": 100, + "W": 270, + "H": 240 + }, + "regex": "\\d{4}\\s?[A-Z]{2}\\s(\\w+(?:\\s\\w+)*)\\s" + }, + "partner_street": { + "parser": "regex", + "area": { + "f": 1, + "l": 1, + "x": 0, + "y": 0, + "r": 100, + "W": 270, + "H": 240 + }, + "regex": "^(\\w+.*\\s\\d+)\\n" + }, + "telephone": { + "parser": "regex", + "area": { + "f": 1, + "l": 1, + "x": 0, + "y": 0, + "r": 100, + "W": 270, + "H": 240 + }, + "regex": "Tel[.][:]\\s+(.+)" + }, + "partner_email": { + "parser": "regex", + "area": { + "f": 1, + "l": 1, + "x": 0, + "y": 0, + "r": 100, + "W": 270, + "H": 240 + }, + "regex": "\\s+(\\w+[@]\\w+[.]nl)\\s" + }, + "partner_coc": { + "parser": "regex", + "regex": "C[.]o[.]C[.]\\snr[.](\\d{8})" + }, + "lines": { + "parser": "lines", + "start": "Datum\\s+Omschrijving", + "end": "\\s+Totaal", + "line": "^\\s{37}(?P\\d{2}[-]\\d{2}[-]\\d{2})\\s{13}(?P((?:\\w+\\s){0,2}))\\s{80,94}(?P-?\\d*[.]?\\d+[.,]\\d{2})", + "types": { + "price_unit": "float", + "price_subtotal": "float" + } + }, + "payment_method": [ + "(?i)(American Express)", + "(VISA)", + "(MCARD)", + "(CONTANT)", + "(KAS):\\s.\\s\\d+\\.\\d+" + ], + "payment_reference": { + "parser": "regex", + "regex": "Transaction\\sID\\s+(\\d{9})" + } + }, + "keywords": [ + "(?i)Novotel", + "REKENING", + "Rhine" + ], + "options": { + "currency": "EUR", + "date_formats": [ + "%d/%m/%Y" + ], + "languages": [ + "nl" + ], + "priority": 4, + "decimal_separator": "." + } +} \ No newline at end of file diff --git a/src/invoice2data/extract/templates/nl/nl.agrisneltank.json b/src/invoice2data/extract/templates/nl/nl.agrisneltank.json new file mode 100644 index 00000000..6427e342 --- /dev/null +++ b/src/invoice2data/extract/templates/nl/nl.agrisneltank.json @@ -0,0 +1,115 @@ +{ + "issuer": "Agrisneltank B.V.", + "fields": { + "amount": "TOTAAL\\s+.?€\\s+(\\d{1,3}.\\d{2})", + "amount_untaxed": "Netto\\s+€\\s+(\\d{1,3}.\\d{2})", + "date": "Datum\\s+(\\d{1,2}-\\d{2}-\\d{4})", + "invoice_number": "ketnummer\\s+(\\d+)", + "static_vat": "NL862414489B01", + "partner_name": "(?i)(Agrisneltank B.V.)", + "country_code": { + "parser": "regex", + "regex": "(?i)nl", + "group": "first" + }, + "partner_website": { + "parser": "regex", + "regex": "agrisneltank.nl", + "group": "first" + }, + "payment_method": [ + "(MASTERCARD)", + "(VISA)", + "(MAESTRO)", + "(CONTANT)", + "(KAS):\\s.\\s\\d+\\.\\d+" + ] + }, + "lines": { + "start": "(?i)BON", + "end": "(Netto\\s|\\Z)", + "first_line": "(?i)(?PPOMP\\s+\\d+)\\s+(?P\\w+)", + "line": [ + "(?i)Volume.*\\s+(?P\\d+[,.]\\d+)\\s?(?P[l|ℓ|L])?", + "(?i)Prijs\\s+[E|€] (?P\\d.\\d{2,3})\\s*[/ ]?(?P[l|ℓ|L])?", + "(?i)B.W\\s+(?P\\d{2}[,.]\\d{2})\\s+[%]\\s+[E|€]?\\s+(?P\\d+[,.]\\d{2})?" + ], + "types": { + "qty": "float", + "price_unit": "float", + "line_tax_percent": "float", + "line_tax_amount": "float" + } + }, + "keywords": [ + "(?i)Agri", + "(?i)nl", + "€", + "NL862414489B01" + ], + "options": { + "currency": "EUR", + "languages": [ + "nl" + ], + "decimal_separator": ",", + "replace": [ + [ + "é", + "€" + ], + [ + "L.p.9.", + "LPG" + ], + [ + "L.P.G.", + "LPG" + ], + [ + "L.P.9.", + "LPG" + ], + [ + "LPLG", + "LPG" + ], + [ + "Contant", + "CONTANT" + ], + [ + "\\s[l|&|@]", + " ℓ" + ], + [ + "([0-9]{2,3})[ /][2]", + "\\1 /ℓ" + ], + [ + "B.W", + "BTW" + ], + [ + " - ", + "-" + ], + [ + "agrisneltank\\s+[.]?nl", + "agrisneltank.nl" + ], + [ + "Kuwait\\s+Petroleum\\s+Ned", + "Kuwait Petroleum Nederland B.V." + ], + [ + "€ ([0-9]+) ([0-9]{2})(\\s)", + "€ \\1,\\2\\3" + ], + [ + "(\\s)([0-9]+)\\.([0-9]{2,3})", + "\\1\\2,\\3" + ] + ] + } +} \ No newline at end of file diff --git a/src/invoice2data/extract/templates/nl/nl.argos.json b/src/invoice2data/extract/templates/nl/nl.argos.json new file mode 100644 index 00000000..0259ddb8 --- /dev/null +++ b/src/invoice2data/extract/templates/nl/nl.argos.json @@ -0,0 +1,113 @@ +{ + "issuer": "Kuwait Petroleum Ned. B.V.", + "fields": { + "amount": "TOTAAL\\s+.?€\\s+(\\d{1,3}.\\d{2})", + "amount_untaxed": "Netto\\s+€\\s+(\\d{1,3}.\\d{2})", + "date": "Datum\\s+(\\d{1,2}-\\d{2}-\\d{4})", + "invoice_number": "Ticketnummer\\s+(\\d+)", + "static_vat": "NL006816721B01", + "partner_name": "(?i)(Kuwait Petroleum Nederland B.V.)", + "country_code": { + "parser": "regex", + "regex": "(?i)nl", + "group": "last" + }, + "partner_website": { + "parser": "regex", + "regex": "argos.nl" + }, + "payment_method": [ + "(AMERICAN EXPRESS)", + "(VISA)", + "(MAESTRO)", + "(CONTANT)", + "(KAS):\\s.\\s\\d+\\.\\d+" + ] + }, + "lines": { + "start": "(?i)BON", + "end": "(Netto\\s|\\Z)", + "first_line": "(?i)(?PPOMP\\s+\\d+)\\s+(?P\\w+)", + "line": [ + "(?i)Volume.*\\s+(?P\\d+[,.]\\d+)\\s?(?P[l|ℓ|L])?", + "(?i)Prijs\\s+[E|€] (?P\\d.\\d{2,3})\\s*[/ ]?(?P[l|ℓ|L])?", + "(?i)B.W\\s+(?P\\d{2}[,.]\\d{2})\\s+[%]\\s+[E|€]?\\s+(?P\\d+[,.]\\d{2})?" + ], + "types": { + "qty": "float", + "price_unit": "float", + "line_tax_percent": "float", + "line_tax_amount": "float" + } + }, + "keywords": [ + "(?i)Argos", + "(?i)nl", + "€" + ], + "options": { + "currency": "EUR", + "languages": [ + "nl" + ], + "decimal_separator": ",", + "replace": [ + [ + "é", + "€" + ], + [ + "L.p.9.", + "LPG" + ], + [ + "L.P.G.", + "LPG" + ], + [ + "L.P.9.", + "LPG" + ], + [ + "LPLG", + "LPG" + ], + [ + "Contant", + "CONTANT" + ], + [ + "\\s[l|&|@]", + " ℓ" + ], + [ + "([0-9]{2,3})[ /][2]", + "\\1 /ℓ" + ], + [ + "B.W", + "BTW" + ], + [ + " - ", + "-" + ], + [ + "argos\\s[.]?nl", + "argos.nl" + ], + [ + "Kuwait\\s+Petroleum\\s+Ned", + "Kuwait Petroleum Nederland B.V." + ], + [ + "€ ([0-9]+) ([0-9]{2})(\\s)", + "€ \\1,\\2\\3" + ], + [ + "(\\s)([0-9]+)\\.([0-9]{2,3})", + "\\1\\2,\\3" + ] + ] + } +} \ No newline at end of file diff --git a/src/invoice2data/extract/templates/nl/nl.argos.yml b/src/invoice2data/extract/templates/nl/nl.argos.yml deleted file mode 100644 index f876033a..00000000 --- a/src/invoice2data/extract/templates/nl/nl.argos.yml +++ /dev/null @@ -1,64 +0,0 @@ -# SPDX-License-Identifier: MIT -issuer: Varo Energy Retail B.V. -fields: - amount: TOTAAL\s+€\s+(\d{1,3}.\d{2}) - amount_untaxed: Netto\s+€\s+(\d{1,3}.\d{2}) - date: - parser: regex - regex: - - Datum\s+(\d{1,2}-\d{2}-\d{4}\s+\d{2}[:]\d{2}[:]\d{2}) - - Datum\s+(\d{1,2}-\d{2}-\d{4}) - type: date - # group: max - invoice_number: Ticketnummer\s+(\d+) - vat: - parser: regex - regex: NL\d{9}B\d{2} - partner_name: - parser: regex - regex: (?i)(Varo Energy Retail\s+\S+) - country_code: - parser: static - value: NL - payment_method: - - (?i)(American Express) - - (?i)(VISA) - - (?i)(MCARD) - - (?i)(CONTANT) - - (?i)(KAS) -lines: - start: Datum - end: code - line: - - Pomp\s+\S+\s+(?P(\S+(?:\s*\S+){2}))\s+\w+\s{14,16}(?P\d+[.,]\d{2})\s(?P\S*)\s+\w+\s+.*\s(?P\d.\d{3})[\/].*\w+\s+(?P\d{2}.\d{2})\s+.\s+\S*\s+(?P\d+.\d{2}) - types: - qty: float - price_unit: float - line_tax_percent: float - line_tax_amount: float -keywords: - - Argos - - VARO - - € -options: - currency: EUR - languages: - - nl - decimal_separator: ',' - replace: - - ['é' ,'€'] - - ['L.p.9.' ,'LPG'] - - ['L.P.G.' ,'LPG'] - - ['L.P.9.' ,'LPG'] - - ['LPLG' ,'LPG'] - - ['95 Ongelood' ,'E10 (Euro 95)'] - - ['98 Ongelood' ,'E5 (Euro 98)'] - - ['Contant' ,'CONTANT'] - - ['\s[l|&|@]' ,' ℓ'] - - ['B.W', 'BTW'] - - [' - ', '-'] - - ['\n', ' '] # replace line break - - ['(\d{2}-\d{2}-\d{4})\s+\S*\s+', '\1 '] # format date code - - ['BTW\s(\w{2})\s?.(\d{4}).(\d{2}).(\d{3}).(\w).(\d{2})\s+', 'BTW \1\2\3\4\5\6 '] # format vat - - ['(\s)([0-9]+)\.([0-9]{2})(\s)', '\1\2,\3\4'] # workaround for ocr inconsistancy on comma and dot detection - diff --git a/src/invoice2data/extract/templates/nl/nl.fedex.json b/src/invoice2data/extract/templates/nl/nl.fedex.json new file mode 100644 index 00000000..c2927bcc --- /dev/null +++ b/src/invoice2data/extract/templates/nl/nl.fedex.json @@ -0,0 +1,58 @@ +{ + "issuer": "FedEx Express Netherlands BV", + "fields": { + "amount": { + "parser": "regex", + "regex": "(?i)Totaal\\s+EUR\\s+(\\d*[.,]?\\d+[,.]\\d+)", + "type": "float" + }, + "amount_tax": { + "parser": "regex", + "regex": "BTW\\s+(\\d*[.,]?\\d+[,.]\\d+)", + "group": "sum", + "type": "float" + }, + "date": "(?i)FactuurDatum[:]\\s+(\\d{1,2}[-/]\\d{2}[-/]\\d{4})", + "invoice_number": { + "parser": "regex", + "regex": "Factuurnummer[:]\\s+(\\w+)\\s+" + }, + "static_vat": "NL857768578B01", + "partner_name": "(?i)(FedEx Express Netherlands BV)", + "country_code": { + "parser": "regex", + "regex": "(?i)nl", + "group": "first" + }, + "partner_website": { + "parser": "regex", + "regex": "fedex.\\w+", + "group": "first" + }, + "partner_coc": { + "parser": "regex", + "regex": "K[.]v[.]K[.] Nr\\s(\\d{8})" + }, + "iban": { + "parser": "regex", + "regex": "[A-Z]{2}\\d{2}?\\w{4}?\\d{4}?\\d{4}?\\d{0,2}" + }, + "bic": { + "parser": "regex", + "regex": "BIC[:]\\s+(\\w{8,11})" + } + }, + "keywords": [ + "(?i)fedex", + "(?i)NL857768578B01", + "(?i)nl", + "(?i)factuur" + ], + "options": { + "currency": "EUR", + "languages": [ + "nl" + ], + "decimal_separator": "," + } +} \ No newline at end of file diff --git a/src/invoice2data/extract/templates/nl/nl.kuwait-q8.json b/src/invoice2data/extract/templates/nl/nl.kuwait-q8.json new file mode 100644 index 00000000..2c8cf1e0 --- /dev/null +++ b/src/invoice2data/extract/templates/nl/nl.kuwait-q8.json @@ -0,0 +1,84 @@ +{ + "issuer": "Kuwait Petroleum Ned. B.V.", + "fields": { + "amount": "TOTAAL FACTUUR\\s+[:]\\s+\\d{1,4}[.]\\d{2}\\s+\\d{1,4}[.]\\d{2}\\s+(\\d{1,4}[.]\\d{2})", + "amount_tax": "TOTAAL FACTUUR\\s+[:]\\s+\\d{1,4}[.]\\d{2}\\s+(\\d{1,4}[.]\\d{2})", + "amount_untaxed": "TOTAAL FACTUUR\\s+[:]\\s+(\\d{1,4}[.]\\d{2})", + "date": "\\SDATUM\\s+[:]\\s+(\\d{2}[\\/]\\d{2}[\\/]\\d{4})", + "date_due": "VERVALDATUM\\s+[:]\\s+(\\d{2}[\\/]\\d{2}[\\/]\\d{4})", + "invoice_number": "FACTUUR\\sNR\\s+[:]\\s+(\\w+)", + "static_vat": "NL001250590B01", + "bic": { + "parser": "regex", + "regex": "(?i)BIC.(\\w{8,11})" + }, + "iban": { + "parser": "regex", + "regex": "[A-Z]{2}\\d{2}?\\w{4}?\\d{4}?\\d{4}?\\d{0,2}" + }, + "telephone": { + "parser": "static", + "value": "+31703152650" + }, + "partner_website": { + "parser": "static", + "value": "q8libertyweb.com" + }, + "partner_name": { + "parser": "static", + "value": "KUWAIT PETROLEUM (NEDERLAND) B.V." + }, + "country_code": { + "parser": "static", + "value": "nl" + }, + "partner_zip": { + "parser": "regex", + "regex": "2595AR" + }, + "partner_city": { + "parser": "static", + "value": "Den Haag" + }, + "partner_street": { + "parser": "static", + "value": "Schenkkade 50" + }, + "partner_coc": { + "parser": "static", + "value": "24025263" + } + }, + "lines": { + "start": "KLANT", + "end": "TOTAAL KLANT", + "line": "\\s+(?P(\\w+(?:\\s\\S+){1,2}))\\s+(?P(\\d+[.]\\d{3,4}))\\s+(?P\\d+[.]\\d{2})\\s+(?P\\d[.]\\d{3,4})\\s+(?P\\d{2}[.]\\d{2})\\s+(?P\\d+[.]\\d{2})", + "types": { + "qty": "float", + "taxpercent": "float", + "price_unit": "float", + "base_price": "float", + "price_subtotal": "float" + } + }, + "keywords": [ + "(?i)Kuwait", + "FACTUUR NR", + "LIBERTY KAART", + "(NEDERLAND)", + "NL001250590B01" + ], + "options": { + "currency": "EUR", + "languages": [ + "nl" + ], + "decimal_separator": ".", + "replace": [ + [ + "Euro 95", + "E10 (Euro 95)" + ] + ] + } +} \ No newline at end of file diff --git a/src/invoice2data/extract/templates/nl/nl.kuwait-q8.yml b/src/invoice2data/extract/templates/nl/nl.kuwait-q8.yml deleted file mode 100644 index 9a218234..00000000 --- a/src/invoice2data/extract/templates/nl/nl.kuwait-q8.yml +++ /dev/null @@ -1,67 +0,0 @@ -# SPDX-License-Identifier: MIT -# Parse invoices for the Tango/Q8 Liberty card -# The card can be used with -# Texaco, Avia, Sakko, OK, Argos, ESSO, Tango, Q8, FIREZONE, Lukoil -issuer: Kuwait Petroleum Ned. B.V. -fields: - amount: TOTAAL FACTUUR\s+[:]\s+\d{1,4}[.]\d{2}\s+\d{1,4}[.]\d{2}\s+(\d{1,4}[.]\d{2}) - amount_tax: TOTAAL FACTUUR\s+[:]\s+\d{1,4}[.]\d{2}\s+(\d{1,4}[.]\d{2}) - amount_untaxed: TOTAAL FACTUUR\s+[:]\s+(\d{1,4}[.]\d{2}) - date: DATUM\s+[:]\s+(\d{2}[\/]\d{2}[\/]\d{4}) - date_due: VERVALDATUM\s+[:]\s+(\d{2}[\/]\d{2}[\/]\d{4}) - invoice_number: FACTUUR\sNR\s+[:]\s+(\w+) - vat: - parser: static - value: NL001250590B01 - bic: BIC.(\w{8,11}) - iban: '[A-Z]{2}\d{2}?\w{4}?\d{4}?\d{4}?\d{0,2}' - telephone: - parser: static - value: '+31703152650' - partner_website: - parser: static - value: q8libertyweb.com - partner_name: - parser: static - value: KUWAIT PETROLEUM (NEDERLAND) B.V. - country_code: - parser: static - value: nl - partner_zip: - parser: regex - regex: 2595AR - partner_city: - parser: static - value: Den Haag - partner_street: - parser: static - value: Schenkkade 50 - partner_coc: - parser: static - value: 24025263 -lines: - start: KLANT - end: TOTAAL KLANT - line: \s+(?P(\w+(?:\s\S+){1,2}))\s+(?P(\d+[.]\d{3,4}))\s+(?P\d+[.]\d{2})\s+(?P\d[.]\d{3,4})\s+(?P\d{2}[.]\d{2})\s+(?P\d+[.]\d{2}) - start: KLANT - end: TOTAAL KLANT - line: '(?P(TOTAAL KAART\s+[:]\s+\w+))\s+\d' - types: - qty: float - line_tax_percent: float - price_unit: float - base_price: float - price_subtotal: float -keywords: - - 'Kuwait' - - 'FACTUUR NR' - - 'LIBERTY KAART' - - '(NEDERLAND)' - - NL001250590B01 -options: - currency: EUR - languages: - - nl - decimal_separator: '.' - replace: - - ['Euro 95' ,'E10 (Euro 95)'] diff --git a/src/invoice2data/extract/templates/nl/nl.makro.json b/src/invoice2data/extract/templates/nl/nl.makro.json new file mode 100644 index 00000000..98b18eb2 --- /dev/null +++ b/src/invoice2data/extract/templates/nl/nl.makro.json @@ -0,0 +1,155 @@ +{ + "issuer": "Makro", + "fields": { + "amount": { + "parser": "regex", + "regex": [ + "Te betalen\\s+(\\d+.\\d{2})", + "Totaal[:]\\s+(\\d+.\\d{2})\\sEUR" + ], + "type": "float" + }, + "amount_untaxed": { + "parser": "regex", + "regex": [ + "Netto totaal[:]\\s+(\\d+[,]\\d{2})" + ], + "type": "float" + }, + "date": { + "parser": "regex", + "regex": [ + "Factuurdatum\\s.?\\s+(\\d{2}-\\d{2}-\\d{4}\\s+\\d{2}[:]\\d{2})" + ], + "type": "date" + }, + "invoice_number": { + "parser": "regex", + "regex": [ + "Factuurnummer[:]\\s+(\\S+)" + ] + }, + "vat": { + "parser": "regex", + "regex": [ + "OB\\s+nr[:]\\s+((?:BE|NL)\\w+)" + ] + }, + "partner_coc": { + "parser": "regex", + "regex": [ + "K[.]v[.]K[:]\\s+(\\d{8})" + ] + }, + "partner_website": { + "parser": "regex", + "regex": [ + "www[.](\\w+[.]\\w{2})" + ], + "group": "first" + }, + "telephone": { + "parser": "regex", + "regex": [ + "Telefoon[:]\\s+(\\d+[-]\\d{4,9})" + ], + "group": "first" + }, + "partner_name": { + "parser": "regex", + "regex": [ + "(Metro\\sCash\\s[&]\\sCarry\\sNederland\\sB[.]V[.])" + ] + }, + "partner_city": { + "parser": "regex", + "regex": "Wateringen" + }, + "partner_zip": { + "parser": "regex", + "regex": "2290 AD" + }, + "country_code": { + "parser": "regex", + "regex": "[.](nl|be)" + }, + "bic": { + "parser": "regex", + "regex": [ + "BIC\\s.\\s(\\w{8,11})" + ] + }, + "iban": { + "parser": "regex", + "regex": [ + "IBAN.\\s([A-Z]{2}\\d{2} ?\\w{4} ?\\d{4} ?\\d{4} ?\\d{0,2})" + ] + }, + "payment_method": [ + "(?i)(AMEX)", + "(?i)(American express)", + "(?i)(VISA)", + "(?i)(Vpay)", + "(?i)(Mastercard)", + "(?i)(CONTANT)", + "(?i)(KAS):\\s.\\s\\d+\\.\\d+" + ], + "lines": { + "parser": "lines", + "rules": [ + { + "start": "na korting", + "end": "-{134}\\n\\s{18}", + "line": [ + "(?P\\d{13})\\s+(?P(\\w+(?:\\s\\S+)*))\\s+(?P\\d+[,.]\\d+)\\s+(?P(\\d+([,.]\\d{3})?))\\s(?P\\w+)\\s+(?P\\d+[,.]\\d{2})\\s+(?P\\d)\\s+(?P\\d+[,.]\\d{2})\\s+(?P\\d{1,2})\\s+(?P\\d+)?\\s+(?P\\d+[,.]\\d{3})", + "---(?P(\\w+(?:\\s\\S+)*))---" + ] + }, + { + "start": ",\\d{3}\\n-{134}", + "end": "\\Z", + "line": [ + "(?P((?:\\S+\\s)?\\w+(?:\\s\\S+)*))\\s+(?P\\d+[,.]\\d{2}[-])\\s+(?P\\d)\\s+(?P(\\d+))?" + ] + } + ], + "types": { + "qty": "float", + "price_unit": "float", + "line_tax_percent": "float", + "price_subtotal": "float", + "collo": "float", + "stukspereenheid": "float", + "prijs_stuk_nakorting": "float" + } + } + }, + "keywords": [ + "Makro", + "NL001799435B01" + ], + "options": { + "date_formats": [ + "%d %m %Y" + ], + "currency": "EUR", + "languages": [ + "nl" + ], + "decimal_separator": ".", + "replace": [ + [ + ",", + "." + ], + [ + "\\s(?P(\\d+.\\d+))(?P(\\s+))(?P(\\d+(.\\d{3})?))\\s(?P\\w+)\\s+(?P\\d+.\\d{2})\\s+(?P\\d)\\s+(?P\\d+.\\d{2})\\s+(?P1)\\s", + " \\g\\g\\g \\g \\g \\g \\g 21 " + ], + [ + "\\s(?P(\\d+.\\d+))(?P(\\s+))(?P(\\d+(.\\d{3})?))\\s(?P\\w+)\\s+(?P\\d+.\\d{2})\\s+(?P\\d)\\s+(?P\\d+.\\d{2})\\s+(?P5)\\s", + " \\g\\g\\g \\g \\g \\g \\g 9 " + ] + ] + } +} \ No newline at end of file diff --git a/src/invoice2data/extract/templates/nl/nl.marktplaats.json b/src/invoice2data/extract/templates/nl/nl.marktplaats.json new file mode 100644 index 00000000..bfac1587 --- /dev/null +++ b/src/invoice2data/extract/templates/nl/nl.marktplaats.json @@ -0,0 +1,114 @@ +{ + "issuer": "Marktplaats bv", + "fields": { + "amount": { + "parser": "regex", + "regex": "Totaal [(]incl. BTW[)]\\s+€\\s-?(\\d*[.,]?\\d+[,.]\\d{2})", + "type": "float" + }, + "amount_tax": { + "parser": "regex", + "regex": "BTW\\s+\\d+[%]\\s+[$€]\\s+(\\d*[.,]?\\d+[,.]\\d{2})", + "group": "sum", + "type": "float" + }, + "amount_untaxed": { + "parser": "regex", + "regex": "Totaal excl. BTW\\s+[$€]\\s(\\d*[.,]?\\d+[,.]\\d{2})", + "type": "float" + }, + "invoice_number": { + "parser": "regex", + "regex": "Factuurnummer\\s+\\S\\s+(\\w+)" + }, + "partner_website": { + "parser": "regex", + "regex": "\\s+(\\w+[.]nl)" + }, + "partner_name": { + "parser": "regex", + "regex": "( Marktplaats B[.]V[.])" + }, + "partner_street": { + "parser": "regex", + "regex": "Wibautstraat 224" + }, + "partner_city": { + "parser": "regex", + "regex": "\\d{4}\\s[A-Z]{2}\\s(\\w+)" + }, + "state_code": { + "parser": "static", + "value": "NH" + }, + "country_code": { + "parser": "static", + "value": "NL" + }, + "partner_zip": { + "parser": "regex", + "regex": "(\\d{4}\\s[A-Z]{2})\\s\\w+", + "group": "last", + "area": { + "f": 1, + "l": 1, + "x": 0, + "y": 930, + "r": 100, + "W": 827, + "H": 320 + } + }, + "partner_ref": { + "parser": "regex", + "regex": "Klantnummer\\s+[:]\\s+(\\S+)" + }, + "currency_symbol": { + "parser": "regex", + "regex": "[$€]", + "group": "first" + }, + "date": { + "parser": "regex", + "regex": [ + "datum\\s+\\S\\s+(\\d{2}-\\d{2}-\\d{4})" + ], + "group": "min", + "type": "date" + }, + "partner_coc": { + "parser": "regex", + "regex": "KvK-nummer\\S?\\s+(\\d+)" + }, + "date_start": { + "parser": "regex", + "regex": "\\s+(\\d{2}-\\d{2}-\\d{4})\\s-\\s\\d{2}-\\d{2}-\\d{4}", + "type": "date" + }, + "date_end": { + "parser": "regex", + "regex": "\\d{2}-\\d{2}-\\d{4}\\s-\\s(\\d{2}-\\d{2}-\\d{4})", + "type": "date" + }, + "vat": { + "parser": "regex", + "regex": "BTW-nummer\\S?\\s+(\\d{3}[.]\\d{3}[.]\\d{3}[.]B[.]\\d{2})" + }, + "company_vat": { + "parser": "regex", + "regex": "BTW-nummer\\s+\\S?\\s+(\\w{2}\\d{9}B\\d{2})" + } + }, + "keywords": [ + "Factuur", + "Marktplaats", + "28058712" + ], + "options": { + "languages": [ + "nl" + ], + "decimal_separator": ",", + "currency": "EUR" + } +} \ No newline at end of file diff --git a/src/invoice2data/extract/templates/nl/nl.megekko.json b/src/invoice2data/extract/templates/nl/nl.megekko.json new file mode 100644 index 00000000..afe29bad --- /dev/null +++ b/src/invoice2data/extract/templates/nl/nl.megekko.json @@ -0,0 +1,153 @@ +{ + "issuer": "MEGEKKO B.V.", + "fields": { + "invoice_number": { + "parser": "regex", + "regex": "Factuurnummer[:]\\s+(\\w+)", + "group": "first" + }, + "purchase_order_id": { + "parser": "regex", + "regex": "\\s+(ME\\d{12})\\n", + "group": "first" + }, + "partner_website": { + "parser": "regex", + "regex": "\\swww[.](.+[.]nl)\\s[-]" + }, + "partner_email": { + "parser": "regex", + "regex": "\\S\\s(\\w+[@]\\w+[.]nl)\\s" + }, + "partner_name": { + "parser": "regex", + "regex": "(MEGEKKO \\S+)\\s" + }, + "partner_street": { + "parser": "regex", + "regex": "Spinveld 8" + }, + "partner_city": { + "parser": "regex", + "regex": "\\d{4}\\s[A-Z]{2}\\s(Breda)", + "group": "last" + }, + "state_code": { + "parser": "static", + "value": "NB" + }, + "country_code": { + "parser": "static", + "value": "NL" + }, + "partner_zip": { + "parser": "regex", + "regex": "\\s(\\d{4}\\s[A-Z]{2})\\s", + "group": "last" + }, + "partner_ref": { + "parser": "regex", + "regex": "Klantnummer\\s[:]\\s+(\\S+)" + }, + "iban": { + "parser": "regex", + "regex": "Rekeningnr[:]\\s+(NL.*)\\s[-]" + }, + "bic": { + "parser": "regex", + "regex": "BIC\\s+(\\w+)\\s" + }, + "date": { + "parser": "regex", + "regex": [ + "Datum[:]\\s+(\\d{2}[-]\\d{2}[-]\\d{4})" + ], + "group": "min", + "type": "date" + }, + "date_due": { + "parser": "regex", + "regex": [ + "Datum[:]\\s+(\\d{2}[-]\\d{2}[-]\\d{4})" + ], + "group": "max", + "type": "date" + }, + "partner_coc": { + "parser": "regex", + "regex": "KVK\\sBreda\\s+(\\d+)" + }, + "lines": { + "parser": "lines", + "start": "ARTIKEL", + "end": "FACTUURCODE", + "line": [ + "(?P\\d+)\\s{6,7}(?P.+)\\s{5,22}(?P\\d+\\s\\w+[.])?\\s{10,40}(?P\\d+)\\s*(?P\\d*[.,]?\\d+[,.]\\d{2})?\\s+(?P\\d*[.,]?\\d+[,.]\\d{2})?", + "(?P\\d+)\\s{6,7}(?P.+)\\s{5,22}(?P\\d+\\s\\w+[.])?\\s{10,40}(?P\\d+)$", + "(?P\\d*[.,]?\\d+[,.]\\d{2})\\s+(?P\\d+[.]\\d)\\s+(?P\\d*[.,]?\\d+[,.]\\d{2})\\s+\\d+", + "^\\s+(?P.{2} Webshoporder[:]\\s\\w+)" + ], + "types": { + "qty": "float", + "price_subtotal": "float", + "price_unit": "float" + } + }, + "tax_lines": { + "parser": "lines", + "area": { + "f": 1, + "l": 1, + "x": 276, + "y": 1020, + "r": 100, + "W": 355, + "H": 87 + }, + "start": "EXCL. BTW", + "end": "\\Z", + "line": [ + "(?P[\\d+.]+)\\s+(?P[\\d+.]+)\\s+(?P[\\d+.]+)" + ], + "types": { + "line_tax_percent": "float", + "price_subtotal": "float", + "line_tax_amount": "float" + } + }, + "vat": { + "parser": "regex", + "regex": "BTW\\s+(NL\\d{9}B\\d{2})" + } + }, + "tables": [ + { + "start": "BTW-BEDRAG\\s+TOTAAL", + "end": "MEGEKKO B[.]", + "body": "\\d+\\s+(?P\\d*[.,]?\\d+[,.]\\d{2})\\s+(?P\\d+[.]\\d)\\s+(?P\\d*[.,]?\\d+[,.]\\d{2})\\s+\\d+" + }, + { + "start": "BTW-BEDRAG\\s+TOTAAL", + "end": "MEGEKKO B[.]", + "body": "Euro\\s+(?P\\d*[.,]?\\d+[,.]\\d{2})", + "types": { + "amount": "float", + "amount_tax": "float", + "amount_untaxed": "float", + "tax_percent_not_used": "float" + } + } + ], + "keywords": [ + "MEGEKKO", + "NL808060016B01", + "FACTUUR" + ], + "options": { + "languages": [ + "nl" + ], + "decimal_separator": ".", + "currency": "EUR" + } +} \ No newline at end of file diff --git a/src/invoice2data/extract/templates/nl/nl.odido.json b/src/invoice2data/extract/templates/nl/nl.odido.json new file mode 100644 index 00000000..e3b63a8f --- /dev/null +++ b/src/invoice2data/extract/templates/nl/nl.odido.json @@ -0,0 +1,125 @@ +{ + "issuer": "Odido Netherlands bv", + "fields": { + "amount": { + "parser": "regex", + "regex": "Totaal\\s+[$€]\\s-?(\\d*[.,]?\\d+[,.]\\d{2})", + "type": "float" + }, + "amount_tax": { + "parser": "regex", + "regex": "BTW \\d+. over [$€]\\s+\\S+\\s+[$€]\\s+(\\d*[.,]?\\d+[,.]\\d{2})", + "group": "sum", + "type": "float" + }, + "amount_untaxed": { + "parser": "regex", + "regex": "Subtotaal excl. BTW\\s+[$€]\\s(\\d*[.,]?\\d+[,.]\\d{2})", + "type": "float" + }, + "invoice_number": { + "parser": "regex", + "regex": "Factuurnummer\\S\\s+(\\d+)" + }, + "partner_website": { + "parser": "regex", + "regex": "\\s+\\s{20}(.+[.]nl)" + }, + "partner_name": { + "parser": "regex", + "regex": "(Odido Netherlands bv)\\s" + }, + "partner_street": { + "parser": "regex", + "regex": "Waldorpstraat 60" + }, + "partner_city": { + "parser": "regex", + "regex": "Den Haag" + }, + "state_code": { + "parser": "static", + "value": "ZH" + }, + "country_code": { + "parser": "static", + "value": "NL" + }, + "partner_zip": { + "parser": "regex", + "regex": "\\s(\\d{4}\\s[A-Z]{2})\\s", + "group": "last", + "area": { + "f": 1, + "l": 1, + "x": 0, + "y": 930, + "r": 100, + "W": 827, + "H": 320 + } + }, + "partner_ref": { + "parser": "regex", + "regex": "Klantnummer[:]\\s+(\\S+)" + }, + "currency_symbol": { + "parser": "regex", + "regex": "[$€]", + "group": "first" + }, + "telephone": { + "parser": "regex", + "regex": "0800-7112" + }, + "iban": { + "parser": "regex", + "regex": "Bank\\S\\s+(NL\\d{2}.+)\\s.\\sB" + }, + "date": { + "parser": "regex", + "regex": [ + "\\sDatum\\S\\s+(\\d{2}\\s\\w+\\s\\d{4})" + ], + "group": "min", + "type": "date" + }, + "date_due": { + "parser": "regex", + "regex": [ + "\\somstreeks\\s(\\d{2}\\s\\w+\\s\\d{4})" + ], + "group": "max", + "type": "date" + }, + "partner_coc": { + "parser": "regex", + "regex": "KvK\\S?\\s+(\\d+)" + }, + "date_start": { + "parser": "regex", + "regex": "\\s+(\\d{2}[.]\\d{2}[.]\\d{2})\\st.m\\s\\d{2}[.]\\d{2}[.]\\d{2}", + "type": "date" + }, + "date_end": { + "parser": "regex", + "regex": "\\s+\\d{2}[.]\\d{2}[.]\\d{2}\\st.m\\s(\\d{2}[.]\\d{2}[.]\\d{2})", + "type": "date" + }, + "vat": { + "parser": "regex", + "regex": "BTW\\S?-id\\s+(NL\\d{9}B\\d{2})" + } + }, + "keywords": [ + "factuur", + "NL007053022B01" + ], + "options": { + "languages": [ + "nl" + ], + "decimal_separator": ",", + "currency": "EUR" + } +} \ No newline at end of file diff --git a/src/invoice2data/extract/templates/nl/nl.tango.json b/src/invoice2data/extract/templates/nl/nl.tango.json new file mode 100644 index 00000000..a041d069 --- /dev/null +++ b/src/invoice2data/extract/templates/nl/nl.tango.json @@ -0,0 +1,119 @@ +{ + "issuer": "Kuwait Petroleum Ned. B.V.", + "fields": { + "amount": "TOTAAL\\s+.?€\\s+(\\d{1,3}.\\d{2})", + "amount_untaxed": "Netto\\s+€\\s+(\\d{1,3}.\\d{2})", + "date": "Datum\\s+(\\d{1,2}-\\d{2}-\\d{4})", + "invoice_number": "Ticketnummer\\s+(\\d+)", + "static_vat": "NL001250590B01", + "partner_name": "(?i)(Kuwait Petroleum Nederland B.V.)", + "country_code": { + "parser": "regex", + "regex": "(?i)nl", + "group": "first" + }, + "partner_website": { + "parser": "regex", + "regex": "tango.nl", + "group": "first" + }, + "payment_method": [ + "(AMERICAN EXPRESS)", + "(VISA)", + "(MAESTRO)", + "(CONTANT)", + "(KAS):\\s.\\s\\d+\\.\\d+" + ] + }, + "lines": { + "start": "(?i)BON", + "end": "(Netto\\s|\\Z)", + "first_line": "(?i)(?PPOMP\\s+\\d+)\\s+(?P\\w+)", + "line": [ + "(?i)Volume.*\\s+(?P\\d+[,.]\\d+)\\s?(?P[l|ℓ|L])?", + "(?i)Prijs\\s+[E|€] (?P\\d.\\d{2,3})\\s*[/ ]?(?P[l|ℓ|L])?", + "(?i)B.W\\s+(?P\\d{2}[,.]\\d{2})\\s+[%]\\s+[E|€]?\\s+(?P\\d+[,.]\\d{2})?" + ], + "types": { + "qty": "float", + "price_unit": "float", + "line_tax_percent": "float", + "line_tax_amount": "float" + } + }, + "keywords": [ + "(?i)Kuwait", + "(?i)tango", + "(?i)nl", + "€" + ], + "options": { + "currency": "EUR", + "languages": [ + "nl" + ], + "decimal_separator": ",", + "replace": [ + [ + "é", + "€" + ], + [ + "L.p.9.", + "LPG" + ], + [ + "L.P.G.", + "LPG" + ], + [ + "L.P.9.", + "LPG" + ], + [ + "LPLG", + "LPG" + ], + [ + "Contant", + "CONTANT" + ], + [ + "\\s[l|&|@]", + " ℓ" + ], + [ + "([0-9]{2,3})[ /][2]", + "\\1 /ℓ" + ], + [ + "B.W", + "BTW" + ], + [ + " - ", + "-" + ], + [ + "tango\\s+[.]?nl", + "tango.nl" + ], + [ + "Tango\\sapp", + "tango.nl" + ], + [ + "Kuwait\\s+Petroleum\\s+Ned", + "Kuwait Petroleum Nederland B.V." + ], + [ + "€ ([0-9]+) ([0-9]{2})(\\s)", + "€ \\1,\\2\\3" + ], + [ + "(\\s)([0-9]+)\\.([0-9]{2,3})", + "\\1\\2,\\3" + ] + ] + } +} \ No newline at end of file diff --git a/src/invoice2data/extract/templates/nl/nl.tango.yml b/src/invoice2data/extract/templates/nl/nl.tango.yml deleted file mode 100644 index bc8fa71e..00000000 --- a/src/invoice2data/extract/templates/nl/nl.tango.yml +++ /dev/null @@ -1,65 +0,0 @@ -# SPDX-License-Identifier: MIT -issuer: Kuwait Petroleum Ned. B.V. -tables: - - start: Tango - end: \s+KLANTENBON - body: '(?P\d{3}.\d{7})' -fields: - amount: TOTAAL\s+€\s+(\d{1,3}.\d{2}) - amount_untaxed: Netto\s+€\s+(\d{1,3}.\d{2}) - date: - parser: regex - regex: - - Datum\s+(\d{1,2}-\d{2}-\d{4}\s+\d{2}[:]\d{2}[:]\d{2}) - - Datum\s+(\d{1,2}-\d{2}-\d{4}) - type: date - # group: max - invoice_number: Ticketnummer\s+(\d+) - vat: - parser: regex - regex: (NL\d{9}B\d{2})\s - partner_website: - parser: static - value: tango.nl - country_code: - parser: static - value: nl -lines: - start: Datum - end: code - line: - - Pomp\s+\S+\s+(?P(\S+(?:\s*\S+){2}))\s+\w+\s{14,30}(?P\d+[.,]\d{2})\s\S+\s+\w+\s+.*\s(?P\d.\d{3})[\/](?P\w)\s+\w+\s+.\s+(?P\d{2}.\d{2})\s+.\s+\S*\s+(?P\d+.\d{2})\s+.\s+\S\s+(?P\d{1,2}.\d{2}) - types: - qty: float - price_unit: float - line_tax_percent: float - line_tax_amount: float -keywords: - - Kuwait - - tango - - € -exclude_keywords: - - (?i)Liberty.card -options: - currency: EUR - languages: - - nl - decimal_separator: ',' - replace: - - ['é' ,'€'] - - ['L.p.9.' ,'LPG'] - - ['L.P.G.' ,'LPG'] - - ['L.P.9.' ,'LPG'] - - ['LPLG' ,'LPG'] - - ['Contant' ,'CONTANT'] - - ['\s[l|&|@]' ,' ℓ'] - - ['B.W', 'BTW'] - - [' - ', '-'] - - ['\n', ' '] # replace line break - - ['\s+DIESEL\s+', ' B7 (Diesel) '] - - ['NL\s+(\d{9})\s', ' NL\1B01 '] # Fixup vat - - ['[,.](?P\d{3})[\/]\w', ',\g/L'] # Fixup UOM - - ['(\s)([0-9]+)\.([0-9]{2})(\s)', '\1\2,\3\4'] # workaround for ocr inconsistancy on comma and dot detection -# todo reformat the Euro 95 and 98 products -# need more receipts for that -# - ['Euro 95 E10' ,'E10 (Euro 95)'] diff --git a/src/invoice2data/extract/templates/nl/nl.total_servauto_ned.json b/src/invoice2data/extract/templates/nl/nl.total_servauto_ned.json new file mode 100644 index 00000000..6e14f918 --- /dev/null +++ b/src/invoice2data/extract/templates/nl/nl.total_servauto_ned.json @@ -0,0 +1,132 @@ +{ + "issuer": "Servauto Ned. B.V.", + "fields": { + "amount": { + "parser": "regex", + "regex": [ + "TOTAAL.?\\s+.\\s+(\\d{1,3}[,]\\d{2})", + "Bedrag\\s+(\\d{1,3}.\\d{2}) EUR", + "(?i)Amex\\s+€.?\\s+(\\d{1,3}.\\d{2})" + ], + "type": "float" + }, + "amount_untaxed": { + "parser": "regex", + "regex": [ + "21.?00\\s+.\\s+(\\d{1,3}.\\d{1,2})\\s+.+\\s+\\d{1,3}.\\d{1,2}", + "9.?00\\s+.\\s+(\\d{1,3}.\\d{1,2})\\s+.+\\s+\\d{1,3}.\\d{1,2}" + ], + "type": "float", + "group": "sum" + }, + "amount_tax": { + "parser": "regex", + "regex": [ + "21.?00\\s+.\\s+\\d{1,3}.\\d{1,2}\\s+.\\s+(\\d{1,3}.\\d{1,2})", + "9.?00\\s+.\\s+\\d{1,3}.\\d{1,2}\\s+.\\s+(\\d{1,3}.\\d{1,2})" + ], + "type": "float", + "group": "sum" + }, + "date": { + "parser": "regex", + "regex": [ + "(\\d{1,2}[.|-|\\/]\\d{2}[.|-|\\/]\\d{4})\\s", + "(\\d{1,2}-\\d{2}-\\d{4})\\s+\\d{1,2}.\\d{2}.\\d{2}" + ], + "type": "date" + }, + "invoice_number": "(\\d{1,2}[.|-]\\d{2}[.|-]\\d{4})\\s", + "static_vat": "NL003588567B01", + "payment_method": [ + "(AMEX)", + "(VISA)", + "(MCARD)", + "(CONTANT)", + "(KAS):\\s.\\s\\d+\\.\\d+" + ] + }, + "lines": { + "start": "BEDRAG", + "end": "TOTAAL\\s", + "first_line": ".(?P\\w+).?\\s+\\d\\s[E|€]\\s*\\d+.\\d{0,2}", + "line": [ + "(?PPOMP.*) (?P\\d+.\\d{2})\\s.+\\s.\\s(?P\\d.\\d{2,3})", + "(?i)Prijs\\s+[E|€] (?P\\d.\\d{2,3})\\s*[/]?(?P[l|ℓ|L])?" + ], + "types": { + "qty": "float", + "unitprice": "float" + } + }, + "keywords": [ + "(?i)TOTAL", + "Servau", + "€" + ], + "options": { + "currency": "EUR", + "languages": [ + "nl" + ], + "decimal_separator": ",", + "replace": [ + [ + "é", + "€" + ], + [ + "L.p.9.", + "LPG" + ], + [ + "L.P.G.", + "LPG" + ], + [ + "L.P.9.", + "LPG" + ], + [ + "L.P.\\s", + "LPG" + ], + [ + "LPLG", + "LPG" + ], + [ + "Contant", + "CONTANT" + ], + [ + "\\s[l|&|@]", + " ℓ" + ], + [ + "B.W", + "BTW" + ], + [ + ", ", + "," + ], + [ + "°", + " " + ], + [ + "©", + " " + ], + [ + "€ ([0-9]+) ([0-9]{2})(\\s)", + "€ \\1,\\2\\3" + ], + [ + "(\\s)([0-9]+)\\.([0-9]{2})(\\s)", + "\\1\\2,\\3\\4" + ] + ] + } +} \ No newline at end of file diff --git a/src/invoice2data/extract/templates/nl/nl.total_servauto_ned.yml b/src/invoice2data/extract/templates/nl/nl.total_servauto_ned.yml deleted file mode 100644 index 2c5efc7e..00000000 --- a/src/invoice2data/extract/templates/nl/nl.total_servauto_ned.yml +++ /dev/null @@ -1,83 +0,0 @@ -# SPDX-License-Identifier: MIT -issuer: Servauto Ned. B.V. -fields: - amount: - parser: regex - regex: - - TOTAAL.?\s+.\s+(\d{1,3}[,]\d{2}) - - Bedrag\s+(\d{1,3}.\d{2}) EUR - - (?i)Amex\s+€.?\s+(\d{1,3}.\d{2}) - type: float - amount_untaxed: - parser: regex - regex: - - 21.?00\s+.\s+(\d{1,3}.\d{1,2})\s+.+\s+\d{1,3}.\d{1,2} - - 9.?00\s+.\s+(\d{1,3}.\d{1,2})\s+.+\s+\d{1,3}.\d{1,2} - type: float - group: sum - amount_tax: - parser: regex - regex: - - 21.?00\s+.\s+\d{1,3}.\d{1,2}\s+.\s+(\d{1,3}.\d{1,2}) - - 9.?00\s+.\s+\d{1,3}.\d{1,2}\s+.\s+(\d{1,3}.\d{1,2}) - type: float - group: sum - date: - parser: regex - regex: - - (\d{1,2}[.|-|\/]\d{2}[.|-|\/]\d{4})\s - - (\d{1,2}-\d{2}-\d{4})\s+\d{1,2}.\d{2}.\d{2} - type: date - invoice_number: - parser: regex - regex: (\d{1,2}[.|-]\d{2}[.|-]\d{4})\s - vat: - parser: static - value: NL003588567B01 - country_code: - parser: static - value: NL - payment_method: - - (AMEX) - - (VISA) - - (MCARD) - - (CONTANT) - - (KAS):\s.\s\d+\.\d+ -lines: -# code below works, but without product - start: BEDRAG - end: TOTAAL\s - first_line: '.(?P\w+).?\s+(?P\d+[,.]?\d*)\s[E|€]\s*\d+.\d{0,2}' - line: (?PPOMP.*) (?P\d+.\d{2})\s.+\s.\s(?P\d.\d{2,3}) - types: - qty: float - price_unit: float - line_tax_percent: float -keywords: - - (?i)TOTAL - - Servau - - € -options: - currency: EUR - languages: - - nl - decimal_separator: ',' - replace: - - ['é' ,'€'] - - ['L.p.9.' ,'LPG'] - - ['L.P.G.' ,'LPG'] - - ['L.P.9.' ,'LPG'] - - ['L.P.\s' ,'LPG'] - - ['LPLG' ,'LPG'] - - ['Contant' ,'CONTANT'] - - ['\s[l|&|@]' ,' ℓ'] - - ['B.W', 'BTW'] - - [', ', ','] - - ['°', ' '] - - ['©', ' '] - # rewrite the tax percentage on the line - - ['(\s+)1(\s+)[E|€]\s(\d{1,3})', '\1 9.00 \2€ \3'] - - ['(\s+)2(\s+)[E|€]\s(\d{1,3})', '\1 21.00 \2€ \3'] - - ['(\s)([0-9]+)\.([0-9]{2})(\s)', '\1\2,\3\4'] # workaround for ocr inconsistancy on comma and dot detection - - ['Euro\s+95\s+E10' ,'E10 (Euro 95)'] - - ['Diesel\s+', 'B7 (Diesel) '] diff --git a/src/invoice2data/extract/templates/nl/nl.valk.exclusief.hotel.json b/src/invoice2data/extract/templates/nl/nl.valk.exclusief.hotel.json new file mode 100644 index 00000000..385d233d --- /dev/null +++ b/src/invoice2data/extract/templates/nl/nl.valk.exclusief.hotel.json @@ -0,0 +1,207 @@ +{ + "issuer": "Valk Exclusief", + "fields": { + "amount": { + "parser": "regex", + "regex": [ + "Totaal\\sfactuur\\s+(\\d?.?\\d{1,4}[.,]\\d{2})" + ], + "type": "float", + "group": "sum" + }, + "amount_tax": { + "parser": "regex", + "regex": [ + "BTW\\s{7,9}[1-9,]+\\d{2}\\%\\s\\w+(?:\\s\\S+){0,4}\\s+\\d?.?\\d{1,4}[.,]\\d{2}\\s+(\\d?.?\\d{1,4}[.,]\\d{2})\\s+\\d?.?\\d{1,4}[.,]\\d{2}" + ], + "type": "float", + "group": "sum" + }, + "amount_untaxed": { + "parser": "regex", + "regex": [ + "BTW\\s{7,9}[0-9,]+\\d{2}\\%\\s\\w+(?:\\s\\S+){0,4}\\s+\\d?.?\\d{1,4}[.,]\\d{2}\\s+\\d?.?\\d{1,4}[.,]\\d{2}\\s+(\\d?.?\\d{1,4}[.,]\\d{2})" + ], + "type": "float", + "group": "sum" + }, + "date": { + "parser": "regex", + "regex": "datum\\s+(\\d{1,2}[-]\\d{1,4}[-]\\d{4}\\s?\\d{0,2}.?\\d{0,2})", + "type": "date", + "group": "first" + }, + "invoice_number": { + "parser": "regex", + "regex": "nummer\\s+(\\d*)" + }, + "customer_order_free_ref": { + "parser": "regex", + "regex": "Referentie\\s+(\\S+)" + }, + "customer_order_number": { + "parser": "regex", + "regex": "Folio\\s+(\\S+(?:\\s\\S+){0,2})" + }, + "iban": { + "parser": "regex", + "area": { + "f": 1, + "l": 1, + "x": 0, + "y": 930, + "r": 100, + "W": 827, + "H": 320 + }, + "regex": "(?:[A-Z]{2}[\\-]?[0-9]{2})(?:[\\-]?[A-Z0-9]{3,5}){2,7}" + }, + "bic": { + "parser": "regex", + "regex": "(?i)BIC[:]\\s+(\\w{8,11})", + "group": "first" + }, + "vat": { + "parser": "regex", + "regex": "\\s(NL\\d{4}[.]?\\d{2}[.]?\\d{3}[.]?B[.]?\\d{2})\\s" + }, + "partner_website": { + "parser": "regex", + "regex": "\\swww[.](.+[.]nl)\\s" + }, + "partner_name": { + "parser": "regex", + "regex": "(Van der Valk Hotel \\w+)\\s\\W\\s\\w+" + }, + "country_code": { + "parser": "regex", + "regex": "\\.(nl)" + }, + "partner_zip": { + "parser": "regex", + "area": { + "f": 1, + "l": 1, + "x": 0, + "y": 930, + "r": 100, + "W": 827, + "H": 320 + }, + "regex": "\\s(\\d{4}\\s?[A-Z]{2})\\s" + }, + "partner_city": { + "parser": "regex", + "area": { + "f": 1, + "l": 1, + "x": 0, + "y": 930, + "r": 100, + "W": 827, + "H": 320 + }, + "regex": "\\d{4}\\s?[A-Z]{2}\\s(\\w+(?:\\s\\w+)*)" + }, + "partner_street": { + "parser": "regex", + "area": { + "f": 1, + "l": 1, + "x": 0, + "y": 930, + "r": 100, + "W": 827, + "H": 320 + }, + "regex": "\\s([A-Z][a-z]+\\s?\\d{1,4})\\s\\W" + }, + "telephone": { + "parser": "regex", + "area": { + "f": 1, + "l": 1, + "x": 0, + "y": 930, + "r": 100, + "W": 827, + "H": 320 + }, + "regex": "t[:]\\s+(.?\\d*[(]?[0-9]?[)]?[0-9-]{7,10})" + }, + "partner_email": { + "parser": "regex", + "area": { + "f": 1, + "l": 1, + "x": 0, + "y": 930, + "r": 100, + "W": 827, + "H": 320 + }, + "regex": "(\\w+[@]\\w+[.]\\w+[.]nl)" + }, + "partner_coc": { + "parser": "regex", + "regex": "Handelsregister[:]\\s(\\d{8})" + }, + "lines": { + "parser": "lines", + "start": "Vertrek", + "end": "Totaal factuur", + "first_line": "^\\s{11}(?P([a-zA-Z]+(?:\\s\\D+){0,8}))", + "line": "(?P\\d{2}[-]\\d{1,2}[-]\\d{4}\\s\\d{2}.\\d{2}.\\d{2})\\s+(?P\\d+)\\s+(?P-?\\d*[.]?\\d+[.,]\\d{2})\\s+(?P-?\\d*[.]?\\d+[.,]\\d{2})", + "types": { + "price_unit": "float", + "price_subtotal": "float", + "qty": "int" + } + }, + "tax_lines": { + "parser": "lines", + "start": "Incl. BTW", + "end": "\\Z", + "line": [ + "(?P[\\d+,]+)[%]\\s[\\S ]+\\s+(?P[\\d+,.]+)\\s+(?P[\\d+,.]+)" + ], + "types": { + "line_tax_percent": "float", + "price_subtotal": "float", + "line_tax_amount": "float" + } + }, + "payment_method": [ + "(?i)(American Express)", + "(?i)(Amex)", + "(VISA)", + "(MCARD)", + "(CONTANT)", + "(KAS)" + ], + "payment_reference": { + "parser": "regex", + "regex": "CRS\\s.\\s\\d+" + } + }, + "keywords": [ + "(?i)van\\sder\\svalk\\shotel\\s", + "Factuur", + "Logies" + ], + "options": { + "currency": "EUR", + "languages": [ + "nl", + "en" + ], + "priority": 4, + "decimal_separator": ",", + "replace": [ + [ + "(\\S+\\s+[0-9]+)[.]([0-9]{2}\\s+[0-9]+)[.]([0-9]{2}\\s+[0-9]+)[.]", + "\\1,\\2,\\3," + ] + ] + } +} \ No newline at end of file diff --git a/src/invoice2data/extract/templates/nl/nl.valk.exclusief.restaurant.json b/src/invoice2data/extract/templates/nl/nl.valk.exclusief.restaurant.json new file mode 100644 index 00000000..199f9cf2 --- /dev/null +++ b/src/invoice2data/extract/templates/nl/nl.valk.exclusief.restaurant.json @@ -0,0 +1,177 @@ +{ + "issuer": "Valk Exclusief", + "fields": { + "amount": { + "parser": "regex", + "regex": [ + "Total\\s+.(\\d?[].]?\\d{1,4}[.,]\\d{2})" + ], + "type": "float", + "group": "sum" + }, + "amount_tax": { + "parser": "regex", + "regex": [ + "\\d{1,4}.?\\d{2}[%]\\s{29,30}[€](\\d?[].]?\\d{1,4}[.,]\\d{2})" + ], + "type": "float", + "group": "sum" + }, + "date": { + "parser": "regex", + "regex": "\\s+(\\d{1,2}\\s\\w+\\s\\d{4})", + "type": "date", + "group": "first" + }, + "invoice_number": { + "parser": "regex", + "regex": "Receipt\\snumber[:]\\s+(\\d*)" + }, + "partner_website": { + "parser": "regex", + "regex": "\\swww[.](.+[.]nl)\\s" + }, + "partner_name": { + "parser": "regex", + "regex": [ + "(?i)(Valk Exclusief)", + "(Hotel \\w+)\\s+" + ], + "group": "first" + }, + "narration": { + "parser": "regex", + "regex": "(Hotel \\w+)\\s+" + }, + "country_code": { + "parser": "regex", + "regex": "\\.(nl)" + }, + "partner_zip": { + "parser": "regex", + "area": { + "f": 1, + "l": 1, + "x": 0, + "y": 930, + "r": 100, + "W": 827, + "H": 320 + }, + "regex": "\\s(\\d{4}\\s?[A-Z]{2})\\s" + }, + "partner_city": { + "parser": "regex", + "area": { + "f": 1, + "l": 1, + "x": 0, + "y": 930, + "r": 100, + "W": 827, + "H": 320 + }, + "regex": "\\d{4}\\s?[A-Z]{2}\\s(\\w+(?:\\s\\w+)*)" + }, + "partner_street": { + "parser": "regex", + "area": { + "f": 1, + "l": 1, + "x": 0, + "y": 930, + "r": 100, + "W": 827, + "H": 320 + }, + "regex": "[|]\\s([A-Z][a-z]+\\s?\\d{1,4}.?\\d*)\\s+[|]\\s\\d{4}\\w{2}" + }, + "telephone": { + "parser": "regex", + "area": { + "f": 1, + "l": 1, + "x": 0, + "y": 930, + "r": 100, + "W": 827, + "H": 320 + }, + "regex": "T[:]\\s+(.?\\d*[(]?[0-9]?[)]?[0-9-]{7,10})" + }, + "partner_email": { + "parser": "regex", + "area": { + "f": 1, + "l": 1, + "x": 0, + "y": 930, + "r": 100, + "W": 827, + "H": 320 + }, + "regex": "(\\w+[@]\\w+[.]nl)" + }, + "vat": { + "parser": "static", + "value": "NL822428015B01" + }, + "partner_coc": { + "parser": "static", + "value": "27380247" + }, + "lines": { + "parser": "lines", + "start": "Product\\s+Price\\s+Amount\\s+Subtotal", + "end": "Total\\s+.\\d?[].]?\\d{1,4}[.,]\\d{2}", + "line": [ + "^(?P(\\w+(?:\\s\\w+){0,8}))\\s+[€](?P-?\\d*[.]?\\d+[.,]\\d{2})\\s+(?P\\d+)\\s+[€](?P-?\\d*[.]?\\d+[.,]\\d{2})", + "^(?P(\\w+(?:\\s\\w+){0,8}))\\s+[-][€](?P-?\\d*[.]?\\d+[.,]\\d{2})", + "^(?PSubtotal\\s+[€]-?\\d*[.]?\\d+[.,]\\d{2})" + ], + "types": { + "price_unit": "float", + "price_subtotal": "float", + "qty": "int" + } + }, + "tax_lines": { + "parser": "lines", + "start": "VAT\\s+Payment", + "end": "\\Z", + "line": [ + "(?P[\\d+,.]+)[%]\\s[\\S ]+\\s+€(?P[\\d+,.]+)" + ], + "types": { + "line_tax_percent": "float", + "price_subtotal": "float", + "line_tax_amount": "float" + } + }, + "payment_method": [ + "(?i)(American Express)", + "(?i)(Amex)", + "(VISA)", + "(MCARD)", + "(CONTANT)", + "(KAS)" + ], + "payment_reference": { + "parser": "regex", + "regex": "Payment\\sreference[:]\\s+(\\S+)" + } + }, + "keywords": [ + "Receipt", + "valkexclusief.nl" + ], + "options": { + "currency": "EUR", + "languages": [ + "nl", + "en" + ], + "priority": 4, + "decimal_separator": "." + } +} \ No newline at end of file