From 925953f98ea0996c8b1624d3abd60921bbee8080 Mon Sep 17 00:00:00 2001 From: bosd Date: Sat, 3 Feb 2024 19:18:16 +0100 Subject: [PATCH] add templates --- .../extract/templates/com/com.cloudflare.yml | 80 ++++++++++++++ .../extract/templates/com/com.hetzner.yml | 91 +++++++++++++++ .../extract/templates/com/com.runbox.yml | 66 +++++++++++ .../extract/templates/com/com.vultr.yml | 82 ++++++++++++++ .../extract/templates/nl/nl.buijtendijk.yml | 104 ++++++++++++++++++ .../extract/templates/nl/nl.fletcher.yml | 104 ++++++++++++++++++ 6 files changed, 527 insertions(+) create mode 100644 src/invoice2data/extract/templates/com/com.cloudflare.yml create mode 100644 src/invoice2data/extract/templates/com/com.hetzner.yml create mode 100644 src/invoice2data/extract/templates/com/com.runbox.yml create mode 100644 src/invoice2data/extract/templates/com/com.vultr.yml create mode 100644 src/invoice2data/extract/templates/nl/nl.buijtendijk.yml create mode 100644 src/invoice2data/extract/templates/nl/nl.fletcher.yml diff --git a/src/invoice2data/extract/templates/com/com.cloudflare.yml b/src/invoice2data/extract/templates/com/com.cloudflare.yml new file mode 100644 index 00000000..dd56612a --- /dev/null +++ b/src/invoice2data/extract/templates/com/com.cloudflare.yml @@ -0,0 +1,80 @@ +issuer: Cloudflare, Inc +fields: + amount: + parser: regex + regex: + - Total\s+[$€](\d+[.]\d{2})\s # [.] matches only a literal decimal point + type: float + amount_untaxed: + parser: regex + regex: + - Subtotal [(]USD[)]\s+[$€](\d+[.]\d{2})\s + type: float + amount_tax: + parser: regex + regex: + - Tax Amount\s+[$€](\d+[.]\d{2})\s + type: float + date: + parser: regex + regex: + - Date[:]\s+(\d{2}.\d{2}.\d{4})\s+ + type: date + invoice_number: + parser: regex + regex: + - INVOICE.\s+(\w+) + partner_website: + parser: regex + regex: + - (cloudflare[.]com) + group: first + partner_name: + parser: regex + regex: + - '(Cloudflare, Inc)' + partner_email: + parser: static + value: billing@cloudflare.com + partner_city: + parser: regex + regex: 'San 
Francisco' + group: first + partner_zip: + parser: regex + regex: 'CA 94107' + country_code: + parser: static + value: US + payment_method: + - (?i)(AMEX) + - (?i)(American express) + - (?i)(VISA) + - (?i)(Vpay) + - (?i)(Mastercard) + - (?i)(CONTANT) + - (?i)(KAS):\s.\s\d+\.\d+ + lines: + parser: lines + rules: + - start: 'Summary of Current Charges' + end: '\s+Total\s+[$€](\d+[.]\d{2})\s' + line: + - '(?P(\w+(?:\s\S+)*))\s+(?P\d{2}[\/]\d{1,2}[\/]\d{4})\s-\s(?P\d{2}[\/]\d{1,2}[\/]\d{4})\s+(?P\d)\s+[$€](?P\d+.\d{2})\s+[$€](?P\d+.\d{2})' + types: + qty: float + price_unit: float + line_tax_amount: float + price_subtotal: float + date_end: date + date_start: date +keywords: + - 'Cloudflare, Inc' + - INVOICE +options: + date_formats: + - '%d %m %Y' + currency: USD + languages: + - en + decimal_separator: '.' diff --git a/src/invoice2data/extract/templates/com/com.hetzner.yml b/src/invoice2data/extract/templates/com/com.hetzner.yml new file mode 100644 index 00000000..18386873 --- /dev/null +++ b/src/invoice2data/extract/templates/com/com.hetzner.yml @@ -0,0 +1,91 @@ +issuer: Hetzner Online GmbH +fields: + amount: + parser: regex + regex: \s+Amount due[:]\s+[€]\s(\d+[.]\d+) + type: float + amount_tax: + parser: regex + regex: Total\s+[€]\s\d+[.]\d+\s+[€]\s(\d+[.]\d+) + type: float + amount_untaxed: + parser: regex + regex: \s+Subtotal [(]excl. VAT[)]\s+[$€]\s(\d*[.,]?\d+[,.]\d+) # \d* = optional leading digits (was a literal "d") + type: float + invoice_number: + parser: regex + regex: Invoice no\.[:] (\w+) + partner_website: + parser: static + value: hetzner.com + partner_email: + parser: static + value: info@hetzner.com + partner_name: + parser: regex + regex: Hetzner Online GmbH + partner_city: + parser: regex + regex: Gunzenhausen + country_code: + parser: static + value: DE + partner_zip: + parser: regex + regex: '(\d{5}) Gunzenhausen' + partner_street: + parser: regex + regex: 'Industriestr. 
\d+' + currency_symbol: + parser: regex + regex: '[$€]' + date: + parser: regex + regex: Invoice date[:] (\d+.\d{2}.\d{4}) + type: date + vat: + parser: regex + regex: 'VAT Reg\. No\.[:] (DE8\w+)' + bic: + parser: regex + regex: 'BIC.\s+(\w{8,11})' + iban: + parser: regex + regex: '[A-Z]{2}\d{2}?\s?\w{4}?\s?\d{4}?\s?\d{4}?\s?\d{4}?\s?\d{0,2}' # mod version do not copy + lines: + parser: lines + start: 'Pos\s+' + end: 'Subtotal' + first_line: + - '(?P\w+(?:\s\S+)+\s+[(]\d{2}.\d{2}.\d{4}\s-\s\d{2}.\d{2}.\d{4}[)])\s[*]' + - '\d+\s+(?P\w+(?:\s\w+)+)\s+(?P\d+)\s+[$€]\s(?P\d+.\d+)\s+[$€]\s(?P\d+.\d+)' + line: + - (?P\d{2}.\d{2}.\d{4})\s-\s(?P\d{2}.\d{2}.\d{4}) + - Quantity type[:]\s(?P\w+) + types: + qty: float + unit_price: float + price_subtotal: float + date_start: date + date_end: date + tax_lines: + parser: lines + start: 'Tax code' + end: '(?i)Amount due' + line: + - '(?P\d+)\s+(?P\d+[,.]?\d*)[%]\s+[$€]\s(?P(\d*[.,]?\d+[,.]\d+))\s+[$€]\s(?P(\d*[.,]?\d+[,.]\d+))' + types: + line_tax_percent: float + price_subtotal: float + line_tax_amount: float +keywords: + - 'Hetzner Online' + - 'DE812871812' + - 'Invoice' +required_fields: + - lines +options: + languages: + - en + currency: EUR + diff --git a/src/invoice2data/extract/templates/com/com.runbox.yml b/src/invoice2data/extract/templates/com/com.runbox.yml new file mode 100644 index 00000000..7d832cb1 --- /dev/null +++ b/src/invoice2data/extract/templates/com/com.runbox.yml @@ -0,0 +1,66 @@ +issuer: Runbox Solutions AS +fields: + amount: + parser: regex + regex: \s+Total\s+EUR\s(\d*[.,]?\d+[,.]\d+) # \d* = optional leading digits (was a literal "d") + type: float + amount_untaxed: + parser: regex + regex: \s+Total\s+EUR\s(\d*[.,]?\d+[,.]\d+) + type: float + invoice_number: + parser: regex + regex: Invoice no[.:]+\s+(\d+) + partner_website: + parser: static + value: runbox.com + partner_name: + parser: regex + regex: Runbox Solutions AS + partner_city: + parser: regex + regex: Oslo + country_code: + parser: static + value: 'NO' + partner_zip: + parser: regex + regex: 
'[,]\s(\d{4})\s\w+' # capture only the four digits, not the preceding comma and space + partner_email: + parser: regex + regex: '\w+[@]\w+[.]com' + date: + parser: regex + regex: Invoice date[:]\s+(\d+-\d{2}-\d{2}) + type: date + date_due: + parser: regex + regex: 'Due date[:]\s+(\d{4}[-]\d{2}[-]\d{2})' + type: date + iban: + parser: regex + regex: (?:[A-Z]{2}[ \-]?[0-9]{2})(?:[ \-]?[A-Z0-9]{3,5}){2,7} + bic: + parser: regex + regex: SWIFT code[:]\s+(\w{8,11}) + lines: + parser: lines + start: 'Description' + end: 'Total' + line: + - '(?P[\S ]+)\s+(?P\d+)\s+(?P\d+[,.]\d{2})\s+(?P\d+[,.]\d{2})\s+(?P\d+[,.]\d{2})' + types: + qty: float + price_subtotal: float + line_amount_tax: float + unit_price: float +keywords: + - 'Runbox' + - 'Invoice' +required_fields: + - lines +options: + languages: + - en + currency: EUR + diff --git a/src/invoice2data/extract/templates/com/com.vultr.yml b/src/invoice2data/extract/templates/com/com.vultr.yml new file mode 100644 index 00000000..04b0dbd4 --- /dev/null +++ b/src/invoice2data/extract/templates/com/com.vultr.yml @@ -0,0 +1,82 @@ +issuer: Vultr The Constant Company, LLC. 
+fields: + amount: + parser: regex # done + regex: \s+Total\s[(]\w+ \w+[)][:]\s+.(\d*[.,]?\d+[,.]\d+) # \d* = optional leading digits (was a literal "d") + type: float + amount_tax: # done + parser: regex + regex: \d+[,.]\d+[%):]+\s+.(\d*[.,]?\d+[,.]\d+) + type: float + amount_untaxed: # done + parser: regex + regex: \s+Sub Total[:]\s+.(\d*[.,]?\d+[,.]\d+) + type: float + invoice_number: # done + parser: regex + regex: Invoice Number[:] (\d+) + partner_website: # done + parser: static + value: vultr.com + partner_name: # done + parser: regex + regex: Vultr + partner_city: # done + parser: regex + regex: West Palm Beach + state_code: # done + parser: regex + regex: FL + country_code: # done + parser: static + value: US + partner_zip: # done + parser: regex + regex: '([A-Z]{2}\s\d{5})' + partner_street: # done + parser: regex + regex: '\d{3} \w+ Street' + currency_symbol: + parser: regex + regex: '[$€]' + date: # done + parser: regex + regex: Invoice Date[:] (\d+-\d{2}-\d{2}) + type: date + date_due: # done + parser: regex + regex: 'Please Pay By[:] (\w+\s\d{2},\s\d{4})' + type: date + vat: # done + parser: regex # done + regex: 'VAT ID (\w+)' + lines: # done + parser: lines + start: 'Start' + end: 'Total' + line: + - '(?P\d{2}-\d{2}\s\d{2}[:]\d{2})\s+(?P\d{2}-\d{2}\s\d{2}[:]\d{2})\s+(?P[\S ]+)\s+(?P\d+)\s+.(?P\d+[,.]\d{2})' + types: + qty: float + price_subtotal: float + date_start: date + date_end: date + tax_lines: + parser: lines + start: 'Start' + end: '(?i)Please' + line: + - '(?P\d+[,.]\d+)[%):]+\s+.(?P(\d*[.,]?\d+[,.]\d+))' + types: + line_tax_percent: float + line_tax_amount: float +keywords: + - 'vultr' + - 'Invoice' +required_fields: + - lines +options: + languages: + - en + currency: USD + diff --git a/src/invoice2data/extract/templates/nl/nl.buijtendijk.yml b/src/invoice2data/extract/templates/nl/nl.buijtendijk.yml new file mode 100644 index 00000000..f028b30d --- /dev/null +++ b/src/invoice2data/extract/templates/nl/nl.buijtendijk.yml @@ -0,0 +1,104 @@ +issuer: Buijtendijk B.V. 
+fields: + amount: + parser: regex + regex: \s+Totaalbedrag Incl[.]\s+(\d*[.,]?\d+[,.]\d+) # [.] = literal dot, same form as the Excl[.] pattern + type: float + amount_tax: + parser: regex + regex: \s+BTW\s\d{2}[%]\s+(\d*[.,]?\d+[,.]\d+) + group: sum + type: float + amount_untaxed: + parser: regex + regex: Totaalbedrag Excl[.]\s+(\d*[.,]?\d+[,.]\d+) + type: float + purchase_order_id: # todo + parser: regex + regex: \s+Uw ordernr.(\s+) + group: first + partner_website: + parser: regex + regex: \swww[.](.+[.]nl)\s + partner_email: + parser: regex + regex: \s+(\w+[@]\w+[.]nl)\s + telephone: + parser: regex + regex: 'Telefoon[:]\s+(.+)' + partner_name: + parser: static + value: Buijtendijk + partner_street: + parser: regex + area: {f: 1, l: 1, x: 550, y: 14, r: 100, W: 252, H: 240} + regex: ^(\w+ \d+ \w+)\s* + partner_zip: + parser: regex + area: {f: 1, l: 1, x: 550, y: 14, r: 100, W: 252, H: 240} + regex: '\s(\d{4}\s?[A-Z]{2})\s' + group: last + partner_city: + parser: regex + area: {f: 1, l: 1, x: 550, y: 14, r: 100, W: 252, H: 240} + regex: '\d{4}\s?[A-Z]{2}\s(\w+(?:\s\w+)*)\s' + group: last + state_code: + parser: static + value: NH + country_code: + parser: static + value: NL + iban: + parser: regex + regex: Bank[:]\s+(NL.*)\s+[(] + bic: + parser: regex + regex: BIC[:]\s+(\w+)[)] + partner_coc: + parser: regex + regex: KvK.*(\d{8}) + lines: + parser: lines + start: 'Uw ordernr' + end: 'Totaalbedrag' # lines need work + first_line: + - '(?P\d+)\s{8}(?P\d+)\s{12}(?P(\S+(?:\s\S+)*))\s{3,41}(?P\d+)\s+(?P\d*[.,]?\d+[,.]\d{2})\s+(?P\w+)\s+\d+\s+netto\s+(?P\d*[.,]?\d+[,.]\d{2})?' 
+ line: + - '^\s{32}(?P(\S+(?:\s\S+)*))' + types: + qty: float + price_subtotal: float + price_unit: float + tax_lines: + parser: lines + start: 'netto' + end: '\Z' + line: + - 'Excl[.]\s+(?P[\d+,.]+)\s+BTW (?P[\d+.]+)%\s+(?P[\d+.,]+)' + types: + line_tax_percent: float + price_subtotal: float + line_tax_amount: float + vat: + parser: regex + regex: BTW.?\s+(NL\d{9}B\d{2}) + company_vat: + parser: regex + area: {f: 1, l: 1, x: 0, y: 14, r: 100, W: 550, H: 310} + regex: (NL\d{9}B\d{2}) +tables: + - start: Debiteur\s+Factuurnr + end: Betalingsconditie + body: \s+(?P\d*)\s+(?P\d+)\s+(?P\d+[-]\d+[-]\d{4})\s + types: + date: date +keywords: + - buijtendijk + - NL006934109B01 + - FACTUUR +options: + languages: + - nl + decimal_separator: ',' + currency: EUR diff --git a/src/invoice2data/extract/templates/nl/nl.fletcher.yml b/src/invoice2data/extract/templates/nl/nl.fletcher.yml new file mode 100644 index 00000000..ad858563 --- /dev/null +++ b/src/invoice2data/extract/templates/nl/nl.fletcher.yml @@ -0,0 +1,104 @@ +issuer: Fletcher Hotel Exploitaties B.V. +fields: + amount: + parser: regex + regex: Totaal\s+(\d*[.,]?\d+[,.]\d+) # \d* = optional leading digits (was a literal "d") + type: float + amount_tax: + parser: regex + regex: BTW\s+\d+[,.]\d+[%]+\s+Verrekenbaar\s+\d*[.,]?\d+[,.]\d+\s+(\d*[.,]?\d+[,.]\d+)\s+\d*[.,]?\d+[,.]\d+ + type: float + invoice_number: + parser: regex + regex: Factuurnummer\s+(\d+) + partner_website: + parser: static + value: fletcher.nl + partner_coc: + parser: static + value: '30144691' + partner_name: + parser: static + value: Fletcher Hotel Exploitaties B.V. 
+ partner_street: + parser: static + value: Buizerdlaan 2 + partner_city: + parser: static + value: Nieuwegein + country_code: + parser: static + value: 'NL' + partner_zip: + parser: static + value: '3435 SB' + vat: + parser: regex + regex: BTW[:] (\S+) + date: + parser: regex + regex: Factuur datum\s+(\d+-\d{2}-\d{4}) + type: date + date_due: + parser: regex + regex: 'Te betalen voor\s+(\d+[-]\d{2}[-]\d{4})' + type: date + iban: + parser: regex + regex: (?:[A-Z]{2}[ \-]?[0-9]{2})(?:[ \-]?[A-Z0-9]{3,5}){2,7} + bic: + parser: regex + regex: BIC[:]\s(\w{8,11}) + payment_method: + - '(?i)(AMERICAN EXPRESS)' + - (Eurocard/Mastercard) + lines: + parser: lines + rules: + - start: 'Datum\s+Omschrijving' + end: 'Totaal factuur' + line: + - '(?P\d+-\d{2}-\d{4})\s{11}(?P.+)\s{10,40}(?P\d*)\s+(?P\d+[,.]\d{2})' + types: + qty: float + price_subtotal: float + line_amount_tax: float + unit_price: float + date_start: date + - start: 'Totaal factuur' + end: 'Totaal betaald' + line: + - (?PBetalingen) + - '^(?P\d+[-]\d{2}[-]\d{4}.+)' + tax_lines: + parser: lines + rules: + - start: 'Incl. BTW' + end: '(?i)Totaal\s+' # (?i) is the inline case-insensitive flag; "(i)" required a literal "i" + line: 'BTW\s+(?P\d+[,.]\d+)[%]+\s+.(?P\d*[.,]?\d+[,.]\d+)\s+(?P(\d*[.,]?\d+[,.]\d+))\s+(?P(\d*[.,]?\d+[,.]\d+))' + types: + line_tax_percent: float + line_tax_amount: float + price_subtotal: float + price_total: float + - start: 'Incl. BTW' + end: '(?i)Totaal\s+' + line: 'BTW\s+(?P\d+[,.]\d+)[%]+\s+Verrekenbaar\s+(?P\d*[.,]?\d+[,.]\d+)\s+(?P\d*[.,]?\d+[,.]\d+)\s+(?P\d*[.,]?\d+[,.]\d+)' + types: + line_tax_percent: float + line_tax_amount: float + price_subtotal: float + price_total: float +keywords: + - '30144691' + - 'Factuur' +required_fields: + - amount_tax +options: + decimal_separator: "," + languages: + - nl + replace: + - ['\s(\d+)[.](\d{2})', ' \1,\2'] + currency: EUR +