From 4e34d53bc1a947a7c5d5fe692e5645aec2f1d813 Mon Sep 17 00:00:00 2001 From: MK Date: Thu, 17 May 2018 21:25:57 +0200 Subject: [PATCH 1/5] add code to handle image merge To handle image merges, prefix your field name with `IMAGE:` and pass the binary image data (use .png) as the value. Instead of using normal mailmerge fields in the template docx, you should add a placeholder image and set the field name as the image `ALT TEXT`. This way you get a correct preview in your template, and any image layout markup applied to the placeholder image will be inherited by the inserted images. You can call it like this: ```python f1 = open('test_img1.png', 'rb') img_data_1 = f1.read() f1.close() f2 = open('test_img2.png', 'rb') img_data_2 = f2.read() f2.close() document.merge_templates([ {'field1': "Foo", 'field2': "Copy #1", 'IMAGE:merge_img1': img_data_1 }, {'field1': "Bar", 'field2': "Copy #2", 'IMAGE:merge_img1': img_data_2 }, ], separator='page_break') ``` --- mailmerge.py | 53 +++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 52 insertions(+), 1 deletion(-) diff --git a/mailmerge.py b/mailmerge.py index 0d83518..829d1ac 100644 --- a/mailmerge.py +++ b/mailmerge.py @@ -9,6 +9,9 @@ 'w': 'http://schemas.openxmlformats.org/wordprocessingml/2006/main', 'mc': 'http://schemas.openxmlformats.org/markup-compatibility/2006', 'ct': 'http://schemas.openxmlformats.org/package/2006/content-types', + 'wp': 'http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing', + 'a': 'http://schemas.openxmlformats.org/drawingml/2006/main', + 'pic': 'http://schemas.openxmlformats.org/drawingml/2006/picture', } CONTENT_TYPES_PARTS = ( @@ -26,6 +29,12 @@ def __init__(self, file, remove_empty_tables=False): self.parts = {} self.settings = None self._settings_info = None + + self.media = {} # new images to add indexed by embed id + self.rels = None # etree for relations + self._rels_info = None # zi info block for rels + self.RELS_NAMESPACES = {'ns': None, 'od': None} + self.remove_empty_tables 
= remove_empty_tables try: @@ -37,6 +46,14 @@ def __init__(self, file, remove_empty_tables=False): elif type == CONTENT_TYPE_SETTINGS: self._settings_info, self.settings = self.__get_tree_of_file(file) + # get the rels for image mappings + try: + self._rels_info, self.rels = self.__get_tree_of_file('word/_rels/document.xml.rels') + self.RELS_NAMESPACES['ns'] = self.rels.getroot().nsmap.get(None) + self.RELS_NAMESPACES['od'] = self.rels.getroot().nsmap.get(None).replace('package', 'officeDocument') + except: + pass + to_delete = [] r = re.compile(r' MERGEFIELD +"?([^ ]+?)"? +(|\\\* MERGEFORMAT )', re.I) @@ -108,7 +125,10 @@ def __init__(self, file, remove_empty_tables=False): raise def __get_tree_of_file(self, file): - fn = file.attrib['PartName' % NAMESPACES].split('/', 1)[1] + if isinstance(file, basestring): + fn = file + else: + fn = file.attrib['PartName' % NAMESPACES].split('/', 1)[1] zi = self.zip.getinfo(fn) return zi, etree.parse(self.zip.open(zi)) @@ -125,8 +145,14 @@ def write(self, file): elif zi == self._settings_info: xml = etree.tostring(self.settings.getroot()) output.writestr(zi.filename, xml) + elif zi == self._rels_info: + xml = etree.tostring(self.rels.getroot()) + output.writestr(zi.filename, xml) else: output.writestr(zi.filename, self.zip.read(zi)) + # add new images to media folder is we have images merged + for img_id, img_data in self.media.items(): + output.writestr('media/{}.png'.format(img_id), img_data) def get_merge_fields(self, parts=None): if not parts: @@ -254,6 +280,31 @@ def merge(self, parts=None, **replacements): self.__merge_field(part, field, replacement) def __merge_field(self, part, field, text): + if field.startswith('IMAGE:'): + _, img_name = field.split(':') + inline_img_el = part.find('.//wp:docPr[@title="{}"]/..'.format(img_name), namespaces=NAMESPACES) + if inline_img_el: + embed_node = inline_img_el.find('.//a:blip', namespaces=NAMESPACES) + if embed_node: + # generate a random id and add tp media list for later 
export to media folder in zip file + img_id = 'MMR{}'.format(randint(10000000, 999999999)) + self.media[img_id] = text + + # add a relationship + last_img_relationship = self.rels.findall('{%(ns)s}Relationship[@Type="%(od)s/image"]' % self.RELS_NAMESPACES)[-1] + new_img_relationship = deepcopy(last_img_relationship) + new_img_relationship.set('Id', img_id) + new_img_relationship.set('Target', '/media/{}.png'.format(img_id)) + self.rels.getroot().append(new_img_relationship) + + # replace the embed attrib with the new image_id + embed_node = inline_img_el.find('.//a:blip', namespaces=NAMESPACES) + embed_attr = embed_node.attrib.keys()[0] + embed_node.attrib[embed_attr] = img_id + # mark as done + inline_img_el.find('wp:docPr', namespaces=NAMESPACES).attrib['title'] = 'replaced_image_{}'.format(img_id) + return + for mf in part.findall('.//MergeField[@name="%s"]' % field): children = list(mf) mf.clear() # clear away the attributes From b68f0ab5a2746e7f554849fb66a76d3b97854040 Mon Sep 17 00:00:00 2001 From: matthijs Date: Fri, 18 May 2018 09:32:22 +0200 Subject: [PATCH 2/5] fix tabs inserted by github online editor --- mailmerge.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/mailmerge.py b/mailmerge.py index 829d1ac..fe0bda8 100644 --- a/mailmerge.py +++ b/mailmerge.py @@ -30,7 +30,7 @@ def __init__(self, file, remove_empty_tables=False): self.settings = None self._settings_info = None - self.media = {} # new images to add indexed by embed id + self.media = {} # new images to add indexed by embed id self.rels = None # etree for relations self._rels_info = None # zi info block for rels self.RELS_NAMESPACES = {'ns': None, 'od': None} @@ -53,7 +53,6 @@ def __init__(self, file, remove_empty_tables=False): self.RELS_NAMESPACES['od'] = self.rels.getroot().nsmap.get(None).replace('package', 'officeDocument') except: pass - to_delete = [] r = re.compile(r' MERGEFIELD +"?([^ ]+?)"? 
+(|\\\* MERGEFORMAT )', re.I) @@ -125,7 +124,7 @@ def __init__(self, file, remove_empty_tables=False): raise def __get_tree_of_file(self, file): - if isinstance(file, basestring): + if isinstance(file, basestring): fn = file else: fn = file.attrib['PartName' % NAMESPACES].split('/', 1)[1] @@ -145,7 +144,7 @@ def write(self, file): elif zi == self._settings_info: xml = etree.tostring(self.settings.getroot()) output.writestr(zi.filename, xml) - elif zi == self._rels_info: + elif zi == self._rels_info: xml = etree.tostring(self.rels.getroot()) output.writestr(zi.filename, xml) else: @@ -190,14 +189,14 @@ def merge_templates(self, replacements, separator): tag = root.tag if tag == '{%(w)s}ftr' % NAMESPACES or tag == '{%(w)s}hdr' % NAMESPACES: continue - + if sepClass == 'section': #FINDING FIRST SECTION OF THE DOCUMENT firstSection = root.find("w:body/w:p/w:pPr/w:sectPr", namespaces=NAMESPACES) if firstSection == None: firstSection = root.find("w:body/w:sectPr", namespaces=NAMESPACES) - + #MODIFY TYPE ATTRIBUTE OF FIRST SECTION FOR MERGING nextPageSec = deepcopy(firstSection) for child in nextPageSec: From 06c57f44aeaf37fe37f3dee6dfcaf249d6183f14 Mon Sep 17 00:00:00 2001 From: matthijs Date: Fri, 18 May 2018 09:50:45 +0200 Subject: [PATCH 3/5] fix basesting error in python3 and remove not used string formatter --- mailmerge.py | 74 +++++++++++++++++++++++++++------------------------- 1 file changed, 38 insertions(+), 36 deletions(-) diff --git a/mailmerge.py b/mailmerge.py index fe0bda8..bc13a34 100644 --- a/mailmerge.py +++ b/mailmerge.py @@ -29,12 +29,12 @@ def __init__(self, file, remove_empty_tables=False): self.parts = {} self.settings = None self._settings_info = None - + self.media = {} # new images to add indexed by embed id self.rels = None # etree for relations - self._rels_info = None # zi info block for rels + self._rels_info = None # zi info block for rels self.RELS_NAMESPACES = {'ns': None, 'od': None} - + self.remove_empty_tables = remove_empty_tables 
try: @@ -124,10 +124,10 @@ def __init__(self, file, remove_empty_tables=False): raise def __get_tree_of_file(self, file): - if isinstance(file, basestring): - fn = file + if isinstance(file, etree._Element): + fn = file.get('PartName').split('/', 1)[1] else: - fn = file.attrib['PartName' % NAMESPACES].split('/', 1)[1] + fn = file zi = self.zip.getinfo(fn) return zi, etree.parse(self.zip.open(zi)) @@ -166,7 +166,7 @@ def merge_templates(self, replacements, separator): """ Duplicate template. Creates a copy of the template, does a merge, and separates them by a new paragraph, a new break or a new section break. separator must be : - - page_break : Page Break. + - page_break : Page Break. - column_break : Column Break. ONLY HAVE EFFECT IF DOCUMENT HAVE COLUMNS - textWrapping_break : Line Break. - continuous_section : Continuous section break. Begins the section on the next paragraph. @@ -176,14 +176,14 @@ def merge_templates(self, replacements, separator): - oddPage_section : oddPage section break. section begins on the next odd-numbered page, leaving the next even page blank if necessary. 
""" - #TYPE PARAM CONTROL AND SPLIT - valid_separators = {'page_break', 'column_break', 'textWrapping_break', 'continuous_section', 'evenPage_section', 'nextColumn_section', 'nextPage_section', 'oddPage_section'} + # TYPE PARAM CONTROL AND SPLIT + valid_separators = {'page_break', 'column_break', 'textWrapping_break', 'continuous_section', + 'evenPage_section', 'nextColumn_section', 'nextPage_section', 'oddPage_section'} if not separator in valid_separators: raise ValueError("Invalid separator argument") type, sepClass = separator.split("_") - - #GET ROOT - WORK WITH DOCUMENT + # GET ROOT - WORK WITH DOCUMENT for part in self.parts.values(): root = part.getroot() tag = root.tag @@ -192,42 +192,42 @@ def merge_templates(self, replacements, separator): if sepClass == 'section': - #FINDING FIRST SECTION OF THE DOCUMENT + # FINDING FIRST SECTION OF THE DOCUMENT firstSection = root.find("w:body/w:p/w:pPr/w:sectPr", namespaces=NAMESPACES) if firstSection == None: firstSection = root.find("w:body/w:sectPr", namespaces=NAMESPACES) - #MODIFY TYPE ATTRIBUTE OF FIRST SECTION FOR MERGING + # MODIFY TYPE ATTRIBUTE OF FIRST SECTION FOR MERGING nextPageSec = deepcopy(firstSection) for child in nextPageSec: - #Delete old type if exist + # Delete old type if exist if child.tag == '{%(w)s}type' % NAMESPACES: nextPageSec.remove(child) - #Create new type (def parameter) - newType = etree.SubElement(nextPageSec, '{%(w)s}type' % NAMESPACES) - newType.set('{%(w)s}val' % NAMESPACES, type) + # Create new type (def parameter) + newType = etree.SubElement(nextPageSec, '{%(w)s}type' % NAMESPACES) + newType.set('{%(w)s}val' % NAMESPACES, type) - #REPLACING FIRST SECTION + # REPLACING FIRST SECTION secRoot = firstSection.getparent() secRoot.replace(firstSection, nextPageSec) - #FINDING LAST SECTION OF THE DOCUMENT + # FINDING LAST SECTION OF THE DOCUMENT lastSection = root.find("w:body/w:sectPr", namespaces=NAMESPACES) - #SAVING LAST SECTION + # SAVING LAST SECTION mainSection = 
deepcopy(lastSection) lsecRoot = lastSection.getparent() lsecRoot.remove(lastSection) - #COPY CHILDREN ELEMENTS OF BODY IN A LIST + # COPY CHILDREN ELEMENTS OF BODY IN A LIST childrenList = root.findall('w:body/*', namespaces=NAMESPACES) - #DELETE ALL CHILDREN OF BODY + # DELETE ALL CHILDREN OF BODY for child in root: if child.tag == '{%(w)s}body' % NAMESPACES: child.clear() - #REFILL BODY AND MERGE DOCS - ADD LAST SECTION ENCAPSULATED OR NOT + # REFILL BODY AND MERGE DOCS - ADD LAST SECTION ENCAPSULATED OR NOT lr = len(replacements) lc = len(childrenList) parts = [] @@ -245,13 +245,13 @@ def merge_templates(self, replacements, separator): else: if sepClass == 'section': intSection = deepcopy(mainSection) - p = etree.SubElement(child, '{%(w)s}p' % NAMESPACES) - pPr = etree.SubElement(p, '{%(w)s}pPr' % NAMESPACES) + p = etree.SubElement(child, '{%(w)s}p' % NAMESPACES) + pPr = etree.SubElement(p, '{%(w)s}pPr' % NAMESPACES) pPr.append(intSection) parts.append(p) elif sepClass == 'break': - pb = etree.SubElement(child, '{%(w)s}p' % NAMESPACES) - r = etree.SubElement(pb, '{%(w)s}r' % NAMESPACES) + pb = etree.SubElement(child, '{%(w)s}p' % NAMESPACES) + r = etree.SubElement(pb, '{%(w)s}r' % NAMESPACES) nbreak = Element('{%(w)s}br' % NAMESPACES) nbreak.attrib['{%(w)s}type' % NAMESPACES] = type r.append(nbreak) @@ -259,13 +259,13 @@ def merge_templates(self, replacements, separator): self.merge(parts, **repl) def merge_pages(self, replacements): - """ - Deprecated method. - """ - warnings.warn("merge_pages has been deprecated in favour of merge_templates", + """ + Deprecated method. 
+ """ + warnings.warn("merge_pages has been deprecated in favour of merge_templates", category=DeprecationWarning, - stacklevel=2) - self.merge_templates(replacements, "page_break") + stacklevel=2) + self.merge_templates(replacements, "page_break") def merge(self, parts=None, **replacements): if not parts: @@ -290,7 +290,8 @@ def __merge_field(self, part, field, text): self.media[img_id] = text # add a relationship - last_img_relationship = self.rels.findall('{%(ns)s}Relationship[@Type="%(od)s/image"]' % self.RELS_NAMESPACES)[-1] + last_img_relationship = \ + self.rels.findall('{%(ns)s}Relationship[@Type="%(od)s/image"]' % self.RELS_NAMESPACES)[-1] new_img_relationship = deepcopy(last_img_relationship) new_img_relationship.set('Id', img_id) new_img_relationship.set('Target', '/media/{}.png'.format(img_id)) @@ -301,9 +302,10 @@ def __merge_field(self, part, field, text): embed_attr = embed_node.attrib.keys()[0] embed_node.attrib[embed_attr] = img_id # mark as done - inline_img_el.find('wp:docPr', namespaces=NAMESPACES).attrib['title'] = 'replaced_image_{}'.format(img_id) + inline_img_el.find('wp:docPr', namespaces=NAMESPACES).attrib['title'] = 'replaced_image_{}'.format( + img_id) return - + for mf in part.findall('.//MergeField[@name="%s"]' % field): children = list(mf) mf.clear() # clear away the attributes From 663f4ebde6a224b62feec83e1e100a704bb82034 Mon Sep 17 00:00:00 2001 From: matthijs Date: Fri, 18 May 2018 10:04:48 +0200 Subject: [PATCH 4/5] fix more python3 errors (?) 
--- mailmerge.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mailmerge.py b/mailmerge.py index bc13a34..9fb0b68 100644 --- a/mailmerge.py +++ b/mailmerge.py @@ -30,8 +30,8 @@ def __init__(self, file, remove_empty_tables=False): self.settings = None self._settings_info = None - self.media = {} # new images to add indexed by embed id - self.rels = None # etree for relations + self.media = {} # new images to add indexed by embed id + self.rels = None # etree for relationships self._rels_info = None # zi info block for rels self.RELS_NAMESPACES = {'ns': None, 'od': None} From 051ad0afa91d7fb3f693ac3f63f5ebc59d80de6f Mon Sep 17 00:00:00 2001 From: matthijs Date: Fri, 18 May 2018 17:20:41 +0200 Subject: [PATCH 5/5] add forgotten import --- mailmerge.py | 1 + 1 file changed, 1 insertion(+) diff --git a/mailmerge.py b/mailmerge.py index 9fb0b68..1f3a6f6 100644 --- a/mailmerge.py +++ b/mailmerge.py @@ -4,6 +4,7 @@ from lxml.etree import Element from lxml import etree from zipfile import ZipFile, ZIP_DEFLATED +from random import randint NAMESPACES = { 'w': 'http://schemas.openxmlformats.org/wordprocessingml/2006/main',