From 4e34d53bc1a947a7c5d5fe692e5645aec2f1d813 Mon Sep 17 00:00:00 2001
From: MK <matthijs@kadijk.com>
Date: Thu, 17 May 2018 21:25:57 +0200
Subject: [PATCH 1/5] add code to handle image merge

To handle image merges, prefix the field name with `IMAGE:` and pass the binary image data (use .png) as its value.

Instead of using a normal mailmerge field in the template docx, add a placeholder image and set the field name (without the `IMAGE:` prefix) as the image's `ALT TEXT` title.

This way you get a correct preview in your template, and any image layout markup applied to the placeholder image is inherited by the inserted images.

You can call it like this:

```python
f1 = open('test_img1.png', 'rb')
img_data_1 = f1.read()
f1.close()

f2 = open('test_img2.png', 'rb')
img_data_2 = f2.read()
f2.close()

document.merge_templates([
    {'field1': "Foo", 'field2': "Copy #1", 'IMAGE:merge_img1': img_data_1},
    {'field1': "Bar", 'field2': "Copy #2", 'IMAGE:merge_img1': img_data_2},
], separator='page_break')
```
---
 mailmerge.py | 53 +++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 52 insertions(+), 1 deletion(-)

diff --git a/mailmerge.py b/mailmerge.py
index 0d83518..829d1ac 100644
--- a/mailmerge.py
+++ b/mailmerge.py
@@ -9,6 +9,9 @@
     'w': 'http://schemas.openxmlformats.org/wordprocessingml/2006/main',
     'mc': 'http://schemas.openxmlformats.org/markup-compatibility/2006',
     'ct': 'http://schemas.openxmlformats.org/package/2006/content-types',
+    'wp': 'http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing',
+    'a': 'http://schemas.openxmlformats.org/drawingml/2006/main',
+    'pic': 'http://schemas.openxmlformats.org/drawingml/2006/picture',
 }
 
 CONTENT_TYPES_PARTS = (
@@ -26,6 +29,12 @@ def __init__(self, file, remove_empty_tables=False):
         self.parts = {}
         self.settings = None
         self._settings_info = None
+        
+	self.media = {}  # new images to add indexed by embed id
+        self.rels = None  # etree for relations
+        self._rels_info = None # zi info block for rels
+        self.RELS_NAMESPACES = {'ns': None, 'od': None}
+        
         self.remove_empty_tables = remove_empty_tables
 
         try:
@@ -37,6 +46,14 @@ def __init__(self, file, remove_empty_tables=False):
                 elif type == CONTENT_TYPE_SETTINGS:
                     self._settings_info, self.settings = self.__get_tree_of_file(file)
 
+            # get the rels for image mappings
+            try:
+                self._rels_info, self.rels = self.__get_tree_of_file('word/_rels/document.xml.rels')
+                self.RELS_NAMESPACES['ns'] = self.rels.getroot().nsmap.get(None)
+                self.RELS_NAMESPACES['od'] = self.rels.getroot().nsmap.get(None).replace('package', 'officeDocument')
+            except:
+                pass
+	    
             to_delete = []
 
             r = re.compile(r' MERGEFIELD +"?([^ ]+?)"? +(|\\\* MERGEFORMAT )', re.I)
@@ -108,7 +125,10 @@ def __init__(self, file, remove_empty_tables=False):
             raise
 
     def __get_tree_of_file(self, file):
-        fn = file.attrib['PartName' % NAMESPACES].split('/', 1)[1]
+	if isinstance(file, basestring):
+            fn = file
+        else:
+            fn = file.attrib['PartName' % NAMESPACES].split('/', 1)[1]
         zi = self.zip.getinfo(fn)
         return zi, etree.parse(self.zip.open(zi))
 
@@ -125,8 +145,14 @@ def write(self, file):
                 elif zi == self._settings_info:
                     xml = etree.tostring(self.settings.getroot())
                     output.writestr(zi.filename, xml)
+		elif zi == self._rels_info:
+                    xml = etree.tostring(self.rels.getroot())
+                    output.writestr(zi.filename, xml)
                 else:
                     output.writestr(zi.filename, self.zip.read(zi))
+            # add new images to media folder is we have images merged
+            for img_id, img_data in self.media.items():
+                output.writestr('media/{}.png'.format(img_id), img_data)
 
     def get_merge_fields(self, parts=None):
         if not parts:
@@ -254,6 +280,31 @@ def merge(self, parts=None, **replacements):
                     self.__merge_field(part, field, replacement)
 
     def __merge_field(self, part, field, text):
+        if field.startswith('IMAGE:'):
+            _, img_name = field.split(':')
+            inline_img_el = part.find('.//wp:docPr[@title="{}"]/..'.format(img_name), namespaces=NAMESPACES)
+            if inline_img_el:
+                embed_node = inline_img_el.find('.//a:blip', namespaces=NAMESPACES)
+                if embed_node:
+                    # generate a random id and add tp media list for later export to media folder in zip file
+                    img_id = 'MMR{}'.format(randint(10000000, 999999999))
+                    self.media[img_id] = text
+
+                    # add a relationship
+                    last_img_relationship = self.rels.findall('{%(ns)s}Relationship[@Type="%(od)s/image"]' % self.RELS_NAMESPACES)[-1]
+                    new_img_relationship = deepcopy(last_img_relationship)
+                    new_img_relationship.set('Id', img_id)
+                    new_img_relationship.set('Target', '/media/{}.png'.format(img_id))
+                    self.rels.getroot().append(new_img_relationship)
+
+                    # replace the embed attrib with the new image_id
+                    embed_node = inline_img_el.find('.//a:blip', namespaces=NAMESPACES)
+                    embed_attr = embed_node.attrib.keys()[0]
+                    embed_node.attrib[embed_attr] = img_id
+                # mark as done
+                inline_img_el.find('wp:docPr', namespaces=NAMESPACES).attrib['title'] = 'replaced_image_{}'.format(img_id)
+            return
+        
         for mf in part.findall('.//MergeField[@name="%s"]' % field):
             children = list(mf)
             mf.clear()  # clear away the attributes

From b68f0ab5a2746e7f554849fb66a76d3b97854040 Mon Sep 17 00:00:00 2001
From: matthijs <matthijs@kadijk.com>
Date: Fri, 18 May 2018 09:32:22 +0200
Subject: [PATCH 2/5] fix tabs inserted by github online editor

---
 mailmerge.py | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/mailmerge.py b/mailmerge.py
index 829d1ac..fe0bda8 100644
--- a/mailmerge.py
+++ b/mailmerge.py
@@ -30,7 +30,7 @@ def __init__(self, file, remove_empty_tables=False):
         self.settings = None
         self._settings_info = None
         
-	self.media = {}  # new images to add indexed by embed id
+        self.media = {}  # new images to add indexed by embed id
         self.rels = None  # etree for relations
         self._rels_info = None # zi info block for rels
         self.RELS_NAMESPACES = {'ns': None, 'od': None}
@@ -53,7 +53,6 @@ def __init__(self, file, remove_empty_tables=False):
                 self.RELS_NAMESPACES['od'] = self.rels.getroot().nsmap.get(None).replace('package', 'officeDocument')
             except:
                 pass
-	    
             to_delete = []
 
             r = re.compile(r' MERGEFIELD +"?([^ ]+?)"? +(|\\\* MERGEFORMAT )', re.I)
@@ -125,7 +124,7 @@ def __init__(self, file, remove_empty_tables=False):
             raise
 
     def __get_tree_of_file(self, file):
-	if isinstance(file, basestring):
+        if isinstance(file, basestring):
             fn = file
         else:
             fn = file.attrib['PartName' % NAMESPACES].split('/', 1)[1]
@@ -145,7 +144,7 @@ def write(self, file):
                 elif zi == self._settings_info:
                     xml = etree.tostring(self.settings.getroot())
                     output.writestr(zi.filename, xml)
-		elif zi == self._rels_info:
+                elif zi == self._rels_info:
                     xml = etree.tostring(self.rels.getroot())
                     output.writestr(zi.filename, xml)
                 else:
@@ -190,14 +189,14 @@ def merge_templates(self, replacements, separator):
             tag = root.tag
             if tag == '{%(w)s}ftr' % NAMESPACES or tag == '{%(w)s}hdr' % NAMESPACES:
                 continue
-		
+
             if sepClass == 'section':
 
                 #FINDING FIRST SECTION OF THE DOCUMENT
                 firstSection = root.find("w:body/w:p/w:pPr/w:sectPr", namespaces=NAMESPACES)
                 if firstSection == None:
                     firstSection = root.find("w:body/w:sectPr", namespaces=NAMESPACES)
-			
+
                 #MODIFY TYPE ATTRIBUTE OF FIRST SECTION FOR MERGING
                 nextPageSec = deepcopy(firstSection)
                 for child in nextPageSec:

From 06c57f44aeaf37fe37f3dee6dfcaf249d6183f14 Mon Sep 17 00:00:00 2001
From: matthijs <matthijs@kadijk.com>
Date: Fri, 18 May 2018 09:50:45 +0200
Subject: [PATCH 3/5] fix basestring error in python3 and remove unused string
 formatter

---
 mailmerge.py | 74 +++++++++++++++++++++++++++-------------------------
 1 file changed, 38 insertions(+), 36 deletions(-)

diff --git a/mailmerge.py b/mailmerge.py
index fe0bda8..bc13a34 100644
--- a/mailmerge.py
+++ b/mailmerge.py
@@ -29,12 +29,12 @@ def __init__(self, file, remove_empty_tables=False):
         self.parts = {}
         self.settings = None
         self._settings_info = None
-        
+
         self.media = {}  # new images to add indexed by embed id
         self.rels = None  # etree for relations
-        self._rels_info = None # zi info block for rels
+        self._rels_info = None  # zi info block for rels
         self.RELS_NAMESPACES = {'ns': None, 'od': None}
-        
+
         self.remove_empty_tables = remove_empty_tables
 
         try:
@@ -124,10 +124,10 @@ def __init__(self, file, remove_empty_tables=False):
             raise
 
     def __get_tree_of_file(self, file):
-        if isinstance(file, basestring):
-            fn = file
+        if isinstance(file, etree._Element):
+            fn = file.get('PartName').split('/', 1)[1]
         else:
-            fn = file.attrib['PartName' % NAMESPACES].split('/', 1)[1]
+            fn = file
         zi = self.zip.getinfo(fn)
         return zi, etree.parse(self.zip.open(zi))
 
@@ -166,7 +166,7 @@ def merge_templates(self, replacements, separator):
         """
         Duplicate template. Creates a copy of the template, does a merge, and separates them by a new paragraph, a new break or a new section break.
         separator must be :
-        - page_break : Page Break. 
+        - page_break : Page Break.
         - column_break : Column Break. ONLY HAVE EFFECT IF DOCUMENT HAVE COLUMNS
         - textWrapping_break : Line Break.
         - continuous_section : Continuous section break. Begins the section on the next paragraph.
@@ -176,14 +176,14 @@ def merge_templates(self, replacements, separator):
         - oddPage_section : oddPage section break. section begins on the next odd-numbered page, leaving the next even page blank if necessary.
         """
 
-        #TYPE PARAM CONTROL AND SPLIT
-        valid_separators = {'page_break', 'column_break', 'textWrapping_break', 'continuous_section', 'evenPage_section', 'nextColumn_section', 'nextPage_section', 'oddPage_section'}
+        # TYPE PARAM CONTROL AND SPLIT
+        valid_separators = {'page_break', 'column_break', 'textWrapping_break', 'continuous_section',
+                            'evenPage_section', 'nextColumn_section', 'nextPage_section', 'oddPage_section'}
         if not separator in valid_separators:
             raise ValueError("Invalid separator argument")
         type, sepClass = separator.split("_")
-  
 
-        #GET ROOT - WORK WITH DOCUMENT
+        # GET ROOT - WORK WITH DOCUMENT
         for part in self.parts.values():
             root = part.getroot()
             tag = root.tag
@@ -192,42 +192,42 @@ def merge_templates(self, replacements, separator):
 
             if sepClass == 'section':
 
-                #FINDING FIRST SECTION OF THE DOCUMENT
+                # FINDING FIRST SECTION OF THE DOCUMENT
                 firstSection = root.find("w:body/w:p/w:pPr/w:sectPr", namespaces=NAMESPACES)
                 if firstSection == None:
                     firstSection = root.find("w:body/w:sectPr", namespaces=NAMESPACES)
 
-                #MODIFY TYPE ATTRIBUTE OF FIRST SECTION FOR MERGING
+                # MODIFY TYPE ATTRIBUTE OF FIRST SECTION FOR MERGING
                 nextPageSec = deepcopy(firstSection)
                 for child in nextPageSec:
-                #Delete old type if exist
+                    # Delete old type if exist
                     if child.tag == '{%(w)s}type' % NAMESPACES:
                         nextPageSec.remove(child)
-                #Create new type (def parameter)
-                newType = etree.SubElement(nextPageSec, '{%(w)s}type'  % NAMESPACES)
-                newType.set('{%(w)s}val'  % NAMESPACES, type)
+                # Create new type (def parameter)
+                newType = etree.SubElement(nextPageSec, '{%(w)s}type' % NAMESPACES)
+                newType.set('{%(w)s}val' % NAMESPACES, type)
 
-                #REPLACING FIRST SECTION
+                # REPLACING FIRST SECTION
                 secRoot = firstSection.getparent()
                 secRoot.replace(firstSection, nextPageSec)
 
-            #FINDING LAST SECTION OF THE DOCUMENT
+            # FINDING LAST SECTION OF THE DOCUMENT
             lastSection = root.find("w:body/w:sectPr", namespaces=NAMESPACES)
 
-            #SAVING LAST SECTION
+            # SAVING LAST SECTION
             mainSection = deepcopy(lastSection)
             lsecRoot = lastSection.getparent()
             lsecRoot.remove(lastSection)
 
-            #COPY CHILDREN ELEMENTS OF BODY IN A LIST
+            # COPY CHILDREN ELEMENTS OF BODY IN A LIST
             childrenList = root.findall('w:body/*', namespaces=NAMESPACES)
 
-            #DELETE ALL CHILDREN OF BODY
+            # DELETE ALL CHILDREN OF BODY
             for child in root:
                 if child.tag == '{%(w)s}body' % NAMESPACES:
                     child.clear()
 
-            #REFILL BODY AND MERGE DOCS - ADD LAST SECTION ENCAPSULATED OR NOT
+            # REFILL BODY AND MERGE DOCS - ADD LAST SECTION ENCAPSULATED OR NOT
             lr = len(replacements)
             lc = len(childrenList)
             parts = []
@@ -245,13 +245,13 @@ def merge_templates(self, replacements, separator):
                                 else:
                                     if sepClass == 'section':
                                         intSection = deepcopy(mainSection)
-                                        p   = etree.SubElement(child, '{%(w)s}p'  % NAMESPACES)
-                                        pPr = etree.SubElement(p, '{%(w)s}pPr'  % NAMESPACES)
+                                        p = etree.SubElement(child, '{%(w)s}p' % NAMESPACES)
+                                        pPr = etree.SubElement(p, '{%(w)s}pPr' % NAMESPACES)
                                         pPr.append(intSection)
                                         parts.append(p)
                                     elif sepClass == 'break':
-                                        pb   = etree.SubElement(child, '{%(w)s}p'  % NAMESPACES)
-                                        r = etree.SubElement(pb, '{%(w)s}r'  % NAMESPACES)
+                                        pb = etree.SubElement(child, '{%(w)s}p' % NAMESPACES)
+                                        r = etree.SubElement(pb, '{%(w)s}r' % NAMESPACES)
                                         nbreak = Element('{%(w)s}br' % NAMESPACES)
                                         nbreak.attrib['{%(w)s}type' % NAMESPACES] = type
                                         r.append(nbreak)
@@ -259,13 +259,13 @@ def merge_templates(self, replacements, separator):
                     self.merge(parts, **repl)
 
     def merge_pages(self, replacements):
-         """
-         Deprecated method.
-         """
-         warnings.warn("merge_pages has been deprecated in favour of merge_templates",
+        """
+        Deprecated method.
+        """
+        warnings.warn("merge_pages has been deprecated in favour of merge_templates",
                       category=DeprecationWarning,
-                      stacklevel=2)         
-         self.merge_templates(replacements, "page_break")
+                      stacklevel=2)
+        self.merge_templates(replacements, "page_break")
 
     def merge(self, parts=None, **replacements):
         if not parts:
@@ -290,7 +290,8 @@ def __merge_field(self, part, field, text):
                     self.media[img_id] = text
 
                     # add a relationship
-                    last_img_relationship = self.rels.findall('{%(ns)s}Relationship[@Type="%(od)s/image"]' % self.RELS_NAMESPACES)[-1]
+                    last_img_relationship = \
+                    self.rels.findall('{%(ns)s}Relationship[@Type="%(od)s/image"]' % self.RELS_NAMESPACES)[-1]
                     new_img_relationship = deepcopy(last_img_relationship)
                     new_img_relationship.set('Id', img_id)
                     new_img_relationship.set('Target', '/media/{}.png'.format(img_id))
@@ -301,9 +302,10 @@ def __merge_field(self, part, field, text):
                     embed_attr = embed_node.attrib.keys()[0]
                     embed_node.attrib[embed_attr] = img_id
                 # mark as done
-                inline_img_el.find('wp:docPr', namespaces=NAMESPACES).attrib['title'] = 'replaced_image_{}'.format(img_id)
+                inline_img_el.find('wp:docPr', namespaces=NAMESPACES).attrib['title'] = 'replaced_image_{}'.format(
+                    img_id)
             return
-        
+
         for mf in part.findall('.//MergeField[@name="%s"]' % field):
             children = list(mf)
             mf.clear()  # clear away the attributes

From 663f4ebde6a224b62feec83e1e100a704bb82034 Mon Sep 17 00:00:00 2001
From: matthijs <matthijs@kadijk.com>
Date: Fri, 18 May 2018 10:04:48 +0200
Subject: [PATCH 4/5] fix more python3 errors (?)

---
 mailmerge.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/mailmerge.py b/mailmerge.py
index bc13a34..9fb0b68 100644
--- a/mailmerge.py
+++ b/mailmerge.py
@@ -30,8 +30,8 @@ def __init__(self, file, remove_empty_tables=False):
         self.settings = None
         self._settings_info = None
 
-        self.media = {}  # new images to add indexed by embed id
-        self.rels = None  # etree for relations
+        self.media = {}         # new images to add indexed by embed id
+        self.rels = None        # etree for relationships
         self._rels_info = None  # zi info block for rels
         self.RELS_NAMESPACES = {'ns': None, 'od': None}
 

From 051ad0afa91d7fb3f693ac3f63f5ebc59d80de6f Mon Sep 17 00:00:00 2001
From: matthijs <matthijs@kadijk.com>
Date: Fri, 18 May 2018 17:20:41 +0200
Subject: [PATCH 5/5] add forgotten import

---
 mailmerge.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/mailmerge.py b/mailmerge.py
index 9fb0b68..1f3a6f6 100644
--- a/mailmerge.py
+++ b/mailmerge.py
@@ -4,6 +4,7 @@
 from lxml.etree import Element
 from lxml import etree
 from zipfile import ZipFile, ZIP_DEFLATED
+from random import randint
 
 NAMESPACES = {
     'w': 'http://schemas.openxmlformats.org/wordprocessingml/2006/main',