From a607378d00beb840074344db5a392c9e3d22a551 Mon Sep 17 00:00:00 2001
From: Emanuele
Date: Fri, 12 Oct 2018 14:27:57 +0100
Subject: [PATCH 1/6] Fixing parsing of Evernote's new codeblocks, by reverse engineering

They could have used `<pre>`, they could have introduced an `<en-codeblock>` element.
But no. They chose a custom CSS style `-en-codeblock:true`.

This is obviously fragile.
---
 lib/html2text.py | 24 ++++++++++++++++++++----
 1 file changed, 20 insertions(+), 4 deletions(-)

diff --git a/lib/html2text.py b/lib/html2text.py
index 1bc45e0..d0e786f 100644
--- a/lib/html2text.py
+++ b/lib/html2text.py
@@ -449,18 +449,34 @@ def handle_tag(self, tag, attrs, start):
                 if start and google_has_height(tag_style):
                     self.p()
                 else:
-                    self.soft_br()
+                    self.br()
             else:
                 if start == 1:
-                    if attrs and attrs.get("title") != "footnotes":
+                    if attrs and '-en-codeblock:true' in attrs.get("style"):
+                        self.o("```")
+                        self.block_stack.append("encodeblock")
+                        self.pre = "encodeblock"
+                        self.startpre = 1
+                    elif self.pre == "encodeblock":
+                        self.block_stack.append(False)
+                    elif attrs and attrs.get("title") != "footnotes":
                         self.block_stack.append(True)
                         self.o(tag_str(tag+' markdown="1"', attrs, start))
+                        self.p()
                     else:
                         self.block_stack.append(False)
+                        self.p()
                 elif start == 0:
-                    if self.block_stack.pop():
+                    t = self.block_stack.pop()
+                    if t == "encodeblock":
+                        self.pre = 0
+                        self.o("```")
+                    elif t == True:
                         self.o('' % tag)
-                self.p()
+                    elif self.pre:
+                        self.pbr()
+                    else:
+                        self.p()
 
         if tag == 'p':
             if self.google_doc:

From 1144e58bbe07e05d35a7c8ec02fb403079e2d8df Mon Sep 17 00:00:00 2001
From: Emanuele 
Date: Fri, 12 Oct 2018 14:38:41 +0100
Subject: [PATCH 2/6] Adding support for Evernote's new `-en-paragraph:true`
 marks

---
 lib/html2text.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/lib/html2text.py b/lib/html2text.py
index d0e786f..0b75168 100644
--- a/lib/html2text.py
+++ b/lib/html2text.py
@@ -452,7 +452,10 @@ def handle_tag(self, tag, attrs, start):
                     self.br()
             else:
                 if start == 1:
-                    if attrs and '-en-codeblock:true' in attrs.get("style"):
+                    if attrs and '-en-paragraph:true' in attrs.get("style"):
+                        self.block_stack.append(False)
+                        self.p()
+                    elif attrs and '-en-codeblock:true' in attrs.get("style"):
                         self.o("```")
                         self.block_stack.append("encodeblock")
                         self.pre = "encodeblock"

From 530f706c235ed9f2ea6af7c580df4f9a1201f1fd Mon Sep 17 00:00:00 2001
From: Emanuele 
Date: Fri, 12 Oct 2018 17:42:30 +0100
Subject: [PATCH 3/6] Making markdown generate -en-codeblock style

---
 sublime_evernote.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sublime_evernote.py b/sublime_evernote.py
index 2c314eb..fdf85fc 100644
--- a/sublime_evernote.py
+++ b/sublime_evernote.py
@@ -338,6 +338,7 @@ def load_settings(self):
                 css[tag] = css[tag].strip()
                 if len(css[tag]) > 0 and not css[tag].endswith(";"):
                     css[tag] = css[tag] + ";"
+            css['pre'] = css.get('pre', "") + "-en-codeblock:true;"
             EvernoteDo.MD_EXTRAS['inline-css'] = css
         self.md_syntax = self.settings.get("md_syntax")
         if not self.md_syntax:

From 4a9e725111c655fa68efc596e2a35f617e142a07 Mon Sep 17 00:00:00 2001
From: Emanuele 
Date: Mon, 15 Oct 2018 11:51:02 +0100
Subject: [PATCH 4/6] Fixing webclip divs

Two problems:

1. distinguishing styled divs made by the user (which may contain markdown) from the ones produced by webclips, which need to be copied verbatim
2. avoiding fooling markdown2, which does not do tag matching to find the boundaries of HTML blocks and so gets confused by nested divs

Problem 1 is solved by looking for the `-evernote-webclip` style (not documented)
Problem 2 is solved by using verbatim output for webclip contents, and prepending a space to each line so that nested `<div>`s are ignored.
---
 lib/html2text.py | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/lib/html2text.py b/lib/html2text.py
index 0b75168..b06e03e 100644
--- a/lib/html2text.py
+++ b/lib/html2text.py
@@ -444,6 +444,11 @@ def handle_tag(self, tag, attrs, start):
                 self.inheader = False
                 return # prevent redundant emphasis marks on headers
 
+        if attrs and "-evernote-webclip" in attrs.get("style", "") and start:
+            self.out('\n\n'+tag_str(tag, attrs, start))
+            self.verbatim = [tag, 1]
+            return
+
         if tag == 'div':
             if self.google_doc:
                 if start and google_has_height(tag_style):
@@ -460,7 +465,7 @@ def handle_tag(self, tag, attrs, start):
                         self.block_stack.append("encodeblock")
                         self.pre = "encodeblock"
                         self.startpre = 1
-                    elif self.pre == "encodeblock":
+                    elif self.pre:
                         self.block_stack.append(False)
                     elif attrs and attrs.get("title") != "footnotes":
                         self.block_stack.append(True)
@@ -757,7 +762,9 @@ def o(self, data, puredata=0, force=0):
             bq = (">" * self.blockquote)
             if not (force and data and data[0] == ">") and self.blockquote: bq += " "
 
-            if self.pre == "indent":
+            if self.verbatim:
+                data = data.replace("\n", "\n ")
+            elif self.pre == "indent":
                 if not self.list:
                     bq += "    "
                 #else: list content is already partially indented

From 8aaca5393ba9f1c3f2b4d57c619768a118eca2b4 Mon Sep 17 00:00:00 2001
From: Emanuele 
Date: Thu, 18 Oct 2018 12:12:10 +0100
Subject: [PATCH 5/6] Bug Fix: do not use " in attributes!!!

---
 lib/html2text.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/html2text.py b/lib/html2text.py
index b06e03e..8cfb14f 100644
--- a/lib/html2text.py
+++ b/lib/html2text.py
@@ -115,7 +115,7 @@ def tag_str(tag, attrs, start):
     if start:
         attr_str = ""
         for k in attrs:
-            attr_str += (' %s="%s"' % (k, attrs[k]))
+            attr_str += (' %s="%s"' % (k, attrs[k].replace('"',"'")))
         return "<%s%s%s>" % (tag, attr_str, '/' if start == 2 else '')
     else:
         return "" % tag

From 5a8f6cee15a7a413559981b81cafb07e85cc455c Mon Sep 17 00:00:00 2001
From: Emanuele 
Date: Thu, 18 Oct 2018 12:13:03 +0100
Subject: [PATCH 6/6] Webclips: proper line breaks

---
 lib/html2text.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/lib/html2text.py b/lib/html2text.py
index 8cfb14f..6cfa5b2 100644
--- a/lib/html2text.py
+++ b/lib/html2text.py
@@ -445,7 +445,10 @@ def handle_tag(self, tag, attrs, start):
                 return # prevent redundant emphasis marks on headers
 
         if attrs and "-evernote-webclip" in attrs.get("style", "") and start:
-            self.out('\n\n'+tag_str(tag, attrs, start))
+            if start:
+                self.out('\n\n'+tag_str(tag, attrs, start)+'\n ')
+            else:
+                self.out('\n'+tag_str(tag, attrs, start)+'\n\n')
             self.verbatim = [tag, 1]
             return