From 2785df74ce2af88eaf973132a5d3b4f8c5b7d598 Mon Sep 17 00:00:00 2001 From: Mathieu PATUREL Date: Sat, 16 Nov 2019 09:54:07 +1100 Subject: [PATCH] Format everything with black Follow the readme's instruction haha --- MarkdownLivePreview.py | 102 +-- lib/markdown2.py | 1257 ++++++++++++++++++++------------- markdown2html.py | 63 +- resources/convertresources.py | 4 +- utils.py | 4 + 5 files changed, 877 insertions(+), 553 deletions(-) diff --git a/MarkdownLivePreview.py b/MarkdownLivePreview.py index 3b09249..54617e5 100644 --- a/MarkdownLivePreview.py +++ b/MarkdownLivePreview.py @@ -10,22 +10,26 @@ MARKDOWN_VIEW_INFOS = "markdown_view_infos" PREVIEW_VIEW_INFOS = "preview_view_infos" # FIXME: put this as a setting for the user to choose? -DELAY = 100 # ms +DELAY = 100 # ms + def get_resource(resource): - path = 'Packages/MarkdownLivePreview/resources/' + resource - abs_path = os.path.join(sublime.packages_path(), '..', path) + path = "Packages/MarkdownLivePreview/resources/" + resource + abs_path = os.path.join(sublime.packages_path(), "..", path) if os.path.isfile(abs_path): - with open(abs_path, 'r') as fp: + with open(abs_path, "r") as fp: return fp.read() return sublime.load_resource(path) + resources = {} + def plugin_loaded(): - resources["base64_loading_image"] = get_resource('loading.base64') - resources["base64_404_image"] = get_resource('404.base64') - resources["stylesheet"] = get_resource('stylesheet.css') + resources["base64_loading_image"] = get_resource("loading.base64") + resources["base64_404_image"] = get_resource("404.base64") + resources["stylesheet"] = get_resource("stylesheet.css") + # try to reload the resources if we save this file try: @@ -40,13 +44,13 @@ def plugin_loaded(): # original_window: the regular window # preview_window: the window with the markdown file and the preview -class MdlpInsertCommand(sublime_plugin.TextCommand): +class MdlpInsertCommand(sublime_plugin.TextCommand): def run(self, edit, point, string): self.view.insert(edit, point, string) -class OpenMarkdownPreviewCommand(sublime_plugin.TextCommand): +class OpenMarkdownPreviewCommand(sublime_plugin.TextCommand): def run(self, edit): """ If the file is saved exists on disk, we close it, and reopen it in a new @@ -57,7 +61,7 @@ def run(self, edit): original_window_id = original_view.window().id() file_name = original_view.file_name() - syntax_file = original_view.settings().get('syntax') + syntax_file = original_view.settings().get("syntax") if file_name: original_view.close() @@ -70,41 +74,44 @@ def run(self, edit): # FIXME: save the document to a temporary file, so that if we crash, # the user doesn't lose what he wrote - sublime.run_command('new_window') + sublime.run_command("new_window") preview_window = sublime.active_window() - preview_window.run_command('set_layout', { - 'cols': [0.0, 0.5, 1.0], - 'rows': [0.0, 1.0], - 'cells': [[0, 0, 1, 1], [1, 0, 2, 1]] - }) + preview_window.run_command( + "set_layout", + { + "cols": [0.0, 0.5, 1.0], + "rows": [0.0, 1.0], + "cells": [[0, 0, 1, 1], [1, 0, 2, 1]], + }, + ) preview_window.focus_group(1) preview_view = preview_window.new_file() preview_view.set_scratch(True) preview_view.settings().set(PREVIEW_VIEW_INFOS, {}) - preview_view.set_name('Preview') - + preview_view.set_name("Preview") preview_window.focus_group(0) if file_name: markdown_view = preview_window.open_file(file_name) else: markdown_view = preview_window.new_file() - markdown_view.run_command('mdlp_insert', {'point': 0, 'string': content}) + markdown_view.run_command("mdlp_insert", 
{"point": 0, "string": content}) markdown_view.set_scratch(True) markdown_view.set_syntax_file(syntax_file) - markdown_view.settings().set(MARKDOWN_VIEW_INFOS, { - "original_window_id": original_window_id - }) + markdown_view.settings().set( + MARKDOWN_VIEW_INFOS, {"original_window_id": original_window_id} + ) def is_enabled(self): # FIXME: is this the best way there is to check if the current syntax is markdown? # should we only support default markdown? # what about "md"? # FIXME: what about other languages, where markdown preview roughly works? - return 'markdown' in self.view.settings().get('syntax').lower() + return "markdown" in self.view.settings().get("syntax").lower() + class MarkdownLivePreviewListener(sublime_plugin.EventListener): @@ -153,30 +160,36 @@ def on_close(self, markdown_view): if not infos: return - assert markdown_view.id() == self.markdown_view.id(), \ - "pre_close view.id() != close view.id()" + assert ( + markdown_view.id() == self.markdown_view.id() + ), "pre_close view.id() != close view.id()" del self.phantom_sets[markdown_view.id()] - self.preview_window.run_command('close_window') + self.preview_window.run_command("close_window") # find the window with the right id - original_window = next(window for window in sublime.windows() \ - if window.id() == infos['original_window_id']) + original_window = next( + window + for window in sublime.windows() + if window.id() == infos["original_window_id"] + ) if self.file_name: original_window.open_file(self.file_name) else: - assert markdown_view.is_scratch(), "markdown view of an unsaved file should " \ - "be a scratch" + assert markdown_view.is_scratch(), ( + "markdown view of an unsaved file should " "be a scratch" + ) # note here that this is called original_view, because it's what semantically # makes sense, but this original_view.id() will be different than the one # that we closed first to reopen in the preview window # shouldn't cause any trouble though original_view = original_window.new_file() - original_view.run_command('mdlp_insert', {'point': 0, 'string': self.content}) - - original_view.set_syntax_file(markdown_view.settings().get('syntax')) + original_view.run_command( + "mdlp_insert", {"point": 0, "string": self.content} + ) + original_view.set_syntax_file(markdown_view.settings().get("syntax")) # here, views are NOT treated independently, which is theoretically wrong # but in practice, you can only edit one markdown file at a time, so it doesn't really @@ -188,7 +201,7 @@ def on_modified_async(self, markdown_view): if not infos: return - # we schedule an update, which won't run if an + # we schedule an update, which won't run if an sublime.set_timeout(partial(self._update_preview, markdown_view), DELAY) def _update_preview(self, markdown_view): @@ -209,15 +222,16 @@ def _update_preview(self, markdown_view): basepath = os.path.dirname(markdown_view.file_name()) html = markdown2html( - markdown, - basepath, - partial(self._update_preview, markdown_view), - resources + markdown, basepath, partial(self._update_preview, markdown_view), resources ) - self.phantom_sets[markdown_view.id()].update([ - sublime.Phantom(sublime.Region(0), html, sublime.LAYOUT_BLOCK, - lambda href: sublime.run_command('open_url', {'url': href})) - ]) - - \ No newline at end of file + self.phantom_sets[markdown_view.id()].update( + [ + sublime.Phantom( + sublime.Region(0), + html, + sublime.LAYOUT_BLOCK, + lambda href: sublime.run_command("open_url", {"url": href}), + ) + ] + ) diff --git a/lib/markdown2.py b/lib/markdown2.py index 
49bedd0..25a0a09 100644 --- a/lib/markdown2.py +++ b/lib/markdown2.py @@ -97,7 +97,7 @@ # and 'perldoc perlre'. __version_info__ = (2, 3, 9) -__version__ = '.'.join(map(str, __version_info__)) +__version__ = ".".join(map(str, __version_info__)) __author__ = "Trent Mick" import sys @@ -108,6 +108,7 @@ from random import random, randint import codecs from collections import defaultdict + try: from urllib import quote_plus except ImportError: @@ -141,15 +142,15 @@ # MD5 function was previously used for this; the "md5" prefix was kept for # backwards compatibility. def _hash_text(s): - return 'md5-' + sha256(SECRET_SALT + s.encode("utf-8")).hexdigest()[32:] + return "md5-" + sha256(SECRET_SALT + s.encode("utf-8")).hexdigest()[32:] + # Table of hash values for escaped characters: -g_escape_table = dict([(ch, _hash_text(ch)) - for ch in '\\`*_{}[]()>#+-.!']) +g_escape_table = dict([(ch, _hash_text(ch)) for ch in "\\`*_{}[]()>#+-.!"]) # Ampersand-encoding based entirely on Nat Irons's Amputator MT plugin: # http://bumppo.net/projects/amputator/ -_AMPERSAND_RE = re.compile(r'&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)') +_AMPERSAND_RE = re.compile(r"&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)") # ---- exceptions @@ -159,32 +160,57 @@ class MarkdownError(Exception): # ---- public api -def markdown_path(path, encoding="utf-8", - html4tags=False, tab_width=DEFAULT_TAB_WIDTH, - safe_mode=None, extras=None, link_patterns=None, - footnote_title=None, footnote_return_symbol=None, - use_file_vars=False): - fp = codecs.open(path, 'r', encoding) + +def markdown_path( + path, + encoding="utf-8", + html4tags=False, + tab_width=DEFAULT_TAB_WIDTH, + safe_mode=None, + extras=None, + link_patterns=None, + footnote_title=None, + footnote_return_symbol=None, + use_file_vars=False, +): + fp = codecs.open(path, "r", encoding) text = fp.read() fp.close() - return Markdown(html4tags=html4tags, tab_width=tab_width, - safe_mode=safe_mode, extras=extras, - link_patterns=link_patterns, - footnote_title=footnote_title, - footnote_return_symbol=footnote_return_symbol, - use_file_vars=use_file_vars).convert(text) - - -def markdown(text, html4tags=False, tab_width=DEFAULT_TAB_WIDTH, - safe_mode=None, extras=None, link_patterns=None, - footnote_title=None, footnote_return_symbol=None, - use_file_vars=False, cli=False): - return Markdown(html4tags=html4tags, tab_width=tab_width, - safe_mode=safe_mode, extras=extras, - link_patterns=link_patterns, - footnote_title=footnote_title, - footnote_return_symbol=footnote_return_symbol, - use_file_vars=use_file_vars, cli=cli).convert(text) + return Markdown( + html4tags=html4tags, + tab_width=tab_width, + safe_mode=safe_mode, + extras=extras, + link_patterns=link_patterns, + footnote_title=footnote_title, + footnote_return_symbol=footnote_return_symbol, + use_file_vars=use_file_vars, + ).convert(text) + + +def markdown( + text, + html4tags=False, + tab_width=DEFAULT_TAB_WIDTH, + safe_mode=None, + extras=None, + link_patterns=None, + footnote_title=None, + footnote_return_symbol=None, + use_file_vars=False, + cli=False, +): + return Markdown( + html4tags=html4tags, + tab_width=tab_width, + safe_mode=safe_mode, + extras=extras, + link_patterns=link_patterns, + footnote_title=footnote_title, + footnote_return_symbol=footnote_return_symbol, + use_file_vars=use_file_vars, + cli=cli, + ).convert(text) class Markdown(object): @@ -208,10 +234,18 @@ class Markdown(object): _ws_only_line_re = re.compile(r"^[ \t]+$", re.M) - def __init__(self, html4tags=False, tab_width=4, safe_mode=None, - extras=None, 
link_patterns=None, - footnote_title=None, footnote_return_symbol=None, - use_file_vars=False, cli=False): + def __init__( + self, + html4tags=False, + tab_width=4, + safe_mode=None, + extras=None, + link_patterns=None, + footnote_title=None, + footnote_return_symbol=None, + use_file_vars=False, + cli=False, + ): if html4tags: self.empty_element_suffix = ">" else: @@ -239,7 +273,7 @@ def __init__(self, html4tags=False, tab_width=4, safe_mode=None, if "toc" in self.extras: if "header-ids" not in self.extras: - self.extras["header-ids"] = None # "toc" implies "header-ids" + self.extras["header-ids"] = None # "toc" implies "header-ids" if self.extras["toc"] is None: self._toc_depth = 6 @@ -251,7 +285,7 @@ def __init__(self, html4tags=False, tab_width=4, safe_mode=None, self.footnote_title = footnote_title self.footnote_return_symbol = footnote_return_symbol self.use_file_vars = use_file_vars - self._outdent_re = re.compile(r'^(\t|[ ]{1,%d})' % tab_width, re.M) + self._outdent_re = re.compile(r"^(\t|[ ]{1,%d})" % tab_width, re.M) self.cli = cli self._escape_table = g_escape_table.copy() @@ -276,7 +310,8 @@ def reset(self): # Per "rel" # should only be used in tags with an "href" attribute. - _a_nofollow = re.compile(r""" + _a_nofollow = re.compile( + r""" <(a) ( [^>]* @@ -285,7 +320,7 @@ def reset(self): [^#'"] # We don't want to match href values that start with # (like footnotes) ) """, - re.IGNORECASE | re.VERBOSE + re.IGNORECASE | re.VERBOSE, ) # Opens the linked document in a new window or tab @@ -308,7 +343,7 @@ def convert(self, text): if not isinstance(text, unicode): # TODO: perhaps shouldn't presume UTF-8 for string input? - text = unicode(text, 'utf-8') + text = unicode(text, "utf-8") if self.use_file_vars: # Look for emacs-style file variable hints. @@ -316,8 +351,8 @@ def convert(self, text): if "markdown-extras" in emacs_vars: splitter = re.compile("[ ,]+") for e in splitter.split(emacs_vars["markdown-extras"]): - if '=' in e: - ename, earg = e.split('=', 1) + if "=" in e: + ename, earg = e.split("=", 1) try: earg = int(earg) except ValueError: @@ -396,7 +431,7 @@ def convert(self, text): # Prepend toc html to output if self.cli: - text = '{}\n{}'.format(self._toc_html, text) + text = "{}\n{}".format(self._toc_html, text) text += "\n" @@ -436,14 +471,18 @@ def preprocess(self, text): # another-var: blah blah # # # header - _meta_data_pattern = re.compile(r'^(?:---[\ \t]*\n)?(.*:\s+>\n\s+[\S\s]+?)(?=\n\w+\s*:\s*\w+\n|\Z)|([\S\w]+\s*:(?! >)[ \t]*.*\n?)(?:---[\ \t]*\n)?', re.MULTILINE) + _meta_data_pattern = re.compile( + r"^(?:---[\ \t]*\n)?(.*:\s+>\n\s+[\S\s]+?)(?=\n\w+\s*:\s*\w+\n|\Z)|([\S\w]+\s*:(?! >)[ \t]*.*\n?)(?:---[\ \t]*\n)?", + re.MULTILINE, + ) _key_val_pat = re.compile(r"[\S\w]+\s*:(?! >)[ \t]*.*\n?", re.MULTILINE) # this allows key: > # value # conutiues over multiple lines _key_val_block_pat = re.compile( - "(.*:\s+>\n\s+[\S\s]+?)(?=\n\w+\s*:\s*\w+\n|\Z)", re.MULTILINE) - _meta_data_fence_pattern = re.compile(r'^---[\ \t]*\n', re.MULTILINE) + "(.*:\s+>\n\s+[\S\s]+?)(?=\n\w+\s*:\s*\w+\n|\Z)", re.MULTILINE + ) + _meta_data_fence_pattern = re.compile(r"^---[\ \t]*\n", re.MULTILINE) _meta_data_newline = re.compile("^\n", re.MULTILINE) def _extract_metadata(self, text): @@ -481,12 +520,15 @@ def _extract_metadata(self, text): # - "[ \t]" is used instead of "\s" to specifically exclude newlines # - "(\r\n|\n|\r)" is used instead of "$" because the sre engine does # not like anything other than Unix-style line terminators. 
- _emacs_local_vars_pat = re.compile(r"""^ + _emacs_local_vars_pat = re.compile( + r"""^ (?P(?:[^\r\n|\n|\r])*?) [\ \t]*Local\ Variables:[\ \t]* (?P.*?)(?:\r\n|\n|\r) (?P.*?\1End:) - """, re.IGNORECASE | re.MULTILINE | re.DOTALL | re.VERBOSE) + """, + re.IGNORECASE | re.MULTILINE | re.DOTALL | re.VERBOSE, + ) def _get_emacs_vars(self, text): """Return a dictionary of emacs-style local variables. @@ -504,10 +546,11 @@ def _get_emacs_vars(self, text): match = self._emacs_oneliner_vars_pat.search(head) if match: emacs_vars_str = match.group(1) - assert '\n' not in emacs_vars_str - emacs_var_strs = [s.strip() for s in emacs_vars_str.split(';') - if s.strip()] - if len(emacs_var_strs) == 1 and ':' not in emacs_var_strs[0]: + assert "\n" not in emacs_vars_str + emacs_var_strs = [ + s.strip() for s in emacs_vars_str.split(";") if s.strip() + ] + if len(emacs_var_strs) == 1 and ":" not in emacs_var_strs[0]: # While not in the spec, this form is allowed by emacs: # -*- Tcl -*- # where the implied "variable" is "mode". This form @@ -516,10 +559,12 @@ def _get_emacs_vars(self, text): else: for emacs_var_str in emacs_var_strs: try: - variable, value = emacs_var_str.strip().split(':', 1) + variable, value = emacs_var_str.strip().split(":", 1) except ValueError: - log.debug("emacs variables error: malformed -*- " - "line: %r", emacs_var_str) + log.debug( + "emacs variables error: malformed -*- " "line: %r", + emacs_var_str, + ) continue # Lowercase the variable name because Emacs allows "Mode" # or "mode" or "MoDe", etc. @@ -539,42 +584,48 @@ def _get_emacs_vars(self, text): # usage. for i, line in enumerate(lines): if not line.startswith(prefix): - log.debug("emacs variables error: line '%s' " - "does not use proper prefix '%s'" - % (line, prefix)) + log.debug( + "emacs variables error: line '%s' " + "does not use proper prefix '%s'" % (line, prefix) + ) return {} # Don't validate suffix on last line. Emacs doesn't care, # neither should we. - if i != len(lines)-1 and not line.endswith(suffix): - log.debug("emacs variables error: line '%s' " - "does not use proper suffix '%s'" - % (line, suffix)) + if i != len(lines) - 1 and not line.endswith(suffix): + log.debug( + "emacs variables error: line '%s' " + "does not use proper suffix '%s'" % (line, suffix) + ) return {} # Parse out one emacs var per line. continued_for = None for line in lines[:-1]: # no var on the last line ("PREFIX End:") - if prefix: line = line[len(prefix):] # strip prefix - if suffix: line = line[:-len(suffix)] # strip suffix + if prefix: + line = line[len(prefix) :] # strip prefix + if suffix: + line = line[: -len(suffix)] # strip suffix line = line.strip() if continued_for: variable = continued_for - if line.endswith('\\'): + if line.endswith("\\"): line = line[:-1].rstrip() else: continued_for = None - emacs_vars[variable] += ' ' + line + emacs_vars[variable] += " " + line else: try: - variable, value = line.split(':', 1) + variable, value = line.split(":", 1) except ValueError: - log.debug("local variables error: missing colon " - "in local variables entry: '%s'" % line) + log.debug( + "local variables error: missing colon " + "in local variables entry: '%s'" % line + ) continue # Do NOT lowercase the variable name, because Emacs only # allows "mode" (and not "Mode", "MoDe", etc.) in this block. value = value.strip() - if value.endswith('\\'): + if value.endswith("\\"): value = value[:-1].rstrip() continued_for = variable else: @@ -583,8 +634,12 @@ def _get_emacs_vars(self, text): # Unquote values. 
for var, val in list(emacs_vars.items()): - if len(val) > 1 and (val.startswith('"') and val.endswith('"') - or val.startswith('"') and val.endswith('"')): + if len(val) > 1 and ( + val.startswith('"') + and val.endswith('"') + or val.startswith('"') + and val.endswith('"') + ): emacs_vars[var] = val[1:-1] return emacs_vars @@ -593,10 +648,10 @@ def _detab_line(self, line): r"""Recusively convert tabs to spaces in a single line. Called from _detab().""" - if '\t' not in line: + if "\t" not in line: return line - chunk1, chunk2 = line.split('\t', 1) - chunk1 += (' ' * (self.tab_width - len(chunk1) % self.tab_width)) + chunk1, chunk2 = line.split("\t", 1) + chunk1 += " " * (self.tab_width - len(chunk1) % self.tab_width) output = chunk1 + chunk2 return self._detab_line(output) @@ -615,21 +670,22 @@ def _detab(self, text): >>> m._detab(" foo\n\tbar\tblam") ' foo\n bar blam' """ - if '\t' not in text: + if "\t" not in text: return text output = [] for line in text.splitlines(): output.append(self._detab_line(line)) - return '\n'.join(output) + return "\n".join(output) # I broke out the html5 tags here and add them to _block_tags_a and # _block_tags_b. This way html5 tags are easy to keep track of. - _html5tags = '|article|aside|header|hgroup|footer|nav|section|figure|figcaption' + _html5tags = "|article|aside|header|hgroup|footer|nav|section|figure|figcaption" - _block_tags_a = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math|ins|del' + _block_tags_a = "p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math|ins|del" _block_tags_a += _html5tags - _strict_tag_block_re = re.compile(r""" + _strict_tag_block_re = re.compile( + r""" ( # save in \1 ^ # start of line (with re.M) <(%s) # start tag = \2 @@ -639,13 +695,16 @@ def _detab(self, text): [ \t]* # trailing spaces/tabs (?=\n+|\Z) # followed by a newline or end of document ) - """ % _block_tags_a, - re.X | re.M) + """ + % _block_tags_a, + re.X | re.M, + ) - _block_tags_b = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math' + _block_tags_b = "p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math" _block_tags_b += _html5tags - _liberal_tag_block_re = re.compile(r""" + _liberal_tag_block_re = re.compile( + r""" ( # save in \1 ^ # start of line (with re.M) <(%s) # start tag = \2 @@ -655,30 +714,30 @@ def _detab(self, text): [ \t]* # trailing spaces/tabs (?=\n+|\Z) # followed by a newline or end of document ) - """ % _block_tags_b, - re.X | re.M) + """ + % _block_tags_b, + re.X | re.M, + ) + + _html_markdown_attr_re = re.compile(r"""\s+markdown=("1"|'1')""") - _html_markdown_attr_re = re.compile( - r'''\s+markdown=("1"|'1')''') def _hash_html_block_sub(self, match, raw=False): html = match.group(1) if raw and self.safe_mode: html = self._sanitize_html(html) - elif 'markdown-in-html' in self.extras and 'markdown=' in html: - first_line = html.split('\n', 1)[0] + elif "markdown-in-html" in self.extras and "markdown=" in html: + first_line = html.split("\n", 1)[0] m = self._html_markdown_attr_re.search(first_line) if m: - lines = html.split('\n') - middle = '\n'.join(lines[1:-1]) + lines = html.split("\n") + middle = "\n".join(lines[1:-1]) last_line = lines[-1] - first_line = first_line[:m.start()] + first_line[m.end():] + first_line = first_line[: m.start()] + first_line[m.end() :] f_key = _hash_text(first_line) self.html_blocks[f_key] = first_line l_key = _hash_text(last_line) self.html_blocks[l_key] = last_line - return 
''.join(["\n\n", f_key, - "\n\n", middle, "\n\n", - l_key, "\n\n"]) + return "".join(["\n\n", f_key, "\n\n", middle, "\n\n", l_key, "\n\n"]) key = _hash_text(html) self.html_blocks[key] = html return "\n\n" + key + "\n\n" @@ -695,7 +754,7 @@ def _hash_html_blocks(self, text, raw=False): @param raw {boolean} indicates if these are raw HTML blocks in the original source. It makes a difference in "safe" mode. """ - if '<' not in text: + if "<" not in text: return text # Pass `raw` value into our calls to self._hash_html_block_sub. @@ -744,7 +803,7 @@ def _hash_html_blocks(self, text, raw=False): if start_idx: # - Up to `tab_width - 1` spaces before start_idx. for i in range(self.tab_width - 1): - if text[start_idx - 1] != ' ': + if text[start_idx - 1] != " ": break start_idx -= 1 if start_idx == 0: @@ -753,9 +812,9 @@ def _hash_html_blocks(self, text, raw=False): # the document. if start_idx == 0: pass - elif start_idx == 1 and text[0] == '\n': + elif start_idx == 1 and text[0] == "\n": start_idx = 0 # to match minute detail of Markdown.pl regex - elif text[start_idx-2:start_idx] == '\n\n': + elif text[start_idx - 2 : start_idx] == "\n\n": pass else: break @@ -763,11 +822,11 @@ def _hash_html_blocks(self, text, raw=False): # Validate whitespace after comment. # - Any number of spaces and tabs. while end_idx < len(text): - if text[end_idx] not in ' \t': + if text[end_idx] not in " \t": break end_idx += 1 # - Must be following by 2 newlines or hit end of text. - if text[end_idx:end_idx+2] not in ('', '\n', '\n\n'): + if text[end_idx : end_idx + 2] not in ("", "\n", "\n\n"): continue # Escape and hash (must match `_hash_html_block_sub`). @@ -798,7 +857,8 @@ def _strip_link_definitions(self, text): # Link defs are in the form: # [id]: url "optional title" - _link_def_re = re.compile(r""" + _link_def_re = re.compile( + r""" ^[ ]{0,%d}\[(.+)\]: # id = \1 [ \t]* \n? # maybe *one* newline @@ -815,33 +875,41 @@ def _strip_link_definitions(self, text): [ \t]* )? # title is optional (?:\n+|\Z) - """ % less_than_tab, re.X | re.M | re.U) + """ + % less_than_tab, + re.X | re.M | re.U, + ) return _link_def_re.sub(self._extract_link_def_sub, text) def _extract_link_def_sub(self, match): id, url, title = match.groups() - key = id.lower() # Link IDs are case-insensitive + key = id.lower() # Link IDs are case-insensitive self.urls[key] = self._encode_amps_and_angles(url) if title: self.titles[key] = title return "" def _do_numbering(self, text): - ''' We handle the special extension for generic numbering for + """ We handle the special extension for generic numbering for tables, figures etc. - ''' + """ # First pass to define all the references - self.regex_defns = re.compile(r''' + self.regex_defns = re.compile( + r""" \[\#(\w+)\s* # the counter. Open square plus hash plus a word \1 ([^@]*)\s* # Some optional characters, that aren't an @. \2 @(\w+) # the id. Should this be normed? \3 ([^\]]*)\] # The rest of the text up to the terminating ] \4 - ''', re.VERBOSE) + """, + re.VERBOSE, + ) self.regex_subs = re.compile(r"\[@(\w+)\s*\]") # [@ref_id] counters = {} references = {} replacements = [] - definition_html = '
<figcaption class="{}" id="counter-ref-{}">{}{}{}</figcaption>
' + definition_html = ( + '
<figcaption class="{}" id="counter-ref-{}">{}{}{}</figcaption>
' + ) reference_html = '
{}' for match in self.regex_defns.finditer(text): # We must have four match groups otherwise this isn't a numbering reference @@ -853,16 +921,18 @@ def _do_numbering(self, text): text_after = match.group(4) number = counters.get(counter, 1) references[ref_id] = (number, counter) - replacements.append((match.start(0), - definition_html.format(counter, - ref_id, - text_before, - number, - text_after), - match.end(0))) + replacements.append( + ( + match.start(0), + definition_html.format( + counter, ref_id, text_before, number, text_after + ), + match.end(0), + ) + ) counters[counter] = number + 1 for repl in reversed(replacements): - text = text[:repl[0]] + repl[1] + text[repl[2]:] + text = text[: repl[0]] + repl[1] + text[repl[2] :] # Second pass to replace the references with the right # value of the counter @@ -871,23 +941,21 @@ def _do_numbering(self, text): for match in reversed(list(self.regex_subs.finditer(text))): number, counter = references.get(match.group(1), (None, None)) if number is not None: - repl = reference_html.format(counter, - match.group(1), - number) + repl = reference_html.format(counter, match.group(1), number) else: - repl = reference_html.format(match.group(1), - 'countererror', - '?' + match.group(1) + '?') + repl = reference_html.format( + match.group(1), "countererror", "?" + match.group(1) + "?" + ) if "smarty-pants" in self.extras: repl = repl.replace('"', self._escape_table['"']) - text = text[:match.start()] + repl + text[match.end():] + text = text[: match.start()] + repl + text[match.end() :] return text def _extract_footnote_def_sub(self, match): id, text = match.groups() - text = _dedent(text, skip_first_line=not text.startswith('\n')).strip() - normed_id = re.sub(r'\W', '-', id) + text = _dedent(text, skip_first_line=not text.startswith("\n")).strip() + normed_id = re.sub(r"\W", "-", id) # Ensure footnote text ends with a couple newlines (for some # block gamut matches). self.footnotes[normed_id] = text + "\n\n" @@ -909,7 +977,8 @@ def _strip_footnote_definitions(self, text): Text of the note. """ less_than_tab = self.tab_width - 1 - footnote_def_re = re.compile(r''' + footnote_def_re = re.compile( + r""" ^[ ]{0,%d}\[\^(.+)\]: # id = \1 [ \t]* ( # footnote text = \2 @@ -922,11 +991,13 @@ def _strip_footnote_definitions(self, text): ) # Lookahead for non-space at line-start, or end of doc. (?:(?=^[ ]{0,%d}\S)|\Z) - ''' % (less_than_tab, self.tab_width, self.tab_width), - re.X | re.M) + """ + % (less_than_tab, self.tab_width, self.tab_width), + re.X | re.M, + ) return footnote_def_re.sub(self._extract_footnote_def_sub, text) - _hr_re = re.compile(r'^[ ]{0,3}([-_*][ ]{0,2}){3,}$', re.M) + _hr_re = re.compile(r"^[ ]{0,3}([-_*][ ]{0,2}){3,}$", re.M) def _run_block_gamut(self, text): # These are all the transformations that form block-level @@ -942,7 +1013,7 @@ def _run_block_gamut(self, text): # you wish, you may use spaces between the hyphens or asterisks." # Markdown.pl 1.0.1's hr regexes limit the number of spaces between the # hr chars to one or two. We'll reproduce that limit here. - hr = "\n>>[ ].*\n # first line ^(\1.*\S+.*\n)* # any number of subsequent lines ^\n # ends with a blank line - """ % less_than_tab, re.M | re.X) + """ + % less_than_tab, + re.M | re.X, + ) return _pyshell_block_re.sub(self._pyshell_block_sub, text) def _table_sub(self, match): - trim_space_re = '^[ \t\n]+|[ \t\n]+$' - trim_bar_re = r'^\||\|$' - split_bar_re = r'^\||(?' 
% self._html_class_str_from_tag('table'), '', ''] - cols = [re.sub(escape_bar_re, '|', cell.strip()) for cell in re.split(split_bar_re, re.sub(trim_bar_re, "", re.sub(trim_space_re, "", head)))] + hlines = [ + "" % self._html_class_str_from_tag("table"), + "", + "", + ] + cols = [ + re.sub(escape_bar_re, "|", cell.strip()) + for cell in re.split( + split_bar_re, re.sub(trim_bar_re, "", re.sub(trim_space_re, "", head)) + ) + ] for col_idx, col in enumerate(cols): - hlines.append(' %s' % ( - align_from_col_idx.get(col_idx, ''), - self._run_span_gamut(col) - )) - hlines.append('') - hlines.append('') + hlines.append( + " %s" + % (align_from_col_idx.get(col_idx, ""), self._run_span_gamut(col)) + ) + hlines.append("") + hlines.append("") # tbody - hlines.append('') - for line in body.strip('\n').split('\n'): - hlines.append('') - cols = [re.sub(escape_bar_re, '|', cell.strip()) for cell in re.split(split_bar_re, re.sub(trim_bar_re, "", re.sub(trim_space_re, "", line)))] + hlines.append("") + for line in body.strip("\n").split("\n"): + hlines.append("") + cols = [ + re.sub(escape_bar_re, "|", cell.strip()) + for cell in re.split( + split_bar_re, + re.sub(trim_bar_re, "", re.sub(trim_space_re, "", line)), + ) + ] for col_idx, col in enumerate(cols): - hlines.append(' %s' % ( - align_from_col_idx.get(col_idx, ''), - self._run_span_gamut(col) - )) - hlines.append('') - hlines.append('') - hlines.append('') + hlines.append( + " %s" + % (align_from_col_idx.get(col_idx, ""), self._run_span_gamut(col)) + ) + hlines.append("") + hlines.append("") + hlines.append("") - return '\n'.join(hlines) + '\n' + return "\n".join(hlines) + "\n" def _do_tables(self, text): """Copying PHP-Markdown and GFM table syntax. Some regex borrowed from https://github.com/michelf/php-markdown/blob/lib/Michelf/Markdown.php#L2538 """ less_than_tab = self.tab_width - 1 - table_re = re.compile(r''' + table_re = re.compile( + r""" (?:(?<=\n\n)|\A\n?) # leading blank line ^[ ]{0,%d} # allowed whitespace @@ -1065,7 +1165,10 @@ def _do_tables(self, text): .*\|.* \n )+ ) - ''' % (less_than_tab, less_than_tab, less_than_tab), re.M | re.X) + """ + % (less_than_tab, less_than_tab, less_than_tab), + re.M | re.X, + ) return table_re.sub(self._table_sub, text) def _wiki_table_sub(self, match): @@ -1074,20 +1177,20 @@ def _wiki_table_sub(self, match): rows = [] for line in ttext.splitlines(0): line = line.strip()[2:-2].strip() - row = [c.strip() for c in re.split(r'(?' % self._html_class_str_from_tag('table'), ''] + hlines = ["" % self._html_class_str_from_tag("table"), ""] for row in rows: - hrow = [''] + hrow = [""] for cell in row: - hrow.append('') + hrow.append("") hrow.append(self._run_span_gamut(cell)) - hrow.append('') - hrow.append('') - hlines.append(''.join(hrow)) - hlines += ['', ''] - return '\n'.join(hlines) + '\n' + hrow.append("") + hrow.append("") + hlines.append("".join(hrow)) + hlines += ["", ""] + return "\n".join(hlines) + "\n" def _do_wiki_tables(self, text): # Optimization. @@ -1095,11 +1198,15 @@ def _do_wiki_tables(self, text): return text less_than_tab = self.tab_width - 1 - wiki_table_re = re.compile(r''' + wiki_table_re = re.compile( + r""" (?:(?<=\n\n)|\A\n?) 
# leading blank line ^([ ]{0,%d})\|\|.+?\|\|[ ]*\n # first line (^\1\|\|.+?\|\|\n)* # any number of subsequent lines - ''' % less_than_tab, re.M | re.X) + """ + % less_than_tab, + re.M | re.X, + ) return wiki_table_re.sub(self._wiki_table_sub, text) def _run_span_gamut(self, text): @@ -1140,7 +1247,8 @@ def _run_span_gamut(self, text): return text # "Sorta" because auto-links are identified as "tag" tokens. - _sorta_html_tokenize_re = re.compile(r""" + _sorta_html_tokenize_re = re.compile( + r""" ( # tag # processing instruction ) - """, re.X) + """, + re.X, + ) def _escape_special_chars(self, text): # Python markdown note: the HTML tokenization here differs from @@ -1174,20 +1284,23 @@ def _escape_special_chars(self, text): # character with its corresponding MD5 checksum value; # this is likely overkill, but it should prevent us from # colliding with the escape values by accident. - escaped.append(token.replace('*', self._escape_table['*']) - .replace('_', self._escape_table['_'])) + escaped.append( + token.replace("*", self._escape_table["*"]).replace( + "_", self._escape_table["_"] + ) + ) else: escaped.append(self._encode_backslash_escapes(token)) is_html_markup = not is_html_markup - return ''.join(escaped) + return "".join(escaped) def _hash_html_spans(self, text): # Used for safe_mode. def _is_auto_link(s): - if ':' in s and self._auto_link_re.match(s): + if ":" in s and self._auto_link_re.match(s): return True - elif '@' in s and self._auto_email_link_re.match(s): + elif "@" in s and self._auto_email_link_re.match(s): return True return False @@ -1202,7 +1315,7 @@ def _is_auto_link(s): else: tokens.append(self._encode_incomplete_tags(token)) is_html_markup = not is_html_markup - return ''.join(tokens) + return "".join(tokens) def _unhash_html_spans(self, text): for key, sanitized in list(self.html_spans.items()): @@ -1214,18 +1327,21 @@ def _sanitize_html(self, s): return self.html_removed_text elif self.safe_mode == "escape": replacements = [ - ('&', '&'), - ('<', '<'), - ('>', '>'), + ("&", "&"), + ("<", "<"), + (">", ">"), ] for before, after in replacements: s = s.replace(before, after) return s else: - raise MarkdownError("invalid value for 'safe_mode': %r (must be " - "'escape' or 'replace')" % self.safe_mode) + raise MarkdownError( + "invalid value for 'safe_mode': %r (must be " + "'escape' or 'replace')" % self.safe_mode + ) - _inline_link_title = re.compile(r''' + _inline_link_title = re.compile( + r""" ( # \1 [ \t]+ (['"]) # quote char = \2 @@ -1233,19 +1349,24 @@ def _sanitize_html(self, s): \2 )? # title is optional \)$ - ''', re.X | re.S) - _tail_of_reference_link_re = re.compile(r''' + """, + re.X | re.S, + ) + _tail_of_reference_link_re = re.compile( + r""" # Match tail of: [text][id] [ ]? # one optional space (?:\n[ ]*)? # one optional newline followed by spaces \[ (?P.*?) 
\] - ''', re.X | re.S) + """, + re.X | re.S, + ) - _whitespace = re.compile(r'\s*') + _whitespace = re.compile(r"\s*") - _strip_anglebrackets = re.compile(r'<(.*)>.*') + _strip_anglebrackets = re.compile(r"<(.*)>.*") def _find_non_whitespace(self, text, start): """Returns the index of the first non-whitespace character in text @@ -1273,23 +1394,24 @@ def _find_balanced(self, text, start, open_c, close_c): def _extract_url_and_title(self, text, start): """Extracts the url and (optional) title from the tail of a link""" # text[start] equals the opening parenthesis - idx = self._find_non_whitespace(text, start+1) + idx = self._find_non_whitespace(text, start + 1) if idx == len(text): return None, None, None end_idx = idx has_anglebrackets = text[idx] == "<" if has_anglebrackets: - end_idx = self._find_balanced(text, end_idx+1, "<", ">") + end_idx = self._find_balanced(text, end_idx + 1, "<", ">") end_idx = self._find_balanced(text, end_idx, "(", ")") match = self._inline_link_title.search(text, idx, end_idx) if not match: return None, None, None - url, title = text[idx:match.start()], match.group("title") + url, title = text[idx : match.start()], match.group("title") if has_anglebrackets: - url = self._strip_anglebrackets.sub(r'\1', url) + url = self._strip_anglebrackets.sub(r"\1", url) return url, title, end_idx - _safe_protocols = re.compile(r'(https?|ftp):', re.I) + _safe_protocols = re.compile(r"(https?|ftp):", re.I) + def _do_links(self, text): """Turn Markdown link shortcuts into XHTML and tags. @@ -1324,7 +1446,7 @@ def _do_links(self, text): # _strip_link_definitions() so no need to watch for them. # - not markup: [...anything else... try: - start_idx = text.index('[', curr_pos) + start_idx = text.index("[", curr_pos) except ValueError: break text_length = len(text) @@ -1335,34 +1457,37 @@ def _do_links(self, text): # matching brackets in img alt text -- we'll differ in that # regard. bracket_depth = 0 - for p in range(start_idx+1, min(start_idx+MAX_LINK_TEXT_SENTINEL, - text_length)): + for p in range( + start_idx + 1, min(start_idx + MAX_LINK_TEXT_SENTINEL, text_length) + ): ch = text[p] - if ch == ']': + if ch == "]": bracket_depth -= 1 if bracket_depth < 0: break - elif ch == '[': + elif ch == "[": bracket_depth += 1 else: # Closing bracket not found within sentinel length. # This isn't markup. curr_pos = start_idx + 1 continue - link_text = text[start_idx+1:p] + link_text = text[start_idx + 1 : p] # Possibly a footnote ref? if "footnotes" in self.extras and link_text.startswith("^"): - normed_id = re.sub(r'\W', '-', link_text[1:]) + normed_id = re.sub(r"\W", "-", link_text[1:]) if normed_id in self.footnotes: self.footnote_ids.append(normed_id) - result = '' \ - '%s' \ - % (normed_id, normed_id, len(self.footnote_ids)) - text = text[:start_idx] + result + text[p+1:] + result = ( + '' + '%s' + % (normed_id, normed_id, len(self.footnote_ids)) + ) + text = text[:start_idx] + result + text[p + 1 :] else: # This id isn't defined, leave the markup alone. - curr_pos = p+1 + curr_pos = p + 1 continue # Now determine what this is by the remainder. @@ -1371,44 +1496,52 @@ def _do_links(self, text): return text # Inline anchor or img? - if text[p] == '(': # attempt at perf improvement + if text[p] == "(": # attempt at perf improvement url, title, url_end_idx = self._extract_url_and_title(text, p) if url is not None: # Handle an inline anchor or img. - is_img = start_idx > 0 and text[start_idx-1] == "!" + is_img = start_idx > 0 and text[start_idx - 1] == "!" 
if is_img: start_idx -= 1 # We've got to encode these to avoid conflicting # with italics/bold. - url = url.replace('*', self._escape_table['*']) \ - .replace('_', self._escape_table['_']) + url = url.replace("*", self._escape_table["*"]).replace( + "_", self._escape_table["_"] + ) if title: title_str = ' title="%s"' % ( _xml_escape_attr(title) - .replace('*', self._escape_table['*']) - .replace('_', self._escape_table['_'])) + .replace("*", self._escape_table["*"]) + .replace("_", self._escape_table["_"]) + ) else: - title_str = '' + title_str = "" if is_img: img_class_str = self._html_class_str_from_tag("img") - result = '%s= anchor_allowed_pos: - safe_link = self._safe_protocols.match(url) or url.startswith('#') + safe_link = self._safe_protocols.match(url) or url.startswith( + "#" + ) if self.safe_mode and not safe_link: result_head = '' % (title_str) else: - result_head = '' % (_html_escape_url(url, safe_mode=self.safe_mode), title_str) - result = '%s%s' % (result_head, link_text) + result_head = '' % ( + _html_escape_url(url, safe_mode=self.safe_mode), + title_str, + ) + result = "%s%s" % (result_head, link_text) if "smarty-pants" in self.extras: result = result.replace('"', self._escape_table['"']) # allowed from curr_pos on, from @@ -1426,7 +1559,7 @@ def _do_links(self, text): match = self._tail_of_reference_link_re.match(text, p) if match: # Handle a reference-style anchor or img. - is_img = start_idx > 0 and text[start_idx-1] == "!" + is_img = start_idx > 0 and text[start_idx - 1] == "!" if is_img: start_idx -= 1 link_id = match.group("id").lower() @@ -1436,41 +1569,48 @@ def _do_links(self, text): url = self.urls[link_id] # We've got to encode these to avoid conflicting # with italics/bold. - url = url.replace('*', self._escape_table['*']) \ - .replace('_', self._escape_table['_']) + url = url.replace("*", self._escape_table["*"]).replace( + "_", self._escape_table["_"] + ) title = self.titles.get(link_id) if title: - title = _xml_escape_attr(title) \ - .replace('*', self._escape_table['*']) \ - .replace('_', self._escape_table['_']) + title = ( + _xml_escape_attr(title) + .replace("*", self._escape_table["*"]) + .replace("_", self._escape_table["_"]) + ) title_str = ' title="%s"' % title else: - title_str = '' + title_str = "" if is_img: img_class_str = self._html_class_str_from_tag("img") - result = '%s= anchor_allowed_pos: if self.safe_mode and not self._safe_protocols.match(url): result_head = '' % (title_str) else: - result_head = '' % (_html_escape_url(url, safe_mode=self.safe_mode), title_str) - result = '%s%s' % (result_head, link_text) + result_head = '' % ( + _html_escape_url(url, safe_mode=self.safe_mode), + title_str, + ) + result = "%s%s" % (result_head, link_text) if "smarty-pants" in self.extras: result = result.replace('"', self._escape_table['"']) # allowed from curr_pos on, from # anchor_allowed_pos on. curr_pos = start_idx + len(result_head) anchor_allowed_pos = start_idx + len(result) - text = text[:start_idx] + result + text[match.end():] + text = text[:start_idx] + result + text[match.end() :] else: # Anchor not allowed here. 
curr_pos = start_idx + 1 @@ -1501,15 +1641,16 @@ def header_id_from_text(self, text, prefix, n): """ header_id = _slugify(text) if prefix and isinstance(prefix, base_string_type): - header_id = prefix + '-' + header_id + header_id = prefix + "-" + header_id self._count_from_header_id[header_id] += 1 if 0 == len(header_id) or self._count_from_header_id[header_id] > 1: - header_id += '-%s' % self._count_from_header_id[header_id] + header_id += "-%s" % self._count_from_header_id[header_id] return header_id _toc = None + def _toc_add_entry(self, level, id, name): if level > self._toc_depth: return @@ -1517,7 +1658,7 @@ def _toc_add_entry(self, level, id, name): self._toc = [] self._toc.append((level, id, self._unescape_special_chars(name))) - _h_re_base = r''' + _h_re_base = r""" (^(.+)[ \t]*\n(=+|-+)[ \t]*\n+) | (^(\#{1,6}) # \1 = string of #'s @@ -1528,10 +1669,10 @@ def _toc_add_entry(self, level, id, name): \#* # optional closing #'s (not counted) \n+ ) - ''' + """ - _h_re = re.compile(_h_re_base % '*', re.X | re.M) - _h_re_tag_friendly = re.compile(_h_re_base % '+', re.X | re.M) + _h_re = re.compile(_h_re_base % "*", re.X | re.M) + _h_re_tag_friendly = re.compile(_h_re_base % "+", re.X | re.M) def _h_sub(self, match): if match.group(1) is not None and match.group(3) == "-": @@ -1550,8 +1691,9 @@ def _h_sub(self, match): n = min(n + demote_headers, 6) header_id_attr = "" if "header-ids" in self.extras: - header_id = self.header_id_from_text(header_group, - self.extras["header-ids"], n) + header_id = self.header_id_from_text( + header_group, self.extras["header-ids"], n + ) if header_id: header_id_attr = ' id="%s"' % header_id html = self._run_span_gamut(header_group) @@ -1574,14 +1716,14 @@ def _do_headers(self, text): # ... # ###### Header 6 - if 'tag-friendly' in self.extras: + if "tag-friendly" in self.extras: return self._h_re_tag_friendly.sub(self._h_sub, text) return self._h_re.sub(self._h_sub, text) - _marker_ul_chars = '*+-' - _marker_any = r'(?:[%s]|\d+\.)' % _marker_ul_chars - _marker_ul = '(?:[%s])' % _marker_ul_chars - _marker_ol = r'(?:\d+\.)' + _marker_ul_chars = "*+-" + _marker_any = r"(?:[%s]|\d+\.)" % _marker_ul_chars + _marker_ul = "(?:[%s])" % _marker_ul_chars + _marker_ol = r"(?:\d+\.)" def _list_sub(self, match): lst = match.group(1) @@ -1604,7 +1746,7 @@ def _do_lists(self, text): hits = [] for marker_pat in (self._marker_ul, self._marker_ol): less_than_tab = self.tab_width - 1 - whole_list = r''' + whole_list = r""" ( # \1 = whole list ( # \2 [ ]{0,%d} @@ -1624,12 +1766,17 @@ def _do_lists(self, text): ) ) ) - ''' % (less_than_tab, marker_pat, marker_pat) + """ % ( + less_than_tab, + marker_pat, + marker_pat, + ) if self.list_level: # sub-list - list_re = re.compile("^"+whole_list, re.X | re.M | re.S) + list_re = re.compile("^" + whole_list, re.X | re.M | re.S) else: - list_re = re.compile(r"(?:(?<=\n\n)|\A\n?)"+whole_list, - re.X | re.M | re.S) + list_re = re.compile( + r"(?:(?<=\n\n)|\A\n?)" + whole_list, re.X | re.M | re.S + ) match = list_re.search(text, pos) if match: hits.append((match.start(), match)) @@ -1644,32 +1791,41 @@ def _do_lists(self, text): return text - _list_item_re = re.compile(r''' + _list_item_re = re.compile( + r""" (\n)? # leading line = \1 (^[ \t]*) # leading whitespace = \2 (?P%s) [ \t]+ # list marker = \3 ((?:.+?) 
# list item text = \4 (\n{1,2})) # eols = \5 (?= \n* (\Z | \2 (?P%s) [ \t]+)) - ''' % (_marker_any, _marker_any), - re.M | re.X | re.S) + """ + % (_marker_any, _marker_any), + re.M | re.X | re.S, + ) - _task_list_item_re = re.compile(r''' + _task_list_item_re = re.compile( + r""" (\[[\ xX]\])[ \t]+ # tasklist marker = \1 (.*) # list item text = \2 - ''', re.M | re.X | re.S) + """, + re.M | re.X | re.S, + ) - _task_list_warpper_str = r' %s' + _task_list_warpper_str = ( + r' %s' + ) def _task_list_item_sub(self, match): marker = match.group(1) item_text = match.group(2) - if marker in ['[x]','[X]']: - return self._task_list_warpper_str % ('checked ', item_text) - elif marker == '[ ]': - return self._task_list_warpper_str % ('', item_text) + if marker in ["[x]", "[X]"]: + return self._task_list_warpper_str % ("checked ", item_text) + elif marker == "[ ]": + return self._task_list_warpper_str % ("", item_text) _last_li_endswith_two_eols = False + def _list_item_sub(self, match): item = match.group(4) leading_line = match.group(1) @@ -1678,10 +1834,10 @@ def _list_item_sub(self, match): else: # Recursion for sub-lists: item = self._do_lists(self._outdent(item)) - if item.endswith('\n'): + if item.endswith("\n"): item = item[:-1] item = self._run_span_gamut(item) - self._last_li_endswith_two_eols = (len(match.group(5)) == 2) + self._last_li_endswith_two_eols = len(match.group(5)) == 2 if "task_list" in self.extras: item = self._task_list_item_re.sub(self._task_list_item_sub, item) @@ -1714,7 +1870,7 @@ def _process_list_items(self, list_str): # starting cardinal number; e.g. "1." or "a.". self.list_level += 1 self._last_li_endswith_two_eols = False - list_str = list_str.rstrip('\n') + '\n' + list_str = list_str.rstrip("\n") + "\n" list_str = self._list_item_re.sub(self._list_item_sub, list_str) self.list_level -= 1 return list_str @@ -1756,41 +1912,38 @@ def _code_block_sub(self, match, is_fenced_code_block=False): if is_fenced_code_block: lexer_name = match.group(1) if lexer_name: - formatter_opts = self.extras['fenced-code-blocks'] or {} + formatter_opts = self.extras["fenced-code-blocks"] or {} codeblock = match.group(2) codeblock = codeblock[:-1] # drop one trailing newline else: codeblock = match.group(1) codeblock = self._outdent(codeblock) codeblock = self._detab(codeblock) - codeblock = codeblock.lstrip('\n') # trim leading newlines - codeblock = codeblock.rstrip() # trim trailing whitespace + codeblock = codeblock.lstrip("\n") # trim leading newlines + codeblock = codeblock.rstrip() # trim trailing whitespace # Note: "code-color" extra is DEPRECATED. if "code-color" in self.extras and codeblock.startswith(":::"): - lexer_name, rest = codeblock.split('\n', 1) + lexer_name, rest = codeblock.split("\n", 1) lexer_name = lexer_name[3:].strip() - codeblock = rest.lstrip("\n") # Remove lexer declaration line. - formatter_opts = self.extras['code-color'] or {} + codeblock = rest.lstrip("\n") # Remove lexer declaration line. 
+ formatter_opts = self.extras["code-color"] or {} # Use pygments only if not using the highlightjs-lang extra if lexer_name and "highlightjs-lang" not in self.extras: + def unhash_code(codeblock): for key, sanitized in list(self.html_spans.items()): codeblock = codeblock.replace(key, sanitized) - replacements = [ - ("&", "&"), - ("<", "<"), - (">", ">") - ] + replacements = [("&", "&"), ("<", "<"), (">", ">")] for old, new in replacements: codeblock = codeblock.replace(old, new) return codeblock + lexer = self._get_pygments_lexer(lexer_name) if lexer: - codeblock = unhash_code( codeblock ) - colored = self._color_with_pygments(codeblock, lexer, - **formatter_opts) + codeblock = unhash_code(codeblock) + colored = self._color_with_pygments(codeblock, lexer, **formatter_opts) return "\n\n%s\n\n" % colored codeblock = self._encode_code(codeblock) @@ -1802,7 +1955,10 @@ def unhash_code(codeblock): code_class_str = self._html_class_str_from_tag("code") return "\n\n%s\n\n\n" % ( - pre_class_str, code_class_str, codeblock) + pre_class_str, + code_class_str, + codeblock, + ) def _html_class_str_from_tag(self, tag): """Get the appropriate ' class="..."' string (note the leading @@ -1821,7 +1977,8 @@ def _html_class_str_from_tag(self, tag): def _do_code_blocks(self, text): """Process Markdown `
<pre><code>` blocks."""
-        code_block_re = re.compile(r'''
+        code_block_re = re.compile(
+            r"""
             (?:\n\n|\A\n?)
             (               # $1 = the code block -- one or more lines, starting with a space/tab
               (?:
@@ -1833,16 +1990,21 @@ def _do_code_blocks(self, text):
             # Lookahead to make sure this block isn't already in a code block.
             # Needed when syntax highlighting is being used.
             (?![^<]*\</code\>)
-            ''' % (self.tab_width, self.tab_width),
-            re.M | re.X)
+            """
+            % (self.tab_width, self.tab_width),
+            re.M | re.X,
+        )
         return code_block_re.sub(self._code_block_sub, text)
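For reference, this regex is what feeds `_code_block_sub`: a four-space-indented block becomes a <pre><code> block. A minimal usage sketch (assuming the bundled markdown2 module is importable; exact whitespace in the output may differ):

    import markdown2

    html = markdown2.markdown("Some text:\n\n    print('hello')\n")
    # Roughly: <p>Some text:</p>
    #          <pre><code>print('hello')
    #          </code></pre>
    print(html)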
 
-    _fenced_code_block_re = re.compile(r'''
+    _fenced_code_block_re = re.compile(
+        r"""
         (?:\n+|\A\n?)
         ^```\s*?([\w+-]+)?\s*?\n    # opening fence, $1 = optional lang
         (.*?)                       # $2 = code block content
         ^```[ \t]*\n                # closing fence
-        ''', re.M | re.X | re.S)
+        """,
+        re.M | re.X | re.S,
+    )
 
     def _fenced_code_block_sub(self, match):
         return self._code_block_sub(match, is_fenced_code_block=True)
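The fenced path is only taken when the "fenced-code-blocks" extra is enabled; a minimal sketch of what the regex above supports (the exact markup depends on whether pygments is installed):

    import markdown2

    text = "```python\nprint('hi')\n```"
    print(markdown2.markdown(text, extras=["fenced-code-blocks"]))
    # With pygments available the block comes back syntax-highlighted;
    # otherwise roughly: <pre><code>print('hi')\n</code></pre>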
@@ -1859,7 +2021,8 @@ def _do_fenced_code_blocks(self, text):
     #   space and that space will be removed in the emitted HTML
     # See `test/tm-cases/escapes.text` for a number of edge-case
     # examples.
-    _code_span_re = re.compile(r'''
+    _code_span_re = re.compile(
+        r"""
             (?', '>'),
+            ("<", "<"),
+            (">", ">"),
         ]
         for before, after in replacements:
             text = text.replace(before, after)
@@ -1919,6 +2084,7 @@ def _encode_code(self, text):
         return hashed
 
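The replacements above are what make raw &, < and > safe inside backticks; a minimal sketch of the observable behavior (output shape, not exact bytes):

    import markdown2

    print(markdown2.markdown("Compare `a < b && b > c` here."))
    # Roughly: <p>Compare <code>a &lt; b &amp;&amp; b &gt; c</code> here.</p>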
     _strike_re = re.compile(r"~~(?=\S)(.+?)(?<=\S)~~", re.S)
+
     def _do_strike(self, text):
         text = self._strike_re.sub(r"\1", text)
         return text
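`_do_strike` only runs when the "strike" extra is active; a minimal sketch:

    import markdown2

    print(markdown2.markdown("~~removed~~", extras=["strike"]))
    # Roughly: <p><strike>removed</strike></p>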
@@ -1927,6 +2093,7 @@ def _do_strike(self, text):
     _em_re = re.compile(r"(\*|_)(?=\S)(.+?)(?<=\S)\1", re.S)
     _code_friendly_strong_re = re.compile(r"\*\*(?=\S)(.+?[*_]*)(?<=\S)\*\*", re.S)
     _code_friendly_em_re = re.compile(r"\*(?=\S)(.+?)(?<=\S)\*", re.S)
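The `_code_friendly_*` variants back the "code-friendly" extra, which disables `_` and `__` as emphasis markers so identifiers survive untouched; a minimal sketch:

    import markdown2

    print(markdown2.markdown("call some_func and other_func", extras=["code-friendly"]))
    # Roughly: <p>call some_func and other_func</p> (no <em> inserted)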
+
     def _do_italics_and_bold(self, text):
         # <strong> must go first:
         if "code-friendly" in self.extras:
@@ -1942,14 +2109,26 @@ def _do_italics_and_bold(self, text):
     # "twixt" can be written without an initial apostrophe. This is fine because
     # using scare quotes (single quotation marks) is rare.
     _apostrophe_year_re = re.compile(r"'(\d\d)(?=(\s|,|;|\.|\?|!|$))")
-    _contractions = ["tis", "twas", "twer", "neath", "o", "n",
-        "round", "bout", "twixt", "nuff", "fraid", "sup"]
+    _contractions = [
+        "tis",
+        "twas",
+        "twer",
+        "neath",
+        "o",
+        "n",
+        "round",
+        "bout",
+        "twixt",
+        "nuff",
+        "fraid",
+        "sup",
+    ]
+
     def _do_smart_contractions(self, text):
         text = self._apostrophe_year_re.sub(r"’\1", text)
         for c in self._contractions:
             text = text.replace("'%s" % c, "’%s" % c)
-            text = text.replace("'%s" % c.capitalize(),
-                "’%s" % c.capitalize())
+            text = text.replace("'%s" % c.capitalize(), "’%s" % c.capitalize())
         return text
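With the "smarty-pants" extra, the leading apostrophe of these contractions becomes a right single quote entity; a minimal sketch:

    import markdown2

    print(markdown2.markdown("'Twas brillig.", extras=["smarty-pants"]))
    # Roughly: <p>&#8217;Twas brillig.</p>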
 
     # Substitute double-quotes before single-quotes.
@@ -1957,6 +2136,7 @@ def _do_smart_contractions(self, text):
     _opening_double_quote_re = re.compile(r'(?%s[ \t]?        # '>' at the start of a line
@@ -1999,48 +2179,49 @@ def _do_smart_punctuation(self, text):
             \n*                     # blanks
           )+
         )
-    '''
-    _block_quote_re = re.compile(_block_quote_base % '', re.M | re.X)
-    _block_quote_re_spoiler = re.compile(_block_quote_base % '[ \t]*?!?', re.M | re.X)
-    _bq_one_level_re = re.compile('^[ \t]*>[ \t]?', re.M)
-    _bq_one_level_re_spoiler = re.compile('^[ \t]*>[ \t]*?![ \t]?', re.M)
-    _bq_all_lines_spoilers = re.compile(r'\A(?:^[ \t]*>[ \t]*?!.*[\n\r]*)+\Z', re.M)
-    _html_pre_block_re = re.compile(r'(\s*
<pre>.+?</pre>
)', re.S) + """ + _block_quote_re = re.compile(_block_quote_base % "", re.M | re.X) + _block_quote_re_spoiler = re.compile(_block_quote_base % "[ \t]*?!?", re.M | re.X) + _bq_one_level_re = re.compile("^[ \t]*>[ \t]?", re.M) + _bq_one_level_re_spoiler = re.compile("^[ \t]*>[ \t]*?![ \t]?", re.M) + _bq_all_lines_spoilers = re.compile(r"\A(?:^[ \t]*>[ \t]*?!.*[\n\r]*)+\Z", re.M) + _html_pre_block_re = re.compile(r"(\s*
<pre>.+?</pre>
)", re.S) + def _dedent_two_spaces_sub(self, match): - return re.sub(r'(?m)^ ', '', match.group(1)) + return re.sub(r"(?m)^ ", "", match.group(1)) def _block_quote_sub(self, match): bq = match.group(1) - is_spoiler = 'spoiler' in self.extras and self._bq_all_lines_spoilers.match(bq) + is_spoiler = "spoiler" in self.extras and self._bq_all_lines_spoilers.match(bq) # trim one level of quoting if is_spoiler: - bq = self._bq_one_level_re_spoiler.sub('', bq) + bq = self._bq_one_level_re_spoiler.sub("", bq) else: - bq = self._bq_one_level_re.sub('', bq) + bq = self._bq_one_level_re.sub("", bq) # trim whitespace-only lines - bq = self._ws_only_line_re.sub('', bq) - bq = self._run_block_gamut(bq) # recurse + bq = self._ws_only_line_re.sub("", bq) + bq = self._run_block_gamut(bq) # recurse - bq = re.sub('(?m)^', ' ', bq) + bq = re.sub("(?m)^", " ", bq) # These leading spaces screw with
<pre> content, so we need to fix that:
         bq = self._html_pre_block_re.sub(self._dedent_two_spaces_sub, bq)
 
         if is_spoiler:
             return '
<blockquote class="spoiler">\n%s\n</blockquote>
\n\n' % bq else: - return '
<blockquote>\n%s\n</blockquote>
\n\n' % bq + return "
<blockquote>\n%s\n</blockquote>
\n\n" % bq def _do_block_quotes(self, text): - if '>' not in text: + if ">" not in text: return text - if 'spoiler' in self.extras: + if "spoiler" in self.extras: return self._block_quote_re_spoiler.sub(self._block_quote_sub, text) else: return self._block_quote_re.sub(self._block_quote_sub, text) def _form_paragraphs(self, text): # Strip leading and trailing lines: - text = text.strip('\n') + text = text.strip("\n") # Wrap
<p>
tags. grafs = [] @@ -2053,27 +2234,38 @@ def _form_paragraphs(self, text): if "cuddled-lists" in self.extras: # Need to put back trailing '\n' for `_list_item_re` # match at the end of the paragraph. - li = self._list_item_re.search(graf + '\n') + li = self._list_item_re.search(graf + "\n") # Two of the same list marker in this paragraph: a likely # candidate for a list cuddled to preceding paragraph # text (issue 33). Note the `[-1]` is a quick way to # consider numeric bullets (e.g. "1." and "2.") to be # equal. - if (li and len(li.group(2)) <= 3 - and ( - (li.group("next_marker") and li.group("marker")[-1] == li.group("next_marker")[-1]) - or - li.group("next_marker") is None + if ( + li + and len(li.group(2)) <= 3 + and ( + ( + li.group("next_marker") + and li.group("marker")[-1] + == li.group("next_marker")[-1] ) + or li.group("next_marker") is None + ) ): start = li.start() cuddled_list = self._do_lists(graf[start:]).rstrip("\n") - assert cuddled_list.startswith("
<ul>
    ") or cuddled_list.startswith("
      ") + assert cuddled_list.startswith( + "
    ") + footer.append("") + return text + "\n\n" + "\n".join(footer) else: return text - _naked_lt_re = re.compile(r'<(?![a-z/?\$!])', re.I) - _naked_gt_re = re.compile(r'''(?''', re.I) + _naked_lt_re = re.compile(r"<(?![a-z/?\$!])", re.I) + _naked_gt_re = re.compile(r"""(?""", re.I) def _encode_amps_and_angles(self, text): # Smart processing for ampersands and angle brackets that need # to be encoded. - text = _AMPERSAND_RE.sub('&', text) + text = _AMPERSAND_RE.sub("&", text) # Encode naked <'s - text = self._naked_lt_re.sub('<', text) + text = self._naked_lt_re.sub("<", text) # Encode naked >'s # Note: Other markdown implementations (e.g. Markdown.pl, PHP # Markdown) don't do this. - text = self._naked_gt_re.sub('>', text) + text = self._naked_gt_re.sub(">", text) return text _incomplete_tags_re = re.compile("<(/?\w+[\s/]+?)") @@ -2151,15 +2352,17 @@ def _encode_incomplete_tags(self, text): def _encode_backslash_escapes(self, text): for ch, escape in list(self._escape_table.items()): - text = text.replace("\\"+ch, escape) + text = text.replace("\\" + ch, escape) return text _auto_link_re = re.compile(r'<((https?|ftp):[^\'">\s]+)>', re.I) + def _auto_link_sub(self, match): g1 = match.group(1) return '%s' % (g1, g1) - _auto_email_link_re = re.compile(r""" + _auto_email_link_re = re.compile( + r""" < (?:mailto:)? ( @@ -2168,10 +2371,12 @@ def _auto_link_sub(self, match): [-\w]+(\.[-\w]+)*\.[a-z]+ ) > - """, re.I | re.X | re.U) + """, + re.I | re.X | re.U, + ) + def _auto_email_link_sub(self, match): - return self._encode_email_address( - self._unescape_special_chars(match.group(1))) + return self._encode_email_address(self._unescape_special_chars(match.group(1))) def _do_auto_links(self, text): text = self._auto_link_re.sub(self._auto_link_sub, text) @@ -2191,11 +2396,9 @@ def _encode_email_address(self, addr): # # Based on a filter by Matthew Wickline, posted to the BBEdit-Talk # mailing list: - chars = [_xml_encode_email_char_at_random(ch) - for ch in "mailto:" + addr] + chars = [_xml_encode_email_char_at_random(ch) for ch in "mailto:" + addr] # Strip the mailto: from the visible part. - addr = '%s' \ - % (''.join(chars), ''.join(chars[7:])) + addr = '%s' % ("".join(chars), "".join(chars[7:])) return addr def _do_link_patterns(self, text): @@ -2211,23 +2414,25 @@ def _do_link_patterns(self, text): for (start, end), href in reversed(replacements): # Do not match against links inside brackets. - if text[start - 1:start] == '[' and text[end:end + 1] == ']': + if text[start - 1 : start] == "[" and text[end : end + 1] == "]": continue # Do not match against links in the standard markdown syntax. - if text[start - 2:start] == '](' or text[end:end + 2] == '")': + if text[start - 2 : start] == "](" or text[end : end + 2] == '")': continue # Do not match against links which are escaped. 
@@ -2211,23 +2414,25 @@ def _do_link_patterns(self, text):

         for (start, end), href in reversed(replacements):
             # Do not match against links inside brackets.
-            if text[start - 1:start] == '[' and text[end:end + 1] == ']':
+            if text[start - 1 : start] == "[" and text[end : end + 1] == "]":
                 continue
             # Do not match against links in the standard markdown syntax.
-            if text[start - 2:start] == '](' or text[end:end + 2] == '")':
+            if text[start - 2 : start] == "](" or text[end : end + 2] == '")':
                 continue
             # Do not match against links which are escaped.
-            if text[start - 3:start] == '"""' and text[end:end + 3] == '"""':
-                text = text[:start - 3] + text[start:end] + text[end + 3:]
+            if text[start - 3 : start] == '"""' and text[end : end + 3] == '"""':
+                text = text[: start - 3] + text[start:end] + text[end + 3 :]
                 continue
             escaped_href = (
-                href.replace('"', '&quot;')  # b/c of attr quote
-                # To avoid markdown <em> and <strong>:
-                .replace('*', self._escape_table['*'])
-                .replace('_', self._escape_table['_']))
+                href.replace('"', "&quot;")  # b/c of attr quote
+                # To avoid markdown <em> and <strong>:
+                .replace("*", self._escape_table["*"]).replace(
+                    "_", self._escape_table["_"]
+                )
+            )
             link = '<a href="%s">%s</a>' % (escaped_href, text[start:end])
             hash = _hash_text(link)
             link_from_hash[hash] = link
@@ -2244,7 +2449,7 @@ def _unescape_special_chars(self, text):

     def _outdent(self, text):
         # Remove one level of line-leading tabs or spaces
-        return self._outdent_re.sub('', text)
+        return self._outdent_re.sub("", text)


 class MarkdownWithExtras(Markdown):
@@ -2259,6 +2464,7 @@ class MarkdownWithExtras(Markdown):
     - link-patterns (because you need to specify some actual link-patterns
       anyway)
     """
+
     extras = ["footnotes", "code-color"]
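The `calculate_toc_html` hunk below is what backs the `toc_html` attribute that the "toc" extra exposes. A rough usage sketch against markdown2's public API (illustrative, not part of this patch):

    import markdown2

    html = markdown2.markdown("# One\n\n## Two\n", extras=["toc"])
    # html.toc_html is a nested <ul>/<li> outline whose <a> tags
    # point at the generated header ids (#one, #two)
    print(html.toc_html)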
      " % indent()) @@ -2289,14 +2496,13 @@ def indent(): if not lines[-1].endswith(""): lines[-1] += "" lines.append("%s
    " % indent()) - lines.append('%s
  • %s' % ( - indent(), id, name)) + lines.append('%s
  • %s' % (indent(), id, name)) while len(h_stack) > 1: h_stack.pop() if not lines[-1].endswith("
  • "): lines[-1] += "" lines.append("%s
" % indent()) - return '\n'.join(lines) + '\n' + return "\n".join(lines) + "\n" class UnicodeWithAttrs(unicode): @@ -2304,12 +2510,16 @@ class UnicodeWithAttrs(unicode): possibly attach some attributes. E.g. the "toc_html" attribute when the "toc" extra is used. """ + metadata = None toc_html = None + ## {{{ http://code.activestate.com/recipes/577257/ (r1) -_slugify_strip_re = re.compile(r'[^\w\s-]') -_slugify_hyphenate_re = re.compile(r'[-\s]+') +_slugify_strip_re = re.compile(r"[^\w\s-]") +_slugify_hyphenate_re = re.compile(r"[-\s]+") + + def _slugify(value): """ Normalizes string, converts to lowercase, removes non-alpha characters, @@ -2318,19 +2528,24 @@ def _slugify(value): From Django's "django/template/defaultfilters.py". """ import unicodedata - value = unicodedata.normalize('NFKD', value).encode('ascii', 'ignore').decode() - value = _slugify_strip_re.sub('', value).strip().lower() - return _slugify_hyphenate_re.sub('-', value) + + value = unicodedata.normalize("NFKD", value).encode("ascii", "ignore").decode() + value = _slugify_strip_re.sub("", value).strip().lower() + return _slugify_hyphenate_re.sub("-", value) + + ## end of http://code.activestate.com/recipes/577257/ }}} # From http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/52549 def _curry(*args, **kwargs): function, args = args[0], args[1:] + def result(*rest, **kwrest): combined = kwargs.copy() combined.update(kwrest) return function(*args + rest, **combined) + return result @@ -2340,10 +2555,10 @@ def _regex_from_encoded_pattern(s): '/foo/' -> re.compile('foo') '/foo/i' -> re.compile('foo', re.I) """ - if s.startswith('/') and s.rfind('/') != 0: + if s.startswith("/") and s.rfind("/") != 0: # Parse it: /PATTERN/FLAGS - idx = s.rfind('/') - pattern, flags_str = s[1:idx], s[idx+1:] + idx = s.rfind("/") + pattern, flags_str = s[1:idx], s[idx + 1 :] flag_from_char = { "i": re.IGNORECASE, "l": re.LOCALE, @@ -2356,9 +2571,11 @@ def _regex_from_encoded_pattern(s): try: flags |= flag_from_char[char] except KeyError: - raise ValueError("unsupported regex flag: '%s' in '%s' " - "(must be one of '%s')" - % (char, s, ''.join(list(flag_from_char.keys())))) + raise ValueError( + "unsupported regex flag: '%s' in '%s' " + "(must be one of '%s')" + % (char, s, "".join(list(flag_from_char.keys()))) + ) return re.compile(s[1:idx], flags) else: # not an encoded regex return re.compile(re.escape(s)) @@ -2379,55 +2596,64 @@ def _dedentlines(lines, tabsize=8, skip_first_line=False): """ DEBUG = False if DEBUG: - print("dedent: dedent(..., tabsize=%d, skip_first_line=%r)"\ - % (tabsize, skip_first_line)) + print( + "dedent: dedent(..., tabsize=%d, skip_first_line=%r)" + % (tabsize, skip_first_line) + ) margin = None for i, line in enumerate(lines): - if i == 0 and skip_first_line: continue + if i == 0 and skip_first_line: + continue indent = 0 for ch in line: - if ch == ' ': + if ch == " ": indent += 1 - elif ch == '\t': + elif ch == "\t": indent += tabsize - (indent % tabsize) - elif ch in '\r\n': + elif ch in "\r\n": continue # skip all-whitespace lines else: break else: continue # skip all-whitespace lines - if DEBUG: print("dedent: indent=%d: %r" % (indent, line)) + if DEBUG: + print("dedent: indent=%d: %r" % (indent, line)) if margin is None: margin = indent else: margin = min(margin, indent) - if DEBUG: print("dedent: margin=%r" % margin) + if DEBUG: + print("dedent: margin=%r" % margin) if margin is not None and margin > 0: for i, line in enumerate(lines): - if i == 0 and skip_first_line: continue + if i == 0 and skip_first_line: 
+                continue
             removed = 0
             for j, ch in enumerate(line):
-                if ch == ' ':
+                if ch == " ":
                     removed += 1
-                elif ch == '\t':
+                elif ch == "\t":
                     removed += tabsize - (removed % tabsize)
-                elif ch in '\r\n':
-                    if DEBUG: print("dedent: %r: EOL -> strip up to EOL" % line)
+                elif ch in "\r\n":
+                    if DEBUG:
+                        print("dedent: %r: EOL -> strip up to EOL" % line)
                     lines[i] = lines[i][j:]
                     break
                 else:
-                    raise ValueError("unexpected non-whitespace char %r in "
-                                     "line %r while removing %d-space margin"
-                                     % (ch, line, margin))
+                    raise ValueError(
+                        "unexpected non-whitespace char %r in "
+                        "line %r while removing %d-space margin" % (ch, line, margin)
+                    )
                 if DEBUG:
-                    print("dedent: %r: %r -> removed %d/%d"\
-                          % (line, ch, removed, margin))
+                    print(
+                        "dedent: %r: %r -> removed %d/%d" % (line, ch, removed, margin)
+                    )
                 if removed == margin:
-                    lines[i] = lines[i][j+1:]
+                    lines[i] = lines[i][j + 1 :]
                     break
                 elif removed > margin:
-                    lines[i] = ' '*(removed-margin) + lines[i][j+1:]
+                    lines[i] = " " * (removed - margin) + lines[i][j + 1 :]
                     break
             else:
                 if removed:
@@ -2448,7 +2674,7 @@ def _dedent(text, tabsize=8, skip_first_line=False):
     """
     lines = text.splitlines(1)
     _dedentlines(lines, tabsize=tabsize, skip_first_line=skip_first_line)
-    return ''.join(lines)
+    return "".join(lines)


 class _memoized(object):
@@ -2458,6 +2684,7 @@ class _memoized(object):

     http://wiki.python.org/moin/PythonDecoratorLibrary
     """
+
     def __init__(self, func):
         self.func = func
         self.cache = {}
@@ -2480,7 +2707,8 @@ def __repr__(self):

 def _xml_oneliner_re_from_tab_width(tab_width):
     """Standalone XML processing instruction regex."""
-    return re.compile(r"""
+    return re.compile(
+        r"""
         (?:
             (?<=\n\n)       # Starting after a blank line
             |               # or
             \A\n?           # the beginning of the doc
         )
         (                           # save in \1
             [ ]{0,%d}
             (?:
                 <\?\w+\b\s+.*?\?>   # XML processing instruction
                 |
                 <\w+:\w+\b\s+.*?/>  # namespaced single tag
             )
             [ \t]*
             (?=\n{2,}|\Z)       # followed by a blank line or end of document
         )
-        """ % (tab_width - 1), re.X)
+        """
+        % (tab_width - 1),
+        re.X,
+    )
+
+
 _xml_oneliner_re_from_tab_width = _memoized(_xml_oneliner_re_from_tab_width)


 def _hr_tag_re_from_tab_width(tab_width):
-    return re.compile(r"""
+    return re.compile(
+        r"""
         (?:
             (?<=\n\n)       # Starting after a blank line
             |               # or
             \A\n?           # the beginning of the doc
         )
         (                       # save in \1
             [ ]{0,%d}
             <(hr)               # start tag = \2
             \b                  # word break
             ([^<>])*?           #
             /?>                 # the matching end tag
             [ \t]*
             (?=\n{2,}|\Z)       # followed by a blank line or end of document
         )
-        """ % (tab_width - 1), re.X)
+        """
+        % (tab_width - 1),
+        re.X,
+    )
+
+
 _hr_tag_re_from_tab_width = _memoized(_hr_tag_re_from_tab_width)
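Because both regex builders above are wrapped in `_memoized`, each distinct `tab_width` compiles its pattern only once and later calls hit the cache. The same pattern in isolation (hypothetical function, for illustration only):

    @_memoized
    def square(n):
        print("computing %d" % n)
        return n * n

    square(4)  # prints "computing 4", returns 16
    square(4)  # cache hit: returns 16 silently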
""" - escaped = _AMPERSAND_RE.sub('&', attr) + escaped = _AMPERSAND_RE.sub("&", attr) - escaped = (attr - .replace('"', '"') - .replace('<', '<') - .replace('>', '>')) + escaped = attr.replace('"', """).replace("<", "<").replace(">", ">") if not skip_single_quote: escaped = escaped.replace("'", "'") return escaped @@ -2546,33 +2782,33 @@ def _xml_encode_email_char_at_random(ch): return ch elif r < 0.45: # The [1:] is to drop leading '0': 0x63 -> x63 - return '&#%s;' % hex(ord(ch))[1:] + return "&#%s;" % hex(ord(ch))[1:] else: - return '&#%s;' % ord(ch) + return "&#%s;" % ord(ch) def _html_escape_url(attr, safe_mode=False): """Replace special characters that are potentially malicious in url string.""" - escaped = (attr - .replace('"', '"') - .replace('<', '<') - .replace('>', '>')) + escaped = attr.replace('"', """).replace("<", "<").replace(">", ">") if safe_mode: - escaped = escaped.replace('+', ' ') + escaped = escaped.replace("+", " ") escaped = escaped.replace("'", "'") return escaped # ---- mainline + class _NoReflowFormatter(optparse.IndentedHelpFormatter): """An optparse formatter that does NOT reflow the description.""" + def format_description(self, description): return description or "" def _test(): import doctest + doctest.testmod() @@ -2583,36 +2819,69 @@ def main(argv=None): logging.basicConfig() usage = "usage: %prog [PATHS...]" - version = "%prog "+__version__ - parser = optparse.OptionParser(prog="markdown2", usage=usage, - version=version, description=cmdln_desc, - formatter=_NoReflowFormatter()) - parser.add_option("-v", "--verbose", dest="log_level", - action="store_const", const=logging.DEBUG, - help="more verbose output") - parser.add_option("--encoding", - help="specify encoding of text content") - parser.add_option("--html4tags", action="store_true", default=False, - help="use HTML 4 style for empty element tags") - parser.add_option("-s", "--safe", metavar="MODE", dest="safe_mode", - help="sanitize literal HTML: 'escape' escapes " - "HTML meta chars, 'replace' replaces with an " - "[HTML_REMOVED] note") - parser.add_option("-x", "--extras", action="append", - help="Turn on specific extra features (not part of " - "the core Markdown spec). See above.") - parser.add_option("--use-file-vars", - help="Look for and use Emacs-style 'markdown-extras' " - "file var to turn on extras. 
See " - "") - parser.add_option("--link-patterns-file", - help="path to a link pattern file") - parser.add_option("--self-test", action="store_true", - help="run internal self-tests (some doctests)") - parser.add_option("--compare", action="store_true", - help="run against Markdown.pl as well (for testing)") - parser.set_defaults(log_level=logging.INFO, compare=False, - encoding="utf-8", safe_mode=None, use_file_vars=False) + version = "%prog " + __version__ + parser = optparse.OptionParser( + prog="markdown2", + usage=usage, + version=version, + description=cmdln_desc, + formatter=_NoReflowFormatter(), + ) + parser.add_option( + "-v", + "--verbose", + dest="log_level", + action="store_const", + const=logging.DEBUG, + help="more verbose output", + ) + parser.add_option("--encoding", help="specify encoding of text content") + parser.add_option( + "--html4tags", + action="store_true", + default=False, + help="use HTML 4 style for empty element tags", + ) + parser.add_option( + "-s", + "--safe", + metavar="MODE", + dest="safe_mode", + help="sanitize literal HTML: 'escape' escapes " + "HTML meta chars, 'replace' replaces with an " + "[HTML_REMOVED] note", + ) + parser.add_option( + "-x", + "--extras", + action="append", + help="Turn on specific extra features (not part of " + "the core Markdown spec). See above.", + ) + parser.add_option( + "--use-file-vars", + help="Look for and use Emacs-style 'markdown-extras' " + "file var to turn on extras. See " + "", + ) + parser.add_option("--link-patterns-file", help="path to a link pattern file") + parser.add_option( + "--self-test", + action="store_true", + help="run internal self-tests (some doctests)", + ) + parser.add_option( + "--compare", + action="store_true", + help="run against Markdown.pl as well (for testing)", + ) + parser.set_defaults( + log_level=logging.INFO, + compare=False, + encoding="utf-8", + safe_mode=None, + use_file_vars=False, + ) opts, paths = parser.parse_args() log.setLevel(opts.log_level) @@ -2624,8 +2893,8 @@ def main(argv=None): for s in opts.extras: splitter = re.compile("[,;: ]+") for e in splitter.split(s): - if '=' in e: - ename, earg = e.split('=', 1) + if "=" in e: + ename, earg = e.split("=", 1) try: earg = int(earg) except ValueError: @@ -2641,64 +2910,88 @@ def main(argv=None): f = open(opts.link_patterns_file) try: for i, line in enumerate(f.readlines()): - if not line.strip(): continue - if line.lstrip().startswith("#"): continue + if not line.strip(): + continue + if line.lstrip().startswith("#"): + continue try: pat, href = line.rstrip().rsplit(None, 1) except ValueError: - raise MarkdownError("%s:%d: invalid link pattern line: %r" - % (opts.link_patterns_file, i+1, line)) - link_patterns.append( - (_regex_from_encoded_pattern(pat), href)) + raise MarkdownError( + "%s:%d: invalid link pattern line: %r" + % (opts.link_patterns_file, i + 1, line) + ) + link_patterns.append((_regex_from_encoded_pattern(pat), href)) finally: f.close() else: link_patterns = None from os.path import join, dirname, abspath, exists - markdown_pl = join(dirname(dirname(abspath(__file__))), "test", - "Markdown.pl") + + markdown_pl = join(dirname(dirname(abspath(__file__))), "test", "Markdown.pl") if not paths: - paths = ['-'] + paths = ["-"] for path in paths: - if path == '-': + if path == "-": text = sys.stdin.read() else: - fp = codecs.open(path, 'r', opts.encoding) + fp = codecs.open(path, "r", opts.encoding) text = fp.read() fp.close() if opts.compare: from subprocess import Popen, PIPE + print("==== Markdown.pl ====") - p = 
-            p = Popen('perl %s' % markdown_pl, shell=True, stdin=PIPE, stdout=PIPE, close_fds=True)
-            p.stdin.write(text.encode('utf-8'))
+            p = Popen(
+                "perl %s" % markdown_pl,
+                shell=True,
+                stdin=PIPE,
+                stdout=PIPE,
+                close_fds=True,
+            )
+            p.stdin.write(text.encode("utf-8"))
             p.stdin.close()
-            perl_html = p.stdout.read().decode('utf-8')
+            perl_html = p.stdout.read().decode("utf-8")
             if py3:
                 sys.stdout.write(perl_html)
             else:
-                sys.stdout.write(perl_html.encode(
-                    sys.stdout.encoding or "utf-8", 'xmlcharrefreplace'))
+                sys.stdout.write(
+                    perl_html.encode(
+                        sys.stdout.encoding or "utf-8", "xmlcharrefreplace"
+                    )
+                )
             print("==== markdown2.py ====")
-        html = markdown(text,
+        html = markdown(
+            text,
             html4tags=opts.html4tags,
             safe_mode=opts.safe_mode,
-            extras=extras, link_patterns=link_patterns,
+            extras=extras,
+            link_patterns=link_patterns,
             use_file_vars=opts.use_file_vars,
-            cli=True)
+            cli=True,
+        )
         if py3:
             sys.stdout.write(html)
         else:
-            sys.stdout.write(html.encode(
-                sys.stdout.encoding or "utf-8", 'xmlcharrefreplace'))
+            sys.stdout.write(
+                html.encode(sys.stdout.encoding or "utf-8", "xmlcharrefreplace")
+            )
         if extras and "toc" in extras:
-            log.debug("toc_html: " +
-                str(html.toc_html.encode(sys.stdout.encoding or "utf-8", 'xmlcharrefreplace')))
+            log.debug(
+                "toc_html: "
+                + str(
+                    html.toc_html.encode(
+                        sys.stdout.encoding or "utf-8", "xmlcharrefreplace"
+                    )
+                )
+            )
         if opts.compare:
             test_dir = join(dirname(dirname(abspath(__file__))), "test")
             if exists(join(test_dir, "test_markdown2.py")):
                 sys.path.insert(0, test_dir)
                 from test_markdown2 import norm_html_from_html
+
                 norm_html = norm_html_from_html(html)
                 norm_perl_html = norm_html_from_html(perl_html)
             else:
diff --git a/markdown2html.py b/markdown2html.py
index eed7dbb..85a7014 100644
--- a/markdown2html.py
+++ b/markdown2html.py
@@ -9,9 +9,9 @@
 from .lib.markdown2 import Markdown

-__all__ = ('markdown2html', )
+__all__ = ("markdown2html",)

-markdowner = Markdown(extras=['fenced-code-blocks'])
+markdowner = Markdown(extras=["fenced-code-blocks"])

 # FIXME: how do I choose how many workers I want? Does thread pool reuse threads or
 # does it stupidly throw them out? (we could implement something of our own)
@@ -19,9 +19,11 @@

 images_cache = {}

+
 class LoadingError(Exception):
     pass

+
 def markdown2html(markdown, basepath, re_render, resources):
     """ converts the markdown to html, loads the images and puts in base64 for sublime
     to understand them correctly. That means that we are responsible for loading the
@@ -31,19 +33,19 @@ def markdown2html(markdown, basepath, re_render, resources):
     html = markdowner.convert(markdown)
     soup = bs4.BeautifulSoup(html, "html.parser")

-    for img_element in soup.find_all('img'):
-        src = img_element['src']
+    for img_element in soup.find_all("img"):
+        src = img_element["src"]

         # already in base64, or something of the like
         # FIXME: what other types are possible? Are they handled by ST? If not, could we
         # convert it into base64? is it worth the effort?
-        if src.startswith('data:image/'):
+        if src.startswith("data:image/"):
             continue

-        if src.startswith('http://') or src.startswith('https://'):
+        if src.startswith("http://") or src.startswith("https://"):
             path = src
-        elif src.startswith('file://'):
-            path = src[len('file://'):]
+        elif src.startswith("file://"):
+            path = src[len("file://") :]
         else:
             # expanduser: ~ -> /home/math2001
             # realpath: simplify that paths so that we don't have duplicated caches
@@ -52,39 +54,45 @@ def markdown2html(markdown, basepath, re_render, resources):
         try:
             base64 = get_base64_image(path, re_render)
         except FileNotFoundError as e:
-            base64 = resources['base64_404_image']
+            base64 = resources["base64_404_image"]
         except LoadingError:
-            base64 = resources['base64_loading_image']
+            base64 = resources["base64_loading_image"]

-        img_element['src'] = base64
+        img_element["src"] = base64
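The image pass above is the heart of the preview: as the docstring says, the plugin itself is responsible for loading every image, so each `<img>` src is rewritten to a base64 data URI that Sublime's minihtml can display directly. The core trick, standalone (hypothetical path):

    import base64

    with open("/tmp/pic.png", "rb") as fp:
        src = "data:image/png;base64," + base64.b64encode(fp.read()).decode("utf-8")
    # <img src="data:image/png;base64,iVBORw0..."> renders without any file access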

     # remove comments, because they pollute the console with error messages
-    for comment_element in soup.find_all(text=lambda text: isinstance(text, bs4.Comment)):
+    for comment_element in soup.find_all(
+        text=lambda text: isinstance(text, bs4.Comment)
+    ):
         comment_element.extract()

     # FIXME: how do tables look? should we use ascii tables?

     # pre aren't handled by ST3. They require manual adjustment
-    for pre_element in soup.find_all('pre'):
+    for pre_element in soup.find_all("pre"):
         # select the first child,
         code_element = next(pre_element.children)
         # FIXME: this method sucks, but can we do better?
-        fixed_pre = str(code_element) \
-            .replace(' ', '&nbsp;') \
-            .replace('\n', '<br />')
+        fixed_pre = (
+            str(code_element)
+            .replace(" ", "&nbsp;")
+            .replace("\n", "<br />")
+        )
         code_element.replace_with(bs4.BeautifulSoup(fixed_pre, "html.parser"))

     # FIXME: highlight the code using Sublime's syntax
     # FIXME: report that ST doesn't support <br/> but does work with <br /> ... WTF?
-    return "<style>\n{}\n</style>\n\n{}".format(resources['stylesheet'], soup).replace('<br/>', '<br />')
+    return "<style>\n{}\n</style>\n\n{}".format(resources["stylesheet"], soup).replace(
+        "<br/>", "<br />"
+    )
" + ) -def get_base64_image(path, re_render): +def get_base64_image(path, re_render): def callback(url, future): - # this is "safe" to do because callback is called in the same thread as + # this is "safe" to do because callback is called in the same thread as # add_done_callback: # > Added callables are called in the order that they were added and are always # > called in a thread belonging to the process that added them @@ -94,7 +102,7 @@ def callback(url, future): # will read from the cache re_render() - if path.startswith('http://') or path.startswith('https://'): + if path.startswith("http://") or path.startswith("https://"): if path in images_cache: return images_cache[path] executor.submit(load_image, path).add_done_callback(partial(callback, path)) @@ -102,8 +110,9 @@ def callback(url, future): # FIXME: use some kind of cache for this as well, because it decodes on every # keystroke here... - with open(path, 'rb') as fp: - return 'data:image/png;base64,' + base64.b64encode(fp.read()).decode('utf-8') + with open(path, "rb") as fp: + return "data:image/png;base64," + base64.b64encode(fp.read()).decode("utf-8") + # FIXME: wait what the hell? Why do I have two caches? (lru and images_cache) # FIXME: This is an in memory cache. 20 seems like a fair bit of images... Should it be @@ -114,6 +123,10 @@ def callback(url, future): def load_image(url): with urllib.request.urlopen(url, timeout=60) as conn: content_type = conn.info().get_content_type() - if 'image' not in content_type: - raise ValueError("{!r} doesn't point to an image, but to a {!r}".format(url, content_type)) - return 'data:image/png;base64,' + base64.b64encode(conn.read()).decode('utf-8') + if "image" not in content_type: + raise ValueError( + "{!r} doesn't point to an image, but to a {!r}".format( + url, content_type + ) + ) + return "data:image/png;base64," + base64.b64encode(conn.read()).decode("utf-8") diff --git a/resources/convertresources.py b/resources/convertresources.py index fdcd878..ed1d52a 100644 --- a/resources/convertresources.py +++ b/resources/convertresources.py @@ -2,8 +2,8 @@ from base64 import b64encode -with open('404.png', 'rb') as png, open('404.base64', 'wb') as base64: +with open("404.png", "rb") as png, open("404.base64", "wb") as base64: base64.write(b64encode(png.read())) -with open('loading.png', 'rb') as png, open('loading.base64', 'wb') as base64: +with open("loading.png", "rb") as png, open("loading.base64", "wb") as base64: base64.write(b64encode(png.read())) diff --git a/utils.py b/utils.py index e0661d1..c89ab72 100644 --- a/utils.py +++ b/utils.py @@ -1,9 +1,11 @@ # import sublime import time + def get_settings(): return sublime.get_settings("MarkdownLivePreview.sublime-settings") + def min_time_between_call(timeout, on_block=lambda *args, **kwargs: None): """ Enforces a timeout between each call to the function timeout is in seconds @@ -19,5 +21,7 @@ def wrapper(*args, **kwargs): last_call = time.time() return func(*args, **kwargs) + return wrapper + return outer