♻️ improve project structure & performance

hmiladhia · Oct 8, 2022 · 420f3f8 · 420f3f8
2 parents 1867b40 + 94890f8
commit 420f3f8
Show file tree

Hide file tree

Showing 26 changed files with 865 additions and 717 deletions.
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -32,5 +32,5 @@ repos:
       - id: mypy
         files: ^nbmanips/
         # exclude utils.py for now
-        exclude: 'nbmanips/utils\.py$'
+        exclude: 'nbmanips/notebook/utils\.py$'
         args: [--no-strict-optional, --ignore-missing-imports, --follow-imports, silent]
diff --git a/nbmanips/VERSION b/nbmanips/VERSION
@@ -1 +1 @@
-1.4.0
+2.0.0
diff --git a/nbmanips/_utils.py b/nbmanips/_utils.py
@@ -0,0 +1,10 @@
+from functools import wraps
+
+
+def partial(func, *args, **keywords):
+    @wraps(func)
+    def new_func(*f_args, **f_keywords):
+        new_keywords = {**f_keywords, **keywords}
+        return func(*f_args, *args, **new_keywords)
+
+    return new_func
diff --git a/nbmanips/cell/__init__.py b/nbmanips/cell/__init__.py
@@ -0,0 +1,10 @@
+from .cell_output import CellOutput
+from .cells import Cell, CodeCell, MarkdownCell, RawCell
+from .output_parsers import HtmlParser, ImageParser, TextParser
+
+CellOutput.register_parser('text', TextParser())
+CellOutput.register_parser('text/html', HtmlParser())
+CellOutput.register_parser('image', ImageParser())
+
+
+__all__ = ['Cell', 'CellOutput', 'MarkdownCell', 'CodeCell', 'RawCell']
diff --git a/nbmanips/cell_output.py → nbmanips/cell/cell_output.py b/nbmanips/cell_output.py → nbmanips/cell/cell_output.py
@@ -1,24 +1,7 @@
-import html
-from typing import Dict, Optional, Union
+from typing import Dict, Optional
 
-from nbmanips.cell_utils import HtmlParser, ImageParser, ParserBase, TextParser
-from nbmanips.utils import total_size
-
-
-def _get_output_types(output_type: Union[set, dict, str]) -> set:
-    if isinstance(output_type, str):
-        if '/' in output_type:
-            return {output_type, output_type.split('/')[0]}
-        return {output_type}
-
-    output_types = set()
-    for output in output_type:
-        output_types |= _get_output_types(output)
-    return output_types
-
-
-def _to_html(text):
-    return html.escape(text).encode('ascii', 'xmlcharrefreplace').decode('ascii')
+from .cell_utils import _get_output_types, _to_html, total_size
+from .output_parsers import ParserBase
 
 
 class CellOutput:
@@ -284,8 +267,3 @@ class ExecuteResult(DataOutput, output_type='execute_result'):
     @property
     def execution_count(self):
         return self.content.get('execution_count', None)
-
-
-CellOutput.register_parser('text', TextParser())
-CellOutput.register_parser('text/html', HtmlParser())
-CellOutput.register_parser('image', ImageParser())
diff --git a/nbmanips/cell_utils.py → nbmanips/cell/cell_utils.py b/nbmanips/cell_utils.py → nbmanips/cell/cell_utils.py
@@ -1,10 +1,14 @@
+import json
 import re
 import shutil
-from abc import ABCMeta, abstractmethod
+import urllib.parse
+import warnings
 from mimetypes import guess_type
+from pathlib import Path
 from textwrap import wrap
+from typing import Union
 
-from nbmanips.color import supports_color
+from nbmanips.cell.color import supports_color
 
 try:
     import pygments
@@ -23,16 +27,19 @@
 except ImportError:
     colorama = None
 
-try:
-    from html2text import html2text
-except ImportError:
-    html2text = None
 
-try:
-    from img2text import img_to_ascii
-except ImportError:
-    img_to_ascii = None
+# -- Constants --
+# --- Attachment Constants ---
+MD_IMG_REGEX = r'!\[(?P<ALT_TEXT>.*?)]\((?P<PATH>.*?)\)'
+MD_IMG_EXPRESSION = r'![{ALT_TEXT}](attachment:{attachment_name})'
+HTML_IMG_REGEX = (
+    r'<img\s(?P<PREFIX>.*?)'
+    r'src\s*=\s*\"?(?P<PATH>(?<=\")[^\"]*(?=\")|(?:[^\"\s]|(?<=\\)\s)*[^\s\\/])\"?'
+    r'(?P<SUFFIX>.*?)>'
+)
+HTML_IMG_EXPRESSION = r'<img {PREFIX}src="attachment:{attachment_name}"{SUFFIX}>'
 
+# -- Styles --
 styles = {
     'single': '││┌─┐└─┘',
     'double': '║║╔═╗╚═╝',
@@ -119,60 +126,50 @@ def get_mime_type(path):
     return guess_type(path)[0]
 
 
-class ParserBase(metaclass=ABCMeta):
-    @abstractmethod
-    def parse(self, content, **kwargs):
-        return content
-
-    @property
-    def default_state(self):
-        return True
-
-
-class TextParser(ParserBase):
-    def parse(self, content, **kwargs):
-        return content
-
-
-class ImageParser(ParserBase):
-    def parse(
-        self,
-        content,
-        width=80,
-        colorful=COLOR_SUPPORTED,
-        bright=COLOR_SUPPORTED,
-        reverse=True,
-        **kwargs,
-    ):
-        if callable(img_to_ascii):
-            return img_to_ascii(
-                content,
-                base64=True,
-                colorful=colorful,
-                reverse=reverse,
-                width=width,
-                bright=bright,
-                **kwargs,
-            )
-        else:
-            raise ModuleNotFoundError(
-                'You need to pip install img2text for readable option'
-            )
-
-    @property
-    def default_state(self):
-        return img_to_ascii is not None
-
-
-class HtmlParser(ParserBase):
-    def parse(self, content, width=78, **kwargs):
-        if callable(html2text):
-            return html2text(content, bodywidth=width, **kwargs)
-        else:
-            raise ModuleNotFoundError(
-                'You need to pip install html2txt for readable option'
-            )
-
-    @property
-    def default_state(self):
-        return html2text is not None
+def _get_output_types(output_type: Union[set, dict, str]) -> set:
+    if isinstance(output_type, str):
+        if '/' in output_type:
+            return {output_type, output_type.split('/')[0]}
+        return {output_type}
+
+    output_types = set()
+    for output in output_type:
+        output_types |= _get_output_types(output)
+    return output_types
+
+
+def _to_html(text):
+    import html
+
+    return html.escape(text).encode('ascii', 'xmlcharrefreplace').decode('ascii')
+
+
+def get_assets_path(nb, assets_path=None):
+    if assets_path is None:
+        assets_path = getattr(nb, '_original_path', None)
+        if assets_path:
+            return Path(assets_path).parent
+        return Path.cwd()
+
+    return Path(assets_path)
+
+
+def burn_attachment(match, cell, assets_path: Path, expr):
+    path = match.group('PATH')
+    if path.startswith('attachment:'):
+        return match.group(0)
+
+    path = assets_path / urllib.parse.unquote(path)
+    if not path.exists():
+        path = match.group('PATH')
+        warnings.warn(f"Couldn't find '{path}'")
+        return match.group(0)
+
+    match_dict = match.groupdict()
+    attachment_name = match_dict.pop('PATH').replace(' ', '%20')
+    cell.attach(str(path), attachment_name=attachment_name)
+    return expr.format(**match_dict, attachment_name=attachment_name)
+
+
+def total_size(o):
+    return len(json.dumps(o).encode('utf-8'))
diff --git a/nbmanips/cell.py → nbmanips/cell/cells.py b/nbmanips/cell.py → nbmanips/cell/cells.py
@@ -1,7 +1,4 @@
-import base64
 import re
-import uuid
-from copy import deepcopy
 from pathlib import Path
 from typing import Any, Dict, List, Optional, Union
 
@@ -14,12 +11,15 @@
     pygments = None
     get_lexer_by_name = None
 
-from bs4 import BeautifulSoup
-from nbconvert.filters.markdown_mistune import IPythonRenderer, MarkdownWithMath
+from nbmanips.cell.cell_utils import (
+    FORMATTER,
+    get_mime_type,
+    monochrome,
+    printable_cell,
+    total_size,
+)
 
-from nbmanips.cell_output import CellOutput
-from nbmanips.cell_utils import FORMATTER, get_mime_type, monochrome, printable_cell
-from nbmanips.utils import total_size
+from .cell_output import CellOutput
 
 
 class Cell:
@@ -72,6 +72,8 @@ def outputs(self):
         return map(CellOutput, self.cell.get('outputs', []))
 
     def get_copy(self, new_id=None):
+        from copy import deepcopy
+
         cell = self.__class__(deepcopy(self.cell), None)
         if new_id is not None:
             cell.id = new_id
@@ -123,14 +125,25 @@ def contains(self, text, case=True, output=False, regex=False, flags=0):
             search_target += '\n' + self.output
 
         if not regex:
-            text = re.escape(text)
+            if not case:
+                text = text.lower()
+                search_target = search_target.lower()
+
+            return text in search_target
 
         if case is False:
             flags = flags | re.IGNORECASE
         else:
             flags = flags & ~re.IGNORECASE
         return bool(re.search(text, search_target, flags=flags))
 
+    def has_match(self, regex, output=False):
+        search_target = self.source
+        if output:
+            search_target += '\n' + self.output
+
+        return bool(regex.search(search_target))
+
     def erase_output(self, output_types: Optional[Union[str, set]] = None):
         """
         erase output of cells that have a given output_type
@@ -242,6 +255,8 @@ def remove_tag(self, tag: str):
 
     @staticmethod
     def generate_id_candidate():
+        import uuid
+
         return uuid.uuid4().hex[:8]
 
     def __new__(cls, content, *args, **kwargs):
@@ -325,6 +340,8 @@ def attachments(self):
         return self.cell.setdefault('attachments', {})
 
     def attach(self, path: Union[str, Path], attachment_name: Optional[str] = None):
+        import base64
+
         mime_type = get_mime_type(str(path))
         path = Path(path)
         attachment_name = attachment_name or path.name
@@ -334,13 +351,17 @@ def attach(self, path: Union[str, Path], attachment_name: Optional[str] = None):
 
     @property
     def html(self):
+        from nbconvert.filters.markdown_mistune import IPythonRenderer, MarkdownWithMath
+
         renderer = IPythonRenderer(
             escape=False, attachments=self.attachments, exclude_anchor_links=True
         )
         return MarkdownWithMath(renderer=renderer).render(self.source)
 
     @property
     def soup(self):
+        from bs4 import BeautifulSoup
+
         return BeautifulSoup(self.html, self._bs4_parser)
 
 

diff --git a/nbmanips/color.py → nbmanips/cell/color.py b/nbmanips/color.py → nbmanips/cell/color.py
diff --git a/nbmanips/cell/output_parsers.py b/nbmanips/cell/output_parsers.py
@@ -0,0 +1,72 @@
+from abc import ABCMeta, abstractmethod
+
+from nbmanips.cell.cell_utils import COLOR_SUPPORTED
+
+try:
+    from html2text import html2text
+except ImportError:
+    html2text = None
+
+try:
+    from img2text import img_to_ascii
+except ImportError:
+    img_to_ascii = None
+
+
+class ParserBase(metaclass=ABCMeta):
+    @abstractmethod
+    def parse(self, content, **kwargs):
+        return content
+
+    @property
+    def default_state(self):
+        return True
+
+
+class TextParser(ParserBase):
+    def parse(self, content, **kwargs):
+        return content
+
+
+class ImageParser(ParserBase):
+    def parse(
+        self,
+        content,
+        width=80,
+        colorful=COLOR_SUPPORTED,
+        bright=COLOR_SUPPORTED,
+        reverse=True,
+        **kwargs,
+    ):
+        if callable(img_to_ascii):
+            return img_to_ascii(
+                content,
+                base64=True,
+                colorful=colorful,
+                reverse=reverse,
+                width=width,
+                bright=bright,
+                **kwargs,
+            )
+        else:
+            raise ModuleNotFoundError(
+                'You need to pip install img2text for readable option'
+            )
+
+    @property
+    def default_state(self):
+        return img_to_ascii is not None
+
+
+class HtmlParser(ParserBase):
+    def parse(self, content, width=78, **kwargs):
+        if callable(html2text):
+            return html2text(content, bodywidth=width, **kwargs)
+        else:
+            raise ModuleNotFoundError(
+                'You need to pip install html2txt for readable option'
+            )
+
+    @property
+    def default_state(self):
+        return html2text is not None